From 72ac285654112f566dcb36c86ac7a83cd173ef7a Mon Sep 17 00:00:00 2001 From: billmetangmo <25366207+billmetangmo@users.noreply.github.com> Date: Fri, 28 Oct 2022 16:11:57 +0000 Subject: [PATCH] refactor: include objet and title to pipeline (#15) --- etl/experiments/4.process_data.ipynb | 1211 ++++++++++++++++++++++++++ etl/filter-cameroon.ipynb | 1176 ++----------------------- 2 files changed, 1299 insertions(+), 1088 deletions(-) create mode 100644 etl/experiments/4.process_data.ipynb diff --git a/etl/experiments/4.process_data.ipynb b/etl/experiments/4.process_data.ipynb new file mode 100644 index 0000000..3a6cbd8 --- /dev/null +++ b/etl/experiments/4.process_data.ipynb @@ -0,0 +1,1211 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Necessary imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load and viz data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"./ref-rna-real-mars-2022-enriched-not-qualified.csv\", index_col=0)\n", + "data = data[data.columns[1:]] # ignore first column it is index not correctly saved" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 1071 entries, 0 to 1070\n", + "Data columns (total 18 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 1071 non-null object \n", + " 1 titre 1071 non-null object \n", + " 2 objet 1071 non-null object \n", + " 3 adrs_numvoie 978 non-null object \n", + " 4 adrs_typevoie 989 non-null object \n", + " 5 adrs_libvoie 1015 non-null object \n", + " 6 adrs_codepostal 1070 non-null float64\n", + " 7 adrs_libcommune 1071 non-null object \n", + " 8 siteweb 32 non-null object \n", + " 9 adrs 1071 non-null object \n", + " 10 dept 1071 non-null object \n", + " 11 region 1071 non-null object \n", + " 12 social_object1_libelle 1071 non-null object \n", + " 13 social_object2_libelle 1071 non-null object \n", + " 14 longitude 1071 non-null float64\n", + " 15 latitude 1071 non-null float64\n", + " 16 facebook_url 1071 non-null object \n", + " 17 helloasso_url 1071 non-null object \n", + "dtypes: float64(3), object(15)\n", + "memory usage: 159.0+ KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitreobjetadrs_numvoieadrs_typevoieadrs_libvoieadrs_libcommunesitewebadrsdeptregionsocial_object1_libellesocial_object2_libellefacebook_urlhelloasso_url
count10711071107197898910151071321071107110711071107110711071
unique1071106510692433390253331102885162421945658
topW751075369LES ENFANTS DU CAMEROUNo2RUEJean JaurèsParisFRANCE80000 AmiensParisÎle-de-FranceAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...AUTRESnot foundnot found
freq133495938952121154603578041469
\n", + "
" + ], + "text/plain": [ + " id titre objet adrs_numvoie adrs_typevoie \\\n", + "count 1071 1071 1071 978 989 \n", + "unique 1071 1065 1069 243 33 \n", + "top W751075369 LES ENFANTS DU CAMEROUN o 2 RUE \n", + "freq 1 3 3 49 593 \n", + "\n", + " adrs_libvoie adrs_libcommune siteweb adrs dept \\\n", + "count 1015 1071 32 1071 1071 \n", + "unique 902 533 31 1028 85 \n", + "top Jean Jaurès Paris FRANCE 80000 Amiens Paris \n", + "freq 8 95 2 12 115 \n", + "\n", + " region social_object1_libelle \\\n", + "count 1071 1071 \n", + "unique 16 24 \n", + "top Île-de-France AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... \n", + "freq 460 357 \n", + "\n", + " social_object2_libelle facebook_url helloasso_url \n", + "count 1071 1071 1071 \n", + "unique 21 945 658 \n", + "top AUTRES not found not found \n", + "freq 804 14 69 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.describe(include=object)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitreobjetadrs_numvoieadrs_typevoieadrs_libvoieadrs_codepostaladrs_libcommunesitewebadrsdeptregionsocial_object1_libellesocial_object2_libellelongitudelatitudefacebook_urlhelloasso_url
938W212005614ASPAUC : AIDE SCOLAIRE ET PROFESSIONNELLE AU C...aider à la formation des jeunes au cameroun da...1RUEdes hauts de la Combe21000.0DijonNaN1 RUE des hauts de la Combe 21000 DijonCôte-d'OrBourgogne-Franche-ComtéASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...ÉDUCATION FORMATION4.99307447.321649https://www.facebook.com/Aide-%C3%A0-lorientat...https://www.helloasso.com/associations/dynamiq...
1030W181002787SOLIDARITE AVEC LE CAMEROUN (S.O.C.A.M)toutes formes d'actions de solidarité entre la...4bisRUELouis Mallet18000.0BourgesNaN4bis RUE Louis Mallet 18000 BourgesCherCentre-Val de LoireASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES2.38495547.079429https://www.facebook.com/Solidarit%C3%A9-Camer...https://www.helloasso.com/associations/solidar...
973W272000935JACKY LOBE POUR LE CAMEROUNcette association a un but humanitaire ; son a...71RUEdu Val de Risle27290.0Illeville-sur-MontfortNaN71 RUE du Val de Risle 27290 Illeville-sur-Mon...EureNormandieASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES0.72640549.326329https://www.facebook.com/public/Jacky-Lob%C3%A9https://www.helloasso.com/associations/maison-...
702W951000628BANA BA NGODI Ô MBENGUEde promouvoir le developement tant economique ...73RUEde la Coulée Verte91700.0Fleury-MérogisNaN73 RUE de la Coulée Verte 91700 Fleury-MérogisEssonneÎle-de-FranceAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...AUTRES2.35762848.639857https://www.facebook.com/bana.mbenguenot found
699W493006887ASSOCIATION CULTURELLE ET HUMANITAIRE EBOLOWA ...promouvoir les liens d'amitié de solidarité et...13AVDavid d'Angers49400.0SaumurNaN13 AV David d'Angers 49400 SaumurMaine-et-LoirePays de la LoireASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES-0.07207247.269654https://www.facebook.com/AssociationCulturelle...https://www.helloasso.com/associations/associa...
\n", + "
" + ], + "text/plain": [ + " id titre \\\n", + "938 W212005614 ASPAUC : AIDE SCOLAIRE ET PROFESSIONNELLE AU C... \n", + "1030 W181002787 SOLIDARITE AVEC LE CAMEROUN (S.O.C.A.M) \n", + "973 W272000935 JACKY LOBE POUR LE CAMEROUN \n", + "702 W951000628 BANA BA NGODI Ô MBENGUE \n", + "699 W493006887 ASSOCIATION CULTURELLE ET HUMANITAIRE EBOLOWA ... \n", + "\n", + " objet adrs_numvoie \\\n", + "938 aider à la formation des jeunes au cameroun da... 1 \n", + "1030 toutes formes d'actions de solidarité entre la... 4bis \n", + "973 cette association a un but humanitaire ; son a... 71 \n", + "702 de promouvoir le developement tant economique ... 73 \n", + "699 promouvoir les liens d'amitié de solidarité et... 13 \n", + "\n", + " adrs_typevoie adrs_libvoie adrs_codepostal \\\n", + "938 RUE des hauts de la Combe 21000.0 \n", + "1030 RUE Louis Mallet 18000.0 \n", + "973 RUE du Val de Risle 27290.0 \n", + "702 RUE de la Coulée Verte 91700.0 \n", + "699 AV David d'Angers 49400.0 \n", + "\n", + " adrs_libcommune siteweb \\\n", + "938 Dijon NaN \n", + "1030 Bourges NaN \n", + "973 Illeville-sur-Montfort NaN \n", + "702 Fleury-Mérogis NaN \n", + "699 Saumur NaN \n", + "\n", + " adrs dept \\\n", + "938 1 RUE des hauts de la Combe 21000 Dijon Côte-d'Or \n", + "1030 4bis RUE Louis Mallet 18000 Bourges Cher \n", + "973 71 RUE du Val de Risle 27290 Illeville-sur-Mon... Eure \n", + "702 73 RUE de la Coulée Verte 91700 Fleury-Mérogis Essonne \n", + "699 13 AV David d'Angers 49400 Saumur Maine-et-Loire \n", + "\n", + " region \\\n", + "938 Bourgogne-Franche-Comté \n", + "1030 Centre-Val de Loire \n", + "973 Normandie \n", + "702 Île-de-France \n", + "699 Pays de la Loire \n", + "\n", + " social_object1_libelle \\\n", + "938 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", + "1030 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", + "973 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", + "702 AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... \n", + "699 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", + "\n", + " social_object2_libelle longitude latitude \\\n", + "938 ÉDUCATION FORMATION 4.993074 47.321649 \n", + "1030 AUTRES 2.384955 47.079429 \n", + "973 AUTRES 0.726405 49.326329 \n", + "702 AUTRES 2.357628 48.639857 \n", + "699 AUTRES -0.072072 47.269654 \n", + "\n", + " facebook_url \\\n", + "938 https://www.facebook.com/Aide-%C3%A0-lorientat... \n", + "1030 https://www.facebook.com/Solidarit%C3%A9-Camer... \n", + "973 https://www.facebook.com/public/Jacky-Lob%C3%A9 \n", + "702 https://www.facebook.com/bana.mbengue \n", + "699 https://www.facebook.com/AssociationCulturelle... \n", + "\n", + " helloasso_url \n", + "938 https://www.helloasso.com/associations/dynamiq... \n", + "1030 https://www.helloasso.com/associations/solidar... \n", + "973 https://www.helloasso.com/associations/maison-... \n", + "702 not found \n", + "699 https://www.helloasso.com/associations/associa... " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.sample(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Title in uppercase and objet in lower" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "data[\"titre\"] = data[\"titre\"].apply(lambda x: str(x).upper())\n", + "data[\"objet\"] = data[\"objet\"].apply(lambda x: str(x).lower())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitreobjetadrs_numvoieadrs_typevoieadrs_libvoieadrs_codepostaladrs_libcommunesitewebadrsdeptregionsocial_object1_libellesocial_object2_libellelongitudelatitudefacebook_urlhelloasso_url
717W952002610BAMENDA COMMUNE INTERETS GROUPE (B.C.I.G.F.)faciliter la reinsertion dans le pays d'origin...4ALLRaguenet95210.0Saint-GratienNaN4 ALL Raguenet 95210 Saint-GratienVal-d'OiseÎle-de-FranceASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES2.40813449.965700https://www.facebook.com/AllianceFrancaiseBame...https://www.helloasso.com/associations/l-as-de...
188W691062833PARTAGE AVEC PETTE SON HOPITAL ET SA REGIONsoutien matériel et moral de l'hôpital de pett...6RUEdes Farges Via Romana69005.0Lyon 5e ArrondissementNaN6 RUE des Farges Via Romana 69005 Lyon 5e Arro...RhôneAuvergne-Rhône-AlpesSANTÉSANTÉ4.82843645.763571https://www.facebook.com/Hopital-Pett%C3%A9-10...https://www.helloasso.com/associations/collect...
865W632002712ELAT MEYONG (L'UNION DES PEUPLES)promotion des actions de développement en dire...7BDWinston Churchill63000.0Clermont-FerrandNaN7 BD Winston Churchill 63000 Clermont-FerrandPuy-de-DômeAuvergne-Rhône-AlpesASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...SANTÉ3.09314945.761807https://www.facebook.com/Elat-Meyong-108297724...https://www.helloasso.com/associations/florest...
866W632002784ASSOCIATION CAMEROUNAISE DES ETUDIANTS ET ANCI...créer des liens entre les membres ; contribuer...4RUEGerbert63000.0Clermont-FerrandNaN4 RUE Gerbert 63000 Clermont-FerrandPuy-de-DômeAuvergne-Rhône-AlpesÉDUCATION FORMATIONÉDUCATION FORMATION3.07911045.769604https://www.facebook.com/etudiantsetanciensetu...https://www.helloasso.com/associations/ccalumn...
496W922011968CAMEROON PATRIOTIC DIASPORAmobiliser la diaspora camerounaise dans un cad...4ALLDES ERABLES92000.0NanterreNaN4 ALL DES ERABLES 92000 NanterreHauts-de-SeineÎle-de-FranceDOMAINES DIVERS/ DOMAINES DE NOMENCLATURE SITA...AUTRES2.19175648.896417https://www.facebook.com/cameroonpatriotic.dia...not found
\n", + "
" + ], + "text/plain": [ + " id titre \\\n", + "717 W952002610 BAMENDA COMMUNE INTERETS GROUPE (B.C.I.G.F.) \n", + "188 W691062833 PARTAGE AVEC PETTE SON HOPITAL ET SA REGION \n", + "865 W632002712 ELAT MEYONG (L'UNION DES PEUPLES) \n", + "866 W632002784 ASSOCIATION CAMEROUNAISE DES ETUDIANTS ET ANCI... \n", + "496 W922011968 CAMEROON PATRIOTIC DIASPORA \n", + "\n", + " objet adrs_numvoie \\\n", + "717 faciliter la reinsertion dans le pays d'origin... 4 \n", + "188 soutien matériel et moral de l'hôpital de pett... 6 \n", + "865 promotion des actions de développement en dire... 7 \n", + "866 créer des liens entre les membres ; contribuer... 4 \n", + "496 mobiliser la diaspora camerounaise dans un cad... 4 \n", + "\n", + " adrs_typevoie adrs_libvoie adrs_codepostal \\\n", + "717 ALL Raguenet 95210.0 \n", + "188 RUE des Farges Via Romana 69005.0 \n", + "865 BD Winston Churchill 63000.0 \n", + "866 RUE Gerbert 63000.0 \n", + "496 ALL DES ERABLES 92000.0 \n", + "\n", + " adrs_libcommune siteweb \\\n", + "717 Saint-Gratien NaN \n", + "188 Lyon 5e Arrondissement NaN \n", + "865 Clermont-Ferrand NaN \n", + "866 Clermont-Ferrand NaN \n", + "496 Nanterre NaN \n", + "\n", + " adrs dept \\\n", + "717 4 ALL Raguenet 95210 Saint-Gratien Val-d'Oise \n", + "188 6 RUE des Farges Via Romana 69005 Lyon 5e Arro... Rhône \n", + "865 7 BD Winston Churchill 63000 Clermont-Ferrand Puy-de-Dôme \n", + "866 4 RUE Gerbert 63000 Clermont-Ferrand Puy-de-Dôme \n", + "496 4 ALL DES ERABLES 92000 Nanterre Hauts-de-Seine \n", + "\n", + " region social_object1_libelle \\\n", + "717 Île-de-France ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", + "188 Auvergne-Rhône-Alpes SANTÉ \n", + "865 Auvergne-Rhône-Alpes ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", + "866 Auvergne-Rhône-Alpes ÉDUCATION FORMATION \n", + "496 Île-de-France DOMAINES DIVERS/ DOMAINES DE NOMENCLATURE SITA... \n", + "\n", + " social_object2_libelle longitude latitude \\\n", + "717 AUTRES 2.408134 49.965700 \n", + "188 SANTÉ 4.828436 45.763571 \n", + "865 SANTÉ 3.093149 45.761807 \n", + "866 ÉDUCATION FORMATION 3.079110 45.769604 \n", + "496 AUTRES 2.191756 48.896417 \n", + "\n", + " facebook_url \\\n", + "717 https://www.facebook.com/AllianceFrancaiseBame... \n", + "188 https://www.facebook.com/Hopital-Pett%C3%A9-10... \n", + "865 https://www.facebook.com/Elat-Meyong-108297724... \n", + "866 https://www.facebook.com/etudiantsetanciensetu... \n", + "496 https://www.facebook.com/cameroonpatriotic.dia... \n", + "\n", + " helloasso_url \n", + "717 https://www.helloasso.com/associations/l-as-de... \n", + "188 https://www.helloasso.com/associations/collect... \n", + "865 https://www.helloasso.com/associations/florest... \n", + "866 https://www.helloasso.com/associations/ccalumn... \n", + "496 not found " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.sample(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remove unecessary spaces" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "text_columns = [\n", + " \"titre\", \"objet\", \"social_object1_libelle\", \"social_object2_libelle\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def remove_space_at_the_end(x: str):\n", + " if x is not None:\n", + " return x.strip()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "for column in text_columns:\n", + " data[column] = data[column].apply(remove_space_at_the_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitreobjetadrs_numvoieadrs_typevoieadrs_libvoieadrs_codepostaladrs_libcommunesitewebadrsdeptregionsocial_object1_libellesocial_object2_libellelongitudelatitudefacebook_urlhelloasso_url
797W452009254UN ESPOIR POUR LES ENFANTS D'EBOJEvenir en aide materiellement, sanitairement et...10ALLDE REUILLY45430.0ChécyNaN10 ALL DE REUILLY 45430 ChécyLoiretCentre-Val de LoireASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES2.03907347.894688https://fr-fr.facebook.com/espoirpour1enfant/https://www.helloasso.com/associations/espoir-...
10W751159475AKIBApromouvoir les échanges artistiques entre la r...56RUEdes Maraîchers75020.0ParisNaN56 RUE des Maraîchers 75020 ParisParisÎle-de-FranceCULTURE/ PRATIQUES D'ACTIVITÉS ARTISTIQUES/ PR...ACTION SOCIOCULTURELLE2.40690548.852059https://fr-fr.facebook.com/AKIBA-STATION-32461...https://www.helloasso.com/associations/akiba-a...
908W715000341AMITIE-SOLIDARITE ET COOPERATION FRANCE-CAMEROUNdévelopper et intensifier la collaboration ent...NaNRUEFavier71960.0IgéNaNRUE Favier 71960 IgéSaône-et-LoireBourgogne-Franche-ComtéASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES4.73906246.398325https://fr-fr.facebook.com/amicoopfchttps://www.helloasso.com/associations/amitie-...
262W332008081LES VEILLEUSES CAMEROUNAISESrassembler les femmes camerounaises et de tout...NaNNaNappartement 162033400.0TalenceNaNappartement 1620 33400 TalenceGirondeNouvelle-AquitaineAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...AUTRES-0.58796344.808844https://fr-fr.facebook.com/LesVeilleuses/https://www.helloasso.com/associations/associa...
439W771012288ASSOCIATION PREMIER SECOURS AUX SOINS D'URGENCEl'association est d'oeuvrer dans l'humanitaire...63RUEdu Manoir77700.0Magny-le-HongreNaN63 RUE du Manoir 77700 Magny-le-HongreSeine-et-MarneÎle-de-FranceAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...AUTRES2.81846148.869894https://www.facebook.com/Premiers-Soins-et-Sec...https://www.helloasso.com/associations/associa...
\n", + "
" + ], + "text/plain": [ + " id titre \\\n", + "797 W452009254 UN ESPOIR POUR LES ENFANTS D'EBOJE \n", + "10 W751159475 AKIBA \n", + "908 W715000341 AMITIE-SOLIDARITE ET COOPERATION FRANCE-CAMEROUN \n", + "262 W332008081 LES VEILLEUSES CAMEROUNAISES \n", + "439 W771012288 ASSOCIATION PREMIER SECOURS AUX SOINS D'URGENCE \n", + "\n", + " objet adrs_numvoie \\\n", + "797 venir en aide materiellement, sanitairement et... 10 \n", + "10 promouvoir les échanges artistiques entre la r... 56 \n", + "908 développer et intensifier la collaboration ent... NaN \n", + "262 rassembler les femmes camerounaises et de tout... NaN \n", + "439 l'association est d'oeuvrer dans l'humanitaire... 63 \n", + "\n", + " adrs_typevoie adrs_libvoie adrs_codepostal adrs_libcommune siteweb \\\n", + "797 ALL DE REUILLY 45430.0 Chécy NaN \n", + "10 RUE des Maraîchers 75020.0 Paris NaN \n", + "908 RUE Favier 71960.0 Igé NaN \n", + "262 NaN appartement 1620 33400.0 Talence NaN \n", + "439 RUE du Manoir 77700.0 Magny-le-Hongre NaN \n", + "\n", + " adrs dept \\\n", + "797 10 ALL DE REUILLY 45430 Chécy Loiret \n", + "10 56 RUE des Maraîchers 75020 Paris Paris \n", + "908 RUE Favier 71960 Igé Saône-et-Loire \n", + "262 appartement 1620 33400 Talence Gironde \n", + "439 63 RUE du Manoir 77700 Magny-le-Hongre Seine-et-Marne \n", + "\n", + " region \\\n", + "797 Centre-Val de Loire \n", + "10 Île-de-France \n", + "908 Bourgogne-Franche-Comté \n", + "262 Nouvelle-Aquitaine \n", + "439 Île-de-France \n", + "\n", + " social_object1_libelle \\\n", + "797 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", + "10 CULTURE/ PRATIQUES D'ACTIVITÉS ARTISTIQUES/ PR... \n", + "908 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", + "262 AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... \n", + "439 AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... \n", + "\n", + " social_object2_libelle longitude latitude \\\n", + "797 AUTRES 2.039073 47.894688 \n", + "10 ACTION SOCIOCULTURELLE 2.406905 48.852059 \n", + "908 AUTRES 4.739062 46.398325 \n", + "262 AUTRES -0.587963 44.808844 \n", + "439 AUTRES 2.818461 48.869894 \n", + "\n", + " facebook_url \\\n", + "797 https://fr-fr.facebook.com/espoirpour1enfant/ \n", + "10 https://fr-fr.facebook.com/AKIBA-STATION-32461... \n", + "908 https://fr-fr.facebook.com/amicoopfc \n", + "262 https://fr-fr.facebook.com/LesVeilleuses/ \n", + "439 https://www.facebook.com/Premiers-Soins-et-Sec... \n", + "\n", + " helloasso_url \n", + "797 https://www.helloasso.com/associations/espoir-... \n", + "10 https://www.helloasso.com/associations/akiba-a... \n", + "908 https://www.helloasso.com/associations/amitie-... \n", + "262 https://www.helloasso.com/associations/associa... \n", + "439 https://www.helloasso.com/associations/associa... " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.sample(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remove extra \"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitreobjetadrs_numvoieadrs_typevoieadrs_libvoieadrs_codepostaladrs_libcommunesitewebadrsdeptregionsocial_object1_libellesocial_object2_libellelongitudelatitudefacebook_urlhelloasso_url
69W751228313PARIS-KRIBI\"association dite \"\"paris - kribi\"\" fondée le ...sensibiliser les enfants et les jeunes à la p...020015000000NaNRUENaN8 rue des Haies 75013 Paris 13eGirondeNouvelle-AquitaineASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES1.66730550.272874https://www.facebook.com/ParisKribiBroacante/https://www.helloasso.com/associations/kri-gou...
\n", + "
" + ], + "text/plain": [ + " id titre \\\n", + "69 W751228313 PARIS-KRIBI \n", + "\n", + " objet \\\n", + "69 \"association dite \"\"paris - kribi\"\" fondée le ... \n", + "\n", + " adrs_numvoie adrs_typevoie \\\n", + "69 sensibiliser les enfants et les jeunes à la p... 020015 \n", + "\n", + " adrs_libvoie adrs_codepostal adrs_libcommune siteweb \\\n", + "69 000000 NaN RUE NaN \n", + "\n", + " adrs dept region \\\n", + "69 8 rue des Haies 75013 Paris 13e Gironde Nouvelle-Aquitaine \n", + "\n", + " social_object1_libelle social_object2_libelle \\\n", + "69 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... AUTRES \n", + "\n", + " longitude latitude facebook_url \\\n", + "69 1.667305 50.272874 https://www.facebook.com/ParisKribiBroacante/ \n", + "\n", + " helloasso_url \n", + "69 https://www.helloasso.com/associations/kri-gou... " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data[\"id\"] == \"W751228313\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['\"association dite \"\"paris - kribi\"\" fondée le 10 janvier 2015 a pour but, en france, au cameroun et dans le reste du monde, de promouvoir le développement social des enfants camerounais par l\\'éducation, la formation, la culture, le sport, l\\'accès à des sanitaires, l\\'insertion sociale et la médiation culturelle'],\n", + " dtype=object)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data[\"id\"] == \"W751228313\"][\"objet\"].values" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def replace_double_quote(x: str):\n", + " if x is not None:\n", + " return x.replace(\"\\\"\\\"\", \"'\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "data[\"objet\"] = data[\"objet\"].apply(replace_double_quote)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"association dite 'paris - kribi' fondée le 10 janvier 2015 a pour but, en france, au cameroun et dans le reste du monde, de promouvoir le développement social des enfants camerounais par l'éducation, la formation, la culture, le sport, l'accès à des sanitaires, l'insertion sociale et la médiation culturelle\n" + ] + } + ], + "source": [ + "print(data[data[\"id\"] == \"W751228313\"][\"objet\"].values[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save without index" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "filename = 'ref-rna-real-mars-2022-enriched-not-qualified-process'\n", + "compression_options = dict(method='zip', archive_name=f'{filename}.csv')\n", + "data.to_csv(f'./{filename}.zip', compression=compression_options, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.7 64-bit ('3.9.7')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "53aeb7dd7f44d25b714b697de6bfda6330a126948f4d762b521a64be9dc2d11e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/etl/filter-cameroon.ipynb b/etl/filter-cameroon.ipynb index 53bbbd2..1bfefb0 100644 --- a/etl/filter-cameroon.ipynb +++ b/etl/filter-cameroon.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "headed-carnival", "metadata": { "collapsed": false, @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "charged-fairy", "metadata": { "collapsed": false, @@ -33,81 +33,81 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (1,3,18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (1,3,18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (2,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (2,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (1,3,18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (1,3,18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (22) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (22) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (1,2,3,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (1,2,3,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,18) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,18) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (15,18) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (15,18) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (1,3,18) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (1,3,18) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (1,18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (1,18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (18,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (3,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (3,22) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n", - "/tmp/ipykernel_17912/2272544456.py:4: DtypeWarning: Columns (1,3,18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_14988/2272544456.py:4: DtypeWarning: Columns (1,3,18,22,30) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.concat((pd.read_csv(f, delimiter=\";\", header=0, encoding=\"ISO-8859-1\") for f in all_files), ignore_index=True)\n" ] } @@ -121,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "muslim-directive", "metadata": { "collapsed": false, @@ -145,7 +145,7 @@ " dtype='object')" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -156,293 +156,33 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "id": "local-stanley", "metadata": { "tags": [] }, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtitreobjetobjet_social1objet_social2adrs_numvoieadrs_typevoieadrs_libvoieadrs_codepostaladrs_libcommunesitewebadrs
5359W012002482TRAIT D'UNION FRANCO CAMEROUNAISfavoriser le développement économique, culture...20005062RUEde la Ramassière1600Reyrieux62 rue de la ramassiere 1600 reyrieux
10200W012007337LA MAIN TENDUE (LMT)promotion des actions et expériences locales d...20000020 bisRUEde Lyon1800Meximieux20 bis rue de lyon 1800 meximieux
11259W012008396ASSOCIATION BIA-BIA (A.B.B.)promouvoir l'intérêt général des villages came...2000001340Marsonnas1340 marsonnas
12172W012009309SOLIDARITE BRESSANE CAMEROUNAISE (SBC)solidarité et entraide par l'assistance de ces...2000002RUELalande1000Bourg-en-Bresse2 rue lalande 1000 bourg-en-bresse
12436W012009573ASSOCIATION TREMPLINGALLE - KAMKAfaire participer l'immigration d'origine afric...2000002BDIrène Joliot Curie1000Bourg-en-Bresse2 boulevard irene joliot curie 1000 bourg-en-b...
.......................................
1834654W953011922CAMEROONIAN BUSINESS NETWORKrassembler les entrepreneurs et porteurs de pr...1404002RUEdu désert aux nuages95800Cergy2 rue du desert aux nuages 95800 cergy
1875746W9C1003716KAMER973-ASSOCIATION DES CAMEROUNAIS DE LA GUYANEpermettre aux personnes d'origine camerounaise...14040071RUELieutenant Goinet97300Cayenne71 rue lieutenant goinet 97300 cayenne
1884141W9R1005550KAMER OCEAN INDIENpromouvoir l'amitié, la solidarité et la frate...1404070756 BCHEM1Romely97419La Possession6 b chem1 romely 97419 la possession
1885143W9R1009657SOLIDARITES CAMEROUN REUNIONraffermir l'amitié, la solidarité la fraternit...9000140356RUEPastel97438Sainte-Marie6 rue pastel 97438 sainte-marie
1903427W9T1002080ASSOCIATIONS DES CAMEROUNAIS DE MAYOTTE (ACAM)rencontre des camerounais résidents ou de pass...1404008RTEde Vahibé97600Mamoudzou8 route de vahibe 97600 mamoudzou
\n", - "

2177 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " id titre \\\n", - "5359 W012002482 TRAIT D'UNION FRANCO CAMEROUNAIS \n", - "10200 W012007337 LA MAIN TENDUE (LMT) \n", - "11259 W012008396 ASSOCIATION BIA-BIA (A.B.B.) \n", - "12172 W012009309 SOLIDARITE BRESSANE CAMEROUNAISE (SBC) \n", - "12436 W012009573 ASSOCIATION TREMPLINGALLE - KAMKA \n", - "... ... ... \n", - "1834654 W953011922 CAMEROONIAN BUSINESS NETWORK \n", - "1875746 W9C1003716 KAMER973-ASSOCIATION DES CAMEROUNAIS DE LA GUYANE \n", - "1884141 W9R1005550 KAMER OCEAN INDIEN \n", - "1885143 W9R1009657 SOLIDARITES CAMEROUN REUNION \n", - "1903427 W9T1002080 ASSOCIATIONS DES CAMEROUNAIS DE MAYOTTE (ACAM) \n", - "\n", - " objet objet_social1 \\\n", - "5359 favoriser le développement économique, culture... 20005 \n", - "10200 promotion des actions et expériences locales d... 20000 \n", - "11259 promouvoir l'intérêt général des villages came... 20000 \n", - "12172 solidarité et entraide par l'assistance de ces... 20000 \n", - "12436 faire participer l'immigration d'origine afric... 20000 \n", - "... ... ... \n", - "1834654 rassembler les entrepreneurs et porteurs de pr... 14040 \n", - "1875746 permettre aux personnes d'origine camerounaise... 14040 \n", - "1884141 promouvoir l'amitié, la solidarité et la frate... 14040 \n", - "1885143 raffermir l'amitié, la solidarité la fraternit... 9000 \n", - "1903427 rencontre des camerounais résidents ou de pass... 14040 \n", - "\n", - " objet_social2 adrs_numvoie adrs_typevoie adrs_libvoie \\\n", - "5359 0 62 RUE de la Ramassière \n", - "10200 0 20 bis RUE de Lyon \n", - "11259 0 \n", - "12172 0 2 RUE Lalande \n", - "12436 0 2 BD Irène Joliot Curie \n", - "... ... ... ... ... \n", - "1834654 0 2 RUE du désert aux nuages \n", - "1875746 0 71 RUE Lieutenant Goinet \n", - "1884141 7075 6 B CHEM1 Romely \n", - "1885143 14035 6 RUE Pastel \n", - "1903427 0 8 RTE de Vahibé \n", - "\n", - " adrs_codepostal adrs_libcommune siteweb \\\n", - "5359 1600 Reyrieux \n", - "10200 1800 Meximieux \n", - "11259 1340 Marsonnas \n", - "12172 1000 Bourg-en-Bresse \n", - "12436 1000 Bourg-en-Bresse \n", - "... ... ... ... \n", - "1834654 95800 Cergy \n", - "1875746 97300 Cayenne \n", - "1884141 97419 La Possession \n", - "1885143 97438 Sainte-Marie \n", - "1903427 97600 Mamoudzou \n", - "\n", - " adrs \n", - "5359 62 rue de la ramassiere 1600 reyrieux \n", - "10200 20 bis rue de lyon 1800 meximieux \n", - "11259 1340 marsonnas \n", - "12172 2 rue lalande 1000 bourg-en-bresse \n", - "12436 2 boulevard irene joliot curie 1000 bourg-en-b... \n", - "... ... \n", - "1834654 2 rue du desert aux nuages 95800 cergy \n", - "1875746 71 rue lieutenant goinet 97300 cayenne \n", - "1884141 6 b chem1 romely 97419 la possession \n", - "1885143 6 rue pastel 97438 sainte-marie \n", - "1903427 8 route de vahibe 97600 mamoudzou \n", - "\n", - "[2177 rows x 12 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "ename": "ImportError", + "evalue": "libpostal.so.1: cannot open shared object file: No such file or directory", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn [11], line 41\u001b[0m\n\u001b[1;32m 38\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124madrs\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m row: expand_address(row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124madrs\u001b[39m\u001b[38;5;124m\"\u001b[39m])[\u001b[38;5;241m0\u001b[39m], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df\n\u001b[0;32m---> 41\u001b[0m df2 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mpipe(filter_cameroon) \\\n\u001b[1;32m 42\u001b[0m \u001b[38;5;241m.\u001b[39mpipe(remove_closed) \\\n\u001b[1;32m 43\u001b[0m \u001b[38;5;241m.\u001b[39mpipe(normalize) \\\n\u001b[1;32m 44\u001b[0m \u001b[38;5;241m.\u001b[39mpipe(select_relevant_columns) \\\n\u001b[1;32m 45\u001b[0m \u001b[38;5;241m.\u001b[39mpipe(add_column_adrs)\n\u001b[1;32m 47\u001b[0m df2\n", + "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.8/site-packages/pandas/core/generic.py:5839\u001b[0m, in \u001b[0;36mNDFrame.pipe\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5781\u001b[0m \u001b[38;5;129m@final\u001b[39m\n\u001b[1;32m 5782\u001b[0m \u001b[38;5;129m@doc\u001b[39m(klass\u001b[38;5;241m=\u001b[39m_shared_doc_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mklass\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 5783\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpipe\u001b[39m(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5787\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 5788\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 5789\u001b[0m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 5790\u001b[0m \u001b[38;5;124;03m Apply chainable functions that expect Series or DataFrames.\u001b[39;00m\n\u001b[1;32m 5791\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5837\u001b[0m \u001b[38;5;124;03m ... ) # doctest: +SKIP\u001b[39;00m\n\u001b[1;32m 5838\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 5839\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.8/site-packages/pandas/core/common.py:503\u001b[0m, in \u001b[0;36mpipe\u001b[0;34m(obj, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 502\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 503\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn [11], line 37\u001b[0m, in \u001b[0;36madd_column_adrs\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21madd_column_adrs\u001b[39m(df):\n\u001b[1;32m 34\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124madrs\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madrs_numvoie\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmap(\u001b[38;5;28mstr\u001b[39m)\u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madrs_typevoie\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmap(\u001b[38;5;28mstr\u001b[39m)\u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madrs_libvoie\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmap(\u001b[38;5;28mstr\u001b[39m)\u001b[38;5;241m+\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39m \\\n\u001b[1;32m 35\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madrs_codepostal\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmap(\u001b[38;5;28mstr\u001b[39m)\u001b[38;5;241m+\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m+\u001b[39mdf[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madrs_libcommune\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmap(\u001b[38;5;28mstr\u001b[39m)\n\u001b[0;32m---> 37\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpostal\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexpand\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m expand_address\n\u001b[1;32m 38\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124madrs\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m row: expand_address(row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124madrs\u001b[39m\u001b[38;5;124m\"\u001b[39m])[\u001b[38;5;241m0\u001b[39m], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df\n", + "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.8/site-packages/postal/expand.py:5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;03m\"\"\"Python bindings to libpostal expand_address.\"\"\"\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m__future__\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m unicode_literals\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpostal\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _expand\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpostal\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mencoding\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m safe_decode\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mexpand_address\u001b[39m(address, languages\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw):\n", + "\u001b[0;31mImportError\u001b[0m: libpostal.so.1: cannot open shared object file: No such file or directory" + ] } ], "source": [ + "def remove_space_at_the_end(x: str):\n", + " if x is not None:\n", + " return x.strip()\n", + "\n", "def filter_cameroon(df):\n", " return df[df['titre'].str.contains(\"CAMEROUN\", case=False,na=False) | df['objet'].str.contains(\"CAMEROUN\", case=False,na=False)]\n", "\n", @@ -455,7 +195,16 @@ " df['adrs_codepostal'] = df[\"adrs_codepostal\"].astype(int)\n", " df['objet_social1'] = df[\"objet_social1\"].astype(int)\n", " df['objet_social2'] = df[\"objet_social2\"].astype(int)\n", + "\n", + " text_columns = [\"titre\", \"objet\"]\n", + " for column in text_columns:\n", + " df[column] = df[column].apply(remove_space_at_the_end)\n", " df = df.fillna('') # this will avoid nan in adrs which concatenate multiple values\n", + " \n", + " def replace_double_quote(x: str):\n", + " if x is not None:\n", + " return x.replace(\"\\\"\\\"\", \"'\")\n", + " df[\"objet\"] = df[\"objet\"].apply(replace_double_quote)\n", "\n", " return df\n", "\n", @@ -481,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "alive-venue", "metadata": { "tags": [] @@ -518,364 +267,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "450186b2", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtitreobjetobjet_social1objet_social2adrs_numvoieadrs_typevoieadrs_libvoieadrs_codepostaladrs_libcommunesitewebadrsdeptregionsocial_object1_libellesocial_object2_libelle
5359W012002482TRAIT D'UNION FRANCO CAMEROUNAISfavoriser le développement économique, culture...20005062RUEde la Ramassière1600Reyrieux62 rue de la ramassiere 1600 reyrieuxEssonneÎle-de-FranceASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A...AUTRES
10200W012007337LA MAIN TENDUE (LMT)promotion des actions et expériences locales d...20000020 bisRUEde Lyon1800Meximieux20 bis rue de lyon 1800 meximieuxEssonneÎle-de-FranceASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A...AUTRES
11259W012008396ASSOCIATION BIA-BIA (A.B.B.)promouvoir l'intérêt général des villages came...2000001340Marsonnas1340 marsonnasEssonneÎle-de-FranceASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A...AUTRES
12172W012009309SOLIDARITE BRESSANE CAMEROUNAISE (SBC)solidarité et entraide par l'assistance de ces...2000002RUELalande1000Bourg-en-Bresse2 rue lalande 1000 bourg-en-bresseEssonneÎle-de-FranceASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A...AUTRES
12436W012009573ASSOCIATION TREMPLINGALLE - KAMKAfaire participer l'immigration d'origine afric...2000002BDIrène Joliot Curie1000Bourg-en-Bresse2 boulevard irene joliot curie 1000 bourg-en-b...EssonneÎle-de-FranceASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A...AUTRES
...................................................
1834654W953011922CAMEROONIAN BUSINESS NETWORKrassembler les entrepreneurs et porteurs de pr...1404002RUEdu désert aux nuages95800Cergy2 rue du desert aux nuages 95800 cergyVal-d'OiseÎle-de-FranceAMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT...AUTRES
1875746W9C1003716KAMER973-ASSOCIATION DES CAMEROUNAIS DE LA GUYANEpermettre aux personnes d'origine camerounaise...14040071RUELieutenant Goinet97300Cayenne71 rue lieutenant goinet 97300 cayenneGuyaneGuyaneAMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT...AUTRES
1884141W9R1005550KAMER OCEAN INDIENpromouvoir l'amitié, la solidarité et la frate...1404070756 BCHEM1Romely97419La Possession6 b chem1 romely 97419 la possessionRéunionRéunionAMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT...CLUBS DE LOISIRS, RELATIONS
1885143W9R1009657SOLIDARITES CAMEROUN REUNIONraffermir l'amitié, la solidarité la fraternit...9000140356RUEPastel97438Sainte-Marie6 rue pastel 97438 sainte-marieRéunionRéunionACTION SOCIOCULTURELLEAMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT...
1903427W9T1002080ASSOCIATIONS DES CAMEROUNAIS DE MAYOTTE (ACAM)rencontre des camerounais résidents ou de pass...1404008RTEde Vahibé97600Mamoudzou8 route de vahibe 97600 mamoudzouMayotteMayotteAMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT...AUTRES
\n", - "

2177 rows × 16 columns

\n", - "
" - ], - "text/plain": [ - " id titre \\\n", - "5359 W012002482 TRAIT D'UNION FRANCO CAMEROUNAIS \n", - "10200 W012007337 LA MAIN TENDUE (LMT) \n", - "11259 W012008396 ASSOCIATION BIA-BIA (A.B.B.) \n", - "12172 W012009309 SOLIDARITE BRESSANE CAMEROUNAISE (SBC) \n", - "12436 W012009573 ASSOCIATION TREMPLINGALLE - KAMKA \n", - "... ... ... \n", - "1834654 W953011922 CAMEROONIAN BUSINESS NETWORK \n", - "1875746 W9C1003716 KAMER973-ASSOCIATION DES CAMEROUNAIS DE LA GUYANE \n", - "1884141 W9R1005550 KAMER OCEAN INDIEN \n", - "1885143 W9R1009657 SOLIDARITES CAMEROUN REUNION \n", - "1903427 W9T1002080 ASSOCIATIONS DES CAMEROUNAIS DE MAYOTTE (ACAM) \n", - "\n", - " objet objet_social1 \\\n", - "5359 favoriser le développement économique, culture... 20005 \n", - "10200 promotion des actions et expériences locales d... 20000 \n", - "11259 promouvoir l'intérêt général des villages came... 20000 \n", - "12172 solidarité et entraide par l'assistance de ces... 20000 \n", - "12436 faire participer l'immigration d'origine afric... 20000 \n", - "... ... ... \n", - "1834654 rassembler les entrepreneurs et porteurs de pr... 14040 \n", - "1875746 permettre aux personnes d'origine camerounaise... 14040 \n", - "1884141 promouvoir l'amitié, la solidarité et la frate... 14040 \n", - "1885143 raffermir l'amitié, la solidarité la fraternit... 9000 \n", - "1903427 rencontre des camerounais résidents ou de pass... 14040 \n", - "\n", - " objet_social2 adrs_numvoie adrs_typevoie adrs_libvoie \\\n", - "5359 0 62 RUE de la Ramassière \n", - "10200 0 20 bis RUE de Lyon \n", - "11259 0 \n", - "12172 0 2 RUE Lalande \n", - "12436 0 2 BD Irène Joliot Curie \n", - "... ... ... ... ... \n", - "1834654 0 2 RUE du désert aux nuages \n", - "1875746 0 71 RUE Lieutenant Goinet \n", - "1884141 7075 6 B CHEM1 Romely \n", - "1885143 14035 6 RUE Pastel \n", - "1903427 0 8 RTE de Vahibé \n", - "\n", - " adrs_codepostal adrs_libcommune siteweb \\\n", - "5359 1600 Reyrieux \n", - "10200 1800 Meximieux \n", - "11259 1340 Marsonnas \n", - "12172 1000 Bourg-en-Bresse \n", - "12436 1000 Bourg-en-Bresse \n", - "... ... ... ... \n", - "1834654 95800 Cergy \n", - "1875746 97300 Cayenne \n", - "1884141 97419 La Possession \n", - "1885143 97438 Sainte-Marie \n", - "1903427 97600 Mamoudzou \n", - "\n", - " adrs dept \\\n", - "5359 62 rue de la ramassiere 1600 reyrieux Essonne \n", - "10200 20 bis rue de lyon 1800 meximieux Essonne \n", - "11259 1340 marsonnas Essonne \n", - "12172 2 rue lalande 1000 bourg-en-bresse Essonne \n", - "12436 2 boulevard irene joliot curie 1000 bourg-en-b... Essonne \n", - "... ... ... \n", - "1834654 2 rue du desert aux nuages 95800 cergy Val-d'Oise \n", - "1875746 71 rue lieutenant goinet 97300 cayenne Guyane \n", - "1884141 6 b chem1 romely 97419 la possession Réunion \n", - "1885143 6 rue pastel 97438 sainte-marie Réunion \n", - "1903427 8 route de vahibe 97600 mamoudzou Mayotte \n", - "\n", - " region social_object1_libelle \\\n", - "5359 Île-de-France ASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A... \n", - "10200 Île-de-France ASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A... \n", - "11259 Île-de-France ASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A... \n", - "12172 Île-de-France ASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A... \n", - "12436 Île-de-France ASSOCIATIONS CARITATIVES, HUMANITAIRES, AIDE A... \n", - "... ... ... \n", - "1834654 Île-de-France AMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT... \n", - "1875746 Guyane AMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT... \n", - "1884141 Réunion AMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT... \n", - "1885143 Réunion ACTION SOCIOCULTURELLE \n", - "1903427 Mayotte AMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT... \n", - "\n", - " social_object2_libelle \n", - "5359 AUTRES \n", - "10200 AUTRES \n", - "11259 AUTRES \n", - "12172 AUTRES \n", - "12436 AUTRES \n", - "... ... \n", - "1834654 AUTRES \n", - "1875746 AUTRES \n", - "1884141 CLUBS DE LOISIRS, RELATIONS \n", - "1885143 AMICALES, GROUPEMENTS AFFINITAIRES, GROUPEMENT... \n", - "1903427 AUTRES \n", - "\n", - "[2177 rows x 16 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import geocoder\n", "\n", @@ -920,413 +315,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "ade0c7df", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO: Pandarallel will run on 6 workers.\n", - "INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "33ad411fa68942a5953c6e00241435cc", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=363), Label(value='0 / 363'))), HB…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bf4f3824dbfd40a8bf43bd3e2808a3b7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=70), Label(value='0 / 70'))), HBox…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtitreobjetadrs_numvoieadrs_typevoieadrs_libvoieadrs_codepostaladrs_libcommunesitewebadrsdeptregionsocial_object1_libellesocial_object2_libellelongitudelatitude
5359W012002482TRAIT D'UNION FRANCO CAMEROUNAISfavoriser le développement économique, culture...62RUEde la Ramassière1600Reyrieux62 rue de la ramassiere 1600 reyrieuxEssonneÎle-de-FranceASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES4.82217345.936916
10200W012007337LA MAIN TENDUE (LMT)promotion des actions et expériences locales d...20 bisRUEde Lyon1800Meximieux20 bis rue de lyon 1800 meximieuxEssonneÎle-de-FranceASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES5.19129145.902716
11259W012008396ASSOCIATION BIA-BIA (A.B.B.)promouvoir l'intérêt général des villages came...1340Marsonnas1340 marsonnasEssonneÎle-de-FranceASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES5.07183546.340862
12172W012009309SOLIDARITE BRESSANE CAMEROUNAISE (SBC)solidarité et entraide par l'assistance de ces...2RUELalande1000Bourg-en-Bresse2 rue lalande 1000 bourg-en-bresseEssonneÎle-de-FranceASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES5.22335046.204509
12436W012009573ASSOCIATION TREMPLINGALLE - KAMKAfaire participer l'immigration d'origine afric...2BDIrène Joliot Curie1000Bourg-en-Bresse2 boulevard irene joliot curie 1000 bourg-en-b...EssonneÎle-de-FranceASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A...AUTRES5.23215846.209581
...................................................
1834654W953011922CAMEROONIAN BUSINESS NETWORKrassembler les entrepreneurs et porteurs de pr...2RUEdu désert aux nuages95800Cergy2 rue du desert aux nuages 95800 cergyVal-d'OiseÎle-de-FranceAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...AUTRES2.00857349.047606
1875746W9C1003716KAMER973-ASSOCIATION DES CAMEROUNAIS DE LA GUYANEpermettre aux personnes d'origine camerounaise...71RUELieutenant Goinet97300Cayenne71 rue lieutenant goinet 97300 cayenneGuyaneGuyaneAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...AUTRES-52.3276934.940521
1884141W9R1005550KAMER OCEAN INDIENpromouvoir l'amitié, la solidarité et la frate...6 BCHEM1Romely97419La Possession6 b chem1 romely 97419 la possessionRéunionRéunionAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...CLUBS DE LOISIRS/ RELATIONS55.336276-20.926966
1885143W9R1009657SOLIDARITES CAMEROUN REUNIONraffermir l'amitié, la solidarité la fraternit...6RUEPastel97438Sainte-Marie6 rue pastel 97438 sainte-marieRéunionRéunionACTION SOCIOCULTURELLEAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...55.563840-20.896432
1903427W9T1002080ASSOCIATIONS DES CAMEROUNAIS DE MAYOTTE (ACAM)rencontre des camerounais résidents ou de pass...8RTEde Vahibé97600Mamoudzou8 route de vahibe 97600 mamoudzouMayotteMayotteAMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT...AUTRES45.211314-12.801536
\n", - "

2177 rows × 16 columns

\n", - "
" - ], - "text/plain": [ - " id titre \\\n", - "5359 W012002482 TRAIT D'UNION FRANCO CAMEROUNAIS \n", - "10200 W012007337 LA MAIN TENDUE (LMT) \n", - "11259 W012008396 ASSOCIATION BIA-BIA (A.B.B.) \n", - "12172 W012009309 SOLIDARITE BRESSANE CAMEROUNAISE (SBC) \n", - "12436 W012009573 ASSOCIATION TREMPLINGALLE - KAMKA \n", - "... ... ... \n", - "1834654 W953011922 CAMEROONIAN BUSINESS NETWORK \n", - "1875746 W9C1003716 KAMER973-ASSOCIATION DES CAMEROUNAIS DE LA GUYANE \n", - "1884141 W9R1005550 KAMER OCEAN INDIEN \n", - "1885143 W9R1009657 SOLIDARITES CAMEROUN REUNION \n", - "1903427 W9T1002080 ASSOCIATIONS DES CAMEROUNAIS DE MAYOTTE (ACAM) \n", - "\n", - " objet adrs_numvoie \\\n", - "5359 favoriser le développement économique, culture... 62 \n", - "10200 promotion des actions et expériences locales d... 20 bis \n", - "11259 promouvoir l'intérêt général des villages came... \n", - "12172 solidarité et entraide par l'assistance de ces... 2 \n", - "12436 faire participer l'immigration d'origine afric... 2 \n", - "... ... ... \n", - "1834654 rassembler les entrepreneurs et porteurs de pr... 2 \n", - "1875746 permettre aux personnes d'origine camerounaise... 71 \n", - "1884141 promouvoir l'amitié, la solidarité et la frate... 6 B \n", - "1885143 raffermir l'amitié, la solidarité la fraternit... 6 \n", - "1903427 rencontre des camerounais résidents ou de pass... 8 \n", - "\n", - " adrs_typevoie adrs_libvoie adrs_codepostal adrs_libcommune \\\n", - "5359 RUE de la Ramassière 1600 Reyrieux \n", - "10200 RUE de Lyon 1800 Meximieux \n", - "11259 1340 Marsonnas \n", - "12172 RUE Lalande 1000 Bourg-en-Bresse \n", - "12436 BD Irène Joliot Curie 1000 Bourg-en-Bresse \n", - "... ... ... ... ... \n", - "1834654 RUE du désert aux nuages 95800 Cergy \n", - "1875746 RUE Lieutenant Goinet 97300 Cayenne \n", - "1884141 CHEM1 Romely 97419 La Possession \n", - "1885143 RUE Pastel 97438 Sainte-Marie \n", - "1903427 RTE de Vahibé 97600 Mamoudzou \n", - "\n", - " siteweb adrs \\\n", - "5359 62 rue de la ramassiere 1600 reyrieux \n", - "10200 20 bis rue de lyon 1800 meximieux \n", - "11259 1340 marsonnas \n", - "12172 2 rue lalande 1000 bourg-en-bresse \n", - "12436 2 boulevard irene joliot curie 1000 bourg-en-b... \n", - "... ... ... \n", - "1834654 2 rue du desert aux nuages 95800 cergy \n", - "1875746 71 rue lieutenant goinet 97300 cayenne \n", - "1884141 6 b chem1 romely 97419 la possession \n", - "1885143 6 rue pastel 97438 sainte-marie \n", - "1903427 8 route de vahibe 97600 mamoudzou \n", - "\n", - " dept region \\\n", - "5359 Essonne Île-de-France \n", - "10200 Essonne Île-de-France \n", - "11259 Essonne Île-de-France \n", - "12172 Essonne Île-de-France \n", - "12436 Essonne Île-de-France \n", - "... ... ... \n", - "1834654 Val-d'Oise Île-de-France \n", - "1875746 Guyane Guyane \n", - "1884141 Réunion Réunion \n", - "1885143 Réunion Réunion \n", - "1903427 Mayotte Mayotte \n", - "\n", - " social_object1_libelle \\\n", - "5359 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", - "10200 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", - "11259 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", - "12172 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", - "12436 ASSOCIATIONS CARITATIVES/ HUMANITAIRES/ AIDE A... \n", - "... ... \n", - "1834654 AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... \n", - "1875746 AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... \n", - "1884141 AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... \n", - "1885143 ACTION SOCIOCULTURELLE \n", - "1903427 AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... \n", - "\n", - " social_object2_libelle longitude \\\n", - "5359 AUTRES 4.822173 \n", - "10200 AUTRES 5.191291 \n", - "11259 AUTRES 5.071835 \n", - "12172 AUTRES 5.223350 \n", - "12436 AUTRES 5.232158 \n", - "... ... ... \n", - "1834654 AUTRES 2.008573 \n", - "1875746 AUTRES -52.327693 \n", - "1884141 CLUBS DE LOISIRS/ RELATIONS 55.336276 \n", - "1885143 AMICALES/ GROUPEMENTS AFFINITAIRES/ GROUPEMENT... 55.563840 \n", - "1903427 AUTRES 45.211314 \n", - "\n", - " latitude \n", - "5359 45.936916 \n", - "10200 45.902716 \n", - "11259 46.340862 \n", - "12172 46.204509 \n", - "12436 46.209581 \n", - "... ... \n", - "1834654 49.047606 \n", - "1875746 4.940521 \n", - "1884141 -20.926966 \n", - "1885143 -20.896432 \n", - "1903427 -12.801536 \n", - "\n", - "[2177 rows x 16 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from geopy.geocoders import Nominatim\n", "from pandarallel import pandarallel\n", @@ -1361,9 +353,12 @@ "def format_libelle_for_gogocarto(df):\n", " # Gogocarto lit une liste de catégories sur un champ défini et considère la virgule comme le caractère de séparation\n", " # On a donc opté pour remplacer la virgule(\",\") par le slash(\"/\")\n", + " \n", " df[\"social_object1_libelle\"] = df[\"social_object1_libelle\"].apply(lambda x: x.replace(\",\",\"/\"))\n", " df[\"social_object2_libelle\"] = df[\"social_object2_libelle\"].apply(lambda x: x.replace(\",\",\"/\"))\n", - " \n", + " text_columns = [\"social_object1_libelle\", \"social_object2_libelle\"]\n", + " for column in text_columns:\n", + " df[column] = df[column].apply(remove_space_at_the_end)\n", " return df\n", "\n", "df2 = df2.pipe(add_lat_lon) \\\n", @@ -1373,7 +368,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "federal-third", "metadata": { "collapsed": false, @@ -1389,9 +384,9 @@ ], "metadata": { "kernelspec": { - "display_name": "etl", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "etl" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1403,11 +398,16 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.8.13 (default, Oct 4 2022, 14:00:32) \n[GCC 9.4.0]" }, "toc-autonumbering": true, "toc-showcode": false, - "toc-showmarkdowntxt": false + "toc-showmarkdowntxt": false, + "vscode": { + "interpreter": { + "hash": "110cc1dee26208153f2972f08a2ad52b6a56238dc66d48e87fb757ef2996db56" + } + } }, "nbformat": 4, "nbformat_minor": 5