diff --git a/pynsee/download/download_file.py b/pynsee/download/download_file.py
index 51164f0d..28cab40c 100644
--- a/pynsee/download/download_file.py
+++ b/pynsee/download/download_file.py
@@ -9,7 +9,7 @@
 
 
 @save_df(day_lapse_max=90)
-def download_file(id, variables=None, update=False, silent=False):
+def download_file(id, variables=None, update=False, silent=True):
     """User level function to download files from insee.fr
 
     Args:
diff --git a/pynsee/geodata/_get_geodata.py b/pynsee/geodata/_get_geodata.py
index 6821afb0..06869ab2 100644
--- a/pynsee/geodata/_get_geodata.py
+++ b/pynsee/geodata/_get_geodata.py
@@ -19,15 +19,24 @@
 from pynsee.geodata._geojson_parser import _geojson_parser
 from pynsee.utils.save_df import save_df
-from pynsee.utils.requests_params import _get_requests_headers, _get_requests_proxies
+from pynsee.utils.requests_params import (
+    _get_requests_headers,
+    _get_requests_proxies,
+)
 
 import logging
 
 logger = logging.getLogger(__name__)
 
+
 @save_df(day_lapse_max=90)
 def _get_geodata(
-    id, polygon=None, update=False, silent=False, crs="EPSG:3857", crsPolygon="EPSG:4326"
+    id,
+    polygon=None,
+    update=False,
+    silent=True,
+    crs="EPSG:3857",
+    crsPolygon="EPSG:4326",
 ):
     """Get geographical data with identifier and from IGN API
@@ -60,7 +69,7 @@ def _get_geodata(
     Version = "2.0.0"
 
     # make the query link for ign
-    #geoportail = "https://wxs.ign.fr/{}/geoportail".format(topic)
+    # geoportail = "https://wxs.ign.fr/{}/geoportail".format(topic)
     geoportail = f"https://data.geopf.fr/{service.lower()}/ows?"
     Service = "SERVICE=" + service + "&"
     version = "VERSION=" + Version + "&"
@@ -70,7 +79,7 @@ def _get_geodata(
 
     link0 = (
         geoportail
-        #+ "/wfs?"
+        # + "/wfs?"
         + Service
         + version
         + request
diff --git a/pynsee/geodata/get_geodata_list.py b/pynsee/geodata/get_geodata_list.py
index ee6285ca..0fe56eb9 100644
--- a/pynsee/geodata/get_geodata_list.py
+++ b/pynsee/geodata/get_geodata_list.py
@@ -7,8 +7,9 @@
 
 from pynsee.utils.save_df import save_df
 
+
 @save_df(day_lapse_max=90)
-def get_geodata_list(update=False, silent=False):
+def get_geodata_list(update=False, silent=True):
     """Get a list of geographical limits of French administrative areas from IGN API
 
     Args:
@@ -44,7 +45,9 @@
     list_first_col = [
         col for col in data_full_list.columns if col in list_var
     ]
-    list_other_col = [col for col in data_full_list.columns if col not in list_first_col]
+    list_other_col = [
+        col for col in data_full_list.columns if col not in list_first_col
+    ]
 
     data_list = data_full_list[list_first_col + list_other_col]
     data_list = data_list.drop_duplicates().reset_index(drop=True)
diff --git a/pynsee/localdata/get_area_list.py b/pynsee/localdata/get_area_list.py
index 60c292e7..6db1704f 100644
--- a/pynsee/localdata/get_area_list.py
+++ b/pynsee/localdata/get_area_list.py
@@ -8,8 +8,9 @@
 from pynsee.utils._paste import _paste
 from pynsee.utils.save_df import save_df
 
+
 @save_df(day_lapse_max=90)
-def get_area_list(area=None, date=None, update=False, silent=False):
+def get_area_list(area=None, date=None, update=False, silent=True):
     """Get an exhaustive list of administrative areas : communes, departments, and urban, employment or functional areas
 
     Args:
diff --git a/pynsee/localdata/get_area_projection.py b/pynsee/localdata/get_area_projection.py
index a76685df..dcd94e06 100644
--- a/pynsee/localdata/get_area_projection.py
+++ b/pynsee/localdata/get_area_projection.py
@@ -10,6 +10,8 @@
 import logging
 
 logger = logging.getLogger(__name__)
+
+
 @lru_cache(maxsize=None)
 def _warning_get_area_projection():
     logger.info(
@@ -20,7 +22,11 @@ def _warning_get_area_projection():
 
 @save_df(day_lapse_max=90)
 def get_area_projection(
-    area: str, code: str, date: str, dateProjection: str = None
+    area: str,
+    code: str,
+    date: str,
+    dateProjection: str = None,
+    silent: bool = True,
 ):
     """
     Get data about the area (valid at given `date` datetime) projected
@@ -44,6 +50,8 @@
             is supposed to be the current date (ie projection into today's
            value)
 
+        silent (bool, optional): Set to True, to disable messages printed in log info
+
     Examples:
         >>> from pynsee.localdata import get_area_projection
         >>> df = get_area_projection(
diff --git a/pynsee/localdata/get_ascending_area.py b/pynsee/localdata/get_ascending_area.py
index 9fb9ed04..db711915 100644
--- a/pynsee/localdata/get_ascending_area.py
+++ b/pynsee/localdata/get_ascending_area.py
@@ -15,6 +15,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 @save_df(day_lapse_max=90)
 def get_ascending_area(
     area: str,
@@ -22,7 +23,7 @@
     date: str = None,
     type: str = None,
     update: bool = False,
-    silent: bool = False
+    silent: bool = True,
 ):
     """
     Get information about areas containing a given area
@@ -39,7 +40,7 @@
        update (bool): locally saved data is used by default. Trigger an
            update with update=True.
        silent (bool, optional): Set to True, to disable messages printed in log info
-    
+
     Examples:
         >>> from pynsee.localdata import get_ascending_area
         >>> df = get_ascending_area("commune", code='59350', date='2018-01-01')
@@ -62,7 +63,7 @@
 
     params_hash = ["get_ascending_area", area, code, date, type]
     params_hash = [x if x else "_" for x in params_hash]
-    
+
     INSEE_localdata_api_link = "https://api.insee.fr/metadonnees/V1/geo/"
 
     api_link = INSEE_localdata_api_link + area + f"/{code}/ascendants?"
@@ -75,10 +76,8 @@
 
     api_link = api_link + "&".join(params)
 
-    request = _request_insee(
-        api_url=api_link, file_format="application/json"
-    )
-    
+    request = _request_insee(api_url=api_link, file_format="application/json")
+
     try:
         data = request.json()
@@ -92,6 +91,6 @@
 
     except Exception:
         logger.error("No data found !")
-        data_final = pd.DataFrame() 
+        data_final = pd.DataFrame()
 
     return data_final
diff --git a/pynsee/localdata/get_descending_area.py b/pynsee/localdata/get_descending_area.py
index 21d634e4..782b81a8 100644
--- a/pynsee/localdata/get_descending_area.py
+++ b/pynsee/localdata/get_descending_area.py
@@ -12,8 +12,10 @@
 from pynsee.utils.save_df import save_df
 
 import logging
+
 logger = logging.getLogger(__name__)
 
+
 @save_df(day_lapse_max=90)
 def get_descending_area(
     area: str,
@@ -21,6 +23,7 @@
     date: str = None,
     type: str = None,
     update: bool = False,
+    silent: bool = True,
 ):
     """
     Get information about areas contained in a given area
@@ -36,6 +39,8 @@
        update (bool): locally saved data is used by default. Trigger an
            update with update=True.
 
+        silent (bool, optional): Set to True, to disable messages printed in log info
+
     Examples:
         >>> from pynsee.localdata import get_area_descending
         >>> df = get_descending_area("commune", code='59350', date='2018-01-01')
@@ -59,7 +64,7 @@
 
     params_hash = ["get_descending_area", area, code, date, type]
     params_hash = [x if x else "_" for x in params_hash]
-    
+
     INSEE_localdata_api_link = "https://api.insee.fr/metadonnees/V1/geo/"
 
     api_link = INSEE_localdata_api_link + area + f"/{code}/descendants?"
@@ -72,9 +77,7 @@ def get_descending_area(
 
     api_link = api_link + "&".join(params)
 
-    request = _request_insee(
-        api_url=api_link, file_format="application/json"
-    )
+    request = _request_insee(api_url=api_link, file_format="application/json")
 
     try:
         data = request.json()
@@ -89,6 +92,6 @@
 
     except Exception:
         logger.error("No data found !")
-        data_final = pd.DataFrame() 
+        data_final = pd.DataFrame()
 
     return data_final
diff --git a/pynsee/localdata/get_geo_list.py b/pynsee/localdata/get_geo_list.py
index 6f6e8ba1..fbed1025 100644
--- a/pynsee/localdata/get_geo_list.py
+++ b/pynsee/localdata/get_geo_list.py
@@ -13,8 +13,9 @@
 
 from pynsee.utils.save_df import save_df
 
+
 @save_df(day_lapse_max=90)
-def get_geo_list(geo=None, date=None, update=False, silent=False):
+def get_geo_list(geo=None, date=None, update=False, silent=True):
     """Get a list of French geographic areas (communes, departements, regions ...)
 
     Args:
@@ -179,16 +180,12 @@
 
             for i in range(len(data_all.index)):
                 if pd.isna(data_all.loc[i, "TITLE_DEP1"]):
-                    data_all.loc[i, "CODE_DEP"] = data_all.loc[
-                        i, "code_dep2"
-                    ]
+                    data_all.loc[i, "CODE_DEP"] = data_all.loc[i, "code_dep2"]
                     data_all.loc[i, "TITLE_DEP"] = data_all.loc[
                         i, "TITLE_DEP2"
                     ]
                 else:
-                    data_all.loc[i, "CODE_DEP"] = data_all.loc[
-                        i, "code_dep1"
-                    ]
+                    data_all.loc[i, "CODE_DEP"] = data_all.loc[i, "code_dep1"]
                     data_all.loc[i, "TITLE_DEP"] = data_all.loc[
                         i, "TITLE_DEP1"
                     ]
diff --git a/pynsee/localdata/get_local_data.py b/pynsee/localdata/get_local_data.py
index c61f301a..546c8663 100644
--- a/pynsee/localdata/get_local_data.py
+++ b/pynsee/localdata/get_local_data.py
@@ -10,14 +10,18 @@
 import sys
 import datetime
 
-from pynsee.localdata._find_latest_local_dataset import _find_latest_local_dataset
+from pynsee.localdata._find_latest_local_dataset import (
+    _find_latest_local_dataset,
+)
 from pynsee.localdata._get_insee_local_onegeo import _get_insee_local_onegeo
 from pynsee.localdata.get_geo_list import get_geo_list
 from pynsee.utils.save_df import save_df
 
 import logging
+
 logger = logging.getLogger(__name__)
 
+
 @lru_cache(maxsize=None)
 def _warning_nivgeo(nivgeo):
     if nivgeo == "DEP":
@@ -29,9 +33,15 @@
     elif nivgeo == "FE":
         logger.info("By default, the query is on all France territory")
 
+
 @save_df(day_lapse_max=90)
 def get_local_data(
-    variables, dataset_version, nivgeo="FE", geocodes=["1"], update=False, silent=False
+    variables,
+    dataset_version,
+    nivgeo="FE",
+    geocodes=["1"],
+    update=False,
+    silent=True,
 ):
     """Get INSEE local numeric data
@@ -47,7 +57,7 @@
 
        update (bool): data is saved locally, set update=True to trigger an update
 
        silent (bool, optional): Set to True, to disable messages printed in log info
-    
+
     Raises:
        ValueError: Error if geocodes is not a list
@@ -74,7 +84,7 @@
 
    if isinstance(geocodes, pd.core.series.Series):
        geocodes = geocodes.to_list()
-    
+
    if type(geocodes) == str:
        geocodes = [geocodes]
@@ -94,37 +104,41 @@
        _warning_nivgeo(_warning_nivgeo)
    elif nivgeo != "METRODOM":
        logger.warning("Please provide a list with geocodes argument !")
-    
+
    #
    # LATEST AVAILABLE DATASET OPTION
    #
-    
-    pattern = re.compile('.*latest$')
+
+    pattern = re.compile(".*latest$")
 
    if pattern.match(dataset_version):
-        
-        dataset_version = _find_latest_local_dataset(dataset_version, variables, nivgeo, geocodes[0], update)
-        
-    list_data_all = []
-    
+
+        dataset_version = _find_latest_local_dataset(
+            dataset_version, variables, nivgeo, geocodes[0], update
+        )
+
+    list_data_all = []
+
    for cdg in trange(len(geocodes), desc="Getting data"):
-        
+
        codegeo = geocodes[cdg]
-        df_default = pd.DataFrame({"CODEGEO": codegeo, "OBS_VALUE": np.nan}, index=[0])
-    
+        df_default = pd.DataFrame(
+            {"CODEGEO": codegeo, "OBS_VALUE": np.nan}, index=[0]
+        )
+
        try:
            df = _get_insee_local_onegeo(
                variables, dataset_version, nivgeo, codegeo
            )
-        
+
        except Exception as e:
            df = df_default
 
        list_data_all.append(df)
 
    data_final = pd.concat(list_data_all).reset_index(drop=True)
-    
+
    if data_final.equals(df_default):
-        logger.error("Error or no data found !")
-    
+        logger.error("Error or no data found !")
+
    return data_final
diff --git a/pynsee/macrodata/_get_dataset_list_internal.py b/pynsee/macrodata/_get_dataset_list_internal.py
index 997a919f..91305804 100644
--- a/pynsee/macrodata/_get_dataset_list_internal.py
+++ b/pynsee/macrodata/_get_dataset_list_internal.py
@@ -7,7 +7,10 @@
 
 
 @save_df(day_lapse_max=90)
-def _get_dataset_list_internal():
+def _get_dataset_list_internal(silent=True):
+    """
+    silent (bool, optional): Set to True, to disable messages printed in log info
+    """
 
     zip_file = pkg_resources.resource_stream(
         __name__, "data/dataset_list_internal.zip"
diff --git a/pynsee/macrodata/_get_dataset_metadata_core.py b/pynsee/macrodata/_get_dataset_metadata_core.py
index f6343f32..d06508a4 100644
--- a/pynsee/macrodata/_get_dataset_metadata_core.py
+++ b/pynsee/macrodata/_get_dataset_metadata_core.py
@@ -8,6 +8,7 @@
 from pynsee.macrodata._get_dimension_values import _get_dimension_values
 from pynsee.utils.save_df import save_df
 
+
 @save_df(day_lapse_max=90)
 def _get_dataset_metadata_core(dataset, update=False, silent=True):
@@ -15,12 +16,12 @@
     # test1 = _get_dataset_metadata_core('IPC-2015', update=True)
     # test2 = _get_dataset_metadata_core('IRL', update=True)
 
-    idbank_list = _download_idbank_list(update=update, silent=True)
+    idbank_list = _download_idbank_list(update=update, silent=silent)
 
     # get dataset's dimensions
-    dataset_dimension = _get_dataset_dimension(dataset, update=update, silent=True).reset_index(
-        drop=True
-    )
+    dataset_dimension = _get_dataset_dimension(
+        dataset, update=update, silent=silent
+    ).reset_index(drop=True)
 
     # select only the idbanks corresponding to the dataset
     idbank_list_dataset = idbank_list[idbank_list["nomflow"] == dataset]
@@ -34,11 +35,14 @@
     # subset new columns in case of mismatch between idbank list and insee metadata
     new_columns = [new_columns[c] for c in range(len(df_cleflow_splitted[0]))]
 
-    df_cleflow_splitted = pd.DataFrame(df_cleflow_splitted, columns=new_columns)
+    df_cleflow_splitted = pd.DataFrame(
+        df_cleflow_splitted, columns=new_columns
+    )
 
     # join the splitted cleflow dataframe with the former idbank list
     idbank_list_dataset = pd.concat(
-        [idbank_list_dataset.reset_index(drop=True), df_cleflow_splitted], axis=1
+        [idbank_list_dataset.reset_index(drop=True), df_cleflow_splitted],
+        axis=1,
     )
 
     n_dimensions = len(dataset_dimension.index)
@@ -49,13 +53,19 @@
         dim_local_rep = dataset_dimension["local_representation"].iloc[irow]
 
         # get dimension values #
-        dim_values = _get_dimension_values(dim_local_rep, update=update, silent=True)
+        dim_values = _get_dimension_values(
+            dim_local_rep, update=update, silent=silent
+        )
 
         # drop dimension label
         dim_values = dim_values[dim_values["id"] != dim_local_rep]
 
         # rename columns
-        dim_values.columns = [dim_id, dim_id + "_label_fr", dim_id + "_label_en"]
+        dim_values.columns = [
+            dim_id,
+            dim_id + "_label_fr",
+            dim_id + "_label_en",
+        ]
 
         if dim_id in idbank_list_dataset.columns:
             idbank_list_dataset = idbank_list_dataset.merge(
diff --git a/pynsee/macrodata/_load_dataset_data.py b/pynsee/macrodata/_load_dataset_data.py
index 81213cb0..9457da45 100644
--- a/pynsee/macrodata/_load_dataset_data.py
+++ b/pynsee/macrodata/_load_dataset_data.py
@@ -6,15 +6,16 @@
 from pynsee.macrodata._get_dataset_metadata import _get_dataset_metadata
 from pynsee.utils.save_df import save_df
 
+
 @save_df(day_lapse_max=90)
 def _load_dataset_data(update=False, silent=True):
-    
-    list_dataset = list(get_dataset_list(silent=True).id.unique())
+
+    list_dataset = list(get_dataset_list(silent=silent).id.unique())
 
     list_metadata = []
 
-    for dt in trange(len(list_dataset), desc='Metadata download'):
+    for dt in trange(len(list_dataset), desc="Metadata download"):
         dataset = list_dataset[dt]
-        metadata = _get_dataset_metadata(dataset, silent=True)
-        list_metadata += [metadata]
-    
+        metadata = _get_dataset_metadata(dataset, silent=silent)
+        list_metadata += [metadata]
+
     return pd.concat(list_metadata)
diff --git a/pynsee/macrodata/get_dataset.py b/pynsee/macrodata/get_dataset.py
index 88a0ad16..6bc1bcea 100644
--- a/pynsee/macrodata/get_dataset.py
+++ b/pynsee/macrodata/get_dataset.py
@@ -9,11 +9,12 @@
 from pynsee.utils._paste import _paste
 from pynsee.utils.save_df import save_df
 
+
 @save_df(day_lapse_max=30)
 def get_dataset(
     dataset,
     update=False,
-    silent=False,
+    silent=True,
     metadata=True,
     filter=None,
     startPeriod=None,
@@ -30,6 +31,8 @@
        update (bool, optional): Set to True, to update manually the data
            stored locally on the computer. Defaults to False.
 
+        silent (bool, optional): Set to True, to disable messages printed in log info
+
        metadata (bool, optional): If True, some metadata is added to the data
 
        filter (str, optional): Use the filter to choose only some values in a dimension.
@@ -62,8 +65,8 @@
        >>> #
        >>> business_climate = get_dataset("CLIMAT-AFFAIRES", lastNObservations = 1)
    """
-    
-    insee_dataset = get_dataset_list(silent=True)
+
+    insee_dataset = get_dataset_list(silent=silent)
    insee_dataset_list = insee_dataset["id"].to_list()
 
    # check if the dataset exists in INSEE's list
@@ -108,12 +111,12 @@
    # add metadata
    if metadata:
        try:
-            
-            idbank_list = get_series_list(dataset, silent=True)
-            newcol = [col for col in idbank_list.columns if col not in data.columns] + [
-                "IDBANK"
-            ]
+            idbank_list = get_series_list(dataset, silent=silent)
+
+            newcol = [
+                col for col in idbank_list.columns if col not in data.columns
+            ] + ["IDBANK"]
 
            idbank_list = idbank_list[newcol]
            data = data.merge(idbank_list, on="IDBANK", how="left")
diff --git a/pynsee/macrodata/get_dataset_list.py b/pynsee/macrodata/get_dataset_list.py
index 58eb53d1..04aac206 100644
--- a/pynsee/macrodata/get_dataset_list.py
+++ b/pynsee/macrodata/get_dataset_list.py
@@ -19,7 +19,7 @@
 
 
 @save_df(day_lapse_max=30)
-def get_dataset_list(update=False, silent=False):
+def get_dataset_list(update=False, silent=True):
     """Download a full INSEE's datasets list from BDM macroeconomic database
 
     Args:
diff --git a/pynsee/macrodata/get_series.py b/pynsee/macrodata/get_series.py
index 08ec21e3..5aa99fb9 100644
--- a/pynsee/macrodata/get_series.py
+++ b/pynsee/macrodata/get_series.py
@@ -11,11 +11,12 @@
 from pynsee.utils._paste import _paste
 from pynsee.utils.save_df import save_df
 
+
 @save_df(day_lapse_max=30)
 def get_series(
     *idbanks,
     update=False,
-    silent=False,
+    silent=True,
     metadata=True,
     startPeriod=None,
     endPeriod=None,
@@ -137,7 +138,7 @@
         api_query = api_query + added_param_string
 
         df = _get_insee(
-            api_query=api_query, 
+            api_query=api_query,
             sdmx_query=sdmx_query,
             step=str("{0}/{1}").format(q + 1, max_seq_idbank),
         )
@@ -158,7 +159,9 @@
             metadata_df["IDBANK"].isin(list_idbank_data)
         ].reset_index(drop=True)
 
-        list_col = ['IDBANK'] + [c for c in metadata_df.columns if c not in data]
+        list_col = ["IDBANK"] + [
+            c for c in metadata_df.columns if c not in data
+        ]
 
         metadata_df = metadata_df[list_col]
         data = data.merge(metadata_df, on="IDBANK", how="left")
diff --git a/pynsee/macrodata/get_series_list.py b/pynsee/macrodata/get_series_list.py
index 11baaa03..1e4528df 100644
--- a/pynsee/macrodata/get_series_list.py
+++ b/pynsee/macrodata/get_series_list.py
@@ -9,8 +9,9 @@
 
 from pynsee.utils.save_df import save_df
 
+
 @save_df(day_lapse_max=30)
-def get_series_list(*datasets, update=False, silent=False):
+def get_series_list(*datasets, update=False, silent=True):
     """Download an INSEE's series key list for one or several datasets from BDM macroeconomic database
 
     Args:
@@ -35,7 +36,7 @@
        >>> dataset_list = get_dataset_list()
        >>> idbank_ipc = get_series_list('IPC-2015', 'CLIMAT-AFFAIRES')
    """
-    insee_dataset = get_dataset_list(silent=True)
+    insee_dataset = get_dataset_list(silent=silent)
    insee_dataset_list = insee_dataset["id"].to_list()
 
    if len(datasets) == 1:
@@ -52,7 +53,9 @@
    idbank_list_dataset = []
 
    for dt in datasets:
-        idbank_list_dt = _get_dataset_metadata(dt, update=update, silent=True)
+        idbank_list_dt = _get_dataset_metadata(
+            dt, update=update, silent=silent
+        )
 
        idbank_list_dataset.append(idbank_list_dt)
@@ -72,6 +75,8 @@
        columns={"nomflow": "DATASET", "idbank": "IDBANK", "cleFlow": "KEY"}
"KEY"} ) - idbank_list.columns = [col.replace("-", "_") for col in idbank_list.columns] + idbank_list.columns = [ + col.replace("-", "_") for col in idbank_list.columns + ] return idbank_list diff --git a/pynsee/metadata/get_legal_entity.py b/pynsee/metadata/get_legal_entity.py index 82e0038b..637f2840 100644 --- a/pynsee/metadata/get_legal_entity.py +++ b/pynsee/metadata/get_legal_entity.py @@ -9,8 +9,9 @@ from pynsee.utils._request_insee import _request_insee from pynsee.utils.save_df import save_df + @save_df(day_lapse_max=30) -def get_legal_entity(codes, print_err_msg=True, update=False, silent=False): +def get_legal_entity(codes, print_err_msg=True, update=False, silent=True): """Get legal entities labels Args: @@ -39,7 +40,7 @@ def get_legal_entity(codes, print_err_msg=True, update=False, silent=False): data_final = pd.concat(list_data).reset_index(drop=True) data_final = data_final.rename(columns={"intitule": "title"}) - + return data_final diff --git a/pynsee/sirene/search_sirene.py b/pynsee/sirene/search_sirene.py index 4ab6194d..264dec07 100644 --- a/pynsee/sirene/search_sirene.py +++ b/pynsee/sirene/search_sirene.py @@ -33,6 +33,7 @@ def _warning_data_save(): "Set update=True to trigger an update" ) + @save_df(day_lapse_max=30, obj=SireneDataFrame) def search_sirene( variable, @@ -47,7 +48,7 @@ def search_sirene( legal=False, closed=False, update=False, - silent=False + silent=True, ): """Get data about companies from criteria on variables @@ -120,10 +121,10 @@ def search_sirene( >>> number = 2500, >>> kind = "siret") >>> # - >>> # Find 1000 companies whose name sounds like Dassault Système or is a big company (GE), + >>> # Find 1000 companies whose name sounds like Dassault Système or is a big company (GE), >>> # search is made as well on patterns whose accents have been removed >>> import os - >>> # environment variable 'pynsee_print_url' force the package to print the request + >>> # environment variable 'pynsee_print_url' force the package to print the request >>> os.environ["pynsee_print_url"] = 'True' >>> df = search_sirene(variable = ["denominationUniteLegale", 'categorieEntreprise'], >>> pattern = ['Dassot Système', 'GE'], @@ -165,30 +166,30 @@ def search_sirene( pattern = [pattern] list_siren_hist_variable = [ - "nomUniteLegale",# - "nomUsageUniteLegale",# - "denominationUniteLegale",# - "denominationUsuelle1UniteLegale",# - "denominationUsuelle2UniteLegale",# + "nomUniteLegale", # + "nomUsageUniteLegale", # + "denominationUniteLegale", # + "denominationUsuelle1UniteLegale", # + "denominationUsuelle2UniteLegale", # "denominationUsuelle3UniteLegale", - "categorieJuridiqueUniteLegale",# - "etatAdministratifUniteLegale",# - "nicSiegeUniteLegale",# - "activitePrincipaleUniteLegale",# - "caractereEmployeurUniteLegale",# - "economieSocialeSolidaireUniteLegale",# - #"nomenclatureActivitePrincipaleUniteLegale", - ] + ['societeMissionUniteLegale'] + "categorieJuridiqueUniteLegale", # + "etatAdministratifUniteLegale", # + "nicSiegeUniteLegale", # + "activitePrincipaleUniteLegale", # + "caractereEmployeurUniteLegale", # + "economieSocialeSolidaireUniteLegale", # + # "nomenclatureActivitePrincipaleUniteLegale", + ] + ["societeMissionUniteLegale"] list_siret_hist_variable = [ - "denominationUsuelleEtablissement",# - "enseigne1Etablissement",# - "enseigne2Etablissement",# - "enseigne3Etablissement",# - "activitePrincipaleEtablissement",# - "etatAdministratifEtablissement",# - "nomenclatureActiviteEtablissement",# - "caractereEmployeurEtablissement",# + 
"denominationUsuelleEtablissement", # + "enseigne1Etablissement", # + "enseigne2Etablissement", # + "enseigne3Etablissement", # + "activitePrincipaleEtablissement", # + "etatAdministratifEtablissement", # + "nomenclatureActiviteEtablissement", # + "caractereEmployeurEtablissement", # ] if kind == "siren": @@ -208,8 +209,8 @@ def search_sirene( # using OR, unless it's a range condition, cf. # https://www.sirene.fr/static-resources/htm/sommaire.html if not ( - re.match(r"\[\w+\s+TO\s+\w+\]", patt) or - re.match(r"\{\w+\s+TO\s+\w+\}", patt) + re.match(r"\[\w+\s+TO\s+\w+\]", patt) + or re.match(r"\{\w+\s+TO\s+\w+\}", patt) ): patt = re.sub(r"\s+", "|", patt) else: @@ -221,13 +222,13 @@ def search_sirene( if upper_case: list_var_patt_maj = [p.upper() for p in list_patt] list_patt += list_var_patt_maj - + if decode: list_var_decode = [unidecode(p) for p in list_patt] list_patt += list_var_decode list_patt = list(set(list_patt)) - + list_var_patt = [] for ptt in list_patt: if var in list_hist_variable: @@ -235,7 +236,7 @@ def search_sirene( else: ptt_h = "{}{}:{}".format(var, phntc_string, ptt) - list_var_patt += [ptt_h] + list_var_patt += [ptt_h] list_var_pattern.append(list_var_patt)