diff --git a/constants.py b/constants.py index ab5f665..874a8a7 100644 --- a/constants.py +++ b/constants.py @@ -67,10 +67,10 @@ kerbs_path_raw = 'data/kerbs_raw' + data_format other_footways_path_raw = 'data/other_footways_raw' + data_format -sidewalks_path_versioning = 'data/sidewalks_versioning.json' -crossings_path_versioning = 'data/crossings_versioning.json' -kerbs_path_versioning = 'data/kerbs_versioning.json' -other_footways_path_versioning = 'data/other_footways_versioning.json' +sidewalks_path_versioning = 'data/versioning/sidewalks_versioning.json' +crossings_path_versioning = 'data/versioning/crossings_versioning.json' +kerbs_path_versioning = 'data/versioning/kerbs_versioning.json' +other_footways_path_versioning = 'data/versioning/other_footways_versioning.json' # data quality jsons path feat_keys_path = 'quality_check/feature_keys.json' @@ -529,6 +529,9 @@ }, 'crossings':{ + }, + 'other_footways':{ + }, } diff --git a/filtering_adapting_data.py b/filtering_adapting_data.py index 3c60f04..332637c 100644 --- a/filtering_adapting_data.py +++ b/filtering_adapting_data.py @@ -20,9 +20,19 @@ # for datalayerpath in paths_dict['versioning']: # updating_dict[datalayerpath] = pd.read_json(paths_dict['versioning'][datalayerpath]) -updating_dict = {datalayerpath: pd.read_json(paths_dict['versioning'][datalayerpath]) - for datalayerpath - in paths_dict['versioning']} +# updating_dict = {datalayerpath: pd.read_json(paths_dict['versioning'].get(datalayerpath,StringIO(r"{}"))) +# for datalayerpath +# in paths_dict['versioning']} + +updating_dict = {} +for category in paths_dict['versioning']: + category_path = paths_dict['versioning'][category] + + if os.path.exists(category_path): + updating_dict[category] = pd.read_json(category_path) + else: + updating_dict[category] = pd.DataFrame() + # sidewalks_updating = pd.read_json(sidewalks_path_versioning) # crossings_updating = pd.read_json(crossings_path_versioning) @@ -48,12 +58,12 @@ print(category) - if category != 'sidewalks': + if category != 'sidewalks' or category != 'other_footways': print('- Removing unconnected crossings and kerbs') create_folder_if_not_exists(disjointed_folderpath) - # TODO: include other footways in this + # TODO: include other footways here disjointed = gdf_dict[category].disjoint(sidewalks_big_unary_buffer) outfilepath = os.path.join(disjointed_folderpath,f'{category}_disjointed' + data_format) @@ -146,17 +156,24 @@ print('- Adding update data') + # inserting last update: - updating_dict[category]['update_date'] = updating_dict[category]['rev_day'].astype(str) + "-" + updating_dict[category]['rev_month'].astype(str) + "-" + updating_dict[category]['rev_year'].astype(str) + if not updating_dict[category].empty: - # print(updating_dict[category].set_index('osmid')['last_update']) + updating_dict[category]['last_update'] = updating_dict[category]['rev_day'].astype(str) + "-" + updating_dict[category]['rev_month'].astype(str) + "-" + updating_dict[category]['rev_year'].astype(str) - gdf_dict[category] = gdf_dict[category].set_index('id').join(updating_dict[category].set_index('osmid')['update_date'] - # ,rsuffix = 'r_remove',lsuffix = 'l_remove', - ).reset_index() + # joining the updating info dict to the geodataframe: + gdf_dict[category] = gdf_dict[category].set_index('id').join(updating_dict[category].set_index('osmid')['last_update'] + # ,rsuffix = 'r_remove',lsuffix = 'l_remove', + ).reset_index() + else: + gdf_dict[category]['last_update'] = '' - gdf_dict[category]['last_update'] = gdf_dict[category]['update_date'] + # gdf_dict[category]['last_update'] = gdf_dict[category]['update_date'] + # now spliting the Other_Footways into categories: + if category == 'other_footways': + pass # gdf_dict[category].to_file(f'data/{category}' + data_format) save_geoparquet(gdf_dict[category],f'data/{category}' + data_format) diff --git a/functions.py b/functions.py index 31fd311..d5747a4 100644 --- a/functions.py +++ b/functions.py @@ -734,4 +734,3 @@ def get_gdfs_dict(raw_data=False): category_group = 'data_raw' if raw_data else 'data' return {category: gpd.read_parquet(paths_dict[category_group][category]) for category in paths_dict[category_group]} - \ No newline at end of file diff --git a/getting_data.py b/getting_data.py index 567c4a8..b09c411 100644 --- a/getting_data.py +++ b/getting_data.py @@ -48,6 +48,11 @@ if as_gdf[column].dtype == object: as_gdf[column] = as_gdf[column].astype(str) +# adapting osmnx output: +as_gdf.reset_index(inplace=True) +as_gdf.replace('nan', None, inplace=True) +as_gdf.rename(columns={'osmid': 'id'}, inplace=True) + print('splitting layers:') # small adaptations as OSMNX works differentlydownloaded in for category in layer_tags_dict: diff --git a/getting_feature_versioning_data.py b/getting_feature_versioning_data.py index eddcaaf..bba56a6 100644 --- a/getting_feature_versioning_data.py +++ b/getting_feature_versioning_data.py @@ -3,6 +3,7 @@ import geopandas as gpd import pandas as pd + ''' As separate script as long it's really much more slow compared to the other processes...