Commit cb2e250

no broken code for tomorrow
kauevestena committed Jan 10, 2024
1 parent d433f83 commit cb2e250
Showing 5 changed files with 41 additions and 16 deletions.
11 changes: 7 additions & 4 deletions constants.py
@@ -67,10 +67,10 @@
 kerbs_path_raw = 'data/kerbs_raw' + data_format
 other_footways_path_raw = 'data/other_footways_raw' + data_format
 
-sidewalks_path_versioning = 'data/sidewalks_versioning.json'
-crossings_path_versioning = 'data/crossings_versioning.json'
-kerbs_path_versioning = 'data/kerbs_versioning.json'
-other_footways_path_versioning = 'data/other_footways_versioning.json'
+sidewalks_path_versioning = 'data/versioning/sidewalks_versioning.json'
+crossings_path_versioning = 'data/versioning/crossings_versioning.json'
+kerbs_path_versioning = 'data/versioning/kerbs_versioning.json'
+other_footways_path_versioning = 'data/versioning/other_footways_versioning.json'
 
 # data quality jsons path
 feat_keys_path = 'quality_check/feature_keys.json'
@@ -529,6 +529,9 @@
     },
     'crossings':{
 
     },
+    'other_footways':{
+
+    },
 }

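Since the versioning JSONs now live under data/versioning/, that folder has to exist before anything is written into it. A minimal sketch of the guard (the folder name comes from the paths above; everything else is an assumption):

    import os

    versioning_folder = 'data/versioning'
    os.makedirs(versioning_folder, exist_ok=True)  # no-op if the folder already exists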
39 changes: 28 additions & 11 deletions filtering_adapting_data.py
@@ -20,9 +20,19 @@
 # for datalayerpath in paths_dict['versioning']:
 #     updating_dict[datalayerpath] = pd.read_json(paths_dict['versioning'][datalayerpath])
 
-updating_dict = {datalayerpath: pd.read_json(paths_dict['versioning'][datalayerpath])
-                 for datalayerpath
-                 in paths_dict['versioning']}
+# updating_dict = {datalayerpath: pd.read_json(paths_dict['versioning'].get(datalayerpath,StringIO(r"{}")))
+#                  for datalayerpath
+#                  in paths_dict['versioning']}
+
+updating_dict = {}
+for category in paths_dict['versioning']:
+    category_path = paths_dict['versioning'][category]
+
+    if os.path.exists(category_path):
+        updating_dict[category] = pd.read_json(category_path)
+    else:
+        updating_dict[category] = pd.DataFrame()
 
 # sidewalks_updating = pd.read_json(sidewalks_path_versioning)
 # crossings_updating = pd.read_json(crossings_path_versioning)
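The new loop degrades gracefully when a versioning file has not been generated yet, instead of crashing on a missing path. The same behavior can also be expressed as a comprehension; a sketch assuming paths_dict as used above, with a hypothetical helper name:

    import os
    import pandas as pd

    def read_json_or_empty(path):
        """Read a versioning JSON if it exists, else return an empty DataFrame."""
        return pd.read_json(path) if os.path.exists(path) else pd.DataFrame()

    updating_dict = {category: read_json_or_empty(path)
                     for category, path in paths_dict['versioning'].items()}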
@@ -48,12 +48,12 @@
     print(category)
 
 
-    if category != 'sidewalks':
+    if category != 'sidewalks' and category != 'other_footways':
         print('- Removing unconnected crossings and kerbs')
 
         create_folder_if_not_exists(disjointed_folderpath)
 
-        # TODO: include other footways in this
+        # TODO: include other footways here
         disjointed = gdf_dict[category].disjoint(sidewalks_big_unary_buffer)
 
         outfilepath = os.path.join(disjointed_folderpath,f'{category}_disjointed' + data_format)
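For context: disjoint tests every geometry against the single merged sidewalk buffer and returns a boolean Series, so unconnected features can be selected directly. A toy sketch, assuming sidewalks_gdf and crossings_gdf are already-loaded GeoDataFrames (the names and buffer distance are made up):

    import geopandas as gpd

    buffer_polygon = sidewalks_gdf.buffer(0.0005).unary_union  # one merged polygon

    is_disjoint = crossings_gdf.disjoint(buffer_polygon)  # True = touches no sidewalk buffer
    unconnected = crossings_gdf[is_disjoint]              # candidates for review/removal
    connected = crossings_gdf[~is_disjoint]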
@@ -146,17 +156,24 @@
 
 
     print('- Adding update data')
-
-    # inserting last update:
-    updating_dict[category]['update_date'] = updating_dict[category]['rev_day'].astype(str) + "-" + updating_dict[category]['rev_month'].astype(str) + "-" + updating_dict[category]['rev_year'].astype(str)
+    if not updating_dict[category].empty:
+
+        # print(updating_dict[category].set_index('osmid')['last_update'])
+        updating_dict[category]['last_update'] = updating_dict[category]['rev_day'].astype(str) + "-" + updating_dict[category]['rev_month'].astype(str) + "-" + updating_dict[category]['rev_year'].astype(str)
 
-    gdf_dict[category] = gdf_dict[category].set_index('id').join(updating_dict[category].set_index('osmid')['update_date']
-    # ,rsuffix = 'r_remove',lsuffix = 'l_remove',
-    ).reset_index()
+        # joining the updating info dict to the geodataframe:
+        gdf_dict[category] = gdf_dict[category].set_index('id').join(updating_dict[category].set_index('osmid')['last_update']
+        # ,rsuffix = 'r_remove',lsuffix = 'l_remove',
+        ).reset_index()
+    else:
+        gdf_dict[category]['last_update'] = ''
 
-    gdf_dict[category]['last_update'] = gdf_dict[category]['update_date']
+    # gdf_dict[category]['last_update'] = gdf_dict[category]['update_date']
+
+    # now splitting the Other_Footways into categories:
+    if category == 'other_footways':
+        pass
 
     # gdf_dict[category].to_file(f'data/{category}' + data_format)
     save_geoparquet(gdf_dict[category],f'data/{category}' + data_format)
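The join keys differ between the two tables (id on the features, osmid on the versioning data), which is why both sides are re-indexed before joining. A toy illustration with made-up data:

    import pandas as pd

    features = pd.DataFrame({'id': [1, 2], 'surface': ['asphalt', 'paving_stones']})
    versioning = pd.DataFrame({'osmid': [1], 'rev_day': [9], 'rev_month': [1], 'rev_year': [2024]})

    versioning['last_update'] = (versioning['rev_day'].astype(str) + '-'
                                 + versioning['rev_month'].astype(str) + '-'
                                 + versioning['rev_year'].astype(str))

    joined = (features.set_index('id')
                      .join(versioning.set_index('osmid')['last_update'])
                      .reset_index())
    # feature 2 has no versioning row, so its last_update comes back as NaN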
1 change: 0 additions & 1 deletion functions.py
@@ -734,4 +734,3 @@ def get_gdfs_dict(raw_data=False):
     category_group = 'data_raw' if raw_data else 'data'
 
     return {category: gpd.read_parquet(paths_dict[category_group][category]) for category in paths_dict[category_group]}
-
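Hypothetical usage of the helper, assuming the parquet files referenced in paths_dict have already been generated:

    gdfs = get_gdfs_dict()                   # processed layers
    raw_gdfs = get_gdfs_dict(raw_data=True)  # raw layers, straight from download

    for name, gdf in gdfs.items():
        print(name, len(gdf))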

5 changes: 5 additions & 0 deletions getting_data.py
@@ -48,6 +48,11 @@
     if as_gdf[column].dtype == object:
         as_gdf[column] = as_gdf[column].astype(str)
 
+# adapting osmnx output:
+as_gdf.reset_index(inplace=True)
+as_gdf.replace('nan', None, inplace=True)
+as_gdf.rename(columns={'osmid': 'id'}, inplace=True)
+
 print('splitting layers:')
 # small adaptations, as OSMnx output works differently
 for category in layer_tags_dict:
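For context on the adaptation: osmnx returns features indexed by an (element_type, osmid) MultiIndex, so flattening the index and renaming the column yields the plain id column the rest of the pipeline expects. A sketch assuming osmnx 1.x and a made-up query:

    import osmnx as ox

    tags = {'footway': True}  # hypothetical tag filter
    as_gdf = ox.features_from_place('Curitiba, Brazil', tags)

    as_gdf.reset_index(inplace=True)                      # (element_type, osmid) -> columns
    as_gdf.rename(columns={'osmid': 'id'}, inplace=True)  # align with the pipeline's key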
1 change: 1 addition & 0 deletions getting_feature_versioning_data.py
@@ -3,6 +3,7 @@
 import geopandas as gpd
 import pandas as pd
 
+
 '''
 A separate script, since it is much slower than the other processes...
