Commit cb2e250

no broken code for tomorrow
kauevestena committed Jan 10, 2024
1 parent d433f83 commit cb2e250
Showing 5 changed files with 41 additions and 16 deletions.
11 changes: 7 additions & 4 deletions constants.py
@@ -67,10 +67,10 @@
 kerbs_path_raw = 'data/kerbs_raw' + data_format
 other_footways_path_raw = 'data/other_footways_raw' + data_format
 
-sidewalks_path_versioning = 'data/sidewalks_versioning.json'
-crossings_path_versioning = 'data/crossings_versioning.json'
-kerbs_path_versioning = 'data/kerbs_versioning.json'
-other_footways_path_versioning = 'data/other_footways_versioning.json'
+sidewalks_path_versioning = 'data/versioning/sidewalks_versioning.json'
+crossings_path_versioning = 'data/versioning/crossings_versioning.json'
+kerbs_path_versioning = 'data/versioning/kerbs_versioning.json'
+other_footways_path_versioning = 'data/versioning/other_footways_versioning.json'
 
 # data quality jsons path
 feat_keys_path = 'quality_check/feature_keys.json'
@@ -529,6 +529,9 @@
     },
     'crossings':{
 
     },
+    'other_footways':{
+
+    },
 }

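Since the versioning JSONs now live under data/versioning/, that folder has to exist before anything is written into it. A minimal sketch of the guard (the folder name comes from the paths above; everything else is an assumption):

    import os

    versioning_folder = 'data/versioning'
    os.makedirs(versioning_folder, exist_ok=True)  # no-op if the folder already exists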
39 changes: 28 additions & 11 deletions filtering_adapting_data.py
@@ -20,9 +20,19 @@
 # for datalayerpath in paths_dict['versioning']:
 #     updating_dict[datalayerpath] = pd.read_json(paths_dict['versioning'][datalayerpath])
 
-updating_dict = {datalayerpath: pd.read_json(paths_dict['versioning'][datalayerpath])
-                 for datalayerpath
-                 in paths_dict['versioning']}
+# updating_dict = {datalayerpath: pd.read_json(paths_dict['versioning'].get(datalayerpath,StringIO(r"{}")))
+#                  for datalayerpath
+#                  in paths_dict['versioning']}
+
+updating_dict = {}
+for category in paths_dict['versioning']:
+    category_path = paths_dict['versioning'][category]
+
+    if os.path.exists(category_path):
+        updating_dict[category] = pd.read_json(category_path)
+    else:
+        updating_dict[category] = pd.DataFrame()
 
 # sidewalks_updating = pd.read_json(sidewalks_path_versioning)
 # crossings_updating = pd.read_json(crossings_path_versioning)
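The new loop degrades gracefully when a versioning file has not been generated yet, instead of crashing on a missing path. The same behavior can also be expressed as a comprehension; a sketch assuming paths_dict as used above, with a hypothetical helper name:

    import os
    import pandas as pd

    def read_json_or_empty(path):
        """Read a versioning JSON if it exists, else return an empty DataFrame."""
        return pd.read_json(path) if os.path.exists(path) else pd.DataFrame()

    updating_dict = {category: read_json_or_empty(path)
                     for category, path in paths_dict['versioning'].items()}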
@@ -48,12 +48,12 @@
     print(category)
 
 
-    if category != 'sidewalks':
+    if category != 'sidewalks' and category != 'other_footways':
         print('- Removing unconnected crossings and kerbs')
 
         create_folder_if_not_exists(disjointed_folderpath)
 
-        # TODO: include other footways in this
+        # TODO: include other footways here
         disjointed = gdf_dict[category].disjoint(sidewalks_big_unary_buffer)
 
         outfilepath = os.path.join(disjointed_folderpath,f'{category}_disjointed' + data_format)
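For context: disjoint tests every geometry against the single merged sidewalk buffer and returns a boolean Series, so unconnected features can be selected directly. A toy sketch, assuming sidewalks_gdf and crossings_gdf are already-loaded GeoDataFrames (the names and buffer distance are made up):

    import geopandas as gpd

    buffer_polygon = sidewalks_gdf.buffer(0.0005).unary_union  # one merged polygon

    is_disjoint = crossings_gdf.disjoint(buffer_polygon)  # True = touches no sidewalk buffer
    unconnected = crossings_gdf[is_disjoint]              # candidates for review/removal
    connected = crossings_gdf[~is_disjoint]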
@@ -146,17 +156,24 @@
 
 
     print('- Adding update data')
-
-    # inserting last update:
-    updating_dict[category]['update_date'] = updating_dict[category]['rev_day'].astype(str) + "-" + updating_dict[category]['rev_month'].astype(str) + "-" + updating_dict[category]['rev_year'].astype(str)
+    if not updating_dict[category].empty:
+
+        # print(updating_dict[category].set_index('osmid')['last_update'])
+        updating_dict[category]['last_update'] = updating_dict[category]['rev_day'].astype(str) + "-" + updating_dict[category]['rev_month'].astype(str) + "-" + updating_dict[category]['rev_year'].astype(str)
 
-    gdf_dict[category] = gdf_dict[category].set_index('id').join(updating_dict[category].set_index('osmid')['update_date']
-    # ,rsuffix = 'r_remove',lsuffix = 'l_remove',
-    ).reset_index()
+        # joining the updating info dict to the geodataframe:
+        gdf_dict[category] = gdf_dict[category].set_index('id').join(updating_dict[category].set_index('osmid')['last_update']
+        # ,rsuffix = 'r_remove',lsuffix = 'l_remove',
+        ).reset_index()
+    else:
+        gdf_dict[category]['last_update'] = ''
 
-    gdf_dict[category]['last_update'] = gdf_dict[category]['update_date']
+    # gdf_dict[category]['last_update'] = gdf_dict[category]['update_date']
+
+    # now splitting the Other_Footways into categories:
+    if category == 'other_footways':
+        pass
 
     # gdf_dict[category].to_file(f'data/{category}' + data_format)
     save_geoparquet(gdf_dict[category],f'data/{category}' + data_format)
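The join keys differ between the two tables (id on the features, osmid on the versioning data), which is why both sides are re-indexed before joining. A toy illustration with made-up data:

    import pandas as pd

    features = pd.DataFrame({'id': [1, 2], 'surface': ['asphalt', 'paving_stones']})
    versioning = pd.DataFrame({'osmid': [1], 'rev_day': [9], 'rev_month': [1], 'rev_year': [2024]})

    versioning['last_update'] = (versioning['rev_day'].astype(str) + '-'
                                 + versioning['rev_month'].astype(str) + '-'
                                 + versioning['rev_year'].astype(str))

    joined = (features.set_index('id')
                      .join(versioning.set_index('osmid')['last_update'])
                      .reset_index())
    # feature 2 has no versioning row, so its last_update comes back as NaN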
1 change: 0 additions & 1 deletion functions.py
@@ -734,4 +734,3 @@ def get_gdfs_dict(raw_data=False):
     category_group = 'data_raw' if raw_data else 'data'
 
     return {category: gpd.read_parquet(paths_dict[category_group][category]) for category in paths_dict[category_group]}
-
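Hypothetical usage of the helper, assuming the parquet files referenced in paths_dict have already been generated:

    gdfs = get_gdfs_dict()                   # processed layers
    raw_gdfs = get_gdfs_dict(raw_data=True)  # raw layers, straight from download

    for name, gdf in gdfs.items():
        print(name, len(gdf))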

5 changes: 5 additions & 0 deletions getting_data.py
@@ -48,6 +48,11 @@
     if as_gdf[column].dtype == object:
         as_gdf[column] = as_gdf[column].astype(str)
 
+# adapting osmnx output:
+as_gdf.reset_index(inplace=True)
+as_gdf.replace('nan', None, inplace=True)
+as_gdf.rename(columns={'osmid': 'id'}, inplace=True)
+
 print('splitting layers:')
 # small adaptations, as OSMnx output works differently
 for category in layer_tags_dict:
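For context on the adaptation: osmnx returns features indexed by an (element_type, osmid) MultiIndex, so flattening the index and renaming the column yields the plain id column the rest of the pipeline expects. A sketch assuming osmnx 1.x and a made-up query:

    import osmnx as ox

    tags = {'footway': True}  # hypothetical tag filter
    as_gdf = ox.features_from_place('Curitiba, Brazil', tags)

    as_gdf.reset_index(inplace=True)                      # (element_type, osmid) -> columns
    as_gdf.rename(columns={'osmid': 'id'}, inplace=True)  # align with the pipeline's key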
1 change: 1 addition & 0 deletions getting_feature_versioning_data.py
@@ -3,6 +3,7 @@
 import geopandas as gpd
 import pandas as pd
 
+
 '''
 A separate script, since it is much slower than the other processes...
