-
Notifications
You must be signed in to change notification settings - Fork 0
/
post_processing.py
93 lines (78 loc) · 3.96 KB
/
post_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# This script takes the output geojson from a google colab notebook
# and does some post processing on it as described below
# https://colab.research.google.com/drive/1eylU3waQbmvxT_mL_m5gQwFhgcJW9-jN
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import shape
# Create three bins for data and add them to the GeoJSON file
def bin_geojson_data(gdf, property, bin_name):
print('preparing to bin data')
# Calculate the bin edges
bins = np.percentile(gdf[property], [33.3, 66.6])
# Bin the property
gdf[bin_name] = pd.cut(gdf[property], bins=[-np.inf, *bins, np.inf], labels=['high', 'medium', 'low'])
# Convert the 'bin' column to string type
gdf[bin_name] = gdf[bin_name].astype(str)
# Function to find all upstream huc
def find_upstream_huc(huc12_list):
filtered_features = gdf[gdf['tohuc'].isin(huc12_list)]
upstream_huc_unique = list(set(filtered_features['huc12'])) # remove duplicates
if len(upstream_huc_unique) > 0:
upstream_huc_list.append(upstream_huc_unique)
find_upstream_huc(upstream_huc_unique)
else:
return
# Function to find all downstream huc
def find_downstream_huc(tohuc_list):
filtered_features = gdf[gdf['huc12'].isin(tohuc_list)]
downstream_huc_unique = list(set(filtered_features['tohuc'])) # remove duplicates
if len(downstream_huc_unique) > 0:
downstream_huc_list.append(downstream_huc_unique)
find_downstream_huc(downstream_huc_unique)
else:
return
def get_building_count(gdf):
print('preparing to get building count')
# Grab this gpkg from slack and update path below to point to where you saved it
# https://nemac.slack.com/files/U0C66HMC1/F06G9RQ6RU0/nc-buildings.gpkg.zip
gpkg_file = '/Users/jbliss/Downloads/nc-buildings.gpkg'
# Load the GPKG file layers
gpkg_gdf_ml = gpd.read_file(gpkg_file, layer='machine-learning-derived')
gpkg_gdf_osm = gpd.read_file(gpkg_file, layer='open-street-map-derived')
# Initialize new columns in the GeoJSON DataFrame to store the counts
gdf['machine_building_count'] = 0
gdf['osm_building_count'] = 0
for i, row in gdf.iterrows():
# Create a shapely geometry object from the GeoJSON feature
geojson_geom = shape(row['geometry'])
# Count how many GPKG features from each layer are contained in the GeoJSON feature
count_ml = gpkg_gdf_ml[gpkg_gdf_ml.geometry.within(geojson_geom)].shape[0]
count_osm = gpkg_gdf_osm[gpkg_gdf_osm.geometry.within(geojson_geom)].shape[0]
# Add these counts to the GeoJSON DataFrame
gdf.at[i, 'machine_building_count'] = count_ml
gdf.at[i, 'osm_building_count'] = count_osm
gdf.at[i, 'total_building_count'] = count_ml+count_osm
print('Feature: ', i+1, ' of ', len(gdf), ' total building count: ', count_ml+count_osm)
# Load GeoJSON data from google colab notebook output
gdf = gpd.read_file('huc12_data_with_ndvi.geojson')
bin_geojson_data(gdf, 'ndvi_change_one_year_percent', 'ndvi_change_one_year_bin')
bin_geojson_data(gdf, 'ndvi_change_three_year_percent', 'ndvi_change_three_year_bin')
# Finding upstream and downstream hucs is a bit hacky with global variables
print('preparing to find upstream and downstream hucs')
# Loop through every feature and find all of the upstream and downstream hucs
for index, feature in gdf.iterrows():
print('Finding upstream and downstream hucs for feature: ', index+1, ' of ', len(gdf))
# global variables for upstream and downstream huc lists
upstream_huc_list = []
downstream_huc_list = [[feature['tohuc']]] # start with the tohuc of the feature
find_upstream_huc([feature['huc12']])
find_downstream_huc([feature['tohuc']])
gdf.at[index, 'upstream_huc_list'] = str(sum(upstream_huc_list, [])) # flatten list of lists
gdf.at[index, 'downstream_huc_list'] = str(sum(downstream_huc_list, [])) # flatten list of lists
get_building_count(gdf)
# converting GeoDataFrame to a JSON
gdf = gdf.to_json()
# writing JSON data structure to a GeoJSON file
with open('processed_output_huc12.geojson', 'w', encoding='utf-8') as file:
file.write(gdf)