Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat : add new analysis output #266

Merged
merged 10 commits into from
Nov 18, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

**Under development**

- feat: add population analysis output
- fix: avoid regenerating OSM when population changes
- feat: add municipality information to households and activities
- chore: update to `eqasim-java` commit `ece4932`
Expand Down
13 changes: 10 additions & 3 deletions analysis/grid/comparison_flow_volume.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pandas as pd
import geopandas as gpd
import os

import plotly.express as px

ANALYSIS_FOLDER = "compare_flow_volume"

SAMPLING_RATE = 0.05

Expand Down Expand Up @@ -84,6 +85,12 @@ def execute(context):
df_grids = stat_grid(df_trips_comp,df_locations_comp,df_persons_comp,df_grid)
point = df_grid.unary_union.centroid # a changé avec ploy_dep
print("Printing grids...")

# check output folder existence
analysis_output_path = os.path.join(context.config("output_path"), ANALYSIS_FOLDER)
if not os.path.exists(analysis_output_path):
os.mkdir(analysis_output_path)

for prefix, figure in figures.items():
df_select_age = df_stats[df_stats["age"].between(figure["min_age"],figure["max_age"])]
df_select_age = df_select_age.dissolve(by=["id_carr_1km","following_purpose"],aggfunc="count").reset_index()
Expand All @@ -103,14 +110,14 @@ def execute(context):
df_select = df_select[df_select["count"] != 0]
fig = px.choropleth_mapbox(df_select,geojson=df_select.geometry,locations=df_select.index,color="count", opacity= 0.7,color_continuous_scale='reds',
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Localisation flow distribution for {prefix} group with {purpose} purpose")
fig.write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html')
fig.write_html(f'{analysis_output_path}/{context.config("output_prefix")}{prefix}_{purpose}.html')
else :
df_grids_select = gpd.sjoin(df_grids_select,df_grid,how='right',predicate="contains").fillna(0)
df_select = gpd.sjoin(df_select,df_grids_select.drop(columns=[ 'index_left']),how='right',predicate="contains").rename(columns={"count_left":"volume_studied_simu","count_right":"volume_compared_simu"}).fillna(0)
df_select["volume_difference"] = df_select["volume_studied_simu"] - df_select["volume_compared_simu"]
df_select = df_select[(df_select["volume_studied_simu"] != 0 )| (df_select["volume_compared_simu"] != 0)]
df_select["pourcentage_vol"] = df_select["volume_difference"] / df_select["volume_compared_simu"]
px.choropleth_mapbox(df_select,geojson=df_select.geometry,locations=df_select.index,color="volume_difference", opacity= 0.7,color_continuous_scale="picnic", color_continuous_midpoint= 0,hover_name="id_carr_1km_right", hover_data=["volume_studied_simu", "volume_compared_simu","pourcentage_vol"],
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose").write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html')
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose").write_html(f'{analysis_output_path}/{context.config("output_prefix")}{prefix}_{purpose}.html')


100 changes: 100 additions & 0 deletions analysis/synthesis/population.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@

import os
import numpy as np
import pandas as pd
from analysis.marginals import NUMBER_OF_VEHICLES_LABELS

AGE_CLASS = [0, 10, 14, 17, 25, 50, 65, np.inf]
NUMBER_OF_VEHICLES= [0,1,2,3,np.inf]
NAME_AGE_CLASS = ["0-10","11-14","15-17","18-25","26-50","51-65","65+"]
ANALYSIS_FOLDER = "analysis_population"
def configure(context):

context.config("output_path")
context.config("output_prefix", "ile_de_france_")
context.config("sampling_rate")
context.stage("synthesis.population.trips")
context.stage("synthesis.population.enriched")

context.stage("data.census.filtered", alias = "census")
context.stage("data.hts.selected", alias = "hts")

def execute(context):

# check output folder existence
analysis_output_path = os.path.join(context.config("output_path"), ANALYSIS_FOLDER)
if not os.path.exists(analysis_output_path):
os.mkdir(analysis_output_path)

prefix = context.config("output_prefix")
sampling_rate = context.config("sampling_rate")
df_person_eq = context.stage("synthesis.population.enriched")
df_trip_eq = context.stage("synthesis.population.trips")

df_census = context.stage("census")
df_hts_households, df_hts_person, df_hts_trip = context.stage("hts")
df_hts_person["person_weight"] *=df_census["weight"].sum()/df_hts_person["person_weight"].sum()
df_hts_households["household_weight"] *=df_census["weight"].sum()/df_hts_households["household_weight"].sum()
# get age class
df_person_eq["age_class"] = pd.cut(df_person_eq["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)
df_census["age_class"] = pd.cut(df_census["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)
df_hts_person["age_class"] = pd.cut(df_hts_person["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)

# get vehicule class
df_person_eq["vehicles_class"] = pd.cut(df_person_eq["number_of_vehicles"],NUMBER_OF_VEHICLES,right=True,labels=NUMBER_OF_VEHICLES_LABELS)
df_hts_households["vehicles_class"] = pd.cut(df_hts_households["number_of_vehicles"],NUMBER_OF_VEHICLES,right=True,labels=NUMBER_OF_VEHICLES_LABELS)


df_eq_travel = pd.merge(df_trip_eq,df_person_eq[["person_id","age_class"]],on=["person_id"])
df_hts_travel = pd.merge(df_hts_trip,df_hts_person[["person_id","age_class","person_weight"]],on=["person_id"])
# Age purpose analysis
analysis_age_purpose = pd.pivot_table(df_eq_travel,"person_id",index="age_class",columns="following_purpose",aggfunc="count")
analysis_age_purpose = analysis_age_purpose/sampling_rate
analysis_age_purpose.to_csv(f"{analysis_output_path}/{prefix}age_purpose.csv")

# Compare age volume
analysis_age_class = pd.concat([df_census.groupby("age_class")["weight"].sum(),df_person_eq.groupby("age_class")["person_id"].count()],axis=1).reset_index()
analysis_age_class.columns = ["Age class","INSEE","EQASIM"]
analysis_age_class["Proportion_INSEE"] = analysis_age_class["INSEE"] /df_census["weight"].sum()
analysis_age_class["Proportion_EQASIM"] = analysis_age_class["EQASIM"] /len(df_person_eq)
analysis_age_class["EQASIM"] = analysis_age_class["EQASIM"]/sampling_rate
analysis_age_class.to_csv(f"{analysis_output_path}/{prefix}age.csv")

# Compare vehicle volume
analysis_vehicles_class = pd.concat([df_hts_households.groupby("vehicles_class")["household_weight"].sum(),df_person_eq.groupby("vehicles_class")["household_id"].nunique()],axis=1).reset_index()
analysis_vehicles_class.columns = ["Number of vehicles class","HTS","EQASIM"]
analysis_vehicles_class["Proportion_HTS"] = analysis_vehicles_class["HTS"] / df_hts_households["household_weight"].sum()
analysis_vehicles_class["Proportion_EQASIM"] = analysis_vehicles_class["EQASIM"] / df_person_eq["household_id"].nunique()
analysis_vehicles_class.to_csv(f"{analysis_output_path}/{prefix}nbr_vehicle.csv")

# Compare license volume
analysis_license_class = pd.concat([df_hts_person.groupby("has_license")["person_weight"].sum(),df_person_eq.groupby("has_license")["person_id"].count()],axis=1).reset_index()
analysis_license_class.columns = ["Possession of license","HTS","EQASIM"]
analysis_license_class["Proportion_HTS"] = analysis_license_class["HTS"] /df_hts_person["person_weight"].sum()
analysis_license_class["Proportion_EQASIM"] = analysis_license_class["EQASIM"] /len(df_person_eq)
analysis_license_class["EQASIM"] = analysis_license_class["EQASIM"]/sampling_rate
analysis_license_class.to_csv(f"{analysis_output_path}/{prefix}license.csv")

# Compare travel volume
analysis_travel = pd.concat([df_hts_travel.groupby("age_class")["person_weight"].sum(),df_eq_travel.groupby("age_class")["person_id"].count()],axis=1).reset_index()
analysis_travel.columns = ["Age class","HTS","EQASIM"]
analysis_travel["Proportion_HTS"] = analysis_travel["HTS"] /df_hts_travel["person_weight"].sum()
analysis_travel["Proportion_EQASIM"] = analysis_travel["EQASIM"] /len(df_eq_travel)
analysis_travel["EQASIM"] = analysis_travel["EQASIM"]/sampling_rate
analysis_travel.to_csv(f"{analysis_output_path}/{prefix}travel.csv")

# Compare distance
df_hts_travel["routed_distance"] = df_hts_travel["routed_distance"]/1000 if "routed_distance" in df_hts_travel.columns else df_hts_travel["euclidean_distance"]/1000
df_hts_travel["distance_class"] = pd.cut(df_hts_travel["routed_distance"],list(np.arange(100))+[np.inf])
analysis_dist = df_hts_travel.groupby("distance_class")["person_weight"].sum()

return analysis_dist









23 changes: 21 additions & 2 deletions synthesis/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
import sqlite3
import math
import numpy as np
from analysis.synthesis.population import ANALYSIS_FOLDER

def configure(context):

context.stage("synthesis.population.enriched")

context.stage("synthesis.population.activities")
Expand All @@ -16,13 +18,14 @@ def configure(context):
context.stage("synthesis.vehicles.vehicles")

context.stage("synthesis.population.spatial.locations")

context.stage("analysis.synthesis.population")
context.stage("documentation.meta_output")

context.config("output_path")
context.config("output_prefix", "ile_de_france_")
context.config("output_formats", ["csv", "gpkg"])

context.config("sampling_rate")

if context.config("mode_choice", False):
context.stage("matsim.simulation.prepare")

Expand Down Expand Up @@ -271,3 +274,19 @@ def execute(context):
if "geoparquet" in output_formats:
path = "%s/%strips.geoparquet" % (output_path, output_prefix)
df_spatial.to_parquet(path)

# Output population analysis
SAMPLING_RATE =context.config("sampling_rate")
df_spatial = df_spatial.to_crs("EPSG:2154")

df_spatial["distance"] = df_spatial.length/1000
df_spatial["distance_class"] = pd.cut(df_spatial["distance"],list(np.arange(100))+[np.inf])

# Compare distance
analysis_distance = context.stage("analysis.synthesis.population")
analysis_distance = pd.concat([analysis_distance,df_spatial.groupby("distance_class")["person_id"].count()],axis=1).reset_index()
analysis_distance.columns = ["Distance class","HTS","EQASIM"]
analysis_distance["Proportion_HTS"] = analysis_distance["HTS"] / analysis_distance["HTS"].sum()
analysis_distance["Proportion_EQASIM"] = analysis_distance["EQASIM"] / len(df_spatial)
analysis_distance["EQASIM"] = analysis_distance["EQASIM"]/ SAMPLING_RATE
analysis_distance.to_csv(f"{output_path}/{ANALYSIS_FOLDER}/{output_prefix}distance.csv")
MarieMcLaurent marked this conversation as resolved.
Show resolved Hide resolved
Loading