
Commit

fix: separate analysis from data output & update docs
Marie Laurent committed Oct 23, 2024
1 parent 9dd5df2 commit a01f600
Showing 3 changed files with 41 additions and 22 deletions.
35 changes: 31 additions & 4 deletions analysis/synthesis/population.py
@@ -2,8 +2,9 @@
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from analysis.marginals import NUMBER_OF_VEHICLES_LABELS

from shapely import distance
AGE_CLASS = [0, 10, 14, 17, 25, 50, 65, np.inf]
NUMBER_OF_VEHICLES = [0, 1, 2, 3, np.inf]
NAME_AGE_CLASS = ["0-10","11-14","15-17","18-25","26-50","51-65","65+"]
@@ -13,8 +14,10 @@ def configure(context):
context.config("output_path")
context.config("output_prefix", "ile_de_france_")
context.config("sampling_rate")

context.stage("synthesis.population.trips")
context.stage("synthesis.population.enriched")
context.stage("synthesis.population.spatial.locations")

context.stage("data.census.filtered", alias = "census")
context.stage("data.hts.selected", alias = "hts")
@@ -30,7 +33,12 @@ def execute(context):
sampling_rate = context.config("sampling_rate")
df_person_eq = context.stage("synthesis.population.enriched")
df_trip_eq = context.stage("synthesis.population.trips")

df_location_eq = context.stage("synthesis.population.spatial.locations")[["person_id", "activity_index", "geometry"]]

df_location_eq = df_location_eq.to_crs("EPSG:2154")
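# A trip with trip_index i is assumed to link activity i (preceding) to activity i + 1 (following)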
df_trip_eq["preceding_activity_index"] = df_trip_eq["trip_index"]
df_trip_eq["following_activity_index"] = df_trip_eq["trip_index"] + 1

df_census = context.stage("census")
df_hts_households, df_hts_person, df_hts_trip = context.stage("hts")
df_hts_person["person_weight"] *=df_census["weight"].sum()/df_hts_person["person_weight"].sum()
@@ -47,6 +55,8 @@

df_eq_travel = pd.merge(df_trip_eq,df_person_eq[["person_id","age_class"]],on=["person_id"])
df_hts_travel = pd.merge(df_hts_trip,df_hts_person[["person_id","age_class","person_weight"]],on=["person_id"])

print("Generate tables ...")
# Age purpose analysis
analysis_age_purpose = pd.pivot_table(df_eq_travel,"person_id",index="age_class",columns="following_purpose",aggfunc="count")
analysis_age_purpose = analysis_age_purpose/sampling_rate
@@ -86,9 +96,26 @@ def execute(context):
# Compare distance
df_hts_travel["routed_distance"] = df_hts_travel["routed_distance"]/1000 if "routed_distance" in df_hts_travel.columns else df_hts_travel["euclidean_distance"]/1000
df_hts_travel["distance_class"] = pd.cut(df_hts_travel["routed_distance"],list(np.arange(100))+[np.inf])
analysis_dist = df_hts_travel.groupby("distance_class")["person_weight"].sum()

return analysis_dist
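# Attach the geometry of each trip's preceding and following activity to measure crow-fly distances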
df_spatial = pd.merge(df_trip_eq, df_location_eq.rename(columns = {
"activity_index": "preceding_activity_index",
"geometry": "preceding_geometry"
}), how = "left", on = ["person_id", "preceding_activity_index"])

df_spatial = pd.merge(df_spatial, df_location_eq.rename(columns = {
"activity_index": "following_activity_index",
"geometry": "following_geometry"
}), how = "left", on = ["person_id", "following_activity_index"])
df_spatial["distance"] = df_spatial.apply(lambda x:distance( x["preceding_geometry"],x["following_geometry"])/1000,axis=1)
df_spatial["distance_class"] = pd.cut(df_spatial["distance"],list(np.arange(100))+[np.inf])

analysis_distance = pd.concat([df_hts_travel.groupby("distance_class")["person_weight"].sum(),df_spatial.groupby("distance_class")["person_id"].count()],axis=1).reset_index()
analysis_distance.columns = ["Distance class","HTS","EQASIM"]
analysis_distance["Proportion_HTS"] = analysis_distance["HTS"] / analysis_distance["HTS"].sum()
analysis_distance["Proportion_EQASIM"] = analysis_distance["EQASIM"] / len(df_spatial)
analysis_distance["EQASIM"] = analysis_distance["EQASIM"]/ sampling_rate
analysis_distance.to_csv(f"{analysis_output_path}/{prefix}distance.csv")




8 changes: 8 additions & 0 deletions docs/population.md
@@ -450,3 +450,11 @@ folder as: `{output_prefix}_{age group}_{trip purpose}.html`

Note:
With `analysis_from_file` set to False, the most recently generated synthetic population is analysed by default. If `output_prefix` and `comparison_file_prefix` refer to the same outputs, or `comparison_file_prefix` is not specified, only a volume visualisation of that population is produced.
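For reference, a minimal sketch of how these options could look in `config.yml` (option names as used above; the values and the comparison prefix are illustrative placeholders):

```yaml
config:
  analysis_from_file: false              # analyse the last generated synthetic population
  output_prefix: ile_de_france_
  comparison_file_prefix: previous_run_  # omit to produce only the volume visualisation
```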


### Comparing the population to source data

Using the population pipeline in the `analysis` directory, you can generate several tables that compare the composition of the synthetic population to its source data. The tables currently generated compare: population volume by age range, household volume by number of vehicles, population volume with and without a driving licence, trip volume by age range, and trip volume by length.
From the synthetic population alone, a table of population volume by age range and trip purpose is also created.

To use this pipeline, you must already have created a synthetic population. Then open `config.yml` and add the `analysis.synthesis.population` stage to the `run` section, as in the sketch below.
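A minimal sketch of the corresponding `config.yml` excerpt — the stage name comes from this commit, while the other entries (including `synthesis.output` and the sample values) are illustrative placeholders to adapt to your setup:

```yaml
run:
  - synthesis.output
  - analysis.synthesis.population  # generates the comparison tables

config:
  output_path: output
  output_prefix: ile_de_france_
  sampling_rate: 0.001
```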
20 changes: 2 additions & 18 deletions synthesis/output.py
@@ -18,7 +18,7 @@ def configure(context):
context.stage("synthesis.vehicles.vehicles")

context.stage("synthesis.population.spatial.locations")
context.stage("analysis.synthesis.population")

context.stage("documentation.meta_output")

context.config("output_path")
@@ -273,20 +273,4 @@ def execute(context):
clean_gpkg(path)
if "geoparquet" in output_formats:
path = "%s/%strips.geoparquet" % (output_path, output_prefix)
df_spatial.to_parquet(path)

# Output population analysis
SAMPLING_RATE =context.config("sampling_rate")
df_spatial = df_spatial.to_crs("EPSG:2154")

df_spatial["distance"] = df_spatial.length/1000
df_spatial["distance_class"] = pd.cut(df_spatial["distance"],list(np.arange(100))+[np.inf])

# Compare distance
analysis_distance = context.stage("analysis.synthesis.population")
analysis_distance = pd.concat([analysis_distance,df_spatial.groupby("distance_class")["person_id"].count()],axis=1).reset_index()
analysis_distance.columns = ["Distance class","HTS","EQASIM"]
analysis_distance["Proportion_HTS"] = analysis_distance["HTS"] / analysis_distance["HTS"].sum()
analysis_distance["Proportion_EQASIM"] = analysis_distance["EQASIM"] / len(df_spatial)
analysis_distance["EQASIM"] = analysis_distance["EQASIM"]/ SAMPLING_RATE
analysis_distance.to_csv(f"{output_path}/{ANALYSIS_FOLDER}/{output_prefix}distance.csv")
df_spatial.to_parquet(path)
