Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: integrate vehicles by default #233

Merged
merged 18 commits into from
Sep 5, 2024
Merged
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

**Under development**

- chore: update to `eqasim-java` commit `ece4932`
- feat: vehicles and vehicle types are now always generated
- feat: read vehicles data from zip files
- feat: option parameter to remove filtering for requesting departements in hts
- fix: secondary location model used same random seed in every parallel thread
- feat: add a new method for attributing income to households using the bhepop2 package
Expand Down
7 changes: 1 addition & 6 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,5 @@ config:
# Activate if you want to run mode choice
mode_choice: false

# Uncommented below to enable vehicle fleet generation
# generate_vehicles_file: True
# generate_vehicles_method: fleet_sample
# vehicles_data_year: 2015

# Uncomment to use the bhepop2 package for attributing income
# income_assignation_method: bhepop2
# income_assignation_method: bhepop2
80 changes: 47 additions & 33 deletions data/vehicles/raw.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import numpy as np
import pandas as pd
import mock
import mock, os, glob
from openpyxl.reader import excel
import zipfile

"""
This stage loads the raw data of the specified vehicle fleet data
Expand All @@ -10,60 +11,73 @@

def configure(context):
context.config("data_path")
context.config("vehicles_data_year", 2015)
context.config("vehicles_path", "vehicles")
context.config("vehicles_year", 2021)
context.stage("data.spatial.codes")

def execute(context):

year = context.config("vehicles_data_year")

df_codes = context.stage("data.spatial.codes")

    # the downloaded excel files' metadata actually contain a badly formatted ISO datetime
# https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1659
with mock.patch.object(excel.ExcelReader, 'read_properties', lambda self: None):
df_vehicle_com_counts = pd.read_excel(
"%s/vehicles_%s/Parc_VP_Communes_%s.xlsx" % (context.config("data_path"), year, year)
)
df_vehicle_reg_counts = pd.read_excel(
"%s/vehicles_%s/Parc_VP_Regions_%s.xlsx" % (context.config("data_path"), year, year)
)
year = str(context.config("vehicles_year"))

with zipfile.ZipFile("{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_communes.zip")) as archive:
with archive.open("Parc_VP_Communes_{}.xlsx".format(year)) as f:
df_municipalities = pd.read_excel(f)

with zipfile.ZipFile("{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_regions.zip")) as archive:
with archive.open("Parc_VP_Regions_{}.xlsx".format(year)) as f:
df_regions = pd.read_excel(f)

df_vehicle_com_counts["region_id"] = df_vehicle_com_counts["Code région"].astype("category")
df_vehicle_com_counts["departement_id"] = df_vehicle_com_counts["Code départment"].astype("category")
df_vehicle_com_counts["commune_id"] = df_vehicle_com_counts["Code commune"].astype("category")
df_municipalities["region_id"] = df_municipalities["Code région"].astype("category")
df_municipalities["departement_id"] = df_municipalities["Code départment"].astype("category")
df_municipalities["commune_id"] = df_municipalities["Code commune"].astype("category")

df_vehicle_reg_counts["region_id"] = df_vehicle_reg_counts["Code région"].astype("category")
df_regions["region_id"] = df_regions["Code région"].astype("category")

requested_departements = set(df_codes["departement_id"].unique())
requested_regions = set(df_codes["region_id"].astype(str).unique())

if len(requested_departements) > 0:
df_vehicle_com_counts = df_vehicle_com_counts[df_vehicle_com_counts["departement_id"].isin(requested_departements)]
df_municipalities = df_municipalities[df_municipalities["departement_id"].isin(requested_departements)]

if len(requested_regions) > 0:
df_vehicle_reg_counts = df_vehicle_reg_counts[df_vehicle_reg_counts["region_id"].isin(requested_regions)]
df_regions = df_regions[df_regions["region_id"].isin(requested_regions)]

df_municipalities["region_id"] = df_municipalities["region_id"].cat.remove_unused_categories()
df_municipalities["departement_id"] = df_municipalities["departement_id"].cat.remove_unused_categories()
df_municipalities["commune_id"] = df_municipalities["commune_id"].cat.remove_unused_categories()

df_vehicle_com_counts["region_id"] = df_vehicle_com_counts["region_id"].cat.remove_unused_categories()
df_vehicle_com_counts["departement_id"] = df_vehicle_com_counts["departement_id"].cat.remove_unused_categories()
df_vehicle_com_counts["commune_id"] = df_vehicle_com_counts["commune_id"].cat.remove_unused_categories()
df_regions["region_id"] = df_regions["region_id"].cat.remove_unused_categories()

df_vehicle_reg_counts["region_id"] = df_vehicle_reg_counts["region_id"].cat.remove_unused_categories()
df_municipalities["critair"] = df_municipalities["Vignette Crit'air"]
df_municipalities["technology"] = df_municipalities["Energie"]

df_vehicle_com_counts["critair"] = df_vehicle_com_counts["Vignette Crit'air"]
df_vehicle_com_counts["technology"] = df_vehicle_com_counts["Energie"]
df_regions["critair"] = df_regions["Vignette crit'air"]
df_regions["technology"] = df_regions["Energie"]

df_vehicle_reg_counts["critair"] = df_vehicle_reg_counts["Vignette crit'air"]
df_vehicle_reg_counts["technology"] = df_vehicle_reg_counts["Energie"]
count_column_name = "Parc au 01/01/%s" % context.config("vehicles_year")
age_column_name = "Age au 01/01/%s" % context.config("vehicles_year")

count_column_name = "Parc au 01/01/%s" % context.config("vehicles_data_year")
age_column_name = "Age au 01/01/%s" % context.config("vehicles_data_year")
df_municipalities["fleet"] = df_municipalities[count_column_name]
df_regions["fleet"] = df_regions[count_column_name]
df_regions["age"] = df_regions[age_column_name]

df_vehicle_com_counts["fleet"] = df_vehicle_com_counts[count_column_name]
df_vehicle_reg_counts["fleet"] = df_vehicle_reg_counts[count_column_name]
df_vehicle_reg_counts["age"] = df_vehicle_reg_counts[age_column_name]
df_vehicle_fleet_counts = df_municipalities.groupby(["region_id", "commune_id", "critair","technology"])["fleet"].sum().reset_index().dropna()
df_vehicle_age_counts = df_regions.groupby(["region_id", "critair", "technology", "age"])["fleet"].sum().reset_index().dropna()

df_vehicle_fleet_counts = df_vehicle_com_counts.groupby(["region_id", "commune_id", "critair","technology"])["fleet"].sum().reset_index().dropna()
df_vehicle_age_counts = df_vehicle_reg_counts.groupby(["region_id", "critair", "technology", "age"])["fleet"].sum().reset_index().dropna()
return df_vehicle_fleet_counts, df_vehicle_age_counts

def validate(context):
municipalities_path = "{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_communes.zip")
regions_path = "{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_regions.zip")

if not os.path.exists(municipalities_path):
raise RuntimeError("Municipalities vehicle data is not available at {}".format(municipalities_path))

if not os.path.exists(regions_path):
raise RuntimeError("Regions vehicle data is not available at {}".format(regions_path))

return df_vehicle_fleet_counts, df_vehicle_age_counts
return os.path.getsize(municipalities_path) + os.path.getsize(regions_path)
39 changes: 16 additions & 23 deletions docs/simulation.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,36 +127,31 @@ config:

## <a name="section-data"></a>Using MATSim's emissions contrib

You can calculate air pollution emissions using matsim by using some additional data.
In order to use a detailed emissions analysis, you need to let the pipeline generate a meaningful vehicle fleet. Data on the private vehicle stock across France are available from the Ministry of Ecology:

You must download the crit'air data from this site : https://www.statistiques.developpement-durable.gouv.fr/donnees-sur-le-parc-automobile-francais-au-1er-janvier-2021
- [Vehicle stock data](https://www.statistiques.developpement-durable.gouv.fr/donnees-sur-le-parc-automobile-francais-au-1er-janvier-2021)
- Click on *Données sur les voitures particulières* (first tab) to get information on the private vehicles
- Download *Données régionales des voitures particulières - 2011 à 2021*
- Download *Données communales des voitures particulières - 2011 à 2021*
- Put both zip files into `data/vehicles`

In the `config.yml`, you must enable the vehicle fleet generation :

You should download both files :

- Données régionales des voitures particulières - 2011 à 2021 (zip, 1.79 Mo)
- Données communales des voitures particulières - 2011 à 2021 (zip, 130.33 Mo)
```yaml
config:
vehicles_method: fleet_sample
```

Inside the zip you'll find one data file per year; you can extract the files concerning the year you're interested in (let's use `2015` for this example). Then unzip and place them in `data/vehicles_2015/`.
After doing so, the `vehicles.xml.gz` and `vehicle_types.xml.gz` in the output will not only contain default vehicles and vehicle types, but realistic ones, based on the regional probabilities.

Then, in the `config.yml`, you must enable the vehicle fleet generation :
You can also choose to generate vehicles for a different year. The 2021 edition ZIP, for instance, contains all the years from 2012 onwards, and newer editions will contain more recent years. You can choose the year by setting:

```yaml
# ...

config:
generate_vehicles_file: True
generate_vehicles_method: fleet_sample
vehicles_data_year: 2015

# ...
vehicles_year: 2015
```

You should end up, at the end of the `matsim.output` stage, with a `vehicles.xml` file.

After you run the full simulation, you'll be able to use some classes defined in `eqasim-java` to analyse and compute emissions based on the MATSim outputs.

for example:
Once you have run a full simulation, you'll be able to use some classes defined in `eqasim-java` to analyse and compute emissions based on the MATSim outputs. For example:

```bash
java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunComputeEmissionsEvents --config-path config.xml --hbefa-cold-avg ./EFA_ColdStart_Vehcat_2015_Cold_Average.csv --hbefa-hot-avg ./EFA_HOT_Vehcat_2015_Hot_Average.csv --hbefa-cold-detailed ./EFA_ColdStart_Subsegm_2015_Cold_Detailed.csv --hbefa-hot-detailed ./EFA_HOT_Subsegm_2015_Hot_Detailed.csv
Expand All @@ -170,6 +165,4 @@ java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunExportEmi
java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunComputeEmissionsGrid --config-path config.xml --domain-shp-path idf_2154.shp
```

Please note that you need a copy of the HBEFA database in order to run those.

For further information you can look at [eqasim-java](https://github.com/eqasim-org/eqasim-java) and [matsim-libs/contribs/emissions](https://github.com/matsim-org/matsim-libs/tree/master/contribs/emissions)
Please note that you need a copy of the HBEFA database in order to run those. For further information you can look at [eqasim-java](https://github.com/eqasim-org/eqasim-java) and [matsim-libs/contribs/emissions](https://github.com/matsim-org/matsim-libs/tree/master/contribs/emissions)
25 changes: 1 addition & 24 deletions matsim/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def configure(context):
context.config("output_path")
context.config("output_prefix", "ile_de_france_")
context.config("write_jar", True)
context.config("generate_vehicles_file", False)
need_osm = context.config("export_detailed_network", False)
if need_osm:
context.stage("matsim.scenario.supply.osm")
Expand All @@ -28,36 +27,14 @@ def execute(context):
file_names = [
"%shouseholds.xml.gz" % context.config("output_prefix"),
"%spopulation.xml.gz" % context.config("output_prefix"),
"%svehicles.xml.gz" % context.config("output_prefix"),
"%sfacilities.xml.gz" % context.config("output_prefix"),
"%snetwork.xml.gz" % context.config("output_prefix"),
"%stransit_schedule.xml.gz" % context.config("output_prefix"),
"%stransit_vehicles.xml.gz" % context.config("output_prefix"),
"%sconfig.xml" % context.config("output_prefix")
]

if context.config("generate_vehicles_file"):
vehicle_file = "%svehicles.xml.gz" % context.config("output_prefix")

# it would make more sense to modify this in the eqasim-java part (in org.eqasim.core.scenario.config)
# but it's not obvious how to preserve backward compatibility hence the following method :
config_file = "%sconfig.xml" % context.config("output_prefix")
with open( "%s/%s" % (context.path("matsim.simulation.prepare"), config_file)) as f_read:
content = f_read.read()
content = content.replace(
'<param name="vehiclesFile" value="null" />',
'<param name="vehiclesFile" value="%s" />' % vehicle_file
)
content = content.replace(
'<param name="vehiclesSource" value="defaultVehicle" />',
'<param name="vehiclesSource" value="fromVehiclesData" />'
)
with open("%s/%s" % (context.config("output_path"), config_file), "w+") as f_write:
f_write.write(content)

file_names.append(vehicle_file)
# since we did a copy & modify, no need to copy it again
file_names.remove(config_file)

for name in file_names:
shutil.copy(
"%s/%s" % (context.path("matsim.simulation.prepare"), name),
Expand Down
2 changes: 1 addition & 1 deletion matsim/runtime/eqasim.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

DEFAULT_EQASIM_VERSION = "1.5.0"
DEFAULT_EQASIM_BRANCH = "develop"
DEFAULT_EQASIM_COMMIT = "73ac087"
DEFAULT_EQASIM_COMMIT = "ece4932"

def configure(context):
context.stage("matsim.runtime.git")
Expand Down
29 changes: 27 additions & 2 deletions matsim/scenario/population.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def configure(context):
context.stage("synthesis.population.spatial.locations")

context.stage("synthesis.population.trips")
context.stage("synthesis.vehicles.vehicles")

PERSON_FIELDS = [
"person_id", "household_income", "car_availability", "bike_availability",
Expand All @@ -31,7 +32,11 @@ def configure(context):
"person_id", "mode", "departure_time", "travel_time"
]

def add_person(writer, person, activities, trips):
VEHICLE_FIELDS = [
"owner_id", "vehicle_id", "mode"
]

def add_person(writer, person, activities, trips, vehicles):
writer.start_person(person[PERSON_FIELDS.index("person_id")])

writer.start_attributes()
Expand All @@ -56,6 +61,11 @@ def add_person(writer, person, activities, trips):
writer.add_attribute("employed", "java.lang.String", person[PERSON_FIELDS.index("employed")])
writer.add_attribute("sex", "java.lang.String", person[PERSON_FIELDS.index("sex")][0])

writer.add_attribute("vehicles", "org.matsim.vehicles.PersonVehicles", "{{{content}}}".format(content = ",".join([
"\"{mode}\":\"{id}\"".format(mode = v[VEHICLE_FIELDS.index("mode")], id = v[VEHICLE_FIELDS.index("vehicle_id")])
for v in vehicles
])))

writer.end_attributes()

writer.start_plan(selected = True)
Expand Down Expand Up @@ -108,20 +118,25 @@ def execute(context):
df_trips = context.stage("synthesis.population.trips")
df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"]

df_vehicles = context.stage("synthesis.vehicles.vehicles")[1]
df_vehicles = df_vehicles.sort_values(by = ["owner_id"])

with gzip.open(output_path, 'wb+') as writer:
with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer:
writer = writers.PopulationWriter(writer)
writer.start_population()

activity_iterator = backlog_iterator(iter(df_activities[ACTIVITY_FIELDS].itertuples(index = False)))
trip_iterator = backlog_iterator(iter(df_trips[TRIP_FIELDS].itertuples(index = False)))
vehicle_iterator = backlog_iterator(iter(df_vehicles[VEHICLE_FIELDS].itertuples(index = False)))

with context.progress(total = len(df_persons), label = "Writing population ...") as progress:
for person in df_persons.itertuples(index = False):
person_id = person[PERSON_FIELDS.index("person_id")]

activities = []
trips = []
vehicles = []

# Track all activities for person
while activity_iterator.has_next():
Expand All @@ -147,7 +162,17 @@ def execute(context):

assert len(trips) == len(activities) - 1

add_person(writer, person, activities, trips)
# Track all vehicles for person
while vehicle_iterator.has_next():
vehicle = vehicle_iterator.next()

if not vehicle[VEHICLE_FIELDS.index("owner_id")] == person_id:
vehicle_iterator.previous()
break
else:
vehicles.append(vehicle)

add_person(writer, person, activities, trips, vehicles)
progress.update()

writer.end_population()
Expand Down
4 changes: 2 additions & 2 deletions matsim/scenario/vehicles.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
import matsim.writers as writers

def configure(context):
context.stage("synthesis.vehicles.selected")
context.stage("synthesis.vehicles.vehicles")

TYPE_FIELDS = ["type_id", "nb_seats", "length", "width", "pce", "mode"]
VEHICLE_FIELDS = ["vehicle_id", "type_id", "critair", "technology", "age", "euro"]

def execute(context):
output_path = "%s/vehicles.xml.gz" % context.path()

df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.selected")
df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.vehicles")

with gzip.open(output_path, 'wb+') as writer:
with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer:
Expand Down
18 changes: 8 additions & 10 deletions matsim/simulation/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ def configure(context):

context.stage("matsim.scenario.population")
context.stage("matsim.scenario.households")

if context.config("generate_vehicles_file", False):
context.stage("matsim.scenario.vehicles")
context.stage("matsim.scenario.vehicles")

context.stage("matsim.scenario.facilities")
context.stage("matsim.scenario.supply.processed")
Expand Down Expand Up @@ -78,12 +76,11 @@ def execute(context):
)
shutil.copy(transit_vehicles_path, "%s/%stransit_vehicles.xml.gz" % (context.cache_path, context.config("output_prefix")))

if context.config("generate_vehicles_file"):
vehicles_path = "%s/%s" % (
context.path("matsim.scenario.vehicles"),
context.stage("matsim.scenario.vehicles")
)
shutil.copy(vehicles_path, "%s/%svehicles.xml.gz" % (context.cache_path, context.config("output_prefix")))
vehicles_path = "%s/%s" % (
context.path("matsim.scenario.vehicles"),
context.stage("matsim.scenario.vehicles")
)
shutil.copy(vehicles_path, "%s/%svehicles.xml.gz" % (context.cache_path, context.config("output_prefix")))

# Generate base configuration
eqasim.run(context, "org.eqasim.core.scenario.config.RunGenerateConfig", [
Expand All @@ -98,7 +95,8 @@ def execute(context):
# Adapt config for Île-de-France
eqasim.run(context, "org.eqasim.ile_de_france.scenario.RunAdaptConfig", [
"--input-path", "generic_config.xml",
"--output-path", "%sconfig.xml" % context.config("output_prefix")
"--output-path", "%sconfig.xml" % context.config("output_prefix"),
"--prefix", context.config("output_prefix")
])
assert os.path.exists("%s/%sconfig.xml" % (context.path(), context.config("output_prefix")))

Expand Down
Loading
Loading