From 5ea020bbd77734e25fdd651e84e02fba3750ecff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20H=C3=B6rl?=
Date: Fri, 5 Jul 2024 17:38:09 +0200
Subject: [PATCH] chore: avoid hardcoding epsg codes outside of 'data' space
 (#245)

---
 matsim/scenario/supply/gtfs.py                      |  4 +++-
 matsim/scenario/supply/osm.py                       |  4 +++-
 matsim/scenario/supply/processed.py                 |  6 ------
 synthesis/locations/home/addresses.py               |  2 +-
 synthesis/output.py                                 |  4 ++--
 synthesis/population/spatial/locations.py           |  2 +-
 synthesis/population/spatial/secondary/locations.py | 11 ++++++-----
 7 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/matsim/scenario/supply/gtfs.py b/matsim/scenario/supply/gtfs.py
index 348e81b9..0635cc0f 100644
--- a/matsim/scenario/supply/gtfs.py
+++ b/matsim/scenario/supply/gtfs.py
@@ -6,15 +6,17 @@ def configure(context):
     context.stage("matsim.runtime.java")
     context.stage("matsim.runtime.pt2matsim")
     context.stage("data.gtfs.cleaned")
+    context.stage("synthesis.population.spatial.home.locations")
 
     context.config("gtfs_date", "dayWithMostServices")
 
 def execute(context):
     gtfs_path = "%s/output" % context.path("data.gtfs.cleaned")
+    crs = context.stage("synthesis.population.spatial.home.locations").crs
 
     pt2matsim.run(context, "org.matsim.pt2matsim.run.Gtfs2TransitSchedule", [
         gtfs_path,
-        context.config("gtfs_date"), "EPSG:2154",
+        context.config("gtfs_date"), crs,
         "%s/transit_schedule.xml.gz" % context.path(),
         "%s/transit_vehicles.xml.gz" % context.path()
     ])
diff --git a/matsim/scenario/supply/osm.py b/matsim/scenario/supply/osm.py
index 48810217..b9e79d3a 100644
--- a/matsim/scenario/supply/osm.py
+++ b/matsim/scenario/supply/osm.py
@@ -6,11 +6,13 @@ def configure(context):
     context.stage("matsim.runtime.java")
     context.stage("matsim.runtime.pt2matsim")
     context.stage("data.osm.cleaned")
+    context.stage("synthesis.population.spatial.home.locations")
 
     context.config("export_detailed_network", False)
 
 def execute(context):
     osm_path = "%s/output.osm.gz" % context.path("data.osm.cleaned")
+    crs = context.stage("synthesis.population.spatial.home.locations").crs
 
     pt2matsim.run(context, "org.matsim.pt2matsim.run.CreateDefaultOsmConfig", arguments=[
         "config_template.xml"
@@ -26,7 +28,7 @@ def execute(context):
 
     content = content.replace(
         '',
-        ''
+        ''.format(crs)
     )
 
     content = content.replace(
diff --git a/matsim/scenario/supply/processed.py b/matsim/scenario/supply/processed.py
index 9679f47e..f75fc130 100644
--- a/matsim/scenario/supply/processed.py
+++ b/matsim/scenario/supply/processed.py
@@ -72,12 +72,6 @@ def execute(context):
     assert(os.path.exists("%s/network.xml.gz" % context.path()))
     assert(os.path.exists("%s/schedule.xml.gz" % context.path()))
 
-    # Run plausibility checks
-    #pt2matsim.run(context, "org.matsim.pt2matsim.run.CheckMappedSchedulePlausibility", [
-    #    "schedule.xml.gz", "network.xml.gz", "EPSG:2154", context.path()
-    #])
-    #assert(os.path.exists("%s/allPlausibilityWarnings.csv" % context.path()))
-
     return dict(
         network_path = "network.xml.gz",
         schedule_path = "schedule.xml.gz",
diff --git a/synthesis/locations/home/addresses.py b/synthesis/locations/home/addresses.py
index 5a34de80..afe0e7e4 100644
--- a/synthesis/locations/home/addresses.py
+++ b/synthesis/locations/home/addresses.py
@@ -57,7 +57,7 @@ def execute(context):
 
     # Put together matched and missing addresses
     df_addresses = pd.concat([df_addresses, df_missing])
-    df_addresses = gpd.GeoDataFrame(df_addresses, crs = "EPSG:2154")
+    df_addresses = gpd.GeoDataFrame(df_addresses, crs = df_buildings.crs)
 
     # Obtain weights for all addresses
     if context.config("home_location_weight") == "housing":
diff --git a/synthesis/output.py b/synthesis/output.py
index b818900f..b970e59b 100644
--- a/synthesis/output.py
+++ b/synthesis/output.py
@@ -178,7 +178,7 @@ def execute(context):
     ]], how = "left", on = ["person_id", "activity_index"])
 
     # Write spatial activities
-    df_spatial = gpd.GeoDataFrame(df_activities, crs = "EPSG:2154")
+    df_spatial = gpd.GeoDataFrame(df_activities, crs = df_locations.crs)
     df_spatial["purpose"] = df_spatial["purpose"].astype(str)
     if "gpkg" in output_formats:
         path = "%s/%sactivities.gpkg" % (output_path, output_prefix)
@@ -244,7 +244,7 @@ def execute(context):
 
     df_spatial = df_spatial.drop(columns = ["preceding_geometry", "following_geometry"])
 
-    df_spatial = gpd.GeoDataFrame(df_spatial, crs = "EPSG:2154")
+    df_spatial = gpd.GeoDataFrame(df_spatial, crs = df_locations.crs)
     df_spatial["following_purpose"] = df_spatial["following_purpose"].astype(str)
     df_spatial["preceding_purpose"] = df_spatial["preceding_purpose"].astype(str)
diff --git a/synthesis/population/spatial/locations.py b/synthesis/population/spatial/locations.py
index 25a4dabf..5277fd19 100644
--- a/synthesis/population/spatial/locations.py
+++ b/synthesis/population/spatial/locations.py
@@ -55,6 +55,6 @@ def execute(context):
     assert initial_count == final_count
     assert not df_locations["geometry"].isna().any()
 
-    df_locations = gpd.GeoDataFrame(df_locations, crs = "EPSG:2154")
+    df_locations = gpd.GeoDataFrame(df_locations, crs = df_home.crs)
 
     return df_locations
diff --git a/synthesis/population/spatial/secondary/locations.py b/synthesis/population/spatial/secondary/locations.py
index c837c2c0..df597f47 100644
--- a/synthesis/population/spatial/secondary/locations.py
+++ b/synthesis/population/spatial/secondary/locations.py
@@ -25,6 +25,7 @@ def prepare_locations(context):
     # Load persons and their primary locations
     df_home = context.stage("synthesis.population.spatial.home.locations")
     df_work, df_education = context.stage("synthesis.population.spatial.primary.locations")
+    crs = df_home.crs
 
     df_home = df_home.rename(columns = { "geometry": "home" })
     df_work = df_work.rename(columns = { "geometry": "work" })
@@ -35,7 +36,7 @@ def prepare_locations(context):
     df_locations = pd.merge(df_locations, df_work[["person_id", "work"]], how = "left", on = "person_id")
     df_locations = pd.merge(df_locations, df_education[["person_id", "education"]], how = "left", on = "person_id")
 
-    return df_locations[["person_id", "home", "work", "education"]].sort_values(by = "person_id")
+    return df_locations[["person_id", "home", "work", "education"]].sort_values(by = "person_id"), crs
 
 def prepare_destinations(context):
     df_locations = context.stage("synthesis.locations.secondary")
@@ -76,7 +77,7 @@ def execute(context):
     # Load trips and primary locations
     df_trips = context.stage("synthesis.population.trips").sort_values(by = ["person_id", "trip_index"])
     df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"]
-    df_primary = prepare_locations(context)
+    df_primary, crs = prepare_locations(context)
 
     # Prepare data
     distance_distributions = context.stage("synthesis.population.spatial.secondary.distance_distributions")
@@ -104,7 +105,7 @@ def execute(context):
         batches.append((
             df_trips[df_trips["person_id"].isin(unique_person_ids[index])],
             df_primary[df_primary["person_id"].isin(unique_person_ids[index])],
-            random_seeds[index]
+            random_seeds[index], crs
         ))
 
     # Run algorithm in parallel
@@ -127,7 +128,7 @@ def execute(context):
     return df_locations, df_convergence
 
 def process(context, arguments):
-    df_trips, df_primary, random_seed = arguments
+    df_trips, df_primary, random_seed, crs = arguments
 
     # Set up RNG
     random = np.random.RandomState(context.config("random_seed"))
@@ -195,7 +196,7 @@ def process(context, arguments):
         context.progress.update()
 
     df_locations = pd.DataFrame.from_records(df_locations, columns = ["person_id", "activity_index", "location_id", "geometry"])
-    df_locations = gpd.GeoDataFrame(df_locations, crs = "EPSG:2154")
+    df_locations = gpd.GeoDataFrame(df_locations, crs = crs)
    assert not df_locations["geometry"].isna().any()
 
     df_convergence = pd.DataFrame.from_records(df_convergence, columns = ["valid", "size"])
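
Note on the pattern introduced above (illustrative only, not part of the diff): stages outside the 'data' space now declare a dependency on "synthesis.population.spatial.home.locations" and reuse the CRS of the GeoDataFrame that stage returns, instead of hardcoding "EPSG:2154". The stage module below is a hypothetical sketch of that pattern; only the stage name and the .crs attribute come from the patch itself, everything else (the example frame, the coordinates) is made up for illustration.

    import geopandas as gpd
    from shapely.geometry import Point

    def configure(context):
        # Depend on the stage whose GeoDataFrame carries the scenario CRS
        context.stage("synthesis.population.spatial.home.locations")

    def execute(context):
        # Read the CRS from the upstream result instead of hardcoding "EPSG:2154"
        crs = context.stage("synthesis.population.spatial.home.locations").crs

        # Build any new spatial output in that same CRS
        # (person_id and the point are arbitrary example values)
        df_example = gpd.GeoDataFrame(
            { "person_id": [1] },
            geometry = [Point(651000.0, 6862000.0)],
            crs = crs
        )

        return df_example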