Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handling of mexico-city survey data for scenario generation #26

Draft
wants to merge 16 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions matsim/scenariogen/__main__.py
simei94 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@

import argparse

from .data import run_create_ref_data
from .data import run_extract_activities
from .data import run_lookup_regiostar
from .network import run_collect_results
from .network import run_edges as sumo_edges
from .network import run_intersections as sumo_intersections
from .network import run_routes as sumo_routes
from .network import run_train_model
# from matsim.scenariogen.data import run_create_ref_data
from data import run_extract_activities
# from matsim.scenariogen.data import run_lookup_regiostar
# from matsim.scenariogen.network import run_collect_results
# from matsim.scenariogen.network import run_edges as sumo_edges
# from matsim.scenariogen.network import run_intersections as sumo_intersections
# from matsim.scenariogen.network import run_routes as sumo_routes
# from matsim.scenariogen.network import run_train_model


def _add(subparsers, m):
Expand All @@ -28,11 +28,11 @@ def main():

subparsers = parser.add_subparsers(title="Subcommands")

_add(subparsers, sumo_edges)
_add(subparsers, sumo_routes)
_add(subparsers, sumo_intersections)
_add(subparsers, run_train_model)
_add(subparsers, run_collect_results)
# _add(subparsers, sumo_edges)
# _add(subparsers, sumo_routes)
# _add(subparsers, sumo_intersections)
# _add(subparsers, run_train_model)
# _add(subparsers, run_collect_results)

try:
from .network import run_opt_freespeed
Expand All @@ -41,8 +41,8 @@ def main():
print("Opt freespeed not available", e)

_add(subparsers, run_extract_activities)
_add(subparsers, run_create_ref_data)
_add(subparsers, run_lookup_regiostar)
# _add(subparsers, run_create_ref_data)
# _add(subparsers, run_lookup_regiostar)

args = parser.parse_args()

Expand Down
25 changes: 19 additions & 6 deletions matsim/scenariogen/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _batch(iterable: list, max_batch_size: int):
def read_all(dirs: Union[str, List[str]], regio=None) -> Tuple[pd.DataFrame]:
""" Scan directories and read everything into one dataframe """

from .formats import srv, mid
from .formats import srv, mid, eodmx

hh = []
pp = []
Expand All @@ -42,7 +42,7 @@ def read_all(dirs: Union[str, List[str]], regio=None) -> Tuple[pd.DataFrame]:

for d in dirs:

for format in (srv, mid):
for format in (srv, mid, eodmx):

files = []

Expand Down Expand Up @@ -112,14 +112,16 @@ class HouseholdType(AutoNameLowerStrEnum):
MULTI_W_CHILDREN = auto()
MULTI_WO_CHILDREN = auto()
SINGLE = auto()
UNKNOWN = auto()


class EconomicStatus(AutoNameLowerStrEnum):
simei94 marked this conversation as resolved.
Show resolved Hide resolved
VERY_LOW = auto()
# VERY_LOW = auto()
LOW = auto()
MEDIUM = auto()
MEDIUMLOW= auto()
MEDIUMHIGH= auto()
HIGH = auto()
VERY_HIGH = auto()
# VERY_HIGH = auto()
UNKNOWN = auto()


Expand Down Expand Up @@ -189,6 +191,7 @@ class TripMode(AutoNameLowerStrEnum):
PT = auto()
MOTORCYCLE = auto()
OTHER = auto()
COLECTIVO = auto()
simei94 marked this conversation as resolved.
Show resolved Hide resolved


class DistanceGroup(AutoNameLowerStrEnum):
Expand Down Expand Up @@ -262,6 +265,7 @@ class SourceDestinationGroup(AutoNameLowerStrEnum):
OTHER_WORK = auto()
WORK_OTHER = auto()
OTHER_OTHER = auto()
VISIT_OTHER = auto()

UNKNOWN = auto()

Expand Down Expand Up @@ -309,6 +313,7 @@ class Person:
present_on_day: bool
reporting_day: int
n_trips: int
home_district: str = ""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this belong to the household?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Household already has location and geometry. Is an additional attribute needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, but this information is needed for the simple routing in the next activity sampling step, because the survey data does not provide leg lengths. It is added to the persons because I do not want to have to read the whole households.csv in the next step just for one parameter (the persons and activities datasets are already huge files).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the problem, but I generally don't like duplicating information. CSV reading should be very fast; is it really a concern?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, persons.csv and activities.csv alone already add up to about 4 GB. Therefore I cannot run it on my hardware and have to run it on the math cluster, which is annoying for debugging and testing. You have to take into account that we are talking about an area with about 20 million inhabitants, which is far more than what we usually handle (e.g. Berlin-Brandenburg).



@dataclass
Expand All @@ -327,16 +332,24 @@ class Trip:
purpose: Purpose
sd_group: SourceDestinationGroup
valid: bool
dep_district: str = ""
simei94 marked this conversation as resolved.
Show resolved Hide resolved
arr_district: str = ""
arrival: int = 0
simei94 marked this conversation as resolved.
Show resolved Hide resolved


@dataclass
class Activity:
""" Activity information (including leg) """
# all leg information relates to the leg leading to the activity
simei94 marked this conversation as resolved.
Show resolved Hide resolved
a_id: str
p_id: str
p_index: str
simei94 marked this conversation as resolved.
Show resolved Hide resolved
n: int
type: Purpose
duration: int
leg_dist: float
leg_duration: float
leg_mode: TripMode
leg_dep_district: str = ""
leg_arr_district: str = ""
leg_departure: int = 0
start_time: int = 0
2 changes: 1 addition & 1 deletion matsim/scenariogen/data/formats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# -*- coding: utf-8 -*-

__all__ = ["srv", "mid"]
__all__ = ["srv", "mid", "eodmx"]
Loading