diff --git a/.gitignore b/.gitignore index 7e902e8..dfeabbf 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -output/* \ No newline at end of file +output/* +extensions/__pycache__/ \ No newline at end of file diff --git a/configs/common/network_los.yaml b/configs/common/network_los.yaml index a7d195a..50dc90b 100644 --- a/configs/common/network_los.yaml +++ b/configs/common/network_los.yaml @@ -35,7 +35,7 @@ maz_to_maz: - maz_maz_walk.csv - maz_maz_bike.csv # maz_to_maz blending distance (missing or 0 means no blending) - max_blend_distance: 2 + # max_blend_distance: 2 skim_time_periods: diff --git a/configs/resident/disaggregate_accessibility.yaml b/configs/resident/disaggregate_accessibility.yaml index f12c209..d89b351 100644 --- a/configs/resident/disaggregate_accessibility.yaml +++ b/configs/resident/disaggregate_accessibility.yaml @@ -1,8 +1,10 @@ # Sampling size. 0 = no limit # can be whole integer value or a sample rate (percent of zones) # zero or missing defaults to full sample! 
-DESTINATION_SAMPLE_SIZE: 100 -ORIGIN_SAMPLE_SIZE: 5000 +# DESTINATION_SAMPLE_SIZE: 100 +# ORIGIN_SAMPLE_SIZE: 5000 +DESTINATION_SAMPLE_SIZE: 10 +ORIGIN_SAMPLE_SIZE: 50 # select origin zones weighted by population (or another landuse variable) ORIGIN_WEIGHTING_COLUMN: pop diff --git a/configs/resident/logging.yaml b/configs/resident/logging.yaml index 7742c3e..b422d52 100644 --- a/configs/resident/logging.yaml +++ b/configs/resident/logging.yaml @@ -28,7 +28,8 @@ logging: logfile: class: logging.FileHandler - filename: !!python/object/apply:activitysim.core.config.log_file_path ['activitysim.log'] + filename: + get_log_file_path: 'activitysim.log' mode: w formatter: fileFormatter level: NOTSET diff --git a/configs/resident/non_mandatory_tour_scheduling_annotate_tours_preprocessor.csv b/configs/resident/non_mandatory_tour_scheduling_annotate_tours_preprocessor.csv index 09b066e..14079ec 100644 --- a/configs/resident/non_mandatory_tour_scheduling_annotate_tours_preprocessor.csv +++ b/configs/resident/non_mandatory_tour_scheduling_annotate_tours_preprocessor.csv @@ -6,6 +6,6 @@ number of person joint tours,num_person_joint_tours,"reindex_i(joint_tour_partic # included for school escorting model,, flag to denote outbound school escort tours,is_outbound_school_escort_tour,"non_mandatory_tours.index.isin(school_escort_tours[school_escort_tours['school_escort_direction'] == 'outbound'].index)" flag to denote inbound school escort tours,is_inbound_school_escort_tour,"non_mandatory_tours.index.isin(school_escort_tours[school_escort_tours['school_escort_direction'] == 'inbound'].index)" -school escort tour start time,school_escort_tour_start,"reindex(school_escort_tours.start, non_mandatory_tours.index)" -school escort tour next start time,school_escort_tour_next_start,"reindex(school_escort_tours.next_pure_escort_start, non_mandatory_tours.index)" -school escort tour end time,school_escort_tour_end,"reindex(school_escort_tours.end, non_mandatory_tours.index)" \ No newline 
at end of file +school escort tour start time,school_escort_tour_start,"reindex(school_escort_tours.start, non_mandatory_tours.index).fillna(-1).astype(int)" +school escort tour next start time,school_escort_tour_next_start,"reindex(school_escort_tours.next_pure_escort_start, non_mandatory_tours.index).fillna(-1).astype(int)" +school escort tour end time,school_escort_tour_end,"reindex(school_escort_tours.end, non_mandatory_tours.index).fillna(-1).astype(int)" \ No newline at end of file diff --git a/configs/resident/parking_location_choice.yaml b/configs/resident/parking_location_choice.yaml index 7e5c730..c359c4c 100644 --- a/configs/resident/parking_location_choice.yaml +++ b/configs/resident/parking_location_choice.yaml @@ -21,7 +21,7 @@ TRIP_DEPARTURE_PERIOD: depart TRIP_ORIGIN: origin TRIP_DESTINATION: destination -AUTO_MODES: - - DRIVEALONE - - SHARED2 - - SHARED3 +# AUTO_MODES: +# - DRIVEALONE +# - SHARED2 +# - SHARED3 diff --git a/configs/resident/settings.yaml b/configs/resident/settings.yaml index 8c66330..68c0943 100644 --- a/configs/resident/settings.yaml +++ b/configs/resident/settings.yaml @@ -127,7 +127,7 @@ distributed_time_factor_nonwork_stddev: 0.6 distributed_time_factor_min: 0.1 distributed_time_factor_max: 10 -resume_after: +resume_after: trip_mode_choice models: ### mp_init_proto_pop (single process) diff --git a/configs/resident/settings_mp.yaml b/configs/resident/settings_mp.yaml index 3ca3da8..9b748c4 100644 --- a/configs/resident/settings_mp.yaml +++ b/configs/resident/settings_mp.yaml @@ -98,7 +98,7 @@ multiprocess_steps: slice: tables: - accessibility - except: True # this is needed so landuse (i.e. destinations) doesn't get split + exclude: True # this is needed so landuse (i.e. 
destinations) doesn't get split - name: mp_households begin: av_ownership slice: diff --git a/configs/resident/transit_pass_subsidy.yaml b/configs/resident/transit_pass_subsidy.yaml index a717359..a886590 100644 --- a/configs/resident/transit_pass_subsidy.yaml +++ b/configs/resident/transit_pass_subsidy.yaml @@ -11,4 +11,3 @@ preprocessor: - accessibility - land_use -CHOOSER_FILTER_COLUMN_NAME: transit_subsidy_available diff --git a/configs/resident/trip_scheduling.yaml b/configs/resident/trip_scheduling.yaml index 5bca29c..124021c 100644 --- a/configs/resident/trip_scheduling.yaml +++ b/configs/resident/trip_scheduling.yaml @@ -3,6 +3,8 @@ # e.g. depart_alt_base = 5 means first column (column 0) represents period 5 DEPART_ALT_BASE: 0 +logic_version: 2 + MAX_ITERATIONS: 100 #FAILFIX: drop_and_cleanup diff --git a/extensions/__pycache__/__init__.cpython-310.pyc b/extensions/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..8f8ff31 Binary files /dev/null and b/extensions/__pycache__/__init__.cpython-310.pyc differ diff --git a/extensions/__pycache__/adjust_auto_operating_cost.cpython-310.pyc b/extensions/__pycache__/adjust_auto_operating_cost.cpython-310.pyc new file mode 100644 index 0000000..ddef776 Binary files /dev/null and b/extensions/__pycache__/adjust_auto_operating_cost.cpython-310.pyc differ diff --git a/extensions/__pycache__/airport_returns.cpython-310.pyc b/extensions/__pycache__/airport_returns.cpython-310.pyc new file mode 100644 index 0000000..fde8766 Binary files /dev/null and b/extensions/__pycache__/airport_returns.cpython-310.pyc differ diff --git a/extensions/__pycache__/av_ownership.cpython-310.pyc b/extensions/__pycache__/av_ownership.cpython-310.pyc new file mode 100644 index 0000000..cae70ff Binary files /dev/null and b/extensions/__pycache__/av_ownership.cpython-310.pyc differ diff --git a/extensions/__pycache__/external_identification.cpython-310.pyc b/extensions/__pycache__/external_identification.cpython-310.pyc new 
file mode 100644 index 0000000..fa348cf Binary files /dev/null and b/extensions/__pycache__/external_identification.cpython-310.pyc differ diff --git a/extensions/__pycache__/external_location_choice.cpython-310.pyc b/extensions/__pycache__/external_location_choice.cpython-310.pyc new file mode 100644 index 0000000..fbfd681 Binary files /dev/null and b/extensions/__pycache__/external_location_choice.cpython-310.pyc differ diff --git a/extensions/__pycache__/transponder_ownership.cpython-310.pyc b/extensions/__pycache__/transponder_ownership.cpython-310.pyc new file mode 100644 index 0000000..4d7567b Binary files /dev/null and b/extensions/__pycache__/transponder_ownership.cpython-310.pyc differ diff --git a/extensions/adjust_auto_operating_cost.py b/extensions/adjust_auto_operating_cost.py index 2c924f2..0020258 100644 --- a/extensions/adjust_auto_operating_cost.py +++ b/extensions/adjust_auto_operating_cost.py @@ -3,27 +3,23 @@ import numpy as np import pandas as pd -from activitysim.core import ( - config, - inject, - pipeline, -) +from activitysim.core import workflow logger = logging.getLogger(__name__) -@inject.step() -def adjust_auto_operating_cost(vehicles): - """Adjusts the `auto_operating_cost` field in the vehicles table +@workflow.step +def adjust_auto_operating_cost(state: workflow.State, vehicles: pd.DataFrame): + """ + Adjusts the `auto_operating_cost` field in the vehicles table so that the average is a desired value set as costPerMile in the settings Parameters ---------- - vehicles : orca.DataFrameWrapper + vehicles : pd.DataFrame """ - target_auto_operating_cost = config.get_global_constants()["costPerMile"] - vehicles = vehicles.to_frame() + target_auto_operating_cost = state.get_global_constants()["costPerMile"] adjustment_factor = ( target_auto_operating_cost / vehicles["auto_operating_cost"].mean() @@ -35,4 +31,4 @@ def adjust_auto_operating_cost(vehicles): ) vehicles["auto_operating_cost"] *= adjustment_factor - 
pipeline.replace_table("vehicles", vehicles) + state.add_table("vehicles", vehicles) diff --git a/extensions/airport_returns.py b/extensions/airport_returns.py index ac195cb..b1e7f16 100644 --- a/extensions/airport_returns.py +++ b/extensions/airport_returns.py @@ -3,39 +3,37 @@ import logging import numpy as np +import pandas as pd -from activitysim.core import tracing -from activitysim.core import config -from activitysim.core import pipeline -from activitysim.core import simulate -from activitysim.core import inject -from activitysim.core import expressions - -from activitysim.abm.models.util import estimation +from activitysim.core import ( + config, + tracing, + workflow, +) logger = logging.getLogger(__name__) -@inject.step() -def airport_returns(trips, chunk_size, trace_hh_id): +@workflow.step +def airport_returns( + state: workflow.State, + trips: pd.DataFrame, + model_settings_file_name: str = "airport_returns.yaml", + trace_label: str = "airport_returns", + trace_hh_id: bool = False, +): """ This model updates the airport trip list to include return trips for drop off passengers. The output is a larger trip list duplicating the trips which are dropped off at the airport to return to their origin. The main interface to the airport returns model is the airport_returns() function. 
""" - - trace_label = "airport_returns" - model_settings_file_name = "airport_returns.yaml" - - trip_list = trips.to_frame() - logger.info("Running %s with %d trips", trace_label, len(trip_list)) + logger.info("Running %s with %d trips", trace_label, len(trips)) model_settings = config.read_model_settings(model_settings_file_name) returning_modes = model_settings["RETURN_MODE_SEGMENTS"] - print(trips.trip_mode.unique()) - trip_returns = trip_list.copy() + trip_returns = trips.copy() trip_returns = trip_returns[trip_returns.trip_mode.isin(returning_modes)] trip_returns["return_origin"] = trip_returns["destination"] trip_returns["return_dest"] = trip_returns["origin"] @@ -49,14 +47,14 @@ def airport_returns(trips, chunk_size, trace_hh_id): ) trip_returns = trip_returns.drop(["return_origin", "return_dest"], axis=1) trip_returns["trip_id"] = np.arange( - trip_list.index.max() + 1, trip_list.index.max() + 1 + len(trip_returns) + trips.index.max() + 1, trips.index.max() + 1 + len(trip_returns) ) trip_returns = trip_returns.set_index("trip_id") - trip_list = trip_list.append(trip_returns) + trips = trips.append(trip_returns) - pipeline.replace_table("trips", trip_list) + state.replace_table("trips", trips) - # tracing.print_summary('airport_returns', trips.returns, value_counts=True) + tracing.print_summary("airport_returns", trips.returns, value_counts=True) - if trace_hh_id: - tracing.trace_df(trip_list, label=trace_label, warn_if_empty=True) + if state.settings.trace_hh_id: + tracing.trace_df(trips, label=trace_label, warn_if_empty=True) diff --git a/extensions/av_ownership.py b/extensions/av_ownership.py index 5947a4e..5ee8bc5 100644 --- a/extensions/av_ownership.py +++ b/extensions/av_ownership.py @@ -3,35 +3,79 @@ import logging import numpy as np - -from activitysim.abm.models.util import estimation -from activitysim.core import config, expressions, inject, pipeline, simulate, tracing +import pandas as pd + +from activitysim.core import ( + config, + expressions, 
+ estimation, + simulate, + tracing, + workflow, +) +from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.logit import LogitComponentSettings logger = logging.getLogger("activitysim") -@inject.step() -def av_ownership(households_merged, households, chunk_size, trace_hh_id): +class AVOwnershipSettings(LogitComponentSettings, extra="forbid"): + """ + Settings for the `av_ownership` component. + """ + + preprocessor: PreprocessorSettings | None = None + """Setting for the preprocessor.""" + + AV_OWNERSHIP_ALT: int = 0 + """The column index number of the spec file for owning an autonomous vehicle.""" + + # iterative what-if analysis example + # omit these settings to not iterate + AV_OWNERSHIP_ITERATIONS: int | None = 1 + """Maximum number of auto-calibration iterations to run.""" + AV_OWNERSHIP_TARGET_PERCENT: float | None = 0.0 + """Target percent of households owning an autonomous vehicle.""" + AV_OWNERSHIP_TARGET_PERCENT_TOLERANCE: float | None = 0.01 + """ + Tolerance for the target percent of households owning an autonomous vehicle. + Auto-calibration iterations will stop after achieving tolerance or hitting the max number. + """ + AV_OWNERSHIP_COEFFICIENT_CONSTANT: str | None = "coef_av_target_share" + """Name of the coefficient to adjust in each auto-calibration iteration.""" + + +@workflow.step +def av_ownership( + state: workflow.State, + households_merged: pd.DataFrame, + households: pd.DataFrame, + model_settings: AVOwnershipSettings | None = None, + model_settings_file_name: str = "av_ownership.yaml", + trace_label: str = "av_ownership", + trace_hh_id: bool = False, +) -> None: """ This model predicts whether a household owns an autonomous vehicle. The output from this model is TRUE or FALSE. 
""" - trace_label = "av_ownership" - model_settings_file_name = "av_ownership.yaml" - - choosers = households_merged.to_frame() - model_settings = config.read_model_settings(model_settings_file_name) + if model_settings is None: + model_settings = AVOwnershipSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + choosers = households_merged logger.info("Running %s with %d households", trace_label, len(choosers)) - estimator = estimation.manager.begin_estimation("av_ownership") + estimator = estimation.manager.begin_estimation(state, "av_ownership") constants = config.get_model_constants(model_settings) - av_ownership_alt = model_settings.get("AV_OWNERSHIP_ALT", 0) + av_ownership_alt = model_settings.AV_OWNERSHIP_ALT # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {} @@ -39,14 +83,15 @@ def av_ownership(households_merged, households, chunk_size, trace_hh_id): locals_d.update(constants) expressions.assign_columns( + state, df=choosers, model_settings=preprocessor_settings, locals_dict=locals_d, trace_label=trace_label, ) - model_spec = simulate.read_model_spec(file_name=model_settings["SPEC"]) - coefficients_df = simulate.read_model_coefficients(model_settings) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) + coefficients_df = state.filesystem.read_model_coefficients(model_settings) nest_spec = config.get_logit_model_settings(model_settings) if estimator: @@ -56,15 +101,24 @@ def av_ownership(households_merged, households, chunk_size, trace_hh_id): estimator.write_choosers(choosers) # - iterative single process what-if adjustment if specified - iterations = model_settings.get("AV_OWNERSHIP_ITERATIONS", 1) - iterations_coefficient_constant = model_settings.get( - "AV_OWNERSHIP_COEFFICIENT_CONSTANT", None - ) - iterations_target_percent = 
model_settings.get("AV_OWNERSHIP_TARGET_PERCENT", None) - iterations_target_percent_tolerance = model_settings.get( - "AV_OWNERSHIP_TARGET_PERCENT_TOLERANCE", 0.01 + iterations = model_settings.AV_OWNERSHIP_ITERATIONS + iterations_coefficient_constant = model_settings.AV_OWNERSHIP_COEFFICIENT_CONSTANT + iterations_target_percent = model_settings.AV_OWNERSHIP_TARGET_PERCENT + iterations_target_percent_tolerance = ( + model_settings.AV_OWNERSHIP_TARGET_PERCENT_TOLERANCE ) + # check to make sure all required settings are specified + assert ( + iterations_coefficient_constant is not None if (iterations > 0) else True + ), "AV_OWNERSHIP_COEFFICIENT_CONSTANT required if AV_OWNERSHIP_ITERATIONS is specified" + assert ( + iterations_target_percent is not None if (iterations > 0) else True + ), "AV_OWNERSHIP_TARGET_PERCENT required if AV_OWNERSHIP_ITERATIONS is specified" + assert ( + iterations_target_percent_tolerance is not None if (iterations > 0) else True + ), "AV_OWNERSHIP_TARGET_PERCENT_TOLERANCE required if AV_OWNERSHIP_ITERATIONS is specified" + for iteration in range(iterations): logger.info( @@ -75,22 +129,23 @@ def av_ownership(households_merged, households, chunk_size, trace_hh_id): ) # re-read spec to reset substitution - model_spec = simulate.read_model_spec(file_name=model_settings["SPEC"]) - model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) + model_spec = simulate.eval_coefficients( + state, model_spec, coefficients_df, estimator + ) choices = simulate.simple_simulate( + state, choosers=choosers, spec=model_spec, nest_spec=nest_spec, locals_d=constants, - chunk_size=chunk_size, trace_label=trace_label, trace_choice_name="av_ownership", estimator=estimator, ) if iterations_target_percent is not None: - # choices_for_filter = choices[choosers[iterations_chooser_filter]] current_percent = (choices == av_ownership_alt).sum() / len(choosers) logger.info( 
@@ -139,12 +194,11 @@ def av_ownership(households_merged, households, chunk_size, trace_hh_id): estimator.write_override_choices(choices) estimator.end_estimation() - households = households.to_frame() households["av_ownership"] = ( choices.reindex(households.index).fillna(0).astype(bool) ) - pipeline.replace_table("households", households) + state.add_table("households", households) tracing.print_summary("av_ownership", households.av_ownership, value_counts=True) diff --git a/extensions/external_identification.py b/extensions/external_identification.py index be36ce5..2041b8e 100644 --- a/extensions/external_identification.py +++ b/extensions/external_identification.py @@ -5,22 +5,46 @@ import numpy as np import pandas as pd -from activitysim.core import tracing -from activitysim.core import config -from activitysim.core import pipeline -from activitysim.core import simulate -from activitysim.core import inject -from activitysim.core import expressions - -from activitysim.abm.models.util import estimation +from pydantic import validator + +from activitysim.core import ( + config, + expressions, + los, + estimation, + simulate, + tracing, + workflow, +) +from activitysim.core.configuration.logit import LogitComponentSettings +from activitysim.core.configuration.base import PreprocessorSettings logger = logging.getLogger(__name__) -def determine_closest_external_station(choosers, skim_dict, origin_col="home_zone_id"): +class ExternalIdentificationSettings(LogitComponentSettings): + """ + Settings for the `external_identification` component. 
+ """ + + CHOOSER_FILTER_COLUMN_NAME: str | None = None + """Column name which selects choosers.""" + + EXTERNAL_COL_NAME: str | None = None + """Adds this column and set to True if model selects external""" + + INTERNAL_COL_NAME: str | None = None + """Column name set to True if not external but CHOOSER_FILTER_COLUMN_NAME is True""" + + preprocessor: PreprocessorSettings | None = None + + +def determine_closest_external_station( + state, choosers, skim_dict, origin_col="home_zone_id" +): unique_origin_zones = choosers[origin_col].unique() - landuse = inject.get_table("land_use").to_frame() + landuse = state.get_table("land_use") ext_zones = landuse[landuse.external_MAZ > 0].index.to_numpy() choosers["closest_external_zone"] = -1 @@ -46,7 +70,13 @@ def determine_closest_external_station(choosers, skim_dict, origin_col="home_zon def external_identification( - model_settings, estimator, choosers, network_los, chunk_size, trace_label + state, + model_settings, + estimator, + choosers, + network_los, + model_settings_file_name, + trace_label, ): constants = config.get_model_constants(model_settings) @@ -54,42 +84,42 @@ def external_identification( locals_d = {} if constants is not None: locals_d.update(constants) - locals_d.update({"land_use": inject.get_table("land_use").to_frame()}) + locals_d.update({"land_use": state.get_table("land_use")}) skim_dict = network_los.get_default_skim_dict() - # print('skim_dict', skim_dict) - choosers = determine_closest_external_station(choosers, skim_dict) + choosers = determine_closest_external_station(state, choosers, skim_dict) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: expressions.assign_columns( + state, df=choosers, model_settings=preprocessor_settings, locals_dict=locals_d, trace_label=trace_label, ) - model_spec = simulate.read_model_spec(file_name=model_settings["SPEC"]) - coefficients_df = 
simulate.read_model_coefficients(model_settings) - model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) + coefficients_df = state.filesystem.read_model_coefficients(model_settings) + model_spec = simulate.eval_coefficients( + state, model_spec, coefficients_df, estimator + ) nest_spec = config.get_logit_model_settings(model_settings) if estimator: - estimator.write_model_settings( - model_settings, model_settings["_yaml_file_name"] - ) + estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) estimator.write_coefficients(coefficients_df, model_settings) estimator.write_choosers(choosers) choices = simulate.simple_simulate( + state, choosers=choosers, spec=model_spec, nest_spec=nest_spec, locals_d=locals_d, - chunk_size=chunk_size, trace_label=trace_label, trace_choice_name=trace_label, estimator=estimator, @@ -98,10 +128,17 @@ def external_identification( return choices -@inject.step() +@workflow.step def external_worker_identification( - persons_merged, persons, network_los, chunk_size, trace_hh_id -): + state: workflow.State, + persons: pd.DataFrame, + persons_merged: pd.DataFrame, + network_los: los.Network_LOS, + model_settings: ExternalIdentificationSettings | None = None, + model_settings_file_name: str = "external_worker_identification.yaml", + trace_label: str = "external_worker_identification", + trace_hh_id: bool = False, +) -> None: """ This model predicts the whether a worker has an external work location. The output from this model is TRUE (if external) or FALSE (if internal). @@ -109,25 +146,33 @@ def external_worker_identification( The main interface to the external worker model is the external_worker_identification() function. This function is registered as an orca step in the example Pipeline. 
""" + if model_settings is None: + model_settings = ExternalIdentificationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - trace_label = "external_worker_identification" - model_settings_file_name = "external_worker_identification.yaml" - model_settings = config.read_model_settings(model_settings_file_name) - model_settings["_yaml_file_name"] = model_settings_file_name - - estimator = estimation.manager.begin_estimation(trace_label) + estimator = estimation.manager.begin_estimation(state, trace_label) - choosers = persons_merged.to_frame() - filter_col = model_settings.get("CHOOSER_FILTER_COLUMN_NAME") - choosers = choosers[choosers[filter_col]] + filter_col = model_settings.CHOOSER_FILTER_COLUMN_NAME + if filter_col is None: + choosers = persons_merged + else: + choosers = persons_merged[persons_merged[filter_col]] logger.info("Running %s with %d persons", trace_label, len(choosers)) choices = external_identification( - model_settings, estimator, choosers, network_los, chunk_size, trace_label + state, + model_settings, + estimator, + choosers, + network_los, + model_settings_file_name, + trace_label, ) - external_col_name = model_settings["EXTERNAL_COL_NAME"] - internal_col_name = model_settings["INTERNAL_COL_NAME"] + external_col_name = model_settings.EXTERNAL_COL_NAME + internal_col_name = model_settings.INTERNAL_COL_NAME if estimator: estimator.write_choices(choices) @@ -135,13 +180,14 @@ def external_worker_identification( estimator.write_override_choices(choices) estimator.end_estimation() - persons = persons.to_frame() - persons[external_col_name] = ( - (choices == 0).reindex(persons.index).fillna(False).astype(bool) - ) - persons[internal_col_name] = persons[filter_col] & ~persons[external_col_name] + if external_col_name is not None: + persons[external_col_name] = ( + (choices == 0).reindex(persons.index).fillna(False).astype(bool) + ) + if internal_col_name is not None: + persons[internal_col_name] = persons[filter_col] & 
~persons[external_col_name] - pipeline.replace_table("persons", persons) + state.add_table("persons", persons) tracing.print_summary( external_col_name, persons[external_col_name], value_counts=True @@ -151,10 +197,17 @@ def external_worker_identification( tracing.trace_df(persons, label=trace_label, warn_if_empty=True) -@inject.step() +@workflow.step def external_student_identification( - persons_merged, persons, network_los, chunk_size, trace_hh_id -): + state: workflow.State, + persons: pd.DataFrame, + persons_merged: pd.DataFrame, + network_los: los.Network_LOS, + model_settings: ExternalIdentificationSettings | None = None, + model_settings_file_name: str = "external_student_identification.yaml", + trace_label: str = "external_student_identification", + trace_hh_id: bool = False, +) -> None: """ This model predicts the whether a student has an external work location. The output from this model is TRUE (if external) or FALSE (if internal). @@ -163,23 +216,33 @@ def external_student_identification( This function is registered as an orca step in the example Pipeline. 
""" - trace_label = "external_student_identification" - model_settings_file_name = "external_student_identification.yaml" - model_settings = config.read_model_settings(model_settings_file_name) - model_settings["_yaml_file_name"] = model_settings_file_name + if model_settings is None: + model_settings = ExternalIdentificationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - estimator = estimation.manager.begin_estimation(trace_label) + estimator = estimation.manager.begin_estimation(state, trace_label) - choosers = persons_merged.to_frame() - filter_col = model_settings.get("CHOOSER_FILTER_COLUMN_NAME") - choosers = choosers[choosers[filter_col]] + filter_col = model_settings.CHOOSER_FILTER_COLUMN_NAME + if filter_col is None: + choosers = persons_merged + else: + choosers = persons_merged[persons_merged[filter_col]] + logger.info("Running %s with %d persons", trace_label, len(choosers)) choices = external_identification( - model_settings, estimator, choosers, network_los, chunk_size, trace_label + state, + model_settings, + estimator, + choosers, + network_los, + model_settings_file_name, + trace_label, ) - external_col_name = model_settings["EXTERNAL_COL_NAME"] - internal_col_name = model_settings["INTERNAL_COL_NAME"] + external_col_name = model_settings.EXTERNAL_COL_NAME + internal_col_name = model_settings.INTERNAL_COL_NAME if estimator: estimator.write_choices(choices) @@ -187,13 +250,14 @@ def external_student_identification( estimator.write_override_choices(choices) estimator.end_estimation() - persons = persons.to_frame() - persons[external_col_name] = ( - (choices == 0).reindex(persons.index).fillna(False).astype(bool) - ) - persons[internal_col_name] = persons[filter_col] & ~persons[external_col_name] + if external_col_name is not None: + persons[external_col_name] = ( + (choices == 0).reindex(persons.index).fillna(False).astype(bool) + ) + if internal_col_name is not None: + persons[internal_col_name] = 
persons[filter_col] & ~persons[external_col_name] - pipeline.replace_table("persons", persons) + state.add_table("persons", persons) tracing.print_summary( external_col_name, persons[external_col_name], value_counts=True @@ -203,25 +267,26 @@ def external_student_identification( tracing.trace_df(persons, label=trace_label, warn_if_empty=True) -def set_external_tour_variables(tours, choices, model_settings, trace_label): +def set_external_tour_variables(state, tours, choices, model_settings, trace_label): """ Set the internal and external tour indicator columns in the tours file """ - external_col_name = model_settings["EXTERNAL_COL_NAME"] - internal_col_name = model_settings["INTERNAL_COL_NAME"] + external_col_name = model_settings.EXTERNAL_COL_NAME + internal_col_name = model_settings.INTERNAL_COL_NAME - tours = tours.to_frame() - - tours.loc[choices.index, external_col_name] = ( - (choices == 0).reindex(tours.index).fillna(False).astype(bool) - ) - tours.loc[choices.index, internal_col_name] = np.where( - tours.loc[choices.index, external_col_name], False, True - ) + if external_col_name is not None: + tours.loc[choices.index, external_col_name] = ( + (choices == 0).reindex(tours.index).fillna(False).astype(bool) + ) + if internal_col_name is not None: + tours.loc[choices.index, internal_col_name] = np.where( + tours.loc[choices.index, external_col_name], False, True + ) # - annotate tours table if "annotate_tours" in model_settings: expressions.assign_columns( + state, df=tours, model_settings=model_settings.get("annotate_tours"), trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"), @@ -230,10 +295,17 @@ def set_external_tour_variables(tours, choices, model_settings, trace_label): return tours -@inject.step() +@workflow.step def external_non_mandatory_identification( - tours_merged, tours, network_los, chunk_size, trace_hh_id -): + state: workflow.State, + tours: pd.DataFrame, + tours_merged: pd.DataFrame, + network_los: los.Network_LOS, + 
model_settings: ExternalIdentificationSettings | None = None, + model_settings_file_name: str = "external_non_mandatory_identification.yaml", + trace_label: str = "external_non_mandatory_identification", + trace_hh_id: bool = False, +) -> None: """ This model predicts the whether a non-mandatory tour is external. The output from this model is TRUE (if external) or FALSE (if internal). @@ -241,19 +313,24 @@ def external_non_mandatory_identification( The main interface to the external student model is the external_nonmandatory_identification() function. This function is registered as an orca step in the example Pipeline. """ + if model_settings is None: + model_settings = ExternalIdentificationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - trace_label = "external_non_mandatory_identification" - model_settings_file_name = "external_non_mandatory_identification.yaml" - model_settings = config.read_model_settings(model_settings_file_name) - model_settings["_yaml_file_name"] = model_settings_file_name - - estimator = estimation.manager.begin_estimation(trace_label) + estimator = estimation.manager.begin_estimation(state, trace_label) - choosers = tours_merged.to_frame() - choosers = choosers[choosers["tour_category"] == "non_mandatory"] + choosers = tours_merged[tours_merged["tour_category"] == "non_mandatory"] choices = external_identification( - model_settings, estimator, choosers, network_los, chunk_size, trace_label + state, + model_settings, + estimator, + choosers, + network_los, + model_settings_file_name, + trace_label, ) if estimator: @@ -262,11 +339,13 @@ def external_non_mandatory_identification( estimator.write_override_choices(choices) estimator.end_estimation() - tours = set_external_tour_variables(tours, choices, model_settings, trace_label) + tours = set_external_tour_variables( + state, tours, choices, model_settings, trace_label + ) - pipeline.replace_table("tours", tours) + state.add_table("tours", tours) - 
external_col_name = model_settings["EXTERNAL_COL_NAME"] + external_col_name = model_settings.EXTERNAL_COL_NAME tracing.print_summary( external_col_name, tours[external_col_name], value_counts=True ) @@ -275,10 +354,17 @@ def external_non_mandatory_identification( tracing.trace_df(tours, label=trace_label, warn_if_empty=True) -@inject.step() +@workflow.step def external_joint_tour_identification( - tours_merged, tours, network_los, chunk_size, trace_hh_id -): + state: workflow.State, + tours: pd.DataFrame, + tours_merged: pd.DataFrame, + network_los: los.Network_LOS, + model_settings: ExternalIdentificationSettings | None = None, + model_settings_file_name: str = "external_joint_tour_identification.yaml", + trace_label: str = "external_joint_tour_identification", + trace_hh_id: bool = False, +) -> None: """ This model predicts the whether a joint tour is external. The output from this model is TRUE (if external) or FALSE (if internal). @@ -286,21 +372,26 @@ def external_joint_tour_identification( The main interface to the external student model is the external_nonmandatory_identification() function. This function is registered as an orca step in the example Pipeline. 
""" + if model_settings is None: + model_settings = ExternalIdentificationSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - trace_label = "external_joint_tour_identification" - model_settings_file_name = "external_joint_tour_identification.yaml" - model_settings = config.read_model_settings(model_settings_file_name) - model_settings["_yaml_file_name"] = model_settings_file_name - - estimator = estimation.manager.begin_estimation(trace_label) + estimator = estimation.manager.begin_estimation(state, trace_label) - choosers = tours_merged.to_frame() - choosers = choosers[choosers["tour_category"] == "joint"] + choosers = tours_merged[tours_merged["tour_category"] == "joint"] # - if no choosers if choosers.shape[0] > 0: choices = external_identification( - model_settings, estimator, choosers, network_los, chunk_size, trace_label + state, + model_settings, + estimator, + choosers, + network_los, + model_settings_file_name, + trace_label, ) else: # everything is internal, still want to set internal or external columns in df @@ -313,11 +404,13 @@ def external_joint_tour_identification( estimator.write_override_choices(choices) estimator.end_estimation() - tours = set_external_tour_variables(tours, choices, model_settings, trace_label) + tours = set_external_tour_variables( + state, tours, choices, model_settings, trace_label + ) - pipeline.replace_table("tours", tours) + state.add_table("tours", tours) - external_col_name = model_settings["EXTERNAL_COL_NAME"] + external_col_name = model_settings.EXTERNAL_COL_NAME tracing.print_summary( external_col_name, tours[external_col_name], value_counts=True ) diff --git a/extensions/external_location_choice.py b/extensions/external_location_choice.py index ba0ab54..8b2b436 100644 --- a/extensions/external_location_choice.py +++ b/extensions/external_location_choice.py @@ -1,70 +1,94 @@ # ActivitySim # See full license in LICENSE.txt. 
+from __future__ import annotations + import logging import numpy as np +import pandas as pd -from activitysim.core import tracing -from activitysim.core import config -from activitysim.core import pipeline -from activitysim.core import simulate -from activitysim.core import inject -from activitysim.core import expressions - -from activitysim.abm.models.util import estimation +from activitysim.abm.models.util import logsums as logsum from activitysim.abm.models.util import tour_destination +from activitysim.abm.tables import shadow_pricing +from activitysim.core import ( + config, + expressions, + los, + estimation, + simulate, + tracing, + workflow, +) +from activitysim.core.configuration.logit import ( + TourLocationComponentSettings, + TourModeComponentSettings, +) from activitysim.abm.models.location_choice import ( - iterate_location_choice, write_estimation_specs, + iterate_location_choice, ) - from activitysim.core.util import assign_in_place - logger = logging.getLogger(__name__) -@inject.step() +@workflow.step def external_school_location( - persons_merged, persons, households, network_los, chunk_size, trace_hh_id, locutor + state: workflow.State, + persons_merged: pd.DataFrame, + persons: pd.DataFrame, + households: pd.DataFrame, + network_los: los.Network_LOS, + locutor: bool, + model_settings: TourLocationComponentSettings | None = None, + model_settings_file_name: str = "external_school_location.yaml", + trace_label: str = "external_school_location", ): """ External school location choice model iterate_location_choice adds location choice column and annotations to persons table """ + if model_settings is None: + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - trace_label = "external_school_location" - model_settings = config.read_model_settings("external_school_location.yaml") - - estimator = estimation.manager.begin_estimation("external_school_location") + estimator = 
estimation.manager.begin_estimation(state, "external_school_location") if estimator: - write_estimation_specs( - estimator, model_settings, "external_school_location.yaml" - ) + write_estimation_specs(estimator, model_settings, model_settings_file_name) persons_df = iterate_location_choice( - model_settings, - persons_merged, - persons, - households, - network_los, - estimator, - chunk_size, - trace_hh_id, - locutor, - trace_label, + state=state, + model_settings=model_settings, + persons_merged=persons_merged, + persons=persons, + households=households, + network_los=network_los, + estimator=estimator, + chunk_size=state.settings.chunk_size, + locutor=locutor, + trace_label=trace_label, ) - pipeline.replace_table("persons", persons_df) + state.add_table("persons", persons_df) if estimator: estimator.end_estimation() -@inject.step() +@workflow.step def external_workplace_location( - persons_merged, persons, households, network_los, chunk_size, trace_hh_id, locutor + state: workflow.State, + persons_merged: pd.DataFrame, + persons: pd.DataFrame, + households: pd.DataFrame, + network_los: los.Network_LOS, + locutor: bool, + model_settings: TourLocationComponentSettings | None = None, + model_settings_file_name: str = "external_workplace_location.yaml", + trace_label: str = "external_workplace_location", ): """ External workplace location choice model @@ -72,62 +96,66 @@ def external_workplace_location( iterate_location_choice adds location choice column and annotations to persons table """ - trace_label = "external_workplace_location" - model_settings = config.read_model_settings("external_workplace_location.yaml") + if model_settings is None: + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - estimator = estimation.manager.begin_estimation("external_workplace_location") + estimator = estimation.manager.begin_estimation( + state, "external_workplace_location" + ) if estimator: - 
write_estimation_specs( - estimator, model_settings, "external_workplace_location.yaml" - ) + write_estimation_specs(estimator, model_settings, model_settings_file_name) persons_df = iterate_location_choice( - model_settings, - persons_merged, - persons, - households, - network_los, - estimator, - chunk_size, - trace_hh_id, - locutor, - trace_label, + state=state, + model_settings=model_settings, + persons_merged=persons_merged, + persons=persons, + households=households, + network_los=network_los, + estimator=estimator, + chunk_size=state.settings.chunk_size, + locutor=locutor, + trace_label=trace_label, ) - pipeline.replace_table("persons", persons_df) + state.add_table("persons", persons_df) if estimator: estimator.end_estimation() -@inject.step() +@workflow.step def external_non_mandatory_destination( - tours, persons_merged, network_los, chunk_size, trace_hh_id + state: workflow.State, + tours: pd.DataFrame, + persons_merged: pd.DataFrame, + network_los: los.Network_LOS, + model_settings: TourLocationComponentSettings | None = None, + model_settings_file_name: str = "external_non_mandatory_destination.yaml", + trace_label: str = "external_non_mandatory_destination", ): - """ Given the tour generation from the above, each tour needs to have a destination, so in this case tours are the choosers (with the associated person that's making the tour) """ + if model_settings is None: + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - trace_label = "external_non_mandatory_destination" - model_settings_file_name = "external_non_mandatory_destination.yaml" - model_settings = config.read_model_settings(model_settings_file_name) - - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME want_logsums = logsum_column_name is not None - sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") + 
sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME want_sample_table = ( - config.setting("want_dest_choice_sample_tables") - and sample_table_name is not None + state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) - tours = tours.to_frame() - - persons_merged = persons_merged.to_frame() - # choosers are tours - in a sense tours are choosing their destination non_mandatory_ext_tours = tours[ (tours.tour_category == "non_mandatory") & (tours.is_external_tour) @@ -139,22 +167,21 @@ def external_non_mandatory_destination( return estimator = estimation.manager.begin_estimation( - "external_non_mandatory_destination" + state, "external_non_mandatory_destination" ) if estimator: estimator.write_coefficients(model_settings=model_settings) # estimator.write_spec(model_settings, tag='SAMPLE_SPEC') estimator.write_spec(model_settings, tag="SPEC") - estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"]) - estimator.write_table( - inject.get_injectable("size_terms"), "size_terms", append=False - ) + estimator.set_alt_id(model_settings.ALT_DEST_COL_NAME) estimator.write_table( - inject.get_table("land_use").to_frame(), "landuse", append=False + state.get_injectable("size_terms"), "size_terms", append=False ) + estimator.write_table(state.get_table("land_use"), "landuse", append=False) estimator.write_model_settings(model_settings, model_settings_file_name) choices_df, save_sample_df = tour_destination.run_tour_destination( + state, non_mandatory_ext_tours, persons_merged, want_logsums, @@ -162,8 +189,6 @@ def external_non_mandatory_destination( model_settings, network_los, estimator, - chunk_size, - trace_hh_id, trace_label, ) @@ -183,14 +208,14 @@ def external_non_mandatory_destination( non_mandatory_ext_tours[logsum_column_name] = choices_df["logsum"] assign_in_place(tours, non_mandatory_ext_tours[[logsum_column_name]]) - pipeline.replace_table("tours", tours) + state.add_table("tours", tours) if want_sample_table: assert 
len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df) # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True) - pipeline.extend_table(sample_table_name, save_sample_df) + state.extend_table(sample_table_name, save_sample_df) - if trace_hh_id: + if state.settings.trace_hh_id: tracing.trace_df( tours[tours.tour_category == "non_mandatory"], label="external_non_mandatory_destination", @@ -201,34 +226,35 @@ def external_non_mandatory_destination( ) -@inject.step() +@workflow.step def external_joint_tour_destination( - tours, persons_merged, network_los, chunk_size, trace_hh_id + state: workflow.State, + tours: pd.DataFrame, + persons_merged: pd.DataFrame, + network_los: los.Network_LOS, + model_settings: TourLocationComponentSettings | None = None, + model_settings_file_name: str = "external_joint_tour_destination.yaml", + trace_label: str = "external_joint_tour_destination", ): - """ Given the tour generation from the above, each tour needs to have a destination, so in this case tours are the choosers (with the associated person that's making the tour) """ + if model_settings is None: + model_settings = TourLocationComponentSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) - trace_label = "external_joint_tour_destination" - model_settings_file_name = "external_joint_tour_destination.yaml" - model_settings = config.read_model_settings(model_settings_file_name) - - logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME") + logsum_column_name = model_settings.DEST_CHOICE_LOGSUM_COLUMN_NAME want_logsums = logsum_column_name is not None - sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME") + sample_table_name = model_settings.DEST_CHOICE_SAMPLE_TABLE_NAME want_sample_table = ( - config.setting("want_dest_choice_sample_tables") - and sample_table_name is not None + state.settings.want_dest_choice_sample_tables and sample_table_name is not None ) - tours 
= tours.to_frame() - - persons_merged = persons_merged.to_frame() - joint_ext_tours = tours[ (tours.tour_category == "joint") & (tours.get("is_external_tour", False) == True) @@ -239,21 +265,22 @@ def external_joint_tour_destination( tracing.no_results(trace_label) return - estimator = estimation.manager.begin_estimation("external_joint_tour_destination") + estimator = estimation.manager.begin_estimation( + state, "external_joint_tour_destination" + ) if estimator: estimator.write_coefficients(model_settings=model_settings) # estimator.write_spec(model_settings, tag='SAMPLE_SPEC') estimator.write_spec(model_settings, tag="SPEC") - estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"]) - estimator.write_table( - inject.get_injectable("size_terms"), "size_terms", append=False - ) + estimator.set_alt_id(model_settings.ALT_DEST_COL_NAME) estimator.write_table( - inject.get_table("land_use").to_frame(), "landuse", append=False + state.get_injectable("size_terms"), "size_terms", append=False ) + estimator.write_table(state.get_table("land_use"), "landuse", append=False) estimator.write_model_settings(model_settings, model_settings_file_name) choices_df, save_sample_df = tour_destination.run_tour_destination( + state, joint_ext_tours, persons_merged, want_logsums, @@ -261,8 +288,6 @@ def external_joint_tour_destination( model_settings, network_los, estimator, - chunk_size, - trace_hh_id, trace_label, ) @@ -282,14 +307,14 @@ def external_joint_tour_destination( joint_ext_tours[logsum_column_name] = choices_df["logsum"] assign_in_place(tours, joint_ext_tours[[logsum_column_name]]) - pipeline.replace_table("tours", tours) + state.add_table("tours", tours) if want_sample_table: assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df) # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True) - pipeline.extend_table(sample_table_name, save_sample_df) + state.extend_table(sample_table_name, save_sample_df) - if 
trace_hh_id: +    if state.settings.trace_hh_id: tracing.trace_df( tours[tours.tour_category == "non_mandatory"], label="external_joint_tour_destination", diff --git a/extensions/transponder_ownership.py b/extensions/transponder_ownership.py index 03cc765..ed00721 100644 --- a/extensions/transponder_ownership.py +++ b/extensions/transponder_ownership.py @@ -3,23 +3,43 @@ import logging import numpy as np +import pandas as pd + +from activitysim.core import ( + config, + expressions, + estimation, + simulate, + tracing, + workflow, +) +from activitysim.core.configuration.base import PreprocessorSettings +from activitysim.core.configuration.logit import LogitComponentSettings -from activitysim.core import tracing -from activitysim.core import config -from activitysim.core import pipeline -from activitysim.core import simulate -from activitysim.core import inject -from activitysim.core import expressions +logger = logging.getLogger(__name__) -from activitysim.abm.models.util import estimation -logger = logging.getLogger(__name__) +class TransponderOwnershipSettings(LogitComponentSettings): + """ + Settings for the `transponder_ownership` component. + """ + + TRANSPONDER_OWNERSHIP_ALT: int = 1 + """Zero-based index of the column for owning a transponder in the model spec.""" + preprocessor: PreprocessorSettings | None = None -@inject.step() + +@workflow.step def transponder_ownership( - households_merged, households, network_los, chunk_size, trace_hh_id -): + state: workflow.State, + households: pd.DataFrame, + households_merged: pd.DataFrame, + model_settings: TransponderOwnershipSettings | None = None, + model_settings_file_name: str = "transponder_ownership.yaml", + trace_label: str = "transponder_ownership", + trace_hh_id: bool = False, +) -> None: """ This model predicts whether the household owns a transponder. 
The output from this model is TRUE (if yes) or FALSE (if no) and is stored @@ -28,35 +48,39 @@ def transponder_ownership( The main interface to the Transponder Ownership model is the transponder_ownership() function. This function is registered as an orca step in the example Pipeline. """ + if model_settings is None: + model_settings = TransponderOwnershipSettings.read_settings_file( + state.filesystem, + model_settings_file_name, + ) + transponder_own_alt = model_settings.TRANSPONDER_OWNERSHIP_ALT - trace_label = "transponder_ownership" - model_settings_file_name = "transponder_ownership.yaml" - model_settings = config.read_model_settings(model_settings_file_name) - transponder_own_alt = model_settings["TRANSPONDER_OWNERSHIP_ALT"] - - estimator = estimation.manager.begin_estimation("transponder_ownership") + estimator = estimation.manager.begin_estimation(state, "transponder_ownership") constants = config.get_model_constants(model_settings) - choosers = households_merged.to_frame() + choosers = households_merged logger.info("Running %s with %d households", trace_label, len(choosers)) # - preprocessor - preprocessor_settings = model_settings.get("preprocessor", None) + preprocessor_settings = model_settings.preprocessor if preprocessor_settings: locals_d = {} if constants is not None: locals_d.update(constants) expressions.assign_columns( + state, df=choosers, model_settings=preprocessor_settings, locals_dict=locals_d, trace_label=trace_label, ) - model_spec = simulate.read_model_spec(file_name=model_settings["SPEC"]) - coefficients_df = simulate.read_model_coefficients(model_settings) - model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator) + model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) + coefficients_df = state.filesystem.read_model_coefficients(model_settings) + model_spec = simulate.eval_coefficients( + state, model_spec, coefficients_df, estimator + ) nest_spec = 
config.get_logit_model_settings(model_settings) @@ -67,11 +91,11 @@ def transponder_ownership( estimator.write_choosers(choosers) choices = simulate.simple_simulate( + state, choosers=choosers, spec=model_spec, nest_spec=nest_spec, locals_d=constants, - chunk_size=chunk_size, trace_label=trace_label, trace_choice_name="transponder_ownership", estimator=estimator, @@ -86,15 +110,14 @@ def transponder_ownership( estimator.write_override_choices(choices) estimator.end_estimation() - households = households.to_frame() households["transponder_ownership"] = ( choices.reindex(households.index).fillna(0).astype(bool) ) - pipeline.replace_table("households", households) + state.add_table("households", households) tracing.print_summary( "transponder_ownership", households["transponder_ownership"], value_counts=True ) if trace_hh_id: - tracing.trace_df(households, label=trace_label, warn_if_empty=True) + state.tracing.trace_df(households, label=trace_label, warn_if_empty=True) diff --git a/extensions/update_tables.py b/extensions/update_tables.py deleted file mode 100644 index edf646a..0000000 --- a/extensions/update_tables.py +++ /dev/null @@ -1,269 +0,0 @@ -# ActivitySim -# See full license in LICENSE.txt. -import git -import logging -import os -import yaml - -import numpy as np -import pandas as pd - -from activitysim.core import config, inject, pipeline, tracing -from activitysim.core.config import setting - -# from io import StringIO - -logger = logging.getLogger("activitysim") - - -def find_git_folder(code_folder, path_level): - """ - Returns the path to the .git folder - - Parameters - ---------- - code folder: str - path_level: str - - Returns - ------- - git_dir: str - The path to the .git folder. 
- """ - - git_dir = os.path.abspath(os.path.join(code_folder, path_level)) - git_folder = os.path.join(git_dir, ".git") - return git_folder - - -def get_commit_info(repo_path): - """ - Returns a dictionary containing the short commit hash and branch name of the Git repository - at the specified path. - - Parameters - ---------- - repo_path (str): The path to the Git repository. - - Returns - ------- - dict: A dictionary with the following keys: - - short_commit_hash (str): The first 7 characters of the current commit hash. - - branch_name (str): The name of the current active branch. - If the repository path is not a Git repository, both values will be empty strings. - """ - - commit_hash = "" - branch_name = "" - - if os.path.isdir(repo_path): - try: - repo = git.Repo(repo_path) - if repo.head.is_valid(): - commit_hash = repo.head.commit.hexsha[:7] - if not repo.head.is_detached: - branch_name = repo.active_branch.name - else: - branch_name = repo.active_branch.name - branch_file = open(repo_path + "\\refs\\heads\\" + branch_name, "r") - commit_hash = branch_file.read()[:7] - # commit_hash = branch_file.read(7) - branch_file.close() - except (git.InvalidGitRepositoryError, AttributeError, FileNotFoundError): - pass - - return {"short_commit_hash": commit_hash, "branch_name": branch_name} - - -def write_metadata(prefix): - - output_dir = inject.get_injectable("output_dir") - - # repo branch name and commit hash: activitysim - asim_git_folder = find_git_folder(pipeline.__file__, "../../..") - asim_commit_info = get_commit_info(asim_git_folder) - - # repo branch name and commit hash: abm3 - abm_git_path = os.path.abspath( - os.path.join(output_dir, "..", "..", "git_info.yaml") - ) - if os.path.isfile(abm_git_path): - with open(abm_git_path, "r") as stream: - abm_git_info = yaml.safe_load(stream) - abm_git_info["commit"] = abm_git_info["commit"][:7] - else: - abm_git_info = {"branch": "", "commit": ""} - - trip_settings = 
config.read_model_settings("write_trip_matrices.yaml") - constants = trip_settings.get("CONSTANTS") - - model_metadata_dict = { - "asim_branch_name": asim_commit_info["branch_name"], - "asim_commit_hash": asim_commit_info["short_commit_hash"], - "abm_branch_name": abm_git_info["branch"], - "abm_commit_hash": abm_git_info["commit"], - "constants": constants, - "prefix": prefix, - } - model_metadata_path = os.path.join(output_dir, "model_metadata.yaml") - with open(model_metadata_path, "w") as file: - yaml.dump(model_metadata_dict, file, default_flow_style=False) - - -def remove_columns(table_settings, df): - # remove columns from a final table - setting = "remove_columns" - if setting not in table_settings: - return df - remove_cols = table_settings[setting] - - remove_filter = df.filter(remove_cols) - df_removed = df.drop(columns=remove_filter) - # df_removed.name = df.name - - return df_removed - - -def reorder_columns(table_settings, df): - # reorder columns in a final table - setting = "reorder_columns" - if setting not in table_settings: - return df - reorder_cols = table_settings[setting] - - # index will not get reordered - if df.index.name in reorder_cols: - reorder_cols.remove(df.index.name) - - existing_cols = df.columns.values.tolist() - for col in existing_cols: - if col not in reorder_cols: - reorder_cols.append(col) - - for col in reorder_cols: - if col not in existing_cols: - df[col] = np.nan - - df_reorder = df[reorder_cols] - # df_reorder.name = df.name - - return df_reorder - - -def rename_columns(table_settings, df): - # rename columns in a final table - setting = "rename_columns" - if setting not in table_settings: - return df - rename_cols = table_settings[setting] - - df_rename = df.rename(columns=rename_cols) - - return df_rename - - -def get_output_table_names(output_tables_settings, output_tables_settings_name): - """ """ - action = output_tables_settings.get("action") - tables = output_tables_settings.get("tables") - registered_tables = 
pipeline.registered_tables() - if action == "include": - # interpret empty or missing tables setting to mean include all registered tables - output_tables_list = tables if tables is not None else registered_tables - elif action == "skip": - output_tables_list = [t for t in registered_tables if t not in tables] - else: - raise "expected %s action '%s' to be either 'include' or 'skip'" % ( - output_tables_settings_name, - action, - ) - return output_tables_list - - -@inject.step() -def update_tables(): - # get list of model outputs to update - output_dir = inject.get_injectable("output_dir") - input_dir = os.path.abspath(os.path.join(output_dir, "..", "..", "input")) - # input_dir = inject.get_injectable("data_dir") - output_tables_settings_name = "output_tables" - output_tables_settings = setting(output_tables_settings_name) - if output_tables_settings is None: - logger.info("No output_tables specified in settings file. Nothing to update.") - return - output_tables_list = get_output_table_names( - output_tables_settings, output_tables_settings_name - ) - - common_settings_file_name = "..\common\outputs.yaml" - common_settings = config.read_model_settings(common_settings_file_name) - - for table_name in output_tables_list: - if not isinstance(table_name, str): - table_name = table_name["tablename"] - - if not ( - table_name in common_settings - or table_name == "households" - or table_name == "vehicles" - or table_name == "persons" - ): - continue - - output_table = pipeline.get_table(table_name) - - # set sample rate to float - if table_name == "households": - output_table["sample_rate"] = output_table["sample_rate"].astype(float) - input_households = pd.read_csv( - os.path.join(input_dir, "households.csv"), - usecols=["hhid", "poverty"], - dtype={"hhid": "int32", "poverty": "float"}, - ) - output_table = output_table.merge( - input_households, how="inner", left_on="household_id", right_on="hhid" - ) - - # split vehicle_type column - if table_name == "vehicles": - 
output_table[ - ["vehicle_category", "num_occupants", "fuel_type"] - ] = output_table["vehicle_type"].str.split(pat="_", expand=True) - # output_table.drop(columns={'vehicle_type'}, inplace=True) ## TODO decide whether to drop column here or in bronze -> silver filter - # add missing columns from input persons file - if table_name == "persons": - input_persons = pd.read_csv( - os.path.join(input_dir, "persons.csv"), - usecols=[ - "perid", - "miltary", - "grade", - "weeks", - "hours", - "rac1p", - "hisp", - ], - dtype={ - "perid": "int32", - "miltary": "int8", - "grade": "int8", - "weeks": "int8", - "hours": "int8", - "rac1p": "int8", - "hisp": "int8", - }, - ) - output_table = output_table.merge( - input_persons, how="inner", left_on="person_id", right_on="perid" - ) - - if table_name in common_settings: - table_settings = common_settings[table_name] - output_table = remove_columns(table_settings, output_table) - output_table = reorder_columns(table_settings, output_table) - output_table = rename_columns(table_settings, output_table) - - pipeline.replace_table(table_name, output_table) - - prefix = output_tables_settings.get("prefix", "final_") - write_metadata(prefix)