From 739c2e428856eb401b42e95cedf9fd88bfcb02c6 Mon Sep 17 00:00:00 2001 From: David Hensle <51132108+dhensle@users.noreply.github.com> Date: Thu, 4 Apr 2024 07:20:13 -0700 Subject: [PATCH] BayDAG Contribution #11: School Escorting Estimation Updates (#777) * School escorting estimation updates Most changes are needed to avoid crash if estimation run actually had no school escorting in the input data * blacken * updating to work with Pydantic and State object * adding missed columns necessary for no school escorting * blacken * handling zero escorting cases * removing duplicate code --- activitysim/abm/models/school_escorting.py | 141 +++++++++++++----- .../models/util/school_escort_tours_trips.py | 17 +++ 2 files changed, 118 insertions(+), 40 deletions(-) diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index b3aaf2b60..908113e98 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -58,7 +58,14 @@ def determine_escorting_participants( & (persons.cdap_activity == "M") ] households_with_escortees = escortees["household_id"] - choosers = choosers[choosers.index.isin(households_with_escortees)] + if len(households_with_escortees) == 0: + logger.warning("No households with escortees found!") + else: + tot_households = len(choosers) + choosers = choosers[choosers.index.isin(households_with_escortees)] + logger.info( + f"Proceeding with {len(choosers)} households with escortees out of {tot_households} total households" + ) # can specify different weights to determine chaperones persontype_weight = model_settings.PERSON_WEIGHT @@ -140,7 +147,7 @@ def add_prev_choices_to_choosers( stage_alts, how="left", left_on=escorting_choice, - right_on=stage_alts.index.name, + right_index=True, ) .set_index("household_id") ) @@ -216,8 +223,12 @@ def create_school_escorting_bundles_table(choosers, tours, stage): bundles : pd.DataFrame one school escorting bundle per row """ - # 
making a table of bundles - choosers = choosers.reset_index() + # want to keep household_id in columns, which is already there if running in estimation mode + if "household_id" in choosers.columns: + choosers = choosers.reset_index(drop=True) + else: + choosers = choosers.reset_index() + # creating a row for every school escorting bundle choosers = choosers.loc[choosers.index.repeat(choosers["nbundles"])] bundles = pd.DataFrame() @@ -460,7 +471,11 @@ def school_escorting( trace_hh_id = state.settings.trace_hh_id - alts = simulate.read_model_alts(state, model_settings.ALTS, set_index="Alt") + # FIXME setting index as "Alt" causes crash in estimation mode... + # happens in joint_tour_frequency_composition too! + # alts = simulate.read_model_alts(state, model_settings.ALTS, set_index="Alt") + alts = simulate.read_model_alts(state, model_settings.ALTS, set_index=None) + alts.index = alts["Alt"].values choosers, participant_columns = determine_escorting_participants( households_merged, persons, model_settings @@ -478,7 +493,9 @@ def school_escorting( for stage_num, stage in enumerate(school_escorting_stages): stage_trace_label = trace_label + "_" + stage estimator = estimation.manager.begin_estimation( - state, "school_escorting_" + stage + state, + model_name="school_escorting_" + stage, + bundle_name="school_escorting", ) model_spec_raw = state.filesystem.read_model_spec( @@ -533,9 +550,26 @@ def school_escorting( if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) - estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df, model_settings) + estimator.write_spec(model_settings, tag=stage.upper() + "_SPEC") + estimator.write_coefficients( + coefficients_df, file_name=stage.upper() + "_COEFFICIENTS" + ) estimator.write_choosers(choosers) + estimator.write_alternatives(alts, bundle_directory=True) + + # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column + # should we do it here or have 
interaction_simulate do it? + # chooser index must be duplicated in column or it will be omitted from interaction_dataset + # estimation requires that chooser_id is either in index or a column of interaction_dataset + # so it can be reformatted (melted) and indexed by chooser_id and alt_id + assert choosers.index.name == "household_id" + assert "household_id" not in choosers.columns + choosers["household_id"] = choosers.index + + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? + estimator.set_alt_id("alt_id") + + estimator.set_chooser_id(choosers.index.name) log_alt_losers = state.settings.log_alt_losers @@ -580,47 +614,74 @@ def school_escorting( if stage_num >= 1: choosers["Alt"] = choices - choosers = choosers.join(alts, how="left", on="Alt") + choosers = choosers.join(alts.set_index("Alt"), how="left", on="Alt") bundles = create_school_escorting_bundles_table( choosers[choosers["Alt"] > 1], tours, stage ) escort_bundles.append(bundles) escort_bundles = pd.concat(escort_bundles) - escort_bundles["bundle_id"] = ( - escort_bundles["household_id"] * 10 - + escort_bundles.groupby("household_id").cumcount() - + 1 - ) - escort_bundles.sort_values( - by=["household_id", "school_escort_direction"], - ascending=[True, False], - inplace=True, - ) - school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( - state, escort_bundles - ) - chauf_tour_id_map = { - v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() - } - escort_bundles["chauf_tour_id"] = np.where( - escort_bundles["escort_type"] == "ride_share", - escort_bundles["first_mand_tour_id"], - escort_bundles["bundle_id"].map(chauf_tour_id_map), - ) - assert ( - escort_bundles["chauf_tour_id"].notnull().all() - ), f"chauf_tour_id is null for {escort_bundles[escort_bundles['chauf_tour_id'].isna()]}. Check availability conditions." 
+ # Only want to create bundles and tours and trips if at least one household has school escorting + if len(escort_bundles) > 0: + escort_bundles["bundle_id"] = ( + escort_bundles["household_id"] * 10 + + escort_bundles.groupby("household_id").cumcount() + + 1 + ) + escort_bundles.sort_values( + by=["household_id", "school_escort_direction"], + ascending=[True, False], + inplace=True, + ) - tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) - tours = school_escort_tours_trips.process_tours_after_escorting_model( - state, escort_bundles, tours - ) + school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( + state, escort_bundles + ) + chauf_tour_id_map = { + v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() + } + escort_bundles["chauf_tour_id"] = np.where( + escort_bundles["escort_type"] == "ride_share", + escort_bundles["first_mand_tour_id"], + escort_bundles["bundle_id"].map(chauf_tour_id_map), + ) - school_escort_trips = school_escort_tours_trips.create_school_escort_trips( - escort_bundles - ) + assert ( + escort_bundles["chauf_tour_id"].notnull().all() + ), f"chauf_tour_id is null for {escort_bundles[escort_bundles['chauf_tour_id'].isna()]}. Check availability conditions." 
+ + tours = school_escort_tours_trips.add_pure_escort_tours( + tours, school_escort_tours + ) + tours = school_escort_tours_trips.process_tours_after_escorting_model( + state, escort_bundles, tours + ) + school_escort_trips = school_escort_tours_trips.create_school_escort_trips( + escort_bundles + ) + + else: + # create empty school escort tours & trips tables to be used downstream + tours["school_esc_outbound"] = pd.NA + tours["school_esc_inbound"] = pd.NA + tours["school_escort_direction"] = pd.NA + tours["next_pure_escort_start"] = pd.NA + school_escort_tours = pd.DataFrame(columns=tours.columns) + trip_cols = [ + "household_id", + "person_id", + "tour_id", + "trip_id", + "outbound", + "depart", + "purpose", + "destination", + "escort_participants", + "chauf_tour_id", + "primary_purpose", + ] + school_escort_trips = pd.DataFrame(columns=trip_cols) school_escort_trips["primary_purpose"] = school_escort_trips[ "primary_purpose" diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index d8b5386eb..e22a7198f 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -405,6 +405,19 @@ def merge_school_escort_trips_into_pipeline(state: workflow.State): tours = state.get_dataframe("tours") trips = state.get_dataframe("trips") + # checking to see if there are school escort trips to merge in + if len(school_escort_trips) == 0: + # if no trips, fill escorting columns with NA + trips[ + [ + "escort_participants", + "school_escort_direction", + "school_escort_trip_id", + ] + ] = pd.NA + state.add_table("trips", trips) + return trips + # want to remove stops if school escorting takes place on that half tour so we can replace them with the actual stops out_se_tours = tours[ tours["school_esc_outbound"].isin(["pure_escort", "ride_share"]) @@ -643,6 +656,10 @@ def force_escortee_tour_modes_to_match_chauffeur(state: 
workflow.State, tours): # Does it even matter if trip modes are getting matched later? escort_bundles = state.get_dataframe("escort_bundles") + if len(escort_bundles) == 0: + # do not need to do anything if no escorting + return tours + # grabbing the school tour ids for each school escort bundle se_tours = escort_bundles[["school_tour_ids", "chauf_tour_id"]].copy() # merging in chauffeur tour mode