Skip to content

Commit

Permalink
CDAP estimation with joint component
Browse files Browse the repository at this point in the history
  • Loading branch information
dhensle committed Dec 16, 2023
1 parent a8e755f commit 8381bff
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 16 deletions.
10 changes: 10 additions & 0 deletions activitysim/abm/models/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id):
for hhsize in range(2, cdap.MAX_HHSIZE + 1):
spec = cdap.get_cached_spec(hhsize)
estimator.write_table(spec, "spec_%s" % hhsize, append=False)
if add_joint_tour_utility:
joint_spec = cdap.get_cached_joint_spec(hhsize)
estimator.write_table(
joint_spec, "joint_spec_%s" % hhsize, append=False
)

logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

Expand Down Expand Up @@ -184,6 +189,11 @@ def cdap_simulate(persons_merged, persons, households, chunk_size, trace_hh_id):
if estimator:
estimator.write_choices(choices)
choices = estimator.get_survey_values(choices, "persons", "cdap_activity")
if add_joint_tour_utility:
hh_joint.index.name = "household_id"
hh_joint = estimator.get_survey_values(
hh_joint, "households", "has_joint_tour"
)
estimator.write_override_choices(choices)
estimator.end_estimation()

Expand Down
143 changes: 127 additions & 16 deletions activitysim/estimation/larch/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@

_logger = logging.getLogger(logger_name)

MAX_HHSIZE = 5

def generate_alternatives(n_persons):

def generate_alternatives(n_persons, add_joint=False):
"""
Generate a dictionary of CDAP alternatives.
Expand All @@ -39,8 +41,14 @@ def generate_alternatives(n_persons):
alt_names = list(
"".join(i) for i in itertools.product(basic_patterns, repeat=n_persons)
)
if add_joint:
pattern = r"[MN]"
joint_alts = [
alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2
]
alt_names = alt_names + joint_alts
alt_codes = np.arange(1, len(alt_names) + 1)
return Dict(zip(alt_names, alt_codes))
return dict(zip(alt_names, alt_codes))


def apply_replacements(expression, prefix, tokens):
Expand All @@ -67,7 +75,9 @@ def apply_replacements(expression, prefix, tokens):
return expression


def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()):
def cdap_base_utility_by_person(
model, n_persons, spec, alts=None, value_tokens=(), add_joint=False
):
"""
Build the base utility by person for each pattern.
Expand Down Expand Up @@ -100,7 +110,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=
model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"])
else:
if alts is None:
alts = generate_alternatives(n_persons)
alts = generate_alternatives(n_persons, add_joint)
person_numbers = range(1, n_persons + 1)
for pnum in person_numbers:
for i in spec.index:
Expand Down Expand Up @@ -220,13 +230,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie
model.utility_co[anum] += linear_component


def cdap_split_data(households, values):
def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values):
"""
FIXME: Not fully implemented!!!!
Code is adapted from the cdap model in ActivitySim with the joint tour component
Structure is pretty much in place, but dependencies need to be filtered out.
"""

for row in joint_coef.itertuples():
for aname, anum in alts.items():
# only adding joint tour utility to alternatives with joint tours
if "J" not in aname:
continue
expression = row.Expression
dependency_name = row.dependency
coefficient = row.coefficient

# dealing with dependencies
if dependency_name in ["M_px", "N_px", "H_px"]:
if "_pxprod" in expression:
prod_conds = row.Expression.split("|")
expanded_expressions = [
tup
for tup in itertools.product(
range(len(prod_conds)), repeat=n_persons
)
]
for expression_tup in expanded_expressions:
expression_list = []
dependency_list = []
for counter in range(len(expression_tup)):
expression_list.append(
prod_conds[expression_tup[counter]].replace(
"xprod", str(counter + 1)
)
)
if expression_tup[counter] == 0:
dependency_list.append(
dependency_name.replace("x", str(counter + 1))
)

expression_value = "&".join(expression_list)
# FIXME only apply to alternative if dependency satisfied
bug
model.utility_co[anum] += X(expression_value) * P(coefficient)

elif "_px" in expression:
for pnum in range(1, n_persons + 1):
dependency_name = row.dependency.replace("x", str(pnum))
expression = row.Expression.replace("x", str(pnum))
# FIXME only apply to alternative if dependency satisfied
bug
model.utility_co[anum] += X(expression) * P(coefficient)

else:
model.utility_co[anum] += X(expression) * P(coefficient)


def cdap_split_data(households, values, add_joint):
if "cdap_rank" not in values:
raise ValueError("assign cdap_rank to values first")
# only process the first 5 household members
values = values[values.cdap_rank <= 5]
values = values[values.cdap_rank <= MAX_HHSIZE]
cdap_data = {}
for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)):
for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, MAX_HHSIZE)):
if hhsize == 1:
v = pd.merge(values, hhs_part.household_id, on="household_id").set_index(
"household_id"
Expand All @@ -239,16 +307,31 @@ def cdap_split_data(households, values):
)
v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns]
for agglom in ["override_choice", "model_choice"]:
v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1)
v[agglom] = (
v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]]
.fillna("H")
.sum(1)
)
if add_joint:
joint_tour_indicator = (
hhs_part.set_index("household_id")
.reindex(v.index)
.has_joint_tour
)
pd.testing.assert_index_equal(v.index, joint_tour_indicator.index)
v[agglom] = np.where(
joint_tour_indicator == 1, v[agglom] + "J", v[agglom]
)
cdap_data[hhsize] = v

return cdap_data


def cdap_dataframes(households, values):
data = cdap_split_data(households, values)
def cdap_dataframes(households, values, add_joint):
data = cdap_split_data(households, values, add_joint)
dfs = {}
for hhsize in data.keys():
alts = generate_alternatives(hhsize)
alts = generate_alternatives(hhsize, add_joint)
dfs[hhsize] = DataFrames(
co=data[hhsize],
alt_names=alts.keys(),
Expand Down Expand Up @@ -296,6 +379,7 @@ def cdap_data(
spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
settings_file="{name}_model_settings.yaml",
chooser_data_file="{name}_values_combined.csv",
joint_coeffs_file="{name}_joint_tour_coefficients.csv",
):
edb_directory = edb_directory.format(name=name)
if not os.path.exists(edb_directory):
Expand Down Expand Up @@ -326,7 +410,7 @@ def read_yaml(filename, **kwargs):
if person_type_map is None:
raise KeyError("PERSON_TYPE_MAP missing from cdap_settings.yaml")

person_rank = cdap.assign_cdap_rank(persons, person_type_map)
# person_rank = cdap.assign_cdap_rank(persons, person_type_map)

coefficients = read_csv(
coefficients_file,
Expand All @@ -341,9 +425,28 @@ def read_yaml(filename, **kwargs):
comment="#",
)

try:
joint_coef = read_csv(
joint_coeffs_file,
# dtype={"interaction_ptypes": str},
# keep_default_na=False,
comment="#",
)
add_joint = True
except FileNotFoundError:
joint_coef = None
add_joint = False
print("Including joint tour utiltiy?:", add_joint)

spec1 = read_csv(spec1_file, comment="#")
values = read_csv(chooser_data_file, comment="#")
values["cdap_rank"] = person_rank
person_rank = cdap.assign_cdap_rank(
persons[persons.household_id.isin(values.household_id)]
.set_index("person_id")
.reindex(values.person_id),
person_type_map,
)
values["cdap_rank"] = person_rank.values

return Dict(
edb_directory=Path(edb_directory),
Expand All @@ -353,6 +456,8 @@ def read_yaml(filename, **kwargs):
coefficients=coefficients,
households=hhs,
settings=settings,
joint_coef=joint_coef,
add_joint=add_joint,
)


Expand All @@ -365,6 +470,7 @@ def cdap_model(
spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
settings_file="{name}_model_settings.yaml",
chooser_data_file="{name}_values_combined.csv",
joint_coeffs_file="{name}_joint_tour_coefficients.csv",
return_data=False,
):
d = cdap_data(
Expand All @@ -377,15 +483,17 @@ def cdap_model(
spec1_file=spec1_file,
settings_file=settings_file,
chooser_data_file=chooser_data_file,
joint_coeffs_file=joint_coeffs_file,
)

households = d.households
values = d.person_data
spec1 = d.spec1
interaction_coef = d.interaction_coef
coefficients = d.coefficients
add_joint = d.add_joint

cdap_dfs = cdap_dataframes(households, values)
cdap_dfs = cdap_dataframes(households, values, add_joint)
m = {}
_logger.info(f"building for model 1")
m[1] = Model(dataservice=cdap_dfs[1])
Expand All @@ -398,12 +506,15 @@ def cdap_model(
interaction_coef["cardinality"] = interaction_coef[
"interaction_ptypes"
].str.len()
for s in [2, 3, 4, 5]:
for s in range(2, MAX_HHSIZE + 1):
# for s in [2, 3, 4, 5]:
_logger.info(f"building for model {s}")
m[s] = Model(dataservice=cdap_dfs[s])
alts = generate_alternatives(s)
alts = generate_alternatives(s, add_joint)
cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns)
cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients)
# if add_joint:
# cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values)
m[s].choice_any = True
m[s].availability_any = True

Expand Down

0 comments on commit 8381bff

Please sign in to comment.