Skip to content

Commit

Permalink
categorisation education : new distribution of education od with age range
Browse files Browse the repository at this point in the history
  • Loading branch information
Marie Laurent committed Jul 25, 2024
1 parent 569f840 commit 30709d3
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 20 deletions.
12 changes: 11 additions & 1 deletion data/od/cleaned.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,22 @@ def execute(context):

assert not np.any(df_work["commute_mode"].isna())

# Clean age range for education
df_education["age_range"] = np.nan
df_education.loc[df_education["AGEREV10"] <= 6, "age_range"] = "primary_school"
df_education.loc[df_education["AGEREV10"] == 11, "age_range"] = "middle_school"
df_education.loc[df_education["AGEREV10"] == 15, "age_range"] = "high_school"
df_education.loc[df_education["AGEREV10"] >= 18, "age_range"] = "higher_education"
df_education["age_range"] = df_education["age_range"].astype("category")

assert not np.any(df_education["age_range"].isna())

# Aggregate the flows
print("Aggregating work ...")
df_work = df_work.groupby(["origin_id", "destination_id", "commute_mode"])["weight"].sum().reset_index()

print("Aggregating education ...")
df_education = df_education.groupby(["origin_id", "destination_id"])["weight"].sum().reset_index()
df_education = df_education.groupby(["origin_id", "destination_id","age_range"])["weight"].sum().reset_index()

df_work["weight"] = df_work["weight"].fillna(0.0)
df_education["weight"] = df_education["weight"].fillna(0.0)
Expand Down
3 changes: 2 additions & 1 deletion data/od/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def execute(context):
"COMMUNE":"str",
"ARM":"str",
"IPONDI":"float",
"DCETUF":"str"
"DCETUF":"str",
"AGEREV10":"int"
}

with zipfile.ZipFile(
Expand Down
32 changes: 20 additions & 12 deletions data/od/weighted.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,24 @@
def configure(context):
context.stage("data.od.cleaned")
context.stage("data.spatial.codes")
context.config("output_path")
context.config("education_location_source","bpe")

def fix_origins(df, commune_ids, purpose):
def fix_origins(df, commune_ids, purpose,category):
existing_ids = set(np.unique(df["origin_id"]))
missing_ids = commune_ids - existing_ids
categories = set(np.unique(df[category]))

rows = []
for origin_id in missing_ids:
for destination_id in commune_ids:
rows.append((origin_id, destination_id, 1.0 if origin_id == destination_id else 0.0))
for category_name in categories :
rows.append((origin_id, destination_id, category_name, 1.0/len(categories) if origin_id == destination_id else 0.0))

print("Fixing %d origins for %s" % (len(missing_ids), purpose))

return pd.concat([df, pd.DataFrame.from_records(
rows, columns = ["origin_id", "destination_id", "weight"]
rows, columns = ["origin_id", "destination_id", category, "weight"]
)]).sort_values(["origin_id", "destination_id"])

def execute(context):
Expand All @@ -35,25 +39,29 @@ def execute(context):
# Load data
df_work, df_education = context.stage("data.od.cleaned")

# Aggregate work (we do not consider different modes at the moment)
df_work = df_work[["origin_id", "destination_id", "weight"]].groupby(["origin_id", "destination_id"]).sum().reset_index()

# Add missing origins
df_work = fix_origins(df_work, commune_ids, "work")
df_education = fix_origins(df_education, commune_ids, "education")
df_work = fix_origins(df_work, commune_ids, "work","commute_mode")
df_education = fix_origins(df_education, commune_ids, "education","age_range")

# Aggregate work (we do not consider different modes at the moment)
df_work = df_work[["origin_id", "destination_id", "weight"]].groupby(["origin_id", "destination_id"]).sum().reset_index()

# Compute totals
df_total = df_work[["origin_id", "weight"]].groupby("origin_id").sum().reset_index().rename({ "weight" : "total" }, axis = 1)
df_work = pd.merge(df_work, df_total, on = "origin_id")

df_total = df_education[["origin_id", "weight"]].groupby("origin_id").sum().reset_index().rename({ "weight" : "total" }, axis = 1)
df_education = pd.merge(df_education, df_total, on = "origin_id")

df_total = df_education[["origin_id","age_range", "weight"]].groupby(["origin_id","age_range"]).sum().reset_index().rename({ "weight" : "total" }, axis = 1)
df_education = pd.merge(df_education, df_total, on = ["origin_id","age_range"])

if context.config("education_location_source") == 'bpe':
# Aggregate education (we do not consider different age range with bpe source)
df_education = df_education[["origin_id", "destination_id", "weight","total"]].groupby(["origin_id", "destination_id"]).sum().reset_index()
# Compute weight
df_work["weight"] /= df_work["total"]
df_education["weight"] /= df_education["total"]

del df_work["total"]
del df_education["total"]

df_education = df_education.fillna(0.0)

return df_work, df_education
7 changes: 3 additions & 4 deletions synthesis/locations/education.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,12 @@ def execute(context):
df_education["fake"] = False
df_education = df_education.to_crs("2154")
list_type = set(df_education["TYPEQU"].unique())
df_locations = pd.concat([df_locations[~(df_locations["TYPEQU"].isin(list_type))],df_education])
df_locations = pd.concat([df_locations[~(df_locations["TYPEQU"].str.startswith(tuple(list_type)))],df_education[df_education["commune_id"].isin(required_communes)]])


# Add education destinations in function of level education
for c in ["C1", "C2", "C3"]:
missing_communes = required_communes - set(
df_locations[df_locations["TYPEQU"].str.startswith(c)]["commune_id"].unique())
missing_communes = required_communes - set(df_locations[df_locations["TYPEQU"].str.startswith(c)]["commune_id"].unique())

if len(missing_communes) > 0:
df_locations = pd.concat([df_locations,fake_education(missing_communes, c, df_locations, df_zones)])
Expand All @@ -93,7 +92,7 @@ def execute(context):

if len(missing_communes) > 0:

df_locations = pd.concat([df_locations,fake_education(missing_communes, "C4", df_locations, df_zones)])
df_locations = pd.concat([df_locations,fake_education(missing_communes, "C4", df_locations, df_zones)])
else :
missing_communes = required_communes - set(df_locations["commune_id"].unique())
if len(missing_communes) > 0:
Expand Down
4 changes: 2 additions & 2 deletions synthesis/population/spatial/primary/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def configure(context):
context.stage("synthesis.population.spatial.home.zones")
context.stage("synthesis.population.enriched")
context.stage("synthesis.population.trips")

context.config("output_path")
context.config("random_seed")
context.config("education_location_source", "bpe")

Expand Down Expand Up @@ -149,7 +149,7 @@ def execute(context):
df_education.append(
process(context, "education_" + prefix, random,
df_persons[df_persons["age"].between( education_type["min_age"],education_type["max_age"])],
df_education_od,df_locations[df_locations["TYPEQU"].str.startswith(education_type["type_edu"])])
df_education_od[df_education_od["age_range"]==prefix],df_locations[df_locations["TYPEQU"].str.startswith(education_type["type_edu"])])
)
df_education = pd.concat(df_education).sort_values(["origin_id", "destination_id"])

Expand Down

0 comments on commit 30709d3

Please sign in to comment.