From 88d66a17c5bec15351d5aee305c9dcaf853a275c Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Thu, 25 Apr 2024 09:48:22 -0500 Subject: [PATCH] better exercise script --- exercise.py | 230 +++++++++++++++++++++++++++------------------------- 1 file changed, 121 insertions(+), 109 deletions(-) diff --git a/exercise.py b/exercise.py index d0a3c04..d0c4d7c 100644 --- a/exercise.py +++ b/exercise.py @@ -23,118 +23,130 @@ def _exercise_path(dirname) -> Path: return Path(os.path.dirname(__file__)).joinpath(dirname) -# Download compressed archive of full-scale data, if not already present -# If the full scale data is already available, the whole data download process -# can be commented out and skipped. +GET_FULL_DATA = True full_data_dir = _exercise_path("data-full") -archive_sha256 = { - 11: "5b0c7ad009115830fbedaee9dd33981b3bab23b3b7177a7a0a8f3c871decf989", - 10: "c35b7c7f83be21159b20da8185cb5bd78b812378b424c20bdc1f5df51b283921", - 4: "13f04db524324b48b9244e872cc997c97a0a920548ce5d5ef3fe4af7f09e9517", - 3: "a6751f10aee9deec531582862368519f70f6d6f03f344bf82397d099d36537e1", - 2: "ee1cdab914dcba7a0feb01b65822c6160be3f60aba29f797259c7dd0a2a40d3a", - 5: "1663dfaf6eda027850f7c79a783d646bdc58a6a588bc1308fc31ea9cb3f85f2d", - 12: "35b664eedaece82b9ae0167664e618a7daa4079ca99ebf8cc01dd61d7ff4a51b", - 15: "776fed4a3bd01e98caa4aea4f36c99465af252d5ae1a66d66897496271805e35", - 14: "0cce0a90ac662e2d40d89abd28640dd5457e74f0b1a4eabd5f53f747955fe335", - 13: "767cc029fe36e0a67446cf822c4e34525053a0857228efb2f50c17785e88a7a1", - 9: "6e49b3738acfd0778becaf984f570906cfbc600e14f2de0d03c627f95c61afde", - 0: "adeae9915a0402b87937ba19e6732396d29813cec64bb1b1d2c66e336ca349a4", - 7: "3df78f56eb383c4adba4e32ccf78700151e71a8271f3f525317574cdcb61adbb", - 6: "a204166e2875368314cae7070fcd77591603c75565986dfb4b171b1e08400c4f", - 1: "4322559d96c7c1521760f875da7aeb92b9dae7a19824717f5c1ae086623f15a0", - 8: "7dfb447beaf4e5fc2f9656c7e6427a1149f3a0f3c5f6f7c285b508a279d7eab5", -} - -full_data_sha256 = { - "persons.parquet": "f41434b49d87aa9bb19296c5ae271c25a07356e3e370659a6230017c82c881b7", - "transit_skims_PM.omx": "20d9af6f6be2f78ce81f817aca01eb05611a7a1702e9de896db8a918af11421f", - "maz_maz_walk.parquet": "8759bdae920e6f507120e68eab3ead2e7738e240c22512469bc734cc95bb7c59", - "traffic_skims_MD.omx": "5cdd041d4324f7898b17b22555af65201ed323cebaf7ba34a0351df981db733c", - "households.parquet": "c75156d739ae71b01e0d3be7563b04a115b987bd8e8587173d7957aab58f4a89", - "transit_skims_MD.omx": "535309745b79ad8a71601228b4bb6824e2996632fe177fe9e8cd7b56693eef4d", - "traffic_skims_PM.omx": "434996674399cdfd1073c4d24cd8b3a5691c541f97386a8d29c4833b9ce85c7f", - "traffic_skims_EA.omx": "fe097d769c373bd37ab24f57ff102c70213055aae73be9a5a9c3d5d762bf2f0e", - "traffic_skims_EV.omx": "91c762df3288867a050395691cf1f13b9850f6e8ea55163730f8fceb4f8fca98", - "transit_skims_AM.omx": "7fc26ce47bfc4c6844a6fcd193d4808dda711bf2a8e02a788b33ba21d000b88f", - "maz_stop_walk.parquet": "f94fe8690db2342546be3592da4f20088bce7b026749f626a66ce2126555946e", - "maz_maz_bike.parquet": "9ad9f5108b5dd88d893bc2cb56354400fe29749310d39919bc9e88e9b5ddb036", - "land_use.parquet": "a2b41246fbfed8250e9fcda0853da1bd33a05cf5d0699f965a93759e39c8071b", - "transit_skims_EV.omx": "6fcec702b5d4ebc01e88b5dec075fcc9b7ee6c26d32a751dc3b99536e80a336d", - "transit_skims_EA.omx": "96111a202d4d2630fd2e749f9a3f96dbc6f314d4a3abb4d5cb92f0ff1337d6d0", - "traffic_skims_AM.omx": "e31e1005897eaf30e3415b12c93696d88fbc42ee6c9b75f35178ad196f0eb80f", -} - -download_required = False - -for filename, sha256 in full_data_sha256.items(): - f = full_data_dir.joinpath(filename) - print("checking", f) - if not f.exists() or sha256 != sha256_checksum(f): - download_required = True - break - -if download_required: - print("downloading full data...") - download_external_example( - _exercise_path("."), - name="sandag-abm3", - assets={ - f"data-full.tar.zst.part{i:03}": { - "url": f"https://github.com/ActivitySim/sandag-abm3-example/releases/download/v0.2.0/sandag-full-data.tar.zst.part{i:03}", - "sha256": sha256, - } - for i, sha256 in archive_sha256.items() - }, - ) - untarzst( - _exercise_path("sandag-abm3/data-full.tar.zst.part000"), - full_data_dir, - ) - # recheck sha256 +def get_full_data(): + """ + Download the full-scale data archive and extract it. + + This function downloads the full-scale data archive, if not already present, + and verifies that the data is correct by checking the sha256 checksum of each + file. If the full scale data is already available, the whole data download + process can be skipped by setting the global variable GET_FULL_DATA to False. + """ + archive_sha256 = { + 11: "5b0c7ad009115830fbedaee9dd33981b3bab23b3b7177a7a0a8f3c871decf989", + 10: "c35b7c7f83be21159b20da8185cb5bd78b812378b424c20bdc1f5df51b283921", + 4: "13f04db524324b48b9244e872cc997c97a0a920548ce5d5ef3fe4af7f09e9517", + 3: "a6751f10aee9deec531582862368519f70f6d6f03f344bf82397d099d36537e1", + 2: "ee1cdab914dcba7a0feb01b65822c6160be3f60aba29f797259c7dd0a2a40d3a", + 5: "1663dfaf6eda027850f7c79a783d646bdc58a6a588bc1308fc31ea9cb3f85f2d", + 12: "35b664eedaece82b9ae0167664e618a7daa4079ca99ebf8cc01dd61d7ff4a51b", + 15: "776fed4a3bd01e98caa4aea4f36c99465af252d5ae1a66d66897496271805e35", + 14: "0cce0a90ac662e2d40d89abd28640dd5457e74f0b1a4eabd5f53f747955fe335", + 13: "767cc029fe36e0a67446cf822c4e34525053a0857228efb2f50c17785e88a7a1", + 9: "6e49b3738acfd0778becaf984f570906cfbc600e14f2de0d03c627f95c61afde", + 0: "adeae9915a0402b87937ba19e6732396d29813cec64bb1b1d2c66e336ca349a4", + 7: "3df78f56eb383c4adba4e32ccf78700151e71a8271f3f525317574cdcb61adbb", + 6: "a204166e2875368314cae7070fcd77591603c75565986dfb4b171b1e08400c4f", + 1: "4322559d96c7c1521760f875da7aeb92b9dae7a19824717f5c1ae086623f15a0", + 8: "7dfb447beaf4e5fc2f9656c7e6427a1149f3a0f3c5f6f7c285b508a279d7eab5", + } + + full_data_sha256 = { + "persons.parquet": "f41434b49d87aa9bb19296c5ae271c25a07356e3e370659a6230017c82c881b7", + "transit_skims_PM.omx": "20d9af6f6be2f78ce81f817aca01eb05611a7a1702e9de896db8a918af11421f", + "maz_maz_walk.parquet": "8759bdae920e6f507120e68eab3ead2e7738e240c22512469bc734cc95bb7c59", + "traffic_skims_MD.omx": "5cdd041d4324f7898b17b22555af65201ed323cebaf7ba34a0351df981db733c", + "households.parquet": "c75156d739ae71b01e0d3be7563b04a115b987bd8e8587173d7957aab58f4a89", + "transit_skims_MD.omx": "535309745b79ad8a71601228b4bb6824e2996632fe177fe9e8cd7b56693eef4d", + "traffic_skims_PM.omx": "434996674399cdfd1073c4d24cd8b3a5691c541f97386a8d29c4833b9ce85c7f", + "traffic_skims_EA.omx": "fe097d769c373bd37ab24f57ff102c70213055aae73be9a5a9c3d5d762bf2f0e", + "traffic_skims_EV.omx": "91c762df3288867a050395691cf1f13b9850f6e8ea55163730f8fceb4f8fca98", + "transit_skims_AM.omx": "7fc26ce47bfc4c6844a6fcd193d4808dda711bf2a8e02a788b33ba21d000b88f", + "maz_stop_walk.parquet": "f94fe8690db2342546be3592da4f20088bce7b026749f626a66ce2126555946e", + "maz_maz_bike.parquet": "9ad9f5108b5dd88d893bc2cb56354400fe29749310d39919bc9e88e9b5ddb036", + "land_use.parquet": "a2b41246fbfed8250e9fcda0853da1bd33a05cf5d0699f965a93759e39c8071b", + "transit_skims_EV.omx": "6fcec702b5d4ebc01e88b5dec075fcc9b7ee6c26d32a751dc3b99536e80a336d", + "transit_skims_EA.omx": "96111a202d4d2630fd2e749f9a3f96dbc6f314d4a3abb4d5cb92f0ff1337d6d0", + "traffic_skims_AM.omx": "e31e1005897eaf30e3415b12c93696d88fbc42ee6c9b75f35178ad196f0eb80f", + } + + download_required = False + for filename, sha256 in full_data_sha256.items(): - if not full_data_dir.joinpath(filename).exists(): - raise ValueError(f"data missing: {filename}") - if sha256 != sha256_checksum(full_data_dir.joinpath(filename)): - raise ValueError(f"data error: {filename}") -else: - print("full data ready") - -########################### -### MODEL EXERCISE ### -########################### - -out_dir = _exercise_path("exercise-output-25k") -out_dir.mkdir(exist_ok=True) -out_dir.joinpath(".gitignore").write_text("**\n") - -settings = dict( - cleanup_pipeline_after_run=False, - treat_warnings_as_errors=False, - households_sample_size=100_000, - chunk_size=0, - use_shadow_pricing=True, - sharrow="require", - recode_pipeline_columns=True, - memory_profile=True, -) - -state = workflow.State.make_default( - configs_dir=( - _exercise_path(r"configs/common"), - _exercise_path(r"configs/resident"), - ), - data_dir=_exercise_path("data-full"), - output_dir=out_dir, - settings=settings, -) -state.import_extensions("../extensions") -state.filesystem.persist_sharrow_cache() -assert state.settings.memory_profile == True - -assert state.settings.chunk_size == 0 + f = full_data_dir.joinpath(filename) + print("checking", f) + if not f.exists() or sha256 != sha256_checksum(f): + download_required = True + break + + if download_required: + print("downloading full data...") + download_external_example( + _exercise_path("."), + name="sandag-abm3", + assets={ + f"data-full.tar.zst.part{i:03}": { + "url": f"https://github.com/ActivitySim/sandag-abm3-example/releases/download/v0.2.0/sandag-full-data.tar.zst.part{i:03}", + "sha256": sha256, + } + for i, sha256 in archive_sha256.items() + }, + ) + untarzst( + _exercise_path("sandag-abm3/data-full.tar.zst.part000"), + full_data_dir, + ) + # recheck sha256 + for filename, sha256 in full_data_sha256.items(): + if not full_data_dir.joinpath(filename).exists(): + raise ValueError(f"data missing: {filename}") + if sha256 != sha256_checksum(full_data_dir.joinpath(filename)): + raise ValueError(f"data error: {filename}") + else: + print("full data ready") + + +def main(**settings): + """ + Run the full-scale model exercise. + """ + out_dir = _exercise_path("exercise-output") + out_dir.mkdir(exist_ok=True) + out_dir.joinpath(".gitignore").write_text("**\n") + + state = workflow.State.make_default( + configs_dir=( + _exercise_path(r"configs/common"), + _exercise_path(r"configs/resident"), + ), + data_dir=_exercise_path("data-full"), + output_dir=out_dir, + settings=settings, + ) + state.import_extensions("../extensions") + state.filesystem.persist_sharrow_cache() + state.run.all() + return state + if __name__ == "__main__": - state.run.all() + + if GET_FULL_DATA or not full_data_dir.exists(): + get_full_data() + + # Modify the settings value here to alter the default settings + # defined in the various config files. + state = main( + cleanup_pipeline_after_run=False, + treat_warnings_as_errors=False, + households_sample_size=100_000, + chunk_size=0, + use_shadow_pricing=True, + sharrow="require", + recode_pipeline_columns=True, + memory_profile=True, + )