From 45d63a66eff6cbc64632554d5377e347924dab64 Mon Sep 17 00:00:00 2001 From: Rajendra Adhikari Date: Tue, 13 Aug 2024 14:10:19 -0500 Subject: [PATCH 1/3] Skip writing ts metadata since it causes timeout --- buildstockbatch/postprocessing.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index 2ec5a44c..4461cc2e 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -379,19 +379,6 @@ def write_metadata_files(fs, parquet_root_dir, partition_columns): parquet.write_metadata(sch, f"{parquet_root_dir}/_common_metadata", filesystem=fs) logger.info(f"Written _common_metadata to {parquet_root_dir}") - if partition_columns: - partition_glob = "/".join([f"{c}*" for c in partition_columns]) - glob_str = f"{parquet_root_dir}/up*/{partition_glob}/*.parquet" - else: - glob_str = f"{parquet_root_dir}/up*/*.parquet" - - logger.info(f"Gathering all the parquet files in {glob_str}") - concat_files = fs.glob(glob_str) - logger.info(f"Gathered {len(concat_files)} files. Now writing _metadata") - parquet_root_dir = Path(parquet_root_dir).as_posix() - create_metadata_file(concat_files, root_dir=parquet_root_dir, engine="pyarrow", fs=fs) - logger.info(f"_metadata file written to {parquet_root_dir}") - def combine_results(fs, results_dir, cfg, do_timeseries=True): """Combine the results of the batch simulations. 
From 81d74f518c2caaadaa09c6a9cb3b8b4437da1ba0 Mon Sep 17 00:00:00 2001 From: Rajendra Adhikari Date: Tue, 13 Aug 2024 14:16:47 -0500 Subject: [PATCH 2/3] Update changelog --- docs/changelog/changelog_dev.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst index fd6dbdbe..5178193f 100644 --- a/docs/changelog/changelog_dev.rst +++ b/docs/changelog/changelog_dev.rst @@ -102,3 +102,10 @@ Development Changelog exposes optional ``include_annual_bills`` (defaults to true) and ``include_monthly_bills`` (defaults to false) arguments for reporting annual and monthly utility bill outputs, respectively. + + .. change:: + :tags: general, bugfix + :pullreq: 464 + + Stop creating dask _metadata files for the timeseries parquet files since doing so crashes the + postprocessing. From ce0781e9178327446e337899eb7b28455b4ccdb4 Mon Sep 17 00:00:00 2001 From: Rajendra Adhikari Date: Tue, 13 Aug 2024 14:43:38 -0500 Subject: [PATCH 3/3] Fix tests --- buildstockbatch/test/test_base.py | 5 ----- buildstockbatch/test/test_local.py | 1 - 2 files changed, 6 deletions(-) diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 5dbefcbd..25ff4347 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -218,11 +218,6 @@ def test_upload_files(mocker, basic_residential_project_file): assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - s3_file_path = s3_path + "timeseries/_metadata" - source_file_path = os.path.join(source_path, "timeseries", "_metadata") - assert (source_file_path, s3_file_path) in files_uploaded - files_uploaded.remove((source_file_path, s3_file_path)) - s3_file_path = s3_path + "buildstock_csv/buildstock.csv" source_file_path = str(buildstock_csv_path) assert (source_file_path, s3_file_path) in files_uploaded diff --git a/buildstockbatch/test/test_local.py 
b/buildstockbatch/test/test_local.py index 6c409598..2256109a 100644 --- a/buildstockbatch/test/test_local.py +++ b/buildstockbatch/test/test_local.py @@ -89,7 +89,6 @@ def test_resstock_local_batch(project_filename): assert (upg["completed_status"] == "Success").all() assert upg.shape[0] == n_datapoints assert (ts_pq_path / "_common_metadata").exists() - assert (ts_pq_path / "_metadata").exists() shutil.rmtree(out_path)