Skip to content

Commit

Permalink
Merge pull request #151 from HERA-Team/multi-lst-blchunk
Browse files Browse the repository at this point in the history
Multi lst blchunk
  • Loading branch information
jsdillon authored Jul 24, 2024
2 parents e152a31 + 285b7ee commit 584fe78
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 7 deletions.
34 changes: 28 additions & 6 deletions hera_opm/mf_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import warnings
import toml
from pathlib import Path
import math
from itertools import product


def get_jd(filename):
Expand Down Expand Up @@ -1336,7 +1338,9 @@ def build_analysis_makeflow_from_config(
return


def make_lstbin_config_file(config, outdir: str) -> int:
def make_lstbin_config_file(
config, outdir: str, bl_chunk_size: int | None = None
) -> int:
# This must be a TOML file that specifies how to construct the LSTbin file-config
lstconfig = config["FILE_CFG"]

Expand All @@ -1358,7 +1362,16 @@ def make_lstbin_config_file(config, outdir: str) -> int:

lst_file_config.write(lstbin_config_file)

return lstbin_config_file, len(lst_file_config.matched_files)
# Split up the baselines into chunks that will be LST-binned together.
# This is just to save on RAM.
if bl_chunk_size is None:
bl_chunk_size = len(lst_file_config.antpairs)
else:
bl_chunk_size = min(bl_chunk_size, len(lst_file_config.antpairs))

n_bl_chunks = int(math.ceil(len(lst_file_config.antpairs) / bl_chunk_size))

return lstbin_config_file, len(lst_file_config.matched_files), n_bl_chunks


def build_lstbin_makeflow_from_config(
Expand Down Expand Up @@ -1403,7 +1416,9 @@ def build_lstbin_makeflow_from_config(

# Also write a YAML version of just the parameters, to be used to run
# the notebook.
cfg_opts = toml.load(config_file)["LSTAVG_OPTS"]
cfg_opts = config["LSTAVG_OPTS"]
# Interpolate the parameters
cfg_opts = {k: get_config_entry(config, "LSTAVG_OPTS", k) for k in cfg_opts}
lstavg_config = outdir / "lstavg-config.toml"
with open(lstavg_config, "w") as fl:
toml.dump(cfg_opts, fl)
Expand Down Expand Up @@ -1447,7 +1462,13 @@ def build_lstbin_makeflow_from_config(
base_mem, base_cpu, mail_user, default_queue, batch_system
)

lstbin_config_file, nfiles = make_lstbin_config_file(config, outdir)
bl_chunk_size = get_config_entry(
config, "LSTBIN_OPTS", "bl_chunk_size", required=False
)

lstbin_config_file, nfiles, nbl_chunks = make_lstbin_config_file(
config, outdir, bl_chunk_size=bl_chunk_size
)
config["LSTBIN_OPTS"]["lstconf"] = str(lstbin_config_file.absolute())

if not parallelize:
Expand Down Expand Up @@ -1485,13 +1506,14 @@ def build_lstbin_makeflow_from_config(
)

# loop over output files
for output_file_index in range(nfiles):
for output_file_index, bl_chunk in product(range(nfiles), range(nbl_chunks)):
# if parallelize, update output_file_select and output_blchnk_select
if parallelize:
config["LSTBIN_OPTS"]["output_file_select"] = str(output_file_index)
config["LSTBIN_OPTS"]["output_blchnk_select"] = str(bl_chunk)

# make outfile list
outfile = Path(f"{output_file_index:04}.LSTBIN.out")
outfile = Path(f"{output_file_index:04}.b{bl_chunk:03}.LSTBIN.out")

# get args list for lst-binning step
args = [
Expand Down
16 changes: 15 additions & 1 deletion hera_opm/tests/test_lstbin_makeflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def make_lstbin_config_file(
lstbin_opts=None,
file_cfg=None,
lstavg_opts=None,
bl_chunk_size: int | None = 5000,
):
"""Make a lstbin config file."""
options = {
Expand All @@ -36,6 +37,7 @@ def make_lstbin_config_file(
"parallelize": True,
"outdir": str(fl.parent),
"parent_dir": str(fl.parent),
"bl_chunk_size": bl_chunk_size,
},
**(lstbin_opts or {}),
}
Expand All @@ -55,7 +57,6 @@ def make_lstbin_config_file(
lstavg_opts = {
**{
"outdir": "../data",
"bl_chunk_size": 5000,
"fname_format": "{inpaint_mode}/zen.{kind}.{lst:7.5f}.sum.uvh5",
"overwrite": True,
"write_med_mad": True,
Expand Down Expand Up @@ -104,6 +105,18 @@ def lsttoml_direct_datafiles(tmp_path_factory) -> Path:
return fl


@pytest.fixture(scope="module")
def lsttoml_direct_datafiles_blchunk_none(tmp_path_factory) -> Path:
    """Make a direct lstbin config file with ``bl_chunk_size`` set to None.

    The config is built by ``make_lstbin_config_file`` for two explicitly
    listed data files, and the path handed to that helper is returned.
    """
    datafiles = ["zen.2458043.40141.HH.uvh5", "zen.2458043.40887.HH.uvh5"]
    toml_path = tmp_path_factory.mktemp("data") / "lstbin_direct.toml"
    # Explicitly pass None so the "no baseline chunk size given" path is used.
    make_lstbin_config_file(toml_path, datafiles=datafiles, bl_chunk_size=None)
    return toml_path


@pytest.fixture(scope="module")
def lsttoml_direct_datafiles_glob(tmp_path_factory) -> Path:
"""Make a direct lstbin config file."""
Expand Down Expand Up @@ -158,6 +171,7 @@ def lsttoml_notebook_datafiles(
"config_file",
[
"lsttoml_direct_datafiles",
"lsttoml_direct_datafiles_blchunk_none",
"lsttoml_direct_datafiles_glob",
"lsttoml_notebook_datafiles",
],
Expand Down

0 comments on commit 584fe78

Please sign in to comment.