Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[uwtools_integration] Integrate ics/lbcs #266

Open
wants to merge 44 commits into
base: uwtools_integration
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
6bdab4c
initial chgres_cube config
WeirAE Aug 21, 2024
46a3227
update coldstart.yaml
WeirAE Aug 21, 2024
1af2517
second pass on passing files
WeirAE Aug 27, 2024
b7884cc
progress continues adding functionality
WeirAE Sep 4, 2024
b188301
update secondary YAML logic
WeirAE Sep 5, 2024
4753089
first version edits complete
WeirAE Sep 17, 2024
93ac362
config_defaults.yaml formatting fixes
WeirAE Sep 18, 2024
e437b3d
yaml fixes to complete build
WeirAE Sep 18, 2024
bee31be
try some changes in chgres_cube.py
WeirAE Sep 18, 2024
e661d1c
add changes from PR 264
WeirAE Sep 19, 2024
7360ec1
Merge branch 'uwtools_integration' into integrate_ics_lbcs
WeirAE Sep 19, 2024
ed58157
revert unintended changes
WeirAE Sep 19, 2024
8dde45f
also revert utils.py
WeirAE Sep 19, 2024
9f16342
Revert remaining formatting
WeirAE Sep 19, 2024
d6c3786
fix local issues
WeirAE Sep 19, 2024
4eff96a
fix YAML formatting
WeirAE Sep 19, 2024
3b65a6c
Fix remaining YAML syntax issues
WeirAE Sep 19, 2024
7116141
Undo prior incorrect sync and refix YAML
WeirAE Sep 19, 2024
c310cb8
local YAML and path fixes
WeirAE Sep 20, 2024
9a0ab65
progress resolving dereferencing
WeirAE Sep 20, 2024
226d335
fixes for loop logic
WeirAE Sep 25, 2024
8a3ed18
fixes for lbcs
WeirAE Oct 7, 2024
8b7dd87
fix external model yaml issues
WeirAE Oct 7, 2024
2f709e3
Merge remote-tracking branch 'refs/remotes/origin/integrate_ics_lbcs'…
WeirAE Oct 8, 2024
b83c5af
move tracers to external_model_defaults
WeirAE Oct 10, 2024
d9701bf
significant logic changes
WeirAE Oct 15, 2024
fc59204
Fix file copy logic
WeirAE Oct 16, 2024
12fb677
fix output directories
WeirAE Oct 18, 2024
f485be8
All fundamental tests pass
WeirAE Oct 24, 2024
fccb757
fix extra files 1
WeirAE Oct 24, 2024
7bc7732
First attempt to fix erroneous changes
WeirAE Oct 24, 2024
80bca46
remove spurious uwtools directory
WeirAE Oct 24, 2024
4300a3e
Fix missing newline
WeirAE Oct 24, 2024
91e15dd
update cleaner
WeirAE Oct 24, 2024
7d81efb
Merge remote-tracking branch 'refs/remotes/origin/integrate_ics_lbcs'…
WeirAE Oct 24, 2024
25fd2b4
Apply suggestions from code review
WeirAE Oct 25, 2024
1460cff
Merge remote-tracking branch 'gsl/uwtools_integration' into integrate…
WeirAE Oct 29, 2024
0e2f6e0
add back several changes lost in conflict resolutions
WeirAE Oct 29, 2024
093fbf0
additional residual formatting fixes
WeirAE Oct 30, 2024
affa5d4
fixes for running
WeirAE Oct 30, 2024
d74b410
Additional fixes
WeirAE Oct 30, 2024
86db971
move file handling to external_model_defaults.yaml
WeirAE Oct 30, 2024
d9488d9
clean dereferencing, fix file errors
WeirAE Oct 31, 2024
fb398c0
found missing file path
WeirAE Oct 31, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions parm/wflow/coldstart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ metatask_run_ensemble:
mem: '{% if global.DO_ENSEMBLE %}{%- for m in range(1, global.NUM_ENS_MEMBERS+1) -%}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}'
task_make_ics_mem#mem#:
<<: *default_task
command: '&LOAD_MODULES_RUN_TASK; "make_ics" "&JOBSdir;/JREGIONAL_MAKE_ICS"'
command: !cycstr 'source &USHdir;/load_modules_wflow.sh hera ; python &SCRIPTSdir;/chgres_cube.py
WeirAE marked this conversation as resolved.
Show resolved Hide resolved
-c &GLOBAL_VAR_DEFNS_FP;
--cycle @Y-@m-@dT@H:@M:@S
--key-path task_make_ics
--mem #mem#'
envars:
<<: *default_vars
SLASH_ENSMEM_SUBDIR: '&SLASH_ENSMEM_SUBDIR;'
Expand Down Expand Up @@ -124,7 +128,11 @@ metatask_run_ensemble:

task_make_lbcs_mem#mem#:
<<: *default_task
command: '&LOAD_MODULES_RUN_TASK; "make_lbcs" "&JOBSdir;/JREGIONAL_MAKE_LBCS"'
command: !cycstr 'source &USHdir;/load_modules_wflow.sh hera ; python &SCRIPTSdir;/chgres_cube.py
WeirAE marked this conversation as resolved.
Show resolved Hide resolved
-c &GLOBAL_VAR_DEFNS_FP;
--cycle @Y-@m-@dT@H:@M:@S
--key-path task_make_lbcs
--mem #mem#'
envars:
<<: *default_vars
SLASH_ENSMEM_SUBDIR: '&SLASH_ENSMEM_SUBDIR;'
Expand Down
288 changes: 288 additions & 0 deletions scripts/chgres_cube.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
#!/usr/bin/env python
WeirAE marked this conversation as resolved.
Show resolved Hide resolved
"""
The run script for chgres_cube
"""

import datetime as dt
import logging
import os
import re
import sys
from argparse import ArgumentParser
from copy import deepcopy
from pathlib import Path

from uwtools.api.chgres_cube import ChgresCube
from uwtools.api.config import get_yaml_config
from uwtools.api.fs import link as uwlink
from uwtools.api.logging import use_uwtools_logger


def _strip_matching_quotes(text):
    """
    Return text without one pair of matching surrounding quotes, if present.
    """
    if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
        return text[1:-1]
    return text


def _parse_var_defns(file):
    """
    Parse a shell-style variable definitions file into a dictionary.

    Each ``KEY=VALUE`` line becomes one entry. A value wrapped in parentheses,
    e.g. ``( a b c )``, is returned as a list of its whitespace-separated
    items; any other value is returned as a string. Blank lines, lines that
    start with ``#`` and lines without ``=`` are ignored. One pair of matching
    surrounding quotes is removed from scalar values and from list items.
    Quoted list items that contain whitespace are not supported: they are
    still split on the whitespace.

    :param file: Path to the variable definitions file.
    :return: Dictionary mapping variable names to a string or list of strings.
    """
    var_dict = {}
    with open(file, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            # Commented-out assignments must not be picked up as real keys.
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, value = (part.strip() for part in line.split("=", 1))
            if value.startswith("(") and value.endswith(")"):
                # Non-greedy: take the text up to the first closing parenthesis.
                match = re.search(r"\((.*?)\)", value)
                if match:
                    value = [
                        _strip_matching_quotes(item)
                        for item in match.group(1).split()
                    ]
            else:
                value = _strip_matching_quotes(value)
            var_dict[key] = value
    return var_dict


def _walk_key_path(config, key_path):
    """
    Descend through nested dictionaries, one key at a time, and return the
    sub-config found at the end of the key path.

    A missing key is logged and the KeyError is re-raised. A value that is not
    a dictionary is logged and the process exits with status 1.
    """
    visited = []
    for key in key_path:
        visited.append(key)
        trail = " -> ".join(visited)
        try:
            child = config[key]
        except KeyError:
            logging.error(f"Bad config path: {trail}")
            raise
        if isinstance(child, dict):
            config = child
            continue
        logging.error(f"Value at {trail} must be a dictionary")
        sys.exit(1)
    return config


def parse_args(argv):
    """
    Parse arguments for the script.

    :param argv: List of command-line arguments, without the program name.
    :return: Namespace with ``config_file`` (Path), ``cycle`` (datetime),
        ``key_path`` (list of str) and ``member`` (str).
    """
    parser = ArgumentParser(
        description="Script that runs chgres_cube via uwtools API",
    )
    parser.add_argument(
        "-c",
        "--config-file",
        metavar="PATH",
        required=True,
        help="Path to experiment config file.",
        type=Path,
    )
    parser.add_argument(
        "--cycle",
        help="The cycle in ISO8601 format (e.g. 2024-07-15T18)",
        required=True,
        type=dt.datetime.fromisoformat,
    )
    parser.add_argument(
        "--key-path",
        help="Dot-separated path of keys leading through the config to the driver's YAML block",
        metavar="KEY[.KEY...]",
        required=True,
        type=lambda s: s.split("."),
    )
    # "--mem" is registered explicitly so that callers using the short
    # spelling do not depend on argparse's prefix-abbreviation matching,
    # which would stop working if another "--mem*" option were ever added.
    parser.add_argument(
        "--member",
        "--mem",
        dest="member",
        default="000",
        help="The 3-digit ensemble member number.",
    )
    return parser.parse_args(argv)


# pylint: disable=too-many-locals, too-many-statements, too-many-branches
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# pylint: disable=too-many-locals, too-many-statements, too-many-branches
# pylint: disable-next=too-many-locals, too-many-statements, too-many-branches

I'm not sure this will work -- whether disable-next will apply to the next block and not just the next line, for those suppressors that relate to this function. If not, this line should probably move to the top of the module (after the #!) because, without -next, it applies to every line of code until the end of the module, and I think we should make this kind of interference with the linter more visibly obvious.

But some of this is also not bad advice from the linter: This function is 177 lines long. Could it be refactored into smaller, purpose-specific functions for better readability? I see what look like section comments describing various (maybe) unrelated operations, each of which could be its own function, which would then be somewhat self-documenting, e.g. we could change

# Extract driver config from experiment config

to function call

driver = get_driver(expt_config_cp)

and factor out the code to that function. If nothing else, there is a giant if / else block where the two alternatives might better be encapsulated in two functions.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm leaving this open for now to see what you think of the current shorter version. It might be a cleaner version as well if I moved the file linking or the loop off to a helper function?

def _ics_namelist_update(expt_config, input_type, extrn_fns):
    """
    Build the chgres_cube "config" namelist overrides for the make_ics task.
    """
    if input_type == "grib2":
        update = {"grib2_file_input_grid": extrn_fns[0]}
    else:
        update = {
            "atm_files_input_grid": extrn_fns[0],
            "sfc_files_input_grid": extrn_fns[1],
        }
    # NOTE(review): the external-model check below is assumed to apply to
    # every input type, not only the non-grib2 case -- confirm.
    workflow = expt_config["workflow"]
    if expt_config["task_get_extrn_ics"]["EXTRN_MDL_NAME_ICS"] in ("HRRR", "RAP"):
        if workflow["SDF_USES_RUC_LSM"] is True:
            update["nsoill_out"] = 9
    elif workflow["SDF_USES_THOMPSON_MP"] is True:
        update["thomp_mp_climo_file"] = workflow["THOMPSON_MP_CLIMO_FP"]
    return update


def _lbcs_namelist_update(expt_config, input_type, extrn_fn):
    """
    Build the chgres_cube "config" namelist overrides for one make_lbcs file.
    """
    if input_type == "grib2":
        update = {"grib2_file_input_grid": extrn_fn}
    else:
        update = {"atm_files_input_grid": extrn_fn}
    if expt_config["task_get_extrn_lbcs"]["EXTRN_MDL_NAME_LBCS"] not in (
        "HRRR",
        "RAP",
    ):
        workflow = expt_config["workflow"]
        if workflow["SDF_USES_THOMPSON_MP"] is True:
            update["thomp_mp_climo_file"] = workflow["THOMPSON_MP_CLIMO_FP"]
    return update


def _run_driver(expt_config_cp, task, update, cycle, key_path):
    """
    Merge namelist overrides into the task's config block and run chgres_cube.
    """
    expt_config_cp.update_from(
        {task: {"chgres_cube": {"namelist": {"update_values": {"config": update}}}}}
    )
    # The driver is re-instantiated so that it picks up the updated config.
    ChgresCube(config=expt_config_cp, cycle=cycle, key_path=key_path).run()


def _stage_links(rundir, links):
    """
    Link run-directory output into the INPUT directory beside the rundir.
    """
    output_dir = os.path.join(rundir.parent, "INPUT")
    os.makedirs(output_dir, exist_ok=True)
    uwlink(target_dir=output_dir, config=links)


def run_chgres_cube(config_file, cycle, key_path, member):
    """
    Setup and run the chgres_cube Driver.

    When "task_make_ics" is in the key path, chgres_cube is run once and its
    atmosphere, surface, control and boundary files are linked into INPUT.
    Otherwise it is run once per entry of EXTRN_MDL_FHRS and each boundary
    file is linked into INPUT. Exits with status 1 if the driver's
    "runscript.chgres_cube.done" file is absent afterwards.

    :param config_file: Path to the experiment config file.
    :param cycle: The cycle to process, as a datetime.
    :param key_path: List of keys leading to the driver's config block.
    :param member: The ensemble member number, as a string.
    """

    # dereference expressions during driver initialization
    expt_config = get_yaml_config(config_file)
    os.environ["CRES"] = expt_config["workflow"]["CRES"]
    os.environ["MEMBER"] = member

    # set universal variables
    # Use the hour of the cycle being processed rather than DATE_FIRST_CYCL,
    # so that output names are right for every cycle, not only the first.
    cyc = cycle.strftime("%H")
    dot_ensmem = (
        f".mem{member}"
        if (
            expt_config["user"]["RUN_ENVIR"] == "nco"
            and expt_config["global"]["DO_ENSEMBLE"]
            and member
        )
        else ""
    )
    nco_net = expt_config["nco"]["NET_default"]
    prefix = f"{nco_net}.t{cyc}z{dot_ensmem}"

    # Extract driver config from experiment config
    rundir = Path(
        ChgresCube(config=config_file, cycle=cycle, key_path=key_path).config["rundir"]
    )
    print(f"Will run in {rundir}")

    # Dereference cycle for file paths
    expt_config_cp = get_yaml_config(deepcopy(expt_config.data))
    expt_config_cp.dereference(
        context={
            "cycle": cycle,
            **expt_config_cp,
        }
    )
    chgres_cube_config = _walk_key_path(expt_config_cp, key_path)
    input_type = chgres_cube_config["chgres_cube"]["namelist"]["update_values"][
        "config"
    ].get("input_type")

    # External-model file names and forecast hours for this task
    shconfig = _parse_var_defns(chgres_cube_config["input_files_metadata_path"])
    extrn_fns = shconfig["EXTRN_MDL_FNS"]
    extrn_fhrs = shconfig["EXTRN_MDL_FHRS"]

    if "task_make_ics" in key_path:
        # update config for ics task, run and stage data
        update = _ics_namelist_update(expt_config, input_type, extrn_fns)
        _run_driver(expt_config_cp, "task_make_ics", update, cycle, key_path)

        # Deliver output data to a common location above the rundir.
        tile_rgnl = expt_config["constants"]["TILE_RGNL"]
        nh0 = expt_config["constants"]["NH0"]
        links = {
            f"{prefix}.gfs_data.tile{tile_rgnl}.halo{nh0}.nc": str(
                rundir / f"out.atm.tile{tile_rgnl}.nc"
            ),
            f"{prefix}.sfc_data.tile{tile_rgnl}.halo{nh0}.nc": str(
                rundir / f"out.sfc.tile{tile_rgnl}.nc"
            ),
            f"{nco_net}.t{cyc}z.gfs_ctrl.nc": str(rundir / "gfs_ctrl.nc"),
            f"{prefix}.gfs_bndy.tile{tile_rgnl}.f000.nc": str(rundir / "gfs.bndy.nc"),
        }
        _stage_links(rundir, links)
    else:
        # update config for lbcs task, loop run and stage data
        for i, lbc_spec_fhrs in enumerate(extrn_fhrs):
            print(f"group 0 processes member {i}")
            update = _lbcs_namelist_update(expt_config, input_type, extrn_fns[i])
            _run_driver(expt_config_cp, "task_make_lbcs", update, cycle, key_path)

            # Deliver output data to a common location above the rundir.
            lbc_offset_fhrs = expt_config_cp["task_get_extrn_lbcs"][
                "EXTRN_MDL_LBCS_OFFSET_HRS"
            ]
            fcst_hhh = int(lbc_spec_fhrs) - int(lbc_offset_fhrs)
            lbc_output_fn = f"{prefix}.gfs_bndy.tile7.f{fcst_hhh:03d}.nc"
            # NOTE(review): every iteration links to the same
            # rundir/gfs.bndy.nc; if uwlink creates symbolic links, earlier
            # links would end up pointing at the latest output -- confirm.
            _stage_links(rundir, {lbc_output_fn: str(rundir / "gfs.bndy.nc")})

    # error message
    if not (rundir / "runscript.chgres_cube.done").is_file():
        print("Error occurred running chgres_cube. Please see component error logs.")
        sys.exit(1)


if __name__ == "__main__":
    # Route log messages through the uwtools logger before doing any work.
    use_uwtools_logger()

    cli = parse_args(sys.argv[1:])
    run_chgres_cube(cli.config_file, cli.cycle, cli.key_path, cli.member)
Loading