Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parallel execution scripts for sea ice #1099

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions pcmdi_metrics/sea_ice/param/parallel_param_cmip5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Parameter file for the parallel sea ice driver: CMIP5 models scored
# against the OSI-SAF reference data set.

# ---------------------------------------------------------------------------
# Model (test) data
# ---------------------------------------------------------------------------
var = "sic"
# "*" selects every realization (presumably expanded as a wildcard by the
# driver -- confirm). An explicit member list was previously tried instead:
#   ["r1i1p1", "r2i1p1", "r3i1p1", "r4i1p1"]
realization = "*"
# First and last model years to analyse.
msyear = 1988
meyear = 2005
# (apply?, operation, factor) adjustments for the model fields.
# NOTE(review): 1e-2 presumably turns percent into fraction and 1e-6 turns
# m2 into km2 -- confirm against sea_ice_driver.py.
ModUnitsAdjust = (True, "multiply", 1e-2)
AreaUnitsAdjust = (True, "multiply", 1e-6)

# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------
# "%(case_id)" is a placeholder left in the string for the driver to fill in.
metrics_output_path = "/work/ordonez4/sea_ice/cmip5_2005/%(case_id)/"
plot = True

# ---------------------------------------------------------------------------
# Reference (observational) data: OSI-SAF
# ---------------------------------------------------------------------------
reference_data_set = "OSI-SAF"
obs_var = "ice_conc"
# First and last observation years; same window as the model years above.
osyear = 1988
oeyear = 2005
# Glob patterns for the northern- and southern-hemisphere files.
reference_data_path_nh = (
    "/p/user_pub/PCMDIobs/obs4MIPs_input/EUMETSAT/OSI-SAF-450-a-3-0/v20231201/*nh*"
)
reference_data_path_sh = (
    "/p/user_pub/PCMDIobs/obs4MIPs_input/EUMETSAT/OSI-SAF-450-a-3-0/v20231201/*sh*"
)
ObsUnitsAdjust = (True, "multiply", 1e-2)
# No area file is supplied for the observations, so the area adjustment is
# switched off and a constant cell area is given instead.
ObsAreaUnitsAdjust = (False, 0, 0)
obs_area_template = None
obs_area_var = None
# NOTE(review): presumably km2 per grid cell (the original "km2" remark sat
# on obs_area_template) -- confirm.
obs_cell_area = 625
39 changes: 39 additions & 0 deletions pcmdi_metrics/sea_ice/param/parallel_param_cmip6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Parameter file for the parallel sea ice driver: CMIP6 models scored
# against the OSI-SAF reference data set.
#
# For a CMIP5 run the settings that differ were previously kept here as
# commented-out alternatives: var = "sic", meyear = 2005, oeyear = 2005,
# metrics_output_path = "/work/ordonez4/sea_ice/cmip5_all/%(case_id)/".

# ---------------------------------------------------------------------------
# Model (test) data
# ---------------------------------------------------------------------------
var = "siconc"
# "*" selects every realization (presumably expanded as a wildcard by the
# driver -- confirm). An explicit member list was previously tried instead:
#   ["r1i1p1f1", "r2i1p1f1", "r3i1p1f1", "r4i1p1f1", "r5i1p1f1"]
realization = "*"
# First and last model years to analyse.
msyear = 1988
meyear = 2014
# (apply?, operation, factor) adjustments for the model fields.
# NOTE(review): 1e-2 presumably turns percent into fraction and 1e-6 turns
# m2 into km2 -- confirm against sea_ice_driver.py.
ModUnitsAdjust = (True, "multiply", 1e-2)
AreaUnitsAdjust = (True, "multiply", 1e-6)

# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------
# "%(case_id)" is a placeholder left in the string for the driver to fill in.
metrics_output_path = "/work/ordonez4/sea_ice/cmip6_2014/%(case_id)/"
plot = True

# ---------------------------------------------------------------------------
# Reference (observational) data: OSI-SAF
# ---------------------------------------------------------------------------
reference_data_set = "OSI-SAF"
obs_var = "ice_conc"
# First and last observation years; same window as the model years above.
osyear = 1988
oeyear = 2014
# Glob patterns for the northern- and southern-hemisphere files.
reference_data_path_nh = (
    "/p/user_pub/PCMDIobs/obs4MIPs_input/EUMETSAT/OSI-SAF-450-a-3-0/v20231201/*nh*"
)
reference_data_path_sh = (
    "/p/user_pub/PCMDIobs/obs4MIPs_input/EUMETSAT/OSI-SAF-450-a-3-0/v20231201/*sh*"
)
ObsUnitsAdjust = (True, "multiply", 1e-2)
# No area file is supplied for the observations, so the area adjustment is
# switched off and a constant cell area is given instead.
ObsAreaUnitsAdjust = (False, 0, 0)
obs_area_template = None
obs_area_var = None
# NOTE(review): presumably km2 per grid cell (the original "km2" remark sat
# on obs_area_template) -- confirm.
obs_cell_area = 625
112 changes: 112 additions & 0 deletions pcmdi_metrics/sea_ice/scripts/sea_ice_parallel_driver_cmip5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import glob
import os

import xsearch as xs

from pcmdi_metrics.mean_climate.lib.pmp_parser import PMPParser
from pcmdi_metrics.misc.scripts import parallel_submitter
from pcmdi_metrics.precip_variability.lib import AddParserArgument

# Number of worker processes handed to parallel_submitter.
num_cpus = 20

# Search facets for the model data.
mip = "cmip5"
exp = "historical"
mod = None  # set to a model name to restrict the search to that model
frq = "mon"
# Every CMIP5 command is built with the ocean cell-area variable.
area_var = "areacello"


def build_templates(path, basename):
    """Return ``(dir_template, file_template)`` for one model.

    Both templates are derived from one concrete data directory and one
    file name found in it, with the realization replaced by the literal
    ``%(realization)`` placeholder.

    Parameters
    ----------
    path : str
        "/"-separated data directory returned by xsearch for the model.
    basename : str
        Name of any data file inside ``path`` ("_"-separated fields).

    Notes
    -----
    TODO: the positions used to slice ``path`` may not be consistent from
    model to model for CMIP5 (carried over from the original script).
    """
    parts = path.split("/")
    dir_template = (
        "/".join(parts[:-4]) + "/%(realization)/" + "/".join(parts[-3:-1]) + "/"
    )
    file_template = "_".join(basename.split("_")[0:4]) + "_%(realization)" + "_*-*.nc"
    return dir_template, file_template


def find_area_file(area_dir):
    """Return one ``*nc`` file for ``area_dir``, or ``None`` if there is none.

    Files directly inside ``area_dir`` are preferred; if there are none, one
    directory level below is searched. Matches are sorted so the choice does
    not depend on the arbitrary order in which glob lists a directory.
    """
    for pattern in ("/*nc", "/*/*nc"):
        matches = sorted(glob.glob(area_dir + pattern))
        if matches:
            return matches[0]
    return None


def build_command(model, dir_template, file_template, area_path, area_var):
    """Return the sea_ice_driver.py command line for one model.

    The arguments are joined with single spaces and are not shell-quoted.
    """
    # NOTE(review): the parameter file added alongside this script is named
    # parallel_param_cmip5.py -- confirm parameter_file_cmip5.py is intended.
    return " ".join(
        [
            "./sea_ice_driver.py",
            "-p",
            "parameter_file_cmip5.py",
            "--case_id",
            model,
            "--test_data_set",
            model,
            "--test_data_path",
            dir_template,
            "--filename_template",
            file_template,
            "--area_template",
            area_path,
            "--area_var",
            area_var,
        ]
    )


def main():
    """Build one driver command per CMIP5 model and run them in parallel.

    Models are skipped, with a message, when their data directory is empty
    or when no areacello file can be located for them.
    """
    # Read parameters
    parser = PMPParser()
    parser = AddParserArgument(parser)
    param = parser.get_parameter()
    var = param.var

    if mod is None:
        pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper())
    else:
        pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper(), model=mod)

    print("Reading external variable attribute")
    # Deduplication is switched off because some models, like CESM2, need
    # to grab the gn rather than the gr area file. The era follows `mip` so
    # the area search cannot drift away from the data search.
    areacello = xs.findPaths(
        "historical", "areacello", "fx", mip_era=mip.upper(), deduplicate=False
    )
    print("Number of datasets:", len(pathDict))

    cmd_list = []
    log_list = []
    model_list = xs.getGroupValues(pathDict, "model")
    print(model_list)

    for model in model_list:
        path = xs.getValuesForFacet(pathDict, "model", model)[0]
        data_files = sorted(glob.glob(os.path.join(path, "*")))
        if not data_files:
            # An empty directory used to abort the whole batch (IndexError).
            print("no data files found", model)
            continue
        dir_template, file_template = build_templates(
            path, os.path.basename(data_files[0])
        )

        try:
            apath = xs.getValuesForFacet(areacello, "model", model)[0]
        except (KeyError, IndexError):
            # IndexError covers a model with an empty list of area paths.
            print("area path not found", model)
            continue
        print(apath)

        area_path = find_area_file(apath)
        if area_path is None:
            print("area path not found", model)
            continue

        # cmd_list and log_list must stay index-aligned for the submitter.
        cmd_list.append(
            build_command(model, dir_template, file_template, area_path, area_var)
        )
        log_list.append("log_" + mip + "_" + var + "_" + model)

    print(cmd_list)

    parallel_submitter(
        cmd_list,
        log_dir="./log_cmip5",
        logfilename_list=log_list,
        num_workers=num_cpus,
    )


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,20 @@
pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper(), model=mod)
# Get which area variable needed
print("Reading external variable attribute")
# pathDB = xs.addAttribute(pathDict, 'external_variables')
areacello = xs.findPaths("historical", "areacello", "fx", cmipTable="Ofx")
areacella = xs.findPaths("historical", "areacella", "fx")
# deduplicate because some models, like CESM2, need to grab the gn rather than gr area file
areacello = xs.findPaths(
"historical", "areacello", "fx", cmipTable="Ofx", deduplicate=False
)
areacella = xs.findPaths("historical", "areacella", "fx", deduplicate=False)
path_list = sorted(list(pathDict.keys()))
print("Number of datasets:", len(path_list))

cmd_list = []
log_list = []
model_list = xs.getGroupValues(pathDict, "model")
# Drop models with known issues that we're going to do by hand
model_list = [x for x in model_list if "FGOALS" not in x]
model_list = [x for x in model_list if x != "EC-Earth3"]
print(model_list)
for model in model_list:
skip = False
Expand All @@ -44,6 +49,7 @@
"/".join(path.split("/")[0:9])
+ "/%(realization)/"
+ "/".join(path.split("/")[10:13])
# + "/"+ path.split("/")[-2] + "/"
+ "/*/"
)
file_template = (
Expand All @@ -53,33 +59,37 @@
+ "_*-*.nc"
)

grid = path.split("/")[-3]

single = xs.getValuesForFacet(pathDict, "model", model)
empty = [{} for item in single]
d1 = zip(single, empty)
db = dict(d1)
db = xs.addAttribute(db, "external_variables")
try:
area_var = db[single[0]]["external_variables"]
except KeyError:
print("No external variables")
print("Guessing areacello")
area_var = "areacello"

# area_var = pathDB[path]["external_variables"]
# try:
area_var = db[single[0]]["external_variables"]
# except:
# print("No external variables")
# print("Guessing areacello")
# area_var = "areacello"
if area_var == "areacello": # Same for all realizations
# try:
apath = xs.getValuesForFacet(areacello, "model", model)
abase = os.path.basename(glob.glob(os.path.join(apath[0], "*"))[0])
area_path = os.path.join(apath[0], abase)
# except:
# print("No areacello for model ", model)
# print(apath)
# skip = True
## Make filename template
# area_path = "/".join(apath[0].split("/")[0:9]) + "/%(realization)/" + "/".join(apath[0].split("/")[10:])
if len(apath) > 0:
apath = [tmp for tmp in apath if tmp.split("/")[-3] == grid]
if len(apath) > 0:
abase = os.path.basename(glob.glob(os.path.join(apath[0], "*"))[-1])
area_path = os.path.join(apath[0], abase)
else:
print("wrong grid", model)
skip = True
else:
print("No values for facet", model)
skip = True
elif area_var == "areacella": # Different for each realization
apath = xs.getValuesForFacet(areacella, "model", model)
abase = os.path.basename(glob.glob(os.path.join(apath[0], "*"))[0])
apath = [tmp for tmp in apath if tmp.split("/")[-3] == grid]
abase = os.path.basename(glob.glob(os.path.join(apath[0], "*"))[-1])
abase = (
"_".join(abase.split("_")[0:4])
+ "_%(realization)_"
Expand All @@ -93,30 +103,31 @@
)
area_path = os.path.join(area_dir, abase)
else:
"Area variable not found."
print("Area variable not found for", model)
skip = True

if not skip:
cmd_list.append(
"python -u ice_driver.py -p parameter_file.py --case_id "
"./sea_ice_driver.py -p parameter_file.py --case_id "
+ model
+ " --test_data_set '"
+ " --test_data_set "
+ model
+ "' --test_data_path '"
+ " --test_data_path "
+ dir_template
+ "' --filename_template '"
+ " --filename_template "
+ file_template
+ "' --area_template '"
+ " --area_template "
+ area_path
+ "' --area_var "
+ " --area_var "
+ area_var
)
log_list.append("log_" + mip + "_" + var + "_" + model)
print(cmd_list)


parallel_submitter(
cmd_list,
log_dir="./log",
log_dir="./log_cmip6",
logfilename_list=log_list,
num_workers=num_cpus,
)
Loading