From 94a1d94f10cd1bfeeaa2c4566c0d0b7c562e473e Mon Sep 17 00:00:00 2001
From: Ana Ordonez
Date: Thu, 16 May 2024 15:01:14 -0700
Subject: [PATCH 1/3] add parallel set ups

---
 .../sea_ice/param/parallel_param_cmip5.py     |  29 ++++
 .../sea_ice/param/parallel_param_cmip6.py     |  39 +++++
 .../param/sea_ice_parallel_driver_cmip5.py    | 116 +++++++++++++++
 .../param/sea_ice_parallel_driver_cmip6.py    | 141 ++++++++++++++++++
 4 files changed, 325 insertions(+)
 create mode 100644 pcmdi_metrics/sea_ice/param/parallel_param_cmip5.py
 create mode 100644 pcmdi_metrics/sea_ice/param/parallel_param_cmip6.py
 create mode 100644 pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip5.py
 create mode 100644 pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip6.py

diff --git a/pcmdi_metrics/sea_ice/param/parallel_param_cmip5.py b/pcmdi_metrics/sea_ice/param/parallel_param_cmip5.py
new file mode 100644
index 000000000..6879cf2b8
--- /dev/null
+++ b/pcmdi_metrics/sea_ice/param/parallel_param_cmip5.py
@@ -0,0 +1,29 @@
+# CMIP5
+var = "sic"
+# realization=["r1i1p1","r2i1p1","r3i1p1","r4i1p1"]
+realization = "*"
+metrics_output_path = "/work/ordonez4/sea_ice/cmip5_2005/%(case_id)/"
+msyear = 1988
+meyear = 2005
+
+ModUnitsAdjust = (True, "multiply", 1e-2)
+AreaUnitsAdjust = (True, "multiply", 1e-6)
+
+
+# OSI-SAF data
+reference_data_path_nh = (
+    "/p/user_pub/PCMDIobs/obs4MIPs_input/EUMETSAT/OSI-SAF-450-a-3-0/v20231201/*nh*"
+)
+reference_data_path_sh = (
+    "/p/user_pub/PCMDIobs/obs4MIPs_input/EUMETSAT/OSI-SAF-450-a-3-0/v20231201/*sh*"
+)
+ObsUnitsAdjust = (True, "multiply", 1e-2)
+reference_data_set = "OSI-SAF"
+osyear = 1988
+oeyear = 2005
+obs_var = "ice_conc"
+ObsAreaUnitsAdjust = (False, 0, 0)
+obs_area_template = None  # km2
+obs_area_var = None
+obs_cell_area = 625
+plot = True
diff --git a/pcmdi_metrics/sea_ice/param/parallel_param_cmip6.py b/pcmdi_metrics/sea_ice/param/parallel_param_cmip6.py
new file mode 100644
index 000000000..99349e59c
--- /dev/null
+++ b/pcmdi_metrics/sea_ice/param/parallel_param_cmip6.py
@@ -0,0 +1,39 @@
+# CMIP6
+# =======
+# realization = ["r1i1p1f1","r2i1p1f1","r3i1p1f1","r4i1p1f1","r5i1p1f1"]
+realization = "*"
+var = "siconc"
+msyear = 1988
+meyear = 2014
+metrics_output_path = "/work/ordonez4/sea_ice/cmip6_2014/%(case_id)/"
+
+# CMIP5
+# var="sic"
+# realization=["r1i1p1","r2i1p1","r3i1p1","r4i1p1"]
+# realization="*"
+# metrics_output_path = "/work/ordonez4/sea_ice/cmip5_all/%(case_id)/"
+# msyear = 1988
+# meyear = 2005
+
+ModUnitsAdjust = (True, "multiply", 1e-2)
+AreaUnitsAdjust = (True, "multiply", 1e-6)
+
+
+# OSI-SAF data
+reference_data_path_nh = (
+    "/p/user_pub/PCMDIobs/obs4MIPs_input/EUMETSAT/OSI-SAF-450-a-3-0/v20231201/*nh*"
+)
+reference_data_path_sh = (
+    "/p/user_pub/PCMDIobs/obs4MIPs_input/EUMETSAT/OSI-SAF-450-a-3-0/v20231201/*sh*"
+)
+ObsUnitsAdjust = (True, "multiply", 1e-2)
+reference_data_set = "OSI-SAF"
+osyear = 1988
+oeyear = 2014
+# oeyear = 2005
+obs_var = "ice_conc"
+ObsAreaUnitsAdjust = (False, 0, 0)
+obs_area_template = None  # km2
+obs_area_var = None
+obs_cell_area = 625
+plot = True
diff --git a/pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip5.py b/pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip5.py
new file mode 100644
index 000000000..98d1b11e3
--- /dev/null
+++ b/pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip5.py
@@ -0,0 +1,116 @@
+"""Queue one sea_ice_driver.py command per CMIP5 model for parallel_submitter."""
+
+import glob
+import os
+
+import xsearch as xs
+
+from pcmdi_metrics.mean_climate.lib.pmp_parser import PMPParser
+from pcmdi_metrics.misc.scripts import parallel_submitter
+from pcmdi_metrics.precip_variability.lib import AddParserArgument
+
+num_cpus = 20
+
+# Read parameters
+P = PMPParser()
+P = AddParserArgument(P)
+param = P.get_parameter()
+# mip = "cmip6"
+mip = "cmip5"
+exp = "historical"
+var = param.var
+mod = None
+frq = "mon"
+
+if mod is None:
+    pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper())
+else:
+    pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper(), model=mod)
+# Get which area variable needed
+print("Reading external variable attribute")
+# deduplicate=False because some models, like CESM2, need to grab the gn rather than gr area file
+areacello = xs.findPaths(
+    "historical", "areacello", "fx", mip_era="CMIP5", deduplicate=False
+)
+path_list = sorted(list(pathDict.keys()))
+print("Number of datasets:", len(path_list))
+
+cmd_list = []
+log_list = []
+model_list = xs.getGroupValues(pathDict, "model")
+area_var = "areacello"
+print(model_list)
+for model in model_list:
+    skip = False
+    path = xs.getValuesForFacet(pathDict, "model", model)[0]
+    basename = os.path.basename(glob.glob(os.path.join(path, "*"))[0])
+
+    # TODO: Fix how the path gets sliced and indexed
+    # because I don't think it's consistent model-to-model
+    # for cmip5
+    dir_template = (
+        "/".join(path.split("/")[:-4])
+        + "/%(realization)/"
+        + "/".join(path.split("/")[-3:-1])
+        + "/"
+        # + "/*/"
+    )
+    file_template = (
+        "_".join(basename.split("_")[0:4])
+        + "_%(realization)"
+        # + "_"+basename.split("_")[5]
+        + "_*-*.nc"
+    )
+
+    grid = path.split("/")[-3]
+
+    single = xs.getValuesForFacet(pathDict, "model", model)
+    empty = [{} for item in single]
+    d1 = zip(single, empty)
+    db = dict(d1)
+
+    try:
+        apath = xs.getValuesForFacet(areacello, "model", model)[0]
+        print(apath)
+        area_path = glob.glob(apath + "/*nc")
+        if len(area_path) < 1:
+            area_path = glob.glob(apath + "/*/*nc")
+            if len(area_path) < 1:
+                skip = True
+                print("area path not found", model)
+                print(area_path)
+            else:
+                area_path = area_path[0]
+        else:
+            area_path = area_path[0]
+    except (KeyError, IndexError):
+        # An empty getValuesForFacet() result makes the [0] lookup above raise
+        # IndexError rather than KeyError; skip the model in both cases.
+        print("area path not found", model)
+        skip = True
+
+    if not skip:
+        cmd_list.append(
+            "./sea_ice_driver.py -p parameter_file_cmip5.py --case_id "
+            + model
+            + " --test_data_set "
+            + model
+            + " --test_data_path "
+            + dir_template
+            + " --filename_template "
+            + file_template
+            + " --area_template "
+            + area_path
+            + " --area_var "
+            + area_var
+        )
+        log_list.append("log_" + mip + "_" + var + "_" + model)
+print(cmd_list)
+
+
+parallel_submitter(
+    cmd_list,
+    log_dir="./log_cmip5",
+    logfilename_list=log_list,
+    num_workers=num_cpus,
+)
diff --git a/pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip6.py b/pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip6.py
new file mode 100644
index 000000000..d97cd3de9
--- /dev/null
+++ b/pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip6.py
@@ -0,0 +1,141 @@
+"""Queue one sea_ice_driver.py command per CMIP6 model for parallel_submitter."""
+
+import glob
+import os
+
+import xsearch as xs
+
+from pcmdi_metrics.mean_climate.lib.pmp_parser import PMPParser
+from pcmdi_metrics.misc.scripts import parallel_submitter
+from pcmdi_metrics.precip_variability.lib import AddParserArgument
+
+num_cpus = 20
+
+# Read parameters
+P = PMPParser()
+P = AddParserArgument(P)
+param = P.get_parameter()
+mip = "cmip6"
+exp = "historical"
+var = param.var
+mod = None
+frq = "mon"
+
+if mod is None:
+    pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper())
+else:
+    pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper(), model=mod)
+# Get which area variable needed
+print("Reading external variable attribute")
+# deduplicate=False because some models, like CESM2, need to grab the gn rather than gr area file
+areacello = xs.findPaths(
+    "historical", "areacello", "fx", cmipTable="Ofx", deduplicate=False
+)
+areacella = xs.findPaths("historical", "areacella", "fx", deduplicate=False)
+path_list = sorted(list(pathDict.keys()))
+print("Number of datasets:", len(path_list))
+
+cmd_list = []
+log_list = []
+model_list = xs.getGroupValues(pathDict, "model")
+# Drop models with known issues that we're going to do by hand
+model_list = [x for x in model_list if "FGOALS" not in x]
+model_list = [x for x in model_list if x != "EC-Earth3"]
+print(model_list)
+for model in model_list:
+    skip = False
+    path = xs.getValuesForFacet(pathDict, "model", model)[0]
+    basename = os.path.basename(glob.glob(os.path.join(path, "*"))[0])
+
+    dir_template = (
+        "/".join(path.split("/")[0:9])
+        + "/%(realization)/"
+        + "/".join(path.split("/")[10:13])
+        # + "/"+ path.split("/")[-2] + "/"
+        + "/*/"
+    )
+    file_template = (
+        "_".join(basename.split("_")[0:4])
+        + "_%(realization)_"
+        + basename.split("_")[5]
+        + "_*-*.nc"
+    )
+
+    grid = path.split("/")[-3]
+
+    single = xs.getValuesForFacet(pathDict, "model", model)
+    empty = [{} for item in single]
+    d1 = zip(single, empty)
+    db = dict(d1)
+    db = xs.addAttribute(db, "external_variables")
+    try:
+        area_var = db[single[0]]["external_variables"]
+    except KeyError:
+        print("No external variables")
+        print("Guessing areacello")
+        area_var = "areacello"
+
+    if area_var == "areacello":  # Same for all realizations
+        apath = xs.getValuesForFacet(areacello, "model", model)
+        if len(apath) > 0:
+            apath = [tmp for tmp in apath if tmp.split("/")[-3] == grid]
+            if len(apath) > 0:
+                abase = os.path.basename(glob.glob(os.path.join(apath[0], "*"))[-1])
+                area_path = os.path.join(apath[0], abase)
+            else:
+                print("wrong grid", model)
+                skip = True
+        else:
+            print("No values for facet", model)
+            skip = True
+    elif area_var == "areacella":  # Different for each realization
+        apath = xs.getValuesForFacet(areacella, "model", model)
+        apath = [tmp for tmp in apath if tmp.split("/")[-3] == grid]
+        # Guard the apath[0] lookups below: the grid filter can leave the list
+        # empty, in which case the model is skipped as in the areacello branch.
+        if len(apath) > 0:
+            abase = os.path.basename(glob.glob(os.path.join(apath[0], "*"))[-1])
+            abase = (
+                "_".join(abase.split("_")[0:4])
+                + "_%(realization)_"
+                + "_".join(abase.split("_")[5:])
+            )
+            # Make filename template
+            area_dir = (
+                "/".join(apath[0].split("/")[0:9])
+                + "/%(realization)/"
+                + "/".join(apath[0].split("/")[10:])
+            )
+            area_path = os.path.join(area_dir, abase)
+        else:
+            print("No areacella found on grid", grid, "for", model)
+            skip = True
+    else:
+        print("Area variable not found for", model)
+        skip = True
+
+    if not skip:
+        cmd_list.append(
+            "./sea_ice_driver.py -p parameter_file.py --case_id "
+            + model
+            + " --test_data_set "
+            + model
+            + " --test_data_path "
+            + dir_template
+            + " --filename_template "
+            + file_template
+            + " --area_template "
+            + area_path
+            + " --area_var "
+            + area_var
+        )
+        log_list.append("log_" + mip + "_" + var + "_" + model)
+print(cmd_list)
+
+
+parallel_submitter(
+    cmd_list,
+    log_dir="./log_cmip6",
+    logfilename_list=log_list,
+    num_workers=num_cpus,
+)
From d39941cd10da4378b42853062bd936cfcd216d95 Mon Sep 17 00:00:00 2001
From: Ana Ordonez
Date: Thu, 16 May 2024 15:03:35 -0700
Subject: [PATCH 2/3] move files

---
 .../sea_ice/{param => scripts}/sea_ice_parallel_driver_cmip5.py | 0
 .../sea_ice/{param => scripts}/sea_ice_parallel_driver_cmip6.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename pcmdi_metrics/sea_ice/{param => scripts}/sea_ice_parallel_driver_cmip5.py (100%)
 rename pcmdi_metrics/sea_ice/{param => scripts}/sea_ice_parallel_driver_cmip6.py (100%)

diff --git a/pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip5.py b/pcmdi_metrics/sea_ice/scripts/sea_ice_parallel_driver_cmip5.py
similarity index 100%
rename from pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip5.py
rename to pcmdi_metrics/sea_ice/scripts/sea_ice_parallel_driver_cmip5.py
diff --git a/pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip6.py b/pcmdi_metrics/sea_ice/scripts/sea_ice_parallel_driver_cmip6.py
similarity index 100%
rename from pcmdi_metrics/sea_ice/param/sea_ice_parallel_driver_cmip6.py
rename to pcmdi_metrics/sea_ice/scripts/sea_ice_parallel_driver_cmip6.py
From 7e21afaad4eceb13d0a4e5eeb8c9346fa8dcce8e Mon Sep 17 00:00:00 2001
From: Ana Ordonez
Date: Thu, 16 May 2024 15:04:25 -0700
Subject: [PATCH 3/3] remove file

---
 pcmdi_metrics/sea_ice/sea_ice_parallel.py | 122 ----------------------
 1 file changed, 122 deletions(-)
 delete mode 100644 pcmdi_metrics/sea_ice/sea_ice_parallel.py

diff --git a/pcmdi_metrics/sea_ice/sea_ice_parallel.py b/pcmdi_metrics/sea_ice/sea_ice_parallel.py
deleted file mode 100644
index 4565b711d..000000000
--- a/pcmdi_metrics/sea_ice/sea_ice_parallel.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import glob
-import os
-
-import xsearch as xs
-
-from pcmdi_metrics.mean_climate.lib.pmp_parser import PMPParser
-from pcmdi_metrics.misc.scripts import parallel_submitter
-from pcmdi_metrics.precip_variability.lib import AddParserArgument
-
-num_cpus = 20
-
-# Read parameters
-P = PMPParser()
-P = AddParserArgument(P)
-param = P.get_parameter()
-mip = "cmip6"
-exp = "historical"
-var = param.var
-mod = None
-frq = "mon"
-
-if mod is None:
-    pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper())
-else:
-    pathDict = xs.findPaths(exp, var, frq, mip_era=mip.upper(), model=mod)
-# Get which area variable needed
-print("Reading external variable attribute")
-# pathDB = xs.addAttribute(pathDict, 'external_variables')
-areacello = xs.findPaths("historical", "areacello", "fx", cmipTable="Ofx")
-areacella = xs.findPaths("historical", "areacella", "fx")
-path_list = sorted(list(pathDict.keys()))
-print("Number of datasets:", len(path_list))
-
-cmd_list = []
-log_list = []
-model_list = xs.getGroupValues(pathDict, "model")
-print(model_list)
-for model in model_list:
-    skip = False
-    path = xs.getValuesForFacet(pathDict, "model", model)[0]
-    basename = os.path.basename(glob.glob(os.path.join(path, "*"))[0])
-
-    dir_template = (
-        "/".join(path.split("/")[0:9])
-        + "/%(realization)/"
-        + "/".join(path.split("/")[10:13])
-        + "/*/"
-    )
-    file_template = (
-        "_".join(basename.split("_")[0:4])
-        + "_%(realization)_"
-        + basename.split("_")[5]
-        + "_*-*.nc"
-    )
-
-    single = xs.getValuesForFacet(pathDict, "model", model)
-    empty = [{} for item in single]
-    d1 = zip(single, empty)
-    db = dict(d1)
-    db = xs.addAttribute(db, "external_variables")
-
-    # area_var = pathDB[path]["external_variables"]
-    # try:
-    area_var = db[single[0]]["external_variables"]
-    # except:
-    # print("No external variables")
-    # print("Guessing areacello")
-    # area_var = "areacello"
-    if area_var == "areacello":  # Same for all realizations
-        # try:
-        apath = xs.getValuesForFacet(areacello, "model", model)
-        abase = os.path.basename(glob.glob(os.path.join(apath[0], "*"))[0])
-        area_path = os.path.join(apath[0], abase)
-        # except:
-        # print("No areacello for model ", model)
-        # print(apath)
-        # skip = True
-        ## Make filename template
-        # area_path = "/".join(apath[0].split("/")[0:9]) + "/%(realization)/" + "/".join(apath[0].split("/")[10:])
-    elif area_var == "areacella":  # Different for each realization
-        apath = xs.getValuesForFacet(areacella, "model", model)
-        abase = os.path.basename(glob.glob(os.path.join(apath[0], "*"))[0])
-        abase = (
-            "_".join(abase.split("_")[0:4])
-            + "_%(realization)_"
-            + "_".join(abase.split("_")[5:])
-        )
-        # Make filename template
-        area_dir = (
-            "/".join(apath[0].split("/")[0:9])
-            + "/%(realization)/"
-            + "/".join(apath[0].split("/")[10:])
-        )
-        area_path = os.path.join(area_dir, abase)
-    else:
-        "Area variable not found."
-        skip = True
-
-    if not skip:
-        cmd_list.append(
-            "python -u ice_driver.py -p parameter_file.py --case_id "
-            + model
-            + " --test_data_set '"
-            + model
-            + "' --test_data_path '"
-            + dir_template
-            + "' --filename_template '"
-            + file_template
-            + "' --area_template '"
-            + area_path
-            + "' --area_var "
-            + area_var
-        )
-        log_list.append("log_" + mip + "_" + var + "_" + model)
-
-
-parallel_submitter(
-    cmd_list,
-    log_dir="./log",
-    logfilename_list=log_list,
-    num_workers=num_cpus,
-)