From 98e8061db56ed3de8433860e504313a6c6336689 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 19:10:30 +0000 Subject: [PATCH 01/19] Add OrderedList class to report --- external/report/report/__init__.py | 9 ++++++++- external/report/report/create_report.py | 11 ++++++++++- external/report/tests/test_report.py | 7 ++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/external/report/report/__init__.py b/external/report/report/__init__.py index 0d51642ac7..2a0760086e 100644 --- a/external/report/report/__init__.py +++ b/external/report/report/__init__.py @@ -1,3 +1,10 @@ -from .create_report import create_html, insert_report_figure, Metrics, Metadata, Link +from .create_report import ( + create_html, + insert_report_figure, + Metrics, + Metadata, + Link, + OrderedList, +) __version__ = "0.1.0" diff --git a/external/report/report/create_report.py b/external/report/report/create_report.py index 28f3ca378d..0f6ccbd7f5 100644 --- a/external/report/report/create_report.py +++ b/external/report/report/create_report.py @@ -1,6 +1,6 @@ import datetime import os -from typing import Mapping, Sequence, Union +from typing import Any, Mapping, Sequence, Union from jinja2 import Template from pytz import timezone @@ -90,6 +90,15 @@ def __repr__(self) -> str: return f'{self.tag}' +class OrderedList: + def __init__(self, items: Sequence[Any]): + self.items = items + + def __repr__(self) -> str: + items_li = [f"
<li>{item}</li>" for item in self.items] + return "<ol>\n" + "\n".join(items_li) + "\n</ol>
    " + + def resolve_plot(obj): if isinstance(obj, str): return ImagePlot(obj) diff --git a/external/report/tests/test_report.py b/external/report/tests/test_report.py index 22bd22cb6b..c13ad31f55 100644 --- a/external/report/tests/test_report.py +++ b/external/report/tests/test_report.py @@ -1,5 +1,5 @@ import os -from report import __version__, create_html +from report import __version__, create_html, OrderedList from report.create_report import _save_figure, insert_report_figure @@ -46,3 +46,8 @@ def test__save_figure(tmpdir): with open(os.path.join(output_dir, filepath_relative_to_report), "r") as f: saved_data = f.read() assert saved_data.replace("\n", "") == fig.content + + +def test_OrderedList_repr(): + result = str(OrderedList(["item1", "item2"])) + assert result == "
<ol>\n<li>item1</li>\n<li>item2</li>\n</ol>
    " From 41976a2a5482a9d805c9ecbf3ed7d6fad593e043 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 19:10:54 +0000 Subject: [PATCH 02/19] Add diagnostic of precipitation histogram --- .../diagnostics/prognostic_run/compute.py | 18 ++++++++++++++++++ .../diagnostics/prognostic_run/constants.py | 3 +++ 2 files changed, 21 insertions(+) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index 1ed61a4338..783618718c 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -37,6 +37,7 @@ from fv3net.diagnostics.prognostic_run import diurnal_cycle from fv3net.diagnostics.prognostic_run import transform from fv3net.diagnostics.prognostic_run.constants import ( + HISTOGRAM_BINS, HORIZONTAL_DIMS, DiagArg, GLOBAL_AVERAGE_DYCORE_VARS, @@ -503,6 +504,23 @@ def _diurnal_func( return _assign_diagnostic_time_attrs(diag, prognostic) +@add_to_diags("physics") +@diag_finalizer("histogram") +@transform.apply("resample_time", "3H", inner_join=True) +@transform.apply("subset_variables", HISTOGRAM_BINS.keys()) +def compute_precip_histogram(prognostic, verification, grid): + logger.info("Computing precipitation histogram") + count_ds = xr.Dataset() + for varname in prognostic: + count, bins = np.histogram( + prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True + ) + count_da = xr.DataArray(count, coords={f"{varname}_bin": bins[:-1]}) + count_da[f"{varname}_bin"].attrs["units"] = prognostic[varname].units + count_ds = xr.merge([count_ds, count_da.rename(varname)]) + return _assign_diagnostic_time_attrs(count_ds, prognostic) + + def register_parser(subparsers): parser = subparsers.add_parser("save", help="Compute the prognostic run diags.") parser.add_argument("url", help="Prognostic run output location.") diff --git 
a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py index 45a6471432..0052ed1391 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py @@ -1,3 +1,4 @@ +import numpy as np import xarray as xr from typing import Tuple @@ -135,3 +136,5 @@ "dQu", "dQv", ] + +HISTOGRAM_BINS = {"total_precip_to_surface": np.logspace(-1, np.log10(500), 101)} From df2db9a26572914b9f5db5a341a6a6f276ee582a Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 22:48:53 +0000 Subject: [PATCH 03/19] Stop requiring two runs if making report from folder --- .../diagnostics/prognostic_run/computed_diagnostics.py | 5 ----- workflows/prognostic_run_diags/tests/test_integration.sh | 3 --- 2 files changed, 8 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py index a3e0c3cefd..cae780bc7b 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py @@ -274,11 +274,6 @@ def detect_folders( bucket: str, fs: fsspec.AbstractFileSystem, ) -> Mapping[str, DiagnosticFolder]: diag_ncs = fs.glob(os.path.join(bucket, "*", "diags.nc")) - if len(diag_ncs) < 2: - raise ValueError( - "Plots require more than 1 diagnostic directory in" - f" {bucket} for holoviews plots to display correctly." 
- ) return { Path(url).parent.name: DiagnosticFolder(fs, Path(url).parent.as_posix()) for url in diag_ncs diff --git a/workflows/prognostic_run_diags/tests/test_integration.sh b/workflows/prognostic_run_diags/tests/test_integration.sh index 84edfc4035..41d137de5e 100644 --- a/workflows/prognostic_run_diags/tests/test_integration.sh +++ b/workflows/prognostic_run_diags/tests/test_integration.sh @@ -21,9 +21,6 @@ gsutil cp /tmp/$random/metrics.json $OUTPUT/run1/metrics.json # generate movies for short sample prognostic run prognostic_run_diags movie --n_jobs 1 --n_timesteps 2 $RUN $OUTPUT/run1 -# make a second copy of diags/metrics since generate_report.py needs at least two runs -gsutil -m cp -r $OUTPUT/run1 $OUTPUT/run2 - # generate report based on diagnostics computed above prognostic_run_diags report $OUTPUT $OUTPUT From 56bc134a4bd00a763cd9a1146d370296a8c8b9f4 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 22:49:19 +0000 Subject: [PATCH 04/19] Better OrderedList API --- external/report/report/create_report.py | 2 +- external/report/tests/test_report.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/report/report/create_report.py b/external/report/report/create_report.py index 0f6ccbd7f5..a7b332934a 100644 --- a/external/report/report/create_report.py +++ b/external/report/report/create_report.py @@ -91,7 +91,7 @@ def __repr__(self) -> str: class OrderedList: - def __init__(self, items: Sequence[Any]): + def __init__(self, *items: Any): self.items = items def __repr__(self) -> str: diff --git a/external/report/tests/test_report.py b/external/report/tests/test_report.py index c13ad31f55..a5131478b8 100644 --- a/external/report/tests/test_report.py +++ b/external/report/tests/test_report.py @@ -49,5 +49,5 @@ def test__save_figure(tmpdir): def test_OrderedList_repr(): - result = str(OrderedList(["item1", "item2"])) + result = str(OrderedList("item1", "item2")) assert result == "
<ol>\n<li>item1</li>\n<li>item2</li>\n</ol>
    " From ebccbef64716fcec466b2c43b66490d7cfb7c06e Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 22:50:48 +0000 Subject: [PATCH 05/19] Set histogram diagnostic coordinate to bin midpoints --- .../diagnostics/prognostic_run/compute.py | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index 783618718c..501e9774bc 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -12,15 +12,12 @@ grouping contains outputs from the physics routines (`sfc_dt_atmos.tile*.nc` and `diags.zarr`). """ -import os import sys import datetime -import tempfile import intake import numpy as np import xarray as xr -import shutil from dask.diagnostics import ProgressBar import fsspec @@ -230,16 +227,6 @@ def _assign_diagnostic_time_attrs( return diagnostics_ds -def dump_nc(ds: xr.Dataset, f): - # to_netcdf closes file, which will delete the buffer - # need to use a buffer since seek doesn't work with GCSFS file objects - with tempfile.TemporaryDirectory() as dirname: - url = os.path.join(dirname, "tmp.nc") - ds.to_netcdf(url, engine="h5netcdf") - with open(url, "rb") as tmp1: - shutil.copyfileobj(tmp1, f) - - @add_to_diags("dycore") @diag_finalizer("rms_global") @transform.apply("resample_time", "3H", inner_join=True) @@ -508,17 +495,28 @@ def _diurnal_func( @diag_finalizer("histogram") @transform.apply("resample_time", "3H", inner_join=True) @transform.apply("subset_variables", HISTOGRAM_BINS.keys()) -def compute_precip_histogram(prognostic, verification, grid): - logger.info("Computing precipitation histogram") - count_ds = xr.Dataset() - for varname in prognostic: +def compute_histogram(prognostic, verification, grid): + logger.info("Computing histograms for 
physics diagnostics") + counts = xr.Dataset() + for varname in prognostic.data_vars: + # bins = HISTOGRAM_BINS[varname] + # counts[varname] = prognostic.groupby_bins(varname, bins).count()[varname] + # bin_midpoints = [x.item().mid for x in counts[f"{varname}_bins"]] + # counts = counts.assign_coords({f"{varname}_bins": bin_midpoints}) + # counts[varname] /= counts[varname].sum() + # counts[varname] /= bins[1:] - bins[:1] + # counts[varname].attrs["units"] = f"({prognostic[varname].units})^-1" + # counts[varname].attrs["long_name"] = "Frequency" + # counts[f"{varname}_bins"].attrs = prognostic[varname].attrs count, bins = np.histogram( prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True ) - count_da = xr.DataArray(count, coords={f"{varname}_bin": bins[:-1]}) - count_da[f"{varname}_bin"].attrs["units"] = prognostic[varname].units - count_ds = xr.merge([count_ds, count_da.rename(varname)]) - return _assign_diagnostic_time_attrs(count_ds, prognostic) + bin_midpoints = 0.5 * (bins[:-1] + bins[1:]) + coords = {f"{varname}_bins": bin_midpoints} + count_da = xr.DataArray(count, coords=coords, dims=list(coords.keys())) + count_da[f"{varname}_bins"].attrs["units"] = prognostic[varname].units + counts[varname] = count_da + return _assign_diagnostic_time_attrs(counts, prognostic) def register_parser(subparsers): From 29a8f8054856ccfcab5c99a04132baeaca927d75 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 22:52:13 +0000 Subject: [PATCH 06/19] Add process.html page with diurnal cycle and precip PDF --- .../prognostic_run/views/matplotlib.py | 25 +++++++++- .../prognostic_run/views/static_report.py | 50 +++++++++++++------ 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py index 1ac3cc3fe4..fd57255ff1 100644 --- 
a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py @@ -30,9 +30,9 @@ } -def fig_to_b64(fig, format="png"): +def fig_to_b64(fig, format="png", dpi=None): pic_IObytes = io.BytesIO() - fig.savefig(pic_IObytes, format=format, bbox_inches="tight") + fig.savefig(pic_IObytes, format=format, bbox_inches="tight", dpi=dpi) pic_IObytes.seek(0) pic_hash = base64.b64encode(pic_IObytes.read()) return f"data:image/png;base64, " + pic_hash.decode() @@ -179,6 +179,27 @@ def plot_cubed_sphere_map( ) +def plot_histogram(run_diags: RunDiagnostics, varname: str) -> raw_html: + """Plot 1D histogram of varname overlaid across runs.""" + + logging.info(f"plotting {varname}") + fig, ax = plt.subplots() + bin_name = varname.replace("histogram", "bins") + for run in run_diags.runs: + v = run_diags.get_variable(run, varname) + ax.step(v[bin_name], v, label=run, where="post", linewidth=1) + ax.set_xlabel(f"{v.long_name} [{v.units}]") + ax.set_ylabel(f"Frequency [({v.units})^-1]") + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlim([v[bin_name].values[0], v[bin_name].values[-1]]) + ax.legend() + fig.tight_layout() + data = fig_to_b64(fig, dpi=150) + plt.close(fig) + return raw_html(f'') + + def _render_map_title( metrics: RunMetrics, variable: str, run: str, metrics_for_title: Mapping[str, str], ) -> str: diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py index 9696502557..8b80c97a83 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py @@ -13,9 +13,14 @@ RunMetrics, ) -from report import create_html, Link +from report import create_html, Link, OrderedList from report.holoviews import 
HVPlot, get_html_header -from .matplotlib import plot_2d_matplotlib, plot_cubed_sphere_map, raw_html +from .matplotlib import ( + plot_2d_matplotlib, + plot_cubed_sphere_map, + raw_html, + plot_histogram, +) import logging @@ -75,9 +80,7 @@ def make_plots(self, data) -> Iterable: yield func(data) -def plot_1d( - run_diags: RunDiagnostics, varfilter: str, run_attr_name: str = "run", -) -> HVPlot: +def plot_1d(run_diags: RunDiagnostics, varfilter: str) -> HVPlot: """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs. All matching diagnostics must be 1D.""" p = hv.Cycle("Colorblind") @@ -95,10 +98,7 @@ def plot_1d( def plot_1d_min_max_with_region_bar( - run_diags: RunDiagnostics, - varfilter_min: str, - varfilter_max: str, - run_attr_name: str = "run", + run_diags: RunDiagnostics, varfilter_min: str, varfilter_max: str, ) -> HVPlot: """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs. All matching diagnostics must be 1D.""" @@ -123,9 +123,7 @@ def plot_1d_min_max_with_region_bar( return HVPlot(_set_opts_and_overlay(hmap)) -def plot_1d_with_region_bar( - run_diags: RunDiagnostics, varfilter: str, run_attr_name: str = "run" -) -> HVPlot: +def plot_1d_with_region_bar(run_diags: RunDiagnostics, varfilter: str) -> HVPlot: """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs. Region will be selectable through a drop-down bar. Region is assumed to be part of variable name after last underscore. 
All matching diagnostics must be 1D.""" @@ -189,6 +187,7 @@ def diurnal_component_plot( hovmoller_plot_manager = PlotManager() zonal_pressure_plot_manager = PlotManager() diurnal_plot_manager = PlotManager() +histogram_plot_manager = PlotManager() # this will be passed the data from the metrics.json files metrics_plot_manager = PlotManager() @@ -291,6 +290,11 @@ def diurnal_cycle_component_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot: return diurnal_component_plot(diagnostics) +@histogram_plot_manager.register +def histogram_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot: + return plot_histogram(diagnostics, "total_precip_to_surface_histogram") + + # Routines for plotting the "metrics" # New plotting routines can be registered here. @metrics_plot_manager.register @@ -325,12 +329,14 @@ def generic_metric_plot(metrics: RunMetrics, metric_type: str) -> hv.HoloMap: return HVPlot(hmap.opts(**bar_opts)) -navigation = [ +navigation = OrderedList( Link("Home", "index.html"), + Link("Process diagnostics", "process.html"), Link("Latitude versus time hovmoller", "hovmoller.html"), Link("Time-mean maps", "maps.html"), Link("Time-mean zonal-pressure profiles", "zonal_pressure.html"), -] +) +navigation = [navigation] # must be iterable for Jinja HTML template def render_index(metadata, diagnostics, metrics, movie_links): @@ -338,7 +344,6 @@ def render_index(metadata, diagnostics, metrics, movie_links): "Links": navigation, "Timeseries": list(timeseries_plot_manager.make_plots(diagnostics)), "Zonal mean": list(zonal_mean_plot_manager.make_plots(diagnostics)), - "Diurnal cycle": list(diurnal_plot_manager.make_plots(diagnostics)), } if not metrics.empty: @@ -398,6 +403,20 @@ def render_zonal_pressures(metadata, diagnostics): ) +def render_process_diagnostics(metadata, diagnostics): + sections = { + "Links": navigation, + "Diurnal cycle": list(diurnal_plot_manager.make_plots(diagnostics)), + "Precipitation histogram": list(histogram_plot_manager.make_plots(diagnostics)), + 
} + return create_html( + title="Process diagnostics", + metadata=metadata, + sections=sections, + html_header=get_html_header(), + ) + + def _html_link(url, tag): return f"{tag}" @@ -427,6 +446,7 @@ def make_report(computed_diagnostics: ComputedDiagnosticsList, output): "hovmoller.html": render_hovmollers(metadata, diagnostics), "maps.html": render_maps(metadata, diagnostics, metrics), "zonal_pressure.html": render_zonal_pressures(metadata, diagnostics), + "process_diagnostics.html": render_process_diagnostics(metadata, diagnostics), } for filename, html in pages.items(): From 53ce8fd802c8662784c904e3d0b0245247e29dc7 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 23:11:44 +0000 Subject: [PATCH 07/19] Remove obsolete failing units tests --- .../tests/test_computed_diagnostics.py | 10 ------- .../tests/test_savediags.py | 29 ------------------- 2 files changed, 39 deletions(-) diff --git a/workflows/prognostic_run_diags/tests/test_computed_diagnostics.py b/workflows/prognostic_run_diags/tests/test_computed_diagnostics.py index 87f452f1eb..6db0c37e81 100644 --- a/workflows/prognostic_run_diags/tests/test_computed_diagnostics.py +++ b/workflows/prognostic_run_diags/tests/test_computed_diagnostics.py @@ -49,16 +49,6 @@ def test_ComputedDiagnosticsList_from_urls(): assert isinstance(result.folders["1"], DiagnosticFolder) -def test_detect_folders_fail_less_than_2(tmpdir): - - fs = fsspec.filesystem("file") - - tmpdir.mkdir("rundir1").join("diags.nc").write("foobar") - - with pytest.raises(ValueError): - detect_folders(tmpdir, fs) - - def test_get_movie_links(tmpdir): domain = "http://www.domain.com" rdirs = ["rundir1", "rundir2"] diff --git a/workflows/prognostic_run_diags/tests/test_savediags.py b/workflows/prognostic_run_diags/tests/test_savediags.py index f6fa505dd1..5c70b990f1 100644 --- a/workflows/prognostic_run_diags/tests/test_savediags.py +++ b/workflows/prognostic_run_diags/tests/test_savediags.py @@ -2,8 +2,6 @@ import cftime import 
numpy as np import xarray as xr -import fsspec -from unittest.mock import Mock import pytest @@ -29,33 +27,6 @@ def grid(): return xr.open_dataset("grid.nc").load() -def test_dump_nc(tmpdir): - ds = xr.Dataset({"a": (["x"], [1.0])}) - - path = str(tmpdir.join("data.nc")) - with fsspec.open(path, "wb") as f: - savediags.dump_nc(ds, f) - - ds_compare = xr.open_dataset(path) - xr.testing.assert_equal(ds, ds_compare) - - -def test_dump_nc_no_seek(): - """ - GCSFS file objects raise an error when seek is called in write mode:: - - if not self.mode == "rb": - raise ValueError("Seek only available in read mode") - ValueError: Seek only available in read mode - - """ - ds = xr.Dataset({"a": (["x"], [1.0])}) - m = Mock() - - savediags.dump_nc(ds, m) - m.seek.assert_not_called() - - @pytest.mark.parametrize("func", savediags._DIAG_FNS) def test_compute_diags_succeeds(func, resampled, verification, grid): func(resampled, verification, grid) From 7085fc91158108ee9e84470e1a8b700191aefb2d Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 21:17:01 +0000 Subject: [PATCH 08/19] Case HISTOGRAM_BINS.keys() to list --- .../fv3net/diagnostics/prognostic_run/compute.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index 501e9774bc..d4bf49dd8d 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -494,20 +494,11 @@ def _diurnal_func( @add_to_diags("physics") @diag_finalizer("histogram") @transform.apply("resample_time", "3H", inner_join=True) -@transform.apply("subset_variables", HISTOGRAM_BINS.keys()) +@transform.apply("subset_variables", list(HISTOGRAM_BINS.keys())) def compute_histogram(prognostic, verification, grid): logger.info("Computing histograms for 
physics diagnostics") counts = xr.Dataset() for varname in prognostic.data_vars: - # bins = HISTOGRAM_BINS[varname] - # counts[varname] = prognostic.groupby_bins(varname, bins).count()[varname] - # bin_midpoints = [x.item().mid for x in counts[f"{varname}_bins"]] - # counts = counts.assign_coords({f"{varname}_bins": bin_midpoints}) - # counts[varname] /= counts[varname].sum() - # counts[varname] /= bins[1:] - bins[:1] - # counts[varname].attrs["units"] = f"({prognostic[varname].units})^-1" - # counts[varname].attrs["long_name"] = "Frequency" - # counts[f"{varname}_bins"].attrs = prognostic[varname].attrs count, bins = np.histogram( prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True ) From 095423dc0a9842d5607ab307d77fc838378233ff Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 23:09:06 +0000 Subject: [PATCH 09/19] Save bin widths to allow accurate cdf calculation --- .../fv3net/diagnostics/prognostic_run/compute.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index d4bf49dd8d..109ee9814b 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -502,11 +502,13 @@ def compute_histogram(prognostic, verification, grid): count, bins = np.histogram( prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True ) - bin_midpoints = 0.5 * (bins[:-1] + bins[1:]) - coords = {f"{varname}_bins": bin_midpoints} + coords = {f"{varname}_bins": bins[:-1]} + width = bins[1:] - bins[:-1] + width_da = xr.DataArray(width, coords=coords, dims=list(coords.keys())) count_da = xr.DataArray(count, coords=coords, dims=list(coords.keys())) count_da[f"{varname}_bins"].attrs["units"] = prognostic[varname].units counts[varname] = count_da + 
counts[f"{varname}_bin_width"] = width_da return _assign_diagnostic_time_attrs(counts, prognostic) From 02d5be226a2eaaf51dde6e9ad0b2c37b4dd3f298 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 23:09:29 +0000 Subject: [PATCH 10/19] Add metric for various percentiles --- .../diagnostics/prognostic_run/metrics.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index a5b50202d6..353bad8583 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -18,6 +18,9 @@ _METRICS = [] GRID_VARS = ["lon", "lat", "lonb", "latb", "area"] +QUANTILE_LIMITS = { + "total_precip_to_surface": ((0.1, 1), (1, 10), (10, 100), (100, None)) +} def grab_diag(ds, name): @@ -138,6 +141,25 @@ def rmse_time_mean(diags): return rms_of_time_mean_bias +for percentile in [25, 50, 75, 90, 99, 99.9]: + + @add_to_metrics(f"percentile_{percentile}") + def time_and_global_mean_bias(diags, p=percentile): + histogram = grab_diag(diags, "histogram") + percentiles = xr.Dataset() + data_vars = [v for v in histogram.data_vars if not v.endswith("bin_width")] + for varname in data_vars: + bins = histogram[f"{varname}_bins"].values + bin_width = histogram[f"{varname}_bin_width"] + bin_midpoints = bins + 0.5 * bin_width + cumulative_distribution = np.cumsum(histogram[varname][:-1] * bin_width) + percentiles[varname] = bin_midpoints[ + np.argmin(np.abs(cumulative_distribution.values - p / 100)) + ] + restore_units(histogram, percentiles) + return percentiles + + def restore_units(source, target): for variable in target: target[variable].attrs["units"] = source[variable].attrs["units"] From 5647c1ea734bcbe895d34d286842cf5b2fb2b573 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 
23:09:54 +0000 Subject: [PATCH 11/19] Delete unused constant --- .../fv3net/diagnostics/prognostic_run/metrics.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index 353bad8583..09a996905e 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -18,9 +18,6 @@ _METRICS = [] GRID_VARS = ["lon", "lat", "lonb", "latb", "area"] -QUANTILE_LIMITS = { - "total_precip_to_surface": ((0.1, 1), (1, 10), (10, 100), (100, None)) -} def grab_diag(ds, name): From eb099c95666aee2bdf4a8cae7f74a3a4e286b016 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 23:11:23 +0000 Subject: [PATCH 12/19] Rename function to not be duplicated --- .../fv3net/diagnostics/prognostic_run/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index 09a996905e..c4d22ae0f2 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -141,7 +141,7 @@ def rmse_time_mean(diags): for percentile in [25, 50, 75, 90, 99, 99.9]: @add_to_metrics(f"percentile_{percentile}") - def time_and_global_mean_bias(diags, p=percentile): + def percentile_metric(diags, p=percentile): histogram = grab_diag(diags, "histogram") percentiles = xr.Dataset() data_vars = [v for v in histogram.data_vars if not v.endswith("bin_width")] From caec3f91df2cd45d70012497afc1e997d3dc7237 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 23:28:12 +0000 Subject: [PATCH 13/19] Fix navigation link --- 
.../fv3net/diagnostics/prognostic_run/views/static_report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py index 8b80c97a83..c125678fb7 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py @@ -331,12 +331,12 @@ def generic_metric_plot(metrics: RunMetrics, metric_type: str) -> hv.HoloMap: navigation = OrderedList( Link("Home", "index.html"), - Link("Process diagnostics", "process.html"), + Link("Process diagnostics", "process_diagnostics.html"), Link("Latitude versus time hovmoller", "hovmoller.html"), Link("Time-mean maps", "maps.html"), Link("Time-mean zonal-pressure profiles", "zonal_pressure.html"), ) -navigation = [navigation] # must be iterable for Jinja HTML template +navigation = [navigation] # must be iterable for create_html template def render_index(metadata, diagnostics, metrics, movie_links): From ee55d102d4ac64df0035bdca757af46df0c03bec Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 19:04:28 +0000 Subject: [PATCH 14/19] Add table of precip percentiles to process page --- .../diagnostics/prognostic_run/constants.py | 4 ++- .../diagnostics/prognostic_run/metrics.py | 4 +-- .../prognostic_run/views/static_report.py | 30 ++++++++++++++++--- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py index 0052ed1391..8f4e207c33 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py @@ -137,4 +137,6 @@ "dQv", ] 
-HISTOGRAM_BINS = {"total_precip_to_surface": np.logspace(-1, np.log10(500), 101)} +PRECIP_RATE = "total_precip_to_surface" +HISTOGRAM_BINS = {PRECIP_RATE: np.logspace(-1, np.log10(500), 101)} +PERCENTILES = [25, 50, 75, 90, 99, 99.9] diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index c4d22ae0f2..8a0143d26d 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -13,7 +13,7 @@ import numpy as np import xarray as xr from toolz import curry -from .constants import HORIZONTAL_DIMS +from .constants import HORIZONTAL_DIMS, PERCENTILES import json _METRICS = [] @@ -138,7 +138,7 @@ def rmse_time_mean(diags): return rms_of_time_mean_bias -for percentile in [25, 50, 75, 90, 99, 99.9]: +for percentile in PERCENTILES: @add_to_metrics(f"percentile_{percentile}") def percentile_metric(diags, p=percentile): diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py index c125678fb7..c5046519eb 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from typing import Iterable +from typing import Iterable, Mapping, Sequence import os import xarray as xr import fsspec @@ -21,6 +21,7 @@ raw_html, plot_histogram, ) +from ..constants import PERCENTILES, PRECIP_RATE import logging @@ -292,7 +293,7 @@ def diurnal_cycle_component_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot: @histogram_plot_manager.register def histogram_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot: - return plot_histogram(diagnostics, 
"total_precip_to_surface_histogram") + return plot_histogram(diagnostics, f"{PRECIP_RATE}_histogram") # Routines for plotting the "metrics" @@ -329,6 +330,22 @@ def generic_metric_plot(metrics: RunMetrics, metric_type: str) -> hv.HoloMap: return HVPlot(hmap.opts(**bar_opts)) +def get_metrics_table( + metrics: RunMetrics, metric_types: Sequence[str], variable_names: Sequence[str] +) -> Mapping[str, Mapping[str, float]]: + """Structure a set of metrics in format suitable for reports.create_html""" + table = {} + for metric_type in metric_types: + for name in variable_names: + units = metrics.get_metric_units(metric_type, name, metrics.runs[0]) + type_label = f"{name} {metric_type} [{units}]" + table[type_label] = { + run: f"{metrics.get_metric_value(metric_type, name, run):.2f}" + for run in metrics.runs + } + return table + + navigation = OrderedList( Link("Home", "index.html"), Link("Process diagnostics", "process_diagnostics.html"), @@ -403,15 +420,18 @@ def render_zonal_pressures(metadata, diagnostics): ) -def render_process_diagnostics(metadata, diagnostics): +def render_process_diagnostics(metadata, diagnostics, metrics): sections = { "Links": navigation, "Diurnal cycle": list(diurnal_plot_manager.make_plots(diagnostics)), "Precipitation histogram": list(histogram_plot_manager.make_plots(diagnostics)), } + metric_types = [f"percentile_{p}" for p in PERCENTILES] + metrics_table = get_metrics_table(metrics, metric_types, [PRECIP_RATE]) return create_html( title="Process diagnostics", metadata=metadata, + metrics=metrics_table, sections=sections, html_header=get_html_header(), ) @@ -446,7 +466,9 @@ def make_report(computed_diagnostics: ComputedDiagnosticsList, output): "hovmoller.html": render_hovmollers(metadata, diagnostics), "maps.html": render_maps(metadata, diagnostics, metrics), "zonal_pressure.html": render_zonal_pressures(metadata, diagnostics), - "process_diagnostics.html": render_process_diagnostics(metadata, diagnostics), + "process_diagnostics.html": 
render_process_diagnostics( + metadata, diagnostics, metrics + ), } for filename, html in pages.items(): From 0fa128f4b2dfb0a91c866eb56aa18fd7c1694c6e Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 20:37:04 +0000 Subject: [PATCH 15/19] Refactor compute_percentile function and add tests --- .../diagnostics/prognostic_run/metrics.py | 28 +++++++++++++------ .../tests/test_metrics.py | 23 +++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 workflows/prognostic_run_diags/tests/test_metrics.py diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index 8a0143d26d..0ed632626b 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -141,22 +141,34 @@ def rmse_time_mean(diags): for percentile in PERCENTILES: @add_to_metrics(f"percentile_{percentile}") - def percentile_metric(diags, p=percentile): + def percentile_metric(diags, percentile=percentile): histogram = grab_diag(diags, "histogram") percentiles = xr.Dataset() data_vars = [v for v in histogram.data_vars if not v.endswith("bin_width")] for varname in data_vars: - bins = histogram[f"{varname}_bins"].values - bin_width = histogram[f"{varname}_bin_width"] - bin_midpoints = bins + 0.5 * bin_width - cumulative_distribution = np.cumsum(histogram[varname][:-1] * bin_width) - percentiles[varname] = bin_midpoints[ - np.argmin(np.abs(cumulative_distribution.values - p / 100)) - ] + percentiles[varname] = compute_percentile( + percentile, + histogram[varname].values, + histogram[f"{varname}_bins"].values, + histogram[f"{varname}_bin_width"].values, + ) restore_units(histogram, percentiles) return percentiles +def compute_percentile( + p: float, freq: np.ndarray, bins: np.ndarray, bin_widths: np.ndarray +): + cumulative_distribution = 
np.cumsum(freq * bin_widths) + if np.abs(cumulative_distribution[-1] - 1) > 1e-6: + raise ValueError( + "The provided frequencies do not integrate to one. " + "Ensure that histogram is computed with density=True." + ) + bin_midpoints = bins + 0.5 * bin_widths + return bin_midpoints[np.argmin(np.abs(cumulative_distribution - p / 100))] + + def restore_units(source, target): for variable in target: target[variable].attrs["units"] = source[variable].attrs["units"] diff --git a/workflows/prognostic_run_diags/tests/test_metrics.py b/workflows/prognostic_run_diags/tests/test_metrics.py new file mode 100644 index 0000000000..dd5a55148e --- /dev/null +++ b/workflows/prognostic_run_diags/tests/test_metrics.py @@ -0,0 +1,23 @@ +import numpy as np +import pytest +from fv3net.diagnostics.prognostic_run.metrics import compute_percentile + + +@pytest.mark.parametrize( + ["percentile", "expected_value"], + [(0, 0.5), (5, 0.5), (10, 0.5), (20, 1.5), (45, 2.5), (99, 3.5)], +) +def test_compute_percentile(percentile, expected_value): + bins = np.array([0, 1, 2, 3]) + bin_widths = np.array([1, 1, 1, 1]) + frequency = np.array([0.1, 0.1, 0.4, 0.4]) + value = compute_percentile(percentile, frequency, bins, bin_widths) + assert value == expected_value + + +def test_compute_percentile_raise_value_error(): + bins = np.array([0, 1, 2, 3]) + bin_widths = np.array([1, 1, 1, 0.5]) + frequency = np.array([0.1, 0.1, 0.4, 0.4]) + with pytest.raises(ValueError): + compute_percentile(0, frequency, bins, bin_widths) From 48e9f6a96bcba8dc648b0045cd20e8840e229dd2 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 20:41:51 +0000 Subject: [PATCH 16/19] Add docstring --- .../diagnostics/prognostic_run/metrics.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index 0ed632626b..dc7e8d638a 100644 
--- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -157,8 +157,19 @@ def percentile_metric(diags, percentile=percentile): def compute_percentile( - p: float, freq: np.ndarray, bins: np.ndarray, bin_widths: np.ndarray -): + percentile: float, freq: np.ndarray, bins: np.ndarray, bin_widths: np.ndarray +) -> float: + """Compute percentile given normalized histogram. + + Args: + percentile: value between 0 and 100 + freq: array of frequencies normalized by bin widths + bins: values of left sides of bins + bin_widths: values of bin widths + + Returns: + value of distribution at percentile + """ cumulative_distribution = np.cumsum(freq * bin_widths) if np.abs(cumulative_distribution[-1] - 1) > 1e-6: raise ValueError( @@ -166,7 +177,8 @@ def compute_percentile( "Ensure that histogram is computed with density=True." ) bin_midpoints = bins + 0.5 * bin_widths - return bin_midpoints[np.argmin(np.abs(cumulative_distribution - p / 100))] + closest_index = np.argmin(np.abs(cumulative_distribution - percentile / 100)) + return bin_midpoints[closest_index] def restore_units(source, target): From 92edd8c840bebdbea7072b2e34346403d6ee330d Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 21:27:11 +0000 Subject: [PATCH 17/19] Refactor histogram calc to new function in VCM --- external/vcm/tests/test_calc.py | 13 +++++++++ external/vcm/vcm/__init__.py | 1 + external/vcm/vcm/calc/histogram.py | 28 +++++++++++++++++++ .../diagnostics/prognostic_run/compute.py | 13 +++------ 4 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 external/vcm/vcm/calc/histogram.py diff --git a/external/vcm/tests/test_calc.py b/external/vcm/tests/test_calc.py index 49396e0dab..2222ccf183 100644 --- a/external/vcm/tests/test_calc.py +++ b/external/vcm/tests/test_calc.py @@ -12,6 +12,7 @@ ) from vcm.calc.calc import local_time, apparent_source from 
vcm.cubedsphere.constants import COORD_Z_CENTER, COORD_Z_OUTER +from vcm.calc.histogram import histogram @pytest.mark.parametrize("toa_pressure", [0, 5]) @@ -119,3 +120,15 @@ def test_apparent_source(): s_dim="forecast_time", ) assert Q1_forecast3 == pytest.approx((2.0 / (15 * 60)) - (4.0 / 60)) + + +def test_histogram(): + data = xr.DataArray( + np.reshape(np.arange(0, 40, 2), (5, 4)), dims=["x", "y"], name="temperature" + ) + coords = {"temperature_bins": [0, 30]} + expected_count = xr.DataArray([15, 5], coords=coords, dims="temperature_bins") + expected_width = xr.DataArray([30, 10], coords=coords, dims="temperature_bins") + count, width = histogram(data, bins=[0, 30, 40]) + xr.testing.assert_equal(count, expected_count) + xr.testing.assert_equal(width, expected_width) diff --git a/external/vcm/vcm/__init__.py b/external/vcm/vcm/__init__.py index f8ba2af1f2..dba363489d 100644 --- a/external/vcm/vcm/__init__.py +++ b/external/vcm/vcm/__init__.py @@ -29,6 +29,7 @@ column_integrated_heating_from_isobaric_transition, column_integrated_heating_from_isochoric_transition, ) +from .calc.histogram import histogram from .interpolate import ( interpolate_to_pressure_levels, diff --git a/external/vcm/vcm/calc/histogram.py b/external/vcm/vcm/calc/histogram.py new file mode 100644 index 0000000000..0b33721b40 --- /dev/null +++ b/external/vcm/vcm/calc/histogram.py @@ -0,0 +1,28 @@ +from typing import Tuple + +import numpy as np +import xarray as xr + + +def histogram(da: xr.DataArray, **kwargs) -> Tuple[xr.DataArray, xr.DataArray]: + """Compute histogram and return tuple of counts and bin widths. + + Args: + da: input data + kwargs: optional parameters to pass on to np.histogram + + Return: + counts, bin_widths tuple of xr.DataArrays. The coordinate of both arrays is + equal to the left side of the histogram bins. 
+ """ + coord_name = f"{da.name}_bins" if da.name is not None else "bins" + count, bins = np.histogram(da, **kwargs) + coords = {coord_name: bins[:-1]} + width = bins[1:] - bins[:-1] + width_da = xr.DataArray(width, coords=coords, dims=[coord_name]) + count_da = xr.DataArray(count, coords=coords, dims=[coord_name]) + if "units" in da.attrs: + count_da[coord_name].attrs["units"] = da.units + width_da[coord_name].attrs["units"] = da.units + width_da.attrs["units"] = da.units + return count_da, width_da diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index 109ee9814b..ba48c48d19 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -23,7 +23,7 @@ from toolz import curry from collections import defaultdict -from typing import Dict, Callable, Mapping, Union +from typing import Dict, Callable, Mapping, Tuple, Union from joblib import Parallel, delayed @@ -499,16 +499,11 @@ def compute_histogram(prognostic, verification, grid): logger.info("Computing histograms for physics diagnostics") counts = xr.Dataset() for varname in prognostic.data_vars: - count, bins = np.histogram( + count, width = vcm.histogram( prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True ) - coords = {f"{varname}_bins": bins[:-1]} - width = bins[1:] - bins[:-1] - width_da = xr.DataArray(width, coords=coords, dims=list(coords.keys())) - count_da = xr.DataArray(count, coords=coords, dims=list(coords.keys())) - count_da[f"{varname}_bins"].attrs["units"] = prognostic[varname].units - counts[varname] = count_da - counts[f"{varname}_bin_width"] = width_da + counts[varname] = count + counts[f"{varname}_bin_width"] = width return _assign_diagnostic_time_attrs(counts, prognostic) From c3c476baf317d8418afd653939df734ac471b43f Mon Sep 17 00:00:00 2001 From: Oliver 
Watt-Meyer Date: Wed, 23 Jun 2021 21:31:13 +0000 Subject: [PATCH 18/19] Fix lint and typechecking --- external/vcm/vcm/calc/histogram.py | 4 ++-- .../fv3net/diagnostics/prognostic_run/compute.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/external/vcm/vcm/calc/histogram.py b/external/vcm/vcm/calc/histogram.py index 0b33721b40..0cc34ad05a 100644 --- a/external/vcm/vcm/calc/histogram.py +++ b/external/vcm/vcm/calc/histogram.py @@ -1,4 +1,4 @@ -from typing import Tuple +from typing import Any, Hashable, Mapping, Tuple import numpy as np import xarray as xr @@ -17,7 +17,7 @@ def histogram(da: xr.DataArray, **kwargs) -> Tuple[xr.DataArray, xr.DataArray]: """ coord_name = f"{da.name}_bins" if da.name is not None else "bins" count, bins = np.histogram(da, **kwargs) - coords = {coord_name: bins[:-1]} + coords: Mapping[Hashable, Any] = {coord_name: bins[:-1]} width = bins[1:] - bins[:-1] width_da = xr.DataArray(width, coords=coords, dims=[coord_name]) count_da = xr.DataArray(count, coords=coords, dims=[coord_name]) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index ba48c48d19..da15bc4506 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -23,7 +23,7 @@ from toolz import curry from collections import defaultdict -from typing import Dict, Callable, Mapping, Tuple, Union +from typing import Dict, Callable, Mapping, Union from joblib import Parallel, delayed From 7097de25e17064d65566f5f8efb50f67e26c395d Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 21:50:08 +0000 Subject: [PATCH 19/19] Compute 3-hourly mean before histogram --- .../diagnostics/prognostic_run/compute.py | 2 +- .../diagnostics/prognostic_run/transform.py | 21 ++++++++++++++----- 2 files changed, 17 insertions(+), 6 
deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index da15bc4506..7ac85edb88 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -493,7 +493,7 @@ def _diurnal_func( @add_to_diags("physics") @diag_finalizer("histogram") -@transform.apply("resample_time", "3H", inner_join=True) +@transform.apply("resample_time", "3H", inner_join=True, method="mean") @transform.apply("subset_variables", list(HISTOGRAM_BINS.keys())) def compute_histogram(prognostic, verification, grid): logger.info("Computing histograms for physics diagnostics") diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/transform.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/transform.py index 1b1ea4c343..62ddc09f78 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/transform.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/transform.py @@ -89,7 +89,11 @@ def transform(*diag_args): @add_to_input_transform_fns def resample_time( - freq_label: str, arg: DiagArg, time_slice=slice(None, -1), inner_join: bool = False + freq_label: str, + arg: DiagArg, + time_slice=slice(None, -1), + inner_join: bool = False, + method: str = "nearest", ) -> DiagArg: """ Subset times in prognostic and verification data @@ -102,10 +106,11 @@ def resample_time( time by default to work with crashed simulations. inner_join: Subset times to the intersection of prognostic and verification data. Defaults to False. + method: how to do resampling. Can be "nearest" or "mean". 
""" prognostic, verification, grid = arg - prognostic = _downsample_only(prognostic, freq_label) - verification = _downsample_only(verification, freq_label) + prognostic = _downsample_only(prognostic, freq_label, method) + verification = _downsample_only(verification, freq_label, method) prognostic = prognostic.isel(time=time_slice) if inner_join: @@ -113,12 +118,18 @@ def resample_time( return prognostic, verification, grid -def _downsample_only(ds: xr.Dataset, freq_label: str) -> xr.Dataset: +def _downsample_only(ds: xr.Dataset, freq_label: str, method: str) -> xr.Dataset: """Resample in time, only if given freq_label is lower frequency than time sampling of given dataset ds""" ds_freq = ds.time.values[1] - ds.time.values[0] if ds_freq < pd.to_timedelta(freq_label): - return ds.resample(time=freq_label, label="right").nearest() + resampled = ds.resample(time=freq_label, label="right") + if method == "nearest": + return resampled.nearest() + elif method == "mean": + return resampled.mean() + else: + raise ValueError(f"Don't know how to resample with method={method}.") else: return ds