From 98e8061db56ed3de8433860e504313a6c6336689 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 19:10:30 +0000 Subject: [PATCH 01/19] Add OrderedList class to report --- external/report/report/__init__.py | 9 ++++++++- external/report/report/create_report.py | 11 ++++++++++- external/report/tests/test_report.py | 7 ++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/external/report/report/__init__.py b/external/report/report/__init__.py index 0d51642ac7..2a0760086e 100644 --- a/external/report/report/__init__.py +++ b/external/report/report/__init__.py @@ -1,3 +1,10 @@ -from .create_report import create_html, insert_report_figure, Metrics, Metadata, Link +from .create_report import ( + create_html, + insert_report_figure, + Metrics, + Metadata, + Link, + OrderedList, +) __version__ = "0.1.0" diff --git a/external/report/report/create_report.py b/external/report/report/create_report.py index 28f3ca378d..0f6ccbd7f5 100644 --- a/external/report/report/create_report.py +++ b/external/report/report/create_report.py @@ -1,6 +1,6 @@ import datetime import os -from typing import Mapping, Sequence, Union +from typing import Any, Mapping, Sequence, Union from jinja2 import Template from pytz import timezone @@ -90,6 +90,15 @@ def __repr__(self) -> str: return f'{self.tag}' +class OrderedList: + def __init__(self, items: Sequence[Any]): + self.items = items + + def __repr__(self) -> str: + items_li = [f"
<li>{item}</li>" for item in self.items] + return "<ol>\n" + "\n".join(items_li) + "\n</ol>
    " + + def resolve_plot(obj): if isinstance(obj, str): return ImagePlot(obj) diff --git a/external/report/tests/test_report.py b/external/report/tests/test_report.py index 22bd22cb6b..c13ad31f55 100644 --- a/external/report/tests/test_report.py +++ b/external/report/tests/test_report.py @@ -1,5 +1,5 @@ import os -from report import __version__, create_html +from report import __version__, create_html, OrderedList from report.create_report import _save_figure, insert_report_figure @@ -46,3 +46,8 @@ def test__save_figure(tmpdir): with open(os.path.join(output_dir, filepath_relative_to_report), "r") as f: saved_data = f.read() assert saved_data.replace("\n", "") == fig.content + + +def test_OrderedList_repr(): + result = str(OrderedList(["item1", "item2"])) + assert result == "
<ol>\n<li>item1</li>\n<li>item2</li>\n</ol>
    " From 41976a2a5482a9d805c9ecbf3ed7d6fad593e043 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 19:10:54 +0000 Subject: [PATCH 02/19] Add diagnostic of precipitation histogram --- .../diagnostics/prognostic_run/compute.py | 18 ++++++++++++++++++ .../diagnostics/prognostic_run/constants.py | 3 +++ 2 files changed, 21 insertions(+) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index 1ed61a4338..783618718c 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -37,6 +37,7 @@ from fv3net.diagnostics.prognostic_run import diurnal_cycle from fv3net.diagnostics.prognostic_run import transform from fv3net.diagnostics.prognostic_run.constants import ( + HISTOGRAM_BINS, HORIZONTAL_DIMS, DiagArg, GLOBAL_AVERAGE_DYCORE_VARS, @@ -503,6 +504,23 @@ def _diurnal_func( return _assign_diagnostic_time_attrs(diag, prognostic) +@add_to_diags("physics") +@diag_finalizer("histogram") +@transform.apply("resample_time", "3H", inner_join=True) +@transform.apply("subset_variables", HISTOGRAM_BINS.keys()) +def compute_precip_histogram(prognostic, verification, grid): + logger.info("Computing precipitation histogram") + count_ds = xr.Dataset() + for varname in prognostic: + count, bins = np.histogram( + prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True + ) + count_da = xr.DataArray(count, coords={f"{varname}_bin": bins[:-1]}) + count_da[f"{varname}_bin"].attrs["units"] = prognostic[varname].units + count_ds = xr.merge([count_ds, count_da.rename(varname)]) + return _assign_diagnostic_time_attrs(count_ds, prognostic) + + def register_parser(subparsers): parser = subparsers.add_parser("save", help="Compute the prognostic run diags.") parser.add_argument("url", help="Prognostic run output location.") diff --git 
a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py index 45a6471432..0052ed1391 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py @@ -1,3 +1,4 @@ +import numpy as np import xarray as xr from typing import Tuple @@ -135,3 +136,5 @@ "dQu", "dQv", ] + +HISTOGRAM_BINS = {"total_precip_to_surface": np.logspace(-1, np.log10(500), 101)} From df2db9a26572914b9f5db5a341a6a6f276ee582a Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 22:48:53 +0000 Subject: [PATCH 03/19] Stop requiring two runs if making report from folder --- .../diagnostics/prognostic_run/computed_diagnostics.py | 5 ----- workflows/prognostic_run_diags/tests/test_integration.sh | 3 --- 2 files changed, 8 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py index a3e0c3cefd..cae780bc7b 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py @@ -274,11 +274,6 @@ def detect_folders( bucket: str, fs: fsspec.AbstractFileSystem, ) -> Mapping[str, DiagnosticFolder]: diag_ncs = fs.glob(os.path.join(bucket, "*", "diags.nc")) - if len(diag_ncs) < 2: - raise ValueError( - "Plots require more than 1 diagnostic directory in" - f" {bucket} for holoviews plots to display correctly." 
- ) return { Path(url).parent.name: DiagnosticFolder(fs, Path(url).parent.as_posix()) for url in diag_ncs diff --git a/workflows/prognostic_run_diags/tests/test_integration.sh b/workflows/prognostic_run_diags/tests/test_integration.sh index 84edfc4035..41d137de5e 100644 --- a/workflows/prognostic_run_diags/tests/test_integration.sh +++ b/workflows/prognostic_run_diags/tests/test_integration.sh @@ -21,9 +21,6 @@ gsutil cp /tmp/$random/metrics.json $OUTPUT/run1/metrics.json # generate movies for short sample prognostic run prognostic_run_diags movie --n_jobs 1 --n_timesteps 2 $RUN $OUTPUT/run1 -# make a second copy of diags/metrics since generate_report.py needs at least two runs -gsutil -m cp -r $OUTPUT/run1 $OUTPUT/run2 - # generate report based on diagnostics computed above prognostic_run_diags report $OUTPUT $OUTPUT From 56bc134a4bd00a763cd9a1146d370296a8c8b9f4 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 22:49:19 +0000 Subject: [PATCH 04/19] Better OrderedList API --- external/report/report/create_report.py | 2 +- external/report/tests/test_report.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/report/report/create_report.py b/external/report/report/create_report.py index 0f6ccbd7f5..a7b332934a 100644 --- a/external/report/report/create_report.py +++ b/external/report/report/create_report.py @@ -91,7 +91,7 @@ def __repr__(self) -> str: class OrderedList: - def __init__(self, items: Sequence[Any]): + def __init__(self, *items: Any): self.items = items def __repr__(self) -> str: diff --git a/external/report/tests/test_report.py b/external/report/tests/test_report.py index c13ad31f55..a5131478b8 100644 --- a/external/report/tests/test_report.py +++ b/external/report/tests/test_report.py @@ -49,5 +49,5 @@ def test__save_figure(tmpdir): def test_OrderedList_repr(): - result = str(OrderedList(["item1", "item2"])) + result = str(OrderedList("item1", "item2")) assert result == "
<ol>\n<li>item1</li>\n<li>item2</li>\n</ol>
    " From ebccbef64716fcec466b2c43b66490d7cfb7c06e Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 22:50:48 +0000 Subject: [PATCH 05/19] Set histogram diagnostic coordinate to bin midpoints --- .../diagnostics/prognostic_run/compute.py | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index 783618718c..501e9774bc 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -12,15 +12,12 @@ grouping contains outputs from the physics routines (`sfc_dt_atmos.tile*.nc` and `diags.zarr`). """ -import os import sys import datetime -import tempfile import intake import numpy as np import xarray as xr -import shutil from dask.diagnostics import ProgressBar import fsspec @@ -230,16 +227,6 @@ def _assign_diagnostic_time_attrs( return diagnostics_ds -def dump_nc(ds: xr.Dataset, f): - # to_netcdf closes file, which will delete the buffer - # need to use a buffer since seek doesn't work with GCSFS file objects - with tempfile.TemporaryDirectory() as dirname: - url = os.path.join(dirname, "tmp.nc") - ds.to_netcdf(url, engine="h5netcdf") - with open(url, "rb") as tmp1: - shutil.copyfileobj(tmp1, f) - - @add_to_diags("dycore") @diag_finalizer("rms_global") @transform.apply("resample_time", "3H", inner_join=True) @@ -508,17 +495,28 @@ def _diurnal_func( @diag_finalizer("histogram") @transform.apply("resample_time", "3H", inner_join=True) @transform.apply("subset_variables", HISTOGRAM_BINS.keys()) -def compute_precip_histogram(prognostic, verification, grid): - logger.info("Computing precipitation histogram") - count_ds = xr.Dataset() - for varname in prognostic: +def compute_histogram(prognostic, verification, grid): + logger.info("Computing histograms for 
physics diagnostics") + counts = xr.Dataset() + for varname in prognostic.data_vars: + # bins = HISTOGRAM_BINS[varname] + # counts[varname] = prognostic.groupby_bins(varname, bins).count()[varname] + # bin_midpoints = [x.item().mid for x in counts[f"{varname}_bins"]] + # counts = counts.assign_coords({f"{varname}_bins": bin_midpoints}) + # counts[varname] /= counts[varname].sum() + # counts[varname] /= bins[1:] - bins[:1] + # counts[varname].attrs["units"] = f"({prognostic[varname].units})^-1" + # counts[varname].attrs["long_name"] = "Frequency" + # counts[f"{varname}_bins"].attrs = prognostic[varname].attrs count, bins = np.histogram( prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True ) - count_da = xr.DataArray(count, coords={f"{varname}_bin": bins[:-1]}) - count_da[f"{varname}_bin"].attrs["units"] = prognostic[varname].units - count_ds = xr.merge([count_ds, count_da.rename(varname)]) - return _assign_diagnostic_time_attrs(count_ds, prognostic) + bin_midpoints = 0.5 * (bins[:-1] + bins[1:]) + coords = {f"{varname}_bins": bin_midpoints} + count_da = xr.DataArray(count, coords=coords, dims=list(coords.keys())) + count_da[f"{varname}_bins"].attrs["units"] = prognostic[varname].units + counts[varname] = count_da + return _assign_diagnostic_time_attrs(counts, prognostic) def register_parser(subparsers): From 29a8f8054856ccfcab5c99a04132baeaca927d75 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 22:52:13 +0000 Subject: [PATCH 06/19] Add process.html page with diurnal cycle and precip PDF --- .../prognostic_run/views/matplotlib.py | 25 +++++++++- .../prognostic_run/views/static_report.py | 50 +++++++++++++------ 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py index 1ac3cc3fe4..fd57255ff1 100644 --- 
a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py @@ -30,9 +30,9 @@ } -def fig_to_b64(fig, format="png"): +def fig_to_b64(fig, format="png", dpi=None): pic_IObytes = io.BytesIO() - fig.savefig(pic_IObytes, format=format, bbox_inches="tight") + fig.savefig(pic_IObytes, format=format, bbox_inches="tight", dpi=dpi) pic_IObytes.seek(0) pic_hash = base64.b64encode(pic_IObytes.read()) return f"data:image/png;base64, " + pic_hash.decode() @@ -179,6 +179,27 @@ def plot_cubed_sphere_map( ) +def plot_histogram(run_diags: RunDiagnostics, varname: str) -> raw_html: + """Plot 1D histogram of varname overlaid across runs.""" + + logging.info(f"plotting {varname}") + fig, ax = plt.subplots() + bin_name = varname.replace("histogram", "bins") + for run in run_diags.runs: + v = run_diags.get_variable(run, varname) + ax.step(v[bin_name], v, label=run, where="post", linewidth=1) + ax.set_xlabel(f"{v.long_name} [{v.units}]") + ax.set_ylabel(f"Frequency [({v.units})^-1]") + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlim([v[bin_name].values[0], v[bin_name].values[-1]]) + ax.legend() + fig.tight_layout() + data = fig_to_b64(fig, dpi=150) + plt.close(fig) + return raw_html(f'') + + def _render_map_title( metrics: RunMetrics, variable: str, run: str, metrics_for_title: Mapping[str, str], ) -> str: diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py index 9696502557..8b80c97a83 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py @@ -13,9 +13,14 @@ RunMetrics, ) -from report import create_html, Link +from report import create_html, Link, OrderedList from report.holoviews import 
HVPlot, get_html_header -from .matplotlib import plot_2d_matplotlib, plot_cubed_sphere_map, raw_html +from .matplotlib import ( + plot_2d_matplotlib, + plot_cubed_sphere_map, + raw_html, + plot_histogram, +) import logging @@ -75,9 +80,7 @@ def make_plots(self, data) -> Iterable: yield func(data) -def plot_1d( - run_diags: RunDiagnostics, varfilter: str, run_attr_name: str = "run", -) -> HVPlot: +def plot_1d(run_diags: RunDiagnostics, varfilter: str) -> HVPlot: """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs. All matching diagnostics must be 1D.""" p = hv.Cycle("Colorblind") @@ -95,10 +98,7 @@ def plot_1d( def plot_1d_min_max_with_region_bar( - run_diags: RunDiagnostics, - varfilter_min: str, - varfilter_max: str, - run_attr_name: str = "run", + run_diags: RunDiagnostics, varfilter_min: str, varfilter_max: str, ) -> HVPlot: """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs. All matching diagnostics must be 1D.""" @@ -123,9 +123,7 @@ def plot_1d_min_max_with_region_bar( return HVPlot(_set_opts_and_overlay(hmap)) -def plot_1d_with_region_bar( - run_diags: RunDiagnostics, varfilter: str, run_attr_name: str = "run" -) -> HVPlot: +def plot_1d_with_region_bar(run_diags: RunDiagnostics, varfilter: str) -> HVPlot: """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs. Region will be selectable through a drop-down bar. Region is assumed to be part of variable name after last underscore. 
All matching diagnostics must be 1D.""" @@ -189,6 +187,7 @@ def diurnal_component_plot( hovmoller_plot_manager = PlotManager() zonal_pressure_plot_manager = PlotManager() diurnal_plot_manager = PlotManager() +histogram_plot_manager = PlotManager() # this will be passed the data from the metrics.json files metrics_plot_manager = PlotManager() @@ -291,6 +290,11 @@ def diurnal_cycle_component_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot: return diurnal_component_plot(diagnostics) +@histogram_plot_manager.register +def histogram_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot: + return plot_histogram(diagnostics, "total_precip_to_surface_histogram") + + # Routines for plotting the "metrics" # New plotting routines can be registered here. @metrics_plot_manager.register @@ -325,12 +329,14 @@ def generic_metric_plot(metrics: RunMetrics, metric_type: str) -> hv.HoloMap: return HVPlot(hmap.opts(**bar_opts)) -navigation = [ +navigation = OrderedList( Link("Home", "index.html"), + Link("Process diagnostics", "process.html"), Link("Latitude versus time hovmoller", "hovmoller.html"), Link("Time-mean maps", "maps.html"), Link("Time-mean zonal-pressure profiles", "zonal_pressure.html"), -] +) +navigation = [navigation] # must be iterable for Jinja HTML template def render_index(metadata, diagnostics, metrics, movie_links): @@ -338,7 +344,6 @@ def render_index(metadata, diagnostics, metrics, movie_links): "Links": navigation, "Timeseries": list(timeseries_plot_manager.make_plots(diagnostics)), "Zonal mean": list(zonal_mean_plot_manager.make_plots(diagnostics)), - "Diurnal cycle": list(diurnal_plot_manager.make_plots(diagnostics)), } if not metrics.empty: @@ -398,6 +403,20 @@ def render_zonal_pressures(metadata, diagnostics): ) +def render_process_diagnostics(metadata, diagnostics): + sections = { + "Links": navigation, + "Diurnal cycle": list(diurnal_plot_manager.make_plots(diagnostics)), + "Precipitation histogram": list(histogram_plot_manager.make_plots(diagnostics)), + 
} + return create_html( + title="Process diagnostics", + metadata=metadata, + sections=sections, + html_header=get_html_header(), + ) + + def _html_link(url, tag): return f"{tag}" @@ -427,6 +446,7 @@ def make_report(computed_diagnostics: ComputedDiagnosticsList, output): "hovmoller.html": render_hovmollers(metadata, diagnostics), "maps.html": render_maps(metadata, diagnostics, metrics), "zonal_pressure.html": render_zonal_pressures(metadata, diagnostics), + "process_diagnostics.html": render_process_diagnostics(metadata, diagnostics), } for filename, html in pages.items(): From 53ce8fd802c8662784c904e3d0b0245247e29dc7 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Fri, 18 Jun 2021 23:11:44 +0000 Subject: [PATCH 07/19] Remove obsolete failing units tests --- .../tests/test_computed_diagnostics.py | 10 ------- .../tests/test_savediags.py | 29 ------------------- 2 files changed, 39 deletions(-) diff --git a/workflows/prognostic_run_diags/tests/test_computed_diagnostics.py b/workflows/prognostic_run_diags/tests/test_computed_diagnostics.py index 87f452f1eb..6db0c37e81 100644 --- a/workflows/prognostic_run_diags/tests/test_computed_diagnostics.py +++ b/workflows/prognostic_run_diags/tests/test_computed_diagnostics.py @@ -49,16 +49,6 @@ def test_ComputedDiagnosticsList_from_urls(): assert isinstance(result.folders["1"], DiagnosticFolder) -def test_detect_folders_fail_less_than_2(tmpdir): - - fs = fsspec.filesystem("file") - - tmpdir.mkdir("rundir1").join("diags.nc").write("foobar") - - with pytest.raises(ValueError): - detect_folders(tmpdir, fs) - - def test_get_movie_links(tmpdir): domain = "http://www.domain.com" rdirs = ["rundir1", "rundir2"] diff --git a/workflows/prognostic_run_diags/tests/test_savediags.py b/workflows/prognostic_run_diags/tests/test_savediags.py index f6fa505dd1..5c70b990f1 100644 --- a/workflows/prognostic_run_diags/tests/test_savediags.py +++ b/workflows/prognostic_run_diags/tests/test_savediags.py @@ -2,8 +2,6 @@ import cftime import 
numpy as np import xarray as xr -import fsspec -from unittest.mock import Mock import pytest @@ -29,33 +27,6 @@ def grid(): return xr.open_dataset("grid.nc").load() -def test_dump_nc(tmpdir): - ds = xr.Dataset({"a": (["x"], [1.0])}) - - path = str(tmpdir.join("data.nc")) - with fsspec.open(path, "wb") as f: - savediags.dump_nc(ds, f) - - ds_compare = xr.open_dataset(path) - xr.testing.assert_equal(ds, ds_compare) - - -def test_dump_nc_no_seek(): - """ - GCSFS file objects raise an error when seek is called in write mode:: - - if not self.mode == "rb": - raise ValueError("Seek only available in read mode") - ValueError: Seek only available in read mode - - """ - ds = xr.Dataset({"a": (["x"], [1.0])}) - m = Mock() - - savediags.dump_nc(ds, m) - m.seek.assert_not_called() - - @pytest.mark.parametrize("func", savediags._DIAG_FNS) def test_compute_diags_succeeds(func, resampled, verification, grid): func(resampled, verification, grid) From 7085fc91158108ee9e84470e1a8b700191aefb2d Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 21:17:01 +0000 Subject: [PATCH 08/19] Case HISTOGRAM_BINS.keys() to list --- .../fv3net/diagnostics/prognostic_run/compute.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index 501e9774bc..d4bf49dd8d 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -494,20 +494,11 @@ def _diurnal_func( @add_to_diags("physics") @diag_finalizer("histogram") @transform.apply("resample_time", "3H", inner_join=True) -@transform.apply("subset_variables", HISTOGRAM_BINS.keys()) +@transform.apply("subset_variables", list(HISTOGRAM_BINS.keys())) def compute_histogram(prognostic, verification, grid): logger.info("Computing histograms for 
physics diagnostics") counts = xr.Dataset() for varname in prognostic.data_vars: - # bins = HISTOGRAM_BINS[varname] - # counts[varname] = prognostic.groupby_bins(varname, bins).count()[varname] - # bin_midpoints = [x.item().mid for x in counts[f"{varname}_bins"]] - # counts = counts.assign_coords({f"{varname}_bins": bin_midpoints}) - # counts[varname] /= counts[varname].sum() - # counts[varname] /= bins[1:] - bins[:1] - # counts[varname].attrs["units"] = f"({prognostic[varname].units})^-1" - # counts[varname].attrs["long_name"] = "Frequency" - # counts[f"{varname}_bins"].attrs = prognostic[varname].attrs count, bins = np.histogram( prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True ) From 095423dc0a9842d5607ab307d77fc838378233ff Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 23:09:06 +0000 Subject: [PATCH 09/19] Save bin widths to allow accurate cdf calculation --- .../fv3net/diagnostics/prognostic_run/compute.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index d4bf49dd8d..109ee9814b 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -502,11 +502,13 @@ def compute_histogram(prognostic, verification, grid): count, bins = np.histogram( prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True ) - bin_midpoints = 0.5 * (bins[:-1] + bins[1:]) - coords = {f"{varname}_bins": bin_midpoints} + coords = {f"{varname}_bins": bins[:-1]} + width = bins[1:] - bins[:-1] + width_da = xr.DataArray(width, coords=coords, dims=list(coords.keys())) count_da = xr.DataArray(count, coords=coords, dims=list(coords.keys())) count_da[f"{varname}_bins"].attrs["units"] = prognostic[varname].units counts[varname] = count_da + 
counts[f"{varname}_bin_width"] = width_da return _assign_diagnostic_time_attrs(counts, prognostic) From 02d5be226a2eaaf51dde6e9ad0b2c37b4dd3f298 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 23:09:29 +0000 Subject: [PATCH 10/19] Add metric for various percentiles --- .../diagnostics/prognostic_run/metrics.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index a5b50202d6..353bad8583 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -18,6 +18,9 @@ _METRICS = [] GRID_VARS = ["lon", "lat", "lonb", "latb", "area"] +QUANTILE_LIMITS = { + "total_precip_to_surface": ((0.1, 1), (1, 10), (10, 100), (100, None)) +} def grab_diag(ds, name): @@ -138,6 +141,25 @@ def rmse_time_mean(diags): return rms_of_time_mean_bias +for percentile in [25, 50, 75, 90, 99, 99.9]: + + @add_to_metrics(f"percentile_{percentile}") + def time_and_global_mean_bias(diags, p=percentile): + histogram = grab_diag(diags, "histogram") + percentiles = xr.Dataset() + data_vars = [v for v in histogram.data_vars if not v.endswith("bin_width")] + for varname in data_vars: + bins = histogram[f"{varname}_bins"].values + bin_width = histogram[f"{varname}_bin_width"] + bin_midpoints = bins + 0.5 * bin_width + cumulative_distribution = np.cumsum(histogram[varname][:-1] * bin_width) + percentiles[varname] = bin_midpoints[ + np.argmin(np.abs(cumulative_distribution.values - p / 100)) + ] + restore_units(histogram, percentiles) + return percentiles + + def restore_units(source, target): for variable in target: target[variable].attrs["units"] = source[variable].attrs["units"] From 5647c1ea734bcbe895d34d286842cf5b2fb2b573 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 
23:09:54 +0000 Subject: [PATCH 11/19] Delete unused constant --- .../fv3net/diagnostics/prognostic_run/metrics.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index 353bad8583..09a996905e 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -18,9 +18,6 @@ _METRICS = [] GRID_VARS = ["lon", "lat", "lonb", "latb", "area"] -QUANTILE_LIMITS = { - "total_precip_to_surface": ((0.1, 1), (1, 10), (10, 100), (100, None)) -} def grab_diag(ds, name): From eb099c95666aee2bdf4a8cae7f74a3a4e286b016 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 23:11:23 +0000 Subject: [PATCH 12/19] Rename function to not be duplicated --- .../fv3net/diagnostics/prognostic_run/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index 09a996905e..c4d22ae0f2 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -141,7 +141,7 @@ def rmse_time_mean(diags): for percentile in [25, 50, 75, 90, 99, 99.9]: @add_to_metrics(f"percentile_{percentile}") - def time_and_global_mean_bias(diags, p=percentile): + def percentile_metric(diags, p=percentile): histogram = grab_diag(diags, "histogram") percentiles = xr.Dataset() data_vars = [v for v in histogram.data_vars if not v.endswith("bin_width")] From caec3f91df2cd45d70012497afc1e997d3dc7237 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Tue, 22 Jun 2021 23:28:12 +0000 Subject: [PATCH 13/19] Fix navigation link --- 
.../fv3net/diagnostics/prognostic_run/views/static_report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py index 8b80c97a83..c125678fb7 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py @@ -331,12 +331,12 @@ def generic_metric_plot(metrics: RunMetrics, metric_type: str) -> hv.HoloMap: navigation = OrderedList( Link("Home", "index.html"), - Link("Process diagnostics", "process.html"), + Link("Process diagnostics", "process_diagnostics.html"), Link("Latitude versus time hovmoller", "hovmoller.html"), Link("Time-mean maps", "maps.html"), Link("Time-mean zonal-pressure profiles", "zonal_pressure.html"), ) -navigation = [navigation] # must be iterable for Jinja HTML template +navigation = [navigation] # must be iterable for create_html template def render_index(metadata, diagnostics, metrics, movie_links): From ee55d102d4ac64df0035bdca757af46df0c03bec Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 19:04:28 +0000 Subject: [PATCH 14/19] Add table of precip percentiles to process page --- .../diagnostics/prognostic_run/constants.py | 4 ++- .../diagnostics/prognostic_run/metrics.py | 4 +-- .../prognostic_run/views/static_report.py | 30 ++++++++++++++++--- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py index 0052ed1391..8f4e207c33 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py @@ -137,4 +137,6 @@ "dQv", ] 
-HISTOGRAM_BINS = {"total_precip_to_surface": np.logspace(-1, np.log10(500), 101)} +PRECIP_RATE = "total_precip_to_surface" +HISTOGRAM_BINS = {PRECIP_RATE: np.logspace(-1, np.log10(500), 101)} +PERCENTILES = [25, 50, 75, 90, 99, 99.9] diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index c4d22ae0f2..8a0143d26d 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -13,7 +13,7 @@ import numpy as np import xarray as xr from toolz import curry -from .constants import HORIZONTAL_DIMS +from .constants import HORIZONTAL_DIMS, PERCENTILES import json _METRICS = [] @@ -138,7 +138,7 @@ def rmse_time_mean(diags): return rms_of_time_mean_bias -for percentile in [25, 50, 75, 90, 99, 99.9]: +for percentile in PERCENTILES: @add_to_metrics(f"percentile_{percentile}") def percentile_metric(diags, p=percentile): diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py index c125678fb7..c5046519eb 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from typing import Iterable +from typing import Iterable, Mapping, Sequence import os import xarray as xr import fsspec @@ -21,6 +21,7 @@ raw_html, plot_histogram, ) +from ..constants import PERCENTILES, PRECIP_RATE import logging @@ -292,7 +293,7 @@ def diurnal_cycle_component_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot: @histogram_plot_manager.register def histogram_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot: - return plot_histogram(diagnostics, 
"total_precip_to_surface_histogram") + return plot_histogram(diagnostics, f"{PRECIP_RATE}_histogram") # Routines for plotting the "metrics" @@ -329,6 +330,22 @@ def generic_metric_plot(metrics: RunMetrics, metric_type: str) -> hv.HoloMap: return HVPlot(hmap.opts(**bar_opts)) +def get_metrics_table( + metrics: RunMetrics, metric_types: Sequence[str], variable_names: Sequence[str] +) -> Mapping[str, Mapping[str, float]]: + """Structure a set of metrics in format suitable for reports.create_html""" + table = {} + for metric_type in metric_types: + for name in variable_names: + units = metrics.get_metric_units(metric_type, name, metrics.runs[0]) + type_label = f"{name} {metric_type} [{units}]" + table[type_label] = { + run: f"{metrics.get_metric_value(metric_type, name, run):.2f}" + for run in metrics.runs + } + return table + + navigation = OrderedList( Link("Home", "index.html"), Link("Process diagnostics", "process_diagnostics.html"), @@ -403,15 +420,18 @@ def render_zonal_pressures(metadata, diagnostics): ) -def render_process_diagnostics(metadata, diagnostics): +def render_process_diagnostics(metadata, diagnostics, metrics): sections = { "Links": navigation, "Diurnal cycle": list(diurnal_plot_manager.make_plots(diagnostics)), "Precipitation histogram": list(histogram_plot_manager.make_plots(diagnostics)), } + metric_types = [f"percentile_{p}" for p in PERCENTILES] + metrics_table = get_metrics_table(metrics, metric_types, [PRECIP_RATE]) return create_html( title="Process diagnostics", metadata=metadata, + metrics=metrics_table, sections=sections, html_header=get_html_header(), ) @@ -446,7 +466,9 @@ def make_report(computed_diagnostics: ComputedDiagnosticsList, output): "hovmoller.html": render_hovmollers(metadata, diagnostics), "maps.html": render_maps(metadata, diagnostics, metrics), "zonal_pressure.html": render_zonal_pressures(metadata, diagnostics), - "process_diagnostics.html": render_process_diagnostics(metadata, diagnostics), + "process_diagnostics.html": 
render_process_diagnostics( + metadata, diagnostics, metrics + ), } for filename, html in pages.items(): From 0fa128f4b2dfb0a91c866eb56aa18fd7c1694c6e Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 20:37:04 +0000 Subject: [PATCH 15/19] Refactor compute_percentile function and add tests --- .../diagnostics/prognostic_run/metrics.py | 28 +++++++++++++------ .../tests/test_metrics.py | 23 +++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 workflows/prognostic_run_diags/tests/test_metrics.py diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index 8a0143d26d..0ed632626b 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -141,22 +141,34 @@ def rmse_time_mean(diags): for percentile in PERCENTILES: @add_to_metrics(f"percentile_{percentile}") - def percentile_metric(diags, p=percentile): + def percentile_metric(diags, percentile=percentile): histogram = grab_diag(diags, "histogram") percentiles = xr.Dataset() data_vars = [v for v in histogram.data_vars if not v.endswith("bin_width")] for varname in data_vars: - bins = histogram[f"{varname}_bins"].values - bin_width = histogram[f"{varname}_bin_width"] - bin_midpoints = bins + 0.5 * bin_width - cumulative_distribution = np.cumsum(histogram[varname][:-1] * bin_width) - percentiles[varname] = bin_midpoints[ - np.argmin(np.abs(cumulative_distribution.values - p / 100)) - ] + percentiles[varname] = compute_percentile( + percentile, + histogram[varname].values, + histogram[f"{varname}_bins"].values, + histogram[f"{varname}_bin_width"].values, + ) restore_units(histogram, percentiles) return percentiles +def compute_percentile( + p: float, freq: np.ndarray, bins: np.ndarray, bin_widths: np.ndarray +): + cumulative_distribution = 
np.cumsum(freq * bin_widths) + if np.abs(cumulative_distribution[-1] - 1) > 1e-6: + raise ValueError( + "The provided frequencies do not integrate to one. " + "Ensure that histogram is computed with density=True." + ) + bin_midpoints = bins + 0.5 * bin_widths + return bin_midpoints[np.argmin(np.abs(cumulative_distribution - p / 100))] + + def restore_units(source, target): for variable in target: target[variable].attrs["units"] = source[variable].attrs["units"] diff --git a/workflows/prognostic_run_diags/tests/test_metrics.py b/workflows/prognostic_run_diags/tests/test_metrics.py new file mode 100644 index 0000000000..dd5a55148e --- /dev/null +++ b/workflows/prognostic_run_diags/tests/test_metrics.py @@ -0,0 +1,23 @@ +import numpy as np +import pytest +from fv3net.diagnostics.prognostic_run.metrics import compute_percentile + + +@pytest.mark.parametrize( + ["percentile", "expected_value"], + [(0, 0.5), (5, 0.5), (10, 0.5), (20, 1.5), (45, 2.5), (99, 3.5)], +) +def test_compute_percentile(percentile, expected_value): + bins = np.array([0, 1, 2, 3]) + bin_widths = np.array([1, 1, 1, 1]) + frequency = np.array([0.1, 0.1, 0.4, 0.4]) + value = compute_percentile(percentile, frequency, bins, bin_widths) + assert value == expected_value + + +def test_compute_percentile_raise_value_error(): + bins = np.array([0, 1, 2, 3]) + bin_widths = np.array([1, 1, 1, 0.5]) + frequency = np.array([0.1, 0.1, 0.4, 0.4]) + with pytest.raises(ValueError): + compute_percentile(0, frequency, bins, bin_widths) From 48e9f6a96bcba8dc648b0045cd20e8840e229dd2 Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 20:41:51 +0000 Subject: [PATCH 16/19] Add docstring --- .../diagnostics/prognostic_run/metrics.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py index 0ed632626b..dc7e8d638a 100644 
--- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/metrics.py @@ -157,8 +157,19 @@ def percentile_metric(diags, percentile=percentile): def compute_percentile( - p: float, freq: np.ndarray, bins: np.ndarray, bin_widths: np.ndarray -): + percentile: float, freq: np.ndarray, bins: np.ndarray, bin_widths: np.ndarray +) -> float: + """Compute percentile given normalized histogram. + + Args: + percentile: value between 0 and 100 + freq: array of frequencies normalized by bin widths + bins: values of left sides of bins + bin_widths: values of bin widths + + Returns: + value of distribution at percentile + """ cumulative_distribution = np.cumsum(freq * bin_widths) if np.abs(cumulative_distribution[-1] - 1) > 1e-6: raise ValueError( @@ -166,7 +177,8 @@ def compute_percentile( "Ensure that histogram is computed with density=True." ) bin_midpoints = bins + 0.5 * bin_widths - return bin_midpoints[np.argmin(np.abs(cumulative_distribution - p / 100))] + closest_index = np.argmin(np.abs(cumulative_distribution - percentile / 100)) + return bin_midpoints[closest_index] def restore_units(source, target): From 92edd8c840bebdbea7072b2e34346403d6ee330d Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 21:27:11 +0000 Subject: [PATCH 17/19] Refactor histogram calc to new function in VCM --- external/vcm/tests/test_calc.py | 13 +++++++++ external/vcm/vcm/__init__.py | 1 + external/vcm/vcm/calc/histogram.py | 28 +++++++++++++++++++ .../diagnostics/prognostic_run/compute.py | 13 +++------ 4 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 external/vcm/vcm/calc/histogram.py diff --git a/external/vcm/tests/test_calc.py b/external/vcm/tests/test_calc.py index 49396e0dab..2222ccf183 100644 --- a/external/vcm/tests/test_calc.py +++ b/external/vcm/tests/test_calc.py @@ -12,6 +12,7 @@ ) from vcm.calc.calc import local_time, apparent_source from 
vcm.cubedsphere.constants import COORD_Z_CENTER, COORD_Z_OUTER +from vcm.calc.histogram import histogram @pytest.mark.parametrize("toa_pressure", [0, 5]) @@ -119,3 +120,15 @@ def test_apparent_source(): s_dim="forecast_time", ) assert Q1_forecast3 == pytest.approx((2.0 / (15 * 60)) - (4.0 / 60)) + + +def test_histogram(): + data = xr.DataArray( + np.reshape(np.arange(0, 40, 2), (5, 4)), dims=["x", "y"], name="temperature" + ) + coords = {"temperature_bins": [0, 30]} + expected_count = xr.DataArray([15, 5], coords=coords, dims="temperature_bins") + expected_width = xr.DataArray([30, 10], coords=coords, dims="temperature_bins") + count, width = histogram(data, bins=[0, 30, 40]) + xr.testing.assert_equal(count, expected_count) + xr.testing.assert_equal(width, expected_width) diff --git a/external/vcm/vcm/__init__.py b/external/vcm/vcm/__init__.py index f8ba2af1f2..dba363489d 100644 --- a/external/vcm/vcm/__init__.py +++ b/external/vcm/vcm/__init__.py @@ -29,6 +29,7 @@ column_integrated_heating_from_isobaric_transition, column_integrated_heating_from_isochoric_transition, ) +from .calc.histogram import histogram from .interpolate import ( interpolate_to_pressure_levels, diff --git a/external/vcm/vcm/calc/histogram.py b/external/vcm/vcm/calc/histogram.py new file mode 100644 index 0000000000..0b33721b40 --- /dev/null +++ b/external/vcm/vcm/calc/histogram.py @@ -0,0 +1,28 @@ +from typing import Tuple + +import numpy as np +import xarray as xr + + +def histogram(da: xr.DataArray, **kwargs) -> Tuple[xr.DataArray, xr.DataArray]: + """Compute histogram and return tuple of counts and bin widths. + + Args: + da: input data + kwargs: optional parameters to pass on to np.histogram + + Return: + counts, bin_widths tuple of xr.DataArrays. The coordinate of both arrays is + equal to the left side of the histogram bins. 
+ """ + coord_name = f"{da.name}_bins" if da.name is not None else "bins" + count, bins = np.histogram(da, **kwargs) + coords = {coord_name: bins[:-1]} + width = bins[1:] - bins[:-1] + width_da = xr.DataArray(width, coords=coords, dims=[coord_name]) + count_da = xr.DataArray(count, coords=coords, dims=[coord_name]) + if "units" in da.attrs: + count_da[coord_name].attrs["units"] = da.units + width_da[coord_name].attrs["units"] = da.units + width_da.attrs["units"] = da.units + return count_da, width_da diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index 109ee9814b..ba48c48d19 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -23,7 +23,7 @@ from toolz import curry from collections import defaultdict -from typing import Dict, Callable, Mapping, Union +from typing import Dict, Callable, Mapping, Tuple, Union from joblib import Parallel, delayed @@ -499,16 +499,11 @@ def compute_histogram(prognostic, verification, grid): logger.info("Computing histograms for physics diagnostics") counts = xr.Dataset() for varname in prognostic.data_vars: - count, bins = np.histogram( + count, width = vcm.histogram( prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True ) - coords = {f"{varname}_bins": bins[:-1]} - width = bins[1:] - bins[:-1] - width_da = xr.DataArray(width, coords=coords, dims=list(coords.keys())) - count_da = xr.DataArray(count, coords=coords, dims=list(coords.keys())) - count_da[f"{varname}_bins"].attrs["units"] = prognostic[varname].units - counts[varname] = count_da - counts[f"{varname}_bin_width"] = width_da + counts[varname] = count + counts[f"{varname}_bin_width"] = width return _assign_diagnostic_time_attrs(counts, prognostic) From c3c476baf317d8418afd653939df734ac471b43f Mon Sep 17 00:00:00 2001 From: Oliver 
Watt-Meyer Date: Wed, 23 Jun 2021 21:31:13 +0000 Subject: [PATCH 18/19] Fix lint and typechecking --- external/vcm/vcm/calc/histogram.py | 4 ++-- .../fv3net/diagnostics/prognostic_run/compute.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/external/vcm/vcm/calc/histogram.py b/external/vcm/vcm/calc/histogram.py index 0b33721b40..0cc34ad05a 100644 --- a/external/vcm/vcm/calc/histogram.py +++ b/external/vcm/vcm/calc/histogram.py @@ -1,4 +1,4 @@ -from typing import Tuple +from typing import Any, Hashable, Mapping, Tuple import numpy as np import xarray as xr @@ -17,7 +17,7 @@ def histogram(da: xr.DataArray, **kwargs) -> Tuple[xr.DataArray, xr.DataArray]: """ coord_name = f"{da.name}_bins" if da.name is not None else "bins" count, bins = np.histogram(da, **kwargs) - coords = {coord_name: bins[:-1]} + coords: Mapping[Hashable, Any] = {coord_name: bins[:-1]} width = bins[1:] - bins[:-1] width_da = xr.DataArray(width, coords=coords, dims=[coord_name]) count_da = xr.DataArray(count, coords=coords, dims=[coord_name]) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index ba48c48d19..da15bc4506 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -23,7 +23,7 @@ from toolz import curry from collections import defaultdict -from typing import Dict, Callable, Mapping, Tuple, Union +from typing import Dict, Callable, Mapping, Union from joblib import Parallel, delayed From 7097de25e17064d65566f5f8efb50f67e26c395d Mon Sep 17 00:00:00 2001 From: Oliver Watt-Meyer Date: Wed, 23 Jun 2021 21:50:08 +0000 Subject: [PATCH 19/19] Compute 3-hourly mean before histogram --- .../diagnostics/prognostic_run/compute.py | 2 +- .../diagnostics/prognostic_run/transform.py | 21 ++++++++++++++----- 2 files changed, 17 insertions(+), 6 
deletions(-) diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py index da15bc4506..7ac85edb88 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py @@ -493,7 +493,7 @@ def _diurnal_func( @add_to_diags("physics") @diag_finalizer("histogram") -@transform.apply("resample_time", "3H", inner_join=True) +@transform.apply("resample_time", "3H", inner_join=True, method="mean") @transform.apply("subset_variables", list(HISTOGRAM_BINS.keys())) def compute_histogram(prognostic, verification, grid): logger.info("Computing histograms for physics diagnostics") diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/transform.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/transform.py index 1b1ea4c343..62ddc09f78 100644 --- a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/transform.py +++ b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/transform.py @@ -89,7 +89,11 @@ def transform(*diag_args): @add_to_input_transform_fns def resample_time( - freq_label: str, arg: DiagArg, time_slice=slice(None, -1), inner_join: bool = False + freq_label: str, + arg: DiagArg, + time_slice=slice(None, -1), + inner_join: bool = False, + method: str = "nearest", ) -> DiagArg: """ Subset times in prognostic and verification data @@ -102,10 +106,11 @@ def resample_time( time by default to work with crashed simulations. inner_join: Subset times to the intersection of prognostic and verification data. Defaults to False. + method: how to do resampling. Can be "nearest" or "mean". 
""" prognostic, verification, grid = arg - prognostic = _downsample_only(prognostic, freq_label) - verification = _downsample_only(verification, freq_label) + prognostic = _downsample_only(prognostic, freq_label, method) + verification = _downsample_only(verification, freq_label, method) prognostic = prognostic.isel(time=time_slice) if inner_join: @@ -113,12 +118,18 @@ def resample_time( return prognostic, verification, grid -def _downsample_only(ds: xr.Dataset, freq_label: str) -> xr.Dataset: +def _downsample_only(ds: xr.Dataset, freq_label: str, method: str) -> xr.Dataset: """Resample in time, only if given freq_label is lower frequency than time sampling of given dataset ds""" ds_freq = ds.time.values[1] - ds.time.values[0] if ds_freq < pd.to_timedelta(freq_label): - return ds.resample(time=freq_label, label="right").nearest() + resampled = ds.resample(time=freq_label, label="right") + if method == "nearest": + return resampled.nearest() + elif method == "mean": + return resampled.mean() + else: + raise ValueError(f"Don't know how to resample with method={method}.") else: return ds