Merge pull request #911 from darshan-hpc/snyder/pydarshan-cleanup

PyDarshan job summary cleanup fixes
darshan-hpc · Apr 13, 2023 · 05efd1d · 05efd1d
2 parents b384a5e + dd5a305
commit 05efd1d
Show file tree

Hide file tree

Showing 10 changed files with 93 additions and 77 deletions.
diff --git a/darshan-util/pydarshan/darshan/cli/style.css b/darshan-util/pydarshan/darshan/cli/style.css
@@ -346,9 +346,8 @@ figure img {
 }
 
 figcaption {
-  font-style: italic;
   font-size: 0.75em;
-  font-weight: 200;
+  font-weight: 300;
   margin: 0;
 }
 

diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py
@@ -335,7 +335,7 @@ def register_figures(self):
         if not self.report.modules:
             # no data in report to summarize, print warning and that's it
             no_data_message = (
-                "This Darshan log file has no instrumentation records,"
+                "This Darshan log file has no instrumentation records, "
                 "there is no data to plot. Did this app do any I/O?"
             )
             fig = ReportFigure(

diff --git a/darshan-util/pydarshan/darshan/experimental/plots/data_access_by_filesystem.py b/darshan-util/pydarshan/darshan/experimental/plots/data_access_by_filesystem.py
@@ -13,7 +13,7 @@
 import matplotlib
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
-
+import humanize
 
 def process_byte_counts(df_reads, df_writes):
     """
@@ -505,17 +505,18 @@ def plot_data(fig: Any,
     num_cats: an integer representing the number of categories
     to plot; default ``None`` plots all categories
     """
+    fontsize = 18
     list_byte_axes: list = []
     list_count_axes: list = []
     # use log10 scale if range exceeds
     # two orders of magnitude in a column
     use_log = [False, False]
-    for idx, series_pair in enumerate([[bytes_rd_series, bytes_wr_series, 1048576],
-                                       [file_rd_series, file_wr_series, 1]]):
+    for idx, series_pair in enumerate([[bytes_rd_series, bytes_wr_series],
+                                       [file_rd_series, file_wr_series]]):
         maxval = max(series_pair[0].max(), series_pair[1].max())
         minval = max(min(series_pair[0].min(), series_pair[1].min()), 1)
         # ratio of the largest value to the smallest (clamped to >= 1)
-        ratio = ((maxval / series_pair[2]) / (minval / series_pair[2]))
+        ratio = (maxval / minval)
         if ratio > 100:
             use_log[idx] = True
 
@@ -540,64 +541,55 @@ def plot_data(fig: Any,
         list_byte_axes.append(ax_filesystem_bytes)
         list_count_axes.append(ax_filesystem_counts)
 
-        # convert to MiB using 1048576 (ie: 2**20)
-        bytes_read = bytes_rd_series[filesystem]/1048576
-        bytes_written = bytes_wr_series[filesystem]/1048576
-        files_written = file_wr_series[filesystem]
-        files_read = file_rd_series[filesystem]
-
-        # scale to fit longer filesystem
-        # strings on the left side of the plots
-        # NOTE: may need more sophisticated scaling
-        # eventually
-        if len(filesystem) <= 8 and not '<STD' in filesystem:
-            fontsize = 18
-        else:
-            fontsize = 12
+        bytes_read = bytes_rd_series[filesystem]
+        bytes_written = bytes_wr_series[filesystem]
+        files_written = int(file_wr_series[filesystem])
+        files_read = int(file_rd_series[filesystem])
 
         # anonymized STD.. streams have associated integers
         # that are stored in the filesystem data field
         # but that are confusing to display, so strip them
         if filesystem.startswith('anonymized'):
             ax_filesystem_bytes.annotate('anonymized',
-                                         (-0.3, 0.5),
+                                         (-0.1, 0.5),
                                          fontsize=fontsize,
                                          xycoords='axes fraction',
+                                         ha="right",
                                          va="center")
         else:
             ax_filesystem_bytes.annotate(filesystem,
-                                         (-0.3, 0.5),
+                                         (-0.1, 0.5),
                                          fontsize=fontsize,
                                          xycoords='axes fraction',
+                                         ha="right",
                                          va="center")
 
         ax_filesystem_counts.barh(0, files_written, color='red', alpha=0.3)
         ax_filesystem_counts.barh(1, files_read, color='blue', alpha=0.3)
 
-        ax_filesystem_bytes.text(0, 0.75, f' # bytes read ({bytes_read:.2E} MiB)',
+        bytes_read_str = humanize.naturalsize(bytes_read,
+                                              binary=True,
+                                              format="%.2f")
+        ax_filesystem_bytes.text(0, 0.75, f' bytes read: {bytes_read_str}',
                                  transform=ax_filesystem_bytes.transAxes,
+                                 fontsize=fontsize,
                                  va="center")
-        ax_filesystem_bytes.text(0, 0.25, f' # bytes written ({bytes_written:.2E} MiB)',
+        bytes_written_str = humanize.naturalsize(bytes_written,
+                                                 binary=True,
+                                                 format="%.2f")
+        ax_filesystem_bytes.text(0, 0.25, f' bytes written: {bytes_written_str}',
                                  transform=ax_filesystem_bytes.transAxes,
+                                 fontsize=fontsize,
                                  va="center")
 
-        if files_read == 0:
-            ax_filesystem_counts.text(0, 0.75, ' 0 files read',
-                                      transform=ax_filesystem_counts.transAxes,
-                                      va="center")
-        else:
-            ax_filesystem_counts.text(0, 0.75, f' # files read ({files_read:.2E})',
-                                      transform=ax_filesystem_counts.transAxes,
-                                      va="center")
-
-        if files_written == 0:
-            ax_filesystem_counts.text(0, 0.25, ' 0 files written',
-                                      transform=ax_filesystem_counts.transAxes,
-                                      va="center")
-        else:
-            ax_filesystem_counts.text(0, 0.25, f' # files written ({files_written:.2E})',
-                                      transform=ax_filesystem_counts.transAxes,
-                                      va="center")
+        ax_filesystem_counts.text(0, 0.75, f' files read: {files_read}',
+                                  transform=ax_filesystem_counts.transAxes,
+                                  fontsize=fontsize,
+                                  va="center")
+        ax_filesystem_counts.text(0, 0.25, f' files written: {files_written}',
+                                  transform=ax_filesystem_counts.transAxes,
+                                  fontsize=fontsize,
+                                  va="center")
 
         ax_filesystem_bytes.barh(0, bytes_written, color='red', alpha=0.3)
         ax_filesystem_bytes.barh(1, bytes_read, color='blue', alpha=0.3)
@@ -696,7 +688,8 @@ def plot_with_report(report: darshan.DarshanReport,
     # produce a decent aspect ratio
     if height < 16:
         height = 16
-
+    # add additional padding to left margin for annotations
+    fig.subplots_adjust(left=0.2)
     fig.set_size_inches(12, height)
     plt.close(fig)
     return fig
diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_access_histogram.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_access_histogram.py
@@ -7,15 +7,16 @@ def autolabel(ax, rects):
     """Attach a text label above each bar in *rects*, displaying its value."""
     for rect in rects:
         height = rect.get_height()
-        ax.annotate(
-            '{}'.format(height),
-            xy=(rect.get_x() + rect.get_width() / 2, height),
-            xytext=(0, 3),  # 3 points vertical offset
-            textcoords="offset points",
-            ha='center',
-            va='bottom',
-            rotation=0,
-        )
+        if height > 0:
+            ax.annotate(
+                '{}'.format(height),
+                xy=(rect.get_x() + rect.get_width() / 2, height),
+                xytext=(0, 3),  # 3 points vertical offset
+                textcoords="offset points",
+                ha='center',
+                va='bottom',
+                rotation=45,
+            )
 
 def plot_access_histogram(report, mod, ax=None):
     """
@@ -81,8 +82,11 @@ def plot_access_histogram(report, mod, ax=None):
     ax.set_xlabel('Access Sizes')
     ax.set_xticks(x)
     ax.set_xticklabels(labels, rotation=45, ha='right')
+    ax.set_ylim(ymin = 0)
     ax.legend()
 
+    ax.spines[['right', 'top']].set_visible(False)
+
     autolabel(ax=ax, rects=rects1)
     autolabel(ax=ax, rects=rects2)
 

diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py
@@ -178,6 +178,7 @@ def plot_io_cost(report: darshan.DarshanReport) -> Any:
     ax_raw.set_ylabel("Runtime (s)")
     handles, labels = ax_raw.get_legend_handles_labels()
     ax_norm.legend(handles[::-1], labels[::-1], loc="upper left", bbox_to_anchor=(1.22, 1.02))
+    ax_norm.set_ylabel("Runtime (%)")
     # rotate the xticklabels so they don't overlap
     for ax in [ax_raw, ax_norm]:
         for label in ax.get_xticklabels():

diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py
@@ -9,15 +9,16 @@ def autolabel(ax, rects):
     """Attach a text label above each bar in *rects*, displaying its height."""
     for rect in rects:
         height = rect.get_height()
-        ax.annotate(
-            '{}'.format(height),
-            xy=(rect.get_x() + rect.get_width() / 2, height),
-            xytext=(0, 3),  # 3 points vertical offset
-            textcoords="offset points",
-            ha='center',
-            va='bottom',
-            rotation=45,
-        )
+        if height > 0:
+            ax.annotate(
+                '{}'.format(height),
+                xy=(rect.get_x() + rect.get_width() / 2, height),
+                xytext=(0, 3),  # 3 points vertical offset
+                textcoords="offset points",
+                ha='center',
+                va='bottom',
+                rotation=45,
+            )
 
 def gather_count_data(report, mod):
     """
@@ -184,6 +185,9 @@ def plot_opcounts(report, mod, ax=None):
     ax.set_ylabel('Count')
     ax.set_xticks(x)
     ax.set_xticklabels(labels, rotation=90)
+    ax.set_ylim(ymin = 0)
+
+    ax.spines[['right', 'top']].set_visible(False)
 
     autolabel(ax=ax, rects=rects)
 

diff --git a/darshan-util/pydarshan/darshan/lib/accum.py b/darshan-util/pydarshan/darshan/lib/accum.py
@@ -100,5 +100,6 @@ def log_file_count_summary_table(derived_metrics,
     df.drop(columns="index", inplace=True)
     ret = plot_common_access_table.DarshanReportTable(df,
                                                       col_space=200,
-                                                      justify="center")
+                                                      justify="center",
+                                                      index_names=False)
     return ret
diff --git a/darshan-util/pydarshan/darshan/tests/test_data_access_by_filesystem.py b/darshan-util/pydarshan/darshan/tests/test_data_access_by_filesystem.py
@@ -268,8 +268,8 @@ def test_plot_data(file_rd_series, file_wr_series, bytes_rd_series, bytes_wr_ser
         if isinstance(child, matplotlib.text.Text):
             actual_list_text_in_fig.append(child.get_text())
 
-    for expected_text_entry in [matplotlib.text.Text(0, 1, ' # files read (3.00E+00)'),
-                                matplotlib.text.Text(0, 0, ' # files written (1.40E+01)')]:
+    for expected_text_entry in [matplotlib.text.Text(0, 1, ' files read: 3'),
+                                matplotlib.text.Text(0, 0, ' files written: 14')]:
         assert expected_text_entry.get_text() in actual_list_text_in_fig
 
     # enforce invisible right-side spine so that
@@ -298,10 +298,7 @@ def test_empty_data_posix_y_axis_annot_position():
             if isinstance(child, matplotlib.text.Annotation):
                 actual_text = child.get_text()
                 actual_fontsize = child.get_fontsize()
-                if len(actual_text) <= 8:
-                    assert actual_fontsize == 18
-                else:
-                    assert actual_fontsize == 12
+                assert actual_fontsize == 18
 
 @pytest.mark.parametrize("log_file_name, expected_text_labels", [
     ('noposixopens.darshan', ['/global', 'anonymized']),
@@ -324,10 +321,6 @@ def test_cat_labels_std_streams(log_file_name, expected_text_labels):
             if isinstance(child, matplotlib.text.Annotation):
                 actual_text = child.get_text()
                 actual_text_labels.append(actual_text)
-                if 'STD' in actual_text:
-                    # format the STD.. streams properly
-                    actual_fontsize = child.get_fontsize()
-                    assert actual_fontsize == 12
 
     assert actual_text_labels == expected_text_labels
 
@@ -409,10 +402,8 @@ def test_plot_data_shared_x_axis():
     wr_bytes = [1e8, 1e9, 1e10, 1e11]
     rd_file_cts = [1e3, 1e4, 1e5, 1e6]
     wr_file_cts = [1e2, 1e3, 1e4, 1e5]
-    # multiply by the MiB conversion factor
-    factor = 1048576
-    bytes_rd_series = pd.Series(data=rd_bytes, index=filesystem_roots) * factor
-    bytes_wr_series = pd.Series(data=wr_bytes, index=filesystem_roots) * factor
+    bytes_rd_series = pd.Series(data=rd_bytes, index=filesystem_roots)
+    bytes_wr_series = pd.Series(data=wr_bytes, index=filesystem_roots)
     file_rd_series = pd.Series(data=rd_file_cts, index=filesystem_roots)
     file_wr_series = pd.Series(data=wr_file_cts, index=filesystem_roots)
     fig = plt.figure()

diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py b/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py
@@ -160,6 +160,12 @@ def test_xticks_and_labels(log_path, func, expected_xticklabels, mod):
 
     assert_array_equal(actual_xticks, expected_xticks)
     assert_array_equal(actual_xticklabels, expected_xticklabels)
+    # see Argonne formatting reqs in gh-910
+    spines = ax.spines
+    assert not spines["top"].get_visible()
+    assert not spines["right"].get_visible()
+    assert spines["bottom"].get_visible()
+    assert spines["left"].get_visible()
 
 
 @pytest.mark.parametrize(

diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py
@@ -21,6 +21,22 @@
     has_lxml = False
 
 
+def _enforce_html_report_aesthetics(report_str):
+    # formatting requirements, partly from Argonne feedback in gh-910;
+    # fail with a clear message if the figcaption CSS rule is missing
+    prog = re.compile(r"figcaption {\n.*\n.*\n.*\n}")
+    m = prog.search(report_str)
+    assert m is not None, "figcaption CSS rule not found in report"
+    result = m.group(0)
+    assert "font-weight: 300" in result
+    assert "font-size: 0.75em" in result
+    assert "italic" not in result
+
+    # Argonne team doesn't like index label
+    # for File Count Summary table
+    assert "<th>type</th>" not in report_str
+
+
 @pytest.mark.parametrize(
     "argv", [
         ["./tests/input/sample.darshan"],
@@ -117,6 +133,7 @@ def test_main_without_args(tmpdir, argv, expected_img_count, expected_table_coun
                 with darshan.DarshanReport(filename=argv[0], read_all=False) as report:
                     with open(expected_save_path) as html_report:
                         report_str = html_report.read()
+                        _enforce_html_report_aesthetics(report_str=report_str)
                         if "DXT" in "\t".join(report.modules):
                             for dxt_mod in ["DXT_POSIX", "DXT_MPIIO"]:
                                 if dxt_mod in report.modules:
@@ -215,7 +232,7 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath):
                             assert "Consider enabling the runtime heatmap module" in report_str
                         else:
                             # check empty log warning and return
-                            assert "This Darshan log file has no instrumentation records" in report_str
+                            assert "This Darshan log file has no instrumentation records, " in report_str
                             return
 
                     # check if I/O cost figure is present