darshan-hpc · tylerjereddy · Apr 17, 2023 · Feb 22, 2023 · Feb 22, 2023 · Feb 22, 2023
diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py
@@ -12,6 +12,8 @@
 import numpy as np
 import pandas as pd
 
+from collections import namedtuple
+
 import logging
 logger = logging.getLogger(__name__)
 
@@ -368,21 +370,29 @@ def log_get_generic_record(log, mod_name, dtype='numpy'):
         return None
     mod_type = _structdefs[mod_name]
 
-    rec = {}
     buf = ffi.new("void **")
     r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf)
     if r < 1:
         return None
     rbuf = ffi.cast(mod_type, buf)
 
+    rec = _make_generic_record(rbuf, mod_name, dtype)
+    libdutil.darshan_free(buf[0])
+
+    return rec
+
+def _make_generic_record(rbuf, mod_name, dtype='numpy'):
+    """
+    Returns a record dictionary for an input record buffer for a given module.
+    """
+    rec = {}
     rec['id'] = rbuf[0].base_rec.id
     rec['rank'] = rbuf[0].base_rec.rank
     if mod_name == 'H5D' or mod_name == 'PNETCDF_VAR':
         rec['file_rec_id'] = rbuf[0].file_rec_id
 
     clst = np.copy(np.frombuffer(ffi.buffer(rbuf[0].counters), dtype=np.int64))
     flst = np.copy(np.frombuffer(ffi.buffer(rbuf[0].fcounters), dtype=np.float64))
-    libdutil.darshan_free(buf[0])
 
     c_cols = counter_names(mod_name)
     fc_cols = fcounter_names(mod_name)
@@ -416,7 +426,6 @@ def log_get_generic_record(log, mod_name, dtype='numpy'):
         rec['fcounters'] = df_fc
     return rec
 
-
 @functools.lru_cache(maxsize=32)
 def counter_names(mod_name, fcnts=False, special=''):
     """
@@ -732,18 +741,20 @@ def _df_to_rec(rec_dict, mod_name, rec_index_of_interest=None):
     return buf
 
 
-def log_get_derived_metrics(rec_dict, mod_name, nprocs):
+def accumulate_records(rec_dict, mod_name, nprocs):
     """
     Passes a set of records (in pandas format) to the Darshan accumulator
-    interface, and returns the corresponding derived metrics struct.
+    interface, and returns the corresponding derived metrics struct and
+    summary record.
 
     Parameters:
         rec_dict: Dictionary containing the counter and fcounter dataframes.
         mod_name: Name of the Darshan module.
         nprocs: Number of processes participating in accumulation.
 
     Returns:
-        darshan_derived_metrics struct (cdata object)
+        namedtuple containing derived_metrics (cdata object) and
+        summary_record (dict).
     """
     mod_idx = mod_name_to_idx(mod_name)
     darshan_accumulator = ffi.new("darshan_accumulator *")
@@ -768,15 +779,20 @@ def log_get_derived_metrics(rec_dict, mod_name, nprocs):
                            "to retrieve additional information from the stderr "
                            "stream.")
     derived_metrics = ffi.new("struct darshan_derived_metrics *")
-    total_record = ffi.new(_structdefs[mod_name].replace("**", "*"))
+    summary_rbuf = ffi.new(_structdefs[mod_name].replace("**", "*"))
     r = libdutil.darshan_accumulator_emit(darshan_accumulator[0],
                                           derived_metrics,
-                                          total_record)
+                                          summary_rbuf)
     libdutil.darshan_accumulator_destroy(darshan_accumulator[0])
     if r != 0:
         raise RuntimeError("A nonzero exit code was received from "
                            "darshan_accumulator_emit() at the C level. "
                            "It may be possible "
                            "to retrieve additional information from the stderr "
                            "stream.")
-    return derived_metrics
+
+    summary_rec = _make_generic_record(summary_rbuf, mod_name, dtype='pandas')
+
+    # create namedtuple type to hold return values
+    AccumulatedRecords = namedtuple("AccumulatedRecords", ['derived_metrics', 'summary_record'])
+    return AccumulatedRecords(derived_metrics, summary_rec)
diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py
@@ -14,14 +14,15 @@
 
 import darshan
 import darshan.cli
-from darshan.backend.cffi_backend import log_get_derived_metrics
+from darshan.backend.cffi_backend import accumulate_records
 from darshan.lib.accum import log_get_bytes_bandwidth, log_file_count_summary_table
 from darshan.experimental.plots import (
     plot_dxt_heatmap,
     plot_io_cost,
     plot_common_access_table,
     plot_access_histogram,
     plot_opcounts,
+    plot_posix_access_pattern,
     data_access_by_filesystem,
 )
 
@@ -521,7 +522,7 @@ def register_figures(self):
                     # record and derived metrics
                     rec_dict = self.report.records[mod].to_df()
                     nprocs = self.report.metadata['job']['nprocs']
-                    derived_metrics = log_get_derived_metrics(rec_dict, mod, nprocs)
+                    acc = accumulate_records(rec_dict, mod, nprocs)
 
                     # this is really just some text
                     # so using ReportFigure feels awkward...
@@ -530,16 +531,30 @@ def register_figures(self):
                             fig_title="",
                             fig_func=None,
                             fig_args=None,
-                            fig_description=log_get_bytes_bandwidth(derived_metrics=derived_metrics,
+                            fig_description=log_get_bytes_bandwidth(derived_metrics=acc.derived_metrics,
                                                                     mod_name=mod),
                             text_only_color="blue")
                     self.figures.append(bandwidth_fig)
 
+                    if mod == "POSIX":
+                        access_pattern_fig = ReportFigure(
+                            section_title=sect_title,
+                            fig_title="Access Pattern",
+                            fig_func=plot_posix_access_pattern,
+                            fig_args=dict(record=acc.summary_record),
+                            fig_description="Sequential (offset greater than previous offset) vs. "
+                                            "consecutive (offset immediately following previous offset) "
+                                            "file operations. Note that, by definition, the sequential "
+                                            "operations are inclusive of consecutive operations.",
+                            fig_width=350,
+                        )
+                        self.figures.append(access_pattern_fig)
+
                     file_count_summary_fig = ReportFigure(
                             section_title=sect_title,
                             fig_title=f"File Count Summary <br> (estimated by {mod} I/O access offsets)",
                             fig_func=log_file_count_summary_table,
-                            fig_args=dict(derived_metrics=derived_metrics,
+                            fig_args=dict(derived_metrics=acc.derived_metrics,
                                           mod_name=mod),
                             fig_width=805,
                             fig_description="")

diff --git a/darshan-util/pydarshan/darshan/experimental/plots/__init__.py b/darshan-util/pydarshan/darshan/experimental/plots/__init__.py
@@ -2,3 +2,4 @@
 from .plot_opcounts import plot_opcounts
 from .plot_dxt_heatmap2 import plot_dxt_heatmap2
 from .plot_io_cost import plot_io_cost
+from .plot_posix_access_pattern import plot_posix_access_pattern
diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_posix_access_pattern.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_posix_access_pattern.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+def autolabel(ax, rects):
+    """
+    Annotate every bar in *rects* on *ax* with its height, just above the bar.
+    """
+    # shared placement: centred on the bar, 3 points above its top, tilted 45
+    text_style = dict(xytext=(0, 3), textcoords="offset points",
+                      ha='center', va='bottom', rotation=45)
+
+    for bar in rects:
+        value = bar.get_height()
+        center_x = bar.get_x() + bar.get_width() / 2
+        # anchor at the top-centre of the bar; the label text is the height
+        ax.annotate('{}'.format(value), xy=(center_x, value), **text_style)
+
+def plot_posix_access_pattern(record, ax=None):
+    """
+    Draw a grouped bar chart of total, sequential and consecutive
+    read/write operation counts for one POSIX module record.
+
+    Args:
+        record (dict): POSIX record; entry ``[0]`` of each counter is read.
+        ax: axes to draw on; when None a new figure and axes are created.
+
+    Returns:
+        The created (and closed) figure, or None when ``ax`` was supplied.
+    """
+    fig = None
+    if ax is None:
+        fig, ax = plt.subplots()
+
+    counters = record['counters']
+    ops = ['read', 'write']
+    totals = [counters['POSIX_READS'][0], counters['POSIX_WRITES'][0]]
+    sequential = [counters['POSIX_SEQ_READS'][0],
+                  counters['POSIX_SEQ_WRITES'][0]]
+    consecutive = [counters['POSIX_CONSEC_READS'][0],
+                   counters['POSIX_CONSEC_WRITES'][0]]
+
+    centers = np.arange(len(ops))  # x position of each read/write group
+    bar_w = 0.2
+    # three side-by-side bars per group: total | sequential | consecutive
+    bar_groups = [
+        ax.bar(centers - bar_w, totals, bar_w, label='total'),
+        ax.bar(centers, sequential, bar_w, label='sequential'),
+        ax.bar(centers + bar_w, consecutive, bar_w, label='consecutive'),
+    ]
+
+    ax.set_ylabel('Count')
+    ax.set_xticks(centers)
+    ax.set_xticklabels(ops)
+    ax.legend(loc='center left', bbox_to_anchor=(1.05, .5))
+    ax.spines[['right', 'top']].set_visible(False)
+
+    for bars in bar_groups:
+        autolabel(ax=ax, rects=bars)
+
+    plt.tight_layout()
+    if fig is not None:
+        plt.close(fig)
+        return fig
diff --git a/darshan-util/pydarshan/darshan/tests/test_lib_accum.py b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py
@@ -1,5 +1,5 @@
 import darshan
-from darshan.backend.cffi_backend import log_get_derived_metrics
+from darshan.backend.cffi_backend import accumulate_records
 from darshan.lib.accum import log_get_bytes_bandwidth, log_file_count_summary_table
 from darshan.log_utils import get_log_path
 
@@ -87,9 +87,9 @@ def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str):
             if expected_str == "RuntimeError":
                 with pytest.raises(RuntimeError,
                                    match=f"{mod_name} module does not support derived"):
-                    log_get_derived_metrics(rec_dict, mod_name, nprocs)
+                    accumulate_records(rec_dict, mod_name, nprocs)
             else:
-                derived_metrics = log_get_derived_metrics(rec_dict, mod_name, nprocs)
+                derived_metrics = accumulate_records(rec_dict, mod_name, nprocs).derived_metrics
                 actual_str = log_get_bytes_bandwidth(derived_metrics=derived_metrics,
                                                      mod_name=mod_name)
                 assert actual_str == expected_str
@@ -210,7 +210,7 @@ def test_file_count_summary_table(log_name,
         rec_dict = report.records[mod_name].to_df()
         nprocs = report.metadata['job']['nprocs']
 
-    derived_metrics = log_get_derived_metrics(rec_dict, mod_name, nprocs)
+    derived_metrics = accumulate_records(rec_dict, mod_name, nprocs).derived_metrics
 
     actual_df = log_file_count_summary_table(derived_metrics=derived_metrics,
                                              mod_name=mod_name).df

diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py
@@ -99,10 +99,10 @@ def test_main_with_args(tmpdir, argv):
     "argv, expected_img_count, expected_table_count", [
         (["noposix.darshan"], 3, 3),
         (["noposix.darshan", "--output=test.html"], 3, 3),
-        (["sample-dxt-simple.darshan"], 8, 6),
-        (["sample-dxt-simple.darshan", "--output=test.html"], 8, 6),
-        (["nonmpi_dxt_anonymized.darshan"], 6, 5),
-        (["ior_hdf5_example.darshan"], 11, 8),
+        (["sample-dxt-simple.darshan"], 9, 6),
+        (["sample-dxt-simple.darshan", "--output=test.html"], 9, 6),
+        (["nonmpi_dxt_anonymized.darshan"], 7, 5),
+        (["ior_hdf5_example.darshan"], 12, 8),
         ([None], 0, 0),
     ]
 )