diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 5076c4edc..bd7c338d8 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -12,6 +12,8 @@ import numpy as np import pandas as pd +from collections import namedtuple + import logging logger = logging.getLogger(__name__) @@ -368,13 +370,22 @@ def log_get_generic_record(log, mod_name, dtype='numpy'): return None mod_type = _structdefs[mod_name] - rec = {} buf = ffi.new("void **") r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf) if r < 1: return None rbuf = ffi.cast(mod_type, buf) + rec = _make_generic_record(rbuf, mod_name, dtype) + libdutil.darshan_free(buf[0]) + + return rec + +def _make_generic_record(rbuf, mod_name, dtype='numpy'): + """ + Returns a record dictionary for an input record buffer for a given module. + """ + rec = {} rec['id'] = rbuf[0].base_rec.id rec['rank'] = rbuf[0].base_rec.rank if mod_name == 'H5D' or mod_name == 'PNETCDF_VAR': @@ -382,7 +393,6 @@ def log_get_generic_record(log, mod_name, dtype='numpy'): clst = np.copy(np.frombuffer(ffi.buffer(rbuf[0].counters), dtype=np.int64)) flst = np.copy(np.frombuffer(ffi.buffer(rbuf[0].fcounters), dtype=np.float64)) - libdutil.darshan_free(buf[0]) c_cols = counter_names(mod_name) fc_cols = fcounter_names(mod_name) @@ -416,7 +426,6 @@ def log_get_generic_record(log, mod_name, dtype='numpy'): rec['fcounters'] = df_fc return rec - @functools.lru_cache(maxsize=32) def counter_names(mod_name, fcnts=False, special=''): """ @@ -732,10 +741,11 @@ def _df_to_rec(rec_dict, mod_name, rec_index_of_interest=None): return buf -def log_get_derived_metrics(rec_dict, mod_name, nprocs): +def accumulate_records(rec_dict, mod_name, nprocs): """ Passes a set of records (in pandas format) to the Darshan accumulator - interface, and returns the corresponding derived metrics struct. + interface, and returns the corresponding derived metrics struct and + summary record. Parameters: rec_dict: Dictionary containing the counter and fcounter dataframes. @@ -743,7 +753,8 @@ def log_get_derived_metrics(rec_dict, mod_name, nprocs): nprocs: Number of processes participating in accumulation. Returns: - darshan_derived_metrics struct (cdata object) + namedtuple containing derived_metrics (cdata object) and + summary_record (dict). """ mod_idx = mod_name_to_idx(mod_name) darshan_accumulator = ffi.new("darshan_accumulator *") @@ -768,10 +779,10 @@ def log_get_derived_metrics(rec_dict, mod_name, nprocs): "to retrieve additional information from the stderr " "stream.") derived_metrics = ffi.new("struct darshan_derived_metrics *") - total_record = ffi.new(_structdefs[mod_name].replace("**", "*")) + summary_rbuf = ffi.new(_structdefs[mod_name].replace("**", "*")) r = libdutil.darshan_accumulator_emit(darshan_accumulator[0], derived_metrics, - total_record) + summary_rbuf) libdutil.darshan_accumulator_destroy(darshan_accumulator[0]) if r != 0: raise RuntimeError("A nonzero exit code was received from " @@ -779,4 +790,9 @@ def log_get_derived_metrics(rec_dict, mod_name, nprocs): "It may be possible " "to retrieve additional information from the stderr " "stream.") - return derived_metrics + + summary_rec = _make_generic_record(summary_rbuf, mod_name, dtype='pandas') + + # create namedtuple type to hold return values + AccumulatedRecords = namedtuple("AccumulatedRecords", ['derived_metrics', 'summary_record']) + return AccumulatedRecords(derived_metrics, summary_rec) diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py index ff7bda19c..1a824b0fc 100644 --- a/darshan-util/pydarshan/darshan/cli/summary.py +++ b/darshan-util/pydarshan/darshan/cli/summary.py @@ -14,7 +14,7 @@ import darshan import darshan.cli -from darshan.backend.cffi_backend import log_get_derived_metrics +from darshan.backend.cffi_backend import accumulate_records from darshan.lib.accum import log_get_bytes_bandwidth, log_file_count_summary_table from darshan.experimental.plots import ( plot_dxt_heatmap, @@ -22,6 +22,7 @@ plot_common_access_table, plot_access_histogram, plot_opcounts, + plot_posix_access_pattern, data_access_by_filesystem, ) @@ -521,7 +522,7 @@ def register_figures(self): # record and derived metrics rec_dict = self.report.records[mod].to_df() nprocs = self.report.metadata['job']['nprocs'] - derived_metrics = log_get_derived_metrics(rec_dict, mod, nprocs) + acc = accumulate_records(rec_dict, mod, nprocs) # this is really just some text # so using ReportFigure feels awkward... @@ -530,16 +531,30 @@ def register_figures(self): fig_title="", fig_func=None, fig_args=None, - fig_description=log_get_bytes_bandwidth(derived_metrics=derived_metrics, + fig_description=log_get_bytes_bandwidth(derived_metrics=acc.derived_metrics, mod_name=mod), text_only_color="blue") self.figures.append(bandwidth_fig) + if mod == "POSIX": + access_pattern_fig = ReportFigure( + section_title=sect_title, + fig_title="Access Pattern", + fig_func=plot_posix_access_pattern, + fig_args=dict(record=acc.summary_record), + fig_description="Sequential (offset greater than previous offset) vs. " + "consecutive (offset immediately following previous offset) " + "file operations. Note that, by definition, the sequential " + "operations are inclusive of consecutive operations.", + fig_width=350, + ) + self.figures.append(access_pattern_fig) + file_count_summary_fig = ReportFigure( section_title=sect_title, fig_title=f"File Count Summary
(estimated by {mod} I/O access offsets)", fig_func=log_file_count_summary_table, - fig_args=dict(derived_metrics=derived_metrics, + fig_args=dict(derived_metrics=acc.derived_metrics, mod_name=mod), fig_width=805, fig_description="") diff --git a/darshan-util/pydarshan/darshan/experimental/plots/__init__.py b/darshan-util/pydarshan/darshan/experimental/plots/__init__.py index f86d64e83..a4550496a 100644 --- a/darshan-util/pydarshan/darshan/experimental/plots/__init__.py +++ b/darshan-util/pydarshan/darshan/experimental/plots/__init__.py @@ -2,3 +2,4 @@ from .plot_opcounts import plot_opcounts from .plot_dxt_heatmap2 import plot_dxt_heatmap2 from .plot_io_cost import plot_io_cost +from .plot_posix_access_pattern import plot_posix_access_pattern diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_posix_access_pattern.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_posix_access_pattern.py new file mode 100644 index 000000000..fa123489a --- /dev/null +++ b/darshan-util/pydarshan/darshan/experimental/plots/plot_posix_access_pattern.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +import matplotlib.pyplot as plt +import numpy as np + +def autolabel(ax, rects): + """Attach a text label above each bar in *rects*, displaying its value.""" + for rect in rects: + height = rect.get_height() + ax.annotate( + '{}'.format(height), + xy=(rect.get_x() + rect.get_width() / 2, height), + xytext=(0, 3), # 3 points vertical offset + textcoords="offset points", + ha='center', + va='bottom', + rotation=45, + ) + +def plot_posix_access_pattern(record, ax=None): + """ + Plots read/write access patterns (sequential vs consecutive access counts) + for a given POSIX module file record. + + Args: + record (dict): POSIX module record to plot access pattern for. + + """ + + if ax is None: + fig, ax = plt.subplots() + else: + fig = None + + labels = ['read', 'write'] + total_data = [record['counters']['POSIX_READS'][0], + record['counters']['POSIX_WRITES'][0]] + seq_data = [record['counters']['POSIX_SEQ_READS'][0], + record['counters']['POSIX_SEQ_WRITES'][0]] + consec_data = [record['counters']['POSIX_CONSEC_READS'][0], + record['counters']['POSIX_CONSEC_WRITES'][0]] + + x = np.arange(len(labels)) # the label locations + width = 0.2 # the width of the bars + + rects_total = ax.bar(x - width, total_data, width, label = 'total') + rects_seq = ax.bar(x, seq_data, width, label = 'sequential') + rects_consec = ax.bar(x + width, consec_data, width, label = 'consecutive') + + ax.set_ylabel('Count') + ax.set_xticks(x) + ax.set_xticklabels(labels) + ax.legend(loc='center left', bbox_to_anchor=(1.05,.5)) + + ax.spines[['right', 'top']].set_visible(False) + + autolabel(ax=ax, rects=rects_total) + autolabel(ax=ax, rects=rects_seq) + autolabel(ax=ax, rects=rects_consec) + + plt.tight_layout() + + if fig is not None: + plt.close() + return fig diff --git a/darshan-util/pydarshan/darshan/tests/test_lib_accum.py b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py index 080264d16..b0599f620 100644 --- a/darshan-util/pydarshan/darshan/tests/test_lib_accum.py +++ b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py @@ -1,5 +1,5 @@ import darshan -from darshan.backend.cffi_backend import log_get_derived_metrics +from darshan.backend.cffi_backend import accumulate_records from darshan.lib.accum import log_get_bytes_bandwidth, log_file_count_summary_table from darshan.log_utils import get_log_path @@ -87,9 +87,9 @@ def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): if expected_str == "RuntimeError": with pytest.raises(RuntimeError, match=f"{mod_name} module does not support derived"): - log_get_derived_metrics(rec_dict, mod_name, nprocs) + accumulate_records(rec_dict, mod_name, nprocs) else: - derived_metrics = log_get_derived_metrics(rec_dict, mod_name, nprocs) + derived_metrics = accumulate_records(rec_dict, mod_name, nprocs).derived_metrics actual_str = log_get_bytes_bandwidth(derived_metrics=derived_metrics, mod_name=mod_name) assert actual_str == expected_str @@ -210,7 +210,7 @@ def test_file_count_summary_table(log_name, rec_dict = report.records[mod_name].to_df() nprocs = report.metadata['job']['nprocs'] - derived_metrics = log_get_derived_metrics(rec_dict, mod_name, nprocs) + derived_metrics = accumulate_records(rec_dict, mod_name, nprocs).derived_metrics actual_df = log_file_count_summary_table(derived_metrics=derived_metrics, mod_name=mod_name).df diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py index dfc53c941..6675b2a1c 100644 --- a/darshan-util/pydarshan/darshan/tests/test_summary.py +++ b/darshan-util/pydarshan/darshan/tests/test_summary.py @@ -99,10 +99,10 @@ def test_main_with_args(tmpdir, argv): "argv, expected_img_count, expected_table_count", [ (["noposix.darshan"], 3, 3), (["noposix.darshan", "--output=test.html"], 3, 3), - (["sample-dxt-simple.darshan"], 8, 6), - (["sample-dxt-simple.darshan", "--output=test.html"], 8, 6), - (["nonmpi_dxt_anonymized.darshan"], 6, 5), - (["ior_hdf5_example.darshan"], 11, 8), + (["sample-dxt-simple.darshan"], 9, 6), + (["sample-dxt-simple.darshan", "--output=test.html"], 9, 6), + (["nonmpi_dxt_anonymized.darshan"], 7, 5), + (["ior_hdf5_example.darshan"], 12, 8), ([None], 0, 0), ] )