Skip to content

Commit

Permalink
Merge pull request #903 from darshan-hpc/snyder/pydarshan-posix-acces…
Browse files Browse the repository at this point in the history
…s-pattern

WIP, ENH: add POSIX access pattern plot to darshan job summary
  • Loading branch information
tylerjereddy authored Apr 17, 2023
2 parents 59970a5 + 629a5c5 commit 122ad2c
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 21 deletions.
34 changes: 25 additions & 9 deletions darshan-util/pydarshan/darshan/backend/cffi_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import numpy as np
import pandas as pd

from collections import namedtuple

import logging
logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -368,21 +370,29 @@ def log_get_generic_record(log, mod_name, dtype='numpy'):
return None
mod_type = _structdefs[mod_name]

rec = {}
buf = ffi.new("void **")
r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf)
if r < 1:
return None
rbuf = ffi.cast(mod_type, buf)

rec = _make_generic_record(rbuf, mod_name, dtype)
libdutil.darshan_free(buf[0])

return rec

def _make_generic_record(rbuf, mod_name, dtype='numpy'):
"""
Returns a record dictionary for an input record buffer for a given module.
"""
rec = {}
rec['id'] = rbuf[0].base_rec.id
rec['rank'] = rbuf[0].base_rec.rank
if mod_name == 'H5D' or mod_name == 'PNETCDF_VAR':
rec['file_rec_id'] = rbuf[0].file_rec_id

clst = np.copy(np.frombuffer(ffi.buffer(rbuf[0].counters), dtype=np.int64))
flst = np.copy(np.frombuffer(ffi.buffer(rbuf[0].fcounters), dtype=np.float64))
libdutil.darshan_free(buf[0])

c_cols = counter_names(mod_name)
fc_cols = fcounter_names(mod_name)
Expand Down Expand Up @@ -416,7 +426,6 @@ def log_get_generic_record(log, mod_name, dtype='numpy'):
rec['fcounters'] = df_fc
return rec


@functools.lru_cache(maxsize=32)
def counter_names(mod_name, fcnts=False, special=''):
"""
Expand Down Expand Up @@ -732,18 +741,20 @@ def _df_to_rec(rec_dict, mod_name, rec_index_of_interest=None):
return buf


def log_get_derived_metrics(rec_dict, mod_name, nprocs):
def accumulate_records(rec_dict, mod_name, nprocs):
"""
Passes a set of records (in pandas format) to the Darshan accumulator
interface, and returns the corresponding derived metrics struct.
interface, and returns the corresponding derived metrics struct and
summary record.
Parameters:
rec_dict: Dictionary containing the counter and fcounter dataframes.
mod_name: Name of the Darshan module.
nprocs: Number of processes participating in accumulation.
Returns:
darshan_derived_metrics struct (cdata object)
namedtuple containing derived_metrics (cdata object) and
summary_record (dict).
"""
mod_idx = mod_name_to_idx(mod_name)
darshan_accumulator = ffi.new("darshan_accumulator *")
Expand All @@ -768,15 +779,20 @@ def log_get_derived_metrics(rec_dict, mod_name, nprocs):
"to retrieve additional information from the stderr "
"stream.")
derived_metrics = ffi.new("struct darshan_derived_metrics *")
total_record = ffi.new(_structdefs[mod_name].replace("**", "*"))
summary_rbuf = ffi.new(_structdefs[mod_name].replace("**", "*"))
r = libdutil.darshan_accumulator_emit(darshan_accumulator[0],
derived_metrics,
total_record)
summary_rbuf)
libdutil.darshan_accumulator_destroy(darshan_accumulator[0])
if r != 0:
raise RuntimeError("A nonzero exit code was received from "
"darshan_accumulator_emit() at the C level. "
"It may be possible "
"to retrieve additional information from the stderr "
"stream.")
return derived_metrics

summary_rec = _make_generic_record(summary_rbuf, mod_name, dtype='pandas')

# create namedtuple type to hold return values
AccumulatedRecords = namedtuple("AccumulatedRecords", ['derived_metrics', 'summary_record'])
return AccumulatedRecords(derived_metrics, summary_rec)
23 changes: 19 additions & 4 deletions darshan-util/pydarshan/darshan/cli/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@

import darshan
import darshan.cli
from darshan.backend.cffi_backend import log_get_derived_metrics
from darshan.backend.cffi_backend import accumulate_records
from darshan.lib.accum import log_get_bytes_bandwidth, log_file_count_summary_table
from darshan.experimental.plots import (
plot_dxt_heatmap,
plot_io_cost,
plot_common_access_table,
plot_access_histogram,
plot_opcounts,
plot_posix_access_pattern,
data_access_by_filesystem,
)

Expand Down Expand Up @@ -521,7 +522,7 @@ def register_figures(self):
# record and derived metrics
rec_dict = self.report.records[mod].to_df()
nprocs = self.report.metadata['job']['nprocs']
derived_metrics = log_get_derived_metrics(rec_dict, mod, nprocs)
acc = accumulate_records(rec_dict, mod, nprocs)

# this is really just some text
# so using ReportFigure feels awkward...
Expand All @@ -530,16 +531,30 @@ def register_figures(self):
fig_title="",
fig_func=None,
fig_args=None,
fig_description=log_get_bytes_bandwidth(derived_metrics=derived_metrics,
fig_description=log_get_bytes_bandwidth(derived_metrics=acc.derived_metrics,
mod_name=mod),
text_only_color="blue")
self.figures.append(bandwidth_fig)

if mod == "POSIX":
access_pattern_fig = ReportFigure(
section_title=sect_title,
fig_title="Access Pattern",
fig_func=plot_posix_access_pattern,
fig_args=dict(record=acc.summary_record),
fig_description="Sequential (offset greater than previous offset) vs. "
"consecutive (offset immediately following previous offset) "
"file operations. Note that, by definition, the sequential "
"operations are inclusive of consecutive operations.",
fig_width=350,
)
self.figures.append(access_pattern_fig)

file_count_summary_fig = ReportFigure(
section_title=sect_title,
fig_title=f"File Count Summary <br> (estimated by {mod} I/O access offsets)",
fig_func=log_file_count_summary_table,
fig_args=dict(derived_metrics=derived_metrics,
fig_args=dict(derived_metrics=acc.derived_metrics,
mod_name=mod),
fig_width=805,
fig_description="")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .plot_opcounts import plot_opcounts
from .plot_dxt_heatmap2 import plot_dxt_heatmap2
from .plot_io_cost import plot_io_cost
from .plot_posix_access_pattern import plot_posix_access_pattern
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import numpy as np

def autolabel(ax, rects):
    """
    Attach a text label above each bar in *rects*, displaying its value.

    Args:
        ax: axes-like object the bars were drawn on; only its
            ``annotate`` method is used.
        rects: iterable of bar patches (e.g. the container returned by
            ``ax.bar``); each must provide ``get_x()``, ``get_width()``
            and ``get_height()``.
    """
    for rect in rects:
        height = rect.get_height()
        # anchor the label at the horizontal center of the bar's top edge
        x_center = rect.get_x() + rect.get_width() / 2
        ax.annotate(
            f'{height}',
            xy=(x_center, height),
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha='center',
            va='bottom',
            rotation=45,
        )

def plot_posix_access_pattern(record, ax=None):
    """
    Plots read/write access patterns (total vs. sequential vs. consecutive
    access counts) for a given POSIX module file record.

    Args:
        record (dict): POSIX module record to plot access pattern for.
            ``record['counters'][NAME][0]`` is read for each of the
            ``POSIX_READS``, ``POSIX_WRITES``, ``POSIX_SEQ_READS``,
            ``POSIX_SEQ_WRITES``, ``POSIX_CONSEC_READS`` and
            ``POSIX_CONSEC_WRITES`` counters.
        ax (optional): axes to draw the bars on. When omitted, a new
            figure and axes are created.

    Returns:
        The newly created figure (already closed in pyplot's figure
        registry) when ``ax`` was not supplied; ``None`` when the caller
        provided ``ax``.
    """
    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = None

    counters = record['counters']
    labels = ['read', 'write']
    total_data = [counters['POSIX_READS'][0],
                  counters['POSIX_WRITES'][0]]
    seq_data = [counters['POSIX_SEQ_READS'][0],
                counters['POSIX_SEQ_WRITES'][0]]
    consec_data = [counters['POSIX_CONSEC_READS'][0],
                   counters['POSIX_CONSEC_WRITES'][0]]

    x = np.arange(len(labels))  # the label locations
    width = 0.2  # the width of the bars

    # three side-by-side bars per label: total, sequential, consecutive
    rects_total = ax.bar(x - width, total_data, width, label='total')
    rects_seq = ax.bar(x, seq_data, width, label='sequential')
    rects_consec = ax.bar(x + width, consec_data, width, label='consecutive')

    ax.set_ylabel('Count')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend(loc='center left', bbox_to_anchor=(1.05, .5))

    # index spines one at a time; list-style indexing is not available
    # in every matplotlib release
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    for rects in (rects_total, rects_seq, rects_consec):
        autolabel(ax=ax, rects=rects)

    # Lay out the figure that owns `ax` instead of pyplot's "current"
    # figure, which may be an unrelated one when the caller supplied `ax`.
    ax.figure.tight_layout()

    if fig is not None:
        # close exactly the figure created here, not whichever figure
        # pyplot currently considers active
        plt.close(fig)
    return fig
8 changes: 4 additions & 4 deletions darshan-util/pydarshan/darshan/tests/test_lib_accum.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import darshan
from darshan.backend.cffi_backend import log_get_derived_metrics
from darshan.backend.cffi_backend import accumulate_records
from darshan.lib.accum import log_get_bytes_bandwidth, log_file_count_summary_table
from darshan.log_utils import get_log_path

Expand Down Expand Up @@ -87,9 +87,9 @@ def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str):
if expected_str == "RuntimeError":
with pytest.raises(RuntimeError,
match=f"{mod_name} module does not support derived"):
log_get_derived_metrics(rec_dict, mod_name, nprocs)
accumulate_records(rec_dict, mod_name, nprocs)
else:
derived_metrics = log_get_derived_metrics(rec_dict, mod_name, nprocs)
derived_metrics = accumulate_records(rec_dict, mod_name, nprocs).derived_metrics
actual_str = log_get_bytes_bandwidth(derived_metrics=derived_metrics,
mod_name=mod_name)
assert actual_str == expected_str
Expand Down Expand Up @@ -210,7 +210,7 @@ def test_file_count_summary_table(log_name,
rec_dict = report.records[mod_name].to_df()
nprocs = report.metadata['job']['nprocs']

derived_metrics = log_get_derived_metrics(rec_dict, mod_name, nprocs)
derived_metrics = accumulate_records(rec_dict, mod_name, nprocs).derived_metrics

actual_df = log_file_count_summary_table(derived_metrics=derived_metrics,
mod_name=mod_name).df
Expand Down
8 changes: 4 additions & 4 deletions darshan-util/pydarshan/darshan/tests/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,10 @@ def test_main_with_args(tmpdir, argv):
"argv, expected_img_count, expected_table_count", [
(["noposix.darshan"], 3, 3),
(["noposix.darshan", "--output=test.html"], 3, 3),
(["sample-dxt-simple.darshan"], 8, 6),
(["sample-dxt-simple.darshan", "--output=test.html"], 8, 6),
(["nonmpi_dxt_anonymized.darshan"], 6, 5),
(["ior_hdf5_example.darshan"], 11, 8),
(["sample-dxt-simple.darshan"], 9, 6),
(["sample-dxt-simple.darshan", "--output=test.html"], 9, 6),
(["nonmpi_dxt_anonymized.darshan"], 7, 5),
(["ior_hdf5_example.darshan"], 12, 8),
([None], 0, 0),
]
)
Expand Down

0 comments on commit 122ad2c

Please sign in to comment.