Skip to content

Commit

Permalink
Merge pull request #911 from darshan-hpc/snyder/pydarshan-cleanup
Browse files Browse the repository at this point in the history
PyDarshan job summary cleanup fixes
  • Loading branch information
tylerjereddy authored Apr 13, 2023
2 parents b384a5e + dd5a305 commit 05efd1d
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 77 deletions.
3 changes: 1 addition & 2 deletions darshan-util/pydarshan/darshan/cli/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,8 @@ figure img {
}

figcaption {
font-style: italic;
font-size: 0.75em;
font-weight: 200;
font-weight: 300;
margin: 0;
}

Expand Down
2 changes: 1 addition & 1 deletion darshan-util/pydarshan/darshan/cli/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def register_figures(self):
if not self.report.modules:
# no data in report to summarize, print warning and that's it
no_data_message = (
"This Darshan log file has no instrumentation records,"
"This Darshan log file has no instrumentation records, "
"there is no data to plot. Did this app do any I/O?"
)
fig = ReportFigure(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import humanize

def process_byte_counts(df_reads, df_writes):
"""
Expand Down Expand Up @@ -505,17 +505,18 @@ def plot_data(fig: Any,
num_cats: an integer representing the number of categories
to plot; default ``None`` plots all categories
"""
fontsize = 18
list_byte_axes: list = []
list_count_axes: list = []
# use log10 scale if range exceeds
# two orders of magnitude in a column
use_log = [False, False]
for idx, series_pair in enumerate([[bytes_rd_series, bytes_wr_series, 1048576],
[file_rd_series, file_wr_series, 1]]):
for idx, series_pair in enumerate([[bytes_rd_series, bytes_wr_series],
[file_rd_series, file_wr_series]]):
maxval = max(series_pair[0].max(), series_pair[1].max())
minval = max(min(series_pair[0].min(), series_pair[1].min()), 1)
# adjust ratio to MiB when needed
ratio = ((maxval / series_pair[2]) / (minval / series_pair[2]))
ratio = (maxval / minval)
if ratio > 100:
use_log[idx] = True

Expand All @@ -540,64 +541,55 @@ def plot_data(fig: Any,
list_byte_axes.append(ax_filesystem_bytes)
list_count_axes.append(ax_filesystem_counts)

# convert to MiB using 1048576 (ie: 2**20)
bytes_read = bytes_rd_series[filesystem]/1048576
bytes_written = bytes_wr_series[filesystem]/1048576
files_written = file_wr_series[filesystem]
files_read = file_rd_series[filesystem]

# scale to fit longer filesystem
# strings on the left side of the plots
# NOTE: may need more sophisticated scaling
# eventually
if len(filesystem) <= 8 and not '<STD' in filesystem:
fontsize = 18
else:
fontsize = 12
bytes_read = bytes_rd_series[filesystem]
bytes_written = bytes_wr_series[filesystem]
files_written = int(file_wr_series[filesystem])
files_read = int(file_rd_series[filesystem])

# anonymized STD.. streams have associated integers
# that are stored in the filesystem data field
# but that are confusing to display, so strip them
if filesystem.startswith('anonymized'):
ax_filesystem_bytes.annotate('anonymized',
(-0.3, 0.5),
(-0.1, 0.5),
fontsize=fontsize,
xycoords='axes fraction',
ha="right",
va="center")
else:
ax_filesystem_bytes.annotate(filesystem,
(-0.3, 0.5),
(-0.1, 0.5),
fontsize=fontsize,
xycoords='axes fraction',
ha="right",
va="center")

ax_filesystem_counts.barh(0, files_written, color='red', alpha=0.3)
ax_filesystem_counts.barh(1, files_read, color='blue', alpha=0.3)

ax_filesystem_bytes.text(0, 0.75, f' # bytes read ({bytes_read:.2E} MiB)',
bytes_read_str = humanize.naturalsize(bytes_read,
binary=True,
format="%.2f")
ax_filesystem_bytes.text(0, 0.75, f' bytes read: {bytes_read_str}',
transform=ax_filesystem_bytes.transAxes,
fontsize=fontsize,
va="center")
ax_filesystem_bytes.text(0, 0.25, f' # bytes written ({bytes_written:.2E} MiB)',
bytes_written_str = humanize.naturalsize(bytes_written,
binary=True,
format="%.2f")
ax_filesystem_bytes.text(0, 0.25, f' bytes written: {bytes_written_str}',
transform=ax_filesystem_bytes.transAxes,
fontsize=fontsize,
va="center")

if files_read == 0:
ax_filesystem_counts.text(0, 0.75, ' 0 files read',
transform=ax_filesystem_counts.transAxes,
va="center")
else:
ax_filesystem_counts.text(0, 0.75, f' # files read ({files_read:.2E})',
transform=ax_filesystem_counts.transAxes,
va="center")

if files_written == 0:
ax_filesystem_counts.text(0, 0.25, ' 0 files written',
transform=ax_filesystem_counts.transAxes,
va="center")
else:
ax_filesystem_counts.text(0, 0.25, f' # files written ({files_written:.2E})',
transform=ax_filesystem_counts.transAxes,
va="center")
ax_filesystem_counts.text(0, 0.75, f' files read: {files_read}',
transform=ax_filesystem_counts.transAxes,
fontsize=fontsize,
va="center")
ax_filesystem_counts.text(0, 0.25, f' files written: {files_written}',
transform=ax_filesystem_counts.transAxes,
fontsize=fontsize,
va="center")

ax_filesystem_bytes.barh(0, bytes_written, color='red', alpha=0.3)
ax_filesystem_bytes.barh(1, bytes_read, color='blue', alpha=0.3)
Expand Down Expand Up @@ -696,7 +688,8 @@ def plot_with_report(report: darshan.DarshanReport,
# produce a decent aspect ratio
if height < 16:
height = 16

# add additional padding to left margin for annotations
fig.subplots_adjust(left=0.2)
fig.set_size_inches(12, height)
plt.close(fig)
return fig
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@ def autolabel(ax, rects):
"""Attach a text label above each bar in *rects*, displaying its value."""
for rect in rects:
height = rect.get_height()
ax.annotate(
'{}'.format(height),
xy=(rect.get_x() + rect.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center',
va='bottom',
rotation=0,
)
if height > 0:
ax.annotate(
'{}'.format(height),
xy=(rect.get_x() + rect.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center',
va='bottom',
rotation=45,
)

def plot_access_histogram(report, mod, ax=None):
"""
Expand Down Expand Up @@ -81,8 +82,11 @@ def plot_access_histogram(report, mod, ax=None):
ax.set_xlabel('Access Sizes')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=45, ha='right')
ax.set_ylim(ymin = 0)
ax.legend()

ax.spines[['right', 'top']].set_visible(False)

autolabel(ax=ax, rects=rects1)
autolabel(ax=ax, rects=rects2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def plot_io_cost(report: darshan.DarshanReport) -> Any:
ax_raw.set_ylabel("Runtime (s)")
handles, labels = ax_raw.get_legend_handles_labels()
ax_norm.legend(handles[::-1], labels[::-1], loc="upper left", bbox_to_anchor=(1.22, 1.02))
ax_norm.set_ylabel("Runtime (%)")
# rotate the xticklabels so they don't overlap
for ax in [ax_raw, ax_norm]:
for label in ax.get_xticklabels():
Expand Down
22 changes: 13 additions & 9 deletions darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@ def autolabel(ax, rects):
"""Attach a text label above each bar in *rects*, displaying its height."""
for rect in rects:
height = rect.get_height()
ax.annotate(
'{}'.format(height),
xy=(rect.get_x() + rect.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center',
va='bottom',
rotation=45,
)
if height > 0:
ax.annotate(
'{}'.format(height),
xy=(rect.get_x() + rect.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center',
va='bottom',
rotation=45,
)

def gather_count_data(report, mod):
"""
Expand Down Expand Up @@ -184,6 +185,9 @@ def plot_opcounts(report, mod, ax=None):
ax.set_ylabel('Count')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.set_ylim(ymin = 0)

ax.spines[['right', 'top']].set_visible(False)

autolabel(ax=ax, rects=rects)

Expand Down
3 changes: 2 additions & 1 deletion darshan-util/pydarshan/darshan/lib/accum.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,6 @@ def log_file_count_summary_table(derived_metrics,
df.drop(columns="index", inplace=True)
ret = plot_common_access_table.DarshanReportTable(df,
col_space=200,
justify="center")
justify="center",
index_names=False)
return ret
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,8 @@ def test_plot_data(file_rd_series, file_wr_series, bytes_rd_series, bytes_wr_ser
if isinstance(child, matplotlib.text.Text):
actual_list_text_in_fig.append(child.get_text())

for expected_text_entry in [matplotlib.text.Text(0, 1, ' # files read (3.00E+00)'),
matplotlib.text.Text(0, 0, ' # files written (1.40E+01)')]:
for expected_text_entry in [matplotlib.text.Text(0, 1, ' files read: 3'),
matplotlib.text.Text(0, 0, ' files written: 14')]:
assert expected_text_entry.get_text() in actual_list_text_in_fig

# enforce invisible right-side spine so that
Expand Down Expand Up @@ -298,10 +298,7 @@ def test_empty_data_posix_y_axis_annot_position():
if isinstance(child, matplotlib.text.Annotation):
actual_text = child.get_text()
actual_fontsize = child.get_fontsize()
if len(actual_text) <= 8:
assert actual_fontsize == 18
else:
assert actual_fontsize == 12
assert actual_fontsize == 18

@pytest.mark.parametrize("log_file_name, expected_text_labels", [
('noposixopens.darshan', ['/global', 'anonymized']),
Expand All @@ -324,10 +321,6 @@ def test_cat_labels_std_streams(log_file_name, expected_text_labels):
if isinstance(child, matplotlib.text.Annotation):
actual_text = child.get_text()
actual_text_labels.append(actual_text)
if 'STD' in actual_text:
# format the STD.. streams properly
actual_fontsize = child.get_fontsize()
assert actual_fontsize == 12

assert actual_text_labels == expected_text_labels

Expand Down Expand Up @@ -409,10 +402,8 @@ def test_plot_data_shared_x_axis():
wr_bytes = [1e8, 1e9, 1e10, 1e11]
rd_file_cts = [1e3, 1e4, 1e5, 1e6]
wr_file_cts = [1e2, 1e3, 1e4, 1e5]
# multiply by the MiB conversion factor
factor = 1048576
bytes_rd_series = pd.Series(data=rd_bytes, index=filesystem_roots) * factor
bytes_wr_series = pd.Series(data=wr_bytes, index=filesystem_roots) * factor
bytes_rd_series = pd.Series(data=rd_bytes, index=filesystem_roots)
bytes_wr_series = pd.Series(data=wr_bytes, index=filesystem_roots)
file_rd_series = pd.Series(data=rd_file_cts, index=filesystem_roots)
file_wr_series = pd.Series(data=wr_file_cts, index=filesystem_roots)
fig = plt.figure()
Expand Down
6 changes: 6 additions & 0 deletions darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ def test_xticks_and_labels(log_path, func, expected_xticklabels, mod):

assert_array_equal(actual_xticks, expected_xticks)
assert_array_equal(actual_xticklabels, expected_xticklabels)
# see Argonne formatting reqs in gh-910
spines = ax.spines
assert not spines["top"].get_visible()
assert not spines["right"].get_visible()
assert spines["bottom"].get_visible()
assert spines["left"].get_visible()


@pytest.mark.parametrize(
Expand Down
19 changes: 18 additions & 1 deletion darshan-util/pydarshan/darshan/tests/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@
has_lxml = False


def _enforce_html_report_aesthetics(report_str):
    """
    Assert that the text of an HTML summary report follows the
    formatting requirements (partly from Argonne feedback in gh-910).

    Parameters
    ----------
    report_str: a string containing the full HTML report text.

    Raises
    ------
    AssertionError: if a ``figcaption`` CSS rule is found and it
    lacks ``font-weight: 300`` or ``font-size: 0.75em``, or contains
    ``italic``; or if the report contains a ``<th>type</th>`` header
    cell.
    """
    # match the whole rule body no matter how many declarations it
    # holds; a pattern fixed to three lines would fail to match a rule
    # with an extra declaration (e.g. a reintroduced italic font-style)
    # and the checks below would then be skipped silently
    m = re.search(r"figcaption \{\n[^}]*\}", report_str)
    if m:
        result = m.group(0)
        assert "font-weight: 300" in result
        assert "font-size: 0.75em" in result
        assert "italic" not in result

    # Argonne team doesn't like index label
    # for File Count Summary table
    assert "<th>type</th>" not in report_str


@pytest.mark.parametrize(
"argv", [
["./tests/input/sample.darshan"],
Expand Down Expand Up @@ -117,6 +133,7 @@ def test_main_without_args(tmpdir, argv, expected_img_count, expected_table_coun
with darshan.DarshanReport(filename=argv[0], read_all=False) as report:
with open(expected_save_path) as html_report:
report_str = html_report.read()
_enforce_html_report_aesthetics(report_str=report_str)
if "DXT" in "\t".join(report.modules):
for dxt_mod in ["DXT_POSIX", "DXT_MPIIO"]:
if dxt_mod in report.modules:
Expand Down Expand Up @@ -215,7 +232,7 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath):
assert "Consider enabling the runtime heatmap module" in report_str
else:
# check empty log warning and return
assert "This Darshan log file has no instrumentation records" in report_str
assert "This Darshan log file has no instrumentation records, " in report_str
return

# check if I/O cost figure is present
Expand Down

0 comments on commit 05efd1d

Please sign in to comment.