Skip to content

Commit

Permalink
updated pydarshan for DFS module
Browse files Browse the repository at this point in the history
  • Loading branch information
shanedsnyder committed Nov 9, 2024
1 parent a3f2b32 commit c83718f
Show file tree
Hide file tree
Showing 12 changed files with 99 additions and 100 deletions.
17 changes: 16 additions & 1 deletion darshan-util/pydarshan/darshan/backend/api_def_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,21 @@
struct darshan_dfs_file
{
struct darshan_base_record base_rec;
int64_t counters[54];
int64_t counters[52];
double fcounters[15];
unsigned char pool_uuid[16];
unsigned char cont_uuid[16];
};
struct darshan_daos_object
{
struct darshan_base_record base_rec;
int64_t counters[63];
double fcounters[15];
unsigned char pool_uuid[16];
unsigned char cont_uuid[16];
uint64_t oid_hi;
uint64_t oid_lo;
};
struct darshan_stdio_file
Expand Down Expand Up @@ -212,6 +225,8 @@
extern char *posix_f_counter_names[];
extern char *dfs_counter_names[];
extern char *dfs_f_counter_names[];
extern char *daos_counter_names[];
extern char *daos_f_counter_names[];
extern char *stdio_counter_names[];
extern char *stdio_f_counter_names[];
Expand Down
1 change: 1 addition & 0 deletions darshan-util/pydarshan/darshan/backend/cffi_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def mod_name_to_idx(mod_name):
"PNETCDF_VAR": "struct darshan_pnetcdf_var **",
"POSIX": "struct darshan_posix_file **",
"DFS": "struct darshan_dfs_file **",
"DAOS": "struct darshan_daos_object **",
"STDIO": "struct darshan_stdio_file **",
"APXC-HEADER": "struct darshan_apxc_header_record **",
"APXC-PERF": "struct darshan_apxc_perf_record **",
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,6 @@ def agg_ioops(self, mode='append'):
None or dict: Depending on mode
"""

series = [
{'name': 'POSIX', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
{'name': 'MPI-IO Indep.', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
{'name': 'MPI-IO Coll.', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
{'name': 'STDIO', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
{'name': 'DFS', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
]


# convenience
recs = self.records
ctx = {}
Expand Down Expand Up @@ -121,6 +112,11 @@ def agg_ioops(self, mode='append'):
'Writex': agg[mod + '_WRITEXS'],
'Open': agg[mod + '_OPENS'],
'GlobalOpen': agg[mod + '_GLOBAL_OPENS'],
'Lookup': agg[mod + '_LOOKUPS'],
'Get Size': agg[mod + '_GET_SIZES'],
'Punch': agg[mod + '_PUNCHES'],
'Remove': agg[mod + '_REMOVES'],
'Stat': agg[mod + '_STATS'],
}
ctx[mod] = agg
ctx[mod + '_simple'] = tmp
Expand Down
26 changes: 16 additions & 10 deletions darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,6 @@ def gather_count_data(report, mod):
0, # faulty? mod_data['POSIX_MMAPS'],
mod_data['POSIX_FSYNCS'] + mod_data['POSIX_FDSYNCS']
]
elif mod == 'DFS':
labels = ['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen']
counts = [
mod_data['DFS_READS'],
mod_data['DFS_READXS'],
mod_data['DFS_WRITES'],
mod_data['DFS_WRITEXS'],
mod_data['DFS_OPENS'],
mod_data['DFS_GLOBAL_OPENS'],
]

# Gather MPIIO
elif mod == 'MPI-IO':
Expand Down Expand Up @@ -164,6 +154,22 @@ def gather_count_data(report, mod):
report.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_COLL_WAITS'],
]

elif mod == 'DFS':
labels = ['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen', 'Lookup', 'Get Size', 'Punch', 'Remove', 'Stat']
counts = [
mod_data['DFS_READS'],
mod_data['DFS_READXS'],
mod_data['DFS_WRITES'],
mod_data['DFS_WRITEXS'],
mod_data['DFS_OPENS'],
mod_data['DFS_GLOBAL_OPENS'],
mod_data['DFS_LOOKUPS'],
mod_data['DFS_GET_SIZES'],
mod_data['DFS_PUNCHES'],
mod_data['DFS_REMOVES'],
mod_data['DFS_STATS'],
]

return labels, counts

def plot_opcounts(report, mod, ax=None):
Expand Down
6 changes: 3 additions & 3 deletions darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"log_path, mod, func, expected_xticklabels",
[
(
"snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
"snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
"DFS",
plot_access_histogram,
["0-100", "101-1K", "1K-10K", "10K-100K", "100K-1M",
Expand Down Expand Up @@ -72,10 +72,10 @@
"1M-4M", "4M-10M", "10M-100M", "100M-1G", "1G+"],
),
(
"snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
"snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
"DFS",
plot_opcounts,
['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen'],
['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen', 'Lookup', 'Get Size', 'Punch', 'Remove', 'Stat'],
),
(
"dxt.darshan",
Expand Down
28 changes: 14 additions & 14 deletions darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@
),
),
(
"snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
"snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
pd.DataFrame(
np.array([
[0.0, 0, 6.973743438720703e-06],
[0, 3.337860107421875e-05, 0.0],
[0.001381218433380127, 0.009676456451416016, 0.028002262115478516],
[0.0, 0.0, 0.0, 0.0],
[0.0, 4.515051841e-06, 0.0, 0.0],
[0.001456562, 0.002266062, 0.007923812, 0.0],
]),
["POSIX", "STDIO", "DFS"],
["Read", "Write", "Meta"],
["Read", "Write", "Meta", "Wait"],
),
),
],
Expand Down Expand Up @@ -100,8 +100,8 @@ def test_get_io_cost_df(logname, expected_df):
[0.0, 1111.0],
),
(
"snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
[0.0, 1],
"snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
[0.0, 0.80276728],
),
],
)
Expand All @@ -121,28 +121,28 @@ def test_plot_io_cost_ylims(logname, expected_ylims):
assert_allclose(actual_ylims, [0.0, 100.0])

@pytest.mark.parametrize(
"logname, expected_yticks", [
"logname, expected_yticks, expected_yticklabels", [
(
"ior_hdf5_example.darshan",
[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
['0.0', '0.2', '0.4', '0.6', '0.8', '1.0'],
),
(
"sample-badost.darshan",
[0, 156, 312, 468, 624, 780],
['0', '156', '312', '468', '624', '780'],
),
(
"snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
"snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
[0.0 , 0.16055346, 0.32110691, 0.48166037, 0.64221382, 0.80276728],
['0.0000' , '0.1606', '0.3211', '0.4817', '0.6422', '0.8028'],
),
],
)
def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks):
def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks, expected_yticklabels):
# check the y-axis tick marks are at the appropriate
# locations and the labels are as expected

# create the expected y-axis tick labels from the y ticks
expected_yticklabels = [str(i) for i in expected_yticks]

logpath = get_log_path(logname)
with darshan.DarshanReport(logpath) as report:
fig = plot_io_cost(report=report)
Expand Down
93 changes: 37 additions & 56 deletions darshan-util/pydarshan/darshan/tests/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,68 +80,49 @@ def test_dfs_daos_posix_match():
# the ior runs by Shane with POSIX vs. DAOS DFS
# backend should produce matching counters where
# comparable data fields exist
posix_ior_report = darshan.DarshanReport(get_log_path("snyders_ior-POSIX_id531897-30559_8-18-57636-5730483702862695835_1.darshan"))
dfs_ior_report = darshan.DarshanReport(get_log_path("snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan"))
posix_ior_report = darshan.DarshanReport(get_log_path("snyder_ior-POSIX_id1057716-202103_11-8-64415-6936117869459351096_1.darshan"))
dfs_ior_report = darshan.DarshanReport(get_log_path("snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan"))
posix_ior_report.mod_read_all_records("POSIX")
dfs_ior_report.mod_read_all_records("DFS")
posix_data_dict = posix_ior_report.data['records']["POSIX"].to_df()
dfs_data_dict = dfs_ior_report.data['records']["DFS"].to_df()
dfs_ior_name_recs = dfs_ior_report.data["name_records"]

# TODO: currently we filter out the extra `/` activity
# for DFS here, but we may want to filter this out "upstream"
# in the control flow somewhere? Shane indicates that
# POSIX does something similar under the hood, but it is
# not intercepted by darshan, and we should likely just make
# the two modules look the same in this regard

for hashval, val in dfs_ior_name_recs.items():
if val.endswith(":/"):
bad_hash = hashval

for counter_type in ["counters", "fcounters"]:
dfs_data_dict[counter_type] = dfs_data_dict[counter_type][dfs_data_dict[counter_type]["id"] != bad_hash]

for counter_type in ["counters", "fcounters"]:
for column_name in dfs_data_dict[counter_type].columns:
# for some columns we can't reasonably expect a match
# or we need to handle the data differently between POSIX
# and DAOS DFS
if column_name in ["id", "DFS_LOOKUPS", "DFS_NB_READS", "DFS_NB_WRITES",
"DFS_GET_SIZES", "DFS_PUNCHES", "DFS_STATS",
"DFS_CHUNK_SIZE",
"DFS_FASTEST_RANK", "DFS_SLOWEST_RANK"]:
continue
elif "time" in column_name.lower():
# you can't reasonably expect the timestamps to be the
# same for two different runs of any kind really
continue
elif column_name in ["DFS_GLOBAL_OPENS", "DFS_OPENS"]:
# sum these together to match the POSIX version
column_name = "DFS_OPENS"
dfs_data = (dfs_data_dict[counter_type]["DFS_GLOBAL_OPENS"] +
dfs_data_dict[counter_type]["DFS_OPENS"])
elif column_name in ["DFS_READS", "DFS_READXS"]:
column_name = "DFS_READS"
dfs_data = (dfs_data_dict[counter_type]["DFS_READS"] +
dfs_data_dict[counter_type]["DFS_READXS"])
# we know the hardcoded value for certain
assert dfs_data.values == 16
elif column_name in ["DFS_WRITES", "DFS_WRITEXS"]:
column_name = "DFS_WRITES"
dfs_data = (dfs_data_dict[counter_type]["DFS_WRITES"] +
dfs_data_dict[counter_type]["DFS_WRITEXS"])
# we know the hardcoded value for certain
assert dfs_data.values == 16
else:
dfs_data = dfs_data_dict[counter_type][column_name]
posix_column_name = column_name.replace("DFS", "POSIX")
posix_data = posix_data_dict[counter_type][posix_column_name]
assert_allclose(dfs_data.values, posix_data.values)
if column_name.endswith("BYTES_WRITTEN"):
# we know the hardcoded value for certain
# 256 KiB * 16
assert dfs_data.values == 4194304
for column_name in dfs_data_dict["counters"].columns:
# for some columns we can't reasonably expect a match
# or we need to handle the data differently between POSIX
# and DAOS DFS
if column_name in ["id", "DFS_LOOKUPS", "DFS_DUPS", "DFS_NB_READS", "DFS_NB_WRITES",
"DFS_GET_SIZES", "DFS_PUNCHES", "DFS_REMOVES", "DFS_STATS",
"DFS_CHUNK_SIZE",
"DFS_FASTEST_RANK", "DFS_SLOWEST_RANK"]:
continue
elif column_name in ["DFS_GLOBAL_OPENS", "DFS_OPENS"]:
# sum these together to match the POSIX version
column_name = "DFS_OPENS"
dfs_data = (dfs_data_dict["counters"]["DFS_GLOBAL_OPENS"] +
dfs_data_dict["counters"]["DFS_OPENS"])
elif column_name in ["DFS_READS", "DFS_READXS"]:
column_name = "DFS_READS"
dfs_data = (dfs_data_dict["counters"]["DFS_READS"] +
dfs_data_dict["counters"]["DFS_READXS"])
# we know the hardcoded value for certain
assert dfs_data.values == 64
elif column_name in ["DFS_WRITES", "DFS_WRITEXS"]:
column_name = "DFS_WRITES"
dfs_data = (dfs_data_dict["counters"]["DFS_WRITES"] +
dfs_data_dict["counters"]["DFS_WRITEXS"])
# we know the hardcoded value for certain
assert dfs_data.values == 64
else:
dfs_data = dfs_data_dict["counters"][column_name]
posix_column_name = column_name.replace("DFS", "POSIX")
posix_data = posix_data_dict["counters"][posix_column_name]
assert_allclose(dfs_data.values, posix_data.values)
if column_name.endswith("BYTES_WRITTEN"):
# we know the hardcoded value for certain
# 256 KiB * 64
assert dfs_data.values == 16777216


@pytest.mark.parametrize("unsupported_record",
Expand Down
14 changes: 7 additions & 7 deletions darshan-util/pydarshan/darshan/tests/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,9 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath):
elif ("runtime_and_dxt_heatmaps_diagonal_write_only" in log_filepath or
"treddy_runtime_heatmap_inactive_ranks" in log_filepath or
"h5d_no_h5f" in log_filepath or
"snyders_ior-dfs" in log_filepath):
"snyder_ior-DFS" in log_filepath):
assert actual_runtime_heatmap_titles == 1
elif "snyders_ior-POSIX" in log_filepath:
elif "snyder_ior-POSIX" in log_filepath:
assert actual_runtime_heatmap_titles == 2
else:
assert actual_runtime_heatmap_titles == 0
Expand Down Expand Up @@ -494,18 +494,18 @@ def test_metadata_table(self, log_path, expected_df):
2,
),
(
"snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
"snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
pd.DataFrame(
index=[
"Log Filename", "Runtime Library Version", "Log Format Version",
"POSIX (ver=4)",
"STDIO (ver=2)", "HEATMAP (ver=1)",
"DFS (ver=1)"
"DFS (ver=1)", "DAOS (ver=1)"
],
data=[
["snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan"], ["3.4.0"], ["3.41"],
["0.14 KiB"], ["0.07 KiB"], ["0.05 KiB"],
["0.17 KiB"],
["snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan"], ["3.4.7"], ["3.41"],
["0.07 KiB"], ["0.07 KiB"], ["0.05 KiB"],
["0.17 KiB"], ["0.39 KiB"],
],
),
0,
Expand Down

0 comments on commit c83718f

Please sign in to comment.