updated pydarshan for DFS module

darshan-hpc · Nov 9, 2024 · c83718f · c83718f
1 parent a3f2b32
commit c83718f
Show file tree

Hide file tree

Showing 12 changed files with 99 additions and 100 deletions.
diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py
@@ -98,8 +98,21 @@
 struct darshan_dfs_file
 {
     struct darshan_base_record base_rec;
-    int64_t counters[54];
+    int64_t counters[52];
     double fcounters[15];
+    unsigned char pool_uuid[16];
+    unsigned char cont_uuid[16];
+};
+
+struct darshan_daos_object
+{
+    struct darshan_base_record base_rec;
+    int64_t counters[63];
+    double fcounters[15];
+    unsigned char pool_uuid[16];
+    unsigned char cont_uuid[16];
+    uint64_t oid_hi;
+    uint64_t oid_lo;
 };
 
 struct darshan_stdio_file
@@ -212,6 +225,8 @@
 extern char *posix_f_counter_names[];
 extern char *dfs_counter_names[];
 extern char *dfs_f_counter_names[];
+extern char *daos_counter_names[];
+extern char *daos_f_counter_names[];
 extern char *stdio_counter_names[];
 extern char *stdio_f_counter_names[];
 

diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py
@@ -87,6 +87,7 @@ def mod_name_to_idx(mod_name):
     "PNETCDF_VAR": "struct darshan_pnetcdf_var **",
     "POSIX": "struct darshan_posix_file **",
     "DFS": "struct darshan_dfs_file **",
+    "DAOS": "struct darshan_daos_object **",
     "STDIO": "struct darshan_stdio_file **",
     "APXC-HEADER": "struct darshan_apxc_header_record **",
     "APXC-PERF": "struct darshan_apxc_perf_record **",

diff --git a/...les/example_logs/snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan b/...les/example_logs/snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan
diff --git a/...s/example_logs/snyder_ior-POSIX_id1057716-202103_11-8-64415-6936117869459351096_1.darshan b/...s/example_logs/snyder_ior-POSIX_id1057716-202103_11-8-64415-6936117869459351096_1.darshan
diff --git a/...les/example_logs/snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan b/...les/example_logs/snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan
diff --git a/...es/example_logs/snyders_ior-POSIX_id531897-30559_8-18-57636-5730483702862695835_1.darshan b/...es/example_logs/snyders_ior-POSIX_id531897-30559_8-18-57636-5730483702862695835_1.darshan
diff --git a/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py b/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py
@@ -11,15 +11,6 @@ def agg_ioops(self, mode='append'):
         None or dict: Depending on mode
     """
 
-    series = [
-        {'name': 'POSIX', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] }, 
-        {'name': 'MPI-IO Indep.', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] }, 
-        {'name': 'MPI-IO Coll.', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
-        {'name': 'STDIO', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
-        {'name': 'DFS', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
-    ]
-
-
     # convienience
     recs = self.records
     ctx = {}
@@ -121,6 +112,11 @@ def agg_ioops(self, mode='append'):
                 'Writex': agg[mod + '_WRITEXS'],
                 'Open':  agg[mod + '_OPENS'],
                 'GlobalOpen':  agg[mod + '_GLOBAL_OPENS'],
+                'Lookup':  agg[mod + '_LOOKUPS'],
+                'Get Size':  agg[mod + '_GET_SIZES'],
+                'Punch':  agg[mod + '_PUNCHES'],
+                'Remove':  agg[mod + '_REMOVES'],
+                'Stat':  agg[mod + '_STATS'],
             }
             ctx[mod] = agg
             ctx[mod + '_simple'] = tmp

diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py
@@ -49,16 +49,6 @@ def gather_count_data(report, mod):
             0, # faulty? mod_data['POSIX_MMAPS'],
             mod_data['POSIX_FSYNCS'] + mod_data['POSIX_FDSYNCS']
         ]
-    elif mod == 'DFS':
-        labels = ['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen']
-        counts = [
-            mod_data['DFS_READS'],
-            mod_data['DFS_READXS'],
-            mod_data['DFS_WRITES'],
-            mod_data['DFS_WRITEXS'],
-            mod_data['DFS_OPENS'],
-            mod_data['DFS_GLOBAL_OPENS'],
-        ]
 
     # Gather MPIIO
     elif mod == 'MPI-IO':
@@ -164,6 +154,22 @@ def gather_count_data(report, mod):
             report.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_COLL_WAITS'],
         ]
 
+    elif mod == 'DFS':
+        labels = ['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen', 'Lookup', 'Get Size', 'Punch', 'Remove', 'Stat']
+        counts = [
+            mod_data['DFS_READS'],
+            mod_data['DFS_READXS'],
+            mod_data['DFS_WRITES'],
+            mod_data['DFS_WRITEXS'],
+            mod_data['DFS_OPENS'],
+            mod_data['DFS_GLOBAL_OPENS'],
+            mod_data['DFS_LOOKUPS'],
+            mod_data['DFS_GET_SIZES'],
+            mod_data['DFS_PUNCHES'],
+            mod_data['DFS_REMOVES'],
+            mod_data['DFS_STATS'],
+        ]
+
     return labels, counts
 
 def plot_opcounts(report, mod, ax=None):

diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py b/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py
@@ -14,7 +14,7 @@
     "log_path, mod, func, expected_xticklabels",
     [
         (
-            "snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
+            "snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
             "DFS",
             plot_access_histogram,
             ["0-100", "101-1K", "1K-10K", "10K-100K", "100K-1M",
@@ -72,10 +72,10 @@
             "1M-4M", "4M-10M", "10M-100M", "100M-1G", "1G+"],
         ),
         (
-            "snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
+            "snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
             "DFS",
             plot_opcounts,
-            ['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen'],
+            ['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen', 'Lookup', 'Get Size', 'Punch', 'Remove', 'Stat'],
         ),
         (
             "dxt.darshan",

diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py b/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py
@@ -57,15 +57,15 @@
             ),
         ),
         (
-            "snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
+            "snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
             pd.DataFrame(
                 np.array([
-                    [0.0, 0, 6.973743438720703e-06],
-                    [0, 3.337860107421875e-05, 0.0],
-                    [0.001381218433380127, 0.009676456451416016, 0.028002262115478516],
+                    [0.0, 0.0, 0.0, 0.0],
+                    [0.0, 4.515051841e-06, 0.0, 0.0],
+                    [0.001456562, 0.002266062, 0.007923812, 0.0],
                 ]),
                 ["POSIX", "STDIO", "DFS"],
-                ["Read", "Write", "Meta"],
+                ["Read", "Write", "Meta", "Wait"],
             ),
         ),
     ],
@@ -100,8 +100,8 @@ def test_get_io_cost_df(logname, expected_df):
             [0.0, 1111.0],
         ),
         (
-            "snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
-            [0.0, 1],
+            "snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
+            [0.0, 0.80276728],
         ),
     ],
 )
@@ -121,28 +121,28 @@ def test_plot_io_cost_ylims(logname, expected_ylims):
             assert_allclose(actual_ylims, [0.0, 100.0])
 
 @pytest.mark.parametrize(
-    "logname, expected_yticks", [
+    "logname, expected_yticks, expected_yticklabels", [
         (
             "ior_hdf5_example.darshan",
             [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
+            ['0.0', '0.2', '0.4', '0.6', '0.8', '1.0'],
         ),
         (
             "sample-badost.darshan",
             [0, 156, 312, 468, 624, 780],
+            ['0', '156', '312', '468', '624', '780'],
         ),
         (
-            "snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
-            [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
+            "snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
+            [0.0 , 0.16055346, 0.32110691, 0.48166037, 0.64221382, 0.80276728],
+            ['0.0000' , '0.1606', '0.3211', '0.4817', '0.6422', '0.8028'],
         ),
     ],
 )
-def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks):
+def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks, expected_yticklabels):
     # check the y-axis tick marks are at the appropriate
     # locations and the labels are as expected
 
-    # create the expected y-axis tick labels from the y ticks
-    expected_yticklabels = [str(i) for i in expected_yticks]
-
     logpath = get_log_path(logname)
     with darshan.DarshanReport(logpath) as report:
         fig = plot_io_cost(report=report)

diff --git a/darshan-util/pydarshan/darshan/tests/test_report.py b/darshan-util/pydarshan/darshan/tests/test_report.py
@@ -80,68 +80,49 @@ def test_dfs_daos_posix_match():
     # the ior runs by Shane with POSIX vs. DAOS DFS
     # backend should produce matching counters where
     # comparable data fields exist
-    posix_ior_report = darshan.DarshanReport(get_log_path("snyders_ior-POSIX_id531897-30559_8-18-57636-5730483702862695835_1.darshan"))
-    dfs_ior_report = darshan.DarshanReport(get_log_path("snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan"))
+    posix_ior_report = darshan.DarshanReport(get_log_path("snyder_ior-POSIX_id1057716-202103_11-8-64415-6936117869459351096_1.darshan"))
+    dfs_ior_report = darshan.DarshanReport(get_log_path("snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan"))
     posix_ior_report.mod_read_all_records("POSIX")
     dfs_ior_report.mod_read_all_records("DFS")
     posix_data_dict = posix_ior_report.data['records']["POSIX"].to_df()
     dfs_data_dict = dfs_ior_report.data['records']["DFS"].to_df()
     dfs_ior_name_recs = dfs_ior_report.data["name_records"]
 
-    # TODO: currently we filter out the extra `/` activity
-    # for DFS here, but we may want to filter this out "upstream"
-    # in the control flow somewhere? Shane indicates that
-    # POSIX does something similar under the hood, but it is
-    # not intercepted by darshan, and we should likely just make
-    # the two modules look the same in this regard
-
-    for hashval, val in dfs_ior_name_recs.items():
-        if val.endswith(":/"):
-            bad_hash = hashval
-
-    for counter_type in ["counters", "fcounters"]:
-        dfs_data_dict[counter_type] = dfs_data_dict[counter_type][dfs_data_dict[counter_type]["id"] != bad_hash]
-
-    for counter_type in ["counters", "fcounters"]:
-        for column_name in dfs_data_dict[counter_type].columns:
-            # for some columns we can't reasonably expect a match
-            # or we need to handle the data differently between POSIX
-            # and DAOS DFS
-            if column_name in ["id", "DFS_LOOKUPS", "DFS_NB_READS", "DFS_NB_WRITES",
-                               "DFS_GET_SIZES", "DFS_PUNCHES", "DFS_STATS",
-                               "DFS_CHUNK_SIZE",
-                               "DFS_FASTEST_RANK", "DFS_SLOWEST_RANK"]:
-                continue
-            elif "time" in column_name.lower():
-                # you can't reasonably expect the timestamps to be the
-                # same for two different runs of any kind really
-                continue
-            elif column_name in ["DFS_GLOBAL_OPENS", "DFS_OPENS"]:
-                # sum these together to match the POSIX version
-                column_name = "DFS_OPENS"
-                dfs_data = (dfs_data_dict[counter_type]["DFS_GLOBAL_OPENS"] +
-                            dfs_data_dict[counter_type]["DFS_OPENS"])
-            elif column_name in ["DFS_READS", "DFS_READXS"]:
-                column_name = "DFS_READS"
-                dfs_data = (dfs_data_dict[counter_type]["DFS_READS"] +
-                            dfs_data_dict[counter_type]["DFS_READXS"])
-                # we know the hardcoded value for certain
-                assert dfs_data.values == 16
-            elif column_name in ["DFS_WRITES", "DFS_WRITEXS"]:
-                column_name = "DFS_WRITES"
-                dfs_data = (dfs_data_dict[counter_type]["DFS_WRITES"] +
-                            dfs_data_dict[counter_type]["DFS_WRITEXS"])
-                # we know the hardcoded value for certain
-                assert dfs_data.values == 16
-            else:
-                dfs_data = dfs_data_dict[counter_type][column_name]
-            posix_column_name = column_name.replace("DFS", "POSIX")
-            posix_data = posix_data_dict[counter_type][posix_column_name]
-            assert_allclose(dfs_data.values, posix_data.values)
-            if column_name.endswith("BYTES_WRITTEN"):
-                # we know the hardcoded value for certain
-                # 256 KiB * 16
-                assert dfs_data.values == 4194304
+    for column_name in dfs_data_dict["counters"].columns:
+        # for some columns we can't reasonably expect a match
+        # or we need to handle the data differently between POSIX
+        # and DAOS DFS
+        if column_name in ["id", "DFS_LOOKUPS", "DFS_DUPS", "DFS_NB_READS", "DFS_NB_WRITES",
+                           "DFS_GET_SIZES", "DFS_PUNCHES", "DFS_REMOVES", "DFS_STATS",
+                           "DFS_CHUNK_SIZE",
+                           "DFS_FASTEST_RANK", "DFS_SLOWEST_RANK"]:
+            continue
+        elif column_name in ["DFS_GLOBAL_OPENS", "DFS_OPENS"]:
+            # sum these together to match the POSIX version
+            column_name = "DFS_OPENS"
+            dfs_data = (dfs_data_dict["counters"]["DFS_GLOBAL_OPENS"] +
+                        dfs_data_dict["counters"]["DFS_OPENS"])
+        elif column_name in ["DFS_READS", "DFS_READXS"]:
+            column_name = "DFS_READS"
+            dfs_data = (dfs_data_dict["counters"]["DFS_READS"] +
+                        dfs_data_dict["counters"]["DFS_READXS"])
+            # we know the hardcoded value for certain
+            assert dfs_data.values == 64
+        elif column_name in ["DFS_WRITES", "DFS_WRITEXS"]:
+            column_name = "DFS_WRITES"
+            dfs_data = (dfs_data_dict["counters"]["DFS_WRITES"] +
+                        dfs_data_dict["counters"]["DFS_WRITEXS"])
+            # we know the hardcoded value for certain
+            assert dfs_data.values == 64
+        else:
+            dfs_data = dfs_data_dict["counters"][column_name]
+        posix_column_name = column_name.replace("DFS", "POSIX")
+        posix_data = posix_data_dict["counters"][posix_column_name]
+        assert_allclose(dfs_data.values, posix_data.values)
+        if column_name.endswith("BYTES_WRITTEN"):
+            # we know the hardcoded value for certain
+            # 256 KiB * 64
+            assert dfs_data.values == 16777216
 
 
 @pytest.mark.parametrize("unsupported_record",

diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py
@@ -263,9 +263,9 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath):
                     elif ("runtime_and_dxt_heatmaps_diagonal_write_only" in log_filepath or
                           "treddy_runtime_heatmap_inactive_ranks" in log_filepath or
                           "h5d_no_h5f" in log_filepath or
-                          "snyders_ior-dfs" in log_filepath):
+                          "snyder_ior-DFS" in log_filepath):
                         assert actual_runtime_heatmap_titles == 1
-                    elif "snyders_ior-POSIX" in log_filepath:
+                    elif "snyder_ior-POSIX" in log_filepath:
                         assert actual_runtime_heatmap_titles == 2
                     else:
                         assert actual_runtime_heatmap_titles == 0
@@ -494,18 +494,18 @@ def test_metadata_table(self, log_path, expected_df):
                 2,
             ),
             (
-                "snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan",
+                "snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan",
                 pd.DataFrame(
                     index=[
                         "Log Filename", "Runtime Library Version", "Log Format Version",
                         "POSIX (ver=4)",
                         "STDIO (ver=2)", "HEATMAP (ver=1)",
-                        "DFS (ver=1)"
+                        "DFS (ver=1)", "DAOS (ver=1)"
                     ],
                     data=[
-                        ["snyders_ior-DFS_id531897-30546_8-18-57619-12789084789544057019_1.darshan"], ["3.4.0"], ["3.41"],
-                        ["0.14 KiB"], ["0.07 KiB"], ["0.05 KiB"],
-                        ["0.17 KiB"],
+                        ["snyder_ior-DFS_id1057716-201712_11-8-64400-1922568413188514066_1.darshan"], ["3.4.7"], ["3.41"],
+                        ["0.07 KiB"], ["0.07 KiB"], ["0.05 KiB"],
+                        ["0.17 KiB"], ["0.39 KiB"],
                     ],
                 ),
                 0,