diff --git a/.cirrus.yml b/.cirrus.yml index 505c606d2..de40e4d30 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -8,7 +8,7 @@ macos_M1_native_apple_silicon_py310_task: /opt/homebrew/opt/python@3.10/bin/python3 -m venv ~/py_310 source ~/py_310/bin/activate python -m pip install --upgrade pip - python -m pip install --upgrade pytest lxml matplotlib packaging + python -m pip install --upgrade pytest lxml matplotlib packaging humanize brew install automake libtool binutils mkdir -p /tmp/darshan_install export DARSHAN_INSTALL_PATH=/tmp/darshan_install diff --git a/.github/workflows/main_ci.yml b/.github/workflows/main_ci.yml index c3b4c7359..f988f81e6 100644 --- a/.github/workflows/main_ci.yml +++ b/.github/workflows/main_ci.yml @@ -32,7 +32,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install --upgrade pytest mypy pyflakes asv pytest-cov codecov lxml matplotlib packaging + python -m pip install --upgrade pytest pyflakes asv pytest-cov codecov lxml matplotlib packaging humanize "mypy<1.0.0" - if: ${{matrix.platform == 'macos-latest'}} name: Install MacOS deps run: | diff --git a/.github/workflows/runtime_ci.yml b/.github/workflows/runtime_ci.yml index c09ee4ac3..bceaddfb5 100644 --- a/.github/workflows/runtime_ci.yml +++ b/.github/workflows/runtime_ci.yml @@ -26,10 +26,11 @@ jobs: sudo apt-get update -y sudo apt-get install -y hdf5-tools libhdf5-openmpi-dev openmpi-bin python -m pip install --upgrade pip - python -m pip install --upgrade pytest mpi4py + python -m pip install --upgrade pytest mpi4py cython numpy wheel pkgconfig "setuptools<62.0.0" # we need to build h5py with the system HDF5 lib backend export HDF5_MPI="ON" - CC=mpicc python -m pip install --no-binary=h5py h5py + # Install h5py https://github.com/h5py/h5py/issues/2222 + CC=mpicc python -m pip install --no-cache-dir --no-binary=h5py h5py --no-build-isolation - name: Install darshan-runtime run: | export 
C_INCLUDE_PATH=$C_INCLUDE_PATH:/usr/include/hdf5/openmpi/ diff --git a/LICENSE b/LICENSE new file mode 120000 index 000000000..cf3af40f3 --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +COPYRIGHT \ No newline at end of file diff --git a/darshan-runtime/lib/Makefile.am b/darshan-runtime/lib/Makefile.am index 2f0353836..ffd0e0b20 100644 --- a/darshan-runtime/lib/Makefile.am +++ b/darshan-runtime/lib/Makefile.am @@ -17,6 +17,7 @@ C_SRCS = darshan-core-init-finalize.c \ darshan-core.c \ darshan-common.c \ darshan-config.c \ + darshan-ldms.c \ lookup3.c \ lookup8.c @@ -36,10 +37,6 @@ if BUILD_DXT_MODULE C_SRCS += darshan-dxt.c endif -if BUILD_LDMS_MODULE - C_SRCS += darshan-ldms.c -endif - if BUILD_MPIIO_MODULE C_SRCS += darshan-mpiio.c endif @@ -112,10 +109,10 @@ libdarshan_la_CPPFLAGS = $(AM_CPPFLAGS) -D_LARGEFILE64_SOURCE -DDARSHAN_PRELOAD libdarshan_a_SOURCES = $(C_SRCS) libdarshan_a_CPPFLAGS = $(AM_CPPFLAGS) -D_LARGEFILE64_SOURCE -DDARSHAN_WRAP_MMAP -if HAVE_LDMS +if BUILD_LDMS_MODULE libdarshan_la_LIBADD += -lldmsd_stream -lldms -lovis_json -lcoll libdarshan_la_CPPFLAGS += -I$(LDMS_HOME)/include -libdarshan_la_LDFLAGS = -L$(LDMS_HOME)/lib -Wl,-rpath=$(LDMS_HOME)/lib +libdarshan_la_LDFLAGS = -L$(LDMS_HOME)/lib64 -Wl,-rpath=$(LDMS_HOME)/lib64 endif H_SRCS = darshan-common.h \ diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index 6c8a50993..54e560e2a 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -76,11 +76,6 @@ static int mnt_data_count = 0; extern void bgq_runtime_initialize(); #endif -#ifdef HAVE_LDMS -extern struct darshanConnector dC; -extern void darshan_ldms_connector_initialize(); -#endif - #ifdef DARSHAN_USE_APXC extern void apxc_runtime_initialize(); #endif @@ -361,7 +356,8 @@ void darshan_core_initialize(int argc, char **argv) #ifdef HAVE_LDMS - darshan_ldms_connector_initialize(); + /* pass init_core to darshan-ldms connector initialization*/ + 
darshan_ldms_connector_initialize(init_core); #endif /* if darshan was successfully initialized, set the global pointer @@ -407,30 +403,6 @@ void darshan_core_initialize(int argc, char **argv) darshan_core_fprintf(stderr, "darshan:init\t%d\t%f\n", nprocs, init_time); } - -#ifdef HAVE_LDMS - /*TODO: Create environment variable to re-connect to ldms every x seconds - if(getenv("DARSHAN_LDMS_REINIT")) - dC.env_ldms_reinit = getenv("DARSHAN_LDMS_REINIT"); - else - dC.env_ldms_reinit = "1"; - */ - /* Set meta data for LDMS message sending */ - (void)gethostname(dC.hname, sizeof(dC.hname)); - dC.jobid = (int64_t)jobid; - dC.uid = getuid(); - dC.exename = argv[0]; - - /* Pull executable name from darshans variable if no arguemments are given. */ - if (argc==0) - { - char buff[DARSHAN_EXE_LEN]; - int len = readlink("/proc/self/exe", buff, sizeof(buff)-1); - buff[len] = '\0'; - dC.exename = buff; - } -#endif - return; } diff --git a/darshan-runtime/lib/darshan-hdf5.c b/darshan-runtime/lib/darshan-hdf5.c index d7f9fadd6..e97c4ad66 100644 --- a/darshan-runtime/lib/darshan-hdf5.c +++ b/darshan-runtime/lib/darshan-hdf5.c @@ -28,6 +28,7 @@ #include "darshan-dynamic.h" #include +#include "darshan-ldms.h" /* H5F prototypes */ DARSHAN_FORWARD_DECL(H5Fcreate, hid_t, (const char *filename, unsigned flags, hid_t create_plist, hid_t access_plist)); @@ -374,6 +375,9 @@ herr_t DARSHAN_DECL(H5Fflush)(hid_t object_id, H5F_scope_t scope) tm1, tm2, rec_ref->last_meta_end); } H5F_POST_RECORD(); + + MAP_OR_FAIL(H5Fclose); + __real_H5Fclose(file_id); } } diff --git a/darshan-runtime/lib/darshan-ldms.c b/darshan-runtime/lib/darshan-ldms.c index ed0807856..70d336262 100644 --- a/darshan-runtime/lib/darshan-ldms.c +++ b/darshan-runtime/lib/darshan-ldms.c @@ -14,6 +14,8 @@ #include #include #include +#include "darshan-ldms.h" +#include "darshan.h" /* Check for LDMS libraries if Darshan is built --with-ldms */ #ifdef HAVE_LDMS @@ -21,10 +23,10 @@ #include #include #include "ovis_json/ovis_json.h" 
-#include "darshan-ldms.h" struct darshanConnector dC = { .ldms_darsh = NULL, + .exename = NULL, .ldms_lib = 0, }; @@ -57,7 +59,6 @@ static void event_cb(ldms_t x, ldms_xprt_event_t e, void *cb_arg) } } -#define SLURM_NOTIFY_TIMEOUT 5 ldms_t setup_connection(const char *xprt, const char *host, const char *port, const char *auth) { @@ -66,10 +67,6 @@ ldms_t setup_connection(const char *xprt, const char *host, int rc; struct timespec ts; - if (!host) { - if (0 == gethostname(hostname, sizeof(hostname))) - host = hostname; - } if (!timeout) { ts.tv_sec = time(NULL) + 5; ts.tv_nsec = 0; @@ -103,17 +100,36 @@ ldms_t setup_connection(const char *xprt, const char *host, return ldms_g; } -void darshan_ldms_connector_initialize() +void darshan_ldms_connector_initialize(struct darshan_core_runtime *init_core) { + /*TODO: Create environment variable to re-connect to ldms every x seconds + if(getenv("DARSHAN_LDMS_REINIT")) + dC.env_ldms_reinit = getenv("DARSHAN_LDMS_REINIT"); + else + dC.env_ldms_reinit = "1";*/ + + /* Set meta data for LDMS message sending */ + (void)gethostname(dC.hname, sizeof(dC.hname)); + dC.jobid = init_core->log_job_p->jobid; + dC.uid = init_core->log_job_p->uid; + + /* grab exe path from darshan_core_runtime */ + dC.exename = strtok(init_core->log_exemnt_p, " "); + + /* Pull executable name from proc if no arguments are given. 
*/ + if (dC.exename == NULL) + { + char buff[DARSHAN_EXE_LEN]; + int len = readlink("/proc/self/exe", buff, sizeof(buff)-1); + buff[len] = '\0'; + dC.exename = buff; + } + if (!getenv("DARSHAN_LDMS_STREAM")) - dC.env_ldms_stream = "darshanConnector"; + dC.env_ldms_stream = "darshanConnector"; - /* Set flags for various LDMS environment variables */ - if (getenv("DXT_ENABLE_LDMS")) - dC.dxt_enable_ldms = 0; - else - dC.dxt_enable_ldms =1; + /* Set flags for various LDMS environment variables */ if (getenv("POSIX_ENABLE_LDMS")) dC.posix_enable_ldms = 0; else @@ -124,21 +140,21 @@ void darshan_ldms_connector_initialize() else dC.mpiio_enable_ldms = 1; + /* Disable STDIO when verbose is enabled to avoid + recursive calls to darshan_ldms_connector_send() */ if (getenv("STDIO_ENABLE_LDMS")) - dC.stdio_enable_ldms = 0; + if (!getenv("DARSHAN_LDMS_VERBOSE")) + dC.stdio_enable_ldms = 0; + else + dC.stdio_enable_ldms = 1; else dC.stdio_enable_ldms = 1; - + if (getenv("HDF5_ENABLE_LDMS")) dC.hdf5_enable_ldms = 0; else dC.hdf5_enable_ldms = 1; - if (getenv("MDHIM_ENABLE_LDMS")) - dC.mdhim_enable_ldms = 0; - else - dC.mdhim_enable_ldms = 1; - const char* env_ldms_xprt = getenv("DARSHAN_LDMS_XPRT"); const char* env_ldms_host = getenv("DARSHAN_LDMS_HOST"); const char* env_ldms_port = getenv("DARSHAN_LDMS_PORT"); @@ -216,22 +232,24 @@ void darshan_ldms_connector_send(int64_t record_count, char *rwo, int64_t offset } sprintf(jb11,"{ \"uid\":%ld, \"exe\":\"%s\",\"job_id\":%ld,\"rank\":%ld,\"ProducerName\":\"%s\",\"file\":\"%s\",\"record_id\":%"PRIu64",\"module\":\"%s\",\"type\":\"%s\",\"max_byte\":%ld,\"switches\":%ld,\"flushes\":%ld,\"cnt\":%ld,\"op\":\"%s\",\"seg\":[{\"data_set\":\"%s\",\"pt_sel\":%ld,\"irreg_hslab\":%ld,\"reg_hslab\":%ld,\"ndims\":%ld,\"npoints\":%ld,\"off\":%ld,\"len\":%ld,\"start\":%0.6f,\"dur\":%0.6f,\"total\":%.6f,\"timestamp\":%lu.%.6lu}]}", dC.uid, dC.exename, dC.jobid, dC.rank, dC.hname, dC.filename, dC.record_id, mod_name, data_type, max_byte, 
rw_switch, flushes, record_count, rwo, dC.data_set, dC.hdf5_data[0], dC.hdf5_data[1], dC.hdf5_data[2], dC.hdf5_data[3], dC.hdf5_data[4], offset, length, start_time, end_time-start_time, total_time, tspec_end.tv_sec, micro_s); - //printf("this is in jb11 %s \n", jb11); + + if (getenv("DARSHAN_LDMS_VERBOSE")) + printf("JSON Message: %s\n", jb11); rc = ldmsd_stream_publish(dC.ldms_darsh, dC.env_ldms_stream, LDMSD_STREAM_JSON, jb11, strlen(jb11) + 1); - if (rc) - printf("Error %d publishing data.\n", rc); + if (rc) + printf("Error %d publishing data.\n", rc); - out_1: + out_1: return; } #else struct darshanConnector dC = { - .ldms_lib = 1 - }; + .ldms_lib = 1 + }; -void darshan_ldms_connector_initialize() +void darshan_ldms_connector_initialize(struct darshan_core_runtime *init_core) { return; } diff --git a/darshan-runtime/lib/darshan-ldms.h b/darshan-runtime/lib/darshan-ldms.h index c3a89770e..800d41cec 100644 --- a/darshan-runtime/lib/darshan-ldms.h +++ b/darshan-runtime/lib/darshan-ldms.h @@ -6,6 +6,7 @@ #ifndef __DARSHAN_LDMS_H #define __DARSHAN_LDMS_H +#include "darshan.h" #ifdef HAVE_LDMS #include @@ -16,12 +17,10 @@ typedef struct darshanConnector { int to; int ldms_lib; - int dxt_enable_ldms; int posix_enable_ldms; int mpiio_enable_ldms; int stdio_enable_ldms; int hdf5_enable_ldms; - int mdhim_enable_ldms; int64_t rank; uint64_t record_id; char *exename; @@ -43,18 +42,18 @@ typedef struct darshanConnector { sem_t conn_sem; sem_t recv_sem; } darshanConnector; + #else typedef struct darshanConnector { int to; int ldms_lib; - int dxt_enable_ldms; int posix_enable_ldms; int mpiio_enable_ldms; int stdio_enable_ldms; int hdf5_enable_ldms; - int mdhim_enable_ldms; } darshanConnector; + #endif /* darshan_ldms_connector_initialize(), darshan_ldms_connector_send() @@ -71,7 +70,7 @@ typedef struct darshanConnector { * is detected or a new run is executed. 
* */ -void darshan_ldms_connector_initialize(); +void darshan_ldms_connector_initialize(struct darshan_core_runtime *); void darshan_ldms_connector_send(int64_t record_count, char *rwo, int64_t offset, int64_t length, int64_t max_byte, int64_t rw_switch, int64_t flushes, double start_time, double end_time, struct timespec tspec_start, struct timespec tspec_end, double total_time, char *mod_name, char *data_type); diff --git a/darshan-runtime/lib/darshan-mpiio.c b/darshan-runtime/lib/darshan-mpiio.c index 28708a724..88580a55b 100644 --- a/darshan-runtime/lib/darshan-mpiio.c +++ b/darshan-runtime/lib/darshan-mpiio.c @@ -248,7 +248,7 @@ static int my_rank = -1; if(newpath != __path) free(newpath);\ /* LDMS to publish realtime read tracing information to daemon*/ \ if(!dC.ldms_lib)\ - if(!dC.dxt_enable_ldms || !dC.mpiio_enable_ldms){\ + if(!dC.mpiio_enable_ldms){\ darshan_ldms_set_meta(__path, "N/A", rec_ref->file_rec->base_rec.id, rec_ref->file_rec->base_rec.rank);\ darshan_ldms_connector_send(rec_ref->file_rec->counters[MPIIO_COLL_OPENS] + rec_ref->file_rec->counters[MPIIO_INDEP_OPENS], "open", -1, -1, -1, -1, -1, __tm1, __tm2, __ts1, __ts2, rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], "MPIIO", "MET");\ }\ @@ -308,7 +308,7 @@ static int get_byte_offset = 0; __tm1, __tm2, rec_ref->last_read_end); \ /* LDMS to publish realtime read tracing information to daemon*/ \ if(!dC.ldms_lib)\ - if(!dC.dxt_enable_ldms || !dC.mpiio_enable_ldms)\ + if(!dC.mpiio_enable_ldms)\ darshan_ldms_connector_send(rec_ref->file_rec->counters[__counter], "read", displacement, size, -1, rec_ref->file_rec->counters[MPIIO_RW_SWITCHES], -1, __tm1, __tm2, __ts1, __ts2, rec_ref->file_rec->fcounters[MPIIO_F_READ_TIME], "MPIIO", "MOD");\ } while(0) @@ -356,7 +356,7 @@ static int get_byte_offset = 0; __tm1, __tm2, rec_ref->last_write_end); \ /* LDMS to publish realtime read tracing information to daemon*/ \ if(!dC.ldms_lib)\ - if(!dC.dxt_enable_ldms || !dC.mpiio_enable_ldms)\ + 
if(!dC.mpiio_enable_ldms)\ darshan_ldms_connector_send(rec_ref->file_rec->counters[__counter], "write", displacement, size, -1, rec_ref->file_rec->counters[MPIIO_RW_SWITCHES], -1, __tm1, __tm2, __ts1, __ts2, rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME], "MPIIO", "MOD");\ } while(0) @@ -1234,7 +1234,7 @@ int DARSHAN_DECL(MPI_File_close)(MPI_File *fh) #ifdef HAVE_LDMS /* publish close information for mpiio */ extern struct darshanConnector dC; - if(!dC.dxt_enable_ldms || !dC.mpiio_enable_ldms) + if(!dC.mpiio_enable_ldms) darshan_ldms_connector_send(-1, "close", -1, -1, -1, -1, -1, tm1, tm2, ts1, ts2, rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], "MPIIO", "MOD"); #endif diff --git a/darshan-runtime/lib/darshan-posix.c b/darshan-runtime/lib/darshan-posix.c index 2675b3aa2..ed62467ea 100644 --- a/darshan-runtime/lib/darshan-posix.c +++ b/darshan-runtime/lib/darshan-posix.c @@ -250,7 +250,7 @@ static int darshan_mem_alignment = 1; if(__newpath != __path) free(__newpath); \ /* LDMS to publish realtime read tracing information to daemon*/ \ if(!dC.ldms_lib)\ - if(!dC.dxt_enable_ldms || !dC.posix_enable_ldms){\ + if(!dC.posix_enable_ldms){\ darshan_ldms_set_meta((char *)__path, "N/A", __rec_ref->file_rec->base_rec.id, __rec_ref->file_rec->base_rec.rank);\ darshan_ldms_connector_send(__rec_ref->file_rec->counters[POSIX_OPENS], "open", -1, -1, -1, -1, -1, __tm1, __tm2, __ts1, __ts2, __rec_ref->file_rec->fcounters[POSIX_F_META_TIME], "POSIX", "MET");\ }\ @@ -345,7 +345,7 @@ static int darshan_mem_alignment = 1; __tm1, __tm2, rec_ref->last_read_end);\ /* LDMS to publish realtime read tracing information to daemon*/ \ if(!dC.ldms_lib)\ - if(!dC.dxt_enable_ldms || !dC.posix_enable_ldms)\ + if(!dC.posix_enable_ldms)\ darshan_ldms_connector_send(rec_ref->file_rec->counters[POSIX_READS], "read", this_offset, __ret, rec_ref->file_rec->counters[POSIX_MAX_BYTE_READ],rec_ref->file_rec->counters[POSIX_RW_SWITCHES], -1, __tm1, __tm2, __ts1, __ts2, 
rec_ref->file_rec->fcounters[POSIX_F_READ_TIME], "POSIX", "MOD");\ } while(0) @@ -415,7 +415,7 @@ static int darshan_mem_alignment = 1; __tm1, __tm2, rec_ref->last_write_end);\ /* LDMS to publish realtime write tracing information to daemon*/ \ if(!dC.ldms_lib)\ - if(!dC.dxt_enable_ldms || !dC.posix_enable_ldms)\ + if(!dC.posix_enable_ldms)\ darshan_ldms_connector_send(rec_ref->file_rec->counters[POSIX_WRITES], "write", this_offset, __ret, rec_ref->file_rec->counters[POSIX_MAX_BYTE_WRITTEN], rec_ref->file_rec->counters[POSIX_RW_SWITCHES], -1, __tm1, __tm2, __ts1, __ts2, rec_ref->file_rec->fcounters[POSIX_F_WRITE_TIME], "POSIX", "MOD");\ } while(0) @@ -1684,7 +1684,7 @@ int DARSHAN_DECL(close)(int fd) #ifdef HAVE_LDMS /* publish close information for posix */ extern struct darshanConnector dC; - if(!dC.dxt_enable_ldms || !dC.posix_enable_ldms) + if(!dC.posix_enable_ldms) darshan_ldms_connector_send(-1, "close", -1, -1, -1, -1, -1, tm1, tm2, ts1, ts2, rec_ref->file_rec->fcounters[POSIX_F_META_TIME], "POSIX", "MOD"); #endif diff --git a/darshan-runtime/lib/darshan-stdio.c b/darshan-runtime/lib/darshan-stdio.c index 7c4d2662c..0b645f052 100644 --- a/darshan-runtime/lib/darshan-stdio.c +++ b/darshan-runtime/lib/darshan-stdio.c @@ -87,7 +87,6 @@ #include "darshan.h" #include "darshan-dynamic.h" #include "darshan-heatmap.h" -//me #include "darshan-dxt.h" #include "darshan-ldms.h" diff --git a/darshan-runtime/lib/darshan.h b/darshan-runtime/lib/darshan.h index 45f1f09d0..fc135c463 100644 --- a/darshan-runtime/lib/darshan.h +++ b/darshan-runtime/lib/darshan.h @@ -33,7 +33,6 @@ #include "darshan-config.h" #include "darshan-common.h" #include "darshan-dxt.h" -#include "darshan-ldms.h" /* Environment variable to override __DARSHAN_JOBID */ #define DARSHAN_JOBID_OVERRIDE "DARSHAN_JOBID" diff --git a/darshan-test/regression/cray-module-alcf/env.sh b/darshan-test/regression/cray-module-alcf/env.sh index 5711f5773..01171165a 100755 --- 
a/darshan-test/regression/cray-module-alcf/env.sh +++ b/darshan-test/regression/cray-module-alcf/env.sh @@ -32,4 +32,4 @@ export DARSHAN_F90=ftn export DARSHAN_RUNJOB=$DARSHAN_TESTDIR/$DARSHAN_PLATFORM/runjob.sh module unload darshan >& /dev/null -module load $DARSHAN_PATH/share/craype-2.x/modulefiles/ +module load $DARSHAN_RUNTIME_PATH/share/craype-2.x/modulefiles/ diff --git a/darshan-test/regression/cray-module-nersc-perlmutter/env.sh b/darshan-test/regression/cray-module-nersc-perlmutter/env.sh new file mode 100755 index 000000000..5e299939e --- /dev/null +++ b/darshan-test/regression/cray-module-nersc-perlmutter/env.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# General notes +####################### + +# Script to set up the environment for tests on this platform. Must export +# the following environment variables: +# +# DARSHAN_CC: command to compile C programs +# DARSHAN_CXX: command to compile C++ programs +# DARSHAN_F90: command to compile Fortran90 programs +# DARSHAN_F77: command to compile Fortran77 programs +# DARSHAN_RUNJOB: command to execute a job and wait for its completion + +# This script may load optional modules (as in a Cray PE), set LD_PRELOAD +# variables (as in a dynamically linked environment), or generate mpicc +# wrappers (as in a statically linked environment). 
+ +# Notes specific to this platform (cray-module-nersc-perlmutter) +######################## +# Use Cray's default compiler wrappers and load the system default darshan module +# +# RUNJOB is responsible for submitting a slurm job, waiting for its +# completion, and checking its return status + +export DARSHAN_CC=cc +export DARSHAN_CXX=CC +export DARSHAN_F77=ftn +export DARSHAN_F90=ftn + +export DARSHAN_RUNJOB=$DARSHAN_TESTDIR/$DARSHAN_PLATFORM/runjob.sh + +module load darshan diff --git a/darshan-test/regression/cray-module-nersc-perlmutter/runjob.sh b/darshan-test/regression/cray-module-nersc-perlmutter/runjob.sh new file mode 100755 index 000000000..3744c7b48 --- /dev/null +++ b/darshan-test/regression/cray-module-nersc-perlmutter/runjob.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# submit job and wait for it to return +sbatch --wait -N 1 -t 10 -p debug -C cpu --output $DARSHAN_TMP/$$-tmp.out --error $DARSHAN_TMP/$$-tmp.err $DARSHAN_TESTDIR/$DARSHAN_PLATFORM/slurm-submit.sl "$@" + +# exit with return code of this job submission +exit $? 
diff --git a/darshan-test/regression/cray-module-nersc-perlmutter/slurm-submit.sl b/darshan-test/regression/cray-module-nersc-perlmutter/slurm-submit.sl new file mode 100755 index 000000000..6ff06fdab --- /dev/null +++ b/darshan-test/regression/cray-module-nersc-perlmutter/slurm-submit.sl @@ -0,0 +1,3 @@ +#!/bin/bash -l + +srun -n $DARSHAN_DEFAULT_NPROCS $@ diff --git a/darshan-test/regression/cray-module-nersc/env.sh b/darshan-test/regression/cray-module-nersc/env.sh index e49380801..0cf9f0676 100755 --- a/darshan-test/regression/cray-module-nersc/env.sh +++ b/darshan-test/regression/cray-module-nersc/env.sh @@ -32,4 +32,4 @@ export DARSHAN_F90=ftn export DARSHAN_RUNJOB=$DARSHAN_TESTDIR/$DARSHAN_PLATFORM/runjob.sh module unload darshan >& /dev/null -module load $DARSHAN_PATH/share/craype-2.x/modulefiles/ +module load $DARSHAN_RUNTIME_PATH/share/craype-2.x/modulefiles/ diff --git a/darshan-test/regression/cray-module-olcf-crusher/env.sh b/darshan-test/regression/cray-module-olcf-crusher/env.sh new file mode 100755 index 000000000..487124a0c --- /dev/null +++ b/darshan-test/regression/cray-module-olcf-crusher/env.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# General notes +####################### + +# Script to set up the environment for tests on this platform. Must export +# the following environment variables: +# +# DARSHAN_CC: command to compile C programs +# DARSHAN_CXX: command to compile C++ programs +# DARSHAN_F90: command to compile Fortran90 programs +# DARSHAN_F77: command to compile Fortran77 programs +# DARSHAN_RUNJOB: command to execute a job and wait for its completion + +# This script may load optional modules (as in a Cray PE), set LD_PRELOAD +# variables (as in a dynamically linked environment), or generate mpicc +# wrappers (as in a statically linked environment). 
+ +# Notes specific to this platform (cray-module-olcf-crusher) +######################## +# Use Cray's default compiler wrappers and LD_PRELOAD the darshan library +# associated with this install +# +# RUNJOB is responsible for submitting a Slurm job, waiting for its +# completion, and checking its return status + +export DARSHAN_CC=cc +export DARSHAN_CXX=CC +export DARSHAN_F77=ftn +export DARSHAN_F90=ftn + +export DARSHAN_RUNJOB=$DARSHAN_TESTDIR/$DARSHAN_PLATFORM/runjob.sh + +export LD_PRELOAD=$DARSHAN_RUNTIME_PATH/lib/libdarshan.so:$LD_PRELOAD diff --git a/darshan-test/regression/cray-module-olcf-crusher/runjob.sh b/darshan-test/regression/cray-module-olcf-crusher/runjob.sh new file mode 100755 index 000000000..fc3db33c3 --- /dev/null +++ b/darshan-test/regression/cray-module-olcf-crusher/runjob.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +PROJ=CSC332_crusher + +# submit job and wait for it to return +sbatch --wait -N 1 -t 10 -A $PROJ -p batch --output $DARSHAN_TMP/$$-tmp.out --error $DARSHAN_TMP/$$-tmp.err $DARSHAN_TESTDIR/$DARSHAN_PLATFORM/slurm-submit.sl "$@" + +# exit with return code of this job submission +exit $? 
diff --git a/darshan-test/regression/cray-module-olcf-crusher/slurm-submit.sl b/darshan-test/regression/cray-module-olcf-crusher/slurm-submit.sl new file mode 100755 index 000000000..6ff06fdab --- /dev/null +++ b/darshan-test/regression/cray-module-olcf-crusher/slurm-submit.sl @@ -0,0 +1,3 @@ +#!/bin/bash -l + +srun -n $DARSHAN_DEFAULT_NPROCS $@ diff --git a/darshan-test/regression/run-all.sh b/darshan-test/regression/run-all.sh index cb03fcc2b..7b4b8b811 100755 --- a/darshan-test/regression/run-all.sh +++ b/darshan-test/regression/run-all.sh @@ -3,11 +3,19 @@ if [ "$#" -ne 3 ]; then echo "Usage: run-all.sh " 1>&2 echo "Example: ./run-all.sh ~/darshan-install /tmp/test ws" 1>&2 + echo "Example: ./run-all.sh ~/darshan-runtime-install:~/darshan-util-install /tmp/test ws" 1>&2 exit 1 fi # set variables for use by other sub-scripts -export DARSHAN_PATH=$1 +DARSHAN_PATH=$1 +if [[ "$DARSHAN_PATH" == *":"* ]]; then + export DARSHAN_RUNTIME_PATH=`echo $DARSHAN_PATH | cut -f1 -d:` + export DARSHAN_UTIL_PATH=`echo $DARSHAN_PATH | cut -f2 -d:` +else + export DARSHAN_RUNTIME_PATH=$DARSHAN_PATH + export DARSHAN_UTIL_PATH=$DARSHAN_PATH +fi export DARSHAN_TMP=$2 export DARSHAN_PLATFORM=$3 # number of procs that most test jobs will use @@ -16,9 +24,15 @@ export DARSHAN_DEFAULT_NPROCS=4 DARSHAN_TESTDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) export DARSHAN_TESTDIR -# check darshan path -if [ ! -x $DARSHAN_PATH/bin/darshan-parser ]; then - echo "Error: $DARSHAN_PATH doesn't contain a valid Darshan install." 1>&2 +# check darshan-runtime path +if [ ! -x $DARSHAN_RUNTIME_PATH/bin/darshan-config ]; then + echo "Error: $DARSHAN_RUNTIME_PATH doesn't contain a valid Darshan-runtime install." 1>&2 + exit 1 +fi + +# check darshan-util path +if [ ! -x $DARSHAN_UTIL_PATH/bin/darshan-parser ]; then + echo "Error: $DARSHAN_UTIL_PATH doesn't contain a valid Darshan-util install." 
1>&2 exit 1 fi diff --git a/darshan-test/regression/test-cases/access-size-counter-test.sh b/darshan-test/regression/test-cases/access-size-counter-test.sh index 568c442ea..644ac4fff 100755 --- a/darshan-test/regression/test-cases/access-size-counter-test.sh +++ b/darshan-test/regression/test-cases/access-size-counter-test.sh @@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then fi # parse log -$DARSHAN_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt +$DARSHAN_UTIL_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt if [ $? -ne 0 ]; then echo "Error: failed to parse ${DARSHAN_LOGFILE}" 1>&2 exit 1 diff --git a/darshan-test/regression/test-cases/cxxpi.sh b/darshan-test/regression/test-cases/cxxpi.sh index 09523aa61..1249c76fb 100755 --- a/darshan-test/regression/test-cases/cxxpi.sh +++ b/darshan-test/regression/test-cases/cxxpi.sh @@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then fi # parse log -$DARSHAN_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt +$DARSHAN_UTIL_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt if [ $? -ne 0 ]; then echo "Error: failed to parse ${DARSHAN_LOGFILE}" 1>&2 exit 1 diff --git a/darshan-test/regression/test-cases/fperf-f77.sh b/darshan-test/regression/test-cases/fperf-f77.sh index a934c6e28..30497f963 100755 --- a/darshan-test/regression/test-cases/fperf-f77.sh +++ b/darshan-test/regression/test-cases/fperf-f77.sh @@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then fi # parse log -$DARSHAN_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt +$DARSHAN_UTIL_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt if [ $? 
-ne 0 ]; then echo "Error: failed to parse ${DARSHAN_LOGFILE}" 1>&2 exit 1 diff --git a/darshan-test/regression/test-cases/fperf-f90.sh b/darshan-test/regression/test-cases/fperf-f90.sh index 09028f654..11b5bf68d 100755 --- a/darshan-test/regression/test-cases/fperf-f90.sh +++ b/darshan-test/regression/test-cases/fperf-f90.sh @@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then fi # parse log -$DARSHAN_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt +$DARSHAN_UTIL_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt if [ $? -ne 0 ]; then echo "Error: failed to parse ${DARSHAN_LOGFILE}" 1>&2 exit 1 diff --git a/darshan-test/regression/test-cases/fprintf-fscanf-test.sh b/darshan-test/regression/test-cases/fprintf-fscanf-test.sh index 1c60613bf..d45c15b0d 100755 --- a/darshan-test/regression/test-cases/fprintf-fscanf-test.sh +++ b/darshan-test/regression/test-cases/fprintf-fscanf-test.sh @@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then fi # parse log -$DARSHAN_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt +$DARSHAN_UTIL_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt if [ $? -ne 0 ]; then echo "Error: failed to parse ${DARSHAN_LOGFILE}" 1>&2 exit 1 diff --git a/darshan-test/regression/test-cases/mpi-io-test-dxt.sh b/darshan-test/regression/test-cases/mpi-io-test-dxt.sh index 87a67ec58..c7a0ecd0b 100755 --- a/darshan-test/regression/test-cases/mpi-io-test-dxt.sh +++ b/darshan-test/regression/test-cases/mpi-io-test-dxt.sh @@ -24,7 +24,7 @@ if [ $? -ne 0 ]; then fi # parse log -$DARSHAN_PATH/bin/darshan-dxt-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}-dxt.darshan.txt +$DARSHAN_UTIL_PATH/bin/darshan-dxt-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}-dxt.darshan.txt if [ $? 
-ne 0 ]; then echo "Error: failed to parse ${DARSHAN_LOGFILE}" 1>&2 exit 1 @@ -34,7 +34,7 @@ fi # also, ensure that darshan-parser doesn't complain if given a log file that # has DXT data present -$DARSHAN_PATH/bin/darshan-parser $DARSHAN_LOGFILE > /dev/null +$DARSHAN_UTIL_PATH/bin/darshan-parser $DARSHAN_LOGFILE > /dev/null if [ $? -ne 0 ]; then echo "Error: darshan-parser failed to handle ${DARSHAN_LOGFILE}" 1>&2 exit 1 diff --git a/darshan-test/regression/test-cases/mpi-io-test.sh b/darshan-test/regression/test-cases/mpi-io-test.sh index ada761ad0..7ed5344f5 100755 --- a/darshan-test/regression/test-cases/mpi-io-test.sh +++ b/darshan-test/regression/test-cases/mpi-io-test.sh @@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then fi # parse log -$DARSHAN_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt +$DARSHAN_UTIL_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt if [ $? -ne 0 ]; then echo "Error: failed to parse ${DARSHAN_LOGFILE}" 1>&2 exit 1 @@ -42,7 +42,7 @@ fi # also, ensure that darshan-dxt-parser doesn't complain if given a log file that # does not have DXT data present -$DARSHAN_PATH/bin/darshan-dxt-parser $DARSHAN_LOGFILE > /dev/null +$DARSHAN_UTIL_PATH/bin/darshan-dxt-parser $DARSHAN_LOGFILE > /dev/null if [ $? -ne 0 ]; then echo "Error: darshan-dxt-parser failed to handle ${DARSHAN_LOGFILE}" 1>&2 exit 1 diff --git a/darshan-test/regression/test-cases/src/cxxpi.cxx b/darshan-test/regression/test-cases/src/cxxpi.cxx index b7c28a610..25bda9930 100644 --- a/darshan-test/regression/test-cases/src/cxxpi.cxx +++ b/darshan-test/regression/test-cases/src/cxxpi.cxx @@ -4,6 +4,12 @@ * See COPYRIGHT in top-level directory. */ +/* NOTE: This example originated from the MPICH repo: + * (https://github.com/pmodels/mpich/blob/main/examples/cxx/cxxpi.cxx). + * We have since modified the code to stop using MPI C++ bindings as + * they have been deprecated and can cause compile failures. 
+ */ + #include "mpi.h" #include using namespace std; @@ -25,19 +31,19 @@ int main(int argc,char **argv) int namelen; char processor_name[MPI_MAX_PROCESSOR_NAME]; - MPI::Init(argc,argv); - numprocs = MPI::COMM_WORLD.Get_size(); - myid = MPI::COMM_WORLD.Get_rank(); - MPI::Get_processor_name(processor_name,namelen); + MPI_Init(&argc,&argv); + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Get_processor_name(processor_name,&namelen); cout << "Process " << myid << " of " << numprocs << " is on " << processor_name << endl; n = 10000; /* default # of rectangles */ if (myid == 0) - startwtime = MPI::Wtime(); + startwtime = MPI_Wtime(); - MPI::COMM_WORLD.Bcast(&n, 1, MPI_INT, 0); + MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); h = 1.0 / (double) n; sum = 0.0; @@ -49,15 +55,15 @@ int main(int argc,char **argv) } mypi = h * sum; - MPI::COMM_WORLD.Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0); + MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (myid == 0) { - endwtime = MPI::Wtime(); + endwtime = MPI_Wtime(); cout << "pi is approximately " << pi << " Error is " << fabs(pi - PI25DT) << endl; cout << "wall clock time = " << endwtime-startwtime << endl; } - MPI::Finalize(); + MPI_Finalize(); return 0; } diff --git a/darshan-test/regression/test-cases/stdio-test.sh b/darshan-test/regression/test-cases/stdio-test.sh index 4aaac72ab..aa2d20e89 100755 --- a/darshan-test/regression/test-cases/stdio-test.sh +++ b/darshan-test/regression/test-cases/stdio-test.sh @@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then fi # parse log -$DARSHAN_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt +$DARSHAN_UTIL_PATH/bin/darshan-parser $DARSHAN_LOGFILE > $DARSHAN_TMP/${PROG}.darshan.txt if [ $? 
-ne 0 ]; then echo "Error: failed to parse ${DARSHAN_LOGFILE}" 1>&2 exit 1 diff --git a/darshan-test/regression/workstation-cc-wrapper/env.sh b/darshan-test/regression/workstation-cc-wrapper/env.sh index ccd0dc065..1d3223e8b 100755 --- a/darshan-test/regression/workstation-cc-wrapper/env.sh +++ b/darshan-test/regression/workstation-cc-wrapper/env.sh @@ -24,28 +24,28 @@ # The runjob command is just mpiexec, no scheduler -$DARSHAN_PATH/bin/darshan-gen-cc.pl `which mpicc` --output $DARSHAN_TMP/mpicc +$DARSHAN_RUNTIME_PATH/bin/darshan-gen-cc.pl `which mpicc` --output $DARSHAN_TMP/mpicc if [ $? -ne 0 ]; then echo "Error: failed to generate c compiler." 1>&2 exit 1 fi export DARSHAN_CC=$DARSHAN_TMP/mpicc -$DARSHAN_PATH/bin/darshan-gen-cxx.pl `which mpicxx` --output $DARSHAN_TMP/mpicxx +$DARSHAN_RUNTIME_PATH/bin/darshan-gen-cxx.pl `which mpicxx` --output $DARSHAN_TMP/mpicxx if [ $? -ne 0 ]; then echo "Error: failed to generate c compiler." 1>&2 exit 1 fi export DARSHAN_CXX=$DARSHAN_TMP/mpicxx -$DARSHAN_PATH/bin/darshan-gen-fortran.pl `which mpif77` --output $DARSHAN_TMP/mpif77 +$DARSHAN_RUNTIME_PATH/bin/darshan-gen-fortran.pl `which mpif77` --output $DARSHAN_TMP/mpif77 if [ $? -ne 0 ]; then echo "Error: failed to generate f77 compiler." 1>&2 exit 1 fi export DARSHAN_F77=$DARSHAN_TMP/mpif77 -$DARSHAN_PATH/bin/darshan-gen-fortran.pl `which mpif90` --output $DARSHAN_TMP/mpif90 +$DARSHAN_RUNTIME_PATH/bin/darshan-gen-fortran.pl `which mpif90` --output $DARSHAN_TMP/mpif90 if [ $? -ne 0 ]; then echo "Error: failed to generate f90 compiler." 1>&2 exit 1 diff --git a/darshan-test/regression/workstation-ld-preload/env.sh b/darshan-test/regression/workstation-ld-preload/env.sh index 00808ce59..04fe4da7e 100755 --- a/darshan-test/regression/workstation-ld-preload/env.sh +++ b/darshan-test/regression/workstation-ld-preload/env.sh @@ -35,7 +35,7 @@ FULL_MPICC_PATH=`which mpicc` # we must prepend libfmpich.so to the LD_PRELOAD variable, but with a fully # resolve path. 
To find a path we locate mpicc and speculate that # libfmich.so can be found in ../lib. -export LD_PRELOAD=`dirname $FULL_MPICC_PATH`/../lib/libfmpich.so:$DARSHAN_PATH/lib/libdarshan.so:$LD_PRELOAD +export LD_PRELOAD=`dirname $FULL_MPICC_PATH`/../lib/libfmpich.so:$DARSHAN_RUNTIME_PATH/lib/libdarshan.so:$LD_PRELOAD export DARSHAN_RUNJOB="mpiexec -n $DARSHAN_DEFAULT_NPROCS" diff --git a/darshan-test/regression/workstation-profile-conf-dynamic/env.sh b/darshan-test/regression/workstation-profile-conf-dynamic/env.sh index 110cdec94..7ca8806e3 100755 --- a/darshan-test/regression/workstation-profile-conf-dynamic/env.sh +++ b/darshan-test/regression/workstation-profile-conf-dynamic/env.sh @@ -30,12 +30,12 @@ export DARSHAN_CXX=mpicxx export DARSHAN_F77=mpif77 export DARSHAN_F90=mpif90 -export MPICC_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-cc -export MPICXX_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-cxx -export MPIF90_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-f -export MPIF77_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-f +export MPICC_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-cc +export MPICXX_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-cxx +export MPIF90_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-f +export MPIF77_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-f # MPICH 3.1.1 and newer use MPIFORT rather than MPIF90 and MPIF77 in env var # name -export MPIFORT_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-f +export MPIFORT_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-f export DARSHAN_RUNJOB="mpiexec -n $DARSHAN_DEFAULT_NPROCS" diff --git a/darshan-test/regression/workstation-profile-conf-static/env.sh b/darshan-test/regression/workstation-profile-conf-static/env.sh index b447d30eb..99b3d6e4a 100755 --- a/darshan-test/regression/workstation-profile-conf-static/env.sh +++ b/darshan-test/regression/workstation-profile-conf-static/env.sh @@ -30,12 +30,12 @@ export DARSHAN_CXX=mpicxx export 
DARSHAN_F77=mpif77 export DARSHAN_F90=mpif90 -export MPICC_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-cc-static -export MPICXX_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-cxx-static -export MPIF90_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-f-static -export MPIF77_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-f-static +export MPICC_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-cc-static +export MPICXX_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-cxx-static +export MPIF90_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-f-static +export MPIF77_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-f-static # MPICH 3.1.1 and newer use MPIFORT rather than MPIF90 and MPIF77 in env var # name -export MPIFORT_PROFILE=$DARSHAN_PATH/share/mpi-profile/darshan-f-static +export MPIFORT_PROFILE=$DARSHAN_RUNTIME_PATH/share/mpi-profile/darshan-f-static export DARSHAN_RUNJOB="mpiexec -n $DARSHAN_DEFAULT_NPROCS" diff --git a/darshan-util/pydarshan/.gitignore b/darshan-util/pydarshan/.gitignore index 826150ea1..b2e538724 100644 --- a/darshan-util/pydarshan/.gitignore +++ b/darshan-util/pydarshan/.gitignore @@ -24,7 +24,6 @@ dist/ downloads/ eggs/ .eggs/ -lib/ lib64/ parts/ sdist/ diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 5fce60b5e..50ae9cae3 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -8,6 +8,30 @@ header = """/* from darshan-logutils.h */ + +struct darshan_file_category_counters { + int64_t count; /* number of files in this category */ + int64_t total_read_volume_bytes; /* total read traffic volume */ + int64_t total_write_volume_bytes;/* total write traffic volume */ + int64_t max_read_volume_bytes; /* maximum read traffic volume to 1 file */ + int64_t max_write_volume_bytes; /* maximum write traffic volume to 1 file */ + int64_t total_max_offset_bytes; /* summation of 
max_offsets */ + int64_t max_offset_bytes; /* largest max_offset */ + int64_t nprocs; /* how many procs accessed (-1 for "all") */ +}; + +struct darshan_derived_metrics { + int64_t total_bytes; + double unique_io_total_time_by_slowest; + double unique_rw_only_time_by_slowest; + double unique_md_only_time_by_slowest; + int unique_io_slowest_rank; + double shared_io_total_time_by_slowest; + double agg_perf_by_slowest; + double agg_time_by_slowest; + struct darshan_file_category_counters category_counters[7]; +}; + struct darshan_mnt_info { char mnt_type[3015]; @@ -23,6 +47,20 @@ int partial_flag; }; +/* opaque accumulator reference */ +struct darshan_accumulator_st; +typedef struct darshan_accumulator_st* darshan_accumulator; + +/* NOTE: darshan_module_id is technically an enum in the C API, but we'll + * just use an int for now (equivalent type) to avoid warnings from cffi + * that we have not defined explicit enum values. We don't need that + * functionality. + */ +int darshan_accumulator_create(int darshan_module_id, int64_t, darshan_accumulator*); +int darshan_accumulator_inject(darshan_accumulator, void*, int); +int darshan_accumulator_emit(darshan_accumulator, struct darshan_derived_metrics*, void* aggregation_record); +int darshan_accumulator_destroy(darshan_accumulator); + /* from darshan-log-format.h */ typedef uint64_t darshan_record_id; diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 982fb2e86..5076c4edc 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -51,6 +51,26 @@ check_version(ffi, libdutil) +_mod_names = [ + "NULL", + "POSIX", + "MPI-IO", + "H5F", + "H5D", + "PNETCDF_FILE", + "PNETCDF_VAR", + "BG/Q", + "LUSTRE", + "STDIO", + "DXT_POSIX", + "DXT_MPIIO", + "MDHIM", + "APXC", + "APMPI", + "HEATMAP", +] +def mod_name_to_idx(mod_name): + return _mod_names.index(mod_name) _structdefs = { 
"BG/Q": "struct darshan_bgq_record **", @@ -657,3 +677,106 @@ def _log_get_heatmap_record(log): libdutil.darshan_free(buf[0]) return rec + + +def _df_to_rec(rec_dict, mod_name, rec_index_of_interest=None): + """ + Pack the DataFrames-format PyDarshan data back into + a C buffer of records that can be consumed by darshan-util + C code. + + Parameters + ---------- + rec_dict: dict + Dictionary containing the counter and fcounter dataframes. + + mod_name: str + Name of the darshan module. + + rec_index_of_interest: int or None + If ``None``, use all records in the dataframe. Otherwise, + repack only the the record at the provided integer index. + + Returns + ------- + buf: Raw char array containing a buffer of record(s) or a single record. + """ + counters_df = rec_dict["counters"] + fcounters_df = rec_dict["fcounters"] + counters_n_cols = counters_df.shape[1] + fcounters_n_cols = fcounters_df.shape[1] + id_col = counters_df.columns.get_loc("id") + rank_col = counters_df.columns.get_loc("rank") + if rec_index_of_interest is None: + num_recs = counters_df.shape[0] + # newer pandas versions can support ... + # but we use a slice for now + rec_index_of_interest = slice(0, counters_df.shape[0]) + else: + num_recs = 1 + # id and rank columns are duplicated + # in counters and fcounters + rec_arr = np.recarray(shape=(num_recs), dtype=[("id", " 1: + rec_arr.id = counters_df.iloc[rec_index_of_interest, id_col].to_numpy().reshape((num_recs, 1)) + rec_arr.rank = counters_df.iloc[rec_index_of_interest, rank_col].to_numpy().reshape((num_recs, 1)) + else: + rec_arr.id = counters_df.iloc[rec_index_of_interest, id_col] + rec_arr.rank = counters_df.iloc[rec_index_of_interest, rank_col] + buf = rec_arr.tobytes() + return buf + + +def log_get_derived_metrics(rec_dict, mod_name, nprocs): + """ + Passes a set of records (in pandas format) to the Darshan accumulator + interface, and returns the corresponding derived metrics struct. 
+ + Parameters: + rec_dict: Dictionary containing the counter and fcounter dataframes. + mod_name: Name of the Darshan module. + nprocs: Number of processes participating in accumulation. + + Returns: + darshan_derived_metrics struct (cdata object) + """ + mod_idx = mod_name_to_idx(mod_name) + darshan_accumulator = ffi.new("darshan_accumulator *") + r = libdutil.darshan_accumulator_create(mod_idx, nprocs, darshan_accumulator) + if r != 0: + raise RuntimeError("A nonzero exit code was received from " + "darshan_accumulator_create() at the C level. " + f"This could mean that the {mod_name} module does not " + "support derived metric calculation, or that " + "another kind of error occurred. It may be possible " + "to retrieve additional information from the stderr " + "stream.") + + num_recs = rec_dict["fcounters"].shape[0] + record_array = _df_to_rec(rec_dict, mod_name) + + r_i = libdutil.darshan_accumulator_inject(darshan_accumulator[0], record_array, num_recs) + if r_i != 0: + raise RuntimeError("A nonzero exit code was received from " + "darshan_accumulator_inject() at the C level. " + "It may be possible " + "to retrieve additional information from the stderr " + "stream.") + derived_metrics = ffi.new("struct darshan_derived_metrics *") + total_record = ffi.new(_structdefs[mod_name].replace("**", "*")) + r = libdutil.darshan_accumulator_emit(darshan_accumulator[0], + derived_metrics, + total_record) + libdutil.darshan_accumulator_destroy(darshan_accumulator[0]) + if r != 0: + raise RuntimeError("A nonzero exit code was received from " + "darshan_accumulator_emit() at the C level. " + "It may be possible " + "to retrieve additional information from the stderr " + "stream.") + return derived_metrics diff --git a/darshan-util/pydarshan/darshan/cli/base.html b/darshan-util/pydarshan/darshan/cli/base.html index 99382cb34..302858046 100644 --- a/darshan-util/pydarshan/darshan/cli/base.html +++ b/darshan-util/pydarshan/darshan/cli/base.html @@ -49,7 +49,8 @@

${fig_title}

${fig.fig_description}
% else: -
+ +
${fig.fig_description}
% endif diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py index 56050d5c0..5aed20729 100644 --- a/darshan-util/pydarshan/darshan/cli/summary.py +++ b/darshan-util/pydarshan/darshan/cli/summary.py @@ -14,6 +14,8 @@ import darshan import darshan.cli +from darshan.backend.cffi_backend import log_get_derived_metrics +from darshan.lib.accum import log_get_bytes_bandwidth, log_file_count_summary_table from darshan.experimental.plots import ( plot_dxt_heatmap, plot_io_cost, @@ -53,6 +55,11 @@ def __init__( fig_args: dict, fig_description: str = "", fig_width: int = 500, + # when there is no HTML data generated + # for the figure (i.e., no image/plot), + # we have the option of changing the caption + # text color for a warning/important standalone text + text_only_color: str = "red", ): self.section_title = section_title if not fig_title: @@ -65,7 +72,11 @@ def __init__( # temporary handling for DXT disabled cases # so special error message can be passed # in place of an encoded image + # NOTE: this code path is now also + # being used for adding the bandwidth + # text, which doesn't really have an image... self.fig_html = None + self.text_only_color = text_only_color if self.fig_func: self.generate_fig() @@ -321,6 +332,22 @@ def register_figures(self): """ self.figures = [] + if not self.report.modules: + # no data in report to summarize, print warning and that's it + no_data_message = ( + "This Darshan log file has no instrumentation records," + "there is no data to plot. Did this app do any I/O?" 
+ ) + fig = ReportFigure( + section_title="", + fig_title="", + fig_func=None, + fig_args=None, + fig_description=no_data_message, + ) + self.figures.append(fig) + return + ######################################### ## Add the runtime and/or DXT heat map(s) ######################################### @@ -487,6 +514,48 @@ def register_figures(self): ) self.figures.append(opcount_fig) + try: + if mod in ["POSIX", "MPI-IO", "STDIO"]: + # get the module's record dataframe and then pass to + # Darshan accumulator interface to generate a cumulative + # record and derived metrics + rec_dict = self.report.records[mod].to_df() + nprocs = self.report.metadata['job']['nprocs'] + derived_metrics = log_get_derived_metrics(rec_dict, mod, nprocs) + + # this is really just some text + # so using ReportFigure feels awkward... + bandwidth_fig = ReportFigure( + section_title=sect_title, + fig_title="", + fig_func=None, + fig_args=None, + fig_description=log_get_bytes_bandwidth(derived_metrics=derived_metrics, + mod_name=mod), + text_only_color="blue") + self.figures.append(bandwidth_fig) + + file_count_summary_fig = ReportFigure( + section_title=sect_title, + fig_title=f"File Count Summary
(estimated by {mod} I/O access offsets)", + fig_func=log_file_count_summary_table, + fig_args=dict(derived_metrics=derived_metrics, + mod_name=mod), + fig_width=805, + fig_description="") + self.figures.append(file_count_summary_fig) + except (RuntimeError, KeyError): + # the module probably doesn't support derived metrics + # calculations, but the C code doesn't distinguish other + # types of errors + + # the KeyError appears to be needed for a subset of logs + # for which _structdefs lacks APMPI or APXC entries; + # for example `e3sm_io_heatmap_only.darshan` in logs + # repo + pass + + ######################### # Data Access by Category if not {"POSIX", "STDIO"}.isdisjoint(set(self.report.modules)): diff --git a/darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py b/darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py index 08c49c599..560c7392f 100644 --- a/darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py +++ b/darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py @@ -316,8 +316,12 @@ def get_heatmap_df(agg_df: pd.DataFrame, xbins: int, nprocs: int) -> pd.DataFram bin_edge_data = np.linspace(0.0, max_time, xbins + 1) # create dummy variables for start/end time data, where dataframe columns # are the x-axis bin ranges + # pin dtype here because of pandas 2.0+ change--see: + # gh-909 and + # https://github.com/pandas-dev/pandas/pull/48022#issuecomment-1448755561 cats_start = pd.get_dummies( - pd.cut(agg_df["start_time"], bin_edge_data, precision=16) + pd.cut(agg_df["start_time"], bin_edge_data, precision=16), + dtype=np.uint8, ) cats_end = pd.get_dummies(pd.cut(agg_df["end_time"], bin_edge_data, precision=16)) # get series for the elapsed times for each dxt segment diff --git a/darshan-util/pydarshan/darshan/lib/accum.py b/darshan-util/pydarshan/darshan/lib/accum.py new file mode 100644 index 000000000..924e69ee6 --- /dev/null +++ b/darshan-util/pydarshan/darshan/lib/accum.py @@ -0,0 +1,104 @@ 
+import darshan +from darshan.experimental.plots import plot_common_access_table + +darshan.enable_experimental() + +import numpy as np +import pandas as pd +import humanize + + +def log_get_bytes_bandwidth(derived_metrics, mod_name: str) -> str: + """ + Summarize I/O performance for a given darshan module. + + Parameters + ---------- + derived_metrics: + structure (cdata object) describing metrics derived from a + set of records passed to the Darshan accumulator interface + mod_name: str + Name of the darshan module to summarize the I/O + performance for. + + Returns + ------- + out: str + A short string summarizing the performance of the given module + in the provided log file, including bandwidth and total data + transferred. + + Raises + ------ + RuntimeError + When a provided module name is not supported for the accumulator + interface for provision of the summary data, or for any other + error that occurs in the C/CFFI interface. + ValueError + When a provided module name does not exist in the log file. 
+ + Examples + -------- + + >>> from darshan.log_utils import get_log_path + >>> from darshan.lib.accum import log_get_bytes_bandwidth + + >>> log_path = get_log_path("imbalanced-io.darshan") + >>> log_get_bytes_bandwidth(log_path, "POSIX") + I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s + + >>> log_get_bytes_bandwidth(log_path, "MPI-IO") + I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s + """ + # get total bytes (in MiB) and bandwidth (in MiB/s) for + # a given module -- this information was commonly reported + # in the old perl-based summary reports + total_mib = derived_metrics.total_bytes / 2 ** 20 + total_bw = derived_metrics.agg_perf_by_slowest + ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s" + return ret_str + + +def log_file_count_summary_table(derived_metrics, + mod_name: str): + # the darshan_file_category enum is not really + # exposed in CFFI/Python layer, so we effectively + # re-export the content indices we need here + # so that we can properly index the C-level data + darshan_file_category = {"total files":0, + "read-only files":1, + "write-only files":2, + "read/write files":3} + df = pd.DataFrame.from_dict(darshan_file_category, orient="index") + df.rename(columns={0:"index"}, inplace=True) + df.index.rename('type', inplace=True) + df["number of files"] = np.zeros(4, dtype=int) + df["avg. 
size"] = np.zeros(4, dtype=str) + df["max size"] = np.zeros(4, dtype=str) + + for cat_name, index in darshan_file_category.items(): + cat_counters = derived_metrics.category_counters[index] + num_files = int(cat_counters.count) + if num_files == 0: + max_size = "0" + avg_size = "0" + else: + max_size, binary_units = humanize.naturalsize(cat_counters.max_offset_bytes + 1, + binary=True, + format="%.2f").split() + if max_size != "0": + max_size = f"{max_size} {binary_units}" + # NOTE: internal formula based on discussion with Phil Carns + avg_size, binary_units = humanize.naturalsize((cat_counters.total_max_offset_bytes + num_files) / num_files, + binary=True, + format="%.2f").split() + if avg_size != "0": + avg_size = f"{avg_size} {binary_units}" + df.iloc[index] = [index, num_files, avg_size, max_size] + # we don't need the index column once we're done with + # the CFFI/C interaction + df.drop(columns="index", inplace=True) + ret = plot_common_access_table.DarshanReportTable(df, + col_space=200, + justify="center") + return ret diff --git a/darshan-util/pydarshan/darshan/report.py b/darshan-util/pydarshan/darshan/report.py index 17b0e0e3c..047e4d568 100644 --- a/darshan-util/pydarshan/darshan/report.py +++ b/darshan-util/pydarshan/darshan/report.py @@ -661,6 +661,9 @@ def mod_read_all_records(self, mod, dtype=None, warnings=True): cn = backend.counter_names(mod) fcn = backend.fcounter_names(mod) + if mod not in self._modules: + raise ValueError(f"mod {mod} is not available in this DarshanReport object.") + # update module metadata self._modules[mod]['num_records'] = 0 if mod not in self.counters: diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 4c95d52f2..f309dcb99 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -9,6 +9,7 @@ from numpy.testing import assert_array_equal, assert_allclose import darshan 
import darshan.backend.cffi_backend as backend +from darshan.backend.cffi_backend import ffi, libdutil, _structdefs from darshan.log_utils import get_log_path def test_get_lib_version(): @@ -159,3 +160,115 @@ def test_log_get_generic_record(dtype): # make sure the returned key/column names agree assert actual_counter_names == expected_counter_names assert actual_fcounter_names == expected_fcounter_names + + +@pytest.mark.parametrize("log_name", [ + "imbalanced-io.darshan", + "e3sm_io_heatmap_only.darshan", + ]) +@pytest.mark.parametrize("module, index", [ + ("POSIX", 0), + ("POSIX", 3), + ("POSIX", 5), + ("MPI-IO", 0), + ("MPI-IO", 2), + # less records available for STDIO testing + # with these logs + ("STDIO", 0), + ]) +def test_df_to_rec(log_name, index, module): + # test for packing a dataframe into a C-style record + # this is perhaps nothing more than a "round-trip" test + log_path = get_log_path(log_name) + with darshan.DarshanReport(log_path, read_all=True) as report: + report.mod_read_all_records(module, dtype="pandas") + rec_dict = report.records[module][0] + + # id and rank are not formally included in the reconsituted + # (f)counters "buffer" so truncate a bit on comparison + expected_fcounters = rec_dict["fcounters"].iloc[index, 2:] + expected_counters = rec_dict["counters"].iloc[index, 2:].astype(np.int64) + expected_id = rec_dict["counters"].iloc[index, 0].astype(np.uint64) + expected_rank = rec_dict["counters"].iloc[index, 1] + + # retrive the "re-packed"/actual record data: + rbuf = backend._df_to_rec(rec_dict, module, index) + rec_buf = ffi.from_buffer(_structdefs[module].replace("**", "*"), rbuf) + actual_fcounters = np.frombuffer(ffi.buffer(rec_buf[0].fcounters)) + actual_counters = np.frombuffer(ffi.buffer(rec_buf[0].counters), dtype=np.int64) + actual_id = rec_buf[0].base_rec.id + actual_rank = rec_buf[0].base_rec.rank + + + assert_allclose(actual_fcounters, expected_fcounters) + assert_allclose(actual_counters, expected_counters) + assert 
actual_id == expected_id + assert actual_rank == expected_rank + + +@pytest.mark.parametrize("python_filter, expected_counts", [ + # whether to do an initial filtering + # of the DataFrame in Python before + # packing it back into C records + (True, [18, 12, 2, 1]), + (False, [1026, 12, 2, 1]) # see gh-867 + ]) +def test_reverse_record_array(python_filter, expected_counts): + # pack pandas DataFrame objects back into + # a contiguous buffer of several records + # and then use the darshan-util C lib accumulator + # on that record array, and compare the results + # with those discussed in gh-867 from Perl report + log_path = get_log_path("imbalanced-io.darshan") + with darshan.DarshanReport(log_path, read_all=True) as report: + nprocs = report.metadata['job']['nprocs'] + modules = report.modules + report.mod_read_all_records("POSIX", dtype="pandas") + rec_dict = report.records["POSIX"][0] + counters_df = rec_dict["counters"] + fcounters_df = rec_dict["fcounters"] + if python_filter: + # gh-867 and the perl report filtered files that were + # only stat'd rather than opened, so demo the same filtering + # here at Python layer, then feed back to C accum stuff + fcounters_df = fcounters_df[counters_df["POSIX_OPENS"] > 0] + counters_df = counters_df[counters_df["POSIX_OPENS"] > 0] + rec_dict["counters"] = counters_df + rec_dict["fcounters"] = fcounters_df + num_recs = rec_dict["fcounters"].shape[0] + record_array = backend._df_to_rec(rec_dict, "POSIX") + + # need to deal with the low-level C stuff to set up + # accumulator infrastructure to receive the repacked + # records + darshan_accumulator = ffi.new("darshan_accumulator *") + r = libdutil.darshan_accumulator_create(modules["POSIX"]['idx'], + nprocs, + darshan_accumulator) + assert r == 0 + r_i = libdutil.darshan_accumulator_inject(darshan_accumulator[0], record_array, num_recs) + assert r_i == 0 + derived_metrics = ffi.new("struct darshan_derived_metrics *") + summation_record = 
ffi.new(_structdefs["POSIX"].replace("**", "*")) + r = libdutil.darshan_accumulator_emit(darshan_accumulator[0], + derived_metrics, + summation_record) + assert r == 0 + r = libdutil.darshan_accumulator_destroy(darshan_accumulator[0]) + assert r == 0 + + # the indices into category_counters are pretty opaque.. we should just + # move everything to Python "eventually"... (also to avoid all the junk above after filtering..) + # 0 = total + # 1 = RO + # 2 = WO + # 3 = R/W + actual_total_files = derived_metrics.category_counters[0].count + actual_ro_files = derived_metrics.category_counters[1].count + actual_wo_files = derived_metrics.category_counters[2].count + actual_rw_files = derived_metrics.category_counters[3].count + assert_array_equal([actual_total_files, + actual_ro_files, + actual_wo_files, + actual_rw_files], + expected_counts) diff --git a/darshan-util/pydarshan/darshan/tests/test_lib_accum.py b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py new file mode 100644 index 000000000..080264d16 --- /dev/null +++ b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py @@ -0,0 +1,217 @@ +import darshan +from darshan.backend.cffi_backend import log_get_derived_metrics +from darshan.lib.accum import log_get_bytes_bandwidth, log_file_count_summary_table +from darshan.log_utils import get_log_path + +import pytest +import pandas as pd +from pandas.testing import assert_frame_equal + +@pytest.mark.parametrize("log_path, mod_name, expected_str", [ + # the expected bytes/bandwidth strings are pasted + # directly from the old perl summary reports; + # exceptions noted below + # in some cases we defer to darshan-parser for the expected + # values; see discussion in gh-839 + ("imbalanced-io.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"), + ("imbalanced-io.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s"), + # imbalanced-io.darshan does have 
LUSTRE data, + # but it doesn't support derived metrics at time + # of writing + ("imbalanced-io.darshan", + "LUSTRE", + "RuntimeError"), + # APMPI doesn't support derived metrics either + ("e3sm_io_heatmap_only.darshan", + "APMPI", + "RuntimeError"), + ("imbalanced-io.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s"), + ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"), + ("runtime_and_dxt_heatmaps_diagonal_write_only.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s"), + ("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s"), + ("e3sm_io_heatmap_only.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 3.26 MiB/s"), + ("e3sm_io_heatmap_only.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 73880.2 MiB at 105.69 MiB/s"), + ("partial_data_stdio.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 32.0 MiB at 2317.98 MiB/s"), + ("partial_data_stdio.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 16336.0 MiB at 2999.14 MiB/s"), + # the C derived metrics code can't distinguish + # between different kinds of errors at this time, + # but we can still intercept in some cases... 
+ ("partial_data_stdio.darshan", + "GARBAGE", + "ValueError"), + ("skew-app.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 41615.8 MiB at 157.49 MiB/s"), + ("skew-app.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 41615.8 MiB at 55.22 MiB/s"), +]) +def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): + # test the basic scenario of retrieving + # the total data transferred and bandwidth + # for all records in a given module; the situation + # of accumulating derived metrics with filtering + # (i.e., for a single filename) is not tested here + + log_path = get_log_path(log_path) + with darshan.DarshanReport(log_path, read_all=True) as report: + if expected_str == "ValueError": + with pytest.raises(ValueError, + match=f"mod {mod_name} is not available"): + report.mod_read_all_records(mod_name, dtype="pandas") + else: + report.mod_read_all_records(mod_name, dtype="pandas") + rec_dict = report.records[mod_name][0] + nprocs = report.metadata['job']['nprocs'] + + if expected_str == "RuntimeError": + with pytest.raises(RuntimeError, + match=f"{mod_name} module does not support derived"): + log_get_derived_metrics(rec_dict, mod_name, nprocs) + else: + derived_metrics = log_get_derived_metrics(rec_dict, mod_name, nprocs) + actual_str = log_get_bytes_bandwidth(derived_metrics=derived_metrics, + mod_name=mod_name) + assert actual_str == expected_str + + +@pytest.mark.parametrize("log_name, mod_name, expected", [ + # we try to match the "File Count Summary" + # tables from the old Perl reports, but + # expected values for file counts + # are from darshan-parser --file + # because of issues like gh-867 + + # this also means that the average size + # column on the old Perl reports cannot always + # be relied upon, since that is calculated + # using the file counts; furthermore, + # total_max_offset_bytes is not printed by + # darshan-parser --file, so the avg size column + # is 
not checked quite as robustly as file count + # and max size, though in cases where the Perl + # report happens to match the file count, it does + # seem to match + + # futhermore, the old Perl report doesn't print out + # the file count summary table for all modules, for + # example often only showing for POSIX, so in those + # cases we really just verify the file count and + # the other columns are regression guards against + # what we currently have (max size may be available + # in a subset of these cases as well) + ("e3sm_io_heatmap_only.darshan", + "POSIX", + # + [[3, "99.74 GiB", "297.71 GiB"], + [1, "11.18 MiB", "11.18 MiB"], + [2, "149.60 GiB", "297.71 GiB"], + [0, "0", "0"]], + ), + ("e3sm_io_heatmap_only.darshan", + "MPI-IO", + [[3, "0", "0"], + [1, "0", "0"], + [2, "0", "0"], + [0, "0", "0"]], + ), + ("e3sm_io_heatmap_only.darshan", + "STDIO", + [[1, "5.80 KiB", "5.80 KiB"], + [0, "0", "0"], + [1, "5.80 KiB", "5.80 KiB"], + [0, "0", "0"]], + ), + # the Perl report only gets a very + # small fraction of these values correct; + # rely on the parser a bit more here; perhaps + # because of partial data, etc. 
+ ("imbalanced-io.darshan", + "POSIX", + [[1026, "73.96 MiB", "49.30 GiB"], + [12, "67.73 MiB", "549.32 MiB"], + [2, "12.00 GiB", "22.63 GiB"], + [1, "49.30 GiB", "49.30 GiB"]], + ), + ("imbalanced-io.darshan", + "MPI-IO", + [[3, "0", "0"], + [0, "0", "0"], + [2, "0", "0"], + [1, "0", "0"]], + ), + ("imbalanced-io.darshan", + "STDIO", + [[12, "93.12 KiB", "964.00 KiB"], + [1, "1.81 KiB", "1.81 KiB"], + [10, "111.56 KiB", "964.00 KiB"], + [0, "0", "0"]], + ), + ("snyder_acme.exe_id1253318_9-27-24239-1515303144625770178_2.darshan", + "POSIX", + [[100, "1.84 GiB", "100.00 GiB"], + [73, "514.56 MiB", "13.84 GiB"], + [19, "66.86 MiB", "1.23 GiB"], + [8, "18.30 GiB", "100.00 GiB"]], + ), + ("snyder_acme.exe_id1253318_9-27-24239-1515303144625770178_2.darshan", + "MPI-IO", + [[59, "0", "0"], + [50, "0", "0"], + [9, "0", "0"], + [0, "0", "0"]], + ), + ("snyder_acme.exe_id1253318_9-27-24239-1515303144625770178_2.darshan", + "STDIO", + [[16, "81.21 KiB", "524.37 KiB"], + [9, "4 Bytes", "4 Bytes"], + [7, "185.62 KiB", "524.37 KiB"], + [0, "0", "0"]], + ), +]) +def test_file_count_summary_table(log_name, + mod_name, + expected): + expected_df = pd.DataFrame(expected) + expected_df.columns = ["number of files", + "avg. 
size", + "max size"] + # the team decided that we should exclude + # "created" files row from the old report because + # we can't really determine it reliably + expected_df.index = ["total files", + "read-only files", + "write-only files", + "read/write files"] + expected_df.index.rename('type', inplace=True) + + log_path = get_log_path(log_name) + with darshan.DarshanReport(log_path, read_all=True) as report: + rec_dict = report.records[mod_name].to_df() + nprocs = report.metadata['job']['nprocs'] + + derived_metrics = log_get_derived_metrics(rec_dict, mod_name, nprocs) + + actual_df = log_file_count_summary_table(derived_metrics=derived_metrics, + mod_name=mod_name).df + assert_frame_equal(actual_df, expected_df) diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py index 282ab7d36..65832066f 100644 --- a/darshan-util/pydarshan/darshan/tests/test_summary.py +++ b/darshan-util/pydarshan/darshan/tests/test_summary.py @@ -81,12 +81,12 @@ def test_main_with_args(tmpdir, argv): @pytest.mark.parametrize( "argv, expected_img_count, expected_table_count", [ - (["noposix.darshan"], 3, 2), - (["noposix.darshan", "--output=test.html"], 3, 2), - (["sample-dxt-simple.darshan"], 8, 4), - (["sample-dxt-simple.darshan", "--output=test.html"], 8, 4), - (["nonmpi_dxt_anonymized.darshan"], 6, 3), - (["ior_hdf5_example.darshan"], 11, 5), + (["noposix.darshan"], 3, 3), + (["noposix.darshan", "--output=test.html"], 3, 3), + (["sample-dxt-simple.darshan"], 8, 6), + (["sample-dxt-simple.darshan", "--output=test.html"], 8, 6), + (["nonmpi_dxt_anonymized.darshan"], 6, 5), + (["ior_hdf5_example.darshan"], 11, 8), ([None], 0, 0), ] ) @@ -209,9 +209,14 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath): stdio_position > -1): assert mpiio_position < posix_position < stdio_position else: - # check that help message is present - assert "Heatmap data is not available for this job" in report_str - assert "Consider enabling 
the runtime heatmap module" in report_str + if not "empty_log" in log_filepath: + # check that help message is present + assert "Heatmap data is not available for this job" in report_str + assert "Consider enabling the runtime heatmap module" in report_str + else: + # check empty log warning and return + assert "This Darshan log file has no instrumentation records" in report_str + return # check if I/O cost figure is present for mod in report.modules: @@ -236,6 +241,13 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath): else: assert actual_runtime_heatmap_titles == 0 + # check for presence of bandwidth summary strings + # (more detailed per-module probes are present + # in test_derived_metrics_bytes_and_bandwidth()) + assert "I/O performance estimate" in report_str + assert "color: blue" in report_str + assert "File Count Summary" in report_str + class TestReportData: diff --git a/darshan-util/pydarshan/docs/install.rst b/darshan-util/pydarshan/docs/install.rst index 114350a4d..19c5fc0d0 100644 --- a/darshan-util/pydarshan/docs/install.rst +++ b/darshan-util/pydarshan/docs/install.rst @@ -41,7 +41,7 @@ You can either clone the public repository: .. code-block:: console - $ git clone https://xgitlab.cels.anl.gov/darshan/darshan.git + $ git clone https://github.com/darshan-hpc/darshan.git $ cd darshan/darshan-util/pydarshan @@ -50,4 +50,4 @@ You can either clone the public repository: $ python setup.py install -.. _Github repo: https://xgitlab.cels.anl.gov/darshan/darshan +.. 
_Github repo: https://github.com/darshan-hpc/darshan.git diff --git a/darshan-util/pydarshan/pyproject.toml b/darshan-util/pydarshan/pyproject.toml index d7599caaf..4a1a7f1c4 100644 --- a/darshan-util/pydarshan/pyproject.toml +++ b/darshan-util/pydarshan/pyproject.toml @@ -22,7 +22,8 @@ test-requires = [ "pytest", "lxml", "matplotlib", - "importlib_resources;python_version<'3.9'" + "importlib_resources;python_version<'3.9'", + "humanize" ] before-test = "pip install -U git+https://github.com/darshan-hpc/darshan-logs.git@main" test-command = "pytest {package}" diff --git a/darshan-util/pydarshan/setup.py b/darshan-util/pydarshan/setup.py index b6cfc6596..7dc513e1d 100644 --- a/darshan-util/pydarshan/setup.py +++ b/darshan-util/pydarshan/setup.py @@ -13,7 +13,7 @@ readme = readme_file.read() -requirements = ["cffi", "numpy", "pandas", "matplotlib", "seaborn", "mako"] +requirements = ["cffi", "numpy", "pandas", "matplotlib", "seaborn", "mako", "humanize"] setup_requirements = [ "pytest-runner",