Skip to content

Commit

Permalink
merging main and cleaning up ldms related code
Browse files Browse the repository at this point in the history
  • Loading branch information
Snell1224 committed Mar 8, 2023
1 parent 9bb1fd4 commit bcba732
Show file tree
Hide file tree
Showing 49 changed files with 921 additions and 138 deletions.
2 changes: 1 addition & 1 deletion .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ macos_M1_native_apple_silicon_py310_task:
/opt/homebrew/opt/[email protected]/bin/python3 -m venv ~/py_310
source ~/py_310/bin/activate
python -m pip install --upgrade pip
python -m pip install --upgrade pytest lxml matplotlib packaging
python -m pip install --upgrade pytest lxml matplotlib packaging humanize
brew install automake libtool binutils
mkdir -p /tmp/darshan_install
export DARSHAN_INSTALL_PATH=/tmp/darshan_install
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install --upgrade pytest mypy pyflakes asv pytest-cov codecov lxml matplotlib packaging
python -m pip install --upgrade pytest pyflakes asv pytest-cov codecov lxml matplotlib packaging humanize "mypy<1.0.0"
- if: ${{matrix.platform == 'macos-latest'}}
name: Install MacOS deps
run: |
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/runtime_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@ jobs:
sudo apt-get update -y
sudo apt-get install -y hdf5-tools libhdf5-openmpi-dev openmpi-bin
python -m pip install --upgrade pip
python -m pip install --upgrade pytest mpi4py
python -m pip install --upgrade pytest mpi4py cython numpy wheel pkgconfig "setuptools<62.0.0"
# we need to build h5py with the system HDF5 lib backend
export HDF5_MPI="ON"
CC=mpicc python -m pip install --no-binary=h5py h5py
# Install h5py https://github.com/h5py/h5py/issues/2222
CC=mpicc python -m pip install --no-cache-dir --no-binary=h5py h5py --no-build-isolation
- name: Install darshan-runtime
run: |
export C_INCLUDE_PATH=$C_INCLUDE_PATH:/usr/include/hdf5/openmpi/
Expand Down
1 change: 1 addition & 0 deletions LICENSE
9 changes: 3 additions & 6 deletions darshan-runtime/lib/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ C_SRCS = darshan-core-init-finalize.c \
darshan-core.c \
darshan-common.c \
darshan-config.c \
darshan-ldms.c \
lookup3.c \
lookup8.c

Expand All @@ -36,10 +37,6 @@ if BUILD_DXT_MODULE
C_SRCS += darshan-dxt.c
endif

if BUILD_LDMS_MODULE
C_SRCS += darshan-ldms.c
endif

if BUILD_MPIIO_MODULE
C_SRCS += darshan-mpiio.c
endif
Expand Down Expand Up @@ -112,10 +109,10 @@ libdarshan_la_CPPFLAGS = $(AM_CPPFLAGS) -D_LARGEFILE64_SOURCE -DDARSHAN_PRELOAD
libdarshan_a_SOURCES = $(C_SRCS)
libdarshan_a_CPPFLAGS = $(AM_CPPFLAGS) -D_LARGEFILE64_SOURCE -DDARSHAN_WRAP_MMAP

if HAVE_LDMS
if BUILD_LDMS_MODULE
libdarshan_la_LIBADD += -lldmsd_stream -lldms -lovis_json -lcoll
libdarshan_la_CPPFLAGS += -I$(LDMS_HOME)/include
libdarshan_la_LDFLAGS = -L$(LDMS_HOME)/lib -Wl,-rpath=$(LDMS_HOME)/lib
libdarshan_la_LDFLAGS = -L$(LDMS_HOME)/lib64 -Wl,-rpath=$(LDMS_HOME)/lib64
endif

H_SRCS = darshan-common.h \
Expand Down
32 changes: 2 additions & 30 deletions darshan-runtime/lib/darshan-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,6 @@ static int mnt_data_count = 0;
extern void bgq_runtime_initialize();
#endif

#ifdef HAVE_LDMS
extern struct darshanConnector dC;
extern void darshan_ldms_connector_initialize();
#endif

#ifdef DARSHAN_USE_APXC
extern void apxc_runtime_initialize();
#endif
Expand Down Expand Up @@ -361,7 +356,8 @@ void darshan_core_initialize(int argc, char **argv)


#ifdef HAVE_LDMS
darshan_ldms_connector_initialize();
/* pass init_core to darshan-ldms connector initialization*/
darshan_ldms_connector_initialize(init_core);
#endif

/* if darshan was successfully initialized, set the global pointer
Expand Down Expand Up @@ -407,30 +403,6 @@ void darshan_core_initialize(int argc, char **argv)
darshan_core_fprintf(stderr, "darshan:init\t%d\t%f\n", nprocs, init_time);
}


#ifdef HAVE_LDMS
/*TODO: Create environment variable to re-connect to ldms every x seconds
if(getenv("DARSHAN_LDMS_REINIT"))
dC.env_ldms_reinit = getenv("DARSHAN_LDMS_REINIT");
else
dC.env_ldms_reinit = "1";
*/
/* Set meta data for LDMS message sending */
(void)gethostname(dC.hname, sizeof(dC.hname));
dC.jobid = (int64_t)jobid;
dC.uid = getuid();
dC.exename = argv[0];

/* Pull executable name from Darshan's variable if no arguments are given. */
if (argc==0)
{
char buff[DARSHAN_EXE_LEN];
int len = readlink("/proc/self/exe", buff, sizeof(buff)-1);
buff[len] = '\0';
dC.exename = buff;
}
#endif

return;
}

Expand Down
4 changes: 4 additions & 0 deletions darshan-runtime/lib/darshan-hdf5.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "darshan-dynamic.h"

#include <hdf5.h>
#include "darshan-ldms.h"

/* H5F prototypes */
DARSHAN_FORWARD_DECL(H5Fcreate, hid_t, (const char *filename, unsigned flags, hid_t create_plist, hid_t access_plist));
Expand Down Expand Up @@ -374,6 +375,9 @@ herr_t DARSHAN_DECL(H5Fflush)(hid_t object_id, H5F_scope_t scope)
tm1, tm2, rec_ref->last_meta_end);
}
H5F_POST_RECORD();

MAP_OR_FAIL(H5Fclose);
__real_H5Fclose(file_id);
}
}

Expand Down
72 changes: 45 additions & 27 deletions darshan-runtime/lib/darshan-ldms.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,19 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "darshan-ldms.h"
#include "darshan.h"

/* Check for LDMS libraries if Darshan is built --with-ldms */
#ifdef HAVE_LDMS
#include <ldms/ldms.h>
#include <ldms/ldmsd_stream.h>
#include <ovis_util/util.h>
#include "ovis_json/ovis_json.h"
#include "darshan-ldms.h"

struct darshanConnector dC = {
.ldms_darsh = NULL,
.exename = NULL,
.ldms_lib = 0,
};

Expand Down Expand Up @@ -57,7 +59,6 @@ static void event_cb(ldms_t x, ldms_xprt_event_t e, void *cb_arg)
}
}

#define SLURM_NOTIFY_TIMEOUT 5
ldms_t setup_connection(const char *xprt, const char *host,
const char *port, const char *auth)
{
Expand All @@ -66,10 +67,6 @@ ldms_t setup_connection(const char *xprt, const char *host,
int rc;
struct timespec ts;

if (!host) {
if (0 == gethostname(hostname, sizeof(hostname)))
host = hostname;
}
if (!timeout) {
ts.tv_sec = time(NULL) + 5;
ts.tv_nsec = 0;
Expand Down Expand Up @@ -103,17 +100,36 @@ ldms_t setup_connection(const char *xprt, const char *host,
return ldms_g;
}

void darshan_ldms_connector_initialize()
void darshan_ldms_connector_initialize(struct darshan_core_runtime *init_core)
{
/*TODO: Create environment variable to re-connect to ldms every x seconds
if(getenv("DARSHAN_LDMS_REINIT"))
dC.env_ldms_reinit = getenv("DARSHAN_LDMS_REINIT");
else
dC.env_ldms_reinit = "1";*/

/* Set meta data for LDMS message sending */
(void)gethostname(dC.hname, sizeof(dC.hname));
dC.jobid = init_core->log_job_p->jobid;
dC.uid = init_core->log_job_p->uid;

/* grab exe path from darshan_core_runtime */
dC.exename = strtok(init_core->log_exemnt_p, " ");

/* Pull executable name from proc if no arguments are given. */
if (dC.exename == NULL)
{
char buff[DARSHAN_EXE_LEN];
int len = readlink("/proc/self/exe", buff, sizeof(buff)-1);
buff[len] = '\0';
dC.exename = buff;
}

if (!getenv("DARSHAN_LDMS_STREAM"))
dC.env_ldms_stream = "darshanConnector";
dC.env_ldms_stream = "darshanConnector";

/* Set flags for various LDMS environment variables */
if (getenv("DXT_ENABLE_LDMS"))
dC.dxt_enable_ldms = 0;
else
dC.dxt_enable_ldms =1;

/* Set flags for various LDMS environment variables */
if (getenv("POSIX_ENABLE_LDMS"))
dC.posix_enable_ldms = 0;
else
Expand All @@ -124,21 +140,21 @@ void darshan_ldms_connector_initialize()
else
dC.mpiio_enable_ldms = 1;

/* Disable STDIO publishing when verbose output is enabled, to avoid
recursive calls into darshan_ldms_connector_send() */
if (getenv("STDIO_ENABLE_LDMS"))
dC.stdio_enable_ldms = 0;
if (!getenv("DARSHAN_LDMS_VERBOSE"))
dC.stdio_enable_ldms = 0;
else
dC.stdio_enable_ldms = 1;
else
dC.stdio_enable_ldms = 1;

if (getenv("HDF5_ENABLE_LDMS"))
dC.hdf5_enable_ldms = 0;
else
dC.hdf5_enable_ldms = 1;

if (getenv("MDHIM_ENABLE_LDMS"))
dC.mdhim_enable_ldms = 0;
else
dC.mdhim_enable_ldms = 1;

const char* env_ldms_xprt = getenv("DARSHAN_LDMS_XPRT");
const char* env_ldms_host = getenv("DARSHAN_LDMS_HOST");
const char* env_ldms_port = getenv("DARSHAN_LDMS_PORT");
Expand Down Expand Up @@ -216,22 +232,24 @@ void darshan_ldms_connector_send(int64_t record_count, char *rwo, int64_t offset
}

sprintf(jb11,"{ \"uid\":%ld, \"exe\":\"%s\",\"job_id\":%ld,\"rank\":%ld,\"ProducerName\":\"%s\",\"file\":\"%s\",\"record_id\":%"PRIu64",\"module\":\"%s\",\"type\":\"%s\",\"max_byte\":%ld,\"switches\":%ld,\"flushes\":%ld,\"cnt\":%ld,\"op\":\"%s\",\"seg\":[{\"data_set\":\"%s\",\"pt_sel\":%ld,\"irreg_hslab\":%ld,\"reg_hslab\":%ld,\"ndims\":%ld,\"npoints\":%ld,\"off\":%ld,\"len\":%ld,\"start\":%0.6f,\"dur\":%0.6f,\"total\":%.6f,\"timestamp\":%lu.%.6lu}]}", dC.uid, dC.exename, dC.jobid, dC.rank, dC.hname, dC.filename, dC.record_id, mod_name, data_type, max_byte, rw_switch, flushes, record_count, rwo, dC.data_set, dC.hdf5_data[0], dC.hdf5_data[1], dC.hdf5_data[2], dC.hdf5_data[3], dC.hdf5_data[4], offset, length, start_time, end_time-start_time, total_time, tspec_end.tv_sec, micro_s);
//printf("this is in jb11 %s \n", jb11);

if (getenv("DARSHAN_LDMS_VERBOSE"))
printf("JSON Message: %s\n", jb11);

rc = ldmsd_stream_publish(dC.ldms_darsh, dC.env_ldms_stream, LDMSD_STREAM_JSON, jb11, strlen(jb11) + 1);
if (rc)
printf("Error %d publishing data.\n", rc);
if (rc)
printf("Error %d publishing data.\n", rc);

out_1:
out_1:
return;
}
#else

struct darshanConnector dC = {
.ldms_lib = 1
};
.ldms_lib = 1
};

void darshan_ldms_connector_initialize()
void darshan_ldms_connector_initialize(struct darshan_core_runtime *init_core)
{
return;
}
Expand Down
9 changes: 4 additions & 5 deletions darshan-runtime/lib/darshan-ldms.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#ifndef __DARSHAN_LDMS_H
#define __DARSHAN_LDMS_H
#include "darshan.h"

#ifdef HAVE_LDMS
#include <ldms/ldms.h>
Expand All @@ -16,12 +17,10 @@
typedef struct darshanConnector {
int to;
int ldms_lib;
int dxt_enable_ldms;
int posix_enable_ldms;
int mpiio_enable_ldms;
int stdio_enable_ldms;
int hdf5_enable_ldms;
int mdhim_enable_ldms;
int64_t rank;
uint64_t record_id;
char *exename;
Expand All @@ -43,18 +42,18 @@ typedef struct darshanConnector {
sem_t conn_sem;
sem_t recv_sem;
} darshanConnector;

#else

typedef struct darshanConnector {
int to;
int ldms_lib;
int dxt_enable_ldms;
int posix_enable_ldms;
int mpiio_enable_ldms;
int stdio_enable_ldms;
int hdf5_enable_ldms;
int mdhim_enable_ldms;
} darshanConnector;

#endif

/* darshan_ldms_connector_initialize(), darshan_ldms_connector_send()
Expand All @@ -71,7 +70,7 @@ typedef struct darshanConnector {
* is detected or a new run is executed.
*
*/
void darshan_ldms_connector_initialize();
void darshan_ldms_connector_initialize(struct darshan_core_runtime *);

void darshan_ldms_connector_send(int64_t record_count, char *rwo, int64_t offset, int64_t length, int64_t max_byte, int64_t rw_switch, int64_t flushes, double start_time, double end_time, struct timespec tspec_start, struct timespec tspec_end, double total_time, char *mod_name, char *data_type);

Expand Down
8 changes: 4 additions & 4 deletions darshan-runtime/lib/darshan-mpiio.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ static int my_rank = -1;
if(newpath != __path) free(newpath);\
/* LDMS to publish realtime open tracing information to daemon*/ \
if(!dC.ldms_lib)\
if(!dC.dxt_enable_ldms || !dC.mpiio_enable_ldms){\
if(!dC.mpiio_enable_ldms){\
darshan_ldms_set_meta(__path, "N/A", rec_ref->file_rec->base_rec.id, rec_ref->file_rec->base_rec.rank);\
darshan_ldms_connector_send(rec_ref->file_rec->counters[MPIIO_COLL_OPENS] + rec_ref->file_rec->counters[MPIIO_INDEP_OPENS], "open", -1, -1, -1, -1, -1, __tm1, __tm2, __ts1, __ts2, rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], "MPIIO", "MET");\
}\
Expand Down Expand Up @@ -308,7 +308,7 @@ static int get_byte_offset = 0;
__tm1, __tm2, rec_ref->last_read_end); \
/* LDMS to publish realtime read tracing information to daemon*/ \
if(!dC.ldms_lib)\
if(!dC.dxt_enable_ldms || !dC.mpiio_enable_ldms)\
if(!dC.mpiio_enable_ldms)\
darshan_ldms_connector_send(rec_ref->file_rec->counters[__counter], "read", displacement, size, -1, rec_ref->file_rec->counters[MPIIO_RW_SWITCHES], -1, __tm1, __tm2, __ts1, __ts2, rec_ref->file_rec->fcounters[MPIIO_F_READ_TIME], "MPIIO", "MOD");\
} while(0)

Expand Down Expand Up @@ -356,7 +356,7 @@ static int get_byte_offset = 0;
__tm1, __tm2, rec_ref->last_write_end); \
/* LDMS to publish realtime write tracing information to daemon*/ \
if(!dC.ldms_lib)\
if(!dC.dxt_enable_ldms || !dC.mpiio_enable_ldms)\
if(!dC.mpiio_enable_ldms)\
darshan_ldms_connector_send(rec_ref->file_rec->counters[__counter], "write", displacement, size, -1, rec_ref->file_rec->counters[MPIIO_RW_SWITCHES], -1, __tm1, __tm2, __ts1, __ts2, rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME], "MPIIO", "MOD");\
} while(0)

Expand Down Expand Up @@ -1234,7 +1234,7 @@ int DARSHAN_DECL(MPI_File_close)(MPI_File *fh)
#ifdef HAVE_LDMS
/* publish close information for mpiio */
extern struct darshanConnector dC;
if(!dC.dxt_enable_ldms || !dC.mpiio_enable_ldms)
if(!dC.mpiio_enable_ldms)
darshan_ldms_connector_send(-1, "close", -1, -1, -1, -1, -1, tm1, tm2, ts1, ts2, rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], "MPIIO", "MOD");
#endif

Expand Down
Loading

0 comments on commit bcba732

Please sign in to comment.