From 2f2e9c67fbe98b52ee39ee11583950b2bd41a0d5 Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Tue, 4 Jun 2024 19:51:14 +0000 Subject: [PATCH] more Lustre PFL updates --- darshan-runtime/lib/darshan-core.c | 70 +++++++--------------- darshan-runtime/lib/darshan-lustre.c | 87 ++++++++++++++-------------- darshan-runtime/lib/darshan-posix.c | 17 +++++- darshan-runtime/lib/darshan-stdio.c | 20 ++++++- darshan-runtime/lib/darshan.h | 8 +-- 5 files changed, 101 insertions(+), 101 deletions(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index d3b15d3e8..3aa1c2057 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -98,7 +98,7 @@ void (*mod_static_init_fns[])(void) = /* XXX need to use extern to get Lustre module's instrumentation function * since modules have no way of providing this to darshan-core */ -extern void darshan_instrument_lustre_file(const char *filepath, int fd); +extern void darshan_instrument_lustre_file(darshan_record_id rec_id, int fd); #endif /* prototypes for internal helper functions */ @@ -958,36 +958,6 @@ static void add_entry(char* buf, int* space_left, struct mntent* entry) else mnt_data_array[mnt_data_count].fs_info.block_size = 4096; -#ifdef DARSHAN_LUSTRE - /* attempt to retrieve OST and MDS counts from Lustre */ - mnt_data_array[mnt_data_count].fs_info.ost_count = -1; - mnt_data_array[mnt_data_count].fs_info.mdt_count = -1; - if ( statfsbuf.f_type == LL_SUPER_MAGIC ) - { - int n_ost, n_mdt; - int ret_ost, ret_mdt; - DIR *mount_dir; - - mount_dir = opendir( entry->mnt_dir ); - if ( mount_dir ) - { - /* n_ost and n_mdt are used for both input and output to ioctl */ - n_ost = 0; - n_mdt = 1; - - ret_ost = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_ost ); - ret_mdt = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_mdt ); - - if ( !(ret_ost < 0 || ret_mdt < 0) ) - { - mnt_data_array[mnt_data_count].fs_info.ost_count = n_ost; - mnt_data_array[mnt_data_count].fs_info.mdt_count = n_mdt; - } - closedir( mount_dir ); - } - } -#endif - /* store mount information with the job-level metadata in darshan log */ ret = snprintf(tmp_mnt, 256, "\n%s\t%s", entry->mnt_type, entry->mnt_dir); @@ -2660,27 +2630,27 @@ void *darshan_core_register_record( __DARSHAN_CORE_UNLOCK(); return(NULL); } + } - /* check to see if we've already stored the id->name mapping for - * this record, and add a new name record if not - */ - HASH_FIND(hlink, __darshan_core->name_hash, &rec_id, - sizeof(darshan_record_id), ref); - if(!ref) - { - ret = darshan_add_name_record_ref(__darshan_core, rec_id, name, mod_id); - if(ret == 0) - { - DARSHAN_MOD_FLAG_SET(__darshan_core->log_hdr_p->partial_flag, mod_id); - __DARSHAN_CORE_UNLOCK(); - return(NULL); - } - } - else + /* check to see if we've already stored the id->name mapping for + * this record, and add a new name record if not + */ + HASH_FIND(hlink, __darshan_core->name_hash, &rec_id, + sizeof(darshan_record_id), ref); + if(!ref) + { + ret = darshan_add_name_record_ref(__darshan_core, rec_id, name, mod_id); + if(ret == 0) { - DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id); + DARSHAN_MOD_FLAG_SET(__darshan_core->log_hdr_p->partial_flag, mod_id); + __DARSHAN_CORE_UNLOCK(); + return(NULL); } } + else + { + DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id); + } __darshan_core->mod_array[mod_id]->rec_mem_avail -= rec_size; if((mod_id != DXT_POSIX_MOD) && (mod_id != DXT_MPIIO_MOD)) @@ -2727,7 +2697,7 @@ char *darshan_core_lookup_record_name(darshan_record_id rec_id) return(name); } -void darshan_instrument_fs_data(int fs_type, const char *path, int fd) +void darshan_instrument_fs_data(int fs_type, darshan_record_id rec_id, int fd) { #ifdef DARSHAN_LUSTRE /* allow Lustre to generate a record if we configured with Lustre support */ @@ -2741,7 +2711,7 @@ void darshan_instrument_fs_data(int fs_type, const char *path, int fd) */ if(1 || fs_type == LL_SUPER_MAGIC) { - darshan_instrument_lustre_file(path, fd); + darshan_instrument_lustre_file(rec_id, fd); return; } #endif diff --git a/darshan-runtime/lib/darshan-lustre.c b/darshan-runtime/lib/darshan-lustre.c index 963d31aa1..a12c444cb 100644 --- a/darshan-runtime/lib/darshan-lustre.c +++ b/darshan-runtime/lib/darshan-lustre.c @@ -42,6 +42,8 @@ static void lustre_cleanup( struct lustre_record_ref { struct darshan_lustre_record *record; + int max_comps; + int max_osts; size_t record_size; }; @@ -108,7 +110,7 @@ static void darshan_get_lustre_layout_size(struct llapi_layout *lustre_layout, } static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_layout, - struct lustre_record_ref *rec_ref, int num_comps, int num_osts) + struct lustre_record_ref *rec_ref) { bool is_composite; int ret; @@ -122,7 +124,7 @@ static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_lay int comps_idx = 0, osts_idx = 0; struct darshan_lustre_component *comps = (struct darshan_lustre_component *)&(rec_ref->record->comps); - OST_ID *osts = (OST_ID *)(comps + num_comps); + OST_ID *osts = (OST_ID *)(comps + rec_ref->max_comps); rec_ref->record_size = 0; rec_ref->record->num_comps = 0; @@ -148,13 +150,13 @@ static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_lay ret += llapi_layout_mirror_id_get(lustre_layout, &mirror_id); /* record info on this component iff: * - the layout isn't composite _OR_ the composite layout component is - * initialized (actively used for this file) + * initialized (actively used for this file) * - the above functions querying stripe params returned no error * - there is enough room in the record buf to store the OST list */ if ((!is_composite || (flags & LCME_FL_INIT)) && - (ret == 0) && - (osts_idx + stripe_count <= num_osts)) + (ret == 0) && + (osts_idx + stripe_count <= rec_ref->max_osts)) { comps[comps_idx].counters[LUSTRE_COMP_STRIPE_SIZE] = (int64_t)stripe_size; comps[comps_idx].counters[LUSTRE_COMP_STRIPE_WIDTH] = (int64_t)stripe_count; @@ -185,9 +187,9 @@ static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_lay ret = llapi_layout_comp_use(lustre_layout, LLAPI_LAYOUT_COMP_USE_NEXT); } else break; - } while(ret == 0 && rec_ref->record->num_comps < num_comps); + } while(ret == 0 && rec_ref->record->num_comps < rec_ref->max_comps); - if (rec_ref->record->num_comps < num_comps) + if (rec_ref->record->num_comps < rec_ref->max_comps) memmove(comps + rec_ref->record->num_comps, osts, osts_idx * sizeof(*osts)); /* update record size to reflect final number of components/osts */ @@ -196,9 +198,8 @@ static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_lay return; } -void darshan_instrument_lustre_file(const char* filepath, int fd) +void darshan_instrument_lustre_file(darshan_record_id rec_id, int fd) { - darshan_record_id rec_id; void *lustre_xattr_val; size_t lustre_xattr_size = XATTR_SIZE_MAX; struct llapi_layout *lustre_layout; @@ -206,7 +207,6 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) size_t rec_size; struct darshan_lustre_record *rec; struct lustre_record_ref *rec_ref; - struct darshan_fs_info fs_info; int ret; LUSTRE_LOCK(); @@ -222,41 +222,42 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) return; } + if ((lustre_xattr_val = calloc(1, lustre_xattr_size)) == NULL) + { + LUSTRE_UNLOCK(); + return; + } + + /* -1 means fgetxattr failed, likely because file isn't on Lustre, but maybe because + * the Lustre version doesn't support this method of obtaining striping info + */ + if ((lustre_xattr_size = fgetxattr(fd, "lustre.lov", lustre_xattr_val, lustre_xattr_size)) == -1) + { + free(lustre_xattr_val); + LUSTRE_UNLOCK(); + return; + } + + /* get corresponding Lustre file layout, then extract stripe params */ + if ((lustre_layout = llapi_layout_get_by_xattr(lustre_xattr_val, lustre_xattr_size, 0)) == NULL) + { + free(lustre_xattr_val); + LUSTRE_UNLOCK(); + return; + } + free(lustre_xattr_val); + /* search the hash table for this file record, and initialize if not found */ - rec_id = darshan_core_gen_record_id(filepath); rec_ref = darshan_lookup_record_ref(lustre_runtime->record_id_hash, &rec_id, sizeof(darshan_record_id)); if(!rec_ref) { - if ((lustre_xattr_val = calloc(1, lustre_xattr_size)) == NULL) - { - LUSTRE_UNLOCK(); - return; - } - - /* -1 means fgetxattr failed, likely because file isn't on Lustre, but maybe because - * the Lustre version doesn't support this method of obtaining striping info - */ - if ((lustre_xattr_size = fgetxattr(fd, "lustre.lov", lustre_xattr_val, lustre_xattr_size)) == -1) - { - free(lustre_xattr_val); - LUSTRE_UNLOCK(); - return; - } - - /* get corresponding Lustre file layout, then extract stripe params */ - if ((lustre_layout = llapi_layout_get_by_xattr(lustre_xattr_val, lustre_xattr_size, 0)) == NULL) - { - free(lustre_xattr_val); - LUSTRE_UNLOCK(); - return; - } - free(lustre_xattr_val); /* iterate file layout components to determine total record size */ darshan_get_lustre_layout_size(lustre_layout, &num_comps, &num_osts); - if(num_comps == 0 || num_osts == 0) + if(num_comps == 0) { + llapi_layout_free(lustre_layout); LUSTRE_UNLOCK(); return; } @@ -280,13 +281,12 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) } /* register a Lustre file record with Darshan */ - fs_info.fs_type = -1; rec = darshan_core_register_record( rec_id, - filepath, + NULL, /* either POSIX or STDIO already registered the name */ DARSHAN_LUSTRE_MOD, rec_size, - &fs_info); + NULL); if(rec == NULL) { /* if NULL, darshan has no more memory for instrumenting */ @@ -301,11 +301,14 @@ void darshan_instrument_lustre_file(const char* filepath, int fd) rec->base_rec.id = rec_id; rec->base_rec.rank = my_rank; rec_ref->record = rec; - /* fill in record buffer with component info and OST list */ - darshan_get_lustre_layout_components(lustre_layout, rec_ref, num_comps, num_osts); - llapi_layout_free(lustre_layout); + rec_ref->max_comps = num_comps; + rec_ref->max_osts = num_osts; } + /* fill in record buffer with component info and OST list */ + darshan_get_lustre_layout_components(lustre_layout, rec_ref); + llapi_layout_free(lustre_layout); + LUSTRE_UNLOCK(); return; } diff --git a/darshan-runtime/lib/darshan-posix.c b/darshan-runtime/lib/darshan-posix.c index ce691dac4..549a524e1 100644 --- a/darshan-runtime/lib/darshan-posix.c +++ b/darshan-runtime/lib/darshan-posix.c @@ -248,7 +248,6 @@ static int darshan_mem_alignment = 1; break; \ } \ _POSIX_RECORD_OPEN(__ret, __rec_ref, __mode, __tm1, __tm2, 1, -1); \ - darshan_instrument_fs_data(__rec_ref->fs_type, __newpath, __ret); \ if(__newpath != __path) free(__newpath); \ /* LDMS to publish realtime open tracing information to daemon*/ \ if(dC.ldms_lib)\ @@ -1618,6 +1617,22 @@ int DARSHAN_DECL(close)(int fd) MAP_OR_FAIL(close); + if(!__darshan_disabled) + { + POSIX_LOCK(); + if(posix_runtime && !posix_runtime->frozen) + { + rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, + &fd, sizeof(int)); + if(rec_ref) + { + darshan_instrument_fs_data(rec_ref->fs_type, + rec_ref->file_rec->base_rec.id, fd); + } + } + POSIX_UNLOCK(); + } + tm1 = POSIX_WTIME(); ret = __real_close(fd); tm2 = POSIX_WTIME(); diff --git a/darshan-runtime/lib/darshan-stdio.c b/darshan-runtime/lib/darshan-stdio.c index 9914391fd..916198e14 100644 --- a/darshan-runtime/lib/darshan-stdio.c +++ b/darshan-runtime/lib/darshan-stdio.c @@ -219,7 +219,6 @@ extern int __real_fileno(FILE *stream); darshan_record_id __rec_id; \ struct stdio_file_record_ref *__rec_ref; \ char *__newpath; \ - int __fd; \ MAP_OR_FAIL(fileno); \ (void)__darshan_disabled; \ if(!__ret || !__path) break; \ @@ -233,8 +232,6 @@ extern int __real_fileno(FILE *stream); break; \ } \ _STDIO_RECORD_OPEN(__ret, __rec_ref, __tm1, __tm2, 1, -1); \ - __fd = __real_fileno(__ret); \ - darshan_instrument_fs_data(__rec_ref->fs_type, __newpath, __fd); \ if(__newpath != (char*)__path) free(__newpath); \ /* LDMS to publish realtime open tracing information to daemon*/ \ if(dC.ldms_lib)\ @@ -445,6 +442,23 @@ int DARSHAN_DECL(fclose)(FILE *fp) MAP_OR_FAIL(fclose); + if(!__darshan_disabled) + { + STDIO_LOCK(); + if(stdio_runtime && !stdio_runtime->frozen) + { + rec_ref = darshan_lookup_record_ref(stdio_runtime->stream_hash, + &fp, sizeof(fp)); + if(rec_ref) + { + int fd = __real_fileno(fp); + darshan_instrument_fs_data(rec_ref->fs_type, + rec_ref->file_rec->base_rec.id, fd); + } + } + STDIO_UNLOCK(); + } + tm1 = STDIO_WTIME(); ret = __real_fclose(fp); tm2 = STDIO_WTIME(); diff --git a/darshan-runtime/lib/darshan.h b/darshan-runtime/lib/darshan.h index c1ac2215b..7e3692c80 100644 --- a/darshan-runtime/lib/darshan.h +++ b/darshan-runtime/lib/darshan.h @@ -94,8 +94,6 @@ struct darshan_fs_info { int fs_type; int block_size; - int ost_count; - int mdt_count; }; /* FS mount information */ @@ -316,8 +314,8 @@ void darshan_core_unregister_module( /* darshan_instrument_fs_data() * - * Allow file system-specific modules to instrument data for the file - * stored at 'path'. 'fs_type' is checked to determine the underlying + * Allow file system-specific modules to instrument data for the file record + * corresponding to 'rec_id'. 'fs_type' is checked to determine the underlying * filesystem and calls into the corresponding file system instrumentation * module, if defined -- currently we only have a Lustre module. 'fd' is * the file descriptor corresponding to the file, which may be needed by @@ -325,7 +323,7 @@ void darshan_core_unregister_module( */ void darshan_instrument_fs_data( int fs_type, - const char *path, + darshan_record_id rec_id, int fd); /* darshan_core_gen_record_id()