Skip to content

Commit

Permalink
more Lustre PFL updates
Browse files Browse the repository at this point in the history
  • Loading branch information
shanedsnyder committed Jun 4, 2024
1 parent 31bb814 commit 2f2e9c6
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 101 deletions.
70 changes: 20 additions & 50 deletions darshan-runtime/lib/darshan-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ void (*mod_static_init_fns[])(void) =
/* XXX need to use extern to get Lustre module's instrumentation function
* since modules have no way of providing this to darshan-core
*/
extern void darshan_instrument_lustre_file(const char *filepath, int fd);
extern void darshan_instrument_lustre_file(darshan_record_id rec_id, int fd);
#endif

/* prototypes for internal helper functions */
Expand Down Expand Up @@ -958,36 +958,6 @@ static void add_entry(char* buf, int* space_left, struct mntent* entry)
else
mnt_data_array[mnt_data_count].fs_info.block_size = 4096;

#ifdef DARSHAN_LUSTRE
/* attempt to retrieve OST and MDT counts from Lustre */
mnt_data_array[mnt_data_count].fs_info.ost_count = -1;
mnt_data_array[mnt_data_count].fs_info.mdt_count = -1;
if ( statfsbuf.f_type == LL_SUPER_MAGIC )
{
int n_ost, n_mdt;
int ret_ost, ret_mdt;
DIR *mount_dir;

mount_dir = opendir( entry->mnt_dir );
if ( mount_dir )
{
/* n_ost and n_mdt are used for both input and output to ioctl */
n_ost = 0;
n_mdt = 1;

ret_ost = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_ost );
ret_mdt = ioctl( dirfd(mount_dir), LL_IOC_GETOBDCOUNT, &n_mdt );

if ( !(ret_ost < 0 || ret_mdt < 0) )
{
mnt_data_array[mnt_data_count].fs_info.ost_count = n_ost;
mnt_data_array[mnt_data_count].fs_info.mdt_count = n_mdt;
}
closedir( mount_dir );
}
}
#endif

/* store mount information with the job-level metadata in darshan log */
ret = snprintf(tmp_mnt, 256, "\n%s\t%s",
entry->mnt_type, entry->mnt_dir);
Expand Down Expand Up @@ -2660,27 +2630,27 @@ void *darshan_core_register_record(
__DARSHAN_CORE_UNLOCK();
return(NULL);
}
}

/* check to see if we've already stored the id->name mapping for
* this record, and add a new name record if not
*/
HASH_FIND(hlink, __darshan_core->name_hash, &rec_id,
sizeof(darshan_record_id), ref);
if(!ref)
{
ret = darshan_add_name_record_ref(__darshan_core, rec_id, name, mod_id);
if(ret == 0)
{
DARSHAN_MOD_FLAG_SET(__darshan_core->log_hdr_p->partial_flag, mod_id);
__DARSHAN_CORE_UNLOCK();
return(NULL);
}
}
else
/* check to see if we've already stored the id->name mapping for
* this record, and add a new name record if not
*/
HASH_FIND(hlink, __darshan_core->name_hash, &rec_id,
sizeof(darshan_record_id), ref);
if(!ref)
{
ret = darshan_add_name_record_ref(__darshan_core, rec_id, name, mod_id);
if(ret == 0)
{
DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);
DARSHAN_MOD_FLAG_SET(__darshan_core->log_hdr_p->partial_flag, mod_id);
__DARSHAN_CORE_UNLOCK();
return(NULL);
}
}
else
{
DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);
}

__darshan_core->mod_array[mod_id]->rec_mem_avail -= rec_size;
if((mod_id != DXT_POSIX_MOD) && (mod_id != DXT_MPIIO_MOD))
Expand Down Expand Up @@ -2727,7 +2697,7 @@ char *darshan_core_lookup_record_name(darshan_record_id rec_id)
return(name);
}

void darshan_instrument_fs_data(int fs_type, const char *path, int fd)
void darshan_instrument_fs_data(int fs_type, darshan_record_id rec_id, int fd)
{
#ifdef DARSHAN_LUSTRE
/* allow Lustre to generate a record if we configured with Lustre support */
Expand All @@ -2741,7 +2711,7 @@ void darshan_instrument_fs_data(int fs_type, const char *path, int fd)
*/
if(1 || fs_type == LL_SUPER_MAGIC)
{
darshan_instrument_lustre_file(path, fd);
darshan_instrument_lustre_file(rec_id, fd);
return;
}
#endif
Expand Down
87 changes: 45 additions & 42 deletions darshan-runtime/lib/darshan-lustre.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ static void lustre_cleanup(
/* Per-file reference kept by the Lustre module for each Lustre record it
 * has registered; looked up by Darshan record id in the module's
 * record_id_hash.
 */
struct lustre_record_ref
{
/* record buffer returned by darshan_core_register_record() */
struct darshan_lustre_record *record;
/* upper bound on the number of layout components stored in 'record';
 * set from the layout's component count when the record is registered,
 * and also used to locate where the OST list initially begins
 * (directly after 'max_comps' component slots)
 */
int max_comps;
/* upper bound on the number of OST IDs stored in 'record'; set from the
 * layout's OST count when the record is registered
 */
int max_osts;
/* size of the record; reset and then updated each time the layout
 * components are read into the record, to reflect the final number of
 * components/OSTs actually stored
 */
size_t record_size;
};

Expand Down Expand Up @@ -108,7 +110,7 @@ static void darshan_get_lustre_layout_size(struct llapi_layout *lustre_layout,
}

static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_layout,
struct lustre_record_ref *rec_ref, int num_comps, int num_osts)
struct lustre_record_ref *rec_ref)
{
bool is_composite;
int ret;
Expand All @@ -122,7 +124,7 @@ static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_lay
int comps_idx = 0, osts_idx = 0;
struct darshan_lustre_component *comps =
(struct darshan_lustre_component *)&(rec_ref->record->comps);
OST_ID *osts = (OST_ID *)(comps + num_comps);
OST_ID *osts = (OST_ID *)(comps + rec_ref->max_comps);

rec_ref->record_size = 0;
rec_ref->record->num_comps = 0;
Expand All @@ -148,13 +150,13 @@ static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_lay
ret += llapi_layout_mirror_id_get(lustre_layout, &mirror_id);
/* record info on this component iff:
* - the layout isn't composite _OR_ the composite layout component is
* initialized (actively used for this file)
* initialized (actively used for this file)
* - the above functions querying stripe params returned no error
* - there is enough room in the record buf to store the OST list
*/
if ((!is_composite || (flags & LCME_FL_INIT)) &&
(ret == 0) &&
(osts_idx + stripe_count <= num_osts))
(ret == 0) &&
(osts_idx + stripe_count <= rec_ref->max_osts))
{
comps[comps_idx].counters[LUSTRE_COMP_STRIPE_SIZE] = (int64_t)stripe_size;
comps[comps_idx].counters[LUSTRE_COMP_STRIPE_WIDTH] = (int64_t)stripe_count;
Expand Down Expand Up @@ -185,9 +187,9 @@ static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_lay
ret = llapi_layout_comp_use(lustre_layout, LLAPI_LAYOUT_COMP_USE_NEXT);
}
else break;
} while(ret == 0 && rec_ref->record->num_comps < num_comps);
} while(ret == 0 && rec_ref->record->num_comps < rec_ref->max_comps);

if (rec_ref->record->num_comps < num_comps)
if (rec_ref->record->num_comps < rec_ref->max_comps)
memmove(comps + rec_ref->record->num_comps, osts, osts_idx * sizeof(*osts));

/* update record size to reflect final number of components/osts */
Expand All @@ -196,17 +198,15 @@ static void darshan_get_lustre_layout_components(struct llapi_layout *lustre_lay
return;
}

void darshan_instrument_lustre_file(const char* filepath, int fd)
void darshan_instrument_lustre_file(darshan_record_id rec_id, int fd)
{
darshan_record_id rec_id;
void *lustre_xattr_val;
size_t lustre_xattr_size = XATTR_SIZE_MAX;
struct llapi_layout *lustre_layout;
int num_comps, num_osts;
size_t rec_size;
struct darshan_lustre_record *rec;
struct lustre_record_ref *rec_ref;
struct darshan_fs_info fs_info;
int ret;

LUSTRE_LOCK();
Expand All @@ -222,41 +222,42 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
return;
}

if ((lustre_xattr_val = calloc(1, lustre_xattr_size)) == NULL)
{
LUSTRE_UNLOCK();
return;
}

/* -1 means fgetxattr failed, likely because file isn't on Lustre, but maybe because
* the Lustre version doesn't support this method of obtaining striping info
*/
if ((lustre_xattr_size = fgetxattr(fd, "lustre.lov", lustre_xattr_val, lustre_xattr_size)) == -1)
{
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}

/* get corresponding Lustre file layout, then extract stripe params */
if ((lustre_layout = llapi_layout_get_by_xattr(lustre_xattr_val, lustre_xattr_size, 0)) == NULL)
{
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
free(lustre_xattr_val);

/* search the hash table for this file record, and initialize if not found */
rec_id = darshan_core_gen_record_id(filepath);
rec_ref = darshan_lookup_record_ref(lustre_runtime->record_id_hash,
&rec_id, sizeof(darshan_record_id));
if(!rec_ref)
{
if ((lustre_xattr_val = calloc(1, lustre_xattr_size)) == NULL)
{
LUSTRE_UNLOCK();
return;
}

/* -1 means fgetxattr failed, likely because file isn't on Lustre, but maybe because
* the Lustre version doesn't support this method of obtaining striping info
*/
if ((lustre_xattr_size = fgetxattr(fd, "lustre.lov", lustre_xattr_val, lustre_xattr_size)) == -1)
{
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}

/* get corresponding Lustre file layout, then extract stripe params */
if ((lustre_layout = llapi_layout_get_by_xattr(lustre_xattr_val, lustre_xattr_size, 0)) == NULL)
{
free(lustre_xattr_val);
LUSTRE_UNLOCK();
return;
}
free(lustre_xattr_val);

/* iterate file layout components to determine total record size */
darshan_get_lustre_layout_size(lustre_layout, &num_comps, &num_osts);
if(num_comps == 0 || num_osts == 0)
if(num_comps == 0)
{
llapi_layout_free(lustre_layout);
LUSTRE_UNLOCK();
return;
}
Expand All @@ -280,13 +281,12 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
}

/* register a Lustre file record with Darshan */
fs_info.fs_type = -1;
rec = darshan_core_register_record(
rec_id,
filepath,
NULL, /* either POSIX or STDIO already registered the name */
DARSHAN_LUSTRE_MOD,
rec_size,
&fs_info);
NULL);
if(rec == NULL)
{
/* if NULL, darshan has no more memory for instrumenting */
Expand All @@ -301,11 +301,14 @@ void darshan_instrument_lustre_file(const char* filepath, int fd)
rec->base_rec.id = rec_id;
rec->base_rec.rank = my_rank;
rec_ref->record = rec;
/* fill in record buffer with component info and OST list */
darshan_get_lustre_layout_components(lustre_layout, rec_ref, num_comps, num_osts);
llapi_layout_free(lustre_layout);
rec_ref->max_comps = num_comps;
rec_ref->max_osts = num_osts;
}

/* fill in record buffer with component info and OST list */
darshan_get_lustre_layout_components(lustre_layout, rec_ref);
llapi_layout_free(lustre_layout);

LUSTRE_UNLOCK();
return;
}
Expand Down
17 changes: 16 additions & 1 deletion darshan-runtime/lib/darshan-posix.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ static int darshan_mem_alignment = 1;
break; \
} \
_POSIX_RECORD_OPEN(__ret, __rec_ref, __mode, __tm1, __tm2, 1, -1); \
darshan_instrument_fs_data(__rec_ref->fs_type, __newpath, __ret); \
if(__newpath != __path) free(__newpath); \
/* LDMS to publish realtime open tracing information to daemon*/ \
if(dC.ldms_lib)\
Expand Down Expand Up @@ -1618,6 +1617,22 @@ int DARSHAN_DECL(close)(int fd)

MAP_OR_FAIL(close);

if(!__darshan_disabled)
{
POSIX_LOCK();
if(posix_runtime && !posix_runtime->frozen)
{
rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash,
&fd, sizeof(int));
if(rec_ref)
{
darshan_instrument_fs_data(rec_ref->fs_type,
rec_ref->file_rec->base_rec.id, fd);
}
}
POSIX_UNLOCK();
}

tm1 = POSIX_WTIME();
ret = __real_close(fd);
tm2 = POSIX_WTIME();
Expand Down
20 changes: 17 additions & 3 deletions darshan-runtime/lib/darshan-stdio.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,6 @@ extern int __real_fileno(FILE *stream);
darshan_record_id __rec_id; \
struct stdio_file_record_ref *__rec_ref; \
char *__newpath; \
int __fd; \
MAP_OR_FAIL(fileno); \
(void)__darshan_disabled; \
if(!__ret || !__path) break; \
Expand All @@ -233,8 +232,6 @@ extern int __real_fileno(FILE *stream);
break; \
} \
_STDIO_RECORD_OPEN(__ret, __rec_ref, __tm1, __tm2, 1, -1); \
__fd = __real_fileno(__ret); \
darshan_instrument_fs_data(__rec_ref->fs_type, __newpath, __fd); \
if(__newpath != (char*)__path) free(__newpath); \
/* LDMS to publish realtime open tracing information to daemon*/ \
if(dC.ldms_lib)\
Expand Down Expand Up @@ -445,6 +442,23 @@ int DARSHAN_DECL(fclose)(FILE *fp)

MAP_OR_FAIL(fclose);

if(!__darshan_disabled)
{
STDIO_LOCK();
if(stdio_runtime && !stdio_runtime->frozen)
{
rec_ref = darshan_lookup_record_ref(stdio_runtime->stream_hash,
&fp, sizeof(fp));
if(rec_ref)
{
int fd = __real_fileno(fp);
darshan_instrument_fs_data(rec_ref->fs_type,
rec_ref->file_rec->base_rec.id, fd);
}
}
STDIO_UNLOCK();
}

tm1 = STDIO_WTIME();
ret = __real_fclose(fp);
tm2 = STDIO_WTIME();
Expand Down
8 changes: 3 additions & 5 deletions darshan-runtime/lib/darshan.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,6 @@ struct darshan_fs_info
{
int fs_type;
int block_size;
int ost_count;
int mdt_count;
};

/* FS mount information */
Expand Down Expand Up @@ -316,16 +314,16 @@ void darshan_core_unregister_module(

/* darshan_instrument_fs_data()
*
* Allow file system-specific modules to instrument data for the file
* stored at 'path'. 'fs_type' is checked to determine the underlying
* Allow file system-specific modules to instrument data for the file record
* corresponding to 'rec_id'. 'fs_type' is checked to determine the underlying
* filesystem and calls into the corresponding file system instrumentation
* module, if defined -- currently we only have a Lustre module. 'fd' is
* the file descriptor corresponding to the file, which may be needed by
* the file system to retrieve specific parameters.
*/
void darshan_instrument_fs_data(
int fs_type,
const char *path,
darshan_record_id rec_id,
int fd);

/* darshan_core_gen_record_id()
Expand Down

0 comments on commit 2f2e9c6

Please sign in to comment.