From 0b24a6f12ebd865c4523160db813987fba6305d3 Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 3 Aug 2023 17:03:18 -0700 Subject: [PATCH] less intrusive file-system check --- src/tools/mpipr-too-many_patch.sh | 59 ++++++++++++++++++------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/src/tools/mpipr-too-many_patch.sh b/src/tools/mpipr-too-many_patch.sh index 07658a964d..5896270a65 100755 --- a/src/tools/mpipr-too-many_patch.sh +++ b/src/tools/mpipr-too-many_patch.sh @@ -3,47 +3,58 @@ rm -f mpipr-too-many.patch cat > mpipr-too-many.patch < 0) { - newspace = (long) ( size*(g_state.node_size -1)); - // }else{ -@@ -7248,3 +7250,24 @@ +@@ -4746,6 +4748,7 @@ + + /* set the size of my shared memory object */ + check_devshm(fd, size); ++ count_open_fds(); + retval = ftruncate(fd, size); + if (-1 == retval) { + perror("_shm_create: ftruncate"); +@@ -7248,3 +7251,27 @@ #endif #endif } + +STATIC void count_open_fds(void) { -+ FILE *f = fopen("/proc/sys/fs/file-nr", "r"); ++ /* check only every 200 ops && rank == 1 */ ++ counter_open_fds += 1; ++ if (counter_open_fds % 200 == 0 && g_state.rank == MIN(1,g_state.node_size)) { ++ FILE *f = fopen("/proc/sys/fs/file-nr", "r"); + -+ long nfiles, unused, maxfiles; -+ fscanf(f, "%ld %ld %ld", &nfiles, &unused, &maxfiles); ++ long nfiles, unused, maxfiles; ++ fscanf(f, "%ld %ld %ld", &nfiles, &unused, &maxfiles); +#ifdef DEBUGSHM -+ if(nfiles % 1000 == 0) fprintf(stderr," %d: no. open files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles); ++ if(nfiles % 1000 == 0) fprintf(stderr," %d: no. open files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles); +#endif -+ long mylimit = (maxfiles/100)*90; -+ if(nfiles > (maxfiles/100)*90) { -+ printf(" %d: running out of files; files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles); ++ if(nfiles > (maxfiles/100)*80) { ++ printf(" %d: running out of files; files = %ld maxfiles = %ld mmin %ld \n", g_state.rank, nfiles, maxfiles, MIN(1,g_state.node_size)); +#if PAUSE_ON_ERROR -+ fprintf(stderr,"%d(%d): too many open files\n", -+ g_state.rank, getpid()); -+ pause(); ++ fprintf(stderr,"%d(%d): too many open files\n", ++ g_state.rank, getpid()); ++ pause(); +#endif -+ comex_error("count_open_fds: too many open files", -1); ++ comex_error("count_open_fds: too many open files", -1); ++ } ++ fclose(f); + } -+ fclose(f); +} EOF -patch -p0 -s -N < mpipr-too-many.patch -echo mpipr-too-many.patch applied