Skip to content

Commit

Permalink
less intrusive file-system check
Browse files Browse the repository at this point in the history
  • Loading branch information
edoapra committed Aug 4, 2023
1 parent c1724dc commit 0b24a6f
Showing 1 changed file with 35 additions and 24 deletions.
59 changes: 35 additions & 24 deletions src/tools/mpipr-too-many_patch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,58 @@ rm -f mpipr-too-many.patch
cat > mpipr-too-many.patch <<EOF
--- $1/comex/src-mpi-pr/comex.c.org 2023-07-12 19:10:15.711084258 -0700
+++ $1/comex/src-mpi-pr/comex.c 2023-07-12 19:10:21.851117110 -0700
@@ -358,6 +358,7 @@
@@ -51,7 +51,7 @@
#define XSTR(x) #x
#define STR(x) XSTR(x)
-
+#define MIN(a, b) (((b) < (a)) ? (b) : (a))
/* data structures */
typedef enum {
@@ -358,6 +358,8 @@
static int devshm_initialized = 0;
static long devshm_fs_left = 0;
static long devshm_fs_initial = 0;
+static long counter_open_fds = 0;
+STATIC void count_open_fds(void);
int comex_init()
{
@@ -7215,6 +7216,7 @@
g_state.rank, g_state.node_size, devshm_fs_initial/CONVERT_TO_M, (long) ufs_statfs.f_bsize, (long) g_state.node_size);
#endif
}
+ count_open_fds();
// if (size > 0) {
newspace = (long) ( size*(g_state.node_size -1));
// }else{
@@ -7248,3 +7250,24 @@
@@ -4746,6 +4748,7 @@
/* set the size of my shared memory object */
check_devshm(fd, size);
+ count_open_fds();
retval = ftruncate(fd, size);
if (-1 == retval) {
perror("_shm_create: ftruncate");
@@ -7248,3 +7251,27 @@
#endif
#endif
}
+
+STATIC void count_open_fds(void) {
+ FILE *f = fopen("/proc/sys/fs/file-nr", "r");
+ /* check only every 200 ops && rank == 1 */
+ counter_open_fds += 1;
+ if (counter_open_fds % 200 == 0 && g_state.rank == MIN(1,g_state.node_size)) {
+ FILE *f = fopen("/proc/sys/fs/file-nr", "r");
+
+ long nfiles, unused, maxfiles;
+ fscanf(f, "%ld %ld %ld", &nfiles, &unused, &maxfiles);
+ long nfiles, unused, maxfiles;
+ fscanf(f, "%ld %ld %ld", &nfiles, &unused, &maxfiles);
+#ifdef DEBUGSHM
+ if(nfiles % 1000 == 0) fprintf(stderr," %d: no. open files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles);
+ if(nfiles % 1000 == 0) fprintf(stderr," %d: no. open files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles);
+#endif
+ long mylimit = (maxfiles/100)*90;
+ if(nfiles > (maxfiles/100)*90) {
+ printf(" %d: running out of files; files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles);
+ if(nfiles > (maxfiles/100)*80) {
+ printf(" %d: running out of files; files = %ld maxfiles = %ld mmin %ld \n", g_state.rank, nfiles, maxfiles, MIN(1,g_state.node_size));
+#if PAUSE_ON_ERROR
+ fprintf(stderr,"%d(%d): too many open files\n",
+ g_state.rank, getpid());
+ pause();
+ fprintf(stderr,"%d(%d): too many open files\n",
+ g_state.rank, getpid());
+ pause();
+#endif
+ comex_error("count_open_fds: too many open files", -1);
+ comex_error("count_open_fds: too many open files", -1);
+ }
+ fclose(f);
+ }
+ fclose(f);
+}
EOF
# Apply the patch file written by the heredoc above. GNU patch options used:
#   -p0  use the file paths exactly as they appear in the patch headers
#   -s   work silently unless an error occurs
#   -N   skip patches that appear to be reversed or already applied
patch -p0 -s -N < mpipr-too-many.patch
# NOTE(review): this message is not conditioned on patch's exit status, so it
# is printed even when patch fails or skips -- unless errexit is enabled in a
# part of the script not visible here; confirm.
echo mpipr-too-many.patch applied

0 comments on commit 0b24a6f

Please sign in to comment.