diff --git a/META b/META index e29f728d000..140e028a23a 100644 --- a/META +++ b/META @@ -7,8 +7,8 @@ Name: slurm Major: 22 Minor: 05 - Micro: 9 - Version: 22.05.9 + Micro: 10 + Version: 22.05.10 Release: 1 ## diff --git a/NEWS b/NEWS index e7b28dd24fb..fef51e0073c 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,9 @@ documents those changes that are of interest to users and administrators. * Changes in Slurm 22.05.10 =========================== + -- Fix filesystem handling race conditions that could lead to an attacker + taking control of an arbitrary file, or removing entire directories' + contents. CVE-2023-41914. * Changes in Slurm 22.05.9 ========================== diff --git a/slurm.spec b/slurm.spec index fdc7e8eb068..13471156823 100644 --- a/slurm.spec +++ b/slurm.spec @@ -1,5 +1,5 @@ Name: slurm -Version: 22.05.9 +Version: 22.05.10 %define rel 1 Release: %{rel}.%{gittag}%{?dist}%{?gpu}.ug Summary: Slurm Workload Manager diff --git a/src/common/fd.c b/src/common/fd.c index 421058e56f0..45751aa6f85 100644 --- a/src/common/fd.c +++ b/src/common/fd.c @@ -67,6 +67,7 @@ strong_alias(fd_set_nonblocking,slurm_fd_set_nonblocking); strong_alias(fd_get_socket_error, slurm_fd_get_socket_error); strong_alias(send_fd_over_pipe, slurm_send_fd_over_pipe); strong_alias(receive_fd_over_pipe, slurm_receive_fd_over_pipe); +strong_alias(rmdir_recursive, slurm_rmdir_recursive); static int fd_get_lock(int fd, int cmd, int type); static pid_t fd_test_lock(int fd, int type); @@ -454,3 +455,92 @@ extern int receive_fd_over_pipe(int socket) return fd; } + +static int _rmdir_recursive(int dirfd) +{ + int rc = 0; + DIR *dp; + struct dirent *ent; + + if (!(dp = fdopendir(dirfd))) { + error("%s: can't open directory: %m", __func__); + return 1; + } + + while ((ent = readdir(dp))) { + int childfd = -1; + + /* skip special directories */ + if (!strcmp(ent->d_name, ".") || + !strcmp(ent->d_name, "..")) { + continue; + } + + /* try to remove entry, first as a file, then as a directory */ + if (unlinkat(dirfd, ent->d_name, 0) != -1) { + debug("%s: removed file `%s`", __func__, ent->d_name); + continue; + } else if (unlinkat(dirfd, ent->d_name, AT_REMOVEDIR) != -1) { + debug("%s: removed empty directory `%s`", + __func__, ent->d_name); + continue; + } + + /* removal didn't work. assume it's a non-empty directory */ + if ((childfd = openat(dirfd, ent->d_name, + (O_DIRECTORY | O_NOFOLLOW))) < 0) { + debug("%s: openat() failed for `%s`: %m", + __func__, ent->d_name); + rc++; + continue; + } + + debug("%s: descending into directory `%s`", + __func__, ent->d_name); + rc += _rmdir_recursive(childfd); + (void) close(childfd); + + if (unlinkat(dirfd, ent->d_name, AT_REMOVEDIR) != -1) { + debug("%s: removed now-empty directory `%s`", + __func__, ent->d_name); + } else { + debug("%s: unlinkat() failed for `%s`: %m", + __func__, ent->d_name); + rc++; + } + } + closedir(dp); + + return rc; +} + +extern int rmdir_recursive(const char *path, bool remove_top) +{ + int rc = 0; + int dirfd; + + if ((dirfd = open(path, O_DIRECTORY | O_NOFOLLOW)) < 0) { + error("%s: could not open %s", __func__, path); + return 1; + } + + rc = _rmdir_recursive(dirfd); + close(dirfd); + + if (remove_top) { + if (rmdir(path) < 0) { + debug("%s: rmdir() failed for `%s`: %m", + __func__, path); + rc++; + } else { + debug("%s: removed now-empty top directory `%s`", + __func__, path); + } + } + + if (rc) + error("%s: could not completely remove `%s`, %d files left", + __func__, path, rc); + + return rc; +} diff --git a/src/common/fd.h b/src/common/fd.h index d729f816895..307749542b8 100644 --- a/src/common/fd.h +++ b/src/common/fd.h @@ -156,4 +156,13 @@ extern char *poll_revents_to_str(const short revents); extern void send_fd_over_pipe(int socket, int fd); extern int receive_fd_over_pipe(int socket); +/* + * Recursively remove a directory and all contents. + * Takes care not to follow any symlinks outside the target directory. + * + * Returns the number of files/directories it failed to remove, + * or 0 on success. + */ +extern int rmdir_recursive(const char *path, bool remove_top); + #endif /* !_FD_H */ diff --git a/src/common/slurm_xlator.h b/src/common/slurm_xlator.h index 60705f57960..a3db33bbccd 100644 --- a/src/common/slurm_xlator.h +++ b/src/common/slurm_xlator.h @@ -123,6 +123,7 @@ #define fd_get_socket_error slurm_fd_get_socket_error #define send_fd_over_pipe slurm_send_fd_over_pipe #define receive_fd_over_pipe slurm_receive_fd_over_pipe +#define rmdir_recursive slurm_rmdir_recursive /* hostlist.[ch] functions */ #define hostlist_create_dims slurm_hostlist_create_dims diff --git a/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c index ea17246bd8d..fca1244e1f0 100644 --- a/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c +++ b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c @@ -44,6 +44,7 @@ * Copyright (C) 2002 The Regents of the University of California. \*****************************************************************************/ +#include #include #include #include @@ -101,6 +102,14 @@ const char plugin_name[] = "AcctGatherProfile hdf5 plugin"; const char plugin_type[] = "acct_gather_profile/hdf5"; const uint32_t plugin_version = SLURM_VERSION_NUMBER; +struct priv_state { + uid_t saved_uid; + gid_t saved_gid; + gid_t * gid_list; + int ngids; + char saved_cwd [4096]; +}; + typedef struct { char *dir; uint32_t def; @@ -133,6 +142,88 @@ static table_t *tables = NULL; static size_t tables_max_len = 0; static size_t tables_cur_len = 0; +/* If get_list is false make sure ps->gid_list is initialized before + * hand to prevent xfree. + */ +static int +_drop_privileges(stepd_step_rec_t *job, bool do_setuid, + struct priv_state *ps, bool get_list) +{ + ps->saved_uid = getuid(); + ps->saved_gid = getgid(); + + if (!getcwd (ps->saved_cwd, sizeof (ps->saved_cwd))) { + error ("Unable to get current working directory: %m"); + strlcpy(ps->saved_cwd, "/tmp", sizeof(ps->saved_cwd)); + } + + ps->ngids = getgroups(0, NULL); + if (ps->ngids == -1) { + error("%s: getgroups(): %m", __func__); + return -1; + } + if (get_list) { + ps->gid_list = (gid_t *) xmalloc(ps->ngids * sizeof(gid_t)); + + if (getgroups(ps->ngids, ps->gid_list) == -1) { + error("%s: couldn't get %d groups: %m", + __func__, ps->ngids); + xfree(ps->gid_list); + return -1; + } + } + + /* + * No need to drop privileges if we're not running as root + */ + if (getuid() != (uid_t) 0) + return SLURM_SUCCESS; + + if (setegid(job->gid) < 0) { + error("setegid: %m"); + return -1; + } + + if (setgroups(job->ngids, job->gids) < 0) { + error("setgroups: %m"); + return -1; + } + + if (do_setuid && seteuid(job->uid) < 0) { + error("seteuid: %m"); + return -1; + } + + return SLURM_SUCCESS; +} + +static int +_reclaim_privileges(struct priv_state *ps) +{ + int rc = SLURM_SUCCESS; + + /* + * No need to reclaim privileges if our uid == job->uid + */ + if (geteuid() == ps->saved_uid) + goto done; + else if (seteuid(ps->saved_uid) < 0) { + error("seteuid: %m"); + rc = -1; + } else if (setegid(ps->saved_gid) < 0) { + error("setegid: %m"); + rc = -1; + } else if (setgroups(ps->ngids, ps->gid_list) < 0) { + error("setgroups: %m"); + rc = -1; + } + +done: + xfree(ps->gid_list); + + return rc; +} + static void _reset_slurm_profile_conf(void) { xfree(hdf5_conf.dir); @@ -156,30 +247,66 @@ static uint32_t _determine_profile(void) static void _create_directories(void) { - char *user_dir = NULL; + char *parent_dir = NULL, *user_dir = NULL, *hdf5_dir_rel = NULL; + char *slash = NULL; + int parent_dirfd, user_parent_dirfd; xassert(g_job); xassert(hdf5_conf.dir); - xstrfmtcat(user_dir, "%s/%s", hdf5_conf.dir, g_job->user_name); + parent_dir = xstrdup(hdf5_conf.dir); + /* split into base and new directory name */ + while ((slash = strrchr(parent_dir, '/'))) { + /* fix a path with one or more trailing slashes */ + if (slash[1] == '\0') + slash[0] = '\0'; + else + break; + } + + if (!slash) + fatal("Invalid ProfileHDF5Dir=\"%s\"", hdf5_conf.dir); + + slash[0] = '\0'; + hdf5_dir_rel = slash + 1; + + parent_dirfd = open(parent_dir, O_DIRECTORY | O_NOFOLLOW); /* - * To avoid race conditions (TOCTOU) with stat() calls, always - * attempt to create the ProfileHDF5Dir and the user directory within. + * Use *at family of syscalls to prevent TOCTOU abuse by working + * on file descriptors instead of path names. */ - if (((mkdir(hdf5_conf.dir, 0755)) < 0) && (errno != EEXIST)) - fatal("mkdir(%s): %m", hdf5_conf.dir); - if (chmod(hdf5_conf.dir, 0755) < 0) - fatal("chmod(%s): %m", hdf5_conf.dir); - - if (((mkdir(user_dir, 0700)) < 0) && (errno != EEXIST)) - fatal("mkdir(%s): %m", user_dir); - if (chmod(user_dir, 0700) < 0) - fatal("chmod(%s): %m", user_dir); - if (chown(user_dir, g_job->uid, g_job->gid) < 0) - fatal("chown(%s): %m", user_dir); + if ((mkdirat(parent_dirfd, hdf5_dir_rel, 0755)) < 0) { + /* Never chmod on EEXIST */ + if (errno != EEXIST) + fatal("mkdirat(%s): %m", hdf5_conf.dir); + } else if (fchmodat(parent_dirfd, hdf5_dir_rel, 0755, + AT_SYMLINK_NOFOLLOW) < 0) + fatal("fchmodat(%s): %m", hdf5_conf.dir); + xstrfmtcat(user_dir, "%s/%s", hdf5_conf.dir, g_job->user_name); + user_parent_dirfd = openat(parent_dirfd, hdf5_dir_rel, + O_DIRECTORY | O_NOFOLLOW); + close(parent_dirfd); + + if ((mkdirat(user_parent_dirfd, g_job->user_name, 0700)) < 0) { + /* Never chmod on EEXIST */ + if (errno != EEXIST) + fatal("mkdirat(%s): %m", user_dir); + } else { + /* fchmodat(2) man says AT_SYMLINK_NOFOLLOW not implemented. */ + if (fchmodat(user_parent_dirfd, g_job->user_name, 0700, 0) < 0) + fatal("fchmodat(%s): %m", user_dir); + + if (fchownat(user_parent_dirfd, g_job->user_name, g_job->uid, + g_job->gid, AT_SYMLINK_NOFOLLOW) < 0) + fatal("fchmodat(%s): %m", user_dir); + } + + close(user_parent_dirfd); xfree(user_dir); + xfree(parent_dir); + /* Do not xfree() hdf5_dir_rel (interior pointer to freed data). */ } /* @@ -268,7 +395,7 @@ extern void acct_gather_profile_p_get(enum acct_gather_profile_info info_type, extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job) { int rc = SLURM_SUCCESS; - + struct priv_state sprivs = { 0 }; char *profile_file_name; xassert(running_in_slurmstepd()); @@ -311,16 +438,24 @@ extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job) acct_gather_profile_to_string(g_profile_running), profile_file_name); + if (_drop_privileges(g_job, true, &sprivs, false) < 0) { + error("%s: Unable to drop privileges", __func__); + xfree(profile_file_name); + return SLURM_ERROR; + } + /* * Create a new file using the default properties */ file_id = H5Fcreate(profile_file_name, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - if (chown(profile_file_name, (uid_t)g_job->uid, - (gid_t)g_job->gid) < 0) - error("chown(%s): %m", profile_file_name); - if (chmod(profile_file_name, 0600) < 0) - error("chmod(%s): %m", profile_file_name); + + if (_reclaim_privileges(&sprivs) < 0) { + error("%s: Unable to reclaim privileges", __func__); + xfree(profile_file_name); + return SLURM_ERROR; + } + xfree(profile_file_name); if (file_id < 1) { diff --git a/src/plugins/job_container/tmpfs/job_container_tmpfs.c b/src/plugins/job_container/tmpfs/job_container_tmpfs.c index 0c2dad2adcd..5ae09f94b22 100644 --- a/src/plugins/job_container/tmpfs/job_container_tmpfs.c +++ b/src/plugins/job_container/tmpfs/job_container_tmpfs.c @@ -39,7 +39,6 @@ \*****************************************************************************/ #define _GNU_SOURCE -#define _XOPEN_SOURCE 500 /* For ftw.h */ #include #include #include @@ -47,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -55,6 +53,7 @@ #include "src/common/slurm_xlator.h" #include "src/common/env.h" +#include "src/common/fd.h" #include "src/common/log.h" #include "src/common/read_config.h" #include "src/common/run_command.h" @@ -79,7 +78,6 @@ const uint32_t plugin_version = SLURM_VERSION_NUMBER; static slurm_jc_conf_t *jc_conf = NULL; static int step_ns_fd = -1; -static bool force_rm = true; List legacy_jobs; typedef struct { @@ -382,35 +380,6 @@ static int _mount_private_shm(void) return rc; } -static int _rm_data(const char *path, const struct stat *st_buf, - int type, struct FTW *ftwbuf) -{ - int rc = SLURM_SUCCESS; - - if (remove(path) < 0) { - log_level_t log_lvl; - if (force_rm) { - rc = SLURM_ERROR; - log_lvl = LOG_LEVEL_ERROR; - } else - log_lvl = LOG_LEVEL_DEBUG2; - - if (type == FTW_NS) - log_var(log_lvl, - "%s: Unreachable file of FTW_NS type: %s", - __func__, path); - else if (type == FTW_DNR) - log_var(log_lvl, - "%s: Unreadable directory: %s", - __func__, path); - - log_var(log_lvl, - "%s: could not remove path: %s: %s", - __func__, path, strerror(errno)); - } - - return rc; -} static int _create_ns(uint32_t job_id, uid_t uid, bool remount) { @@ -590,9 +559,9 @@ static int _create_ns(uint32_t job_id, uid_t uid, bool remount) * already be correct here. */ if (!remount) { - rc = chown(src_bind, uid, -1); + rc = lchown(src_bind, uid, -1); if (rc) { - error("%s: chown failed for %s: %s", + error("%s: lchown failed for %s: %s", __func__, src_bind, strerror(errno)); rc = -1; goto child_exit; @@ -681,11 +650,11 @@ static int _create_ns(uint32_t job_id, uid_t uid, bool remount) exit2: if (rc) { + int failures; /* cleanup the job mount */ - force_rm = true; - if (nftw(job_mount, _rm_data, 64, FTW_DEPTH|FTW_PHYS) < 0) { - error("%s: Directory traversal failed: %s: %s", - __func__, job_mount, strerror(errno)); + if ((failures = rmdir_recursive(job_mount, false))) { + error("%s: failed to remove %d files from %s", + __func__, failures, job_mount); return SLURM_ERROR; } umount2(job_mount, MNT_DETACH); @@ -774,7 +743,7 @@ static int _delete_ns(uint32_t job_id, bool is_slurmd) { char job_mount[PATH_MAX]; char ns_holder[PATH_MAX]; - int rc = 0; + int rc = 0, failures = 0; #ifdef HAVE_NATIVE_CRAY return SLURM_SUCCESS; @@ -827,23 +796,9 @@ static int _delete_ns(uint32_t job_id, bool is_slurmd) } } - /* - * Traverses the job directory, and delete all files. - * Doesn't - - * traverse filesystem boundaries, - * follow symbolic links - * Does - - * a post order traversal and delete directory after processing - * contents - * NOTE: Can happen EBUSY here so we need to ignore this. - */ - force_rm = false; - if (nftw(job_mount, _rm_data, 64, FTW_DEPTH|FTW_PHYS) < 0) { - error("%s: Directory traversal failed: %s: %s", - __func__, job_mount, strerror(errno)); - return SLURM_ERROR; - } - + if ((failures = rmdir_recursive(job_mount, false))) + error("%s: failed to remove %d files from %s", + __func__, failures, job_mount); if (umount2(job_mount, MNT_DETACH)) debug2("umount2: %s failed: %s", job_mount, strerror(errno)); rmdir(job_mount); diff --git a/src/plugins/mpi/cray_shasta/apinfo.c b/src/plugins/mpi/cray_shasta/apinfo.c index 2e03ed0e57e..7236c833831 100644 --- a/src/plugins/mpi/cray_shasta/apinfo.c +++ b/src/plugins/mpi/cray_shasta/apinfo.c @@ -381,7 +381,7 @@ static int _open_apinfo(const stepd_step_rec_t *job) apinfo = xstrdup_printf("%s/apinfo", appdir); // Create file - fd = creat(apinfo, 0600); + fd = open(apinfo, (O_CREAT | O_WRONLY | O_TRUNC | O_EXCL), 0600); if (fd == -1) { error("%s: Couldn't open apinfo file %s: %m", plugin_type, apinfo); diff --git a/src/plugins/mpi/cray_shasta/mpi_cray_shasta.c b/src/plugins/mpi/cray_shasta/mpi_cray_shasta.c index 2528e65aa75..4d4dec893c0 100644 --- a/src/plugins/mpi/cray_shasta/mpi_cray_shasta.c +++ b/src/plugins/mpi/cray_shasta/mpi_cray_shasta.c @@ -45,6 +45,7 @@ #include "src/common/slurm_xlator.h" #include "src/common/env.h" +#include "src/common/fd.h" #include "src/common/parse_config.h" #include "src/common/read_config.h" #include "src/common/slurm_mpi.h" @@ -175,63 +176,6 @@ static void _set_pmi_port(char ***env) env_array_overwrite_fmt(env, "PMI_CONTROL_PORT", "%lu", pmi_port); } -/* - * Determine whether the given path is a directory - */ -static int _is_dir(char *path) -{ - struct stat stat_buf; - - if (stat(path, &stat_buf)) { - error("%s: Cannot stat %s: %m", plugin_type, path); - return 1; - } else if (!S_ISDIR(stat_buf.st_mode)) { - return 0; - } - return 1; -} - -/* - * Recursively remove a directory - */ -static int _rmdir_recursive(char *path) -{ - char nested_path[PATH_MAX]; - DIR *dp; - struct dirent *ent; - - if (!(dp = opendir(path))) { - error("%s: Can't open directory %s: %m", plugin_type, path); - return SLURM_ERROR; - } - - while ((ent = readdir(dp))) { - if (!xstrcmp(ent->d_name, ".") || - !xstrcmp(ent->d_name, "..")) { - /* skip special dir's */ - continue; - } - snprintf(nested_path, sizeof(nested_path), "%s/%s", path, - ent->d_name); - if (_is_dir(nested_path)) { - _rmdir_recursive(nested_path); - } else { - debug("%s: Removed file %s", plugin_type, nested_path); - unlink(nested_path); - } - } - closedir(dp); - - if (rmdir(path) == -1) { - error("%s: Can't remove directory %s: %m", - plugin_type, path); - return SLURM_ERROR; - } - - debug("%s: Removed directory %s", plugin_type, path); - return SLURM_SUCCESS; -} - extern int mpi_p_slurmstepd_prefork(const stepd_step_rec_t *job, char ***env) { /* do the node_name substitution once */ @@ -291,7 +235,7 @@ extern int fini(void) { // Remove application spool directory if (appdir) - _rmdir_recursive(appdir); + rmdir_recursive(appdir, true); // Free allocated storage xfree(appdir); diff --git a/src/plugins/mpi/pmix/pmixp_client.c b/src/plugins/mpi/pmix/pmixp_client.c index 9f4b74e4f1d..e32d9b1fe36 100644 --- a/src/plugins/mpi/pmix/pmixp_client.c +++ b/src/plugins/mpi/pmix/pmixp_client.c @@ -475,16 +475,14 @@ static void _set_localinfo(List lresp) extern int pmixp_libpmix_init(void) { int rc; - mode_t rights = (S_IRUSR | S_IWUSR | S_IXUSR) | - (S_IRGRP | S_IWGRP | S_IXGRP); - if (0 != (rc = pmixp_mkdir(pmixp_info_tmpdir_lib(), rights))) { + if (0 != (rc = pmixp_mkdir(pmixp_info_tmpdir_lib()))) { PMIXP_ERROR_STD("Cannot create server lib tmpdir: \"%s\"", pmixp_info_tmpdir_lib()); return errno; } - if (0 != (rc = pmixp_mkdir(pmixp_info_tmpdir_cli(), rights))) { + if (0 != (rc = pmixp_mkdir(pmixp_info_tmpdir_cli()))) { PMIXP_ERROR_STD("Cannot create client cli tmpdir: \"%s\"", pmixp_info_tmpdir_cli()); return errno; @@ -499,12 +497,6 @@ extern int pmixp_libpmix_init(void) /* TODO: must be deleted in future once info-key approach harden */ setenv(PMIXP_PMIXLIB_TMPDIR, pmixp_info_tmpdir_lib(), 1); - /* - if( pmixp_fixrights(pmixp_info_tmpdir_lib(), - (uid_t) pmixp_info_jobuid(), rights) ){ - } - */ - return 0; } @@ -514,14 +506,14 @@ extern int pmixp_libpmix_finalize(void) rc = pmixp_lib_finalize(); - rc1 = pmixp_rmdir_recursively(pmixp_info_tmpdir_lib()); + rc1 = rmdir_recursive(pmixp_info_tmpdir_lib(), true); if (0 != rc1) { PMIXP_ERROR_STD("Failed to remove %s\n", pmixp_info_tmpdir_lib()); /* Not considering this as fatal error */ } - rc1 = pmixp_rmdir_recursively(pmixp_info_tmpdir_cli()); + rc1 = rmdir_recursive(pmixp_info_tmpdir_cli(), true); if (0 != rc1) { PMIXP_ERROR_STD("Failed to remove %s\n", pmixp_info_tmpdir_cli()); diff --git a/src/plugins/mpi/pmix/pmixp_utils.c b/src/plugins/mpi/pmix/pmixp_utils.c index 50aae40ca92..79ed60a1d65 100644 --- a/src/plugins/mpi/pmix/pmixp_utils.c +++ b/src/plugins/mpi/pmix/pmixp_utils.c @@ -479,128 +479,12 @@ int pmixp_p2p_send(const char *nodename, const char *address, const char *data, return rc; } -static int _is_dir(char *path) +int pmixp_mkdir(char *path) { - struct stat stat_buf; - int rc; - if (0 > (rc = stat(path, &stat_buf))) { - PMIXP_ERROR_STD("Cannot stat() path=\"%s\"", path); - return rc; - } else if (!S_ISDIR(stat_buf.st_mode)) { - return 0; - } - return 1; -} - -int pmixp_rmdir_recursively(char *path) -{ - char nested_path[PATH_MAX]; - DIR *dp; - struct dirent *ent; - - int rc; - - /* - * Make sure that "directory" exists and is a directory. - */ - if (1 != (rc = _is_dir(path))) { - PMIXP_ERROR("path=\"%s\" is not a directory", path); - return (rc == 0) ? -1 : rc; - } - - if ((dp = opendir(path)) == NULL) { - PMIXP_ERROR_STD("cannot open path=\"%s\"", path); - return -1; - } - - while ((ent = readdir(dp)) != NULL) { - if (0 == xstrcmp(ent->d_name, ".") - || 0 == xstrcmp(ent->d_name, "..")) { - /* skip special dir's */ - continue; - } - snprintf(nested_path, sizeof(nested_path), "%s/%s", path, - ent->d_name); - if (_is_dir(nested_path)) { - pmixp_rmdir_recursively(nested_path); - } else { - unlink(nested_path); - } - } - closedir(dp); - if ((rc = rmdir(path))) { - PMIXP_ERROR_STD("Cannot remove path=\"%s\"", path); - } - return rc; -} - -static inline int _file_fix_rights(char *path, uid_t uid, mode_t mode) -{ - if (chmod(path, mode) < 0) { - PMIXP_ERROR("chown(%s): %m", path); - return errno; - } - - if (chown(path, uid, (gid_t) -1) < 0) { - PMIXP_ERROR("chown(%s): %m", path); - return errno; - } - return 0; -} + char *base = NULL, *newdir = NULL, *slash; + int dirfd; + mode_t rights = (S_IRUSR | S_IWUSR | S_IXUSR); -int pmixp_fixrights(char *path, uid_t uid, mode_t mode) -{ - char nested_path[PATH_MAX]; - DIR *dp; - struct dirent *ent; - int rc = 0; - - /* - * Make sure that "directory" exists and is a directory. - */ - if (1 != (rc = _is_dir(path))) { - PMIXP_ERROR("path=\"%s\" is not a directory", path); - return (rc == 0) ? -1 : rc; - } - - if ((dp = opendir(path)) == NULL) { - PMIXP_ERROR_STD("cannot open path=\"%s\"", path); - return -1; - } - - while ((ent = readdir(dp)) != NULL) { - if (0 == xstrcmp(ent->d_name, ".") - || 0 == xstrcmp(ent->d_name, "..")) { - /* skip special dir's */ - continue; - } - snprintf(nested_path, sizeof(nested_path), "%s/%s", path, - ent->d_name); - if (_is_dir(nested_path)) { - if ((rc = _file_fix_rights(nested_path, uid, mode))) { - PMIXP_ERROR_STD("cannot fix permissions for " - "\"%s\"", - nested_path); - goto exit; - } - pmixp_rmdir_recursively(nested_path); - } else { - if ((rc = _file_fix_rights(nested_path, uid, mode))) { - PMIXP_ERROR_STD("cannot fix permissions for " - "\"%s\"", - nested_path); - goto exit; - } - } - } - -exit: - closedir(dp); - return rc; -} - -int pmixp_mkdir(char *path, mode_t rights) -{ /* NOTE: we need user who owns the job to access PMIx usock * file. According to 'man 7 unix': * "... In the Linux implementation, sockets which are visible in the @@ -610,26 +494,51 @@ int pmixp_mkdir(char *path, mode_t rights) * access to the unix socket we do the following: * 1. Owner ID is set to the job owner. * 2. Group ID corresponds to slurmstepd. - * 3. Set 0770 access mode + * 3. Set 0700 access mode */ - if (0 != mkdir(path, rights) ) { - PMIXP_ERROR_STD("Cannot create directory \"%s\"", - path); + base = xstrdup(path); + /* split into base and new directory name */ + while ((slash = strrchr(base, '/'))) { + /* fix a path with one or more trailing slashes */ + if (slash[1] == '\0') + slash[0] = '\0'; + else + break; + } + + if (!slash) { + PMIXP_ERROR_STD("Invalid directory \"%s\"", path); + xfree(base); + return EINVAL; + } + + slash[0] = '\0'; + newdir = slash + 1; + + if ((dirfd = open(base, O_DIRECTORY | O_NOFOLLOW)) < 0) { + PMIXP_ERROR_STD("Could not open parent directory \"%s\"", base); + xfree(base); return errno; } - /* There might be umask that will drop essential rights. - * Fix it explicitly. - * TODO: is there more elegant solution? */ - if (chmod(path, rights) < 0) { - error("%s: chown(%s): %m", __func__, path); + if (mkdirat(dirfd, newdir, rights) < 0) { + PMIXP_ERROR_STD("Cannot create directory \"%s\"", + path); + close(dirfd); + xfree(base); return errno; } - if (chown(path, (uid_t) pmixp_info_jobuid(), (gid_t) -1) < 0) { - error("%s: chown(%s): %m", __func__, path); + if (fchownat(dirfd, newdir, (uid_t) pmixp_info_jobuid(), (gid_t) -1, + AT_SYMLINK_NOFOLLOW) < 0) { + error("%s: fchownath(%s): %m", __func__, path); + close(dirfd); + xfree(base); return errno; } + + close(dirfd); + xfree(base); return 0; } diff --git a/src/plugins/mpi/pmix/pmixp_utils.h b/src/plugins/mpi/pmix/pmixp_utils.h index 524740b401d..1be71d07277 100644 --- a/src/plugins/mpi/pmix/pmixp_utils.h +++ b/src/plugins/mpi/pmix/pmixp_utils.h @@ -61,9 +61,7 @@ int pmixp_stepd_send(const char *nodelist, const char *address, int pmixp_p2p_send(const char *nodename, const char *address, const char *data, uint32_t len, unsigned int start_delay, unsigned int retry_cnt, int silent); -int pmixp_rmdir_recursively(char *path); -int pmixp_fixrights(char *path, uid_t uid, mode_t mode); -int pmixp_mkdir(char *path, mode_t rights); +int pmixp_mkdir(char *path); /* lightweight pmix list of pointers */ #define PMIXP_LIST_DEBUG 0 diff --git a/src/plugins/switch/cray_aries/iaa.c b/src/plugins/switch/cray_aries/iaa.c index 2a008b3e5c3..20ffed6fdae 100644 --- a/src/plugins/switch/cray_aries/iaa.c +++ b/src/plugins/switch/cray_aries/iaa.c @@ -65,7 +65,7 @@ int write_iaa_file(stepd_step_rec_t *job, slurm_cray_jobinfo_t *sw_job, } // chown the file to the job user - rc = chown(fname, job->uid, job->gid); + rc = lchown(fname, job->uid, job->gid); if (rc == -1) { CRAY_ERR("chown(%s, %d, %d) failed: %m", fname, (int)job->uid, (int)job->gid); diff --git a/src/plugins/switch/cray_aries/switch_cray_aries.h b/src/plugins/switch/cray_aries/switch_cray_aries.h index 9c539b7328b..0d9fe15a43e 100644 --- a/src/plugins/switch/cray_aries/switch_cray_aries.h +++ b/src/plugins/switch/cray_aries/switch_cray_aries.h @@ -45,6 +45,7 @@ #include #include "src/common/bitstring.h" +#include "src/common/fd.h" #include "src/common/log.h" #include "src/common/slurm_protocol_defs.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" diff --git a/src/plugins/switch/cray_aries/util.c b/src/plugins/switch/cray_aries/util.c index 090cd17752e..0f4ebf20c08 100644 --- a/src/plugins/switch/cray_aries/util.c +++ b/src/plugins/switch/cray_aries/util.c @@ -50,7 +50,6 @@ #if defined(HAVE_NATIVE_CRAY) || defined(HAVE_CRAY_NETWORK) -static void _recursive_rmdir(const char *dirnm); /* * Create APID directory with given uid/gid as the owner. @@ -69,7 +68,7 @@ int create_apid_dir(uint64_t apid, uid_t uid, gid_t gid) return SLURM_ERROR; } - rc = chown(apid_dir, uid, gid); + rc = lchown(apid_dir, uid, gid); if (rc) { CRAY_ERR("chown %s, %d, %d failed: %m", apid_dir, (int)uid, (int)gid); @@ -116,7 +115,7 @@ int remove_spool_files(uint64_t apid) // Remove the apid directory LEGACY_SPOOL_DIR/ path_name = xstrdup_printf(LEGACY_SPOOL_DIR "%" PRIu64, apid); - _recursive_rmdir(path_name); + rmdir_recursive(path_name, true); xfree(path_name); // Remove the backwards compatibility ALPS placement file @@ -299,75 +298,6 @@ int list_str_to_array(char *list, int *cnt, int32_t **numbers) return ret; } -/* - * Recursive directory delete - * - * Call with a directory name and this function will delete - * all files and directories rooted in this name. Finally - * the named directory will be deleted. - * If called with a file name, only that file will be deleted. - */ -static void _recursive_rmdir(const char *dirnm) -{ - int st; - size_t dirnm_len, fnm_len, name_len; - char *fnm = 0; - DIR *dirp; - struct dirent *dir; - struct stat st_buf; - - /* Don't do anything if there is no directory name */ - if (!dirnm) { - return; - } - dirp = opendir(dirnm); - if (!dirp) { - if (errno == ENOTDIR) - goto fileDel; - CRAY_ERR("Error opening directory %s", dirnm); - return; - } - - dirnm_len = strlen(dirnm); - if (dirnm_len == 0) - return; - while ((dir = readdir(dirp))) { - name_len = strlen(dir->d_name); - if (name_len == 1 && dir->d_name[0] == '.') - continue; - if (name_len == 2 && xstrcmp(dir->d_name, "..") == 0) - continue; - fnm_len = dirnm_len + name_len + 2; - free(fnm); - fnm = malloc(fnm_len); - snprintf(fnm, fnm_len, "%s/%s", dirnm, dir->d_name); - st = stat(fnm, &st_buf); - if (st < 0) { - CRAY_ERR("stat of %s", fnm); - continue; - } - if (st_buf.st_mode & S_IFDIR) { - _recursive_rmdir(fnm); - } else { - - st = unlink(fnm); - if (st < 0 && errno == EISDIR) - st = rmdir(fnm); - if (st < 0 && errno != ENOENT) { - CRAY_ERR("Error removing %s", fnm); - } - } - } - free(fnm); - closedir(dirp); -fileDel: st = unlink(dirnm); - if (st < 0 && errno == EISDIR) - st = rmdir(dirnm); - if (st < 0 && errno != ENOENT) { - CRAY_ERR("Error removing %s", dirnm); - } -} - void print_jobinfo(slurm_cray_jobinfo_t *job) { int i; diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index eb1acbad663..c3f9bdde36c 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -126,14 +126,6 @@ #define RETRY_DELAY 15 /* retry every 15 seconds */ #define MAX_RETRY 240 /* retry 240 times (one hour max) */ -struct priv_state { - uid_t saved_uid; - gid_t saved_gid; - gid_t * gid_list; - int ngids; - char saved_cwd [4096]; -}; - step_complete_t step_complete = { PTHREAD_COND_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, @@ -172,9 +164,6 @@ static int _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized); static int _become_user(stepd_step_rec_t *job, struct priv_state *ps); static void _set_prio_process (stepd_step_rec_t *job); static int _setup_normal_io(stepd_step_rec_t *job); -static int _drop_privileges(stepd_step_rec_t *job, bool do_setuid, - struct priv_state *state, bool get_list); -static int _reclaim_privileges(struct priv_state *state); static void _send_launch_resp(stepd_step_rec_t *job, int rc); static int _slurmd_job_log_init(stepd_step_rec_t *job); static void _wait_for_io(stepd_step_rec_t *job); @@ -458,7 +447,7 @@ _setup_normal_io(stepd_step_rec_t *job) * descriptors (which may be connected to files), then * reclaim privileges. */ - if (_drop_privileges(job, true, &sprivs, true) < 0) + if (drop_privileges(job, true, &sprivs, true) < 0) return ESLURMD_SET_UID_OR_GID_ERROR; if (io_init_tasks_stdio(job) != SLURM_SUCCESS) { @@ -572,7 +561,7 @@ _setup_normal_io(stepd_step_rec_t *job) } claim: - if (_reclaim_privileges(&sprivs) < 0) { + if (reclaim_privileges(&sprivs) < 0) { error("sete{u/g}id(%lu/%lu): %m", (u_long) sprivs.saved_uid, (u_long) sprivs.saved_gid); } @@ -927,7 +916,7 @@ static void _shutdown_x11_forward(stepd_step_rec_t *job) { struct priv_state sprivs = { 0 }; - if (_drop_privileges(job, true, &sprivs, false) < 0) { + if (drop_privileges(job, true, &sprivs, false) < 0) { error("%s: Unable to drop privileges", __func__); return; } @@ -935,7 +924,7 @@ static void _shutdown_x11_forward(stepd_step_rec_t *job) if (shutdown_x11_forward(job) != SLURM_SUCCESS) error("%s: x11 forward shutdown failed", __func__); - if (_reclaim_privileges(&sprivs) < 0) + if (reclaim_privileges(&sprivs) < 0) error("%s: Unable to reclaim privileges", __func__); } @@ -996,7 +985,7 @@ static int _set_xauthority(stepd_step_rec_t *job) { struct priv_state sprivs = { 0 }; - if (_drop_privileges(job, true, &sprivs, false) < 0) { + if (drop_privileges(job, true, &sprivs, false) < 0) { error("%s: Unable to drop privileges before xauth", __func__); return SLURM_ERROR; } @@ -1007,7 +996,7 @@ static int _set_xauthority(stepd_step_rec_t *job) return SLURM_ERROR; } - if (_reclaim_privileges(&sprivs) < 0) { + if (reclaim_privileges(&sprivs) < 0) { error("%s: Unable to reclaim privileges after xauth", __func__); return SLURM_ERROR; } @@ -1058,7 +1047,7 @@ static int _spawn_job_container(stepd_step_rec_t *job) if (job->x11) { struct priv_state sprivs = { 0 }; - if (_drop_privileges(job, true, &sprivs, false) < 0) { + if (drop_privileges(job, true, &sprivs, false) < 0) { error ("Unable to drop privileges"); return SLURM_ERROR; } @@ -1067,7 +1056,7 @@ static int _spawn_job_container(stepd_step_rec_t *job) error("x11 port forwarding setup failed"); _exit(127); } - if (_reclaim_privileges(&sprivs) < 0) { + if (reclaim_privileges(&sprivs) < 0) { error ("Unable to reclaim privileges"); return SLURM_ERROR; } @@ -1558,7 +1547,7 @@ static int _pre_task_child_privileged( int setwd = 0; /* set working dir */ int rc = 0; - if (_reclaim_privileges(sp) < 0) + if (reclaim_privileges(sp) < 0) return SLURM_ERROR; set_oom_adj(0); /* the tasks may be killed by OOM */ @@ -1585,9 +1574,9 @@ static int _pre_task_child_privileged( return error("spank_task_init_privileged failed"); /* sp->gid_list should already be initialized */ - rc = _drop_privileges(job, true, sp, false); + rc = drop_privileges(job, true, sp, false); if (rc) { - error ("_drop_privileges: %m"); + error ("drop_privileges: %m"); return rc; } @@ -1825,7 +1814,7 @@ _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized) * Temporarily drop effective privileges, except for the euid. * We need to wait until after pam_setup() to drop euid. */ - if (_drop_privileges (job, false, &sprivs, true) < 0) + if (drop_privileges (job, false, &sprivs, true) < 0) return ESLURMD_SET_UID_OR_GID_ERROR; if (pam_setup(job->user_name, conf->hostname) @@ -1837,7 +1826,7 @@ _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized) /* * Reclaim privileges to do the io setup */ - _reclaim_privileges(&sprivs); + reclaim_privileges(&sprivs); if (rc) goto fail1; /* pam_setup error */ @@ -1885,7 +1874,7 @@ _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized) /* * Temporarily drop effective privileges */ - if (_drop_privileges (job, true, &sprivs, true) < 0) { + if (drop_privileges (job, true, &sprivs, true) < 0) { error ("_drop_privileges: %m"); rc = SLURM_ERROR; goto fail2; @@ -1943,7 +1932,7 @@ _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized) * Reclaim privileges for the child and call any plugin * hooks that may require elevated privs * sprivs.gid_list is already set from the - * _drop_privileges call above, no not reinitialize. + * drop_privileges call above, no not reinitialize. * NOTE: Only put things in here that are self contained * and belong in the child. */ @@ -2008,7 +1997,7 @@ _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized) /* * Reclaim privileges */ - if (_reclaim_privileges(&sprivs) < 0) { + if (reclaim_privileges(&sprivs) < 0) { error ("Unable to reclaim privileges"); /* Don't bother erroring out here */ } @@ -2119,7 +2108,7 @@ _fork_all_tasks(stepd_step_rec_t *job, bool *io_initialized) error ("Unable to return to working directory"); } fail3: - _reclaim_privileges (&sprivs); + reclaim_privileges (&sprivs); fail2: FREE_NULL_LIST(exec_wait_list); io_close_task_fds(job); @@ -2684,9 +2673,8 @@ _send_complete_batch_script_msg(stepd_step_rec_t *job, int err, int status) /* If get_list is false make sure ps->gid_list is initialized before * hand to prevent xfree. */ -static int -_drop_privileges(stepd_step_rec_t *job, bool do_setuid, - struct priv_state *ps, bool get_list) +extern int drop_privileges(stepd_step_rec_t *job, bool do_setuid, + struct priv_state *ps, bool get_list) { ps->saved_uid = getuid(); ps->saved_gid = getgid(); @@ -2736,8 +2724,7 @@ _drop_privileges(stepd_step_rec_t *job, bool do_setuid, return SLURM_SUCCESS; } -static int -_reclaim_privileges(struct priv_state *ps) +extern int reclaim_privileges(struct priv_state *ps) { int rc = SLURM_SUCCESS; @@ -2999,7 +2986,7 @@ _run_script_as_user(const char *name, const char *path, stepd_step_rec_t *job, #endif sprivs.gid_list = NULL; /* initialize to prevent xfree */ - if (_drop_privileges(job, true, &sprivs, false) < 0) { + if (drop_privileges(job, true, &sprivs, false) < 0) { error("run_script_as_user _drop_privileges: %m"); /* child process, should not return */ exit(127); diff --git a/src/slurmd/slurmstepd/mgr.h b/src/slurmd/slurmstepd/mgr.h index ebac81c1a01..2ce8eb9e778 100644 --- a/src/slurmd/slurmstepd/mgr.h +++ b/src/slurmd/slurmstepd/mgr.h @@ -85,4 +85,16 @@ int job_manager(stepd_step_rec_t *job); extern void init_initgroups(int); +struct priv_state { + uid_t saved_uid; + gid_t saved_gid; + gid_t *gid_list; + int ngids; + char saved_cwd[4096]; +}; + +extern int drop_privileges(stepd_step_rec_t *step, bool do_setuid, + struct priv_state *state, bool get_list); +extern int reclaim_privileges(struct priv_state *state); + #endif diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c index 27f9e105226..90bc1b9a1d0 100644 --- a/src/slurmd/slurmstepd/slurmstepd.c +++ b/src/slurmd/slurmstepd/slurmstepd.c @@ -747,7 +747,18 @@ _step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg) } if (job->container) { - int rc = setup_container(job); + struct priv_state sprivs; + int rc; + + if (drop_privileges(job, false, &sprivs, true) < 0) { + error("%s: drop_priviledges failed", __func__); + return NULL; + } + rc = setup_container(job); + if (reclaim_privileges(&sprivs) < 0) { + error("%s: reclaim_priviledges failed", __func__); + return NULL; + } if (rc == ESLURM_CONTAINER_NOT_CONFIGURED) { debug2("%s: container %s requested but containers are not configured on this node",