Skip to content

Commit

Permalink
Better control the thread pool size when mounting datasets
Browse files Browse the repository at this point in the history
Ever since a10d50f, ZFS has mounted file systems in parallel when
importing a pool.  It uses a fixed size of 512 for the thread pool.  But
since c183d16, it has also imported pools in parallel.  So the total
number of threads at one time is 513 * npools + 1.  That can easily
exceed the system's limit on the number of threads per process, which
will cause one or more pools to be unable to allocate any worker
threads, forcing them to fallback to slow serial mounting .  To
forestall that, manage the threadpool size in /sbin/zpool, not libzfs.
Use the same size (512), but divided by the number of pools.

This is a backwards-incompatible change to the libzfs abi.

Sponsored by: Axcient
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: George Wilson <[email protected]>
Signed-off-by: Alan Somers <[email protected]>
Closes #16178
  • Loading branch information
asomers authored and behlendorf committed May 14, 2024
1 parent eced2e2 commit b64afa4
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 23 deletions.
2 changes: 1 addition & 1 deletion cmd/zed/agents/zfs_mod.c
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ zfs_enable_ds(void *arg)
{
unavailpool_t *pool = (unavailpool_t *)arg;

(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0, 512);
zpool_close(pool->uap_zhp);
free(pool);
}
Expand Down
6 changes: 4 additions & 2 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -7192,6 +7192,8 @@ share_mount(int op, int argc, char **argv)
int c, ret = 0;
char *options = NULL;
int flags = 0;
const uint_t mount_nthr = 512;
uint_t nthr;

/* check options */
while ((c = getopt(argc, argv, op == OP_MOUNT ? ":aRlvo:Of" : "al"))
Expand Down Expand Up @@ -7310,9 +7312,9 @@ share_mount(int op, int argc, char **argv)
* be serialized so that we can prompt the user for their keys
* in a consistent manner.
*/
nthr = op == OP_MOUNT && !(flags & MS_CRYPT) ? mount_nthr : 1;
zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used,
share_mount_one_cb, &share_mount_state,
op == OP_MOUNT && !(flags & MS_CRYPT));
share_mount_one_cb, &share_mount_state, nthr);
zfs_commit_shares(NULL);

ret = share_mount_state.sm_status;
Expand Down
20 changes: 15 additions & 5 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@

libzfs_handle_t *g_zfs;

static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */

static int zpool_do_create(int, char **);
static int zpool_do_destroy(int, char **);

Expand Down Expand Up @@ -3418,7 +3420,7 @@ zfs_force_import_required(nvlist_t *config)
*/
static int
do_import(nvlist_t *config, const char *newname, const char *mntopts,
nvlist_t *props, int flags)
nvlist_t *props, int flags, uint_t mntthreads)
{
int ret = 0;
int ms_status = 0;
Expand Down Expand Up @@ -3518,7 +3520,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,

if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
!(flags & ZFS_IMPORT_ONLY)) {
ms_status = zpool_enable_datasets(zhp, mntopts, 0);
ms_status = zpool_enable_datasets(zhp, mntopts, 0, mntthreads);
if (ms_status == EZFS_SHAREFAILED) {
(void) fprintf(stderr, gettext("Import was "
"successful, but unable to share some datasets\n"));
Expand All @@ -3537,6 +3539,7 @@ typedef struct import_parameters {
const char *ip_mntopts;
nvlist_t *ip_props;
int ip_flags;
uint_t ip_mntthreads;
int *ip_err;
} import_parameters_t;

Expand All @@ -3545,7 +3548,7 @@ do_import_task(void *arg)
{
import_parameters_t *ip = arg;
*ip->ip_err |= do_import(ip->ip_config, NULL, ip->ip_mntopts,
ip->ip_props, ip->ip_flags);
ip->ip_props, ip->ip_flags, ip->ip_mntthreads);
free(ip);
}

Expand All @@ -3559,6 +3562,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
uint64_t pool_state;
boolean_t pool_specified = (import->poolname != NULL ||
import->guid != 0);
uint_t npools = 0;


tpool_t *tp = NULL;
Expand All @@ -3576,6 +3580,10 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
int err = 0;
nvpair_t *elem = NULL;
boolean_t first = B_TRUE;
if (!pool_specified && import->do_all) {
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL)
npools++;
}
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {

verify(nvpair_value_nvlist(elem, &config) == 0);
Expand Down Expand Up @@ -3606,6 +3614,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
ip->ip_mntopts = mntopts;
ip->ip_props = props;
ip->ip_flags = flags;
ip->ip_mntthreads = mount_tp_nthr / npools;
ip->ip_err = &err;

(void) tpool_dispatch(tp, do_import_task,
Expand Down Expand Up @@ -3673,7 +3682,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
err = B_TRUE;
} else {
err |= do_import(found_config, new_name,
mntopts, props, flags);
mntopts, props, flags, mount_tp_nthr);
}
}

Expand Down Expand Up @@ -7217,7 +7226,8 @@ zpool_do_split(int argc, char **argv)
}

if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
ms_status = zpool_enable_datasets(zhp, mntopts, 0);
ms_status = zpool_enable_datasets(zhp, mntopts, 0,
mount_tp_nthr);
if (ms_status == EZFS_SHAREFAILED) {
(void) fprintf(stderr, gettext("Split was successful, "
"datasets are mounted but sharing of some datasets "
Expand Down
5 changes: 3 additions & 2 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ typedef struct get_all_cb {
} get_all_cb_t;

_LIBZFS_H void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **,
size_t, zfs_iter_f, void *, boolean_t);
size_t, zfs_iter_f, void *, uint_t);
_LIBZFS_H void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);

/*
Expand Down Expand Up @@ -1004,7 +1004,8 @@ _LIBZFS_H int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *,
* Enable and disable datasets within a pool by mounting/unmounting and
* sharing/unsharing them.
*/
_LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int);
_LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int,
uint_t);
_LIBZFS_H int zpool_disable_datasets(zpool_handle_t *, boolean_t);
_LIBZFS_H void zpool_disable_datasets_os(zpool_handle_t *, boolean_t);
_LIBZFS_H void zpool_disable_volume_os(const char *);
Expand Down
3 changes: 2 additions & 1 deletion lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -5532,13 +5532,14 @@
<parameter type-id='b59d7dce' name='num_handles'/>
<parameter type-id='d8e49ab9' name='func'/>
<parameter type-id='eaa32e2f' name='data'/>
<parameter type-id='c19b74c3' name='parallel'/>
<parameter type-id='3502e3ff' name='nthr'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='zpool_enable_datasets' mangled-name='zpool_enable_datasets' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_enable_datasets'>
<parameter type-id='4c81de99' name='zhp'/>
<parameter type-id='80f4b756' name='mntopts'/>
<parameter type-id='95e97e5e' name='flags'/>
<parameter type-id='3502e3ff' name='nthr'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zpool_disable_datasets' mangled-name='zpool_disable_datasets' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_disable_datasets'>
Expand Down
25 changes: 13 additions & 12 deletions lib/libzfs/libzfs_mount.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@
#include <sys/systeminfo.h>
#define MAXISALEN 257 /* based on sysinfo(2) man page */

static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */

static void zfs_mount_task(void *);

static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = {
Expand Down Expand Up @@ -1205,19 +1203,20 @@ zfs_mount_task(void *arg)
*
* Callbacks are issued in one of two ways:
*
* 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
* 1. Sequentially: If the nthr argument is <= 1 or the ZFS_SERIAL_MOUNT
* environment variable is set, then we issue callbacks sequentially.
*
* 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
* 2. In parallel: If the nthr argument is > 1 and the ZFS_SERIAL_MOUNT
* environment variable is not set, then we use a tpool to dispatch threads
* to mount filesystems in parallel. This function dispatches tasks to mount
* the filesystems at the top-level mountpoints, and these tasks in turn
* are responsible for recursively mounting filesystems in their children
* mountpoints.
* mountpoints. The value of the nthr argument will be the number of worker
* threads for the thread pool.
*/
void
zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
size_t num_handles, zfs_iter_f func, void *data, uint_t nthr)
{
zoneid_t zoneid = getzoneid();

Expand All @@ -1226,7 +1225,7 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
* variable that can be used as a convenience to do a/b comparison
* of serial vs. parallel mounting.
*/
boolean_t serial_mount = !parallel ||
boolean_t serial_mount = nthr <= 1 ||
(getenv("ZFS_SERIAL_MOUNT") != NULL);

/*
Expand All @@ -1246,7 +1245,7 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
* Issue the callback function for each dataset using a parallel
* algorithm that uses a thread pool to manage threads.
*/
tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL);
tpool_t *tp = tpool_create(1, nthr, 0, NULL);

/*
* There may be multiple "top level" mountpoints outside of the pool's
Expand All @@ -1273,10 +1272,12 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,

/*
* Mount and share all datasets within the given pool. This assumes that no
* datasets within the pool are currently mounted.
* datasets within the pool are currently mounted. nthr will be number of
* worker threads to use while mounting datasets.
*/
int
zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags,
uint_t nthr)
{
get_all_cb_t cb = { 0 };
mount_state_t ms = { 0 };
Expand All @@ -1302,7 +1303,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
ms.ms_mntopts = mntopts;
ms.ms_mntflags = flags;
zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
zfs_mount_one, &ms, B_TRUE);
zfs_mount_one, &ms, nthr);
if (ms.ms_mntstatus != 0)
ret = EZFS_MOUNTFAILED;

Expand All @@ -1313,7 +1314,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
*/
ms.ms_mntstatus = 0;
zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
zfs_share_one, &ms, B_FALSE);
zfs_share_one, &ms, 1);
if (ms.ms_mntstatus != 0)
ret = EZFS_SHAREFAILED;
else
Expand Down

0 comments on commit b64afa4

Please sign in to comment.