From 0d654c32d61057135eba85f39024da7c1f179b3b Mon Sep 17 00:00:00 2001 From: "Charles L. Hedrick" Date: Fri, 29 Nov 2024 10:26:45 -0500 Subject: [PATCH] Add module parameter to disable prefetch in zfs_readdir Add parameter zfs_readdir_dnode_prefetch_limit, defaulting to 0, to control whether zfs_readdir prefetches metadata for the objects it looks at when reading a directory. If zero, metadata is prefetched for all directory entries. If non-zero, metadata is prefetched only if the directory's size does not exceed this value. Setting it to a non-zero value can be important for NFS servers with directories containing many subdirectories. Signed-off-by: Charles Hedrick Co-authored-by: Chris Siebenmann --- man/man4/zfs.4 | 21 +++++++++++++++++++++ module/os/linux/zfs/zfs_vnops_os.c | 9 +++++++++ 2 files changed, 30 insertions(+) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index da027798f96..81a26284a50 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1958,6 +1958,27 @@ Historical statistics for this many latest reads will be available in .It Sy zfs_read_history_hits Ns = Ns Sy 0 Ns | Ns 1 Pq int Include cache hits in read history . +.It Sy zfs_readdir_dnode_prefetch_limit Ns = Ns Sy 0 Pq ulong +Disable prefetches in readdir for large directories. +When readdir searches a directory, it normally prefetches metadata for +all objects in the directory it checks, even if it's just +looking for a single object. +Setting this to a non-zero value disables that prefetching for directories +with a greater size than that value. +Disabling prefetch for large directories can greatly lower CPU usage on NFS servers +where directories have a very large number of subdirectories. +Consider setting this parameter if your primary access is via NFS, you have +unusually high CPU used by nfsd processes, and arcstat shows very high metadata +read rates compared with other activity. +Directory size in this case is the size returned from calling +.Sy stat +on the directory (stat.st_size). 
+On ZFS, this directory size value is approximately the number of files +and subdirectories in the directory. +A reasonable value would be 20000. +A zero value (the default) means no limit on directory metadata prefetching. +This parameter only applies on Linux. +. .It Sy zfs_rebuild_max_segment Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64 Maximum read segment size to issue when sequentially resilvering a top-level vdev. diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index dd9fd760b9c..825420a0ac5 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -1546,6 +1546,7 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, * we use the offset 2 for the '.zfs' directory. */ +static ulong_t zfs_readdir_dnode_prefetch_limit = 0UL; int zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr) { @@ -1579,6 +1580,9 @@ zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr) os = zfsvfs->z_os; offset = ctx->pos; prefetch = zp->z_zn_prefetch; + if (zfs_readdir_dnode_prefetch_limit && + zp->z_size > zfs_readdir_dnode_prefetch_limit) + prefetch = B_FALSE; zap = zap_attribute_long_alloc(); /* @@ -4348,4 +4352,9 @@ EXPORT_SYMBOL(zfs_map); /* CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); + +/* CSTYLED */ +module_param(zfs_readdir_dnode_prefetch_limit, ulong, 0644); +MODULE_PARM_DESC(zfs_readdir_dnode_prefetch_limit, + "No zfs_readdir prefetch if non-zero and size > this"); #endif