Skip to content

Commit

Permalink
Several improvements to ARC shrinking
Browse files Browse the repository at this point in the history
 - When receiving a memory pressure signal from the OS, be more strict
about actually freeing memory.  Otherwise the kernel may come back and
request much more.  Return how much arc_c was actually reduced by the
request, which may be less than requested.
 - Add a new module parameter, zfs_arc_shrinker_seeks, to balance the
cost of ARC eviction relative to the page cache and other subsystems.
 - Slightly update the Linux arc_set_sys_free() math.

Signed-off-by:	Alexander Motin <[email protected]>
Sponsored by:	iXsystems, Inc.
  • Loading branch information
amotin committed May 14, 2024
1 parent abec7dc commit e2b786d
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 26 deletions.
4 changes: 2 additions & 2 deletions include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1058,10 +1058,10 @@ extern uint_t arc_lotsfree_percent;
extern uint64_t zfs_arc_min;
extern uint64_t zfs_arc_max;

extern void arc_reduce_target_size(int64_t to_free);
extern uint64_t arc_reduce_target_size(uint64_t to_free);
extern boolean_t arc_reclaim_needed(void);
extern void arc_kmem_reap_soon(void);
extern void arc_wait_for_eviction(uint64_t, boolean_t);
extern void arc_wait_for_eviction(uint64_t, boolean_t, boolean_t);

extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void);
Expand Down
7 changes: 7 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,13 @@ even with a small average compressed block size of ~8 KiB.
The parameter can be set to 0 (zero) to disable the limit,
and only applies on Linux.
.
.It Sy zfs_arc_shrinker_seeks Ns = Ns Sy 2 Pq int
Relative cost of ARC eviction on Linux, i.e. the number of seeks needed to
restore an evicted page.
Bigger values make the ARC more precious and evictions smaller, compared to
other kernel subsystems.
A value of 4 means parity with the page cache.
.
.It Sy zfs_arc_sys_free Ns = Ns Sy 0 Ns B Pq u64
The target number of bytes the ARC should leave as free memory on the system.
If zero, equivalent to the bigger of
Expand Down
4 changes: 2 additions & 2 deletions module/os/freebsd/zfs/arc_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,15 @@ arc_lowmem(void *arg __unused, int howto __unused)
return;
to_free = (can_free >> arc_shrink_shift) - MIN(free_memory, 0);
DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free);
arc_reduce_target_size(to_free);
to_free = arc_reduce_target_size(to_free);

/*
* It is unsafe to block here in arbitrary threads, because we can come
* here from ARC itself and may hold ARC locks and thus risk a deadlock
* with ARC reclaim thread.
*/
if (curproc == pageproc)
arc_wait_for_eviction(to_free, B_FALSE);
arc_wait_for_eviction(to_free, B_FALSE, B_FALSE);
}

void
Expand Down
34 changes: 25 additions & 9 deletions module/os/linux/zfs/arc_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include <linux/page_compat.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/version.h>
#endif
#include <sys/callb.h>
#include <sys/kstat.h>
Expand All @@ -58,6 +59,7 @@
#include <sys/trace_zfs.h>
#include <sys/aggsum.h>

#ifdef _KERNEL
/*
* This is a limit on how many pages the ARC shrinker makes available for
* eviction in response to one page allocation attempt. Note that in
Expand All @@ -72,11 +74,19 @@
* See also the comment in arc_shrinker_count().
* Set to 0 to disable limit.
*/
int zfs_arc_shrinker_limit = 10000;
static int zfs_arc_shrinker_limit = 10000;

/*
* Relative cost of ARC eviction, i.e. the number of seeks needed to restore an
* evicted page. Bigger values make the ARC more precious and evictions smaller
* compared to other kernel subsystems. A value of 4 means parity with the
* page cache.
*/
static int zfs_arc_shrinker_seeks = DEFAULT_SEEKS;

#ifdef CONFIG_MEMORY_HOTPLUG
static struct notifier_block arc_hotplug_callback_mem_nb;
#endif
#endif

/*
* Return a default max arc size based on the amount of physical memory.
Expand Down Expand Up @@ -222,13 +232,13 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
* Evict the requested number of pages by reducing arc_c and waiting
* for the requested amount of data to be evicted.
*/
arc_reduce_target_size(ptob(sc->nr_to_scan));
arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
uint64_t to_free = arc_reduce_target_size(ptob(sc->nr_to_scan));
arc_wait_for_eviction(to_free, B_FALSE, B_FALSE);
if (current->reclaim_state != NULL)
#ifdef HAVE_RECLAIM_STATE_RECLAIMED
current->reclaim_state->reclaimed += sc->nr_to_scan;
current->reclaim_state->reclaimed += btop(to_free);
#else
current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
current->reclaim_state->reclaimed_slab += btop(to_free);
#endif

/*
Expand All @@ -250,7 +260,7 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
ARCSTAT_BUMP(arcstat_memory_direct_count);
}

return (sc->nr_to_scan);
return (btop(to_free));
}

static struct shrinker *arc_shrinker = NULL;
Expand Down Expand Up @@ -325,13 +335,17 @@ arc_set_sys_free(uint64_t allmem)
/*
* Base wmark_low is 4 * the square root of Kbytes of RAM.
*/
long wmark = 4 * int_sqrt(allmem/1024) * 1024;
long wmark = int_sqrt(allmem / 1024 * 16) * 1024;

/*
* Clamp to between 128K and 64MB.
* Clamp to between 128K and 256MB (64MB on kernels older than 5.7).
*/
wmark = MAX(wmark, 128 * 1024);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)
wmark = MIN(wmark, 256 * 1024 * 1024);
#else
wmark = MIN(wmark, 64 * 1024 * 1024);
#endif

/*
* watermark_boost can increase the wmark by up to 150%.
Expand All @@ -357,7 +371,7 @@ arc_lowmem_init(void)
* swapping out pages when it is preferable to shrink the arc.
*/
arc_shrinker = spl_register_shrinker("zfs-arc-shrinker",
arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
arc_shrinker_count, arc_shrinker_scan, zfs_arc_shrinker_seeks);
VERIFY(arc_shrinker);

arc_set_sys_free(allmem);
Expand Down Expand Up @@ -500,3 +514,5 @@ arc_unregister_hotplug(void)

ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
"Limit on number of pages that ARC shrinker can reclaim at once");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_seeks, INT, ZMOD_RW,
"Relative cost of ARC eviction vs other kernel subsystems");
36 changes: 23 additions & 13 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -4398,13 +4398,19 @@ arc_flush(spa_t *spa, boolean_t retry)
(void) arc_flush_state(arc_uncached, guid, ARC_BUFC_METADATA, retry);
}

void
arc_reduce_target_size(int64_t to_free)
uint64_t
arc_reduce_target_size(uint64_t to_free)
{
uint64_t c = arc_c;

/*
* Get the actual arc size even if we don't need it. This updates
* the aggsum lower bound estimate for arc_is_overflowing().
*/
uint64_t asize = aggsum_value(&arc_sums.arcstat_size);

if (c <= arc_c_min)
return;
return (0);

/*
* All callers want the ARC to actually evict (at least) this much
Expand All @@ -4414,16 +4420,17 @@ arc_reduce_target_size(int64_t to_free)
* immediately have arc_c < arc_size and therefore the arc_evict_zthr
* will evict.
*/
uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
if (asize < c)
to_free += c - asize;
arc_c = MAX((int64_t)c - to_free, (int64_t)arc_c_min);
c = MIN(c, MAX(asize, arc_c_min));
to_free = MIN(to_free, c - arc_c_min);
arc_c = c - to_free;

/* See comment in arc_evict_cb_check() on why lock+flag */
mutex_enter(&arc_evict_lock);
arc_evict_needed = B_TRUE;
mutex_exit(&arc_evict_lock);
zthr_wakeup(arc_evict_zthr);

return (to_free);
}

/*
Expand Down Expand Up @@ -4816,7 +4823,7 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, const void *tag)
* of ARC behavior and settings. See arc_lowmem_init().
*/
void
arc_wait_for_eviction(uint64_t amount, boolean_t use_reserve)
arc_wait_for_eviction(uint64_t amount, boolean_t lax, boolean_t use_reserve)
{
switch (arc_is_overflowing(use_reserve)) {
case ARC_OVF_NONE:
Expand All @@ -4832,11 +4839,14 @@ arc_wait_for_eviction(uint64_t amount, boolean_t use_reserve)
* taking the global lock here every time without waiting for
* the actual eviction creates a significant lock contention.
*/
if (!arc_evict_needed) {
arc_evict_needed = B_TRUE;
zthr_wakeup(arc_evict_zthr);
if (lax) {
if (!arc_evict_needed) {
arc_evict_needed = B_TRUE;
zthr_wakeup(arc_evict_zthr);
}
return;
}
return;
zfs_fallthrough;
case ARC_OVF_SEVERE:
default:
{
Expand Down Expand Up @@ -4913,7 +4923,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, const void *tag,
* under arc_c. See the comment above zfs_arc_eviction_pct.
*/
arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100,
alloc_flags & ARC_HDR_USE_RESERVE);
B_TRUE, alloc_flags & ARC_HDR_USE_RESERVE);

arc_buf_contents_t type = arc_buf_type(hdr);
if (type == ARC_BUFC_METADATA) {
Expand Down

0 comments on commit e2b786d

Please sign in to comment.