Skip to content

Commit

Permalink
ddt: dedup table quota enforcement
Browse files Browse the repository at this point in the history
This adds two new pool properties:
- dedup_table_size, the total size of all DDTs on the pool; and
- dedup_table_quota, the maximum possible size of all DDTs in the pool

When set, quota will be enforced by checking when a new entry is about
to be created. If the pool is over its dedup quota, the entry won't be
created, and the corresponding write will be converted to a regular
non-dedup write. Note that existing entries can still be updated (i.e. their
refcounts changed), as that reuses existing space rather than requiring more.

dedup_table_quota can be set to 'auto', which will set it based on the
size of the devices backing the "dedup" allocation class. This makes it
possible to limit the DDTs to the size of a dedup vdev only, such that
when the device fills, no new blocks are deduplicated.

Sponsored-by: iXsystems, Inc.
Sponsored-By: Klara Inc.
Co-authored-by: Rob Wing <[email protected]>
Co-authored-by: Sean Eric Fagan <[email protected]>
Co-authored-by: Allan Jude <[email protected]>
Signed-off-by: Rob Norris <[email protected]>
Signed-off-by: Don Brady <[email protected]>
  • Loading branch information
4 people committed Feb 14, 2024
1 parent a076382 commit bc82d39
Show file tree
Hide file tree
Showing 20 changed files with 567 additions and 22 deletions.
19 changes: 13 additions & 6 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -8501,17 +8501,24 @@ print_time(hrtime_t t, char *timebuf)
}

static nvlist_t *
make_random_props(void)
make_random_pool_props(void)
{
nvlist_t *props;

props = fnvlist_alloc();

if (ztest_random(2) == 0)
return (props);
/* Twenty percent of the time enable ZPOOL_PROP_DEDUP_TABLE_QUOTA */
if (ztest_random(5) == 0) {
fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_DEDUP_TABLE_QUOTA),
2 * 1024 * 1024);
}

fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1);
/* Fifty percent of the time enable ZPOOL_PROP_AUTOREPLACE */
if (ztest_random(2) == 0) {
fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1);
}

return (props);
}
Expand Down Expand Up @@ -8543,7 +8550,7 @@ ztest_init(ztest_shared_t *zs)
zs->zs_mirrors = ztest_opts.zo_mirrors;
nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
NULL, ztest_opts.zo_raid_children, zs->zs_mirrors, 1);
props = make_random_props();
props = make_random_pool_props();

/*
* We don't expect the pool to suspend unless maxfaults == 0,
Expand Down
21 changes: 20 additions & 1 deletion include/sys/ddt.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,22 @@ extern "C" {
* object and (if necessary), removed from an old one. ddt_tree is cleared and
* the next txg can start.
*
* ## Dedup quota
*
* A maximum size for all DDTs on the pool can be set with the
* dedup_table_quota property. This is determined in ddt_over_quota() and
* enforced during ddt_lookup(). If the pool is at or over its quota limit,
* ddt_lookup() will only return entries for existing blocks, as updates are
* still possible. New entries will not be created; instead, ddt_lookup() will
* return NULL. In response, the DDT write stage (zio_ddt_write()) will remove
* the D bit on the block and reissue the IO as a regular write. The block will
* not be deduplicated.
*
* Note that this is based on the on-disk size of the dedup store. Reclaiming
* this space after deleting entries relies on the ZAP "shrinking" behaviour,
* without which, no space would be recovered and the DDT would continue to be
* considered "over quota". See zap_shrink_enabled.
*
* ## Repair IO
*
* If a read on a dedup block fails, but there are other copies of the block in
Expand Down Expand Up @@ -246,7 +262,8 @@ enum ddt_phys_type {
*/

/* State flags for dde_flags */
#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
#define DDE_FLAG_OVERQUOTA (1 << 1) /* entry unusable, no space */

typedef struct {
/* key must be first for ddt_key_compare */
Expand All @@ -265,6 +282,7 @@ typedef struct {

uint8_t dde_flags; /* load state flags */
kcondvar_t dde_cv; /* signaled when load completes */
uint64_t dde_waiters; /* count of waiters on dde_cv */

avl_node_t dde_node; /* ddt_tree node */
} ddt_entry_t;
Expand Down Expand Up @@ -323,6 +341,7 @@ extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
extern uint64_t ddt_get_ddt_dsize(spa_t *spa);
extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);

Expand Down
2 changes: 2 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ typedef enum {
ZPOOL_PROP_BCLONEUSED,
ZPOOL_PROP_BCLONESAVED,
ZPOOL_PROP_BCLONERATIO,
ZPOOL_PROP_DEDUP_TABLE_SIZE,
ZPOOL_PROP_DEDUP_TABLE_QUOTA,
ZPOOL_NUM_PROPS
} zpool_prop_t;

Expand Down
3 changes: 3 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,9 @@ struct spa {
boolean_t spa_waiters_cancel; /* waiters should return */

char *spa_compatibility; /* compatibility file(s) */
uint64_t spa_dedup_table_quota; /* property DDT maximum size */
uint64_t spa_ddt_dsize; /* cached on-disk size of DDT */
uint64_t spa_dedup_class_full_txg; /* txg dedup class was full */

/*
* spa_refcount & spa_config_lock must be the last elements
Expand Down
4 changes: 3 additions & 1 deletion lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -2891,7 +2891,9 @@
<enumerator name='ZPOOL_PROP_BCLONEUSED' value='33'/>
<enumerator name='ZPOOL_PROP_BCLONESAVED' value='34'/>
<enumerator name='ZPOOL_PROP_BCLONERATIO' value='35'/>
<enumerator name='ZPOOL_NUM_PROPS' value='36'/>
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_SIZE' value='36'/>
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_QUOTA' value='37'/>
<enumerator name='ZPOOL_NUM_PROPS' value='38'/>
</enum-decl>
<typedef-decl name='zpool_prop_t' type-id='af1ba157' id='5d0c23fb'/>
<typedef-decl name='regoff_t' type-id='95e97e5e' id='54a2a2a8'/>
Expand Down
19 changes: 19 additions & 0 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,24 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
intval = zpool_get_prop_int(zhp, prop, &src);

switch (prop) {
case ZPOOL_PROP_DEDUP_TABLE_QUOTA:
/*
* If dedup quota is 0, we translate this into 'none'
* (unless literal is set). And if it is UINT64_MAX
* we translate that as 'automatic' (limit to size of
* the dedicated dedup VDEV). Otherwise, fall through
* into the regular number formatting.
*/
if (intval == 0) {
(void) strlcpy(buf, literal ? "0" : "none",
len);
break;
} else if (intval == UINT64_MAX) {
(void) strlcpy(buf, "auto", len);
break;
}
zfs_fallthrough;

case ZPOOL_PROP_SIZE:
case ZPOOL_PROP_ALLOCATED:
case ZPOOL_PROP_FREE:
Expand All @@ -342,6 +360,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
case ZPOOL_PROP_MAXDNODESIZE:
case ZPOOL_PROP_BCLONESAVED:
case ZPOOL_PROP_BCLONEUSED:
case ZPOOL_PROP_DEDUP_TABLE_SIZE:
if (literal)
(void) snprintf(buf, len, "%llu",
(u_longlong_t)intval);
Expand Down
14 changes: 14 additions & 0 deletions lib/libzfs/libzfs_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1685,6 +1685,16 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
"use 'none' to disable quota/refquota"));
goto error;
}
/*
* Pool dedup table quota; force use of 'none' instead of 0
*/
if ((type & ZFS_TYPE_POOL) && *ivalp == 0 &&
(!isnone && !isauto) &&
prop == ZPOOL_PROP_DEDUP_TABLE_QUOTA) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"use 'none' to disable ddt table quota"));
goto error;
}

/*
* Special handling for "*_limit=none". In this case it's not
Expand Down Expand Up @@ -1726,6 +1736,10 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
}
*ivalp = UINT64_MAX;
break;
case ZPOOL_PROP_DEDUP_TABLE_QUOTA:
ASSERT(type & ZFS_TYPE_POOL);
*ivalp = UINT64_MAX;
break;
default:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'auto' is invalid value for '%s'"),
Expand Down
18 changes: 17 additions & 1 deletion man/man7/zpoolprops.7
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
.\" Copyright (c) 2021, Colm Buckley <[email protected]>
.\" Copyright (c) 2023, Klara Inc.
.\"
.Dd January 2, 2024
.Dd January 14, 2024
.Dt ZPOOLPROPS 7
.Os
.
Expand Down Expand Up @@ -73,6 +73,8 @@ The amount of storage used by cloned blocks.
Percentage of pool space used.
This property can also be referred to by its shortened column name,
.Sy cap .
.It Sy dedup_table_size
Total on-disk size of the deduplication table.
.It Sy expandsize
Amount of uninitialized space within the pool or device that can be used to
increase the total capacity of the pool.
Expand Down Expand Up @@ -348,6 +350,20 @@ See
and
.Xr zpool-upgrade 8
for more information on the operation of compatibility feature sets.
.It Sy dedup_table_quota Ns = Ns Ar number Ns | Ns Sy none Ns | Ns Sy auto
This property sets a limit on the on-disk size of the pool's dedup table.
Entries will not be added to the dedup table once this size is reached;
if a dedup table already exists and is larger than this size, its entries
will not be removed as part of setting this property.
Existing entries will still have their reference counts updated.
The actual size limit of the table may be above or below the quota,
depending on the on-disk size of the entries (which may be
approximated for purposes of calculating the quota).
That is, setting a quota size of 1M may result in the maximum size being
slightly below, or slightly above, that value.
Set to
.Sy none
to disable the quota.
When set to
.Sy auto ,
the size of a dedicated dedup vdev is used as the quota limit.
.It Sy dedupditto Ns = Ns Ar number
This property is deprecated and no longer has any effect.
.It Sy delegation Ns = Ns Sy on Ns | Ns Sy off
Expand Down
8 changes: 7 additions & 1 deletion module/zcommon/zpool_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2021, Colm Buckley <[email protected]>
* Copyright (c) 2021, Klara Inc.
* Copyright (c) 2021, 2023, Klara Inc.
*/

#include <sys/zio.h>
Expand Down Expand Up @@ -125,6 +125,9 @@ zpool_prop_init(void)
zprop_register_number(ZPOOL_PROP_BCLONERATIO, "bcloneratio", 0,
PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if cloned>",
"BCLONE_RATIO", B_FALSE, sfeatures);
zprop_register_number(ZPOOL_PROP_DEDUP_TABLE_SIZE, "dedup_table_size",
0, PROP_READONLY, ZFS_TYPE_POOL, "<size>", "DDTSIZE", B_FALSE,
sfeatures);

/* default number properties */
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
Expand All @@ -133,6 +136,9 @@ zpool_prop_init(void)
zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_DEFAULT,
ZFS_TYPE_POOL, "<ashift, 9-16, or 0=default>", "ASHIFT", B_FALSE,
sfeatures);
zprop_register_number(ZPOOL_PROP_DEDUP_TABLE_QUOTA, "dedup_table_quota",
0, PROP_DEFAULT, ZFS_TYPE_POOL, "<size>", "DDTQUOTA", B_FALSE,
sfeatures);

/* default index (boolean) properties */
zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1,
Expand Down
Loading

0 comments on commit bc82d39

Please sign in to comment.