Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase limit of redaction list by using spill block #15018

Merged
merged 6 commits into from
Aug 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -5292,8 +5292,18 @@ dump_one_objset(const char *dsname, void *arg)
avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL;
dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) {
mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj);
if (dbn->dbn_phys.zbm_redaction_obj != 0)
global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++;
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
global_feature_count[
SPA_FEATURE_REDACTION_BOOKMARKS]++;
objset_t *mos = os->os_spa->spa_meta_objset;
dnode_t *rl;
VERIFY0(dnode_hold(mos,
dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
if (rl->dn_have_spill) {
global_feature_count[
SPA_FEATURE_REDACTION_LIST_SPILL]++;
}
}
if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)
global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++;
}
Expand Down Expand Up @@ -8060,6 +8070,7 @@ dump_zpool(spa_t *spa)
for (spa_feature_t f = 0; f < SPA_FEATURES; f++)
global_feature_count[f] = UINT64_MAX;
global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0;
global_feature_count[SPA_FEATURE_REDACTION_LIST_SPILL] = 0;
global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0;
global_feature_count[SPA_FEATURE_LIVELIST] = 0;

Expand Down
4 changes: 4 additions & 0 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3978,6 +3978,10 @@ zfs_do_redact(int argc, char **argv)
(void) fprintf(stderr, gettext("potentially invalid redaction "
"snapshot; full dataset names required\n"));
break;
case ESRCH:
(void) fprintf(stderr, gettext("attempted to resume redaction "
" with a mismatched redaction list\n"));
pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved
break;
default:
(void) fprintf(stderr, gettext("internal error: %s\n"),
strerror(errno));
Expand Down
1 change: 1 addition & 0 deletions include/sys/dsl_bookmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ typedef struct redaction_list_phys {
typedef struct redaction_list {
dmu_buf_user_t rl_dbu;
redaction_list_phys_t *rl_phys;
dmu_buf_t *rl_bonus;
dmu_buf_t *rl_dbuf;
uint64_t rl_object;
zfs_refcount_t rl_longholds;
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ typedef enum spa_feature {
SPA_FEATURE_BLAKE3,
SPA_FEATURE_BLOCK_CLONING,
SPA_FEATURE_AVZ_V2,
SPA_FEATURE_REDACTION_LIST_SPILL,
SPA_FEATURES
} spa_feature_t;

Expand Down
9 changes: 5 additions & 4 deletions lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@
<elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2184' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2240' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand Down Expand Up @@ -5809,7 +5809,8 @@
<enumerator name='SPA_FEATURE_BLAKE3' value='36'/>
<enumerator name='SPA_FEATURE_BLOCK_CLONING' value='37'/>
<enumerator name='SPA_FEATURE_AVZ_V2' value='38'/>
<enumerator name='SPA_FEATURES' value='39'/>
<enumerator name='SPA_FEATURE_REDACTION_LIST_SPILL' value='39'/>
<enumerator name='SPA_FEATURES' value='40'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<qualified-type-def type-id='22cce67b' const='yes' id='d2816df0'/>
Expand Down Expand Up @@ -8706,8 +8707,8 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='17472' id='dd432c71'>
<subrange length='39' type-id='7359adad' id='ae4a9561'/>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='17920' id='dd432c71'>
<subrange length='40' type-id='7359adad' id='ae4a9561'/>
</array-type-def>
<enum-decl name='zfeature_flags' id='6db816a4'>
<underlying-type type-id='9cac1fee'/>
Expand Down
12 changes: 12 additions & 0 deletions man/man7/zpool-features.7
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,18 @@ once all filesystems that have ever had their
property set to
.Sy zstd
are destroyed.
.
.feature com.delphix redaction_list_spill no redaction_bookmarks
This feature enables the redaction list created by zfs redact to store
many more entries.
It becomes
.Sy active
when a redaction list is created with more than 36 entries,
and returns to being
.Sy enabled
when no redaction lists with more than 36 entries remain in the pool.
For more information about redacted sends, see
.Xr zfs-send 8 .
.El
.
.Sh SEE ALSO
Expand Down
12 changes: 12 additions & 0 deletions module/zcommon/zfeature_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,18 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);

{
static const spa_feature_t redact_list_spill_deps[] = {
SPA_FEATURE_REDACTION_BOOKMARKS,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_REDACTION_LIST_SPILL,
pcd1193182 marked this conversation as resolved.
Show resolved Hide resolved
"com.delphix:redaction_list_spill", "redaction_list_spill",
"Support for increased number of redaction_snapshot "
"arguments in zfs redact.", 0, ZFEATURE_TYPE_BOOLEAN,
redact_list_spill_deps, sfeatures);
}

zfs_mod_list_supported_free(sfeatures);
}

Expand Down
17 changes: 8 additions & 9 deletions module/zfs/dmu_redact.c
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
bqueue_enqueue(q, record, sizeof (*record));
return (0);
}
redact_nodes = kmem_zalloc(num_threads *
redact_nodes = vmem_zalloc(num_threads *
sizeof (*redact_nodes), KM_SLEEP);

avl_create(&start_tree, redact_node_compare_start,
Expand Down Expand Up @@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,

avl_destroy(&start_tree);
avl_destroy(&end_tree);
kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
if (current_record != NULL)
bqueue_enqueue(q, current_record, sizeof (*current_record));
return (err);
Expand Down Expand Up @@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,

numsnaps = fnvlist_num_pairs(redactnvl);
if (numsnaps > 0)
args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);

nvpair_t *pair = NULL;
for (int i = 0; i < numsnaps; i++) {
Expand Down Expand Up @@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
kmem_free(newredactbook,
sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
if (args != NULL)
kmem_free(args, numsnaps * sizeof (*args));
vmem_free(args, numsnaps * sizeof (*args));
return (SET_ERROR(ENAMETOOLONG));
}
err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
Expand Down Expand Up @@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
} else {
uint64_t *guids = NULL;
if (numsnaps > 0) {
guids = kmem_zalloc(numsnaps * sizeof (uint64_t),
guids = vmem_zalloc(numsnaps * sizeof (uint64_t),
KM_SLEEP);
}
for (int i = 0; i < numsnaps; i++) {
Expand All @@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
dp = NULL;
err = dsl_bookmark_create_redacted(newredactbook, snapname,
numsnaps, guids, FTAG, &new_rl);
kmem_free(guids, numsnaps * sizeof (uint64_t));
if (err != 0) {
vmem_free(guids, numsnaps * sizeof (uint64_t));
if (err != 0)
goto out;
}
}

for (int i = 0; i < numsnaps; i++) {
Expand Down Expand Up @@ -1188,7 +1187,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
}

if (args != NULL)
kmem_free(args, numsnaps * sizeof (*args));
vmem_free(args, numsnaps * sizeof (*args));
if (dp != NULL)
dsl_pool_rele(dp, FTAG);
if (ds != NULL) {
Expand Down
1 change: 1 addition & 0 deletions module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(DMU_OT_IS_VALID(ot));
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
Expand Down
67 changes: 52 additions & 15 deletions module/zfs/dsl_bookmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <sys/dsl_bookmark.h>
#include <zfs_namecheck.h>
#include <sys/dmu_send.h>
#include <sys/dbuf.h>

static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
Expand Down Expand Up @@ -459,25 +460,42 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
if (redaction_list != NULL || bookmark_redacted) {
redaction_list_t *local_rl;
boolean_t spill = B_FALSE;
if (bookmark_redacted) {
redact_snaps = dsredactsnaps;
num_redact_snaps = dsnumsnaps;
}
int bonuslen = sizeof (redaction_list_phys_t) +
num_redact_snaps * sizeof (uint64_t);
if (bonuslen > dmu_bonus_max())
spill = B_TRUE;
dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) +
num_redact_snaps * sizeof (uint64_t), tx);
DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx);
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_BOOKMARKS, tx);
if (spill) {
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_LIST_SPILL, tx);
}

VERIFY0(dsl_redaction_list_hold_obj(dp,
dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
dsl_redaction_list_long_hold(dp, local_rl, tag);

ASSERT3U((local_rl)->rl_dbuf->db_size, >=,
sizeof (redaction_list_phys_t) + num_redact_snaps *
sizeof (uint64_t));
dmu_buf_will_dirty(local_rl->rl_dbuf, tx);
if (!spill) {
ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen);
dmu_buf_will_dirty(local_rl->rl_bonus, tx);
} else {
dmu_buf_t *db;
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
DB_RF_MUST_SUCCEED, FTAG, &db));
dmu_buf_will_fill(db, tx);
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
SPA_MINBLOCKSIZE), tx));
local_rl->rl_phys = db->db_data;
local_rl->rl_dbuf = db;
}
memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps,
sizeof (uint64_t) * num_redact_snaps);
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
Expand Down Expand Up @@ -636,11 +654,15 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
SPA_FEATURE_REDACTION_BOOKMARKS))
return (SET_ERROR(ENOTSUP));
/*
* If the list of redact snaps will not fit in the bonus buffer with
* the furthest reached object and offset, fail.
* If the list of redact snaps will not fit in the bonus buffer (or
* spill block, with the REDACTION_LIST_SPILL feature) with the
* furthest reached object and offset, fail.
*/
if (dbcra->dbcra_numsnaps > (dmu_bonus_max() -
sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa,
SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) :
dmu_bonus_max()) -
sizeof (redaction_list_phys_t)) / sizeof (uint64_t);
if (dbcra->dbcra_numsnaps > snaplimit)
return (SET_ERROR(E2BIG));

if (dsl_bookmark_create_nvl_validate_pair(
Expand Down Expand Up @@ -1040,6 +1062,14 @@ dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name,
}

if (dbn->dbn_phys.zbm_redaction_obj != 0) {
dnode_t *rl;
VERIFY0(dnode_hold(mos,
dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
if (rl->dn_have_spill) {
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_REDACTION_LIST_SPILL, tx);
}
dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
Expand Down Expand Up @@ -1213,15 +1243,17 @@ redaction_list_evict_sync(void *rlu)
void
dsl_redaction_list_rele(redaction_list_t *rl, const void *tag)
{
dmu_buf_rele(rl->rl_dbuf, tag);
if (rl->rl_bonus != rl->rl_dbuf)
dmu_buf_rele(rl->rl_dbuf, tag);
dmu_buf_rele(rl->rl_bonus, tag);
}

int
dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t **rlp)
{
objset_t *mos = dp->dp_meta_objset;
dmu_buf_t *dbuf;
dmu_buf_t *dbuf, *spill_dbuf;
redaction_list_t *rl;
int err;

Expand All @@ -1236,13 +1268,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t *winner = NULL;

rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
rl->rl_dbuf = dbuf;
rl->rl_bonus = dbuf;
if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) {
rl->rl_dbuf = spill_dbuf;
} else {
rl->rl_dbuf = dbuf;
}
rl->rl_object = rlobj;
rl->rl_phys = dbuf->db_data;
rl->rl_phys = rl->rl_dbuf->db_data;
rl->rl_mos = dp->dp_meta_objset;
zfs_refcount_create(&rl->rl_longholds);
dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
&rl->rl_dbuf);
&rl->rl_bonus);
if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
kmem_free(rl, sizeof (*rl));
rl = winner;
Expand Down
10 changes: 10 additions & 0 deletions module/zfs/dsl_destroy.c
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
NULL) {
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
dnode_t *rl;
VERIFY0(dnode_hold(mos,
dbn->dbn_phys.zbm_redaction_obj, FTAG,
&rl));
if (rl->dn_have_spill) {
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_REDACTION_LIST_SPILL,
tx);
}
dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ typeset -a properties=(
"feature@log_spacemap"
"feature@device_rebuild"
"feature@draid"
"feature@redaction_list_spill"
)

if is_linux || is_freebsd; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
# second (the last block in the file) is common to them all.
# 2. Verify a redacted stream with a reasonable redaction list length can
# be correctly processed.
# 3. Verify that if the list is too long, the send fails gracefully.
#

typeset ds_name="many_clones"
Expand Down Expand Up @@ -56,13 +55,18 @@ for i in {1..64}; do
log_must zfs snapshot ${clone}$i@snap
done

# The limit isn't necessarily 32 snapshots. The maximum number of snapshots in
# The limit isn't necessarily 64 snapshots. The maximum number of snapshots in
# the redaction list is determined in dsl_bookmark_create_redacted_check().
log_must zfs redact $sendfs@snap book1 $clone{1..32}@snap
log_must zfs redact $sendfs@snap book1 $clone{1..64}@snap
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f2" "$RANGE8"

log_mustnot zfs redact $sendfs@snap book2 $clone{1..64}@snap
rls_value="$(zpool get -H -o value feature@redaction_list_spill $POOL)"
if [ "$rls_value" = "active" ]; then
log_note "redaction_list_spill feature active"
else
log_fail "redaction_list_spill feature not active"
fi

log_pass "Redacted send can deal with a large redaction list."