Skip to content

Commit

Permalink
Increase limit of redaction list by using spill block
Browse files Browse the repository at this point in the history
Signed-off-by: Paul Dagnelie <[email protected]>
  • Loading branch information
pcd1193182 committed Jun 28, 2023
1 parent 8e8acab commit 7d00a7a
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 26 deletions.
4 changes: 4 additions & 0 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3978,6 +3978,10 @@ zfs_do_redact(int argc, char **argv)
(void) fprintf(stderr, gettext("potentially invalid redaction "
"snapshot; full dataset names required\n"));
break;
case ESRCH:
(void) fprintf(stderr, gettext("attempted to resume redaction "
" with a mismatched redaction list\n"));
break;
default:
(void) fprintf(stderr, gettext("internal error: %s\n"),
strerror(errno));
Expand Down
1 change: 1 addition & 0 deletions include/sys/dsl_bookmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ typedef struct redaction_list_phys {
typedef struct redaction_list {
dmu_buf_user_t rl_dbu;
redaction_list_phys_t *rl_phys;
dmu_buf_t *rl_bonus;
dmu_buf_t *rl_dbuf;
uint64_t rl_object;
zfs_refcount_t rl_longholds;
Expand Down
17 changes: 8 additions & 9 deletions module/zfs/dmu_redact.c
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
bqueue_enqueue(q, record, sizeof (*record));
return (0);
}
redact_nodes = kmem_zalloc(num_threads *
redact_nodes = vmem_zalloc(num_threads *
sizeof (*redact_nodes), KM_SLEEP);

avl_create(&start_tree, redact_node_compare_start,
Expand Down Expand Up @@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,

avl_destroy(&start_tree);
avl_destroy(&end_tree);
kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
if (current_record != NULL)
bqueue_enqueue(q, current_record, sizeof (*current_record));
return (err);
Expand Down Expand Up @@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,

numsnaps = fnvlist_num_pairs(redactnvl);
if (numsnaps > 0)
args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);

nvpair_t *pair = NULL;
for (int i = 0; i < numsnaps; i++) {
Expand Down Expand Up @@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
kmem_free(newredactbook,
sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
if (args != NULL)
kmem_free(args, numsnaps * sizeof (*args));
vmem_free(args, numsnaps * sizeof (*args));
return (SET_ERROR(ENAMETOOLONG));
}
err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
Expand Down Expand Up @@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
} else {
uint64_t *guids = NULL;
if (numsnaps > 0) {
guids = kmem_zalloc(numsnaps * sizeof (uint64_t),
guids = vmem_zalloc(numsnaps * sizeof (uint64_t),
KM_SLEEP);
}
for (int i = 0; i < numsnaps; i++) {
Expand All @@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
dp = NULL;
err = dsl_bookmark_create_redacted(newredactbook, snapname,
numsnaps, guids, FTAG, &new_rl);
kmem_free(guids, numsnaps * sizeof (uint64_t));
if (err != 0) {
vmem_free(guids, numsnaps * sizeof (uint64_t));
if (err != 0)
goto out;
}
}

for (int i = 0; i < numsnaps; i++) {
Expand Down Expand Up @@ -1188,7 +1187,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
}

if (args != NULL)
kmem_free(args, numsnaps * sizeof (*args));
vmem_free(args, numsnaps * sizeof (*args));
if (dp != NULL)
dsl_pool_rele(dp, FTAG);
if (ds != NULL) {
Expand Down
1 change: 1 addition & 0 deletions module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(DMU_OT_IS_VALID(ot));
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
Expand Down
45 changes: 33 additions & 12 deletions module/zfs/dsl_bookmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <sys/dsl_bookmark.h>
#include <zfs_namecheck.h>
#include <sys/dmu_send.h>
#include <sys/dbuf.h>

static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
Expand Down Expand Up @@ -459,25 +460,38 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
if (redaction_list != NULL || bookmark_redacted) {
redaction_list_t *local_rl;
boolean_t spill = B_FALSE;
if (bookmark_redacted) {
redact_snaps = dsredactsnaps;
num_redact_snaps = dsnumsnaps;
}
int bonuslen = sizeof (redaction_list_phys_t) +
num_redact_snaps * sizeof (uint64_t);
if (bonuslen > dmu_bonus_max())
spill = B_TRUE;
dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) +
num_redact_snaps * sizeof (uint64_t), tx);
DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx);
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_BOOKMARKS, tx);

VERIFY0(dsl_redaction_list_hold_obj(dp,
dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
dsl_redaction_list_long_hold(dp, local_rl, tag);

ASSERT3U((local_rl)->rl_dbuf->db_size, >=,
sizeof (redaction_list_phys_t) + num_redact_snaps *
sizeof (uint64_t));
dmu_buf_will_dirty(local_rl->rl_dbuf, tx);
if (!spill) {
ASSERT3U((local_rl)->rl_bonus->db_size, >=,
sizeof (redaction_list_phys_t) + num_redact_snaps *
sizeof (uint64_t));
dmu_buf_will_dirty(local_rl->rl_bonus, tx);
} else {
dmu_buf_t *db;
VERIFY0(dmu_spill_hold_by_bonus((local_rl)->rl_bonus, DB_RF_MUST_SUCCEED, FTAG, &db));
dmu_buf_will_fill(db, tx);
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen, SPA_MINBLOCKSIZE), tx));
local_rl->rl_phys = db->db_data;
local_rl->rl_dbuf = db;
}
memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps,
sizeof (uint64_t) * num_redact_snaps);
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
Expand Down Expand Up @@ -639,7 +653,7 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
* If the list of redact snaps will not fit in a maximum-size block (the
* list may be stored in a spill block rather than the bonus buffer) along
* with the furthest reached object and offset, fail.
*/
if (dbcra->dbcra_numsnaps > (dmu_bonus_max() -
if (dbcra->dbcra_numsnaps > (spa_maxblocksize(dp->dp_spa) -
sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
return (SET_ERROR(E2BIG));

Expand Down Expand Up @@ -1213,15 +1227,17 @@ redaction_list_evict_sync(void *rlu)
void
dsl_redaction_list_rele(redaction_list_t *rl, const void *tag)
{
dmu_buf_rele(rl->rl_dbuf, tag);
if (rl->rl_bonus != rl->rl_dbuf)
dmu_buf_rele(rl->rl_dbuf, tag);
dmu_buf_rele(rl->rl_bonus, tag);
}

int
dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t **rlp)
{
objset_t *mos = dp->dp_meta_objset;
dmu_buf_t *dbuf;
dmu_buf_t *dbuf, *spill_dbuf;
redaction_list_t *rl;
int err;

Expand All @@ -1236,13 +1252,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t *winner = NULL;

rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
rl->rl_dbuf = dbuf;
rl->rl_bonus = dbuf;
if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) {
rl->rl_dbuf = spill_dbuf;
} else {
rl->rl_dbuf = dbuf;
}
rl->rl_object = rlobj;
rl->rl_phys = dbuf->db_data;
rl->rl_phys = rl->rl_dbuf->db_data;
rl->rl_mos = dp->dp_meta_objset;
zfs_refcount_create(&rl->rl_longholds);
dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
&rl->rl_dbuf);
&rl->rl_bonus);
if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
kmem_free(rl, sizeof (*rl));
rl = winner;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
# second (the last block in the file) is common to them all.
# 2. Verify a redacted stream with a reasonable redaction list length can
# be correctly processed.
# 3. Verify that if the list is too long, the send fails gracefully.
#

typeset ds_name="many_clones"
Expand Down Expand Up @@ -56,13 +55,11 @@ for i in {1..64}; do
log_must zfs snapshot ${clone}$i@snap
done

# The limit isn't necessarily 32 snapshots. The maximum number of snapshots in
# The limit isn't necessarily 64 snapshots. The maximum number of snapshots in
# the redacted list is determined in dsl_bookmark_create_redacted_check().
log_must zfs redact $sendfs@snap book1 $clone{1..32}@snap
log_must zfs redact $sendfs@snap book1 $clone{1..64}@snap
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f2" "$RANGE8"

log_mustnot zfs redact $sendfs@snap book2 $clone{1..64}@snap

log_pass "Redacted send can deal with a large redaction list."

0 comments on commit 7d00a7a

Please sign in to comment.