From 7d00a7adc11dcc91ff3cf76dddba31671f8849e8 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 26 Jun 2023 14:33:42 -0700 Subject: [PATCH] Increase limit of redaction list by using spill block Signed-off-by: Paul Dagnelie --- cmd/zfs/zfs_main.c | 4 ++ include/sys/dsl_bookmark.h | 1 + module/zfs/dmu_redact.c | 17 ++++--- module/zfs/dnode.c | 1 + module/zfs/dsl_bookmark.c | 45 ++++++++++++++----- .../redacted_send/redacted_many_clones.ksh | 7 +-- 6 files changed, 49 insertions(+), 26 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index e28f1d04f350..49e983145cac 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -3978,6 +3978,10 @@ zfs_do_redact(int argc, char **argv) (void) fprintf(stderr, gettext("potentially invalid redaction " "snapshot; full dataset names required\n")); break; + case ESRCH: + (void) fprintf(stderr, gettext("attempted to resume redaction " + " with a mismatched redaction list\n")); + break; default: (void) fprintf(stderr, gettext("internal error: %s\n"), strerror(errno)); diff --git a/include/sys/dsl_bookmark.h b/include/sys/dsl_bookmark.h index 353c5c2d260f..d4e559a09037 100644 --- a/include/sys/dsl_bookmark.h +++ b/include/sys/dsl_bookmark.h @@ -72,6 +72,7 @@ typedef struct redaction_list_phys { typedef struct redaction_list { dmu_buf_user_t rl_dbu; redaction_list_phys_t *rl_phys; + dmu_buf_t *rl_bonus; dmu_buf_t *rl_dbuf; uint64_t rl_object; zfs_refcount_t rl_longholds; diff --git a/module/zfs/dmu_redact.c b/module/zfs/dmu_redact.c index 6bd35713ff18..5ac14edfca12 100644 --- a/module/zfs/dmu_redact.c +++ b/module/zfs/dmu_redact.c @@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads, bqueue_enqueue(q, record, sizeof (*record)); return (0); } - redact_nodes = kmem_zalloc(num_threads * + redact_nodes = vmem_zalloc(num_threads * sizeof (*redact_nodes), KM_SLEEP); avl_create(&start_tree, redact_node_compare_start, @@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads, avl_destroy(&start_tree); avl_destroy(&end_tree); - kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes)); + vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes)); if (current_record != NULL) bqueue_enqueue(q, current_record, sizeof (*current_record)); return (err); @@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, numsnaps = fnvlist_num_pairs(redactnvl); if (numsnaps > 0) - args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP); + args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP); nvpair_t *pair = NULL; for (int i = 0; i < numsnaps; i++) { @@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, kmem_free(newredactbook, sizeof (char) * ZFS_MAX_DATASET_NAME_LEN); if (args != NULL) - kmem_free(args, numsnaps * sizeof (*args)); + vmem_free(args, numsnaps * sizeof (*args)); return (SET_ERROR(ENAMETOOLONG)); } err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark); @@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, } else { uint64_t *guids = NULL; if (numsnaps > 0) { - guids = kmem_zalloc(numsnaps * sizeof (uint64_t), + guids = vmem_zalloc(numsnaps * sizeof (uint64_t), KM_SLEEP); } for (int i = 0; i < numsnaps; i++) { @@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, dp = NULL; err = dsl_bookmark_create_redacted(newredactbook, snapname, numsnaps, guids, FTAG, &new_rl); - kmem_free(guids, numsnaps * sizeof (uint64_t)); - if (err != 0) { + vmem_free(guids, numsnaps * sizeof (uint64_t)); + if (err != 0) goto out; - } } for (int i = 0; i < numsnaps; i++) { @@ -1188,7 +1187,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl, } if (args != NULL) - kmem_free(args, numsnaps * sizeof (*args)); + vmem_free(args, numsnaps * sizeof (*args)); if (dp != NULL) dsl_pool_rele(dp, FTAG); if (ds != NULL) { diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index d15268cd7bc7..f67f5c2fcc66 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT(DMU_OT_IS_VALID(ot)); ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || (bonustype == DMU_OT_SA && bonuslen == 0) || + (bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) || (bonustype != DMU_OT_NONE && bonuslen != 0)); ASSERT(DMU_OT_IS_VALID(bonustype)); ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots)); diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c index e04796a0814f..331d4bc39f0a 100644 --- a/module/zfs/dsl_bookmark.c +++ b/module/zfs/dsl_bookmark.c @@ -34,6 +34,7 @@ #include #include #include +#include static int dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname, @@ -459,14 +460,18 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot, SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps); if (redaction_list != NULL || bookmark_redacted) { redaction_list_t *local_rl; + boolean_t spill = B_FALSE; if (bookmark_redacted) { redact_snaps = dsredactsnaps; num_redact_snaps = dsnumsnaps; } + int bonuslen = sizeof (redaction_list_phys_t) + + num_redact_snaps * sizeof (uint64_t); + if (bonuslen > dmu_bonus_max()) + spill = B_TRUE; dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos, DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE, - DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) + - num_redact_snaps * sizeof (uint64_t), tx); + DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx); spa_feature_incr(dp->dp_spa, SPA_FEATURE_REDACTION_BOOKMARKS, tx); @@ -474,10 +479,19 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot, dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl)); dsl_redaction_list_long_hold(dp, local_rl, tag); - ASSERT3U((local_rl)->rl_dbuf->db_size, >=, - sizeof (redaction_list_phys_t) + num_redact_snaps * - sizeof (uint64_t)); - dmu_buf_will_dirty(local_rl->rl_dbuf, tx); + if (!spill) { + ASSERT3U((local_rl)->rl_bonus->db_size, >=, + sizeof (redaction_list_phys_t) + num_redact_snaps * + sizeof (uint64_t)); + dmu_buf_will_dirty(local_rl->rl_bonus, tx); + } else { + dmu_buf_t *db; + VERIFY0(dmu_spill_hold_by_bonus((local_rl)->rl_bonus, DB_RF_MUST_SUCCEED, FTAG, &db)); + dmu_buf_will_fill(db, tx); + VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen, SPA_MINBLOCKSIZE), tx)); + local_rl->rl_phys = db->db_data; + local_rl->rl_dbuf = db; + } memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps, sizeof (uint64_t) * num_redact_snaps); local_rl->rl_phys->rlp_num_snaps = num_redact_snaps; @@ -639,7 +653,7 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx) * If the list of redact snaps will not fit in the bonus buffer with * the furthest reached object and offset, fail. */ - if (dbcra->dbcra_numsnaps > (dmu_bonus_max() - + if (dbcra->dbcra_numsnaps > (spa_maxblocksize(dp->dp_spa) - sizeof (redaction_list_phys_t)) / sizeof (uint64_t)) return (SET_ERROR(E2BIG)); @@ -1213,7 +1227,9 @@ redaction_list_evict_sync(void *rlu) void dsl_redaction_list_rele(redaction_list_t *rl, const void *tag) { - dmu_buf_rele(rl->rl_dbuf, tag); + if (rl->rl_bonus != rl->rl_dbuf) + dmu_buf_rele(rl->rl_dbuf, tag); + dmu_buf_rele(rl->rl_bonus, tag); } int @@ -1221,7 +1237,7 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag, redaction_list_t **rlp) { objset_t *mos = dp->dp_meta_objset; - dmu_buf_t *dbuf; + dmu_buf_t *dbuf, *spill_dbuf; redaction_list_t *rl; int err; @@ -1236,13 +1252,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag, redaction_list_t *winner = NULL; rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP); - rl->rl_dbuf = dbuf; + rl->rl_bonus = dbuf; + if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) { + rl->rl_dbuf = spill_dbuf; + } else { + rl->rl_dbuf = dbuf; + } rl->rl_object = rlobj; - rl->rl_phys = dbuf->db_data; + rl->rl_phys = rl->rl_dbuf->db_data; rl->rl_mos = dp->dp_meta_objset; zfs_refcount_create(&rl->rl_longholds); dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL, - &rl->rl_dbuf); + &rl->rl_bonus); if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) { kmem_free(rl, sizeof (*rl)); rl = winner; diff --git a/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh b/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh index 3386643b295e..9c33f957c290 100755 --- a/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh +++ b/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh @@ -27,7 +27,6 @@ # second (the last block in the file) is common to them all. # 2. Verify a redacted stream with a reasonable redaction list length can # be correctly processed. -# 3. Verify that if the list is too long, the send fails gracefully. # typeset ds_name="many_clones" @@ -56,13 +55,11 @@ for i in {1..64}; do log_must zfs snapshot ${clone}$i@snap done -# The limit isn't necessarily 32 snapshots. The maximum number of snapshots in +# The limit isn't necessarily 64 snapshots. The maximum number of snapshots in # the redacted list is determined in dsl_bookmark_create_redacted_check(). -log_must zfs redact $sendfs@snap book1 $clone{1..32}@snap +log_must zfs redact $sendfs@snap book1 $clone{1..64}@snap log_must eval "zfs send --redact book1 $sendfs@snap >$stream" log_must eval "zfs recv $recvfs <$stream" compare_files $sendfs $recvfs "f2" "$RANGE8" -log_mustnot zfs redact $sendfs@snap book2 $clone{1..64}@snap - log_pass "Redacted send can deal with a large redaction list."