Skip to content

Commit

Permalink
ddt: introduce lightweight entry
Browse files Browse the repository at this point in the history
The idea here is that sometimes you need the contents of an entry with
no intent to modify it, and/or from a place where it's difficult to get
hold of its originating ddt_t to know how to interpret it.

A lightweight entry contains everything you might need to "read" an
entry - its key, type and phys contents - but none of the extras for
modifying it or using it in a larger context. It also has the full
complement of phys slots, so it can represent any kind of dedup entry
without having to know the specific configuration of the table it came
from.

Reviewed-by: Alexander Motin <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Rob Norris <[email protected]>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15893
  • Loading branch information
robn authored and behlendorf committed Aug 16, 2024
1 parent d17ab63 commit 4d686c3
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 34 deletions.
15 changes: 8 additions & 7 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1914,15 +1914,16 @@ dump_log_spacemaps(spa_t *spa)
}

static void
dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
dump_ddt_entry(const ddt_t *ddt, const ddt_lightweight_entry_t *ddlwe,
uint64_t index)
{
const ddt_key_t *ddk = &dde->dde_key;
const ddt_key_t *ddk = &ddlwe->ddlwe_key;
char blkbuf[BP_SPRINTF_LEN];
blkptr_t blk;
int p;

for (p = 0; p < DDT_NPHYS(ddt); p++) {
const ddt_phys_t *ddp = &dde->dde_phys[p];
for (p = 0; p < ddlwe->ddlwe_nphys; p++) {
const ddt_phys_t *ddp = &ddlwe->ddlwe_phys[p];
if (ddp->ddp_phys_birth == 0)
continue;
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
Expand Down Expand Up @@ -1959,7 +1960,7 @@ static void
dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
{
char name[DDT_NAMELEN];
ddt_entry_t dde;
ddt_lightweight_entry_t ddlwe;
uint64_t walk = 0;
dmu_object_info_t doi;
uint64_t count, dspace, mspace;
Expand Down Expand Up @@ -2000,8 +2001,8 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)

(void) printf("%s contents:\n\n", name);

while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
dump_dde(ddt, &dde, walk);
while ((error = ddt_object_walk(ddt, type, class, &walk, &ddlwe)) == 0)
dump_ddt_entry(ddt, &ddlwe, walk);

ASSERT3U(error, ==, ENOENT);

Expand Down
16 changes: 14 additions & 2 deletions include/sys/ddt.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,18 @@ typedef struct {
avl_node_t dde_node; /* ddt_tree node */
} ddt_entry_t;

/*
 * A lightweight entry is for short-lived or transient uses, like iterating or
 * inspecting, when you don't care where it came from.
 *
 * It holds what is needed to read an entry (key, type, class and phys
 * contents) without needing the originating ddt_t to interpret it.
 */
typedef struct {
/* Key of the entry. */
ddt_key_t ddlwe_key;
/* Type of the DDT object the entry was taken from. */
ddt_type_t ddlwe_type;
/* Class of the DDT object the entry was taken from. */
ddt_class_t ddlwe_class;
/* Number of leading ddlwe_phys[] slots in use; readers loop up to this. */
uint8_t ddlwe_nphys;
/*
 * Phys contents. Always sized for DDT_PHYS_MAX slots so that any kind of
 * entry fits, whatever the configuration of the table it came from.
 */
ddt_phys_t ddlwe_phys[DDT_PHYS_MAX];
} ddt_lightweight_entry_t;

/*
* In-core DDT object. This covers all entries and stats for the whole pool
* for a given checksum type.
Expand Down Expand Up @@ -241,7 +253,6 @@ extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize);

extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern ddt_t *ddt_select_checksum(spa_t *spa, enum zio_checksum checksum);
extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
extern void ddt_init(void);
Expand All @@ -263,7 +274,8 @@ extern void ddt_create(spa_t *spa);
extern int ddt_load(spa_t *spa);
extern void ddt_unload(spa_t *spa);
extern void ddt_sync(spa_t *spa, uint64_t txg);
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb,
ddt_lightweight_entry_t *ddlwe);

extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);

Expand Down
13 changes: 12 additions & 1 deletion include/sys/ddt_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ extern "C" {
#define DDT_DIR_VERSION "version"
#define DDT_DIR_FLAGS "flags"

/*
 * Fill a lightweight entry from a live entry.
 *
 *	ddt	table the live entry belongs to; only used for DDT_NPHYS(ddt)
 *	dde	pointer to the live entry to copy from
 *	ddlwe	pointer to the lightweight entry to fill
 *
 * The destination is zeroed first, so any phys slots at or beyond
 * DDT_NPHYS(ddt) are left zeroed. Every parameter use is parenthesized and
 * the loop counter has a macro-private name, so arbitrary pointer
 * expressions (including ones that mention a caller variable named "p")
 * expand correctly. dde and ddlwe are evaluated more than once, so they
 * must not have side effects.
 */
#define DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, ddlwe) do {			\
	memset((ddlwe), 0, sizeof (*(ddlwe)));				\
	(ddlwe)->ddlwe_key = (dde)->dde_key;				\
	(ddlwe)->ddlwe_type = (dde)->dde_type;				\
	(ddlwe)->ddlwe_class = (dde)->dde_class;			\
	(ddlwe)->ddlwe_nphys = DDT_NPHYS(ddt);				\
	for (int _ddlwe_p = 0; _ddlwe_p < (ddlwe)->ddlwe_nphys;	\
	    _ddlwe_p++)							\
		(ddlwe)->ddlwe_phys[_ddlwe_p] =				\
		    (dde)->dde_phys[_ddlwe_p];				\
} while (0)

/*
* Ops vector to access a specific DDT object type.
*/
Expand Down Expand Up @@ -91,7 +102,7 @@ extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
char *name);
extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
uint64_t *walk, ddt_entry_t *dde);
uint64_t *walk, ddt_lightweight_entry_t *ddlwe);
extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
uint64_t *count);
extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
Expand Down
2 changes: 1 addition & 1 deletion include/sys/dsl_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx);
ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx);
void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
Expand Down
31 changes: 16 additions & 15 deletions module/zfs/ddt.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,13 +401,20 @@ ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

/*
 * Fetch the next entry from the DDT object identified by (type, class).
 *
 * NOTE: this is a rendered diff hunk without +/- markers, so both the
 * pre-change lines (taking a ddt_entry_t) and the post-change lines (taking
 * a ddt_lightweight_entry_t) appear below.
 *
 * The object must exist (asserted). The call is dispatched through
 * ddt_ops[type]->ddt_op_walk, which receives the walk cursor and buffers
 * for the key and the phys array. In the post-change form, a successful
 * walk also records type, class and DDT_NPHYS(ddt) in the lightweight
 * entry; on failure those fields are not written here and the op's error
 * is returned unchanged.
 */
int
ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
uint64_t *walk, ddt_entry_t *dde)
uint64_t *walk, ddt_lightweight_entry_t *ddlwe)
{
ASSERT(ddt_object_exists(ddt, type, class));

return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
ddt->ddt_object[type][class], walk, &dde->dde_key,
dde->dde_phys, sizeof (dde->dde_phys)));
int error = ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
ddt->ddt_object[type][class], walk, &ddlwe->ddlwe_key,
ddlwe->ddlwe_phys, sizeof (ddlwe->ddlwe_phys));
if (error == 0) {
ddlwe->ddlwe_type = type;
ddlwe->ddlwe_class = class;
ddlwe->ddlwe_nphys = DDT_NPHYS(ddt);
return (0);
}
return (error);
}

int
Expand Down Expand Up @@ -572,12 +579,6 @@ ddt_select(spa_t *spa, const blkptr_t *bp)
return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]);
}

/*
 * Return the pool's DDT for the given checksum, read directly from
 * spa->spa_ddt[checksum]. No range or NULL check is made on the lookup.
 */
ddt_t *
ddt_select_checksum(spa_t *spa, enum zio_checksum checksum)
{
return (spa->spa_ddt[checksum]);
}

void
ddt_enter(ddt_t *ddt)
{
Expand Down Expand Up @@ -1347,8 +1348,10 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
* traversing.)
*/
if (nclass < oclass) {
ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
dsl_scan_ddt_entry(dp->dp_scan,
ddt->ddt_checksum, dde, tx);
ddt->ddt_checksum, &ddlwe, tx);
}
}
}
Expand Down Expand Up @@ -1455,7 +1458,7 @@ ddt_sync(spa_t *spa, uint64_t txg)
}

int
ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde)
ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_lightweight_entry_t *ddlwe)
{
do {
do {
Expand All @@ -1468,10 +1471,8 @@ ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde)
ddb->ddb_class)) {
error = ddt_object_walk(ddt,
ddb->ddb_type, ddb->ddb_class,
&ddb->ddb_cursor, dde);
&ddb->ddb_cursor, ddlwe);
}
dde->dde_type = ddb->ddb_type;
dde->dde_class = ddb->ddb_class;
if (error == 0)
return (0);
if (error != ENOENT)
Expand Down
15 changes: 7 additions & 8 deletions module/zfs/dsl_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -2929,10 +2929,10 @@ enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)

void
dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx)
ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx)
{
(void) tx;
const ddt_key_t *ddk = &dde->dde_key;
const ddt_key_t *ddk = &ddlwe->ddlwe_key;
blkptr_t bp;
zbookmark_phys_t zb = { 0 };

Expand All @@ -2953,9 +2953,8 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
if (scn->scn_done_txg != 0)
return;

ddt_t *ddt = ddt_select_checksum(tx->tx_pool->dp_spa, checksum);
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];
for (int p = 0; p < ddlwe->ddlwe_nphys; p++) {
ddt_phys_t *ddp = &ddlwe->ddlwe_phys[p];

if (ddp->ddp_phys_birth == 0 ||
ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
Expand Down Expand Up @@ -3004,11 +3003,11 @@ static void
dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
{
ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark;
ddt_entry_t dde = {{{{0}}}};
ddt_lightweight_entry_t ddlwe = {0};
int error;
uint64_t n = 0;

while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) {
while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &ddlwe)) == 0) {
ddt_t *ddt;

if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max)
Expand All @@ -3023,7 +3022,7 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum];
ASSERT(avl_first(&ddt->ddt_tree) == NULL);

dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &ddlwe, tx);
n++;

if (dsl_scan_check_suspend(scn, NULL))
Expand Down

0 comments on commit 4d686c3

Please sign in to comment.