Skip to content

Commit

Permalink
ddt: rework access to phys array slots
Browse files Browse the repository at this point in the history
The "flat phys" feature will use only a single phys slot for all
entries, which means the old "single", "double" etc naming now makes no
sense, and more importantly, means that choosing the right slot for a
given block pointer will depend on how many slots are in use for a given
DDT.

This removes the old names, and adds accessor macros to decouple
specific phys array indexes from any particular meaning.

(These macros look strange in isolation, mainly in the way they take the
ddt_t* as an arg but don't use it. This is kept as a separate commit mostly
to introduce the concept to the reader before the "flat phys" commit
extends it).

Reviewed-by: Alexander Motin <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Rob Norris <[email protected]>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15893
  • Loading branch information
robn authored and behlendorf committed Aug 16, 2024
1 parent d63f5d7 commit d17ab63
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 65 deletions.
13 changes: 6 additions & 7 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1916,21 +1916,20 @@ dump_log_spacemaps(spa_t *spa)
/*
 * Print one line for each populated phys slot of a dedup table entry.
 *
 * For every slot of dde->dde_phys whose ddp_phys_birth is non-zero, a block
 * pointer is rebuilt with ddt_bp_create() from the table's checksum, the
 * entry key and that slot, formatted with snprintf_blkptr(), and printed
 * together with the caller-supplied index and the slot's refcount. Slots
 * with a zero ddp_phys_birth are skipped.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so the
 * pre-change and post-change lines appear together (the two loop headers and
 * the two printf() variants). The older variant labels a slot with a name
 * from types[]; the newer one prints the numeric slot index instead.
 */
static void
dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
{
const ddt_phys_t *ddp = dde->dde_phys;
const ddt_key_t *ddk = &dde->dde_key;
const char *types[4] = { "ditto", "single", "double", "triple" };
char blkbuf[BP_SPRINTF_LEN];
blkptr_t blk;
int p;

for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (p = 0; p < DDT_NPHYS(ddt); p++) {
const ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth == 0)
continue;
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
(void) printf("index %llx refcnt %llu %s %s\n",
(void) printf("index %llx refcnt %llu phys %d %s\n",
(u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
types[p], blkbuf);
p, blkbuf);
}
}

Expand Down Expand Up @@ -5724,7 +5723,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
VERIFY3P(dde, !=, NULL);

/* Get the phys for this variant */
ddt_phys_t *ddp = ddt_phys_select(dde, bp);
ddt_phys_t *ddp = ddt_phys_select(ddt, dde, bp);
VERIFY3P(ddp, !=, NULL);

/*
Expand All @@ -5751,7 +5750,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
dde->dde_lead_zio[idx] = (zio_t *)(uintptr_t)B_TRUE;

/* Consume a reference for this block. */
VERIFY3U(ddt_phys_total_refcnt(dde), >, 0);
VERIFY3U(ddt_phys_total_refcnt(ddt, dde), >, 0);
ddt_phys_decref(ddp);

if (seen) {
Expand Down
27 changes: 10 additions & 17 deletions include/sys/ddt.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,19 +137,10 @@ typedef struct {
uint64_t ddp_phys_birth;
} ddt_phys_t;

/*
* Named indexes into the ddt_phys_t array in each entry.
*
* Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However,
* we maintain the ability to free existing dedup-ditto blocks.
*/
enum ddt_phys_type {
DDT_PHYS_DITTO = 0,
DDT_PHYS_SINGLE = 1,
DDT_PHYS_DOUBLE = 2,
DDT_PHYS_TRIPLE = 3,
DDT_PHYS_TYPES
};
/*
 * Accessors for the ddt_phys_t array held in each entry. They decouple
 * callers from the meaning of any particular array index.
 *
 * DDT_PHYS_MAX			capacity of the per-entry phys arrays.
 * DDT_NPHYS(ddt)		number of phys slots in use for this DDT;
 *				currently always DDT_PHYS_MAX.
 * DDT_PHYS_IS_DITTO(ddt, p)	true if slot p is the legacy dedup-ditto
 *				slot (slot 0) and ddt is non-NULL.
 * DDT_PHYS_FOR_COPIES(ddt, p)	slot index for a block with p copies;
 *				currently the identity mapping.
 *
 * The ddt argument does not yet influence any result; it is only tested so
 * that it counts as used. Every macro argument is parenthesised in the
 * expansion so that an expression argument (e.g. "x & 1" or "c ? a : b")
 * keeps its own precedence.
 */
#define	DDT_PHYS_MAX			(4)
#define	DDT_NPHYS(ddt)			((ddt) ? DDT_PHYS_MAX : DDT_PHYS_MAX)
#define	DDT_PHYS_IS_DITTO(ddt, p)	((ddt) && (p) == 0)
#define	DDT_PHYS_FOR_COPIES(ddt, p)	((ddt) ? (p) : (p))

/*
* A "live" entry, holding changes to an entry made this txg, and other data to
Expand All @@ -162,11 +153,11 @@ enum ddt_phys_type {

typedef struct {
/* key must be first for ddt_key_compare */
ddt_key_t dde_key; /* ddt_tree key */
ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* on-disk data */
ddt_key_t dde_key; /* ddt_tree key */
ddt_phys_t dde_phys[DDT_PHYS_MAX]; /* on-disk data */

/* in-flight update IOs */
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
zio_t *dde_lead_zio[DDT_PHYS_MAX];

/* copy of data after a repair read, to be rewritten */
struct abd *dde_repair_abd;
Expand Down Expand Up @@ -234,7 +225,8 @@ extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
extern void ddt_phys_clear(ddt_phys_t *ddp);
extern void ddt_phys_addref(ddt_phys_t *ddp);
extern void ddt_phys_decref(ddt_phys_t *ddp);
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
extern ddt_phys_t *ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde,
const blkptr_t *bp);

extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
Expand All @@ -249,6 +241,7 @@ extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize);

extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern ddt_t *ddt_select_checksum(spa_t *spa, enum zio_checksum checksum);
extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
extern void ddt_init(void);
Expand Down
2 changes: 1 addition & 1 deletion include/sys/ddt_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
*/
#define DDT_NAMELEN 32

extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt, const ddt_entry_t *dde);

extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);

Expand Down
54 changes: 31 additions & 23 deletions module/zfs/ddt.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,11 +540,10 @@ ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg)
}

ddt_phys_t *
ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp)
ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde, const blkptr_t *bp)
{
ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys;

for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = (ddt_phys_t *)&dde->dde_phys[p];
if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) &&
BP_GET_BIRTH(bp) == ddp->ddp_phys_birth)
return (ddp);
Expand All @@ -553,12 +552,15 @@ ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp)
}

/*
 * Return the sum of ddp_refcnt over the phys slots of an entry, leaving the
 * dedup-ditto slot out of the total.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so both
 * the old and the new signature and loop header are shown. The older loop
 * walks DDT_PHYS_SINGLE..DDT_PHYS_TRIPLE directly; the newer one walks every
 * slot up to DDT_NPHYS(ddt) and skips the one DDT_PHYS_IS_DITTO() reports.
 */
uint64_t
ddt_phys_total_refcnt(const ddt_entry_t *dde)
ddt_phys_total_refcnt(const ddt_t *ddt, const ddt_entry_t *dde)
{
uint64_t refcnt = 0;

for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++)
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
if (DDT_PHYS_IS_DITTO(ddt, p))
continue;
refcnt += dde->dde_phys[p].ddp_refcnt;
}

return (refcnt);
}
Expand All @@ -570,6 +572,12 @@ ddt_select(spa_t *spa, const blkptr_t *bp)
return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]);
}

/*
 * Look up a pool's dedup table directly by checksum algorithm: returns the
 * spa_ddt[] element indexed by the given checksum value. No validation of
 * the checksum is done here.
 */
ddt_t *
ddt_select_checksum(spa_t *spa, enum zio_checksum checksum)
{
	ddt_t *table = spa->spa_ddt[checksum];

	return (table);
}

void
ddt_enter(ddt_t *ddt)
{
Expand Down Expand Up @@ -613,9 +621,9 @@ ddt_alloc(const ddt_key_t *ddk)
}

static void
ddt_free(ddt_entry_t *dde)
ddt_free(const ddt_t *ddt, ddt_entry_t *dde)
{
for (int p = 0; p < DDT_PHYS_TYPES; p++)
for (int p = 0; p < DDT_NPHYS(ddt); p++)
ASSERT3P(dde->dde_lead_zio[p], ==, NULL);

if (dde->dde_repair_abd != NULL)
Expand All @@ -631,7 +639,7 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde)
ASSERT(MUTEX_HELD(&ddt->ddt_lock));

avl_remove(&ddt->ddt_tree, dde);
ddt_free(dde);
ddt_free(ddt, dde);
}

static boolean_t
Expand Down Expand Up @@ -759,7 +767,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
if (dde->dde_flags & DDE_FLAG_OVERQUOTA) {
if (dde->dde_waiters == 0) {
avl_remove(&ddt->ddt_tree, dde);
ddt_free(dde);
ddt_free(ddt, dde);
}
return (NULL);
}
Expand Down Expand Up @@ -805,7 +813,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* Over quota. If no one is waiting, clean up right now. */
if (dde->dde_waiters == 0) {
avl_remove(&ddt->ddt_tree, dde);
ddt_free(dde);
ddt_free(ddt, dde);
return (NULL);
}

Expand Down Expand Up @@ -1212,24 +1220,23 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde)
avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL)
avl_insert(&ddt->ddt_repair_tree, dde, where);
else
ddt_free(dde);
ddt_free(ddt, dde);

ddt_exit(ddt);
}

/*
 * zio done callback that releases a repair entry: the entry is taken from
 * zio->io_private and handed to ddt_free(). ddt_repair_entry() passes this
 * function and the repair entry to zio_null().
 *
 * NOTE(review): this listing is a rendered diff without +/- markers; the
 * one-argument ddt_free(rdde) call is the pre-change line and the
 * two-argument call is its replacement. The ddt is looked up only so it can
 * be passed to ddt_free().
 *
 * NOTE(review): ddt_select() indexes spa_ddt[] by BP_GET_CHECKSUM(bp) --
 * confirm that zio->io_bp is valid for a zio created by zio_null() before
 * relying on this lookup.
 */
static void
ddt_repair_entry_done(zio_t *zio)
{
ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp);
ddt_entry_t *rdde = zio->io_private;

ddt_free(rdde);
ddt_free(ddt, rdde);
}

static void
ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio)
{
ddt_phys_t *ddp = dde->dde_phys;
ddt_phys_t *rddp = rdde->dde_phys;
ddt_key_t *ddk = &dde->dde_key;
ddt_key_t *rddk = &rdde->dde_key;
zio_t *zio;
Expand All @@ -1238,7 +1245,9 @@ ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio)
zio = zio_null(rio, rio->io_spa, NULL,
ddt_repair_entry_done, rdde, rio->io_flags);

for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];
ddt_phys_t *rddp = &rdde->dde_phys[p];
if (ddp->ddp_phys_birth == 0 ||
ddp->ddp_phys_birth != rddp->ddp_phys_birth ||
memcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva)))
Expand Down Expand Up @@ -1281,7 +1290,6 @@ static void
ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
{
dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool;
ddt_phys_t *ddp = dde->dde_phys;
ddt_key_t *ddk = &dde->dde_key;
ddt_type_t otype = dde->dde_type;
ddt_type_t ntype = DDT_TYPE_DEFAULT;
Expand All @@ -1291,13 +1299,14 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)

ASSERT(dde->dde_flags & DDE_FLAG_LOADED);

for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ASSERT3P(dde->dde_lead_zio[p], ==, NULL);
ddt_phys_t *ddp = &dde->dde_phys[p];
if (ddp->ddp_phys_birth == 0) {
ASSERT0(ddp->ddp_refcnt);
continue;
}
if (p == DDT_PHYS_DITTO) {
if (DDT_PHYS_IS_DITTO(ddt, p)) {
/*
* Note, we no longer create DDT-DITTO blocks, but we
* don't want to leak any written by older software.
Expand All @@ -1310,8 +1319,6 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
total_refcnt += ddp->ddp_refcnt;
}

/* We do not create new DDT-DITTO blocks. */
ASSERT0(dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth);
if (total_refcnt > 1)
nclass = DDT_CLASS_DUPLICATE;
else
Expand Down Expand Up @@ -1369,7 +1376,7 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)

while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
ddt_sync_entry(ddt, dde, tx, txg);
ddt_free(dde);
ddt_free(ddt, dde);
}

uint64_t count = 0;
Expand Down Expand Up @@ -1512,7 +1519,8 @@ ddt_addref(spa_t *spa, const blkptr_t *bp)

ASSERT3S(dde->dde_class, <, DDT_CLASSES);

ddp = &dde->dde_phys[BP_GET_NDVAS(bp)];
int p = DDT_PHYS_FOR_COPIES(ddt, BP_GET_NDVAS(bp));
ddp = &dde->dde_phys[p];

/*
* This entry already existed (dde_type is real), so it must
Expand Down
5 changes: 3 additions & 2 deletions module/zfs/ddt_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,15 @@ static void
ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
{
spa_t *spa = ddt->ddt_spa;
ddt_phys_t *ddp = dde->dde_phys;
ddt_key_t *ddk = &dde->dde_key;
uint64_t lsize = DDK_GET_LSIZE(ddk);
uint64_t psize = DDK_GET_PSIZE(ddk);

memset(dds, 0, sizeof (*dds));

for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];

uint64_t dsize = 0;
uint64_t refcnt = ddp->ddp_refcnt;

Expand Down
1 change: 1 addition & 0 deletions module/zfs/ddt_zap.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018 by Delphix. All rights reserved.
* Copyright (c) 2023, Klara Inc.
*/

#include <sys/zfs_context.h>
Expand Down
6 changes: 4 additions & 2 deletions module/zfs/dsl_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -2933,7 +2933,6 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
{
(void) tx;
const ddt_key_t *ddk = &dde->dde_key;
ddt_phys_t *ddp = dde->dde_phys;
blkptr_t bp;
zbookmark_phys_t zb = { 0 };

Expand All @@ -2954,7 +2953,10 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
if (scn->scn_done_txg != 0)
return;

for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
ddt_t *ddt = ddt_select_checksum(tx->tx_pool->dp_spa, checksum);
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];

if (ddp->ddp_phys_birth == 0 ||
ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
continue;
Expand Down
Loading

0 comments on commit d17ab63

Please sign in to comment.