From d17ab631a9142b81b100d87f0619f5e59bc211ac Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Mon, 3 Jul 2023 15:16:02 +1000 Subject: [PATCH] ddt: rework access to phys array slots The "flat phys" feature will use only a single phys slot for all entries, which means the old "single", "double" etc naming now makes no sense, and more importantly, means that choosing the right slot for a given block pointer will depend on how many slots are in use for a given DDT. This removes the old names, and adds accessor macros to decouple specific phys array indexes from any particular meaning. (These macros look strange in isolation, mainly in the way they take the ddt_t* as an arg but don't use it. This is mostly a separate commit to introduce the concept to the reader before the "flat phys" commit extends it). Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. Closes #15893 --- cmd/zdb/zdb.c | 13 +++++----- include/sys/ddt.h | 27 ++++++++------------- include/sys/ddt_impl.h | 2 +- module/zfs/ddt.c | 54 ++++++++++++++++++++++++------------------ module/zfs/ddt_stats.c | 5 ++-- module/zfs/ddt_zap.c | 1 + module/zfs/dsl_scan.c | 6 +++-- module/zfs/zio.c | 36 ++++++++++++++++++---------- 8 files changed, 79 insertions(+), 65 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index fcf0e4779788..7a6459b756b2 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1916,21 +1916,20 @@ dump_log_spacemaps(spa_t *spa) static void dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) { - const ddt_phys_t *ddp = dde->dde_phys; const ddt_key_t *ddk = &dde->dde_key; - const char *types[4] = { "ditto", "single", "double", "triple" }; char blkbuf[BP_SPRINTF_LEN]; blkptr_t blk; int p; - for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + for (p = 0; p < DDT_NPHYS(ddt); p++) { + const ddt_phys_t *ddp = &dde->dde_phys[p]; if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); - (void) printf("index %llx refcnt %llu %s %s\n", + (void) printf("index %llx refcnt %llu phys %d %s\n", (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, - types[p], blkbuf); + p, blkbuf); } } @@ -5724,7 +5723,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, VERIFY3P(dde, !=, NULL); /* Get the phys for this variant */ - ddt_phys_t *ddp = ddt_phys_select(dde, bp); + ddt_phys_t *ddp = ddt_phys_select(ddt, dde, bp); VERIFY3P(ddp, !=, NULL); /* @@ -5751,7 +5750,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, dde->dde_lead_zio[idx] = (zio_t *)(uintptr_t)B_TRUE; /* Consume a reference for this block. */ - VERIFY3U(ddt_phys_total_refcnt(dde), >, 0); + VERIFY3U(ddt_phys_total_refcnt(ddt, dde), >, 0); ddt_phys_decref(ddp); if (seen) { diff --git a/include/sys/ddt.h b/include/sys/ddt.h index 20bae8ce0fcb..a2e069f13922 100644 --- a/include/sys/ddt.h +++ b/include/sys/ddt.h @@ -137,19 +137,10 @@ typedef struct { uint64_t ddp_phys_birth; } ddt_phys_t; -/* - * Named indexes into the ddt_phys_t array in each entry. - * - * Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However, - * we maintain the ability to free existing dedup-ditto blocks. - */ -enum ddt_phys_type { - DDT_PHYS_DITTO = 0, - DDT_PHYS_SINGLE = 1, - DDT_PHYS_DOUBLE = 2, - DDT_PHYS_TRIPLE = 3, - DDT_PHYS_TYPES -}; +#define DDT_PHYS_MAX (4) +#define DDT_NPHYS(ddt) ((ddt) ? DDT_PHYS_MAX : DDT_PHYS_MAX) +#define DDT_PHYS_IS_DITTO(ddt, p) ((ddt) && p == 0) +#define DDT_PHYS_FOR_COPIES(ddt, p) ((ddt) ? (p) : (p)) /* * A "live" entry, holding changes to an entry made this txg, and other data to @@ -162,11 +153,11 @@ enum ddt_phys_type { typedef struct { /* key must be first for ddt_key_compare */ - ddt_key_t dde_key; /* ddt_tree key */ - ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* on-disk data */ + ddt_key_t dde_key; /* ddt_tree key */ + ddt_phys_t dde_phys[DDT_PHYS_MAX]; /* on-disk data */ /* in-flight update IOs */ - zio_t *dde_lead_zio[DDT_PHYS_TYPES]; + zio_t *dde_lead_zio[DDT_PHYS_MAX]; /* copy of data after a repair read, to be rewritten */ struct abd *dde_repair_abd; @@ -234,7 +225,8 @@ extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp); extern void ddt_phys_clear(ddt_phys_t *ddp); extern void ddt_phys_addref(ddt_phys_t *ddp); extern void ddt_phys_decref(ddt_phys_t *ddp); -extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp); +extern ddt_phys_t *ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde, + const blkptr_t *bp); extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src); extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh); @@ -249,6 +241,7 @@ extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa); extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize); extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp); +extern ddt_t *ddt_select_checksum(spa_t *spa, enum zio_checksum checksum); extern void ddt_enter(ddt_t *ddt); extern void ddt_exit(ddt_t *ddt); extern void ddt_init(void); diff --git a/include/sys/ddt_impl.h b/include/sys/ddt_impl.h index 9c0fea64f389..e97b71621c37 100644 --- a/include/sys/ddt_impl.h +++ b/include/sys/ddt_impl.h @@ -82,7 +82,7 @@ extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg); */ #define DDT_NAMELEN 32 -extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde); +extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt, const ddt_entry_t *dde); extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp); diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 84d7800cbc73..9bb0b8f15fca 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -540,11 +540,10 @@ ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg) } ddt_phys_t * -ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) +ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde, const blkptr_t *bp) { - ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys; - - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + ddt_phys_t *ddp = (ddt_phys_t *)&dde->dde_phys[p]; if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && BP_GET_BIRTH(bp) == ddp->ddp_phys_birth) return (ddp); @@ -553,12 +552,15 @@ ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) } uint64_t -ddt_phys_total_refcnt(const ddt_entry_t *dde) +ddt_phys_total_refcnt(const ddt_t *ddt, const ddt_entry_t *dde) { uint64_t refcnt = 0; - for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + if (DDT_PHYS_IS_DITTO(ddt, p)) + continue; refcnt += dde->dde_phys[p].ddp_refcnt; + } return (refcnt); } @@ -570,6 +572,12 @@ ddt_select(spa_t *spa, const blkptr_t *bp) return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); } +ddt_t * +ddt_select_checksum(spa_t *spa, enum zio_checksum checksum) +{ + return (spa->spa_ddt[checksum]); +} + void ddt_enter(ddt_t *ddt) { @@ -613,9 +621,9 @@ ddt_alloc(const ddt_key_t *ddk) } static void -ddt_free(ddt_entry_t *dde) +ddt_free(const ddt_t *ddt, ddt_entry_t *dde) { - for (int p = 0; p < DDT_PHYS_TYPES; p++) + for (int p = 0; p < DDT_NPHYS(ddt); p++) ASSERT3P(dde->dde_lead_zio[p], ==, NULL); if (dde->dde_repair_abd != NULL) @@ -631,7 +639,7 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde) ASSERT(MUTEX_HELD(&ddt->ddt_lock)); avl_remove(&ddt->ddt_tree, dde); - ddt_free(dde); + ddt_free(ddt, dde); } static boolean_t @@ -759,7 +767,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp) if (dde->dde_flags & DDE_FLAG_OVERQUOTA) { if (dde->dde_waiters == 0) { avl_remove(&ddt->ddt_tree, dde); - ddt_free(dde); + ddt_free(ddt, dde); } return (NULL); } @@ -805,7 +813,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp) /* Over quota. If no one is waiting, clean up right now. */ if (dde->dde_waiters == 0) { avl_remove(&ddt->ddt_tree, dde); - ddt_free(dde); + ddt_free(ddt, dde); return (NULL); } @@ -1212,7 +1220,7 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) avl_insert(&ddt->ddt_repair_tree, dde, where); else - ddt_free(dde); + ddt_free(ddt, dde); ddt_exit(ddt); } @@ -1220,16 +1228,15 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) static void ddt_repair_entry_done(zio_t *zio) { + ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); ddt_entry_t *rdde = zio->io_private; - ddt_free(rdde); + ddt_free(ddt, rdde); } static void ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) { - ddt_phys_t *ddp = dde->dde_phys; - ddt_phys_t *rddp = rdde->dde_phys; ddt_key_t *ddk = &dde->dde_key; ddt_key_t *rddk = &rdde->dde_key; zio_t *zio; @@ -1238,7 +1245,9 @@ ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) zio = zio_null(rio, rio->io_spa, NULL, ddt_repair_entry_done, rdde, rio->io_flags); - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + ddt_phys_t *ddp = &dde->dde_phys[p]; + ddt_phys_t *rddp = &rdde->dde_phys[p]; if (ddp->ddp_phys_birth == 0 || ddp->ddp_phys_birth != rddp->ddp_phys_birth || memcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva))) @@ -1281,7 +1290,6 @@ static void ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) { dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool; - ddt_phys_t *ddp = dde->dde_phys; ddt_key_t *ddk = &dde->dde_key; ddt_type_t otype = dde->dde_type; ddt_type_t ntype = DDT_TYPE_DEFAULT; @@ -1291,13 +1299,14 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) ASSERT(dde->dde_flags & DDE_FLAG_LOADED); - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) { ASSERT3P(dde->dde_lead_zio[p], ==, NULL); + ddt_phys_t *ddp = &dde->dde_phys[p]; if (ddp->ddp_phys_birth == 0) { ASSERT0(ddp->ddp_refcnt); continue; } - if (p == DDT_PHYS_DITTO) { + if (DDT_PHYS_IS_DITTO(ddt, p)) { /* * Note, we no longer create DDT-DITTO blocks, but we * don't want to leak any written by older software. @@ -1310,8 +1319,6 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) total_refcnt += ddp->ddp_refcnt; } - /* We do not create new DDT-DITTO blocks. */ - ASSERT0(dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth); if (total_refcnt > 1) nclass = DDT_CLASS_DUPLICATE; else @@ -1369,7 +1376,7 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { ddt_sync_entry(ddt, dde, tx, txg); - ddt_free(dde); + ddt_free(ddt, dde); } uint64_t count = 0; @@ -1512,7 +1519,8 @@ ddt_addref(spa_t *spa, const blkptr_t *bp) ASSERT3S(dde->dde_class, <, DDT_CLASSES); - ddp = &dde->dde_phys[BP_GET_NDVAS(bp)]; + int p = DDT_PHYS_FOR_COPIES(ddt, BP_GET_NDVAS(bp)); + ddp = &dde->dde_phys[p]; /* * This entry already existed (dde_type is real), so it must diff --git a/module/zfs/ddt_stats.c b/module/zfs/ddt_stats.c index 82b682019ae9..5449eca3afb1 100644 --- a/module/zfs/ddt_stats.c +++ b/module/zfs/ddt_stats.c @@ -36,14 +36,15 @@ static void ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) { spa_t *spa = ddt->ddt_spa; - ddt_phys_t *ddp = dde->dde_phys; ddt_key_t *ddk = &dde->dde_key; uint64_t lsize = DDK_GET_LSIZE(ddk); uint64_t psize = DDK_GET_PSIZE(ddk); memset(dds, 0, sizeof (*dds)); - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + ddt_phys_t *ddp = &dde->dde_phys[p]; + uint64_t dsize = 0; uint64_t refcnt = ddp->ddp_refcnt; diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c index 7ce7461a2b25..8f1bbeeecd8d 100644 --- a/module/zfs/ddt_zap.c +++ b/module/zfs/ddt_zap.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018 by Delphix. All rights reserved. + * Copyright (c) 2023, Klara Inc. */ #include diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 085cfd3c5691..737ee4f6600c 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -2933,7 +2933,6 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, { (void) tx; const ddt_key_t *ddk = &dde->dde_key; - ddt_phys_t *ddp = dde->dde_phys; blkptr_t bp; zbookmark_phys_t zb = { 0 }; @@ -2954,7 +2953,10 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, if (scn->scn_done_txg != 0) return; - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + ddt_t *ddt = ddt_select_checksum(tx->tx_pool->dp_spa, checksum); + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + ddt_phys_t *ddp = &dde->dde_phys[p]; + if (ddp->ddp_phys_birth == 0 || ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg) continue; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 5810e811a39d..914f83fb9f9b 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3254,12 +3254,14 @@ static void zio_ddt_child_read_done(zio_t *zio) { blkptr_t *bp = zio->io_bp; + ddt_t *ddt; ddt_entry_t *dde = zio->io_private; ddt_phys_t *ddp; zio_t *pio = zio_unique_parent(zio); mutex_enter(&pio->io_lock); - ddp = ddt_phys_select(dde, bp); + ddt = ddt_select(zio->io_spa, bp); + ddp = ddt_phys_select(ddt, dde, bp); if (zio->io_error == 0) ddt_phys_clear(ddp); /* this ddp doesn't need repair */ @@ -3282,8 +3284,7 @@ zio_ddt_read_start(zio_t *zio) if (zio->io_child_error[ZIO_CHILD_DDT]) { ddt_t *ddt = ddt_select(zio->io_spa, bp); ddt_entry_t *dde = ddt_repair_start(ddt, bp); - ddt_phys_t *ddp = dde->dde_phys; - ddt_phys_t *ddp_self = ddt_phys_select(dde, bp); + ddt_phys_t *ddp_self = ddt_phys_select(ddt, dde, bp); blkptr_t blk; ASSERT(zio->io_vsd == NULL); @@ -3292,7 +3293,8 @@ zio_ddt_read_start(zio_t *zio) if (ddp_self == NULL) return (zio); - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + ddt_phys_t *ddp = &dde->dde_phys[p]; if (ddp->ddp_phys_birth == 0 || ddp == ddp_self) continue; ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, ddp, @@ -3372,7 +3374,10 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) * loaded). */ - for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + if (DDT_PHYS_IS_DITTO(ddt, p)) + continue; + zio_t *lio = dde->dde_lead_zio[p]; if (lio != NULL && do_raw) { @@ -3384,7 +3389,10 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) } } - for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + if (DDT_PHYS_IS_DITTO(ddt, p)) + continue; + ddt_phys_t *ddp = &dde->dde_phys[p]; if (ddp->ddp_phys_birth != 0 && do_raw) { @@ -3452,15 +3460,16 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) static void zio_ddt_child_write_ready(zio_t *zio) { - int p = zio->io_prop.zp_copies; ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); ddt_entry_t *dde = zio->io_private; - ddt_phys_t *ddp = &dde->dde_phys[p]; zio_t *pio; if (zio->io_error) return; + int p = DDT_PHYS_FOR_COPIES(ddt, zio->io_prop.zp_copies); + ddt_phys_t *ddp = &dde->dde_phys[p]; + ddt_enter(ddt); ASSERT(dde->dde_lead_zio[p] == zio); @@ -3477,9 +3486,10 @@ zio_ddt_child_write_ready(zio_t *zio) static void zio_ddt_child_write_done(zio_t *zio) { - int p = zio->io_prop.zp_copies; ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); ddt_entry_t *dde = zio->io_private; + + int p = DDT_PHYS_FOR_COPIES(ddt, zio->io_prop.zp_copies); ddt_phys_t *ddp = &dde->dde_phys[p]; ddt_enter(ddt); @@ -3506,11 +3516,9 @@ zio_ddt_write(zio_t *zio) blkptr_t *bp = zio->io_bp; uint64_t txg = zio->io_txg; zio_prop_t *zp = &zio->io_prop; - int p = zp->zp_copies; zio_t *cio = NULL; ddt_t *ddt = ddt_select(spa, bp); ddt_entry_t *dde; - ddt_phys_t *ddp; ASSERT(BP_GET_DEDUP(bp)); ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum); @@ -3528,7 +3536,9 @@ zio_ddt_write(zio_t *zio) ddt_exit(ddt); return (zio); } - ddp = &dde->dde_phys[p]; + + int p = DDT_PHYS_FOR_COPIES(ddt, zp->zp_copies); + ddt_phys_t *ddp = &dde->dde_phys[p]; if (zp->zp_dedup_verify && zio_ddt_collision(zio, ddt, dde)) { /* @@ -3600,7 +3610,7 @@ zio_ddt_free(zio_t *zio) ddt_enter(ddt); freedde = dde = ddt_lookup(ddt, bp); if (dde) { - ddp = ddt_phys_select(dde, bp); + ddp = ddt_phys_select(ddt, dde, bp); if (ddp) ddt_phys_decref(ddp); }