Skip to content

Commit

Permalink
ddt: rework ops interface in terms of keys and values
Browse files Browse the repository at this point in the history
Store objects store keys and values, so have them take those types and
nothing more. This way, they don't need to be concerned about the "kind"
of entry being operated on; the dispatch layer can take care of the
appropriate conversions.

This adds a "contains" op to see if a particular entry exists without
loading it, which makes a couple of things easier to do; in particular,
it allows us to avoid an allocation in ddt_class_contains().

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Rob Norris <[email protected]>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes openzfs#15887
  • Loading branch information
robn authored and lundman committed Mar 13, 2024
1 parent b9290c4 commit 70dd5ce
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 51 deletions.
20 changes: 12 additions & 8 deletions include/sys/ddt_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,19 @@ typedef struct {
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
boolean_t prehash);
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
int (*ddt_op_lookup)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
int (*ddt_op_contains)(objset_t *os, uint64_t object,
const ddt_key_t *ddk);
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
ddt_entry_t *dde);
int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
const ddt_key_t *ddk);
int (*ddt_op_update)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize,
dmu_tx_t *tx);
int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
dmu_tx_t *tx);
int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
uint64_t *walk);
int (*ddt_op_remove)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, dmu_tx_t *tx);
int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
} ddt_ops_t;

Expand All @@ -62,7 +66,7 @@ extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
* outside of the DDT implementation proper, and if you do, consider moving
* them up.
*/
#define DDT_NAMELEN 107
#define DDT_NAMELEN 110

extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);

Expand Down
1 change: 1 addition & 0 deletions module/Makefile.bsd
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ CFLAGS.gcc+= -Wno-pointer-to-int-cast

CFLAGS.abd.c= -Wno-cast-qual
CFLAGS.ddt.c= -Wno-cast-qual
CFLAGS.ddt_zap.c= -Wno-cast-qual
CFLAGS.dmu.c= -Wno-cast-qual
CFLAGS.dmu_traverse.c= -Wno-cast-qual
CFLAGS.dnode.c= ${NO_WUNUSED_BUT_SET_VARIABLE}
Expand Down
48 changes: 29 additions & 19 deletions module/zfs/ddt.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,18 +186,30 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
return (SET_ERROR(ENOENT));

return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,
ddt->ddt_object[type][class], dde));
ddt->ddt_object[type][class], &dde->dde_key,
dde->dde_phys, sizeof (dde->dde_phys)));
}

/*
 * Ask the store backing (type, class) whether it holds an entry for the
 * given key, without loading the entry's value.
 *
 * Returns ENOENT if no object exists for this type/class pair; otherwise
 * returns whatever the type's ddt_op_contains op reports (callers treat 0
 * as "present").
 */
static int
ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
    const ddt_key_t *ddk)
{
	if (ddt_object_exists(ddt, type, class)) {
		/* Hand the bare key to the type-specific backend. */
		return (ddt_ops[type]->ddt_op_contains(ddt->ddt_os,
		    ddt->ddt_object[type][class], ddk));
	}

	return (SET_ERROR(ENOENT));
}

static void
ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde)
const ddt_key_t *ddk)
{
if (!ddt_object_exists(ddt, type, class))
return;

ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,
ddt->ddt_object[type][class], dde);
ddt->ddt_object[type][class], ddk);
}

static int
Expand All @@ -207,17 +219,18 @@ ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ASSERT(ddt_object_exists(ddt, type, class));

return (ddt_ops[type]->ddt_op_update(ddt->ddt_os,
ddt->ddt_object[type][class], dde, tx));
ddt->ddt_object[type][class], &dde->dde_key, dde->dde_phys,
sizeof (dde->dde_phys), tx));
}

static int
ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde, dmu_tx_t *tx)
const ddt_key_t *ddk, dmu_tx_t *tx)
{
ASSERT(ddt_object_exists(ddt, type, class));

return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os,
ddt->ddt_object[type][class], dde, tx));
ddt->ddt_object[type][class], ddk, tx));
}

int
Expand All @@ -227,7 +240,8 @@ ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ASSERT(ddt_object_exists(ddt, type, class));

return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
ddt->ddt_object[type][class], dde, walk));
ddt->ddt_object[type][class], walk, &dde->dde_key,
dde->dde_phys, sizeof (dde->dde_phys)));
}

int
Expand Down Expand Up @@ -523,7 +537,7 @@ void
ddt_prefetch(spa_t *spa, const blkptr_t *bp)
{
ddt_t *ddt;
ddt_entry_t dde;
ddt_key_t ddk;

if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp))
return;
Expand All @@ -534,11 +548,11 @@ ddt_prefetch(spa_t *spa, const blkptr_t *bp)
* Thus no locking is required as the DDT can't disappear on us.
*/
ddt = ddt_select(spa, bp);
ddt_key_fill(&dde.dde_key, bp);
ddt_key_fill(&ddk, bp);

for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
ddt_object_prefetch(ddt, type, class, &dde);
ddt_object_prefetch(ddt, type, class, &ddk);
}
}
}
Expand Down Expand Up @@ -660,7 +674,7 @@ boolean_t
ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp)
{
ddt_t *ddt;
ddt_entry_t *dde;
ddt_key_t ddk;

if (!BP_GET_DEDUP(bp))
return (B_FALSE);
Expand All @@ -669,20 +683,16 @@ ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp)
return (B_TRUE);

ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)];
dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP);

ddt_key_fill(&(dde->dde_key), bp);
ddt_key_fill(&ddk, bp);

for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class <= max_class; class++) {
if (ddt_object_lookup(ddt, type, class, dde) == 0) {
kmem_cache_free(ddt_entry_cache, dde);
if (ddt_object_contains(ddt, type, class, &ddk) == 0)
return (B_TRUE);
}
}
}

kmem_cache_free(ddt_entry_cache, dde);
return (B_FALSE);
}

Expand Down Expand Up @@ -833,9 +843,9 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)

if (otype != DDT_TYPES &&
(otype != ntype || oclass != nclass || total_refcnt == 0)) {
VERIFY0(ddt_object_remove(ddt, otype, oclass, dde, tx));
VERIFY0(ddt_object_remove(ddt, otype, oclass, ddk, tx));
ASSERT3U(
ddt_object_lookup(ddt, otype, oclass, dde), ==, ENOENT);
ddt_object_contains(ddt, otype, oclass, ddk), ==, ENOENT);
}

if (total_refcnt != 0) {
Expand Down
59 changes: 35 additions & 24 deletions module/zfs/ddt_zap.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ static unsigned int ddt_zap_default_ibs = 15;
#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))

static size_t
ddt_zap_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)
{
uchar_t *version = dst++;
int cpfunc = ZIO_COMPRESS_ZLE;
Expand All @@ -51,7 +51,8 @@ ddt_zap_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)

ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */

c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level);
c_len = ci->ci_compress((void *)src, dst, s_len, d_len - 1,
ci->ci_level);

if (c_len == s_len) {
cpfunc = ZIO_COMPRESS_OFF;
Expand Down Expand Up @@ -93,8 +94,10 @@ ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
ddt_zap_default_bs, ddt_zap_default_ibs,
DMU_OT_NONE, 0, tx);
if (*objectp == 0)
return (SET_ERROR(ENOTSUP));

return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0);
return (0);
}

static int
Expand All @@ -104,51 +107,57 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
}

static int
ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
ddt_zap_lookup(objset_t *os, uint64_t object,
const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize)
{
uchar_t *cbuf;
uint64_t one, csize;
int error;

error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
error = zap_length_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, &one, &csize);
if (error)
return (error);

ASSERT3U(one, ==, 1);
ASSERT3U(csize, <=, (sizeof (dde->dde_phys) + 1));
ASSERT3U(csize, <=, psize + 1);

cbuf = kmem_alloc(csize, KM_SLEEP);

error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key,
error = zap_lookup_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, 1, csize, cbuf);
if (error == 0)
ddt_zap_decompress(cbuf, dde->dde_phys, csize,
sizeof (dde->dde_phys));
ddt_zap_decompress(cbuf, phys, csize, psize);

kmem_free(cbuf, csize);

return (error);
}

/*
 * ZAP backend for the "contains" op: report whether the key is present in
 * the ZAP object without reading the stored value.
 *
 * Only the return code of zap_length_uint64() is of interest, so both of
 * its output pointers are passed as NULL.
 */
static int
ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)
{
	/* The ZAP uint64 interface takes the key as an array of words. */
	uint64_t *key_words = (uint64_t *)ddk;

	return (zap_length_uint64(os, object, key_words, DDT_KEY_WORDS,
	    NULL, NULL));
}

static void
ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde)
ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
{
(void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key,
DDT_KEY_WORDS);
(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
}

static int
ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx)
{
const size_t cbuf_size = sizeof (dde->dde_phys) + 1;
const size_t cbuf_size = psize + 1;

uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP);

uint64_t csize = ddt_zap_compress(dde->dde_phys, cbuf,
sizeof (dde->dde_phys), cbuf_size);
uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);

int error = zap_update_uint64(os, object, (uint64_t *)&dde->dde_key,
int error = zap_update_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, 1, csize, cbuf, tx);

kmem_free(cbuf, cbuf_size);
Expand All @@ -157,14 +166,16 @@ ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
}

static int
ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
dmu_tx_t *tx)
{
return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key,
return (zap_remove_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, tx));
}

static int
ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
ddt_phys_t *phys, size_t psize)
{
zap_cursor_t zc;
zap_attribute_t za;
Expand All @@ -186,17 +197,16 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
uint64_t csize = za.za_num_integers;

ASSERT3U(za.za_integer_length, ==, 1);
ASSERT3U(csize, <=, sizeof (dde->dde_phys) + 1);
ASSERT3U(csize, <=, psize + 1);

uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);

error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name,
DDT_KEY_WORDS, 1, csize, cbuf);
ASSERT0(error);
if (error == 0) {
ddt_zap_decompress(cbuf, dde->dde_phys, csize,
sizeof (dde->dde_phys));
dde->dde_key = *(ddt_key_t *)za.za_name;
ddt_zap_decompress(cbuf, phys, csize, psize);
*ddk = *(ddt_key_t *)za.za_name;
}

kmem_free(cbuf, csize);
Expand All @@ -219,6 +229,7 @@ const ddt_ops_t ddt_zap_ops = {
ddt_zap_create,
ddt_zap_destroy,
ddt_zap_lookup,
ddt_zap_contains,
ddt_zap_prefetch,
ddt_zap_update,
ddt_zap_remove,
Expand Down

0 comments on commit 70dd5ce

Please sign in to comment.