Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ZAP: Massively switch to _by_dnode() interfaces #15951

Merged
merged 1 commit into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,6 @@ void dmu_buf_sub_user_size(dmu_buf_t *db, uint64_t nsub);
void *dmu_buf_get_user(dmu_buf_t *db);

objset_t *dmu_buf_get_objset(dmu_buf_t *db);
dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
void dmu_buf_dnode_exit(dmu_buf_t *db);

/* Block until any in-progress dmu buf user evictions complete. */
void dmu_buf_user_evict_wait(void);
Expand Down Expand Up @@ -902,6 +900,8 @@ extern uint_t zfs_max_recordsize;
*/
void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
uint64_t len, enum zio_priority pri);
void dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
uint64_t len, enum zio_priority pri);
amotin marked this conversation as resolved.
Show resolved Hide resolved
void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri);

typedef struct dmu_object_info {
Expand Down
8 changes: 8 additions & 0 deletions include/sys/zap.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ int zap_add_by_dnode(dnode_t *dn, const char *key,
int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);

/*
* Set the attribute with the given name to the given value. If an
Expand All @@ -267,6 +270,9 @@ int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);

/*
* Get the length (in integers) and the integer size of the specified
Expand All @@ -292,6 +298,8 @@ int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, dmu_tx_t *tx);
int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints, dmu_tx_t *tx);

/*
* Returns (in *count) the number of attributes in the specified zap
Expand Down
1 change: 1 addition & 0 deletions include/sys/zap_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ typedef struct zap {
dmu_buf_user_t zap_dbu;
objset_t *zap_objset;
uint64_t zap_object;
dnode_t *zap_dnode;
struct dmu_buf *zap_dbuf;
krwlock_t zap_rwlock;
boolean_t zap_ismicro;
Expand Down
72 changes: 14 additions & 58 deletions module/zfs/brt.c
Original file line number Diff line number Diff line change
Expand Up @@ -955,52 +955,10 @@ brt_entry_prefetch(brt_t *brt, uint64_t vdevid, brt_entry_t *bre)
if (mos_entries == 0)
return;

BRT_DEBUG("ZAP prefetch: object=%llu vdev=%llu offset=%llu",
(u_longlong_t)mos_entries, (u_longlong_t)vdevid,
(u_longlong_t)bre->bre_offset);
(void) zap_prefetch_uint64(brt->brt_mos, mos_entries,
(uint64_t *)&bre->bre_offset, BRT_KEY_WORDS);
}

/*
 * Write the reference count of `bre` into the ZAP object
 * brtvd->bv_mos_entries of brt->brt_mos, using bre->bre_offset as the
 * BRT_KEY_WORDS-long uint64 key.
 *
 * The value is handed to zap_update_uint64() as sizeof (bre_refcount)
 * integers of size 1; that argument order is kept exactly as is.
 *
 * Asserts RW_LOCK_HELD() on brt_lock, a non-zero entries object and a
 * positive reference count.  Returns the zap_update_uint64() result.
 */
static int
brt_entry_update(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx)
{
	uint64_t *key = (uint64_t *)&bre->bre_offset;
	int rc;

	ASSERT(RW_LOCK_HELD(&brt->brt_lock));
	ASSERT(brtvd->bv_mos_entries != 0);
	ASSERT(bre->bre_refcount > 0);

	rc = zap_update_uint64(brt->brt_mos, brtvd->bv_mos_entries, key,
	    BRT_KEY_WORDS, 1, sizeof (bre->bre_refcount),
	    &bre->bre_refcount, tx);

	/* Log the outcome whether or not the update succeeded. */
	BRT_DEBUG("ZAP update: object=%llu vdev=%llu offset=%llu count=%llu "
	    "error=%d", (u_longlong_t)brtvd->bv_mos_entries,
	    (u_longlong_t)brtvd->bv_vdevid, (u_longlong_t)bre->bre_offset,
	    (u_longlong_t)bre->bre_refcount, rc);

	return (rc);
}

/*
 * Delete the entry keyed by bre->bre_offset (BRT_KEY_WORDS uint64 words)
 * from the ZAP object brtvd->bv_mos_entries of brt->brt_mos.
 *
 * Asserts RW_LOCK_HELD() on brt_lock, a non-zero entries object and a
 * zero reference count.  Returns the zap_remove_uint64() result without
 * interpreting it; callers decide which errors are acceptable.
 */
static int
brt_entry_remove(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx)
{
	uint64_t *key = (uint64_t *)&bre->bre_offset;
	int rc;

	ASSERT(RW_LOCK_HELD(&brt->brt_lock));
	ASSERT(brtvd->bv_mos_entries != 0);
	ASSERT0(bre->bre_refcount);

	rc = zap_remove_uint64(brt->brt_mos, brtvd->bv_mos_entries, key,
	    BRT_KEY_WORDS, tx);

	/* Log the outcome whether or not the removal succeeded. */
	BRT_DEBUG("ZAP remove: object=%llu vdev=%llu offset=%llu count=%llu "
	    "error=%d", (u_longlong_t)brtvd->bv_mos_entries,
	    (u_longlong_t)brtvd->bv_vdevid, (u_longlong_t)bre->bre_offset,
	    (u_longlong_t)bre->bre_refcount, rc);

	return (rc);
}

/*
* Return TRUE if we _can_ have BRT entry for this bp. It might be false
* positive, but gives us quick answer if we should look into BRT, which
Expand Down Expand Up @@ -1559,24 +1517,16 @@ brt_pending_apply(spa_t *spa, uint64_t txg)
}

static void
brt_sync_entry(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx)
brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
{

ASSERT(RW_WRITE_HELD(&brt->brt_lock));
ASSERT(brtvd->bv_mos_entries != 0);

if (bre->bre_refcount == 0) {
int error;

error = brt_entry_remove(brt, brtvd, bre, tx);
ASSERT(error == 0 || error == ENOENT);
/*
* If error == ENOENT then zfs_clone_range() was done from a
* removed (but opened) file (open(), unlink()).
*/
ASSERT(brt_entry_lookup(brt, brtvd, bre) == ENOENT);
int error = zap_remove_uint64_by_dnode(dn, &bre->bre_offset,
BRT_KEY_WORDS, tx);
VERIFY(error == 0 || error == ENOENT);
} else {
VERIFY0(brt_entry_update(brt, brtvd, bre, tx));
VERIFY0(zap_update_uint64_by_dnode(dn, &bre->bre_offset,
BRT_KEY_WORDS, 1, sizeof (bre->bre_refcount),
&bre->bre_refcount, tx));
}
}

Expand All @@ -1585,6 +1535,7 @@ brt_sync_table(brt_t *brt, dmu_tx_t *tx)
{
brt_vdev_t *brtvd;
brt_entry_t *bre;
dnode_t *dn;
uint64_t vdevid;
void *c;

Expand All @@ -1608,14 +1559,19 @@ brt_sync_table(brt_t *brt, dmu_tx_t *tx)
if (brtvd->bv_mos_brtvdev == 0)
brt_vdev_create(brt, brtvd, tx);

VERIFY0(dnode_hold(brt->brt_mos, brtvd->bv_mos_entries,
FTAG, &dn));

c = NULL;
while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) {
brt_sync_entry(brt, brtvd, bre, tx);
brt_sync_entry(dn, bre, tx);
brt_entry_free(bre);
ASSERT(brt->brt_nentries > 0);
brt->brt_nentries--;
}

dnode_rele(dn, FTAG);

brt_vdev_sync(brt, brtvd, tx);

if (brtvd->bv_totalcount == 0)
Expand Down
15 changes: 0 additions & 15 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -4174,21 +4174,6 @@ dmu_buf_get_objset(dmu_buf_t *db)
return (dbi->db_objset);
}

/*
 * Run DB_DNODE_ENTER() on the dbuf behind `db` and return the dnode that
 * DB_DNODE() yields for it.
 *
 * NOTE(review): presumably every call must be balanced by
 * dmu_buf_dnode_exit() on the same buffer -- confirm against the
 * DB_DNODE_ENTER()/DB_DNODE_EXIT() definitions.
 */
dnode_t *
dmu_buf_dnode_enter(dmu_buf_t *db)
{
	dmu_buf_impl_t *impl = (dmu_buf_impl_t *)db;
	dnode_t *dn;

	DB_DNODE_ENTER(impl);
	dn = DB_DNODE(impl);

	return (dn);
}

/*
 * Run DB_DNODE_EXIT() on the dbuf behind `db`.
 *
 * NOTE(review): presumably the counterpart of dmu_buf_dnode_enter() --
 * confirm against the DB_DNODE_ENTER()/DB_DNODE_EXIT() definitions.
 */
void
dmu_buf_dnode_exit(dmu_buf_t *db)
{
	dmu_buf_impl_t *impl = (dmu_buf_impl_t *)db;

	DB_DNODE_EXIT(impl);
}

static void
dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
{
Expand Down
18 changes: 14 additions & 4 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -712,8 +712,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
uint64_t len, zio_priority_t pri)
{
dnode_t *dn;
int64_t level2 = level;
uint64_t start, end, start2, end2;

if (dmu_prefetch_max == 0 || len == 0) {
dmu_prefetch_dnode(os, object, pri);
Expand All @@ -723,6 +721,18 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
if (dnode_hold(os, object, FTAG, &dn) != 0)
return;

dmu_prefetch_by_dnode(dn, level, offset, len, pri);

dnode_rele(dn, FTAG);
}

void
dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
uint64_t len, zio_priority_t pri)
{
int64_t level2 = level;
uint64_t start, end, start2, end2;

/*
* Depending on len we may do two prefetches: blocks [start, end) at
* level, and following blocks [start2, end2) at higher level2.
Expand Down Expand Up @@ -762,8 +772,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
for (uint64_t i = start2; i < end2; i++)
dbuf_prefetch(dn, level2, i, pri, 0);
rw_exit(&dn->dn_struct_rwlock);

dnode_rele(dn, FTAG);
}

/*
Expand Down Expand Up @@ -2563,6 +2571,8 @@ EXPORT_SYMBOL(dmu_bonus_hold_by_dnode);
EXPORT_SYMBOL(dmu_buf_hold_array_by_bonus);
EXPORT_SYMBOL(dmu_buf_rele_array);
EXPORT_SYMBOL(dmu_prefetch);
EXPORT_SYMBOL(dmu_prefetch_by_dnode);
EXPORT_SYMBOL(dmu_prefetch_dnode);
EXPORT_SYMBOL(dmu_free_range);
EXPORT_SYMBOL(dmu_free_long_range);
EXPORT_SYMBOL(dmu_free_long_object);
Expand Down
7 changes: 1 addition & 6 deletions module/zfs/dmu_recv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2353,7 +2353,6 @@ receive_process_write_record(struct receive_writer_arg *rwa,
if (rwa->heal) {
blkptr_t *bp;
dmu_buf_t *dbp;
dnode_t *dn;
int flags = DB_RF_CANFAIL;

if (rwa->raw)
Expand Down Expand Up @@ -2385,19 +2384,15 @@ receive_process_write_record(struct receive_writer_arg *rwa,
dmu_buf_rele(dbp, FTAG);
return (err);
}
dn = dmu_buf_dnode_enter(dbp);
/* Make sure the on-disk block and recv record sizes match */
if (drrw->drr_logical_size !=
dn->dn_datablkszsec << SPA_MINBLOCKSHIFT) {
if (drrw->drr_logical_size != dbp->db_size) {
err = ENOTSUP;
dmu_buf_dnode_exit(dbp);
dmu_buf_rele(dbp, FTAG);
return (err);
}
/* Get the block pointer for the corrupted block */
bp = dmu_buf_get_blkptr(dbp);
err = do_corrective_recv(rwa, drrw, rrd, bp);
dmu_buf_dnode_exit(dbp);
dmu_buf_rele(dbp, FTAG);
return (err);
}
Expand Down
Loading
Loading