Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[2.2] Additional BRT backports #15133

Merged
merged 3 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 76 additions & 1 deletion cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
#include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <sys/brt.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>

Expand Down Expand Up @@ -5342,12 +5343,20 @@ static const char *zdb_ot_extname[] = {
#define ZB_TOTAL DN_MAX_LEVELS
#define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1)

/*
 * One node of zdb's private, in-memory BRT (zdb_cb_t.zcb_brt), used by
 * zdb_count_block() to recognise blocks it has already counted once.
 */
typedef struct zdb_brt_entry {
/* Copy of the block pointer's first DVA (blk_dva[0]); the lookup key. */
dva_t zbre_dva;
/*
 * Seeded from brt_entry_get_refcount() on first sight and decremented
 * each time the same block is seen again; the entry is freed at zero.
 */
uint64_t zbre_refcount;
/* AVL linkage; its offset is handed to avl_create() for zcb_brt. */
avl_node_t zbre_node;
} zdb_brt_entry_t;

typedef struct zdb_cb {
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
uint64_t zcb_removing_size;
uint64_t zcb_checkpoint_size;
uint64_t zcb_dedup_asize;
uint64_t zcb_dedup_blocks;
uint64_t zcb_clone_asize;
uint64_t zcb_clone_blocks;
uint64_t zcb_psize_count[SPA_MAX_FOR_16M];
uint64_t zcb_lsize_count[SPA_MAX_FOR_16M];
uint64_t zcb_asize_count[SPA_MAX_FOR_16M];
Expand All @@ -5368,6 +5377,8 @@ typedef struct zdb_cb {
int zcb_haderrors;
spa_t *zcb_spa;
uint32_t **zcb_vd_obsolete_counts;
avl_tree_t zcb_brt;
boolean_t zcb_brt_is_active;
} zdb_cb_t;

/* test if two DVA offsets from same vdev are within the same metaslab */
Expand Down Expand Up @@ -5662,6 +5673,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
zcb->zcb_asize_total += BP_GET_ASIZE(bp);

if (zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) {
/*
* Cloned blocks are special. We need to count them, so we can
* later uncount them when reporting leaked space, and we must
* only claim them once.
*
* To do this, we keep our own in-memory BRT. For each block
* we haven't seen before, we look it up in the real BRT and
* if it's there, we note it and its refcount, then proceed as
* normal. If we see the block again, we count it as a clone
* and then give it no further consideration.
*/
zdb_brt_entry_t zbre_search, *zbre;
avl_index_t where;

zbre_search.zbre_dva = bp->blk_dva[0];
zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
if (zbre != NULL) {
zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
zcb->zcb_clone_blocks++;

zbre->zbre_refcount--;
if (zbre->zbre_refcount == 0) {
avl_remove(&zcb->zcb_brt, zbre);
umem_free(zbre, sizeof (zdb_brt_entry_t));
}
return;
}

uint64_t crefcnt = brt_entry_get_refcount(zcb->zcb_spa, bp);
if (crefcnt > 0) {
zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
UMEM_NOFAIL);
zbre->zbre_dva = bp->blk_dva[0];
zbre->zbre_refcount = crefcnt;
avl_insert(&zcb->zcb_brt, zbre, where);
}
}

if (dump_opt['L'])
return;

Expand Down Expand Up @@ -6664,6 +6714,20 @@ deleted_livelists_dump_mos(spa_t *spa)
iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
}

static int
zdb_brt_entry_compare(const void *zcn1, const void *zcn2)
{
const dva_t *dva1 = &((const zdb_brt_entry_t *)zcn1)->zbre_dva;
const dva_t *dva2 = &((const zdb_brt_entry_t *)zcn2)->zbre_dva;
int cmp;

cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
if (cmp == 0)
cmp = TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2));

return (cmp);
}

static int
dump_block_stats(spa_t *spa)
{
Expand All @@ -6678,6 +6742,13 @@ dump_block_stats(spa_t *spa)

zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);

if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
avl_create(&zcb->zcb_brt, zdb_brt_entry_compare,
sizeof (zdb_brt_entry_t),
offsetof(zdb_brt_entry_t, zbre_node));
zcb->zcb_brt_is_active = B_TRUE;
}

(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
(dump_opt['c'] == 1) ? "metadata " : "",
Expand Down Expand Up @@ -6779,7 +6850,8 @@ dump_block_stats(spa_t *spa)
metaslab_class_get_alloc(spa_special_class(spa)) +
metaslab_class_get_alloc(spa_dedup_class(spa)) +
get_unflushed_alloc_space(spa);
total_found = tzb->zb_asize - zcb->zcb_dedup_asize +
total_found =
tzb->zb_asize - zcb->zcb_dedup_asize - zcb->zcb_clone_asize +
zcb->zcb_removing_size + zcb->zcb_checkpoint_size;

if (total_found == total_alloc && !dump_opt['L']) {
Expand Down Expand Up @@ -6820,6 +6892,9 @@ dump_block_stats(spa_t *spa)
"bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
(u_longlong_t)zcb->zcb_dedup_blocks,
(double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
(void) printf("\t%-16s %14llu count: %6llu\n",
"bp cloned:", (u_longlong_t)zcb->zcb_clone_asize,
(u_longlong_t)zcb->zcb_clone_blocks);
(void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);

Expand Down
1 change: 1 addition & 0 deletions include/sys/brt.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ extern "C" {
#endif

extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp);
extern uint64_t brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp);

extern uint64_t brt_get_dspace(spa_t *spa);
extern uint64_t brt_get_used(spa_t *spa);
Expand Down
7 changes: 7 additions & 0 deletions module/os/linux/zfs/zpl_file_range.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(src_file, src_off, dst_file,
dst_off, len, flags);
#else
/*
* Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
* to the kernel that it should fallback to a content copy.
*/
if (ret == -EXDEV)
ret = -EOPNOTSUPP;
#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */

return (ret);
Expand Down
31 changes: 31 additions & 0 deletions module/zfs/brt.c
Original file line number Diff line number Diff line change
Expand Up @@ -1544,6 +1544,37 @@ brt_entry_decref(spa_t *spa, const blkptr_t *bp)
return (B_FALSE);
}

/*
 * Return the BRT reference count recorded for the block described by bp,
 * or 0 if the lookup reports ENOENT.
 *
 * The per-vdev AVL tree (bv_tree) is consulted first; only when the entry
 * is not found there do we fall back to brt_entry_lookup().  The whole
 * lookup runs between brt_rlock() and brt_unlock().
 */
uint64_t
brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
{
	brt_t *brt = spa->spa_brt;
	brt_entry_t key;
	uint64_t vdevid;
	uint64_t count;

	brt_entry_fill(bp, &key, &vdevid);

	brt_rlock(brt);

	brt_vdev_t *brtvd = brt_vdev(brt, vdevid);
	ASSERT(brtvd != NULL);

	brt_entry_t *found = avl_find(&brtvd->bv_tree, &key, NULL);
	if (found != NULL) {
		count = found->bre_refcount;
	} else {
		int error = brt_entry_lookup(brt, brtvd, &key);

		/* Only "found" and "not found" are expected here. */
		ASSERT(error == 0 || error == ENOENT);
		count = (error == ENOENT) ? 0 : key.bre_refcount;
	}

	brt_unlock(brt);
	return (count);
}

static void
brt_prefetch(brt_t *brt, const blkptr_t *bp)
{
Expand Down
1 change: 1 addition & 0 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ tags = ['functional', 'atime']

[tests/functional/block_cloning:Linux]
tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
'block_cloning_copyfilerange_fallback',
'block_cloning_ficlone', 'block_cloning_ficlonerange',
'block_cloning_ficlonerange_partial',
'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone',
Expand Down
2 changes: 2 additions & 0 deletions tests/test-runner/bin/zts-report.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ elif sys.platform.startswith('linux'):
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_partial':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_fallback':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_cross_dataset':
['SKIP', cfr_cross_reason],
})
Expand Down
2 changes: 1 addition & 1 deletion tests/zfs-tests/cmd/clonefile.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ main(int argc, char **argv)

int dfd = open(argv[optind+1], O_WRONLY|O_CREAT,
S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
if (sfd < 0) {
if (dfd < 0) {
fprintf(stderr, "open: %s: %s\n",
argv[optind+1], strerror(errno));
close(sfd);
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \
functional/block_cloning/block_cloning_copyfilerange.ksh \
functional/block_cloning/block_cloning_copyfilerange_partial.ksh \
functional/block_cloning/block_cloning_copyfilerange_fallback.ksh \
functional/block_cloning/block_cloning_disabled_copyfilerange.ksh \
functional/block_cloning/block_cloning_disabled_ficlone.ksh \
functional/block_cloning/block_cloning_disabled_ficlonerange.ksh \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023, Klara Inc.
# Copyright (c) 2023, Rob Norris <[email protected]>
#

# Pull in the common test library and the block-cloning helpers used below.
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

verify_runnable "global"

# Bail out as unsupported on kernels older than 4.5.
if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
log_unsupported "copy_file_range not available before Linux 4.5"
fi

# The assertion under test; reported again by log_pass at the end.
claim="copy_file_range will fall back to copy when cloning not possible."

log_assert $claim

# Exit handler (registered via log_onexit): destroy the test pool if it
# still exists.
function cleanup
{
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}

log_onexit cleanup

# Fresh pool with the block_cloning feature explicitly enabled.
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS

# Source file: 4 x 128K = 524288 bytes of random data.
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
log_must sync_pool $TESTPOOL


log_note "Copying entire file with copy_file_range"

# NOTE(review): the three numbers are presumably <src offset> <dst offset>
# <length> -- confirm against clonefile's usage text.
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone

# NOTE(review): judging by the expected values in this test, unique_blocks
# appears to list the block numbers that file and clone still share --
# confirm against block_cloning.kshlib.
typeset blocks=$(unique_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "1 2 3 4" ]


log_note "Copying within a block with copy_file_range"

# 64K starting at offset 32K: lies inside the first 128K of the file
# (assumes 128K records -- TODO confirm the pool's recordsize).
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 32768 32768 65536
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone

typeset blocks=$(unique_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "2 3 4" ]


log_note "Copying across a block with copy_file_range"

# 128K starting at offset 320K (2.5 x 128K): straddles the boundary
# between the third and fourth 128K ranges of the file.
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 327680 327680 131072
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone

typeset blocks=$(unique_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "2" ]

log_pass $claim
Loading