From a74072e2d557892e718fd166db54431d333bf487 Mon Sep 17 00:00:00 2001 From: wangdi Date: Mon, 28 Aug 2023 01:55:45 -0700 Subject: [PATCH 01/80] DAOS-14208 container: do not disable vos aggregation (#12934) Do not disable vos aggregation during rebuild, only disable EC aggregation. Signed-off-by: Di Wang --- src/container/srv_target.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 999c01f344f..0edbe845e7f 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -191,14 +191,11 @@ cont_aggregate_runnable(struct ds_cont_child *cont, struct sched_request *req, return false; } - if (pool->sp_rebuilding) { - if (vos_agg) - cont->sc_vos_agg_active = 0; - else - cont->sc_ec_agg_active = 0; - D_DEBUG(DB_EPC, DF_CONT": skip %s aggregation during rebuild %d.\n", + if (pool->sp_rebuilding && !vos_agg) { + cont->sc_ec_agg_active = 0; + D_DEBUG(DB_EPC, DF_CONT": skip EC aggregation during rebuild %d.\n", DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), - vos_agg ? "VOS" : "EC", pool->sp_rebuilding); + pool->sp_rebuilding); return false; } From 13c88130ce99bf35896b3975e35998284faf8f0d Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Mon, 28 Aug 2023 17:28:21 +0800 Subject: [PATCH 02/80] DAOS-14211 vos: fix improper assert in umark_removals() (#12910) * DAOS-14211 vos: fix improper assert in umark_removals() Fix an improper assert in unmark_removals(). 
Signed-off-by: Niu Yawei --- src/container/srv_target.c | 7 ++++--- src/vos/vos_aggregate.c | 12 +++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 0edbe845e7f..81b9fef4f0c 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -55,10 +55,11 @@ agg_rate_ctl(void *arg) if (dss_ult_exiting(req) || pool->sp_reclaim == DAOS_RECLAIM_DISABLED) return -1; - /* EC aggregation needs to be parsed during rebuilding to avoid the race - * between EC rebuild and EC aggregation. + /* + * XXX temporary workaround: EC aggregation needs to be paused during rebuilding + * to avoid the race between EC rebuild and EC aggregation. **/ - if (pool->sp_rebuilding && cont->sc_ec_agg_active) + if (pool->sp_rebuilding && cont->sc_ec_agg_active && !param->ap_vos_agg) return -1; /* System is idle, let aggregation run in tight mode */ diff --git a/src/vos/vos_aggregate.c b/src/vos/vos_aggregate.c index 2bea0131567..af4783faf7a 100644 --- a/src/vos/vos_aggregate.c +++ b/src/vos/vos_aggregate.c @@ -76,7 +76,7 @@ struct agg_rmv_ent { uint32_t re_aggregate : 1, /* Aggregate of one or more records */ re_child : 1; /* Contained in aggregate record */ /** Refcount of physical records that reference this removal */ - int re_phy_count; + unsigned int re_phy_count; }; /* EV tree logical entry */ @@ -1345,8 +1345,14 @@ unmark_removals(struct agg_merge_window *mw, const struct agg_phy_ent *phy_ent) if (rmv_ent->re_rect.rc_ex.ex_lo > phy_ent->pe_rect.rc_ex.ex_hi) continue; - D_ASSERT(rmv_ent->re_phy_count > 0); - rmv_ent->re_phy_count--; + /* + * Aggregation could abort before processing the invisible record + * which being covered by a removal record, in such case, the removal + * record & physical record are both enqueued but the removal record + * isn't referenced yet. 
+ */ + if (rmv_ent->re_phy_count > 0) + rmv_ent->re_phy_count--; } } From c32445fb4aba40698940ab7675e86bf7c77818d4 Mon Sep 17 00:00:00 2001 From: mjean308 <48688872+mjean308@users.noreply.github.com> Date: Mon, 28 Aug 2023 08:36:19 -0400 Subject: [PATCH 03/80] DAOS-14230 test: Add unique id to dfuse mountpoint (#12954) By adding a unique dir to each dfuse mount this will ensure that every job does not reuse a mount dir. Signed-off-by: Maureen Jean --- src/tests/ftest/util/soak_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/util/soak_utils.py b/src/tests/ftest/util/soak_utils.py index 8690c10784a..577fde705e3 100644 --- a/src/tests/ftest/util/soak_utils.py +++ b/src/tests/ftest/util/soak_utils.py @@ -700,7 +700,9 @@ def start_dfuse(self, pool, container, name=None, job_spec=None): dfuse.bind_cores = self.params.get("cores", dfuse.namespace, None) dfuse.get_params(self) # update dfuse params; mountpoint for each container - mount_dir = dfuse.mount_dir.value + unique = get_random_string(5, self.used) + self.used.append(unique) + mount_dir = dfuse.mount_dir.value + unique dfuse.update_params(mount_dir=mount_dir, pool=pool.identifier, cont=container.identifier) dfuselog = os.path.join( self.soak_log_dir, From b5b4e377cc42cfb287a07db279ae09931caf7d31 Mon Sep 17 00:00:00 2001 From: Jeff Olivier Date: Mon, 28 Aug 2023 08:44:47 -0600 Subject: [PATCH 04/80] DAOS-623 doc: Add some additional Githook docs (#12933) Document some of the requirements for getting full use from the githooks for developers. 
Signed-off-by: Jeff Olivier Co-authored-by: Dalton Bohning Co-authored-by: Ashley Pittman --- docs/dev/contributing.md | 4 +- utils/githooks/README.md | 99 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 8 deletions(-) diff --git a/docs/dev/contributing.md b/docs/dev/contributing.md index 1e2463324a1..f0c24aa2fdc 100644 --- a/docs/dev/contributing.md +++ b/docs/dev/contributing.md @@ -13,7 +13,9 @@ features, offer to add a feature, or just begin a dialog about DAOS: ## Coding Rules Please check the [coding conventions](https://wiki.daos.io/spaces/DC/pages/4836655701/Coding+Rules) -for code contribution. +for code contribution. See [githooks](../../utils/githooks/README.md) for information about setting up +some automated checks on commit to help with adherence to the rules. + ## Commit Comments diff --git a/utils/githooks/README.md b/utils/githooks/README.md index 03f6e5660f4..0db2a5cb653 100644 --- a/utils/githooks/README.md +++ b/utils/githooks/README.md @@ -1,13 +1,98 @@ -# Git hooks +# About DAOS Git hooks -This directory contains a githooks framework that can be employed locally to automate things like -copyright updates on modified files. +Githooks are a [well documented](https://git-scm.com/docs/githooks) feature +of git that enable various local exectubles to be run during various stages of +the git workflow. -It supports adding local hooks by placing your custom hook in -`utils/githooks/.d/-user-` files. +The DAOS repo contains several built-in githooks that are intended +to help developers conform to DAOS community coding standards and practices +and to avoid some common mistakes in the development cycle. -To use the commit hooks here, do the following locally, or alternatively copy the files into place. +To enable these standard githooks requires a two step process: + +1. 
Install the hooks + +Configure your core.hookspath as follows (Recommended): ```sh -git config core.hookspath utils/githooks +git config.hookspath utils/githooks ``` + +Additionally, one can copy the files into an already configured path. + +With the first option, any new githooks added to the repository will +automatically run, but possibly require additional software to produce the +desired effect. Additionally, as the branch changes, the githooks +change with it. + +2. Install all of the required tools + +The Githooks framework in DAOS is such that the hooks will all run. +However, some hooks will simply check for required software and are +effectively a noop if such is not installed. + +On many systems, the required packages can be installed through standard means +but customization may be required. Some are specified in +[requirements.txt](../../requirements.txt) so can be installed using +`pip install -r requirements.txt` and `pip install -r utils/cq/requirements.txt` +which can also be done using a virtual environment. The following +packages are used by built-in githooks. + +1. clang-format version 14.0.5 or higher. If the check is unable to parse +the version output, it will fail. Try running +`/site_scons/site_tools/extra/extra.py` to check. +2. pylint +3. flake8 +4. yamllint +5. gofmt + +There is a daos wrapper around pylint at `utils/cq/daos_pylint.py` which will perform standard +pylint checks whilst managing scons and PYTHONPATH setup changes automatically. Installing +the python packages in `utils/cq/requirements.txt` will allow it to test for all the python dependencies. + +It is important to check the output on commit for any errors that may indicate +any one of the required tools is missing. + +Additionally, [find_base.sh](find_base.sh) attempts to determine the base +branch using `gh`, the Github CLI. If this isn't installed, it will use +`master` as the base which can result in a larger diff and more files being +checked than expected. 
+ +## Checks performed by built-in scripts + +### pre-commit + +1. clang-format will update any C/C++ files changed using configuration in +[.clang-format](../../.clang-format). If anything changed, it will exit, +allowing the user to inspect the changes and retry the commit. +2. pylint will check python changes and fail if there are errors. +3. flake8 will check python changes and fail if there are errors. +4. yamllint will check YAML file changes and fail if there are errors. +5. gofmt will check Go files changed and fail if there are errors. +6. Copyrights will be checked and updated if needed. + +### prepare-commit-msg + +1. Checks to see if any submodules have been updated in the patch and +inserts a warning message in a comment. If the change is expected, +the comment can be ignored. If not, it's likely someone else changed +it and an update is needed before commit. In such a case, abort the +commit by exiting without saving. + +### commit-msg + +1. Checks to see if githooks are installed locally and adds a watermark +to the commit message that is checked in CI. It is not fatal but +gatekeepers may ask that githooks be used. + +## Adding user specific hooks + +The framework is extensible. In order to add a custom user hook, a developer +simply must add an executable file using the following naming convention: + +`utils/githooks/.d/-user-` + +This pattern appears in [.gitignore](../../.gitignore) so such files cannot be +checked in. If such a file would be generically useful, however, consider +renaming it to remove `-user` and pushing a pull request and update this +document accordingly. From 305eb486826cc61cf60a8ceb1c497909e3b7ea07 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 28 Aug 2023 21:21:46 +0100 Subject: [PATCH 05/80] DAOS-13625 dfuse: Replace fs_handle with dfuse_info. (#12894) Last change to migrate these structs, this just does a global replace on variable names from one to the other. 
Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 7 +- src/client/dfuse/dfuse_core.c | 141 +++++++++++++++---------------- src/client/dfuse/dfuse_fuseops.c | 30 +++---- 3 files changed, 87 insertions(+), 91 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index b79186d350f..c0a6453354d 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -364,11 +364,10 @@ struct dfuse_pool { /** Container information * - * This represents a container that DFUSE is accessing. All containers - * will have a valid dfs_handle. + * This represents a container that DFUSE is accessing. All containers will have a valid dfs + * handle. * - * Note this struct used to be dfuse_dfs, hence the dfs_prefix for it's - * members. + * Note this struct used to be dfuse_dfs, hence the dfs_prefix for it's members. * * uuid may be NULL for pool inodes. */ diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index 331e4e90636..f49612c5e4b 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -398,7 +398,7 @@ d_hash_table_ops_t cont_hops = { * Return code is a system errno. */ int -dfuse_pool_connect(struct dfuse_info *fs_handle, const char *label, struct dfuse_pool **_dfp) +dfuse_pool_connect(struct dfuse_info *dfuse_info, const char *label, struct dfuse_pool **_dfp) { struct dfuse_pool *dfp; d_list_t *rlink; @@ -411,7 +411,7 @@ dfuse_pool_connect(struct dfuse_info *fs_handle, const char *label, struct dfuse atomic_init(&dfp->dfp_ref, 1); - DFUSE_TRA_UP(dfp, fs_handle, "dfp"); + DFUSE_TRA_UP(dfp, dfuse_info, "dfp"); /* Handle the case where no identifier is supplied, this is for when dfuse * is started without any pool on the command line. 
@@ -419,7 +419,7 @@ dfuse_pool_connect(struct dfuse_info *fs_handle, const char *label, struct dfuse if (label[0]) { daos_pool_info_t p_info = {}; - rc = daos_pool_connect(label, fs_handle->di_group, DAOS_PC_RO, &dfp->dfp_poh, + rc = daos_pool_connect(label, dfuse_info->di_group, DAOS_PC_RO, &dfp->dfp_poh, &p_info, NULL); if (rc) { if (rc == -DER_NO_PERM || rc == -DER_NONEXIST) @@ -434,21 +434,21 @@ dfuse_pool_connect(struct dfuse_info *fs_handle, const char *label, struct dfuse uuid_copy(dfp->dfp_pool, p_info.pi_uuid); } - rc = d_hash_table_create_inplace(D_HASH_FT_LRU | D_HASH_FT_EPHEMERAL, 3, fs_handle, + rc = d_hash_table_create_inplace(D_HASH_FT_LRU | D_HASH_FT_EPHEMERAL, 3, dfuse_info, &cont_hops, &dfp->dfp_cont_table); if (rc != -DER_SUCCESS) { DFUSE_TRA_ERROR(dfp, "Failed to create hash table: " DF_RC, DP_RC(rc)); D_GOTO(err_disconnect, rc = daos_der2errno(rc)); } - atomic_fetch_add_relaxed(&fs_handle->di_pool_count, 1); + atomic_fetch_add_relaxed(&dfuse_info->di_pool_count, 1); - rlink = d_hash_rec_find_insert(&fs_handle->di_pool_table, &dfp->dfp_pool, + rlink = d_hash_rec_find_insert(&dfuse_info->di_pool_table, &dfp->dfp_pool, sizeof(dfp->dfp_pool), &dfp->dfp_entry); if (rlink != &dfp->dfp_entry) { DFUSE_TRA_DEBUG(dfp, "Found existing pool, reusing"); - _ph_free(fs_handle, dfp); + _ph_free(dfuse_info, dfp); dfp = container_of(rlink, struct dfuse_pool, dfp_entry); } @@ -1013,43 +1013,43 @@ dfuse_cache_evict(struct dfuse_inode_entry *ie) } int -dfuse_fs_init(struct dfuse_info *fs_handle) +dfuse_fs_init(struct dfuse_info *dfuse_info) { int rc; int i; - D_ALLOC_ARRAY(fs_handle->di_eqt, fs_handle->di_eq_count); - if (fs_handle->di_eqt == NULL) + D_ALLOC_ARRAY(dfuse_info->di_eqt, dfuse_info->di_eq_count); + if (dfuse_info->di_eqt == NULL) D_GOTO(err, rc = -DER_NOMEM); - atomic_init(&fs_handle->di_inode_count, 0); - atomic_init(&fs_handle->di_fh_count, 0); - atomic_init(&fs_handle->di_pool_count, 0); - atomic_init(&fs_handle->di_container_count, 0); + 
atomic_init(&dfuse_info->di_inode_count, 0); + atomic_init(&dfuse_info->di_fh_count, 0); + atomic_init(&dfuse_info->di_pool_count, 0); + atomic_init(&dfuse_info->di_container_count, 0); - rc = d_hash_table_create_inplace(D_HASH_FT_LRU | D_HASH_FT_EPHEMERAL, 3, fs_handle, - &pool_hops, &fs_handle->di_pool_table); + rc = d_hash_table_create_inplace(D_HASH_FT_LRU | D_HASH_FT_EPHEMERAL, 3, dfuse_info, + &pool_hops, &dfuse_info->di_pool_table); if (rc != 0) D_GOTO(err, rc); - rc = d_hash_table_create_inplace(D_HASH_FT_LRU | D_HASH_FT_EPHEMERAL, 16, fs_handle, - &ie_hops, &fs_handle->dpi_iet); + rc = d_hash_table_create_inplace(D_HASH_FT_LRU | D_HASH_FT_EPHEMERAL, 16, dfuse_info, + &ie_hops, &dfuse_info->dpi_iet); if (rc != 0) D_GOTO(err_pt, rc); - atomic_init(&fs_handle->di_ino_next, 2); - atomic_init(&fs_handle->di_eqt_idx, 0); + atomic_init(&dfuse_info->di_ino_next, 2); + atomic_init(&dfuse_info->di_eqt_idx, 0); - D_SPIN_INIT(&fs_handle->di_lock, 0); + D_SPIN_INIT(&dfuse_info->di_lock, 0); - D_RWLOCK_INIT(&fs_handle->di_forget_lock, 0); + D_RWLOCK_INIT(&dfuse_info->di_forget_lock, 0); - for (i = 0; i < fs_handle->di_eq_count; i++) { - struct dfuse_eq *eqt = &fs_handle->di_eqt[i]; + for (i = 0; i < dfuse_info->di_eq_count; i++) { + struct dfuse_eq *eqt = &dfuse_info->di_eqt[i]; - eqt->de_handle = fs_handle; + eqt->de_handle = dfuse_info; - DFUSE_TRA_UP(eqt, fs_handle, "event_queue"); + DFUSE_TRA_UP(eqt, dfuse_info, "event_queue"); /* Create the semaphore before the eq as there's no way to check if sem_init() * has been called or not and it's invalid to call sem_destroy if it hasn't. 
This @@ -1068,15 +1068,15 @@ dfuse_fs_init(struct dfuse_info *fs_handle) } } - fs_handle->di_shutdown = false; + dfuse_info->di_shutdown = false; return rc; err_eq: - D_SPIN_DESTROY(&fs_handle->di_lock); - D_RWLOCK_DESTROY(&fs_handle->di_forget_lock); + D_SPIN_DESTROY(&dfuse_info->di_lock); + D_RWLOCK_DESTROY(&dfuse_info->di_forget_lock); - for (i = 0; i < fs_handle->di_eq_count; i++) { - struct dfuse_eq *eqt = &fs_handle->di_eqt[i]; + for (i = 0; i < dfuse_info->di_eq_count; i++) { + struct dfuse_eq *eqt = &dfuse_info->di_eqt[i]; int rc2; if (daos_handle_is_inval(eqt->de_eq)) @@ -1089,11 +1089,11 @@ dfuse_fs_init(struct dfuse_info *fs_handle) sem_destroy(&eqt->de_sem); DFUSE_TRA_DOWN(eqt); } - d_hash_table_destroy_inplace(&fs_handle->dpi_iet, false); + d_hash_table_destroy_inplace(&dfuse_info->dpi_iet, false); err_pt: - d_hash_table_destroy_inplace(&fs_handle->di_pool_table, false); + d_hash_table_destroy_inplace(&dfuse_info->di_pool_table, false); err: - D_FREE(fs_handle->di_eqt); + D_FREE(dfuse_info->di_eqt); return rc; } @@ -1221,7 +1221,7 @@ dfuse_event_release(void *arg) } int -dfuse_fs_start(struct dfuse_info *fs_handle, struct dfuse_cont *dfs) +dfuse_fs_start(struct dfuse_info *dfuse_info, struct dfuse_cont *dfs) { struct fuse_args args = {0}; struct dfuse_inode_entry *ie = NULL; @@ -1238,7 +1238,7 @@ dfuse_fs_start(struct dfuse_info *fs_handle, struct dfuse_cont *dfs) args.argc = 5; - if (fs_handle->di_multi_user) + if (dfuse_info->di_multi_user) args.argc++; /* These allocations are freed later by libfuse so do not use the @@ -1269,7 +1269,7 @@ dfuse_fs_start(struct dfuse_info *fs_handle, struct dfuse_cont *dfs) if (!args.argv[4]) D_GOTO(err, rc = -DER_NOMEM); - if (fs_handle->di_multi_user) { + if (dfuse_info->di_multi_user) { args.argv[5] = strdup("-oallow_other"); if (!args.argv[5]) D_GOTO(err, rc = -DER_NOMEM); @@ -1280,12 +1280,12 @@ dfuse_fs_start(struct dfuse_info *fs_handle, struct dfuse_cont *dfs) if (!ie) D_GOTO(err, rc = -DER_NOMEM); - 
DFUSE_TRA_UP(ie, fs_handle, "root_inode"); + DFUSE_TRA_UP(ie, dfuse_info, "root_inode"); ie->ie_dfs = dfs; ie->ie_root = true; ie->ie_parent = 1; - dfuse_ie_init(fs_handle, ie); + dfuse_ie_init(dfuse_info, ie); if (dfs->dfs_ops == &dfuse_dfs_ops) { rc = dfs_lookup(dfs->dfs_ns, "/", O_RDWR, &ie->ie_obj, NULL, &ie->ie_stat); @@ -1301,25 +1301,22 @@ dfuse_fs_start(struct dfuse_info *fs_handle, struct dfuse_cont *dfs) ie->ie_stat.st_ino = 1; dfs->dfs_ino = ie->ie_stat.st_ino; - rc = d_hash_rec_insert(&fs_handle->dpi_iet, - &ie->ie_stat.st_ino, - sizeof(ie->ie_stat.st_ino), - &ie->ie_htl, - false); + rc = d_hash_rec_insert(&dfuse_info->dpi_iet, &ie->ie_stat.st_ino, + sizeof(ie->ie_stat.st_ino), &ie->ie_htl, false); D_ASSERT(rc == -DER_SUCCESS); - rc = d_slab_init(&fs_handle->di_slab, fs_handle); + rc = d_slab_init(&dfuse_info->di_slab, dfuse_info); if (rc != -DER_SUCCESS) D_GOTO(err_ie_remove, rc); - for (i = 0; i < fs_handle->di_eq_count; i++) { - struct dfuse_eq *eqt = &fs_handle->di_eqt[i]; + for (i = 0; i < dfuse_info->di_eq_count; i++) { + struct dfuse_eq *eqt = &dfuse_info->di_eqt[i]; - rc = d_slab_register(&fs_handle->di_slab, &read_slab, eqt, &eqt->de_read_slab); + rc = d_slab_register(&dfuse_info->di_slab, &read_slab, eqt, &eqt->de_read_slab); if (rc != -DER_SUCCESS) D_GOTO(err_threads, rc); - rc = d_slab_register(&fs_handle->di_slab, &write_slab, eqt, &eqt->de_write_slab); + rc = d_slab_register(&dfuse_info->di_slab, &write_slab, eqt, &eqt->de_write_slab); if (rc != -DER_SUCCESS) D_GOTO(err_threads, rc); @@ -1330,15 +1327,15 @@ dfuse_fs_start(struct dfuse_info *fs_handle, struct dfuse_cont *dfs) pthread_setname_np(eqt->de_thread, "dfuse_progress"); } - rc = dfuse_launch_fuse(fs_handle, &args); + rc = dfuse_launch_fuse(dfuse_info, &args); if (rc == -DER_SUCCESS) { fuse_opt_free_args(&args); return rc; } err_threads: - for (i = 0; i < fs_handle->di_eq_count; i++) { - struct dfuse_eq *eqt = &fs_handle->di_eqt[i]; + for (i = 0; i < dfuse_info->di_eq_count; i++) { 
+ struct dfuse_eq *eqt = &dfuse_info->di_eqt[i]; if (!eqt->de_thread) continue; @@ -1348,14 +1345,14 @@ dfuse_fs_start(struct dfuse_info *fs_handle, struct dfuse_cont *dfs) sem_destroy(&eqt->de_sem); } - d_slab_destroy(&fs_handle->di_slab); + d_slab_destroy(&dfuse_info->di_slab); err_ie_remove: dfs_release(ie->ie_obj); - d_hash_rec_delete_at(&fs_handle->dpi_iet, &ie->ie_htl); + d_hash_rec_delete_at(&dfuse_info->dpi_iet, &ie->ie_htl); err_ie: - dfuse_ie_free(fs_handle, ie); + dfuse_ie_free(dfuse_info, ie); err: - DFUSE_TRA_ERROR(fs_handle, "Failed to start dfuse, rc: " DF_RC, DP_RC(rc)); + DFUSE_TRA_ERROR(dfuse_info, "Failed to start dfuse, rc: " DF_RC, DP_RC(rc)); fuse_opt_free_args(&args); return rc; } @@ -1448,7 +1445,7 @@ dfuse_pool_close_cb(d_list_t *rlink, void *handle) * operation. */ int -dfuse_fs_stop(struct dfuse_info *fs_handle) +dfuse_fs_stop(struct dfuse_info *dfuse_info) { d_list_t *rlink; uint64_t refs = 0; @@ -1456,34 +1453,34 @@ dfuse_fs_stop(struct dfuse_info *fs_handle) int rc; int i; - DFUSE_TRA_INFO(fs_handle, "Flushing inode table"); + DFUSE_TRA_INFO(dfuse_info, "Flushing inode table"); - fs_handle->di_shutdown = true; + dfuse_info->di_shutdown = true; - for (i = 0; i < fs_handle->di_eq_count; i++) { - struct dfuse_eq *eqt = &fs_handle->di_eqt[i]; + for (i = 0; i < dfuse_info->di_eq_count; i++) { + struct dfuse_eq *eqt = &dfuse_info->di_eqt[i]; sem_post(&eqt->de_sem); } - for (i = 0; i < fs_handle->di_eq_count; i++) { - struct dfuse_eq *eqt = &fs_handle->di_eqt[i]; + for (i = 0; i < dfuse_info->di_eq_count; i++) { + struct dfuse_eq *eqt = &dfuse_info->di_eqt[i]; pthread_join(eqt->de_thread, NULL); sem_destroy(&eqt->de_sem); } - rc = d_hash_table_traverse(&fs_handle->dpi_iet, ino_flush, fs_handle); + rc = d_hash_table_traverse(&dfuse_info->dpi_iet, ino_flush, dfuse_info); - DFUSE_TRA_INFO(fs_handle, "Flush complete: "DF_RC, DP_RC(rc)); + DFUSE_TRA_INFO(dfuse_info, "Flush complete: " DF_RC, DP_RC(rc)); - DFUSE_TRA_INFO(fs_handle, "Draining inode 
table"); + DFUSE_TRA_INFO(dfuse_info, "Draining inode table"); do { struct dfuse_inode_entry *ie; uint32_t ref; - rlink = d_hash_rec_first(&fs_handle->dpi_iet); + rlink = d_hash_rec_first(&dfuse_info->dpi_iet); if (!rlink) break; @@ -1498,18 +1495,18 @@ dfuse_fs_stop(struct dfuse_info *fs_handle) DFUSE_TRA_DEBUG(ie, "Dropping %d", ref); refs += ref; - d_hash_rec_ndecref(&fs_handle->dpi_iet, ref, rlink); + d_hash_rec_ndecref(&dfuse_info->dpi_iet, ref, rlink); handles++; } while (rlink); if (handles && rc != -DER_SUCCESS && rc != -DER_NO_HDL) - DFUSE_TRA_WARNING(fs_handle, "dropped %lu refs on %u inodes", refs, handles); + DFUSE_TRA_WARNING(dfuse_info, "dropped %lu refs on %u inodes", refs, handles); else - DFUSE_TRA_INFO(fs_handle, "dropped %lu refs on %u inodes", refs, handles); + DFUSE_TRA_INFO(dfuse_info, "dropped %lu refs on %u inodes", refs, handles); - d_hash_table_traverse(&fs_handle->di_pool_table, dfuse_pool_close_cb, NULL); + d_hash_table_traverse(&dfuse_info->di_pool_table, dfuse_pool_close_cb, NULL); - d_slab_destroy(&fs_handle->di_slab); + d_slab_destroy(&dfuse_info->di_slab); return 0; } diff --git a/src/client/dfuse/dfuse_fuseops.c b/src/client/dfuse/dfuse_fuseops.c index 208b85a21a1..8458b91d2cb 100644 --- a/src/client/dfuse/dfuse_fuseops.c +++ b/src/client/dfuse/dfuse_fuseops.c @@ -60,47 +60,47 @@ dfuse_show_flags(void *handle, unsigned int in) static void dfuse_fuse_init(void *arg, struct fuse_conn_info *conn) { - struct dfuse_info *fs_handle = arg; + struct dfuse_info *dfuse_info = arg; - DFUSE_TRA_INFO(fs_handle, "Fuse configuration"); + DFUSE_TRA_INFO(dfuse_info, "Fuse configuration"); - DFUSE_TRA_INFO(fs_handle, "Proto %d %d", conn->proto_major, conn->proto_minor); + DFUSE_TRA_INFO(dfuse_info, "Proto %d %d", conn->proto_major, conn->proto_minor); /* These are requests dfuse makes to the kernel, but are then capped by the kernel itself, * for max_read zero means "as large as possible" which is what we want, but then dfuse * does not know how 
large to pre-allocate any buffers. */ - DFUSE_TRA_INFO(fs_handle, "max read %#x", conn->max_read); - DFUSE_TRA_INFO(fs_handle, "max write %#x", conn->max_write); - DFUSE_TRA_INFO(fs_handle, "readahead %#x", conn->max_readahead); + DFUSE_TRA_INFO(dfuse_info, "max read %#x", conn->max_read); + DFUSE_TRA_INFO(dfuse_info, "max write %#x", conn->max_write); + DFUSE_TRA_INFO(dfuse_info, "readahead %#x", conn->max_readahead); #if HAVE_CACHE_READDIR - DFUSE_TRA_INFO(fs_handle, "kernel readdir cache support compiled in"); + DFUSE_TRA_INFO(dfuse_info, "kernel readdir cache support compiled in"); #else - DFUSE_TRA_INFO(fs_handle, "no support for kernel readdir cache available"); + DFUSE_TRA_INFO(dfuse_info, "no support for kernel readdir cache available"); #endif - DFUSE_TRA_INFO(fs_handle, "Capability supported by kernel %#x", conn->capable); + DFUSE_TRA_INFO(dfuse_info, "Capability supported by kernel %#x", conn->capable); - dfuse_show_flags(fs_handle, conn->capable); + dfuse_show_flags(dfuse_info, conn->capable); - DFUSE_TRA_INFO(fs_handle, "Capability requested %#x", conn->want); + DFUSE_TRA_INFO(dfuse_info, "Capability requested %#x", conn->want); conn->want |= FUSE_CAP_READDIRPLUS; conn->want |= FUSE_CAP_READDIRPLUS_AUTO; conn->time_gran = 1; - if (fs_handle->di_wb_cache) + if (dfuse_info->di_wb_cache) conn->want |= FUSE_CAP_WRITEBACK_CACHE; - dfuse_show_flags(fs_handle, conn->want); + dfuse_show_flags(dfuse_info, conn->want); conn->max_background = 16; conn->congestion_threshold = 8; - DFUSE_TRA_INFO(fs_handle, "max_background %d", conn->max_background); - DFUSE_TRA_INFO(fs_handle, "congestion_threshold %d", conn->congestion_threshold); + DFUSE_TRA_INFO(dfuse_info, "max_background %d", conn->max_background); + DFUSE_TRA_INFO(dfuse_info, "congestion_threshold %d", conn->congestion_threshold); } void From 88d4a5a7f513419cba83e332d2a0833ef80cb836 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard-intel@users.noreply.github.com> Date: Tue, 29 Aug 2023 
05:06:15 +0200 Subject: [PATCH 06/80] DAOS-13967 engine: Fix NVMe storage leakage with md-on-ssd (#12715) Fixed RDB blob leak. --------- Signed-off-by: Cedric Koch-Hofer Signed-off-by: Li Wei Co-authored-by: Cedric Koch-Hofer Co-authored-by: Li Wei --- src/control/server/ctl_storage_rpc.go | 16 ++++++++-------- src/include/daos_srv/rsvc.h | 1 + src/mgmt/srv_target.c | 12 ++++++++++++ src/rsvc/srv.c | 12 +++++++++--- src/tests/ftest/pool/create.yaml | 2 +- src/vos/vos_pool.c | 5 +++-- utils/ansible/ftest/templates/daos-make.sh.j2 | 18 ++++++++++++------ 7 files changed, 46 insertions(+), 20 deletions(-) diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index c949ac927c0..cf0ca43be13 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -309,15 +309,15 @@ func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdju if dev.GetRoleBits()&storage.BdevRoleMeta != 0 { clusterCount := getClusterCount(dev.GetMetaSize(), engineTargetNb, clusterSize) - c.log.Tracef("Removing %d Metadata clusters from the usable size of the SMD device %s (rank %d, ctlr %s): ", - clusterCount, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) + c.log.Tracef("Removing %d Metadata clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", + clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) subtrClusterCount += clusterCount } if dev.GetRoleBits()&storage.BdevRoleWAL != 0 { clusterCount := getClusterCount(dev.GetMetaWalSize(), engineTargetNb, clusterSize) - c.log.Tracef("Removing %d Metadata WAL clusters from the usable size of the SMD device %s (rank %d, ctlr %s): ", - clusterCount, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) + c.log.Tracef("Removing %d Metadata WAL clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", + clusterCount, clusterSize, 
dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) subtrClusterCount += clusterCount } @@ -327,15 +327,15 @@ func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdju if dev.GetRoleBits()&storage.BdevRoleMeta != 0 { clusterCount := getClusterCount(dev.GetRdbSize(), 1, clusterSize) - c.log.Tracef("Removing %d RDB clusters the usable size of the SMD device %s (rank %d, ctlr %s)", - clusterCount, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) + c.log.Tracef("Removing %d RDB clusters (cluster size: %d) the usable size of the SMD device %s (rank %d, ctlr %s)", + clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) subtrClusterCount += clusterCount } if dev.GetRoleBits()&storage.BdevRoleWAL != 0 { clusterCount := getClusterCount(dev.GetRdbWalSize(), 1, clusterSize) - c.log.Tracef("Removing %d RDB WAL clusters from the usable size of the SMD device %s (rank %d, ctlr %s)", - clusterCount, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) + c.log.Tracef("Removing %d RDB WAL clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s)", + clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) subtrClusterCount += clusterCount } diff --git a/src/include/daos_srv/rsvc.h b/src/include/daos_srv/rsvc.h index 781ba04b6a3..aeaee9b94e5 100644 --- a/src/include/daos_srv/rsvc.h +++ b/src/include/daos_srv/rsvc.h @@ -107,6 +107,7 @@ struct ds_rsvc { int s_ref; ABT_mutex s_mutex; /* for the following members */ bool s_stop; + bool s_destroy; /* when putting last ref */ uint64_t s_term; /**< leader term */ enum ds_rsvc_state s_state; ABT_cond s_state_cv; diff --git a/src/mgmt/srv_target.c b/src/mgmt/srv_target.c index 49e017df269..dde422699a2 100644 --- a/src/mgmt/srv_target.c +++ b/src/mgmt/srv_target.c @@ -275,6 +275,11 @@ cleanup_leftover_cb(uuid_t uuid, void *arg) /* destroy blobIDs */ D_DEBUG(DB_MGMT, "Clear SPDK 
blobs for pool "DF_UUID"\n", DP_UUID(uuid)); + rc = vos_pool_kill(uuid, VOS_POF_RDB); + if (rc != 0) { + D_ERROR(DF_UUID": kill pool service VOS pool: "DF_RC"\n", DP_UUID(uuid), DP_RC(rc)); + return rc; + } uuid_copy(id.uuid, uuid); rc = dss_thread_collective(tgt_kill_pool, &id, 0); if (rc != 0) { @@ -1298,6 +1303,12 @@ ds_mgmt_hdlr_tgt_destroy(crt_rpc_t *td_req) rc = access(path, F_OK); if (rc >= 0) { /** target is still there, destroy it */ + rc = vos_pool_kill(td_in->td_pool_uuid, VOS_POF_RDB); + if (rc != 0 && rc != -DER_BUSY) { + D_ERROR(DF_UUID": kill pool service VOS pool: "DF_RC"\n", + DP_UUID(td_in->td_pool_uuid), DP_RC(rc)); + goto out_path; + } rc = tgt_destroy(td_req->cr_input, path); } else if (errno == ENOENT) { char *zombie; @@ -1319,6 +1330,7 @@ ds_mgmt_hdlr_tgt_destroy(crt_rpc_t *td_req) rc = daos_errno2der(errno); } +out_path: D_FREE(path); out: td_out->td_rc = rc; diff --git a/src/rsvc/srv.c b/src/rsvc/srv.c index 2eb4d5587a4..1d0b27aeea2 100644 --- a/src/rsvc/srv.c +++ b/src/rsvc/srv.c @@ -167,8 +167,11 @@ ds_rsvc_put(struct ds_rsvc *svc) D_ASSERTF(svc->s_ref > 0, "%d\n", svc->s_ref); svc->s_ref--; if (svc->s_ref == 0) { - if (svc->s_db != NULL) /* "nodb" */ + if (svc->s_db != NULL) { /* "nodb" */ rdb_stop_and_close(svc->s_db); + if (svc->s_destroy) + rdb_destroy(svc->s_db_path, svc->s_db_uuid); /* ignore any error */ + } fini_free(svc); } } @@ -871,6 +874,7 @@ ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t goto out; } } + D_ASSERT(!svc->s_destroy); if (svc->s_stop) rc = -DER_CANCELED; else @@ -927,8 +931,10 @@ stop(struct ds_rsvc *svc, bool destroy) while (svc->s_state != DS_RSVC_DOWN) ABT_cond_wait(svc->s_state_cv, svc->s_mutex); - if (destroy) - rc = remove(svc->s_db_path); + if (destroy) { + D_ASSERT(d_list_empty(&svc->s_entry)); + svc->s_destroy = true; + } ABT_mutex_unlock(svc->s_mutex); ds_rsvc_put(svc); diff --git a/src/tests/ftest/pool/create.yaml b/src/tests/ftest/pool/create.yaml index 
e7351582d50..1b41ca681a5 100644 --- a/src/tests/ftest/pool/create.yaml +++ b/src/tests/ftest/pool/create.yaml @@ -5,7 +5,7 @@ timeouts: test_create_max_pool_scm_only: 180 test_create_max_pool: 300 test_create_no_space: 300 - test_create_no_space_loop: 2160 + test_create_no_space_loop: 3500 server_config: name: daos_server engines_per_host: 2 diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index 7cdc64275af..8c5e4ff7ff5 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -793,7 +793,7 @@ pool_hop_free(struct d_ulink *hlink) } if (pool->vp_dying) - vos_delete_blob(pool->vp_id, 0); + vos_delete_blob(pool->vp_id, pool->vp_rdb ? VOS_POF_RDB : 0); D_FREE(pool); } @@ -1101,7 +1101,8 @@ vos_pool_kill(uuid_t uuid, unsigned int flags) /* Blob destroy will be deferred to last vos_pool ref drop */ return -DER_BUSY; } - D_DEBUG(DB_MGMT, "No open handles, OK to delete\n"); + D_DEBUG(DB_MGMT, DF_UUID": No open handles, OK to delete: flags=%x\n", DP_UUID(uuid), + flags); vos_delete_blob(uuid, flags); return 0; diff --git a/utils/ansible/ftest/templates/daos-make.sh.j2 b/utils/ansible/ftest/templates/daos-make.sh.j2 index 7db538202fb..ee4e9dec7de 100644 --- a/utils/ansible/ftest/templates/daos-make.sh.j2 +++ b/utils/ansible/ftest/templates/daos-make.sh.j2 @@ -325,12 +325,18 @@ if [[ $MPICH_PATH ]] ; then prepend-path PKG_CONFIG_PATH $DAOS_INSTALL_DIR/man/lib/pkgconfig EOF - if [[ -f /usr/share/modulefiles/mpi/mpich-x86_64 && ! -f /usr/share/modulefiles/mpi/mpich-x86_64.orig ]] ; then - warning "MPICH already installed: backup module file /usr/share/modulefiles/mpi/mpich-x86_64.orig" - run sudo mv /usr/share/modulefiles/mpi/mpich-x86_64 /usr/share/modulefiles/mpi/mpich-x86_64.orig - fi - run sudo mkdir -p /usr/share/modulefiles/mpi - run sudo ln -fs "$DAOS_INSTALL_DIR/share/modulefiles/mpich-x86_64" /usr/share/modulefiles/mpi/mpich-x86_64 + { + cat <<- EOF + set -e -o pipefail + + if [[ -f /usr/share/modulefiles/mpi/mpich-x86_64 && ! 
-f /usr/share/modulefiles/mpi/mpich-x86_64.orig ]] ; then + echo "[WARNING] MPICH already installed: backup module file /usr/share/modulefiles/mpi/mpich-x86_64.orig" + mv /usr/share/modulefiles/mpi/mpich-x86_64 /usr/share/modulefiles/mpi/mpich-x86_64.orig + fi + mkdir -p /usr/share/modulefiles/mpi + ln -fs "$DAOS_INSTALL_DIR/share/modulefiles/mpich-x86_64" /usr/share/modulefiles/mpi/mpich-x86_64 + EOF + } | run $CLUSH_EXE $CLUSH_OPTS -l root -w $CLIENTS_LIST bash -s fi if [[ $HDF5_PATH ]] ; then From deef72fcde009f3c2507e8c851957566a9674e75 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 29 Aug 2023 10:25:36 +0100 Subject: [PATCH 07/80] DAOS-14155 gurt: Add DL_CDEBUG macro. (#12880) Update usage in lots of places. This moves us closer to being able to properly support rc value printing consistency in log files. Signed-off-by: Ashley Pittman --- src/bio/bio_device.c | 5 +-- src/bio/bio_xstream.c | 10 ++--- src/bio/smd/smd_device.c | 10 ++--- src/bio/smd/smd_pool.c | 5 +-- src/cart/crt_iv.c | 22 +++------- src/client/array/dc_array.c | 7 ++- src/client/dfs/dfs.c | 4 +- src/container/container_iv.c | 19 +++----- src/container/srv_container.c | 13 +++--- src/container/srv_target.c | 13 +++--- src/include/gurt/debug.h | 8 ++++ src/mgmt/cli_mgmt.c | 11 +++-- src/object/cli_obj.c | 32 +++++++------- src/object/obj_tx.c | 6 +-- src/object/srv_ec_aggregate.c | 16 +++---- src/object/srv_obj.c | 81 ++++++++++++++++------------------- src/object/srv_obj_remote.c | 7 ++- src/pipeline/srv_pipeline.c | 16 +++---- src/pool/cli.c | 12 +++--- src/pool/srv_iv.c | 5 +-- src/pool/srv_pool.c | 4 +- src/pool/srv_target.c | 10 ++--- src/rebuild/rebuild_iv.c | 6 +-- src/rebuild/scan.c | 7 ++- src/vos/vos_aggregate.c | 33 +++++++------- src/vos/vos_dtx.c | 28 ++++++------ src/vos/vos_ilog.c | 3 +- src/vos/vos_obj.c | 4 +- src/vos/vos_pool.c | 3 +- utils/cq/d_logging_check.py | 4 +- 30 files changed, 177 insertions(+), 227 deletions(-) diff --git a/src/bio/bio_device.c 
b/src/bio/bio_device.c index f02a923b82c..3176a3bd06d 100644 --- a/src/bio/bio_device.c +++ b/src/bio/bio_device.c @@ -59,9 +59,8 @@ revive_dev(struct bio_xs_context *xs_ctxt, struct bio_bdev *d_bdev) NULL, 0); if (rc != 0) /* DER_NOSYS indicates that VMD-LED control is not enabled */ - D_CDEBUG(rc == -DER_NOSYS, DB_MGMT, DLOG_ERR, - "Reset LED on device:" DF_UUID " failed, " DF_RC "\n", - DP_UUID(d_bdev->bb_uuid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOSYS, DB_MGMT, DLOG_ERR, rc, + "Reset LED on device:" DF_UUID " failed", DP_UUID(d_bdev->bb_uuid)); return 0; } diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 84ee2dde9c7..18e8b298c99 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -1516,9 +1516,7 @@ bio_xsctxt_free(struct bio_xs_context *ctxt) */ rc = xs_poll_completion(ctxt, &cp_arg.cca_inflights, bio_spdk_subsys_timeout); - D_CDEBUG(rc == 0, DB_MGMT, DLOG_ERR, - "SPDK subsystems finalized. "DF_RC"\n", - DP_RC(rc)); + DL_CDEBUG(rc == 0, DB_MGMT, DLOG_ERR, rc, "SPDK subsystems finalized"); nvme_glb.bd_init_thread = NULL; @@ -1875,9 +1873,9 @@ bio_led_event_monitor(struct bio_xs_context *ctxt, uint64_t now) (unsigned int)CTL__LED_ACTION__RESET, NULL, 0); if (rc != 0) /* DER_NOSYS indicates that VMD-LED control is not enabled */ - D_CDEBUG(rc == -DER_NOSYS, DB_MGMT, DLOG_ERR, - "Reset LED on device:" DF_UUID " failed, " DF_RC "\n", - DP_UUID(d_bdev->bb_uuid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOSYS, DB_MGMT, DLOG_ERR, rc, + "Reset LED on device:" DF_UUID " failed", + DP_UUID(d_bdev->bb_uuid)); } } } diff --git a/src/bio/smd/smd_device.c b/src/bio/smd/smd_device.c index 3f6e906a578..6de9ac6b53c 100644 --- a/src/bio/smd/smd_device.c +++ b/src/bio/smd/smd_device.c @@ -172,9 +172,8 @@ smd_dev_get_info(struct d_uuid *id, struct smd_dev_info **dev_info) rc = smd_db_fetch(TABLE_DEV, id, sizeof(*id), &dev, sizeof(dev)); if (rc) { - D_CDEBUG(rc != -DER_NONEXIST, DLOG_ERR, DB_MGMT, - "Fetch dev "DF_UUID" failed. 
"DF_RC"\n", - DP_UUID(&id->uuid), DP_RC(rc)); + DL_CDEBUG(rc != -DER_NONEXIST, DLOG_ERR, DB_MGMT, rc, + "Fetch dev " DF_UUID " failed", DP_UUID(&id->uuid)); return rc; } @@ -208,9 +207,8 @@ smd_dev_get_by_tgt(uint32_t tgt_id, enum smd_dev_type st, struct smd_dev_info ** smd_db_lock(); rc = smd_db_fetch(TABLE_TGTS[st], &tgt_id, sizeof(tgt_id), &id, sizeof(id)); if (rc) { - D_CDEBUG(rc != -DER_NONEXIST, DLOG_ERR, DB_MGMT, - "Fetch target %d failed. "DF_RC"\n", tgt_id, - DP_RC(rc)); + DL_CDEBUG(rc != -DER_NONEXIST, DLOG_ERR, DB_MGMT, rc, "Fetch target %d failed", + tgt_id); goto out; } rc = smd_dev_get_info(&id, dev_info); diff --git a/src/bio/smd/smd_pool.c b/src/bio/smd/smd_pool.c index 21e14094f3d..7f709466b41 100644 --- a/src/bio/smd/smd_pool.c +++ b/src/bio/smd/smd_pool.c @@ -278,9 +278,8 @@ pool_get_blob(uuid_t pool_id, uint32_t tgt_id, char *table_name, uint64_t *blob_ smd_db_lock(); rc = smd_db_fetch(table_name, &id, sizeof(id), &pool, sizeof(pool)); if (rc) { - D_CDEBUG(rc != -DER_NONEXIST, DLOG_ERR, DB_MGMT, - "Fetch pool "DF_UUID" failed. 
"DF_RC"\n", - DP_UUID(&id.uuid), DP_RC(rc)); + DL_CDEBUG(rc != -DER_NONEXIST, DLOG_ERR, DB_MGMT, rc, + "Fetch pool " DF_UUID " failed", DP_UUID(&id.uuid)); goto out; } diff --git a/src/cart/crt_iv.c b/src/cart/crt_iv.c index eab2c7fe583..840aca97845 100644 --- a/src/cart/crt_iv.c +++ b/src/cart/crt_iv.c @@ -1650,9 +1650,7 @@ crt_iv_fetch(crt_iv_namespace_t ivns, uint32_t class_id, rc = iv_ops->ivo_on_hash(ivns_internal, iv_key, &root_rank); D_RWLOCK_UNLOCK(&ivns_internal->cii_grp_priv->gp_rwlock); if (rc != 0) { - D_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, - "Failed to get hash, rc="DF_RC"\n", - DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, rc, "Failed to get hash"); D_GOTO(exit, rc); } @@ -1759,9 +1757,7 @@ crt_iv_fetch(crt_iv_namespace_t ivns, uint32_t class_id, if (put_needed) iv_ops->ivo_on_put(ivns, iv_value, user_priv); - D_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, - "Failed to issue IV fetch, rc="DF_RC"\n", - DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, rc, "Failed to issue IV fetch"); if (cb_info) { IVNS_DECREF(cb_info->ifc_ivns_internal); @@ -3277,9 +3273,7 @@ crt_iv_update_internal(crt_iv_namespace_t ivns, uint32_t class_id, rc = iv_ops->ivo_on_hash(ivns, iv_key, &root_rank); D_RWLOCK_UNLOCK(&ivns_internal->cii_grp_priv->gp_rwlock); if (rc != 0) { - D_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, - "ivo_on_hash() failed, rc="DF_RC"\n", - DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, rc, "ivo_on_hash() failed"); D_GOTO(exit, rc); } @@ -3370,10 +3364,8 @@ crt_iv_update_internal(crt_iv_namespace_t ivns, uint32_t class_id, D_GOTO(exit, rc); } else { - D_CDEBUG(rc == -DER_NONEXIST || rc == -DER_NOTLEADER, - DLOG_INFO, DLOG_ERR, - "ivo_on_update failed with rc = "DF_RC"\n", - DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST || rc == -DER_NOTLEADER, DLOG_INFO, DLOG_ERR, rc, + "ivo_on_update failed"); update_comp_cb(ivns, class_id, iv_key, NULL, iv_value, rc, cb_arg); @@ -3466,9 +3458,7 @@ 
crt_iv_get_nchildren(crt_iv_namespace_t ivns, uint32_t class_id, } rc = iv_ops->ivo_on_hash(ivns, iv_key, &root_rank); if (rc != 0) { - D_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, - "ivo_on_hash() failed, rc="DF_RC"\n", - DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, rc, "ivo_on_hash() failed"); D_GOTO(exit, rc); } diff --git a/src/client/array/dc_array.c b/src/client/array/dc_array.c index 820bc7bd0cc..b96bdfa7c94 100644 --- a/src/client/array/dc_array.c +++ b/src/client/array/dc_array.c @@ -189,8 +189,7 @@ create_handle_cb(tse_task_t *task, void *data) int rc = task->dt_result; if (rc != 0) { - D_CDEBUG(rc == -DER_EXIST, DLOG_DBG, DLOG_ERR, - "Failed to create array obj "DF_RC"\n", DP_RC(rc)); + DL_CDEBUG(rc == -DER_EXIST, DLOG_DBG, DLOG_ERR, rc, "Failed to create array obj"); D_GOTO(err_obj, rc); } @@ -615,8 +614,8 @@ open_handle_cb(tse_task_t *task, void *data) int rc = task->dt_result; if (rc != 0) { - D_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, - "Failed to open array object "DF_RC"\n", DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, rc, + "Failed to open array object"); return rc; } diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 07969752143..3ae79220ff2 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -6312,8 +6312,8 @@ dfs_getxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, void *value, &iod, NULL, NULL, NULL); } if (rc) { - D_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, - "Failed to fetch xattr '%s' " DF_RC "\n", name, DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, rc, "Failed to fetch xattr '%s'", + name); D_GOTO(close, rc = daos_der2errno(rc)); } diff --git a/src/container/container_iv.c b/src/container/container_iv.c index 7143880206c..c6d09fa644a 100644 --- a/src/container/container_iv.c +++ b/src/container/container_iv.c @@ -661,9 +661,8 @@ cont_iv_ent_update(struct ds_iv_entry *entry, struct ds_iv_key *key, out: if (rc < 0 && rc != -DER_IVCB_FORWARD) 
- D_CDEBUG(rc == -DER_NONEXIST || rc == -DER_NOTLEADER, - DB_ANY, DLOG_ERR, - "failed to insert: rc "DF_RC"\n", DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST || rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, rc, + "failed to insert"); return rc; } @@ -759,9 +758,8 @@ cont_iv_fetch(void *ns, int class_id, uuid_t key_uuid, civ_key->entry_size = entry_size; rc = ds_iv_fetch(ns, &key, cont_iv ? &sgl : NULL, retry); if (rc) - D_CDEBUG(rc == -DER_NOTLEADER, DB_MGMT, DLOG_ERR, - DF_UUID" iv fetch failed "DF_RC"\n", - DP_UUID(key_uuid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER, DB_MGMT, DLOG_ERR, rc, DF_UUID " iv fetch failed", + DP_UUID(key_uuid)); return rc; } @@ -792,10 +790,8 @@ cont_iv_update(void *ns, int class_id, uuid_t key_uuid, civ_key->entry_size = cont_iv_len; rc = ds_iv_update(ns, &key, &sgl, shortcut, sync_mode, 0, retry); if (rc) - D_CDEBUG(rc == -DER_NOTLEADER || rc == -DER_NONEXIST, - DB_ANY, DLOG_ERR, - DF_UUID" iv update failed "DF_RC"\n", - DP_UUID(key_uuid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER || rc == -DER_NONEXIST, DB_ANY, DLOG_ERR, rc, + DF_UUID " iv update failed", DP_UUID(key_uuid)); return rc; } @@ -1434,8 +1430,7 @@ cont_iv_prop_fetch_ult(void *data) iv_entry, iv_entry_size, iv_entry_size, false /* retry */); if (rc) { - D_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, - "cont_iv_fetch failed "DF_RC"\n", DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, rc, "cont_iv_fetch failed"); D_GOTO(out, rc); } diff --git a/src/container/srv_container.c b/src/container/srv_container.c index d2f86d785a9..6dff9a62a91 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1865,12 +1865,9 @@ cont_agg_eph_leader_ult(void *arg) ec_agg->ea_cont_uuid, min_eph); if (rc) { - D_CDEBUG(rc == -DER_NONEXIST, - DLOG_INFO, DLOG_ERR, - DF_CONT": refresh failed: "DF_RC"\n", - DP_CONT(svc->cs_pool_uuid, - ec_agg->ea_cont_uuid), - DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST, DLOG_INFO, DLOG_ERR, rc, + DF_CONT ": refresh failed", + 
DP_CONT(svc->cs_pool_uuid, ec_agg->ea_cont_uuid)); /* If there are network error or pool map inconsistency, * let's skip the following eph sync, which will fail @@ -4864,8 +4861,8 @@ ds_cont_rf_check(uuid_t pool_uuid, uuid_t cont_uuid, struct rdb_tx *tx) rc = ds_pool_rf_verify(pool, stat.dcs_pm_ver, daos_cont_prop2redunlvl(prop), daos_cont_prop2redunfac(prop)); if (rc != -DER_RF) { - D_CDEBUG(rc == 0, DB_MD, DLOG_ERR, DF_CONT", verify" DF_RC"\n", - DP_CONT(pool_uuid, cont_uuid), DP_RC(rc)); + DL_CDEBUG(rc == 0, DB_MD, DLOG_ERR, rc, DF_CONT ", verify", + DP_CONT(pool_uuid, cont_uuid)); D_GOTO(out, rc); } diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 81b9fef4f0c..e06338bdeeb 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -476,10 +476,9 @@ cont_aggregate_interval(struct ds_cont_child *cont, cont_aggregate_cb_t cb, if (rc == -DER_SHUTDOWN) { break; /* pool destroyed */ } else if (rc < 0) { - D_CDEBUG(rc == -DER_BUSY, DB_EPC, DLOG_ERR, - DF_CONT": VOS aggregate failed. 
"DF_RC"\n", - DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), - DP_RC(rc)); + DL_CDEBUG(rc == -DER_BUSY, DB_EPC, DLOG_ERR, rc, + DF_CONT ": VOS aggregate failed", + DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid)); } else if (sched_req_space_check(req) != SCHED_SPACE_PRESS_NONE) { /* Don't sleep too long when there is space pressure */ msecs = 2ULL * 100; @@ -860,9 +859,9 @@ cont_child_start(struct ds_pool_child *pool_child, const uuid_t co_uuid, pool_child->spc_uuid, true /* create */, &cont_child); if (rc) { - D_CDEBUG(rc != -DER_NONEXIST, DLOG_ERR, DB_MD, - DF_CONT"[%d]: Load container error:%d\n", - DP_CONT(pool_child->spc_uuid, co_uuid), tgt_id, rc); + DL_CDEBUG(rc != -DER_NONEXIST, DLOG_ERR, DB_MD, rc, + DF_CONT "[%d]: Load container error", + DP_CONT(pool_child->spc_uuid, co_uuid), tgt_id); return rc; } diff --git a/src/include/gurt/debug.h b/src/include/gurt/debug.h index f8969d67534..f83914242f6 100644 --- a/src/include/gurt/debug.h +++ b/src/include/gurt/debug.h @@ -152,6 +152,14 @@ extern void (*d_alt_assert)(const int, const char*, const char*, const int); D_DEBUG(flag_false, __VA_ARGS__); \ } while (0) +#define DL_CDEBUG(cond, flag_true, flag_false, _rc, _fmt, ...) 
\ + do { \ + if (cond) \ + D_DEBUG(flag_true, _fmt ": " DF_RC " \n", ##__VA_ARGS__, DP_RC(_rc)); \ + else \ + D_DEBUG(flag_false, _fmt ": " DF_RC "\n", ##__VA_ARGS__, DP_RC(_rc)); \ + } while (0) + /* Register a descriptor with a parent and a type */ #define D_TRACE_UP(flag, ptr, parent, type) \ D_TRACE_DEBUG(flag, ptr, "Registered new '%s' from %p\n", \ diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c index 08fb1d0a117..93df04b436e 100644 --- a/src/mgmt/cli_mgmt.c +++ b/src/mgmt/cli_mgmt.c @@ -1094,13 +1094,12 @@ dc_mgmt_pool_find(struct dc_mgmt_sys *sys, const char *label, uuid_t puuid, rc = rpc_out->pfo_rc; if (rc != 0) { if (label) { - D_CDEBUG(rc == -DER_NONEXIST, DB_MGMT, DLOG_ERR, - "%s: MGMT_POOL_FIND rpc failed to %d ranks, " DF_RC "\n", label, - ms_ranks->rl_nr, DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST, DB_MGMT, DLOG_ERR, rc, + "%s: MGMT_POOL_FIND rpc failed to %d ranks", label, + ms_ranks->rl_nr); } else { - D_ERROR(DF_UUID ": MGMT_POOL_FIND rpc failed to %d " - "ranks, " DF_RC "\n", - DP_UUID(puuid), ms_ranks->rl_nr, DP_RC(rc)); + DL_ERROR(rc, DF_UUID ": MGMT_POOL_FIND rpc failed to %d ranks", + DP_UUID(puuid), ms_ranks->rl_nr); } goto decref; } diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index b71e5f21227..47c63aca140 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -1057,9 +1057,9 @@ obj_shard_tgts_query(struct dc_object *obj, uint32_t map_ver, uint32_t shard, rc = obj_shard_open(obj, shard, map_ver, &obj_shard); if (rc != 0) { - D_CDEBUG(rc == -DER_STALE || rc == -DER_NONEXIST, DB_IO, DLOG_ERR, - DF_OID " obj_shard_open %u opc %u, rc " DF_RC "\n", - DP_OID(obj->cob_md.omd_id), obj_auxi->opc, shard, DP_RC(rc)); + DL_CDEBUG(rc == -DER_STALE || rc == -DER_NONEXIST, DB_IO, DLOG_ERR, rc, + DF_OID " obj_shard_open %u opc %u", DP_OID(obj->cob_md.omd_id), + obj_auxi->opc, shard); D_GOTO(out, rc); } @@ -1266,9 +1266,8 @@ obj_shards_2_fwtgts(struct dc_object *obj, uint32_t map_ver, uint8_t *bit_map, * the operation, so 
let's skip such shard here. Note: these * non-exist shards will never happen for the leader. */ - D_CDEBUG(rc == -DER_NONEXIST, DB_IO, DLOG_ERR, - DF_OID", shard open:" DF_RC"\n", - DP_OID(obj->cob_md.omd_id), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST, DB_IO, DLOG_ERR, rc, + DF_OID ", shard open", DP_OID(obj->cob_md.omd_id)); if (rc != -DER_NONEXIST) D_GOTO(out, rc); rc = 0; @@ -1319,8 +1318,8 @@ obj_shards_2_fwtgts(struct dc_object *obj, uint32_t map_ver, uint8_t *bit_map, D_ASSERT(tgt == req_tgts->ort_shard_tgts + shard_cnt); out: - D_CDEBUG(rc == 0 || rc == -DER_NEED_TX || rc == -DER_TGT_RETRY, DB_TRACE, - DLOG_ERR, DF_OID", forward:" DF_RC"\n", DP_OID(obj->cob_md.omd_id), DP_RC(rc)); + DL_CDEBUG(rc == 0 || rc == -DER_NEED_TX || rc == -DER_TGT_RETRY, DB_TRACE, DLOG_ERR, rc, + DF_OID ", forward", DP_OID(obj->cob_md.omd_id)); return rc; } @@ -5139,11 +5138,11 @@ static inline bool shard_was_fail(struct obj_auxi_args *obj_auxi, uint32_t shard_idx) { struct obj_auxi_tgt_list *failed_list; - uint32_t tgt_id; + uint32_t tgt_id; if (obj_auxi->force_degraded) { - D_DEBUG(DB_IO, DF_OID" fail idx %u\n", - DP_OID(obj_auxi->obj->cob_md.omd_id), shard_idx); + D_DEBUG(DB_IO, DF_OID " fail idx %u\n", DP_OID(obj_auxi->obj->cob_md.omd_id), + shard_idx); obj_auxi->force_degraded = 0; return true; } @@ -5152,7 +5151,7 @@ shard_was_fail(struct obj_auxi_args *obj_auxi, uint32_t shard_idx) return false; failed_list = obj_auxi->failed_tgt_list; - tgt_id = obj_auxi->obj->cob_shards->do_shards[shard_idx].do_target_id; + tgt_id = obj_auxi->obj->cob_shards->do_shards[shard_idx].do_target_id; if (tgt_in_failed_tgts_list(tgt_id, failed_list)) return true; @@ -5166,16 +5165,15 @@ obj_ec_valid_shard_get(struct obj_auxi_args *obj_auxi, uint8_t *tgt_bitmap, { struct dc_object *obj = obj_auxi->obj; uint32_t grp_start = grp_idx * obj_get_grp_size(obj); - uint32_t shard_idx = grp_start + *tgt_idx; + uint32_t shard_idx = grp_start + *tgt_idx; int rc = 0; while (shard_was_fail(obj_auxi, shard_idx) 
|| obj_shard_is_invalid(obj, shard_idx, DAOS_OBJ_RPC_FETCH)) { - D_DEBUG(DB_IO, "tried shard %d/%u %d/%d/%d on "DF_OID"\n", shard_idx, *tgt_idx, + D_DEBUG(DB_IO, "tried shard %d/%u %d/%d/%d on " DF_OID "\n", shard_idx, *tgt_idx, obj->cob_shards->do_shards[shard_idx].do_rebuilding, obj->cob_shards->do_shards[shard_idx].do_target_id, - obj->cob_shards->do_shards[shard_idx].do_shard, - DP_OID(obj->cob_md.omd_id)); + obj->cob_shards->do_shards[shard_idx].do_shard, DP_OID(obj->cob_md.omd_id)); rc = obj_ec_fail_info_insert(&obj_auxi->reasb_req, (uint16_t)*tgt_idx); if (rc) break; @@ -5191,7 +5189,7 @@ obj_ec_valid_shard_get(struct obj_auxi_args *obj_auxi, uint8_t *tgt_bitmap, /* Can not find any valid shards anymore, so no need retry, and also to check * if it needs to restore the original failure. */ obj_auxi->no_retry = 1; - rc = retry_errcode(obj_auxi, rc); + rc = retry_errcode(obj_auxi, rc); D_ERROR(DF_OID" can not get parity shard: "DF_RC"\n", DP_OID(obj->cob_md.omd_id), DP_RC(rc)); } diff --git a/src/object/obj_tx.c b/src/object/obj_tx.c index 829a87ad6e0..b0e45826d89 100644 --- a/src/object/obj_tx.c +++ b/src/object/obj_tx.c @@ -3588,9 +3588,9 @@ dc_tx_convert_restart(struct dc_tx *tx, struct dc_object *obj, enum obj_rpc_opc /* dc_tx_restart_begin() will trigger dc_tx_cleanup() internally, let's re-attach. 
*/ rc = dc_tx_attach(th, obj, opc, tx->tx_orig_task, *backoff, false); - D_CDEBUG(rc != 0, DLOG_ERR, DB_IO, - "Restart convert task %p with DTX " DF_DTI ", pm_ver %u, backoff %u: rc = %d\n", - tx->tx_orig_task, DP_DTI(&tx->tx_id), tx->tx_pm_ver, *backoff, rc); + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, + "Restart convert task %p with DTX " DF_DTI ", pm_ver %u, backoff %u", + tx->tx_orig_task, DP_DTI(&tx->tx_id), tx->tx_pm_ver, *backoff); if (unlikely(rc == -DER_TX_RESTART)) goto again; diff --git a/src/object/srv_ec_aggregate.c b/src/object/srv_ec_aggregate.c index 796a31c0ab8..42a9c7acdd9 100644 --- a/src/object/srv_ec_aggregate.c +++ b/src/object/srv_ec_aggregate.c @@ -972,9 +972,9 @@ agg_fetch_remote_parity(struct ec_agg_entry *entry) &entry->ae_dkey, 1, &iod, &sgl, NULL, DIOF_TO_SPEC_SHARD | DIOF_FOR_EC_AGG, &peer_shard, NULL); - D_CDEBUG(rc != 0, DLOG_ERR, DB_TRACE, - DF_UOID " fetch parity from peer shard %d, " DF_RC "\n", - DP_UOID(entry->ae_oid), peer_shard, DP_RC(rc)); + DL_CDEBUG(rc != 0, DLOG_ERR, DB_TRACE, rc, + DF_UOID " fetch parity from peer shard %d", DP_UOID(entry->ae_oid), + peer_shard); if (rc) goto out; } @@ -2649,13 +2649,11 @@ ds_obj_ec_aggregate(void *arg) param.ap_cont = cont; rc = ec_agg_param_init(cont, ¶m); if (rc) { - /* To make sure the EC aggregation can be run on this xstream, - * let's do not exit here, and in cont_ec_aggregate_cb(), it will - * keep retrying parameter init. + /* To make sure the EC aggregation can be run on this xstream, let's do not exit + * here, and in cont_ec_aggregate_cb(), it will keep retrying parameter init. 
*/ - D_CDEBUG(rc == -DER_NOTLEADER, DB_EPC, DLOG_ERR, - DF_UUID" EC aggregation failed: "DF_RC"\n", - DP_UUID(cont->sc_uuid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER, DB_EPC, DLOG_ERR, rc, + DF_UUID " EC aggregation failed", DP_UUID(cont->sc_uuid)); } cont_aggregate_interval(cont, cont_ec_aggregate_cb, ¶m); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index f605dcf6189..a977ef7ce3e 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -144,12 +144,12 @@ obj_rw_complete(crt_rpc_t *rpc, struct obj_io_context *ioc, if (rc != 0) { if (rc == -DER_VOS_PARTIAL_UPDATE) rc = -DER_NO_PERM; - D_CDEBUG(rc == -DER_REC2BIG || rc == -DER_INPROGRESS || - rc == -DER_TX_RESTART || rc == -DER_EXIST || - rc == -DER_NONEXIST || rc == -DER_ALREADY || - rc == -DER_CHKPT_BUSY, - DLOG_DBG, DLOG_ERR, DF_UOID " %s end failed: " DF_RC "\n", - DP_UOID(orwi->orw_oid), update ? "Update" : "Fetch", DP_RC(rc)); + DL_CDEBUG(rc == -DER_REC2BIG || rc == -DER_INPROGRESS || + rc == -DER_TX_RESTART || rc == -DER_EXIST || + rc == -DER_NONEXIST || rc == -DER_ALREADY || + rc == -DER_CHKPT_BUSY, + DLOG_DBG, DLOG_ERR, rc, DF_UOID " %s end failed", + DP_UOID(orwi->orw_oid), update ? 
"Update" : "Fetch"); if (status == 0) status = rc; } @@ -1491,10 +1491,10 @@ obj_local_rw_internal(crt_rpc_t *rpc, struct obj_io_context *ioc, daos_iod_t *io cond_flags | fetch_flags, shadows, &ioh, dth); daos_recx_ep_list_free(shadows, iods_nr); if (rc) { - D_CDEBUG(rc == -DER_INPROGRESS || rc == -DER_NONEXIST || - rc == -DER_TX_RESTART, DB_IO, DLOG_ERR, - "Fetch begin for "DF_UOID" failed: "DF_RC"\n", - DP_UOID(orw->orw_oid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_INPROGRESS || rc == -DER_NONEXIST || + rc == -DER_TX_RESTART, + DB_IO, DLOG_ERR, rc, "Fetch begin for " DF_UOID " failed", + DP_UOID(orw->orw_oid)); goto out; } @@ -1618,7 +1618,7 @@ obj_local_rw_internal(crt_rpc_t *rpc, struct obj_io_context *ioc, daos_iod_t *io bio_pre_latency = daos_get_ntime() - time; if (obj_rpc_is_fetch(rpc) && DAOS_FAIL_CHECK(DAOS_OBJ_FAIL_NVME_IO)) { - D_ERROR(DF_UOID" fetch failed: %d\n", DP_UOID(orw->orw_oid), -DER_NVME_IO); + D_ERROR(DF_UOID " fetch failed: %d\n", DP_UOID(orw->orw_oid), -DER_NVME_IO); rc = -DER_NVME_IO; goto post; } @@ -1649,9 +1649,8 @@ obj_local_rw_internal(crt_rpc_t *rpc, struct obj_io_context *ioc, daos_iod_t *io if (rc == -DER_OVERFLOW) rc = -DER_REC2BIG; - D_CDEBUG(rc == -DER_REC2BIG, DLOG_DBG, DLOG_ERR, - DF_UOID" data transfer failed, dma %d rc "DF_RC"", - DP_UOID(orw->orw_oid), rma, DP_RC(rc)); + DL_CDEBUG(rc == -DER_REC2BIG, DLOG_DBG, DLOG_ERR, rc, + DF_UOID " data transfer failed, dma %d", DP_UOID(orw->orw_oid), rma); D_GOTO(post, rc); } @@ -2655,14 +2654,13 @@ ds_obj_tgt_update_handler(crt_rpc_t *rpc) */ rc = obj_local_rw(rpc, &ioc, dth); if (rc != 0) - D_CDEBUG( + DL_CDEBUG( rc == -DER_INPROGRESS || rc == -DER_TX_RESTART || (rc == -DER_EXIST && (orw->orw_api_flags & (DAOS_COND_DKEY_INSERT | DAOS_COND_AKEY_INSERT))) || (rc == -DER_NONEXIST && (orw->orw_api_flags & (DAOS_COND_DKEY_UPDATE | DAOS_COND_AKEY_UPDATE))), - DB_IO, DLOG_ERR, DF_UOID ": error=" DF_RC "\n", DP_UOID(orw->orw_oid), - DP_RC(rc)); + DB_IO, DLOG_ERR, rc, DF_UOID, 
DP_UOID(orw->orw_oid)); out: if (dth != NULL) @@ -2719,15 +2717,14 @@ obj_tgt_update(struct dtx_leader_handle *dlh, void *arg, int idx, */ rc = obj_local_rw(exec_arg->rpc, exec_arg->ioc, &dlh->dlh_handle); if (rc != 0) - D_CDEBUG(rc == -DER_INPROGRESS || rc == -DER_TX_RESTART || - (rc == -DER_EXIST && - (orw->orw_api_flags & - (DAOS_COND_DKEY_INSERT | DAOS_COND_AKEY_INSERT))) || - (rc == -DER_NONEXIST && - (orw->orw_api_flags & - (DAOS_COND_DKEY_UPDATE | DAOS_COND_AKEY_UPDATE))), - DB_IO, DLOG_ERR, DF_UOID ": error=" DF_RC "\n", - DP_UOID(orw->orw_oid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_INPROGRESS || rc == -DER_TX_RESTART || + (rc == -DER_EXIST && + (orw->orw_api_flags & + (DAOS_COND_DKEY_INSERT | DAOS_COND_AKEY_INSERT))) || + (rc == -DER_NONEXIST && + (orw->orw_api_flags & + (DAOS_COND_DKEY_UPDATE | DAOS_COND_AKEY_UPDATE))), + DB_IO, DLOG_ERR, rc, DF_UOID, DP_UOID(orw->orw_oid)); comp: if (comp_cb != NULL) @@ -3568,10 +3565,9 @@ ds_obj_tgt_punch_handler(crt_rpc_t *rpc) rc = obj_local_punch(opi, opc_get(rpc->cr_opc), &ioc, dth); if (rc != 0) - D_CDEBUG(rc == -DER_INPROGRESS || rc == -DER_TX_RESTART || - (rc == -DER_NONEXIST && (opi->opi_api_flags & DAOS_COND_PUNCH)), - DB_IO, DLOG_ERR, DF_UOID ": error=" DF_RC "\n", DP_UOID(opi->opi_oid), - DP_RC(rc)); + DL_CDEBUG(rc == -DER_INPROGRESS || rc == -DER_TX_RESTART || + (rc == -DER_NONEXIST && (opi->opi_api_flags & DAOS_COND_PUNCH)), + DB_IO, DLOG_ERR, rc, DF_UOID, DP_UOID(opi->opi_oid)); out: /* Stop the local transaction */ @@ -3643,11 +3639,10 @@ obj_tgt_punch(struct dtx_leader_handle *dlh, void *arg, int idx, rc = obj_local_punch(opi, opc_get(rpc->cr_opc), exec_arg->ioc, &dlh->dlh_handle); if (rc != 0) - D_CDEBUG( + DL_CDEBUG( rc == -DER_INPROGRESS || rc == -DER_TX_RESTART || (rc == -DER_NONEXIST && (opi->opi_api_flags & DAOS_COND_PUNCH)), - DB_IO, DLOG_ERR, DF_UOID ": error=" DF_RC "\n", DP_UOID(opi->opi_oid), - DP_RC(rc)); + DB_IO, DLOG_ERR, rc, DF_UOID, DP_UOID(opi->opi_oid)); comp: if (comp_cb != NULL) @@ 
-4154,10 +4149,9 @@ ds_cpd_handle_one(crt_rpc_t *rpc, struct daos_cpd_sub_head *dcsh, struct daos_cp rc = 0; if (rc != 0) { - D_CDEBUG(rc != -DER_INPROGRESS && rc != -DER_TX_RESTART, - DLOG_ERR, DB_IO, "Failed to set read TS for obj " - DF_UOID", DTX "DF_DTI": "DF_RC"\n", DP_UOID(dcsr->dcsr_oid), - DP_DTI(&dcsh->dcsh_xid), DP_RC(rc)); + DL_CDEBUG(rc != -DER_INPROGRESS && rc != -DER_TX_RESTART, DLOG_ERR, DB_IO, + rc, "Failed to set read TS for obj " DF_UOID ", DTX " DF_DTI, + DP_UOID(dcsr->dcsr_oid), DP_DTI(&dcsh->dcsh_xid)); goto out; } } @@ -4655,10 +4649,8 @@ ds_obj_dtx_follower(crt_rpc_t *rpc, struct obj_io_context *ioc) rc = dtx_end(dth, ioc->ioc_coc, rc); out: - D_CDEBUG(rc != 0 && rc != -DER_INPROGRESS && rc != -DER_TX_RESTART, - DLOG_ERR, DB_IO, - "Handled DTX "DF_DTI" on non-leader: "DF_RC"\n", - DP_DTI(&dcsh->dcsh_xid), DP_RC(rc)); + DL_CDEBUG(rc != 0 && rc != -DER_INPROGRESS && rc != -DER_TX_RESTART, DLOG_ERR, DB_IO, rc, + "Handled DTX " DF_DTI " on non-leader", DP_DTI(&dcsh->dcsh_xid)); return rc; } @@ -4872,10 +4864,9 @@ ds_obj_dtx_leader(struct daos_cpd_args *dca) rc = dtx_leader_end(dlh, dca->dca_ioc->ioc_coh, rc); out: - D_CDEBUG(rc != 0 && rc != -DER_INPROGRESS && rc != -DER_TX_RESTART && - rc != -DER_AGAIN, DLOG_ERR, DB_IO, - "Handled DTX "DF_DTI" on leader, idx %u: "DF_RC"\n", - DP_DTI(&dcsh->dcsh_xid), dca->dca_idx, DP_RC(rc)); + DL_CDEBUG(rc != 0 && rc != -DER_INPROGRESS && rc != -DER_TX_RESTART && rc != -DER_AGAIN, + DLOG_ERR, DB_IO, rc, "Handled DTX " DF_DTI " on leader, idx %u", + DP_DTI(&dcsh->dcsh_xid), dca->dca_idx); if (rc == -DER_AGAIN) { oci->oci_flags |= ORF_RESEND; diff --git a/src/object/srv_obj_remote.c b/src/object/srv_obj_remote.c index 90597f5483f..16a661c1b58 100644 --- a/src/object/srv_obj_remote.c +++ b/src/object/srv_obj_remote.c @@ -426,10 +426,9 @@ ds_obj_cpd_dispatch(struct dtx_leader_handle *dlh, void *arg, int idx, D_ERROR("crt_req_send failed, rc "DF_RC"\n", DP_RC(rc)); } - D_CDEBUG(rc != 0, DLOG_ERR, DB_TRACE, - 
"Forwarded CPD RPC to rank:%d tag:%d idx %u for DXT " - DF_DTI": "DF_RC"\n", tgt_ep.ep_rank, tgt_ep.ep_tag, idx, - DP_DTI(&dcsh->dcsh_xid), DP_RC(rc)); + DL_CDEBUG(rc != 0, DLOG_ERR, DB_TRACE, rc, + "Forwarded CPD RPC to rank:%d tag:%d idx %u for DXT " DF_DTI, tgt_ep.ep_rank, + tgt_ep.ep_tag, idx, DP_DTI(&dcsh->dcsh_xid)); return rc; out: diff --git a/src/pipeline/srv_pipeline.c b/src/pipeline/srv_pipeline.c index 94f99ef6942..d9659021820 100644 --- a/src/pipeline/srv_pipeline.c +++ b/src/pipeline/srv_pipeline.c @@ -100,9 +100,8 @@ pipeline_fetch_record(daos_handle_t vos_coh, daos_unit_oid_t oid, struct vos_ite /** fetching record */ rc = vos_fetch_begin(vos_coh, oid, epr.epr_hi, d_key, nr_iods, iods, 0, NULL, &ioh, NULL); if (rc) { - D_CDEBUG(rc == -DER_INPROGRESS || rc == -DER_NONEXIST || rc == -DER_TX_RESTART, - DB_IO, DLOG_ERR, "Fetch begin for " DF_UOID " failed: " DF_RC "\n", - DP_UOID(oid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_INPROGRESS || rc == -DER_NONEXIST || rc == -DER_TX_RESTART, + DB_IO, DLOG_ERR, rc, "Fetch begin for " DF_UOID " failed", DP_UOID(oid)); D_GOTO(out, rc); } biod = vos_ioh2desc(ioh); @@ -115,8 +114,8 @@ pipeline_fetch_record(daos_handle_t vos_coh, daos_unit_oid_t oid, struct vos_ite if (rc) { if (rc == -DER_OVERFLOW) rc = -DER_REC2BIG; - D_CDEBUG(rc == -DER_REC2BIG, DLOG_DBG, DLOG_ERR, - DF_UOID " data transfer failed, rc " DF_RC "", DP_UOID(oid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_REC2BIG, DLOG_DBG, DLOG_ERR, rc, + DF_UOID " data transfer failed", DP_UOID(oid)); /** D_GOTO(post, rc); */ } /**post:*/ @@ -124,10 +123,9 @@ pipeline_fetch_record(daos_handle_t vos_coh, daos_unit_oid_t oid, struct vos_ite out: rc1 = vos_fetch_end(ioh, &io_size, rc); if (rc1 != 0) { - D_CDEBUG(rc1 == -DER_REC2BIG || rc1 == -DER_INPROGRESS || rc1 == -DER_TX_RESTART || - rc1 == -DER_EXIST || rc1 == -DER_NONEXIST || rc1 == -DER_ALREADY, - DLOG_DBG, DLOG_ERR, DF_UOID " %s end failed: " DF_RC "\n", DP_UOID(oid), - "Fetch", DP_RC(rc1)); + DL_CDEBUG(rc1 == -DER_REC2BIG 
|| rc1 == -DER_INPROGRESS || rc1 == -DER_TX_RESTART || + rc1 == -DER_EXIST || rc1 == -DER_NONEXIST || rc1 == -DER_ALREADY, + DLOG_DBG, DLOG_ERR, rc, DF_UOID " Fetch end failed", DP_UOID(oid)); if (rc == 0) rc = rc1; } diff --git a/src/pool/cli.c b/src/pool/cli.c index 9160e03b41d..7ce25440908 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -222,9 +222,9 @@ dc_pool_choose_svc_rank(const char *label, uuid_t puuid, D_MUTEX_UNLOCK(cli_lock); rc = dc_mgmt_pool_find(sys, label, puuid, &ranklist); if (rc) { - D_CDEBUG(rc == -DER_NONEXIST, DB_PL, DLOG_ERR, - DF_UUID ":%s: dc_mgmt_pool_find() failed, " DF_RC "\n", - DP_UUID(puuid), label ? label : "", DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST, DB_PL, DLOG_ERR, rc, + DF_UUID ":%s: dc_mgmt_pool_find() failed", DP_UUID(puuid), + label ? label : ""); return rc; } if (cli_lock) @@ -603,9 +603,9 @@ dc_pool_connect_internal(tse_task_t *task, daos_pool_info_t *info, rc = dc_pool_choose_svc_rank(label, pool->dp_pool, &pool->dp_client, &pool->dp_client_lock, pool->dp_sys, &ep); if (rc != 0) { - D_CDEBUG(rc == -DER_NONEXIST, DB_PL, DLOG_ERR, - DF_UUID ":%s: cannot find pool service: " DF_RC "\n", - DP_UUID(pool->dp_pool), label ? label : "", DP_RC(rc)); + DL_CDEBUG(rc == -DER_NONEXIST, DB_PL, DLOG_ERR, rc, + DF_UUID ":%s: cannot find pool service", DP_UUID(pool->dp_pool), + label ? 
label : ""); goto out; } diff --git a/src/pool/srv_iv.c b/src/pool/srv_iv.c index 6bc3adfd567..74c1a14dc7e 100644 --- a/src/pool/srv_iv.c +++ b/src/pool/srv_iv.c @@ -1475,9 +1475,8 @@ ds_pool_iv_srv_hdl_fetch(struct ds_pool *pool, uuid_t *pool_hdl_uuid, pool_key->pik_entry_size = sizeof(struct pool_iv_entry); rc = ds_iv_fetch(pool->sp_iv_ns, &key, &sgl, false /* retry */); if (rc) { - D_CDEBUG(rc == -DER_NOTLEADER || rc == -DER_SHUTDOWN, - DB_ANY, DLOG_ERR, - "iv fetch failed "DF_RC"\n", DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER || rc == -DER_SHUTDOWN, DB_ANY, DLOG_ERR, rc, + "iv fetch failed"); D_GOTO(out, rc); } diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 1e40fb88db4..f69d04b6fa7 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -5758,8 +5758,8 @@ cont_rf_check_cb(uuid_t pool_uuid, uuid_t cont_uuid, struct rdb_tx *tx, void *ar */ rc = ds_cont_rf_check(pool_uuid, cont_uuid, tx); if (rc) - D_CDEBUG(rc == -DER_RF, DB_MD, DLOG_ERR, DF_CONT" check_rf: "DF_RC"\n", - DP_CONT(pool_uuid, cont_uuid), DP_RC(rc)); + DL_CDEBUG(rc == -DER_RF, DB_MD, DLOG_ERR, rc, DF_CONT " check_rf", + DP_CONT(pool_uuid, cont_uuid)); if (sched->psc_canceled) { D_DEBUG(DB_MD, DF_CONT" is canceled.\n", DP_CONT(pool_uuid, cont_uuid)); diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index 5a8f78bdf9a..8d07e66d9ea 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -1709,10 +1709,9 @@ ds_pool_tgt_query_map_handler(crt_rpc_t *rpc) } rc = ds_pool_hdl_is_from_srv(pool, in->tmi_op.pi_hdl); if (rc < 0) { - D_CDEBUG(rc == -DER_NOTLEADER, DLOG_DBG, DLOG_ERR, - DF_UUID": failed to check server pool handle "DF_UUID": "DF_RC"\n", - DP_UUID(in->tmi_op.pi_uuid), DP_UUID(in->tmi_op.pi_hdl), - DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOTLEADER, DLOG_DBG, DLOG_ERR, rc, + DF_UUID ": failed to check server pool handle " DF_UUID, + DP_UUID(in->tmi_op.pi_uuid), DP_UUID(in->tmi_op.pi_hdl)); if (rc == -DER_NOTLEADER) rc = -DER_AGAIN; goto out_pool; @@ -1979,8 +1978,7 
@@ ds_pool_tgt_discard_ult(void *data) rc = dss_thread_collective_reduce(&coll_ops, &coll_args, DSS_ULT_DEEP_STACK); if (coll_args.ca_exclude_tgts) D_FREE(coll_args.ca_exclude_tgts); - D_CDEBUG(rc == 0, DB_MD, DLOG_ERR, DF_UUID" tgt discard:" DF_RC"\n", - DP_UUID(arg->pool_uuid), DP_RC(rc)); + DL_CDEBUG(rc == 0, DB_MD, DLOG_ERR, rc, DF_UUID " tgt discard", DP_UUID(arg->pool_uuid)); put: pool->sp_need_discard = 0; pool->sp_discard_status = rc; diff --git a/src/rebuild/rebuild_iv.c b/src/rebuild/rebuild_iv.c index da306be72a6..5f1a905e60b 100644 --- a/src/rebuild/rebuild_iv.c +++ b/src/rebuild/rebuild_iv.c @@ -203,7 +203,7 @@ rebuild_iv_ent_refresh(struct ds_iv_entry *entry, struct ds_iv_key *key, rpt->rt_global_scan_done = dst_iv->riv_global_scan_done; if (rpt->rt_global_dtx_resync_version < rpt->rt_rebuild_ver && dst_iv->riv_global_dtx_resyc_version >= rpt->rt_rebuild_ver) { - D_INFO(DF_UUID" global/iv/rebuild_ver %u/%u/%u signal wait cond\n", + D_INFO(DF_UUID " global/iv/rebuild_ver %u/%u/%u signal wait cond\n", DP_UUID(src_iv->riv_pool_uuid), rpt->rt_global_dtx_resync_version, dst_iv->riv_global_dtx_resyc_version, rpt->rt_rebuild_ver); ABT_mutex_lock(rpt->rt_lock); @@ -280,9 +280,7 @@ rebuild_iv_update(void *ns, struct rebuild_iv *iv, unsigned int shortcut, key.class_id = IV_REBUILD; rc = ds_iv_update(ns, &key, &sgl, shortcut, sync_mode, 0, retry); if (rc) - D_CDEBUG(daos_quiet_error(rc), DB_REBUILD, DLOG_ERR, "iv update failed "DF_RC"\n", - DP_RC(rc)); - + DL_CDEBUG(daos_quiet_error(rc), DB_REBUILD, DLOG_ERR, rc, "iv update failed"); return rc; } diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index 4772a2c54b4..8587d7b5d8c 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -756,8 +756,8 @@ rebuild_obj_scan_cb(daos_handle_t ch, vos_iter_entry_t *ent, } if (rc <= 0) { - D_CDEBUG(rc == 0, DB_REBUILD, DLOG_ERR, DF_UOID" rebuild shards:" DF_RC"\n", - DP_UOID(oid), DP_RC(rc)); + DL_CDEBUG(rc == 0, DB_REBUILD, DLOG_ERR, rc, DF_UOID " rebuild shards", + 
DP_UOID(oid)); D_GOTO(out, rc); } @@ -1026,8 +1026,7 @@ rebuild_scan_leader(void *data) if (rpt->rt_global_dtx_resync_version < rpt->rt_rebuild_ver) { D_INFO(DF_UUID "wait for global dtx %u rebuild ver %u\n", DP_UUID(rpt->rt_pool_uuid), - rpt->rt_global_dtx_resync_version, - rpt->rt_rebuild_ver); + rpt->rt_global_dtx_resync_version, rpt->rt_rebuild_ver); ABT_cond_wait(rpt->rt_global_dtx_wait_cond, rpt->rt_lock); } ABT_mutex_unlock(rpt->rt_lock); diff --git a/src/vos/vos_aggregate.c b/src/vos/vos_aggregate.c index af4783faf7a..ea344eafa73 100644 --- a/src/vos/vos_aggregate.c +++ b/src/vos/vos_aggregate.c @@ -1195,8 +1195,8 @@ fill_one_segment(daos_handle_t ih, struct agg_merge_window *mw, rc = reserve_segment(obj, io, seg_size, &ent_in->ei_addr); if (rc) { - D_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, - "Reserve "DF_U64" segment error: "DF_RC"\n", seg_size, DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, rc, + "Reserve " DF_U64 " segment error", seg_size); goto out; } D_ASSERT(!bio_addr_is_hole(&ent_in->ei_addr)); @@ -1273,10 +1273,10 @@ fill_segments(daos_handle_t ih, struct vos_agg_param *agg_param, unsigned int *a rc = fill_one_segment(ih, mw, lgc_seg, acts); if (rc) { - D_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, - "Fill seg %u-%u %p "DF_RECT" error: "DF_RC"\n", - lgc_seg->ls_idx_start, lgc_seg->ls_idx_end, lgc_seg->ls_phy_ent, - DP_RECT(&lgc_seg->ls_ent_in.ei_rect), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, rc, + "Fill seg %u-%u %p " DF_RECT " error", lgc_seg->ls_idx_start, + lgc_seg->ls_idx_end, lgc_seg->ls_phy_ent, + DP_RECT(&lgc_seg->ls_ent_in.ei_rect)); break; } } @@ -1767,9 +1767,8 @@ flush_merge_window(daos_handle_t ih, struct vos_agg_param *agg_param, /* Transfer data from old logical records to reserved new segments */ rc = fill_segments(ih, agg_param, acts); if (rc) { - D_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, - "Fill segments "DF_EXT" error: "DF_RC"\n", - DP_EXT(&mw->mw_ext), DP_RC(rc)); + DL_CDEBUG(rc == 
-DER_NOSPACE, DB_EPC, DLOG_ERR, rc, + "Fill segments " DF_EXT " error", DP_EXT(&mw->mw_ext)); goto out; } @@ -2093,9 +2092,8 @@ join_merge_window(daos_handle_t ih, struct vos_agg_param *agg_param, mw->mw_ext.ex_hi = lgc_ext.ex_lo - 1; rc = flush_merge_window(ih, agg_param, false, acts); if (rc) { - D_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, - "Flush window "DF_EXT" error: "DF_RC"\n", - DP_EXT(&mw->mw_ext), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, rc, + "Flush window " DF_EXT " error", DP_EXT(&mw->mw_ext)); return rc; } D_AGG_ASSERT(mw, merge_window_status(mw) == MW_FLUSHED); @@ -2148,9 +2146,8 @@ join_merge_window(daos_handle_t ih, struct vos_agg_param *agg_param, if (last) { rc = flush_merge_window(ih, agg_param, true, acts); if (rc) - D_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, - "Flush window "DF_EXT" error: "DF_RC"\n", - DP_EXT(&mw->mw_ext), DP_RC(rc)); + DL_CDEBUG(rc == -DER_NOSPACE, DB_EPC, DLOG_ERR, rc, + "Flush window " DF_EXT " error", DP_EXT(&mw->mw_ext)); close_merge_window(mw, rc); } @@ -2274,9 +2271,9 @@ vos_agg_ev(daos_handle_t ih, vos_iter_entry_t *entry, rc = join_merge_window(ih, agg_param, entry, acts); if (rc) - D_CDEBUG(rc == -DER_TX_RESTART || rc == -DER_TX_BUSY || rc == -DER_NOSPACE, - DB_TRACE, DLOG_ERR, "Join window "DF_EXT"/"DF_EXT" error: "DF_RC"\n", - DP_EXT(&mw->mw_ext), DP_EXT(&phy_ext), DP_RC(rc)); + DL_CDEBUG(rc == -DER_TX_RESTART || rc == -DER_TX_BUSY || rc == -DER_NOSPACE, + DB_TRACE, DLOG_ERR, rc, "Join window " DF_EXT "/" DF_EXT " error", + DP_EXT(&mw->mw_ext), DP_EXT(&phy_ext)); out: if (rc) close_merge_window(mw, rc); diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index be0743b6229..7cfdac7835a 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -743,9 +743,9 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, rc = umem_free(umm, dbd_off); - D_CDEBUG(rc != 0, DLOG_ERR, DB_IO, - "Release DTX active blob %p ("UMOFF_PF") for cont "DF_UUID": "DF_RC"\n", - dbd, 
UMOFF_P(dbd_off), DP_UUID(cont->vc_id), DP_RC(rc)); + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, + "Release DTX active blob %p (" UMOFF_PF ") for cont " DF_UUID, dbd, + UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); } return rc; @@ -863,8 +863,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t out: if (rc != -DER_ALREADY && rc != -DER_NONEXIST) - D_CDEBUG(rc != 0, DLOG_ERR, DB_IO, - "Commit the DTX "DF_DTI": rc = "DF_RC"\n", DP_DTI(dti), DP_RC(rc)); + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, "Commit the DTX " DF_DTI, DP_DTI(dti)); if (rc != 0) D_FREE(dce); @@ -2313,8 +2312,7 @@ vos_dtx_abort(daos_handle_t coh, struct dtx_id *dti, daos_epoch_t epoch) if (rc == -DER_ALREADY) rc = 0; else if (rc != -DER_NONEXIST) - D_CDEBUG(rc != 0, DLOG_ERR, DB_IO, - "Abort the DTX "DF_DTI": "DF_RC"\n", DP_DTI(dti), DP_RC(rc)); + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, "Abort the DTX " DF_DTI, DP_DTI(dti)); return rc; } @@ -2367,9 +2365,8 @@ vos_dtx_set_flags_one(struct vos_container *cont, struct dtx_id *dti, uint32_t f } out: - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_WARN, - "Mark the DTX entry "DF_DTI" as %s: "DF_RC"\n", - DP_DTI(dti), vos_dtx_flags2name(flags), DP_RC(rc)); + DL_CDEBUG(rc != 0, DLOG_ERR, DLOG_WARN, rc, "Mark the DTX entry " DF_DTI " as %s", + DP_DTI(dti), vos_dtx_flags2name(flags)); if ((rc == -DER_NO_PERM || rc == -DER_NONEXIST) && flags == DTE_PARTIAL_COMMITTED) rc = 0; @@ -2535,9 +2532,9 @@ vos_dtx_aggregate(daos_handle_t coh) if (rc == 0 && cont->vc_cmt_dtx_reindex_pos == dbd_off) cont->vc_cmt_dtx_reindex_pos = next; - D_CDEBUG(rc != 0, DLOG_ERR, DB_IO, - "Release DTX committed blob %p ("UMOFF_PF") for cont "DF_UUID": "DF_RC"\n", - dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id), DP_RC(rc)); + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, + "Release DTX committed blob %p (" UMOFF_PF ") for cont " DF_UUID, dbd, + UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); return rc; } @@ -3059,9 +3056,8 @@ vos_dtx_attach(struct dtx_handle *dth, bool persistent, bool exist) 
if (unlikely(dae->dae_preparing && dae->dae_aborting)) { dae->dae_preparing = 0; rc = vos_dtx_abort_internal(cont, dae, true); - D_CDEBUG(rc != 0, DLOG_ERR, DB_IO, - "Delay abort DTX "DF_DTI" (2): rc = %d\n", - DP_DTI(&dth->dth_xid), rc); + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, + "Delay abort DTX " DF_DTI " (2)", DP_DTI(&dth->dth_xid)); /* Aborted by race, return -DER_INPROGRESS for client retry. */ return -DER_INPROGRESS; diff --git a/src/vos/vos_ilog.c b/src/vos/vos_ilog.c index 3a0e08f5b89..dea2a22f28f 100644 --- a/src/vos/vos_ilog.c +++ b/src/vos/vos_ilog.c @@ -273,8 +273,7 @@ vos_ilog_fetch_internal(struct umem_instance *umm, daos_handle_t coh, uint32_t i if (rc == -DER_NONEXIST) goto init; if (rc != 0) { - D_CDEBUG(rc == -DER_INPROGRESS, DB_IO, DLOG_ERR, - "Could not fetch ilog: "DF_RC"\n", DP_RC(rc)); + DL_CDEBUG(rc == -DER_INPROGRESS, DB_IO, DLOG_ERR, rc, "Could not fetch ilog"); return rc; } diff --git a/src/vos/vos_obj.c b/src/vos/vos_obj.c index 066d3cfd9da..b7ce02edd08 100644 --- a/src/vos/vos_obj.c +++ b/src/vos/vos_obj.c @@ -2062,8 +2062,8 @@ obj_iter_delete(struct vos_obj_iter *oiter, void *args) rc = umem_tx_end(umm, rc); exit: if (rc != 0) - D_CDEBUG(rc == -DER_TX_BUSY, DB_TRACE, DLOG_ERR, - "Failed to delete iter entry: "DF_RC"\n", DP_RC(rc)); + DL_CDEBUG(rc == -DER_TX_BUSY, DB_TRACE, DLOG_ERR, rc, + "Failed to delete iter entry"); return rc; } diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index 8c5e4ff7ff5..b2b82d1494b 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -255,8 +255,7 @@ vos_meta_flush_prep(struct umem_store *store, struct umem_store_iod *iod, daos_h rc = bio_iod_try_prep(biod, BIO_CHK_TYPE_LOCAL, NULL, 0); if (rc) { - D_CDEBUG(rc == -DER_AGAIN, DB_TRACE, DLOG_ERR, - "Failed to prepare DMA buffer. 
"DF_RC"\n", DP_RC(rc)); + DL_CDEBUG(rc == -DER_AGAIN, DB_TRACE, DLOG_ERR, rc, "Failed to prepare DMA buffer"); goto free; } diff --git a/utils/cq/d_logging_check.py b/utils/cq/d_logging_check.py index a7f1c56ac3a..3709e64562a 100755 --- a/utils/cq/d_logging_check.py +++ b/utils/cq/d_logging_check.py @@ -127,12 +127,12 @@ def __next__(self): PREFIXES = ['D_ERROR', 'D_WARN', 'D_INFO', 'D_NOTE', 'D_ALERT', 'D_CRIT', 'D_FATAT', 'D_EMIT', 'D_TRACE_INFO', 'D_TRACE_NOTE', 'D_TRACE_WARN', 'D_TRACE_ERROR', 'D_TRACE_ALERT', 'D_TRACE_CRIT', 'D_TRACE_FATAL', 'D_TRACE_EMIT', 'RPC_TRACE', 'RPC_ERROR', - 'VOS_TX_LOG_FAIL', 'VOS_TX_TRACE_FAIL', 'D_DEBUG'] + 'VOS_TX_LOG_FAIL', 'VOS_TX_TRACE_FAIL', 'D_DEBUG', 'D_CDEBUG'] # Logging macros where a new-line is always added. PREFIXES_NNL = ['DFUSE_LOG_WARNING', 'DFUSE_LOG_ERROR', 'DFUSE_LOG_DEBUG', 'DFUSE_LOG_INFO', 'DFUSE_TRA_WARNING', 'DFUSE_TRA_ERROR', 'DFUSE_TRA_DEBUG', 'DFUSE_TRA_INFO', - 'DH_PERROR_SYS', 'DH_PERROR_DER', + 'DH_PERROR_SYS', 'DH_PERROR_DER', 'DL_CDEBUG', 'DL_ERROR', 'DHL_ERROR', 'DHL_WARN', 'DL_WARN', 'DL_INFO', 'DHL_INFO'] From 3a58ced1d5f327cf3ea639734e2d96c934afad3a Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 29 Aug 2023 21:17:02 +0900 Subject: [PATCH 08/80] DAOS-14228 tests: Adjust pool/create_all_vm.yaml (#12955) See the Jira ticket for the context. This patch reduces the number iterations in test_recycle_pools_vm so that each iteration will get more time and the test will take less time. 
Signed-off-by: Li Wei --- src/tests/ftest/pool/create_all_vm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/create_all_vm.yaml b/src/tests/ftest/pool/create_all_vm.yaml index 889b7992778..8d884f85605 100644 --- a/src/tests/ftest/pool/create_all_vm.yaml +++ b/src/tests/ftest/pool/create_all_vm.yaml @@ -6,7 +6,7 @@ timeouts: test_one_pool_vm: 240 test_rank_filter: 240 test_two_pools_vm: 160 - test_recycle_pools_vm: 1000 + test_recycle_pools_vm: 800 # DAOS-12750 NOTE External tools used by DAOS are creating sparse files (e.g. daos_system.db) which # could eventually be compacted when a pool is created. To manage this, we add 9 MiB to the 17MiB @@ -18,7 +18,7 @@ test_rank_filter: delta: 27262976 # 26MiB test_recycle_pools_vm: - pool_count: 50 + pool_count: 20 test_two_pools_vm: deltas: From 1bbf1b0645b804f4366a3728672201ce87633fd2 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Tue, 29 Aug 2023 20:18:46 +0800 Subject: [PATCH 09/80] DAOS-14100 object: realloc buffer for packing CPD RPC sub-requests (#12758) Before transferring sub-requests for CPD RPC via RDAM, we will calculate the required space firstly. But in object layer, we cannot exactly know how much space the bulk handle (for some update sub-requests) will use when being packed in lower layer network (mercury). So we have to give some estimation for that. If the size is under-estimated, then instead of trigger assertion in dc_tx_cpd_pack_sub_reqs(), we can re-allocate the buffer based on the real size and then re-pack the sub-requests. 
Signed-off-by: Fan Yong --- src/object/obj_tx.c | 81 ++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/src/object/obj_tx.c b/src/object/obj_tx.c index b0e45826d89..340cecf22e4 100644 --- a/src/object/obj_tx.c +++ b/src/object/obj_tx.c @@ -1635,6 +1635,27 @@ dc_tx_dump(struct dc_tx *tx) tx->tx_tgts_bulk.dcb_size, tx->tx_tgts_bulk.dcb_bulk); } +static inline bool +dc_tx_cpd_body_need_bulk(size_t size) +{ + /* + * NOTE: For 2.2 (DAOS_OBJ_VERSION is 8) and older release, we do not support to + * transfer for large CPD RPC body via RDMA. + */ + return dc_obj_proto_version > 8 && size >= DAOS_BULK_LIMIT; +} + +static inline size_t +dc_tx_cpd_adjust_size(size_t size) +{ + /* Lower layer (mercury) need some additional space to pack related payload into + * RPC body (or specified buffer) via related proc interfaces, usually that will + * not exceed 1/10 of the payload. We can make some relative large estimation if + * we do not exactly know the real size now. + */ + return size * 11 / 10; +} + /* The calculted CPD RPC sub-requests size may be some larger than the real case, no matter. */ static size_t dc_tx_cpd_sub_reqs_size(struct daos_cpd_sub_req *dcsr, int count) @@ -1696,7 +1717,17 @@ dc_tx_cpd_sub_reqs_size(struct daos_cpd_sub_req *dcsr, int count) size += sizeof(*oia->oia_offs) * oia->oia_iod_nr; if (dcu->dcu_flags & ORF_CPD_BULK) - size += sizeof(*dcu->dcu_bulks) * dcsr->dcsr_nr; + /* + * NOTE: In object layer, we cannot exactly know how large the + * bulk handle will be in lower layer network (mercury). + * The lower layer "struct hg_bulk" is opaque to object. + * Its current size in packing is 187 bytes, but it may + * be changed in future (who knowns). So here, we use it + * as estimation and preserve more space via subsequent + * dc_tx_cpd_adjust_size. Please check hg_proc_hg_bulk_t + * for detail. 
+ */ + size += 187 * dcsr->dcsr_nr; else size += daos_sgls_packed_size(dcsr->dcsr_sgls, dcsr->dcsr_nr, NULL); break; @@ -1707,36 +1738,17 @@ dc_tx_cpd_sub_reqs_size(struct daos_cpd_sub_req *dcsr, int count) } break; case DCSO_PUNCH_AKEY: - for (j = 0; j < dcsr->dcsr_nr; j++) + for (j = 0; j < dcsr->dcsr_nr; j++) { + size += sizeof(dcsr->dcsr_punch.dcp_akeys[j]); size += dcsr->dcsr_punch.dcp_akeys[j].iov_buf_len; + } break; default: break; } } - return size; -} - -static inline bool -dc_tx_cpd_body_need_bulk(size_t size) -{ - /* - * NOTE: For 2.2 (DAOS_OBJ_VERSION is 8) and older release, we do not support to - * transfer for large CPD RPC body via RDMA. - */ - return dc_obj_proto_version > 8 && size >= DAOS_BULK_LIMIT; -} - -static inline size_t -dc_tx_cpd_adjust_size(size_t size) -{ - /* Lower layer (mercury) need some additional space to pack related payload into - * RPC body (or specified buffer) via related proc interfaces, usually that will - * not exceed 1/10 of the payload. We can make some relative large estimation if - * we do not exactly know the real size now. - */ - return size * 11 / 10; + return dc_tx_cpd_adjust_size(size); } static int @@ -1778,6 +1790,7 @@ dc_tx_cpd_pack_sub_reqs(struct dc_tx *tx, tse_task_t *task, size_t size) int rc; int i; +again: D_ALLOC(buf, size); if (buf == NULL) D_GOTO(out, rc = -DER_NOMEM); @@ -1798,13 +1811,28 @@ dc_tx_cpd_pack_sub_reqs(struct dc_tx *tx, tse_task_t *task, size_t size) } used = crp_proc_get_size_used(tx->tx_crt_proc); - D_ASSERTF(used <= size, "Input buffer size %ld is too small for real case %ld\n", - size, used); + if (unlikely(used > size)) { + D_DEBUG(DB_TRACE, "Former estimated size %ld is too small, enlarge it to %ld\n", + size, used); + size = used; + D_GOTO(out, rc = -DER_AGAIN); + } + + /* The @buf will be attached to tx->tx_reqs_bulk.dcb_iov and released via dc_tx_cleanup. 
*/ rc = dc_tx_cpd_body_bulk(&tx->tx_reqs, &tx->tx_reqs_bulk, task, buf, used, req_cnt, DCST_BULK_REQ); out: + if (rc != 0) { + crt_proc_destroy(tx->tx_crt_proc); + tx->tx_crt_proc = NULL; + D_FREE(buf); + + if (rc == -DER_AGAIN) + goto again; + } + return rc; } @@ -2074,7 +2102,6 @@ dc_tx_commit_prepare(struct dc_tx *tx, tse_task_t *task) * Let's try to pack them inline the CPD RPC body firstly. */ size = dc_tx_cpd_sub_reqs_size(tx->tx_req_cache + start, req_cnt); - size = dc_tx_cpd_adjust_size(size); if (dc_tx_cpd_body_need_bulk(body_size + size)) { rc = dc_tx_cpd_pack_sub_reqs(tx, task, size); From a042a0507c40acb3823d9b95d8bea2832b45ca09 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 29 Aug 2023 13:19:20 +0100 Subject: [PATCH 10/80] DAOS-623 cq: Do not run spelling check on go.sum (#12943) This file is a list of checksums, do not run spelling checks on it. Signed-off-by: Ashley Pittman --- .github/workflows/spelling.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/spelling.yml b/.github/workflows/spelling.yml index e814c88816a..dfd0ed262f4 100644 --- a/.github/workflows/spelling.yml +++ b/.github/workflows/spelling.yml @@ -13,7 +13,7 @@ jobs: - name: Run check uses: codespell-project/actions-codespell@master with: - skip: ./src/control/vendor,./.git + skip: ./src/control/vendor,./src/control/go.sum,./.git ignore_words_file: ci/codespell.ignores builtin: clear,rare,informal,names,en-GB_to_en-US - name: Check DAOS logging macro use. From e6825935dd5c7740116fad1fc58460b8c9450e50 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 29 Aug 2023 14:51:26 +0100 Subject: [PATCH 11/80] DAOS-13396 dfs: Add a fi test for mwc code. (#12911) Add a fault injection test for dfs consistency checking, but do not enable it. Fix two issues identified by the test, but skip any logging changes for now. 
Signed-off-by: Ashley Pittman --- src/client/dfs/dfs.c | 4 +-- utils/node_local_test.py | 71 +++++++++++++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 3ae79220ff2..9107db9214b 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -1874,7 +1874,7 @@ dfs_cont_create(daos_handle_t poh, uuid_t *cuuid, dfs_attr_t *attr, err_close: rc2 = daos_cont_close(coh, NULL); if (rc2) - D_ERROR("daos_cont_close failed "DF_RC"\n", DP_RC(rc)); + D_ERROR("daos_cont_close failed " DF_RC "\n", DP_RC(rc2)); err_destroy: /* * DAOS container create returns success even if container exists - @@ -7186,8 +7186,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * rc2 = daos_oit_close(oit_args->oit, NULL); if (rc == 0) rc = daos_der2errno(rc2); - D_FREE(oit_args); out_snap: + D_FREE(oit_args); epr.epr_hi = epr.epr_lo = snap_epoch; rc2 = daos_cont_destroy_snap(coh, epr, NULL); if (rc == 0) diff --git a/utils/node_local_test.py b/utils/node_local_test.py index f0bcd27ea77..ec7c5ce3cf0 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -5267,6 +5267,7 @@ def __init__(self, conf, desc, cmd): self.check_stderr = True self.expected_stdout = None self.ignore_busy = False + self.single_process = False self.use_il = False self._use_pil4dfs = None self.wf = conf.wf @@ -5311,6 +5312,9 @@ def _prep(self): else: max_child = int(num_cores / 4 * 3) + if self.single_process: + max_child = 1 + print(f'Maximum number of spawned tests will be {max_child}') active = [] @@ -5395,12 +5399,14 @@ def _run_cmd(self, loc, valgrind=False): if callable(self.cmd): cmd = self.cmd(loc) else: - cmd = self.cmd + # Take a copy of cmd as it might be modified and we only want that to happen once. + cmd = list(self.cmd) # Disable logging to stderr from the daos tool, the two streams are both checked already # but have different formats. 
- if os.path.basename(cmd[0]) == 'daos': + if cmd[0] == 'daos': cmd_env['DD_STDERR'] = 'CRIT' + cmd[0] = join(self.conf['PREFIX'], 'bin', 'daos') aftf = AllocFailTestRun(self, cmd, cmd_env, loc, cwd) if valgrind: @@ -5457,7 +5463,7 @@ def test_alloc_fail_copy(server, conf, wf): """ def get_cmd(cont_id): - return [join(conf['PREFIX'], 'bin', 'daos'), + return ['daos', 'filesystem', 'copy', '--src', @@ -5504,7 +5510,7 @@ def test_alloc_fail_copy_trunc(server, conf, wf): files_needed = 4000 def get_cmd(_): - cmd = [join(conf['PREFIX'], 'bin', 'daos'), 'filesystem', 'copy', '--src', src_file.name, + cmd = ['daos', 'filesystem', 'copy', '--src', src_file.name, '--dst', f'daos://{pool.id()}/aftc/new_dir/file.{get_cmd.idx}'] get_cmd.idx += 1 assert get_cmd.idx <= files_needed @@ -5582,7 +5588,7 @@ def test_alloc_cont_create(server, conf, wf): pool = server.get_test_pool_obj() def get_cmd(cont_id): - return [join(conf['PREFIX'], 'bin', 'daos'), + return ['daos', 'container', 'create', pool.id(), @@ -5606,7 +5612,7 @@ def test_alloc_fail_cont_create(server, conf): dfuse.start() def get_cmd(cont_id): - return [join(conf['PREFIX'], 'bin', 'daos'), + return ['daos', 'container', 'create', '--type', @@ -5697,7 +5703,7 @@ def test_fi_list_attr(server, conf, wf): container.set_attrs({'my-test-attr-1': 'some-value', 'my-test-attr-2': 'some-other-value'}) - cmd = [join(conf['PREFIX'], 'bin', 'daos'), + cmd = ['daos', 'container', 'list-attrs', pool.id(), @@ -5717,7 +5723,7 @@ def test_fi_get_prop(server, conf, wf): container = create_cont(conf, pool, ctype='POSIX') - cmd = [join(conf['PREFIX'], 'bin', 'daos'), + cmd = ['daos', 'container', 'get-prop', pool.id(), @@ -5742,7 +5748,7 @@ def test_fi_get_attr(server, conf, wf): container.set_attrs({attr_name: 'value'}) - cmd = [join(conf['PREFIX'], 'bin', 'daos'), + cmd = ['daos', 'container', 'get-attr', pool.id(), @@ -5766,7 +5772,7 @@ def test_fi_cont_query(server, conf, wf): container = create_cont(conf, pool, ctype='POSIX') - cmd = 
[join(conf['PREFIX'], 'bin', 'daos'), + cmd = ['daos', 'container', 'query', pool.id(), @@ -5789,7 +5795,7 @@ def test_fi_cont_check(server, conf, wf): container = create_cont(conf, pool) - cmd = [join(conf['PREFIX'], 'bin', 'daos'), + cmd = ['daos', 'container', 'check', pool.id(), @@ -5810,7 +5816,7 @@ def test_alloc_fail(server, conf): """Run 'daos' client binary with fault injection""" pool = server.get_test_pool_obj() - cmd = [join(conf['PREFIX'], 'bin', 'daos'), + cmd = ['daos', 'cont', 'list', pool.id()] @@ -5825,6 +5831,43 @@ def test_alloc_fail(server, conf): return rc +def test_dfs_check(server, conf, wf): + """Run filesystem check. + + Create a pool and populate a subdir with a number of entries, files, symlink (broken and not) + and another subdir. Run 'daos filesystem check' on this to see the output. + """ + pool = server.get_test_pool_obj() + + container = create_cont(conf, pool, ctype='POSIX', label='fsck') + + with tempfile.TemporaryDirectory(prefix='fsck_src_',) as src_dir: + sub_dir = join(src_dir, 'new_dir') + os.mkdir(sub_dir) + + for idx in range(5): + with open(join(sub_dir, f'file.{idx}'), 'w') as ofd: + ofd.write('hello') + + os.mkdir(join(sub_dir, 'new_dir')) + # os.symlink('broken', join(sub_dir, 'broken_s')) + os.symlink('file.0', join(sub_dir, 'link')) + + rc = run_daos_cmd(conf, ['filesystem', 'copy', '--src', f'{src_dir}/new_dir', + '--dst', f'daos://{pool.id()}/{container.id()}']) + print(rc) + assert rc.returncode == 0, rc + + test_cmd = AllocFailTest( + conf, 'fs-check', ['daos', 'filesystem', 'check', pool.id(), container.id()]) + test_cmd.wf = wf + test_cmd.single_process = True + test_cmd.check_daos_stderr = True + test_cmd.check_post_stdout = False + + return test_cmd.launch() + + def server_fi(args): """Run the server under fault injection. 
@@ -5977,6 +6020,10 @@ def run(wf, args): # Disabled for now because of errors # fatal_errors.add_result(test_alloc_pil4dfs_ls(server, conf, wf_client)) + # This test is disabled by default, it takes ~4 hours to run and can fill Jenkins + # available space, no not enable in CI. + # fatal_errors.add_result(test_dfs_check(server, conf, wf_client)) + wf_client.close() if fi_test_dfuse: From 20668ddd8db554a96fa6769e9012f43257e0eacc Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 29 Aug 2023 10:00:48 -0400 Subject: [PATCH 12/80] DAOS-14171 test: Exclude mounted devices in auto storage (#12839) When generating storage:auto server configurations for functional tests exclude any detected devices that are mounted. Signed-off-by: Phil Henderson --- src/tests/ftest/util/storage_utils.py | 115 ++++++++++++++++++++++++-- utils/cq/words.dict | 2 + 2 files changed, 112 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/util/storage_utils.py b/src/tests/ftest/util/storage_utils.py index b48c15d91f4..4b8f82a6360 100644 --- a/src/tests/ftest/util/storage_utils.py +++ b/src/tests/ftest/util/storage_utils.py @@ -9,17 +9,19 @@ from operator import is_not import os import re +import yaml from ClusterShell.NodeSet import NodeSet from run_utils import run_remote -def find_pci_address(value): +def find_pci_address(value, *flags): """Find PCI addresses in the specified string. 
Args: value (str): string to search for PCI addresses + flags (list, optional): flags arguments for re.findall Returns: list: a list of all the PCI addresses found in the string @@ -27,7 +29,7 @@ def find_pci_address(value): """ digit = '0-9a-fA-F' pattern = rf'[{digit}]{{4,5}}:[{digit}]{{2}}:[{digit}]{{2}}\.[{digit}]' - return re.findall(pattern, str(value)) + return re.findall(pattern, str(value), *flags) def get_tier_roles(tier, total_tiers): @@ -327,19 +329,21 @@ def scan(self, device_filter=None): """ self._devices.clear() self._log.info('Scanning %s for PMEM/NVMe/VMD devices', self._hosts) + mounted = self.get_mounted_addresses() for key in self.TYPE_SEARCH: - device_info = self.get_device_information(key, device_filter) + device_info = self.get_device_information(key, device_filter, mounted) if key == "VMD" and device_info: self._devices.extend(self.get_controller_information(device_info)) else: self._devices.extend(device_info) - def get_device_information(self, key, device_filter): + def get_device_information(self, key, device_filter, mounted): """Get a list of PMEM, NVMe, or VMD devices that exist on every host. Args: key (str): disk type: 'PMEM', 'NVMe', or 'VMD' - device_filter (str, optional): device search filter. Defaults to None. + device_filter (str): device search filter. 
+ mounted (list): list of mounted devices addresses Returns: list: the StorageDevice objects found on every host @@ -405,6 +409,11 @@ def get_device_information(self, key, device_filter): self._log.debug(" excluding backing device: %s", device) continue + # Ignore mounted devices + if kwargs['address'] in mounted: + self._log.debug(" excluding mounted device: %s", device) + continue + # Ignore any devices that do not match a filter if specified if device_filter and device_filter.startswith("-"): if re.findall(device_filter[1:], device.description): @@ -527,6 +536,102 @@ def get_controller_mapping(self): return controllers + def get_mounted_addresses(self): + """Get a list of addresses of devices which are mounted on one or more hosts. + + Returns: + list: a list of addresses of devices which are mounted on one or more hosts + """ + mounted_addresses = set() + self._log.debug('Detecting mounted addresses on %s', self._hosts) + for device, hosts in self._get_mounted_devices().items(): + mounted_addresses.update(self._get_addresses(hosts, device)) + return list(mounted_addresses) + + def _get_mounted_devices(self): + """Get a dictionary of list of mounted device names per host. + + Returns: + dict: a dictionary of mounted device name keys on host NodeSet values + """ + def get_mounted_parent(block_device): + """Get the mounted parent kernel device name for the lsblk json block device entry. 
+ + Args: + block_device (dict): lsblk json block device entry + + Returns: + str: mounted parent kernel device name or None + """ + if block_device and 'children' in block_device: + for child in block_device['children']: + if get_mounted_parent(child): + return block_device['name'] + if block_device and 'mountpoint' in block_device and block_device['mountpoint']: + return block_device['name'] + return None + + mounted_devices = {} + self._log.debug(' Detecting mounted device names on %s', self._hosts) + result = run_remote( + self._log, self._hosts, 'lsblk --output NAME,MOUNTPOINT --json', stderr=True) + for data in result.output: + if not data.passed: + self._log.debug(' - Error detecting mounted devices on %s', data.hosts) + continue + try: + lsblk_data = yaml.safe_load('\n'.join(data.stdout)) + except yaml.YAMLError as error: + self._log.debug( + ' - Error processing mounted device information on %s: %s', data.hosts, error) + continue + key = 'blockdevices' + if key not in lsblk_data: + self._log.debug(' -%s: lsblk json output missing \'%s\' key', data.hosts, key) + continue + for entry in lsblk_data[key]: + name = get_mounted_parent(entry) + if name is not None: + if name not in mounted_devices: + mounted_devices[name] = NodeSet() + mounted_devices[name].add(data.hosts) + + self._log.debug(' Detected mounted names:') + for device, hosts in mounted_devices.items(): + self._log.debug(' %s on %s', device, hosts) + + return mounted_devices + + def _get_addresses(self, hosts, device): + """Get a list of addresses for the device on each host. 
+ + Args: + hosts (NodeSet): hosts on which to get the device address + device (str): device whose address to find + + Returns: + list: a list of addresses + """ + addresses = set() + self._log.debug(' Detecting addresses for %s on %s', device, hosts) + + # Find the mounted device names on each host + command = ' | '.join( + ['find /dev/disk/by-path/ -type l -printf \'%f -> %l\n\'', + f'grep -w \'{device}\'', + 'sort']) + result = run_remote(self._log, self._hosts, command) + self._log.debug(' Detected addresses for %s:', device) + for data in result.output: + if not data.passed: + self._log.debug(' %s: Error detecting addresses', data.hosts) + continue + info = set(find_pci_address('\n'.join(data.stdout), re.MULTILINE)) + self._log.debug(' %s: %s', data.hosts, info) + addresses.update(info) + + return list(addresses) + def write_storage_yaml(self, yaml_file, engines, tier_0_type, scm_size=0, scm_mount='/mnt/daos', max_nvme_tiers=1, control_metadata=None): """Generate a storage test yaml sub-section. diff --git a/utils/cq/words.dict b/utils/cq/words.dict index f5dba89a4ee..3d6eaffba4e 100644 --- a/utils/cq/words.dict +++ b/utils/cq/words.dict @@ -176,6 +176,7 @@ fchmod fcntl filename filesystem +findall findcmd fini fio @@ -253,6 +254,7 @@ logfile logfiles logmasks lseek +lsblk lspci lua lustre From c30848e8d37b50de7487c8f082f7072477c7296b Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Wed, 30 Aug 2023 00:21:25 +0900 Subject: [PATCH 13/80] DAOS-14139 test: Debug Unhandled exception for dfuse/caching_check.py (#12842) During the teardown of dfuse/caching_checking.py, Unhandled exception occurs when dmg pool destroy is called. In _cleanup(), add a check in the exception block for timeout exception. 
Signed-off-by: Makito Kano --- src/tests/ftest/util/apricot/apricot/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tests/ftest/util/apricot/apricot/test.py b/src/tests/ftest/util/apricot/apricot/test.py index fabfb6e173a..ce45252c38d 100644 --- a/src/tests/ftest/util/apricot/apricot/test.py +++ b/src/tests/ftest/util/apricot/apricot/test.py @@ -408,6 +408,9 @@ def _cleanup(self): cleanup = self._cleanup_methods.pop() errors.extend(cleanup["method"](**cleanup["kwargs"])) except Exception as error: # pylint: disable=broad-except + if str(error) == "Test interrupted by SIGTERM": + # Abort testing if timed out by avocado. + raise error errors.append( "Unhandled exception when calling {}({}): {}".format( str(cleanup["method"]), dict_to_str(cleanup["kwargs"]), str(error))) From a0cf793cf6978043a47d8ff809eb0e6b64f300cd Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 29 Aug 2023 18:17:34 +0100 Subject: [PATCH 14/80] DAOS-14207 engine: Improve error reporting in case of faults. (#12876) Fix issues identified by server fault injection testing. Log errors correctly. Improve cases where -DER_MISC or -DER-INVAL was used instead of -DER_NOMEM. Convert -DER_DOS to ENOMEM when using system error codes. 
Signed-off-by: Ashley Pittman --- src/cart/crt_hg.c | 3 +- src/cart/crt_iv.c | 17 +-- src/common/drpc.c | 54 +++----- src/common/tests/drpc_tests.c | 23 ++- src/common/tests/test_mocks.c | 6 +- src/container/srv_container.c | 185 ++++++++++++------------- src/dtx/dtx_rpc.c | 10 +- src/engine/drpc_progress.c | 23 ++- src/engine/tests/drpc_progress_tests.c | 6 +- src/include/daos/common.h | 1 + src/include/daos/drpc.h | 5 +- src/pool/srv_pool.c | 13 +- src/rdb/rdb_tx.c | 3 +- src/security/srv_acl.c | 84 +++++------ 14 files changed, 213 insertions(+), 220 deletions(-) diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 77f43823ab0..db659f39297 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -1117,8 +1117,7 @@ crt_rpc_handler_common(hg_handle_t hg_hdl) rpc_pub->cr_ep.ep_grp = NULL; /* TODO lookup by rpc_priv->crp_req_hdr.cch_grp_id */ } else { - D_ERROR("_unpack_body failed, rc: %d, opc: %#x.\n", - rc, rpc_pub->cr_opc); + DHL_ERROR(rpc_priv, rc, "_unpack_body failed, opc: %#x", rpc_pub->cr_opc); crt_hg_reply_error_send(rpc_priv, -DER_MISC); D_GOTO(decref, hg_ret = HG_SUCCESS); } diff --git a/src/cart/crt_iv.c b/src/cart/crt_iv.c index 840aca97845..7e742c7845d 100644 --- a/src/cart/crt_iv.c +++ b/src/cart/crt_iv.c @@ -2188,8 +2188,6 @@ handle_ivsync_response(const struct crt_cb_info *cb_info) /* Helper function to issue update sync * Important note: iv_key and iv_value are destroyed right after this call, * as such they need to be copied over - * - * TODO: This is leaking memory on failure. 
*/ static int crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id, @@ -2199,7 +2197,7 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id, crt_iv_comp_cb_t update_comp_cb, void *cb_arg, void *user_priv, int update_rc) { - crt_rpc_t *corpc_req; + crt_rpc_t *corpc_req = NULL; struct crt_iv_sync_in *input; int rc = 0; bool delay_completion = false; @@ -2310,10 +2308,8 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id, /* Copy iv_key over as it will get destroyed after this call */ D_ALLOC(iv_sync_cb->isc_iv_key.iov_buf, iv_key->iov_buf_len); - if (iv_sync_cb->isc_iv_key.iov_buf == NULL) { - /* Avoid checkpatch warning */ + if (iv_sync_cb->isc_iv_key.iov_buf == NULL) D_GOTO(exit, rc = -DER_NOMEM); - } memcpy(iv_sync_cb->isc_iv_key.iov_buf, iv_key->iov_buf, iv_key->iov_buf_len); @@ -2329,9 +2325,12 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id, } rc = crt_req_send(corpc_req, handle_response_cb, iv_sync_cb); - if (rc != 0) - D_ERROR("crt_req_send(): "DF_RC"\n", DP_RC(rc)); + if (rc != 0) { + DL_ERROR(rc, "crt_req_send() failed, not calling decref"); + /* Do not call decref on this rpc below if req_send failed */ + corpc_req = NULL; + } exit: if (delay_completion == false || rc != 0) { if (rc != 0) @@ -2353,6 +2352,8 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id, D_FREE(iv_sync_cb->isc_iv_key.iov_buf); D_FREE(iv_sync_cb); } + if (corpc_req != NULL) + RPC_PUB_DECREF(corpc_req); } return rc; } diff --git a/src/common/drpc.c b/src/common/drpc.c index e14b0237f7d..1afb7c91cb1 100644 --- a/src/common/drpc.c +++ b/src/common/drpc.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -275,26 +275,6 @@ unixcomm_listen(char *sockaddr, int flags, struct unixcomm **newcommp) return 0; } -static struct unixcomm * -unixcomm_accept(struct unixcomm *listener) -{ - struct unixcomm *comm; - - D_ALLOC_PTR(comm); - if (comm == NULL) - return NULL; - - comm->fd = accept(listener->fd, NULL, NULL); - if (comm->fd < 0) { - D_ERROR("Failed to accept connection on listener fd %d, " - "errno=%d\n", listener->fd, errno); - D_FREE(comm); - return NULL; - } - - return comm; -} - static int unixcomm_send(struct unixcomm *hndl, uint8_t *buffer, size_t buflen, ssize_t *sent) @@ -538,34 +518,46 @@ drpc_is_valid_listener(struct drpc *ctx) * Wait for a client to connect to a listening drpc context, and return the * context for the client's session. * - * \param ctx drpc context created by drpc_listen() + * \param[in] ctx drpc context created by drpc_listen() + * \param[out] drpc New drpc context; * - * \return new drpc context for the accepted client session, or - * NULL if failed to get one + * \return daos errno */ -struct drpc * -drpc_accept(struct drpc *listener_ctx) +int +drpc_accept(struct drpc *listener_ctx, struct drpc **drpc) { struct drpc *session_ctx; struct unixcomm *comm; if (!drpc_is_valid_listener(listener_ctx)) { D_ERROR("dRPC context is not a listener\n"); - return NULL; + return -DER_INVAL; } D_ALLOC_PTR(session_ctx); if (session_ctx == NULL) - return NULL; + return -DER_NOMEM; - comm = unixcomm_accept(listener_ctx->comm); + D_ALLOC_PTR(comm); if (comm == NULL) { D_FREE(session_ctx); - return NULL; + return -DER_NOMEM; + } + + comm->fd = accept(listener_ctx->comm->fd, NULL, NULL); + if (comm->fd < 0) { + int rc = daos_errno2der(errno); + + DL_ERROR(rc, "Failed to accept connection on listener fd %d", + listener_ctx->comm->fd); + D_FREE(comm); + D_FREE(session_ctx); + return rc; } init_drpc_ctx(session_ctx, comm, listener_ctx->handler); - return session_ctx; + *drpc = session_ctx; + return 0; } static 
int diff --git a/src/common/tests/drpc_tests.c b/src/common/tests/drpc_tests.c index afa6d528a91..5ae15a4fbc0 100644 --- a/src/common/tests/drpc_tests.c +++ b/src/common/tests/drpc_tests.c @@ -434,17 +434,22 @@ test_drpc_listen_fails_if_listen_fails(void **state) static void test_drpc_accept_fails_with_null_ctx(void **state) { - assert_null(drpc_accept(NULL)); + int rc; + + rc = drpc_accept(NULL, NULL); + assert_rc_equal(rc, -DER_INVAL); } static void test_drpc_accept_fails_with_null_handler(void **state) { struct drpc *ctx = new_drpc_with_fd(15); + int rc; ctx->handler = NULL; - assert_null(drpc_accept(ctx)); + rc = drpc_accept(ctx, NULL); + assert_rc_equal(rc, -DER_INVAL); free_drpc(ctx); } @@ -452,10 +457,12 @@ test_drpc_accept_fails_with_null_handler(void **state) static void test_drpc_accept_success(void **state) { - struct drpc *ctx = new_drpc_with_fd(15); - struct drpc *session_ctx; + struct drpc *ctx = new_drpc_with_fd(15); + struct drpc *session_ctx; + int rc; - session_ctx = drpc_accept(ctx); + rc = drpc_accept(ctx, &session_ctx); + assert_rc_equal(rc, -DER_SUCCESS); /* got context back for the new accepted connection */ assert_non_null(session_ctx); @@ -479,10 +486,12 @@ static void test_drpc_accept_fails_if_accept_fails(void **state) { struct drpc *ctx = new_drpc_with_fd(15); + int rc; - accept_return = -1; + accept_return = -EIO; - assert_null(drpc_accept(ctx)); + rc = drpc_accept(ctx, NULL); + assert_rc_equal(rc, -DER_IO); free_drpc(ctx); } diff --git a/src/common/tests/test_mocks.c b/src/common/tests/test_mocks.c index 58020376402..a27de6447b7 100644 --- a/src/common/tests/test_mocks.c +++ b/src/common/tests/test_mocks.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -196,6 +196,10 @@ __wrap_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) accept_addr_ptr = addr; accept_addrlen_ptr = addrlen; accept_call_count++; + if (accept_return < 0) { + errno = -accept_return; + return -1; + } return accept_return; } diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 6dff9a62a91..78e9f73ab9b 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -331,8 +331,8 @@ get_metadata_times(struct rdb_tx *tx, struct cont *cont, bool update_otime, bool rc = 0; /* pool/container has old layout without metadata times */ goto out; } else if (rc != 0) { - D_ERROR(DF_CONT": rdb_tx_lookup co_md_times failed, "DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": rdb_tx_lookup co_md_times failed", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); goto out; } @@ -345,8 +345,8 @@ get_metadata_times(struct rdb_tx *tx, struct cont *cont, bool update_otime, bool rc = rdb_tx_update(tx, &cont->c_prop, &ds_cont_prop_co_md_times, &value); if (rc != 0) { - D_ERROR(DF_CONT": failed to update metadata times, "DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": failed to update metadata times", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); goto out; } } @@ -441,7 +441,7 @@ nhandles_ht_destroy(struct d_hash_table *nht) rc = d_hash_table_destroy_inplace(nht, true); if (rc) - D_ERROR("d_hash_table_destroy_inplace() failed, "DF_RC"\n", DP_RC(rc)); + DL_ERROR(rc, "d_hash_table_destroy_inplace() failed"); return rc; } @@ -477,8 +477,8 @@ get_nhandles(struct rdb_tx *tx, struct d_hash_table *nhc, struct cont *cont, enu d_iov_set(&value, &lookup_val, sizeof(lookup_val)); rc = rdb_tx_lookup(tx, &cont->c_prop, &ds_cont_prop_nhandles, &value); if (rc) { - D_ERROR(DF_CONT": rdb_tx_lookup nhandles failed, "DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, 
cont->c_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": rdb_tx_lookup nhandles failed", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); goto out; } @@ -495,9 +495,8 @@ get_nhandles(struct rdb_tx *tx, struct d_hash_table *nhc, struct cont *cont, enu rc = d_hash_rec_insert(nhc, rec->nhr_cuuid, sizeof(uuid_t), &rec->nhr_hlink, true); if (rc != 0) { - D_ERROR(DF_CONT": error inserting into nhandles cache" DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), - DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": error inserting into nhandles cache", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); D_FREE(rec); goto out; } @@ -525,8 +524,8 @@ get_nhandles(struct rdb_tx *tx, struct d_hash_table *nhc, struct cont *cont, enu d_iov_set(&value, &result, sizeof(result)); rc = rdb_tx_update(tx, &cont->c_prop, &ds_cont_prop_nhandles, &value); if (rc != 0) { - D_ERROR(DF_CONT": rdb_tx_update nhandles failed, "DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": rdb_tx_update nhandles failed", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); goto out; } @@ -990,8 +989,7 @@ cont_prop_write(struct rdb_tx *tx, const rdb_path_t *kvs, daos_prop_t *prop, return -DER_INVAL; } if (rc) { - D_ERROR("Failed to update property=%d, "DF_RC"\n", - entry->dpe_type, DP_RC(rc)); + DL_ERROR(rc, "Failed to update property=%d", entry->dpe_type); break; } } @@ -1111,9 +1109,8 @@ cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, d_iov_set(&value, &ghce, sizeof(ghce)); rc = rdb_tx_update(tx, &kvs, &ds_cont_prop_ghce, &value); if (rc != 0) { - D_ERROR(DF_CONT": create ghce property failed: "DF_RC"\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, - in->cci_op.ci_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": create ghce property failed", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid)); D_GOTO(out_kvs, rc); } @@ -1121,9 +1118,8 @@ cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, d_iov_set(&value, &alloced_oid, 
sizeof(alloced_oid)); rc = rdb_tx_update(tx, &kvs, &ds_cont_prop_alloced_oid, &value); if (rc != 0) { - D_ERROR(DF_CONT": create alloced_oid prop failed: "DF_RC"\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, - in->cci_op.ci_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": create alloced_oid prop failed", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid)); D_GOTO(out_kvs, rc); } @@ -1136,9 +1132,8 @@ cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, d_iov_set(&value, &mdtimes, sizeof(mdtimes)); rc = rdb_tx_update(tx, &kvs, &ds_cont_prop_co_md_times, &value); if (rc != 0) { - D_ERROR(DF_CONT": create co_md_times failed: "DF_RC"\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid), - DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": create co_md_times failed", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid)); D_GOTO(out_kvs, rc); } D_DEBUG(DB_MD, DF_CONT": set metadata times: open="DF_X64", modify="DF_X64"\n", @@ -1153,9 +1148,8 @@ cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, d_iov_set(&value, &nhandles, sizeof(nhandles)); rc = rdb_tx_update(tx, &kvs, &ds_cont_prop_nhandles, &value); if (rc != 0) { - D_ERROR(DF_CONT": create nhandles failed: "DF_RC"\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid), - DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": create nhandles failed", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid)); goto out_kvs; } } @@ -1178,9 +1172,8 @@ cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, d_iov_set(&value, in->cci_op.ci_uuid, sizeof(uuid_t)); rc = rdb_tx_update(tx, &svc->cs_uuids, &key, &value); if (rc != 0) { - D_ERROR(DF_CONT": update cs_uuids failed: "DF_RC"\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, - in->cci_op.ci_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": update cs_uuids failed", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid)); D_GOTO(out_kvs, rc); } D_DEBUG(DB_MD, DF_CONT": creating container, label: %s\n", @@ -1191,8 +1184,8 @@ cont_create(struct rdb_tx *tx, 
struct ds_pool_hdl *pool_hdl, d_iov_set(&value, &nsnapshots, sizeof(nsnapshots)); rc = rdb_tx_update(tx, &kvs, &ds_cont_prop_nsnapshots, &value); if (rc != 0) { - D_ERROR(DF_CONT": failed to update nsnapshots, "DF_RC"\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": failed to update nsnapshots", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid)); D_GOTO(out_kvs, rc); } attr.dsa_class = RDB_KVS_INTEGER; @@ -1405,9 +1398,8 @@ belongs_to_user(d_iov_t *key, struct find_hdls_by_cont_arg *arg) d_iov_set(&value, NULL, sizeof(struct container_hdl)); rc = rdb_tx_lookup(arg->fha_tx, &cont->c_svc->cs_hdls, key, &value); if (rc != 0) { - D_ERROR(DF_CONT": look up container handle "DF_UUIDF": "DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), DP_UUID(key->iov_buf), - DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": look up container handle " DF_UUIDF, + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), DP_UUID(key->iov_buf)); return rc; } hdl = value.iov_buf; @@ -2132,8 +2124,8 @@ check_hdl_compatibility(struct rdb_tx *tx, struct cont *cont, uint64_t flags) d_iov_set(&value, NULL, sizeof(struct container_hdl)); rc = rdb_tx_lookup(tx, &cont->c_svc->cs_hdls, &key, &value); if (rc != 0) { - D_ERROR(DF_CONT": look up first handle value: "DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": look up first handle value", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); return rc; } if (((struct container_hdl *)value.iov_buf)->ch_flags & DAOS_COO_EX) { @@ -2296,8 +2288,10 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, * have been upgraded to new layout yet. mdtimes will be zeros in that case. 
*/ rc = get_metadata_times(tx, cont, update_otime, false /* update_mtime */, &mdtimes); - if (rc != 0) + if (rc != 0) { + daos_prop_free(prop); goto out; + } /* include metadata times in reply if client speaks the protocol */ if (mdtimes_in_reply && (opc_get(rpc->cr_opc) == CONT_OPEN)) { @@ -2319,8 +2313,8 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, cont->c_uuid, prop, true); daos_prop_free(prop); if (rc != 0) { - D_ERROR(DF_CONT": cont_iv_prop_update failed %d.\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), rc); + DL_ERROR(rc, DF_CONT ": cont_iv_prop_update failed", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); D_GOTO(out, rc); } @@ -2329,8 +2323,8 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, in->coi_op.ci_hdl, cont->c_uuid, in->coi_flags, sec_capas, stat_pm_ver); if (rc != 0) { - D_ERROR(DF_CONT": cont_iv_capability_update failed %d.\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), rc); + DL_ERROR(rc, DF_CONT ": cont_iv_capability_update failed", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); D_GOTO(out, rc); } cont_hdl_opened = true; @@ -2361,8 +2355,8 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, d_iov_set(&value, &snap_count, sizeof(snap_count)); rc = rdb_tx_lookup(tx, &cont->c_prop, &ds_cont_prop_nsnapshots, &value); if (rc != 0) { - D_ERROR(DF_CONT": failed to lookup nsnapshots, "DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": failed to lookup nsnapshots", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); goto out; } out->coo_snap_count = snap_count; @@ -3203,8 +3197,8 @@ cont_info_read(struct rdb_tx *tx, struct cont *cont, int cont_proto_ver, daos_co d_iov_set(&value, &snap_count, sizeof(snap_count)); rc = rdb_tx_lookup(tx, &cont->c_prop, &ds_cont_prop_nsnapshots, &value); if (rc != 0) { - D_ERROR(DF_CONT": failed to lookup nsnapshots, "DF_RC"\n", - 
DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": failed to lookup nsnapshots", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); return rc; } out.ci_nsnapshots = snap_count; @@ -3559,21 +3553,19 @@ check_set_prop_label(struct rdb_tx *tx, struct ds_pool *pool, struct cont *cont, rc = rdb_tx_lookup(tx, &cont->c_svc->cs_uuids, &key, &val); if (rc != -DER_NONEXIST) { if (rc != 0) { - D_ERROR(DF_UUID": lookup label (%s) failed: "DF_RC"\n", - DP_UUID(cont->c_uuid), in_lbl, DP_RC(rc)); + DL_ERROR(rc, DF_UUID ": lookup label (%s) failed", DP_UUID(cont->c_uuid), + in_lbl); return rc; /* other lookup failure */ } - D_ERROR(DF_UUID": non-unique label (%s) matches different " - "container "DF_UUID"\n", DP_UUID(cont->c_uuid), - in_lbl, DP_UUID(match_cuuid)); + DL_ERROR(rc, DF_UUID ": non-unique label (%s) matches different container " DF_UUID, + DP_UUID(cont->c_uuid), in_lbl, DP_UUID(match_cuuid)); return -DER_EXIST; } d_iov_set(&val, cont->c_uuid, sizeof(uuid_t)); rc = rdb_tx_update(tx, &cont->c_svc->cs_uuids, &key, &val); if (rc != 0) { - D_ERROR(DF_UUID": update cs_uuids failed: "DF_RC"\n", - DP_UUID(cont->c_uuid), DP_RC(rc)); + DL_ERROR(rc, DF_UUID ": update cs_uuids failed", DP_UUID(cont->c_uuid)); return rc; } D_DEBUG(DB_MD, DF_UUID": inserted new label in cs_uuids KVS: %s\n", @@ -3588,15 +3580,15 @@ check_set_prop_label(struct rdb_tx *tx, struct ds_pool *pool, struct cont *cont, rc = rdb_tx_lookup(tx, &cont->c_svc->cs_uuids, &key, &val); if (rc != -DER_NONEXIST) { if (rc != 0) { - D_ERROR(DF_UUID": lookup label (%s) failed: "DF_RC"\n", - DP_UUID(cont->c_uuid), old_lbl, DP_RC(rc)); + DL_ERROR(rc, DF_UUID ": lookup label (%s) failed", DP_UUID(cont->c_uuid), + old_lbl); return rc; } d_iov_set(&val, NULL, 0); rc = rdb_tx_delete(tx, &cont->c_svc->cs_uuids, &key); if (rc != 0) { - D_ERROR(DF_UUID": delete label (%s) failed: "DF_RC"\n", - DP_UUID(cont->c_uuid), old_lbl, DP_RC(rc)); + DL_ERROR(rc, DF_UUID ": delete label (%s) failed", 
DP_UUID(cont->c_uuid), + old_lbl); return rc; } D_DEBUG(DB_MD, DF_UUID": deleted original label in cs_uuids KVS: %s\n", @@ -4196,9 +4188,9 @@ cont_filter_part_match(struct rdb_tx *tx, struct cont *cont, daos_pool_cont_filt } if (rc != 0) { - D_ERROR(DF_CONT": metadata lookup of %s failed, "DF_RC"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), - daos_pool_cont_filter_key_str(part->pcfp_key), DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": metadata lookup of %s failed", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), + daos_pool_cont_filter_key_str(part->pcfp_key)); goto out; } @@ -4521,8 +4513,8 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_cont_obj_version, &value); if (rc) { - D_ERROR("failed to upgrade container obj version pool/cont: "DF_CONTF"\n", - DP_CONT(ap->pool_uuid, cont_uuid)); + DL_ERROR(rc, "failed to upgrade container obj version pool/cont: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } upgraded = true; @@ -4550,9 +4542,9 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) } if (global_ver > DAOS_POOL_GLOBAL_VERSION) { - D_ERROR("Downgrading pool/cont: "DF_CONTF" not supported\n", - DP_CONT(ap->pool_uuid, cont_uuid)); rc = -DER_NOTSUPPORTED; + DL_ERROR(rc, "Downgrading pool/cont: " DF_CONTF " not supported", + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } @@ -4578,8 +4570,8 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_cont_global_version, &value); if (rc) { - D_ERROR("failed to upgrade container global version pool/cont: "DF_CONTF"\n", - DP_CONT(ap->pool_uuid, cont_uuid)); + DL_ERROR(rc, "failed to upgrade container global version pool/cont: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } @@ -4604,11 +4596,10 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) entry->dpe_flags &= 
~DAOS_PROP_ENTRY_NOT_SET; obj_ver = DS_POOL_OBJ_VERSION; d_iov_set(&value, &obj_ver, sizeof(obj_ver)); - rc = rdb_tx_update(ap->tx, &cont->c_prop, - &ds_cont_prop_cont_obj_version, &value); + rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_cont_obj_version, &value); if (rc) { - D_ERROR("failed to upgrade container obj version pool/cont: "DF_CONTF"\n", - DP_CONT(ap->pool_uuid, cont_uuid)); + DL_ERROR(rc, "failed to upgrade container obj version pool/cont: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } @@ -4621,11 +4612,10 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) goto out; if (rc == -DER_NONEXIST) { pda = DAOS_PROP_PO_EC_PDA_DEFAULT; - rc = rdb_tx_update(ap->tx, &cont->c_prop, - &ds_cont_prop_ec_pda, &value); + rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_ec_pda, &value); if (rc) { - D_ERROR("failed to upgrade container ec_pda pool/cont: "DF_CONTF"\n", - DP_CONT(ap->pool_uuid, cont_uuid)); + DL_ERROR(rc, "failed to upgrade container ec_pda pool/cont: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } upgraded = true; @@ -4642,11 +4632,10 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) goto out; if (rc == -DER_NONEXIST) { pda = DAOS_PROP_PO_RP_PDA_DEFAULT; - rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_rp_pda, - &value); + rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_rp_pda, &value); if (rc) { - D_ERROR("failed to upgrade container rp_pda pool/cont: "DF_CONTF"\n", - DP_CONT(ap->pool_uuid, cont_uuid)); + DL_ERROR(rc, "failed to upgrade container rp_pda pool/cont: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } upgraded = true; @@ -4665,8 +4654,8 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) perf_domain = DAOS_PROP_CO_PERF_DOMAIN_DEFAULT; rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_perf_domain, &value); if (rc) { - D_ERROR("failed to upgrade container perf_domain pool/cont: 
"DF_CONTF"\n", - DP_CONT(ap->pool_uuid, cont_uuid)); + DL_ERROR(rc, "failed to upgrade container perf_domain pool/cont: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } upgraded = true; @@ -4684,8 +4673,9 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) pda = 0; rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_scrubber_disabled, &value); if (rc) { - D_ERROR("failed to upgrade container scrubbing disabled prop: "DF_CONTF"\n", - DP_CONT(ap->pool_uuid, cont_uuid)); + DL_ERROR(rc, + "failed to upgrade container scrubbing disabled prop: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } upgraded = true; @@ -4699,8 +4689,8 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) if (rc == -DER_NONEXIST) { rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_nhandles, &value); if (rc) { - D_ERROR("failed to upgrade container nhandles pool/cont: "DF_CONTF - ", "DF_RC"\n", DP_CONT(ap->pool_uuid, cont_uuid), DP_RC(rc)); + DL_ERROR(rc, "failed to upgrade container nhandles pool/cont: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } upgraded = true; @@ -4727,8 +4717,8 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) mdtimes.mtime = d_hlc_get(); rc = rdb_tx_update(ap->tx, &cont->c_prop, &ds_cont_prop_co_md_times, &value); if (rc) { - D_ERROR("failed to upgrade container co_md_times/cont: "DF_CONTF"\n", - DP_CONT(ap->pool_uuid, cont_uuid)); + DL_ERROR(rc, "failed to upgrade container co_md_times/cont: " DF_CONTF, + DP_CONT(ap->pool_uuid, cont_uuid)); goto out; } upgraded = true; @@ -5067,18 +5057,15 @@ cont_op_with_cont(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, rc = rdb_tx_lookup(tx, &cont->c_svc->cs_hdls, &key, &value); if (rc != 0) { if (rc == -DER_NONEXIST) { - D_ERROR(DF_CONT": rejecting unauthorized " - "operation: "DF_UUID"\n", - DP_CONT(cont->c_svc->cs_pool_uuid, - cont->c_uuid), - DP_UUID(in->ci_hdl)); rc = -DER_NO_HDL; + DL_ERROR(rc, DF_CONT 
": rejecting unauthorized operation: " DF_UUID, + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), + DP_UUID(in->ci_hdl)); } else { - D_ERROR(DF_CONT": failed to look up container " - "handle "DF_UUID": %d\n", - DP_CONT(cont->c_svc->cs_pool_uuid, - cont->c_uuid), - DP_UUID(in->ci_hdl), rc); + DL_ERROR(rc, + DF_CONT ": failed to look up container handle " DF_UUID, + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), + DP_UUID(in->ci_hdl)); } goto out; } @@ -5390,8 +5377,8 @@ ds_cont_oid_fetch_add(uuid_t po_uuid, uuid_t co_uuid, uint64_t num_oids, uint64_ d_iov_set(&value, &alloced_oid, sizeof(alloced_oid)); rc = rdb_tx_lookup(&tx, &cont->c_prop, &ds_cont_prop_alloced_oid, &value); if (rc != 0) { - D_ERROR(DF_CONT": failed to lookup alloced_oid: %d\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), rc); + DL_ERROR(rc, DF_CONT ": failed to lookup alloced_oid", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); D_GOTO(out_cont, rc); } @@ -5403,8 +5390,8 @@ ds_cont_oid_fetch_add(uuid_t po_uuid, uuid_t co_uuid, uint64_t num_oids, uint64_ /* Update the max OID */ rc = rdb_tx_update(&tx, &cont->c_prop, &ds_cont_prop_alloced_oid, &value); if (rc != 0) { - D_ERROR(DF_CONT": failed to update alloced_oid: %d\n", - DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), rc); + DL_ERROR(rc, DF_CONT ": failed to update alloced_oid", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); D_GOTO(out_cont, rc); } diff --git a/src/dtx/dtx_rpc.c b/src/dtx/dtx_rpc.c index 1edc9e63220..73a307ee954 100644 --- a/src/dtx/dtx_rpc.c +++ b/src/dtx/dtx_rpc.c @@ -338,11 +338,11 @@ dtx_req_list_cb(void **args) drr = args[0]; D_CDEBUG(dra->dra_result < 0 && dra->dra_result != -DER_NONEXIST && - dra->dra_result != -DER_INPROGRESS, DLOG_ERR, DB_TRACE, - "DTX req for opc %x ("DF_DTI") %s, count %d: %d.\n", - dra->dra_opc, DP_DTI(&drr->drr_dti[0]), - dra->dra_result < 0 ? 
"failed" : "succeed", - dra->dra_length, dra->dra_result); + dra->dra_result != -DER_INPROGRESS, + DLOG_ERR, DB_TRACE, + "DTX req for opc %x (" DF_DTI ") %s, count %d: " DF_RC "\n", dra->dra_opc, + DP_DTI(&drr->drr_dti[0]), dra->dra_result < 0 ? "failed" : "succeed", + dra->dra_length, DP_RC(dra->dra_result)); } } diff --git a/src/engine/drpc_progress.c b/src/engine/drpc_progress.c index dd1af3b6512..cbeae15ea80 100644 --- a/src/engine/drpc_progress.c +++ b/src/engine/drpc_progress.c @@ -193,24 +193,23 @@ drpc_progress_context_is_valid(struct drpc_progress_context *ctx) static int drpc_progress_context_accept(struct drpc_progress_context *ctx) { - struct drpc *session; - struct drpc_list *session_node; - - session = drpc_accept(ctx->listener_ctx); - if (session == NULL) { - /* - * Any failure to accept is weird and surprising - */ - D_ERROR("Failed to accept new drpc connection\n"); - return -DER_MISC; - } + struct drpc *session; + struct drpc_list *session_node; + int rc; D_ALLOC_PTR(session_node); if (session_node == NULL) { - D_FREE(session); return -DER_NOMEM; } + rc = drpc_accept(ctx->listener_ctx, &session); + if (rc != -DER_SUCCESS) { + /* Any failure to accept is weird and surprising */ + DL_ERROR(rc, "Failed to accept new drpc connection"); + D_FREE(session_node); + return rc; + } + session_node->ctx = session; d_list_add(&session_node->link, &ctx->session_ctx_list); diff --git a/src/engine/tests/drpc_progress_tests.c b/src/engine/tests/drpc_progress_tests.c index 02aab309c13..48eb7c2aa35 100644 --- a/src/engine/tests/drpc_progress_tests.c +++ b/src/engine/tests/drpc_progress_tests.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -369,10 +369,10 @@ test_drpc_progress_listener_accept_failed(void **state) init_drpc_progress_context(&ctx, new_drpc_with_fd(15)); poll_revents_return[0] = POLLIN; - accept_return = -1; + accept_return = -EIO; /* No clear reason why accept would fail if we got data on it */ - assert_rc_equal(drpc_progress(&ctx, 100), -DER_MISC); + assert_rc_equal(drpc_progress(&ctx, 100), -DER_IO); cleanup_drpc_progress_context(&ctx); } diff --git a/src/include/daos/common.h b/src/include/daos/common.h index ed8dca26512..557655de4eb 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -612,6 +612,7 @@ daos_der2errno(int err) case -DER_UNREACH: return EHOSTUNREACH; case -DER_NOSPACE: return ENOSPC; case -DER_ALREADY: return EALREADY; + case -DER_DOS: case -DER_NOMEM: return ENOMEM; case -DER_TIMEDOUT: return ETIMEDOUT; case -DER_BUSY: diff --git a/src/include/daos/drpc.h b/src/include/daos/drpc.h index 3b11b40f23c..febaf7fdeae 100644 --- a/src/include/daos/drpc.h +++ b/src/include/daos/drpc.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2018-2021 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -78,7 +78,8 @@ int drpc_call(struct drpc *ctx, int flags, Drpc__Call *msg, int drpc_connect(char *sockaddr, struct drpc **); struct drpc *drpc_listen(char *sockaddr, drpc_handler_t handler); bool drpc_is_valid_listener(struct drpc *ctx); -struct drpc *drpc_accept(struct drpc *listener_ctx); +int + drpc_accept(struct drpc *listener_ctx, struct drpc **drpc); int drpc_recv_call(struct drpc *ctx, Drpc__Call **call); int drpc_send_response(struct drpc *ctx, Drpc__Response *resp); int drpc_close(struct drpc *ctx); diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index f69d04b6fa7..3c223a6bd42 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -1802,15 +1802,14 @@ pool_svc_map_dist_cb(struct ds_rsvc *rsvc) ABT_rwlock_unlock(svc->ps_lock); rdb_tx_end(&tx); if (rc != 0) { - D_ERROR(DF_UUID": failed to read pool map buffer: %d\n", - DP_UUID(svc->ps_uuid), rc); + DL_ERROR(rc, DF_UUID ": failed to read pool map buffer", DP_UUID(svc->ps_uuid)); goto out; } rc = ds_pool_iv_map_update(svc->ps_pool, map_buf, map_version); if (rc != 0) { - D_ERROR(DF_UUID": failed to distribute pool map %u: %d\n", - DP_UUID(svc->ps_uuid), map_version, rc); + DL_ERROR(rc, DF_UUID ": failed to distribute pool map %u", DP_UUID(svc->ps_uuid), + map_version); D_GOTO(out, rc); } svc->ps_global_map_version = max(svc->ps_global_map_version, map_version); @@ -6697,8 +6696,7 @@ ds_pool_svc_check_evict(uuid_t pool_uuid, d_rank_list_t *ranks, rc = pool_req_create(info->dmi_ctx, &ep, POOL_EVICT, &rpc); if (rc != 0) { - D_ERROR(DF_UUID": failed to create pool evict rpc: %d\n", - DP_UUID(pool_uuid), rc); + DL_ERROR(rc, DF_UUID ": failed to create pool evict rpc", DP_UUID(pool_uuid)); D_GOTO(out_client, rc); } @@ -6728,8 +6726,7 @@ ds_pool_svc_check_evict(uuid_t pool_uuid, d_rank_list_t *ranks, rc = out->pvo_op.po_rc; if (rc != 0) - D_ERROR(DF_UUID ": pool destroy failed to evict handles, rc: " DF_RC "\n", - DP_UUID(pool_uuid), DP_RC(rc)); + 
DL_ERROR(rc, DF_UUID ": pool destroy failed to evict handles", DP_UUID(pool_uuid)); if (count) *count = out->pvo_n_hdls_evicted; diff --git a/src/rdb/rdb_tx.c b/src/rdb/rdb_tx.c index 3cf81d45dc3..6b9ceba8193 100644 --- a/src/rdb/rdb_tx.c +++ b/src/rdb/rdb_tx.c @@ -792,8 +792,7 @@ rdb_tx_apply_delete(struct rdb *db, uint64_t index, rdb_oid_t kvs, rc = rdb_lc_punch(db->d_lc, index, kvs, 1 /* n */, key); if (rc != 0) - D_ERROR(DF_DB": failed to update KVS "DF_X64": %d\n", DP_DB(db), - kvs, rc); + DL_ERROR(rc, DF_DB ": failed to update KVS " DF_X64, DP_DB(db), kvs); return rc; } diff --git a/src/security/srv_acl.c b/src/security/srv_acl.c index 67894f41c21..43692cd066e 100644 --- a/src/security/srv_acl.c +++ b/src/security/srv_acl.c @@ -474,27 +474,27 @@ ds_sec_pool_get_capabilities(uint64_t flags, d_iov_t *cred, } if (!is_ownership_valid(ownership)) { - D_ERROR("Invalid ownership\n"); - return -DER_INVAL; + rc = -DER_INVAL; + DL_ERROR(rc, "Invalid ownership"); + return rc; } /* Pool flags are mutually exclusive */ - if ((flags != DAOS_PC_RO) && (flags != DAOS_PC_RW) && - (flags != DAOS_PC_EX)) { - D_ERROR("Invalid flags\n"); - return -DER_INVAL; + if ((flags != DAOS_PC_RO) && (flags != DAOS_PC_RW) && (flags != DAOS_PC_EX)) { + rc = -DER_INVAL; + DL_ERROR(rc, "Invalid flags"); + return rc; } rc = daos_acl_validate(acl); if (rc != -DER_SUCCESS) { - D_ERROR("Invalid ACL: " DF_RC "\n", DP_RC(rc)); + DL_ERROR(rc, "Invalid ACL"); return rc; } rc = ds_sec_validate_credentials(cred, &token); if (rc != 0) { - D_ERROR("Failed to validate credentials, rc="DF_RC"\n", - DP_RC(rc)); + DL_ERROR(rc, "Failed to validate credentials"); return rc; } @@ -557,35 +557,41 @@ filter_cont_capas_based_on_flags(uint64_t flags, uint64_t *capas) *capas &= ~(uint64_t)CONT_CAPA_EVICT_ALL; } -static Auth__Token * -unpack_token_from_cred(d_iov_t *cred) +static int +unpack_token_from_cred(d_iov_t *cred, Auth__Token **_token) { - struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); - 
Auth__Credential *unpacked; - Auth__Token *token = NULL; + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Auth__Credential *unpacked; + Auth__Token *token = NULL; - unpacked = auth__credential__unpack(&alloc.alloc, cred->iov_buf_len, - cred->iov_buf); - if (alloc.oom || unpacked == NULL) { - D_ERROR("Couldn't unpack credential\n"); - return NULL; + unpacked = auth__credential__unpack(&alloc.alloc, cred->iov_buf_len, cred->iov_buf); + if (alloc.oom) + return -DER_NOMEM; + + if (unpacked == NULL) { + DL_ERROR(-DER_INVAL, "Couldn't unpack credential"); + return -DER_INVAL; } - if (unpacked->token != NULL) + if (unpacked->token != NULL) { token = auth_token_dup(unpacked->token); + if (token == NULL) + return -DER_NOMEM; + } auth__credential__free_unpacked(unpacked, &alloc.alloc); - return token; + *_token = token; + return 0; } int ds_sec_cont_get_capabilities(uint64_t flags, d_iov_t *cred, struct d_ownership *ownership, struct daos_acl *acl, uint64_t *capas) { - struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); - Auth__Token *token; - int rc; - uint64_t owner_min_perms = CONT_OWNER_MIN_PERMS; + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + Auth__Token *token; + int rc; + uint64_t owner_min_perms = CONT_OWNER_MIN_PERMS; if (cred == NULL || ownership == NULL || acl == NULL || capas == NULL) { D_ERROR("NULL input\n"); @@ -602,9 +608,10 @@ ds_sec_cont_get_capabilities(uint64_t flags, d_iov_t *cred, struct d_ownership * return -DER_INVAL; } - if (daos_acl_validate(acl) != 0) { - D_ERROR("Invalid ACL\n"); - return -DER_INVAL; + rc = daos_acl_validate(acl); + if (rc != -DER_SUCCESS) { + DL_ERROR(rc, "Invalid ACL"); + return rc; } if (cred->iov_buf == NULL) { @@ -612,10 +619,11 @@ ds_sec_cont_get_capabilities(uint64_t flags, d_iov_t *cred, struct d_ownership * return -DER_INVAL; } - /* - * The credential has already been validated at pool connect. 
- */ - token = unpack_token_from_cred(cred); + rc = unpack_token_from_cred(cred, &token); + if (rc != -DER_SUCCESS) + return rc; + + /* The credential has already been validated at pool connect. */ if (token == NULL) return -DER_INVAL; @@ -783,17 +791,13 @@ ds_sec_creds_are_same_user(d_iov_t *cred_x, d_iov_t *cred_y) goto out; } - token_x = unpack_token_from_cred(cred_x); - if (token_x == NULL) { - rc = -DER_INVAL; + rc = unpack_token_from_cred(cred_x, &token_x); + if (rc != -DER_SUCCESS) goto out; - } - token_y = unpack_token_from_cred(cred_y); - if (token_y == NULL) { - rc = -DER_INVAL; + rc = unpack_token_from_cred(cred_y, &token_y); + if (rc != -DER_SUCCESS) goto out_token_x; - } rc = get_auth_sys_payload(token_x, &authsys_x); if (rc != 0) From 31922884e9d70545b27e592cca0488c0ef342c74 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Wed, 30 Aug 2023 07:56:23 +0100 Subject: [PATCH 15/80] DAOS-14155 gurt: add macros for logging of system error codes. (#12913) Add DS and DHS macros for logging system error codes which call strerror properly. 
Signed-off-by: Ashley Pittman --- src/cart/crt_init.c | 15 ++++++-------- src/cart/crt_swim.h | 4 ++-- src/client/dfuse/dfuse_cont.c | 4 ++-- src/client/dfuse/dfuse_core.c | 20 +++++++++---------- src/client/dfuse/dfuse_main.c | 6 ++---- src/include/gurt/debug.h | 17 ++++++++++++++++ src/tests/ftest/cart/util/cart_logtest.py | 2 ++ utils/cq/d_logging_check.py | 24 +++++++++++++++++++++-- 8 files changed, 63 insertions(+), 29 deletions(-) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index e6231f8509f..7fa5491a250 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -117,24 +117,21 @@ mem_pin_workaround(void) /* Disable fastbins; this option is not available on all systems */ rc = mallopt(M_MXFAST, 0); if (rc != 1) - D_WARN("Failed to disable malloc fastbins: %d (%s)\n", errno, strerror(errno)); + DS_WARN(errno, "Failed to disable malloc fastbins"); rc = getrlimit(RLIMIT_MEMLOCK, &rlim); if (rc != 0) { - D_WARN("getrlimit() failed; errno=%d (%s)\n", - errno, strerror(errno)); + DS_WARN(errno, "getrlimit() failed"); goto exit; } - if (rlim.rlim_cur == RLIM_INFINITY && - rlim.rlim_max == RLIM_INFINITY) { + if (rlim.rlim_cur == RLIM_INFINITY && rlim.rlim_max == RLIM_INFINITY) { D_INFO("Infinite rlimit detected; performing mlockall()\n"); /* Lock all pages */ rc = mlockall(MCL_CURRENT | MCL_FUTURE); if (rc) - D_WARN("Failed to mlockall(); errno=%d (%s)\n", - errno, strerror(errno)); + DS_WARN(errno, "mlockall() failed"); } else { D_INFO("mlockall() skipped\n"); @@ -1068,7 +1065,7 @@ crt_na_fill_ip_addr(struct crt_na_config *na_cfg) rc = getifaddrs(&if_addrs); if (rc != 0) { - D_ERROR("cannot getifaddrs, errno: %d(%s).\n", errno, strerror(errno)); + DS_ERROR(errno, "getifaddrs() failed"); D_GOTO(out, rc = -DER_PROTO); } @@ -1085,7 +1082,7 @@ crt_na_fill_ip_addr(struct crt_na_config *na_cfg) tmp_ptr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; ip_str = inet_ntop(AF_INET, tmp_ptr, na_cfg->noc_ip_str, INET_ADDRSTRLEN); if (ip_str == NULL) { - 
D_ERROR("inet_ntop errno: %d(%s).\n", errno, strerror(errno)); + DS_ERROR(errno, "inet_ntop() failed"); freeifaddrs(if_addrs); D_GOTO(out, rc = -DER_PROTO); } diff --git a/src/cart/crt_swim.h b/src/cart/crt_swim.h index 5c63de1c8eb..ecf04e1a406 100644 --- a/src/cart/crt_swim.h +++ b/src/cart/crt_swim.h @@ -60,7 +60,7 @@ crt_swim_csm_lock(struct crt_swim_membs *csm) rc = D_SPIN_LOCK(&csm->csm_lock); if (rc != 0) - D_ERROR("D_SPIN_LOCK(): %s\n", strerror(rc)); + DS_ERROR(rc, "D_SPIN_LOCK()"); } static inline void @@ -70,7 +70,7 @@ crt_swim_csm_unlock(struct crt_swim_membs *csm) rc = D_SPIN_UNLOCK(&csm->csm_lock); if (rc != 0) - D_ERROR("D_SPIN_UNLOCK(): %s\n", strerror(rc)); + DS_ERROR(rc, "D_SPIN_UNLOCK()"); } static inline uint32_t diff --git a/src/client/dfuse/dfuse_cont.c b/src/client/dfuse/dfuse_cont.c index 05a04f3cbab..bb34bfecc4d 100644 --- a/src/client/dfuse/dfuse_cont.c +++ b/src/client/dfuse/dfuse_cont.c @@ -53,7 +53,7 @@ dfuse_cont_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char * /* Update the stat information, but copy in the inode value afterwards. 
*/ rc = dfs_ostat(ie->ie_dfs->dfs_ns, ie->ie_obj, &entry.attr); if (rc) { - DFUSE_TRA_ERROR(ie, "dfs_ostat() failed: (%s)", strerror(rc)); + DHS_ERROR(ie, rc, "dfs_ostat() failed"); D_GOTO(decref, rc); } @@ -77,7 +77,7 @@ dfuse_cont_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char * rc = dfs_lookup(dfc->dfs_ns, "/", O_RDWR, &ie->ie_obj, NULL, &ie->ie_stat); if (rc) { - DFUSE_TRA_ERROR(ie, "dfs_lookup() failed: (%s)", strerror(rc)); + DHS_ERROR(ie, rc, "dfs_lookup() failed"); D_GOTO(close, rc); } diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index f49612c5e4b..c3ebda252e9 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -358,13 +358,13 @@ _ch_free(struct dfuse_info *dfuse_info, struct dfuse_cont *dfc) rc = dfs_umount(dfc->dfs_ns); if (rc != 0) - DFUSE_TRA_ERROR(dfc, "dfs_umount() failed: %d (%s)", rc, strerror(rc)); + DHS_ERROR(dfc, rc, "dfs_umount() failed"); rc = daos_cont_close(dfc->dfs_coh, NULL); if (rc == -DER_NOMEM) rc = daos_cont_close(dfc->dfs_coh, NULL); if (rc != 0) - DFUSE_TRA_ERROR(dfc, "daos_cont_close() failed, " DF_RC, DP_RC(rc)); + DHL_ERROR(dfc, rc, "daos_cont_close() failed"); } atomic_fetch_sub_relaxed(&dfuse_info->di_container_count, 1); @@ -716,7 +716,7 @@ dfuse_cont_open_by_label(struct dfuse_info *dfuse_info, struct dfuse_pool *dfp, rc = dfs_mount(dfp->dfp_poh, dfc->dfs_coh, dfs_flags, &dfc->dfs_ns); if (rc) { - DFUSE_TRA_ERROR(dfc, "dfs_mount() failed: %d (%s)", rc, strerror(rc)); + DHS_ERROR(dfc, rc, "dfs_mount failed"); D_GOTO(err_close, rc); } @@ -839,7 +839,7 @@ dfuse_cont_open(struct dfuse_info *dfuse_info, struct dfuse_pool *dfp, uuid_t *c } rc = dfs_mount(dfp->dfp_poh, dfc->dfs_coh, dfs_flags, &dfc->dfs_ns); if (rc) { - DFUSE_TRA_ERROR(dfc, "dfs_mount() failed: %d (%s)", rc, strerror(rc)); + DHS_ERROR(dfc, rc, "dfs mount() failed"); D_GOTO(err_close, rc); } @@ -1140,7 +1140,7 @@ dfuse_ie_close(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) 
if (ie->ie_obj) { rc = dfs_release(ie->ie_obj); if (rc) - DFUSE_TRA_ERROR(ie, "dfs_release() failed: %d (%s)", rc, strerror(rc)); + DHS_ERROR(ie, rc, "dfs_release() failed"); } if (ie->ie_root) { @@ -1290,7 +1290,7 @@ dfuse_fs_start(struct dfuse_info *dfuse_info, struct dfuse_cont *dfs) if (dfs->dfs_ops == &dfuse_dfs_ops) { rc = dfs_lookup(dfs->dfs_ns, "/", O_RDWR, &ie->ie_obj, NULL, &ie->ie_stat); if (rc) { - DFUSE_TRA_ERROR(ie, "dfs_lookup() failed: %d (%s)", rc, strerror(rc)); + DHS_ERROR(ie, rc, "dfs_lookup() failed"); D_GOTO(err_ie, rc = daos_errno2der(rc)); } } else { @@ -1377,11 +1377,11 @@ ino_flush(d_list_t *rlink, void *arg) rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, ie->ie_name, strlen(ie->ie_name)); if (rc != 0 && rc != -EBADF) - DFUSE_TRA_WARNING(ie, "%#lx %#lx " DF_DE ": %d %s", ie->ie_parent, - ie->ie_stat.st_ino, DP_DE(ie->ie_name), rc, strerror(-rc)); + DHS_WARN(ie, -rc, "%#lx %#lx " DF_DE, ie->ie_parent, ie->ie_stat.st_ino, + DP_DE(ie->ie_name)); else - DFUSE_TRA_INFO(ie, "%#lx %#lx " DF_DE ": %d %s", ie->ie_parent, ie->ie_stat.st_ino, - DP_DE(ie->ie_name), rc, strerror(-rc)); + DHS_INFO(ie, -rc, "%#lx %#lx " DF_DE, ie->ie_parent, ie->ie_stat.st_ino, + DP_DE(ie->ie_name)); /* If the FUSE connection is dead then do not traverse further, it * doesn't matter what gets returned here, as long as it's negative diff --git a/src/client/dfuse/dfuse_main.c b/src/client/dfuse/dfuse_main.c index 8da5e9356a4..27a0e0be092 100644 --- a/src/client/dfuse/dfuse_main.c +++ b/src/client/dfuse/dfuse_main.c @@ -202,8 +202,7 @@ dfuse_launch_fuse(struct dfuse_info *dfuse_info, struct fuse_args *args) else rc = fuse_session_loop(dfuse_info->di_session); if (rc != 0) - DFUSE_TRA_ERROR(dfuse_info, - "Fuse loop exited with return code: %d (%s)", rc, strerror(rc)); + DHS_ERROR(dfuse_info, rc, "Fuse loop exited"); fuse_session_unmount(dfuse_info->di_session); @@ -584,8 +583,7 @@ main(int argc, char **argv) } rc = duns_resolve_path(path, 
&path_attr); - DFUSE_TRA_INFO(dfuse_info, "duns_resolve_path() on path: %d (%s)", rc, - strerror(rc)); + DHS_INFO(dfuse_info, rc, "duns_resolve_path() on path"); if (rc == ENOENT) { printf("Attr path does not exist\n"); D_GOTO(out_daos, rc = daos_errno2der(rc)); diff --git a/src/include/gurt/debug.h b/src/include/gurt/debug.h index f83914242f6..1dbe4cc9b3d 100644 --- a/src/include/gurt/debug.h +++ b/src/include/gurt/debug.h @@ -223,6 +223,23 @@ extern void (*d_alt_assert)(const int, const char*, const char*, const int); #define DL_WARN(_rc, _fmt, ...) D_DEBUG(DLOG_WARN, _fmt ": " DF_RC "\n", ##__VA_ARGS__, DP_RC(_rc)) #define DL_ERROR(_rc, _fmt, ...) D_DEBUG(DLOG_ERR, _fmt ": " DF_RC "\n", ##__VA_ARGS__, DP_RC(_rc)) +#define DHS_INFO(_desc, _rc, _fmt, ...) \ + _D_DEBUG(_D_TRACE_NOCHECK, DLOG_INFO, (_desc), _fmt ": %d (%s)\n", ##__VA_ARGS__, _rc, \ + strerror(_rc)) +#define DHS_WARN(_desc, _rc, _fmt, ...) \ + _D_DEBUG(_D_TRACE_NOCHECK, DLOG_WARN, (_desc), _fmt ": %d (%s)\n", ##__VA_ARGS__, _rc, \ + strerror(_rc)) +#define DHS_ERROR(_desc, _rc, _fmt, ...) \ + _D_DEBUG(_D_TRACE_NOCHECK, DLOG_ERR, (_desc), _fmt ": %d (%s)\n", ##__VA_ARGS__, _rc, \ + strerror(_rc)) + +#define DS_INFO(_rc, _fmt, ...) \ + D_DEBUG(DLOG_INFO, _fmt ": %d (%s)\n", ##__VA_ARGS__, _rc, strerror(_rc)) +#define DS_WARN(_rc, _fmt, ...) \ + D_DEBUG(DLOG_WARN, _fmt ": %d (%s)\n", ##__VA_ARGS__, _rc, strerror(_rc)) +#define DS_ERROR(_rc, _fmt, ...) 
\ + D_DEBUG(DLOG_ERR, _fmt ": %d (%s)\n", ##__VA_ARGS__, _rc, strerror(_rc)) + #ifdef D_USE_GURT_FAC D_FOREACH_GURT_FAC(D_LOG_DECLARE_FAC, D_NOOP) #endif /* D_USE_GURT_FAC */ diff --git a/src/tests/ftest/cart/util/cart_logtest.py b/src/tests/ftest/cart/util/cart_logtest.py index 159af559983..1266b63dcff 100755 --- a/src/tests/ftest/cart/util/cart_logtest.py +++ b/src/tests/ftest/cart/util/cart_logtest.py @@ -368,6 +368,8 @@ def _check_pid_from_log_file(self, pid, abort_on_warning, leak_wf, show_memleaks if 'DER_UNKNOWN' in msg: show_line(line, 'NORMAL', 'Use of DER_UNKNOWN') + if 'Unknown error' in msg: + show_line(line, 'NORMAL', 'Invalid strerror value') # Warn if a line references the name of the function it was in, # but skip short function names or _internal suffixes. if line.function in msg and len(line.function) > 6 and \ diff --git a/utils/cq/d_logging_check.py b/utils/cq/d_logging_check.py index 3709e64562a..dd8ed763e9b 100755 --- a/utils/cq/d_logging_check.py +++ b/utils/cq/d_logging_check.py @@ -132,9 +132,11 @@ def __next__(self): # Logging macros where a new-line is always added. 
PREFIXES_NNL = ['DFUSE_LOG_WARNING', 'DFUSE_LOG_ERROR', 'DFUSE_LOG_DEBUG', 'DFUSE_LOG_INFO', 'DFUSE_TRA_WARNING', 'DFUSE_TRA_ERROR', 'DFUSE_TRA_DEBUG', 'DFUSE_TRA_INFO', - 'DH_PERROR_SYS', 'DH_PERROR_DER', 'DL_CDEBUG', - 'DL_ERROR', 'DHL_ERROR', 'DHL_WARN', 'DL_WARN', 'DL_INFO', 'DHL_INFO'] + 'DH_PERROR_SYS', 'DH_PERROR_DER', 'DL_CDEBUG'] +for prefix in ['DL', 'DHL', 'DS', 'DHS']: + for suffix in ['ERROR', 'WARN', 'INFO']: + PREFIXES_NNL.append(f'{prefix}_{suffix}') PREFIXES_ALL = PREFIXES.copy() PREFIXES_ALL.extend(PREFIXES_NNL) @@ -183,6 +185,7 @@ def run_all_checks(self): self.check_df_rc(line) self.remove_trailing_period(line) self.check_quote(line) + self.check_failed(line) line.write(self._output) if line.modified: @@ -369,6 +372,23 @@ def remove_trailing_period(self, line): if new_code != code: line.correct(new_code) + def check_failed(self, line): + """Check for 'Failed' with uppercase F + + Lots of message are of the form 'function() failed' but some use Failed. + """ + code = line.raw() + + if 'Failed' not in code: + return + if '"Failed' in code: + return + if 'Failed to' in code: + return + + print(code) + line.note('Failed') + def one_entry(fname): """Process one path entry From 7907104fb29e33b61314faa31f9cbb72324db2e0 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Wed, 30 Aug 2023 10:11:21 -0500 Subject: [PATCH 16/80] DAOS-14247 build: update to mercury 2.3.1rc1 (#12973) Signed-off-by: Jerome Soumagne --- utils/build.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/build.config b/utils/build.config index 25c3e092961..3f695ac38cc 100644 --- a/utils/build.config +++ b/utils/build.config @@ -8,7 +8,7 @@ ISAL = v2.30.0 ISAL_CRYPTO = v2.23.0 SPDK = v22.01.2 OFI = v1.18.1 -MERCURY = v2.3.0 +MERCURY = v2.3.1rc1 PROTOBUFC = v1.3.3 UCX=v1.14.1 From 6c1a8f84a5b7e38205212f4e09f95abb1504f02a Mon Sep 17 00:00:00 2001 From: Li Wei Date: Thu, 31 Aug 2023 09:21:32 +0900 Subject: [PATCH 17/80] DAOS-14027 cart: Revise default SWIM 
parameters (#12928) It seems that a less-than-1-second ib link status change may cause a 17-second RPC outage at the daos level. Although we do not know if this is fundamentally the case for all networks DAOS supports, try increasing the default SWIM suspicion timeout to 20 s to be more conservative. Signed-off-by: Li Wei --- src/cart/swim/swim_internal.h | 4 ++-- src/tests/ftest/cart/rpc/swim_notification.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cart/swim/swim_internal.h b/src/cart/swim/swim_internal.h index 25e02b5de1a..3a292c33f13 100644 --- a/src/cart/swim/swim_internal.h +++ b/src/cart/swim/swim_internal.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2016 UChicago Argonne, LLC - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -49,7 +49,7 @@ extern "C" { /** SWIM protocol parameter defaults */ #define SWIM_PROTOCOL_PERIOD_LEN 1000 /* milliseconds */ -#define SWIM_SUSPECT_TIMEOUT (8 * SWIM_PROTOCOL_PERIOD_LEN) +#define SWIM_SUSPECT_TIMEOUT (20 * SWIM_PROTOCOL_PERIOD_LEN) #define SWIM_PING_TIMEOUT 900 /* milliseconds */ #define SWIM_SUBGROUP_SIZE 2 #define SWIM_PIGGYBACK_ENTRIES 8 /**< count of piggybacked entries */ diff --git a/src/tests/ftest/cart/rpc/swim_notification.yaml b/src/tests/ftest/cart/rpc/swim_notification.yaml index 796915d49c3..c7ffd153921 100644 --- a/src/tests/ftest/cart/rpc/swim_notification.yaml +++ b/src/tests/ftest/cart/rpc/swim_notification.yaml @@ -36,7 +36,7 @@ tests: !mux test_clients_arg: - "--name client_group --attach_to tg_srv_grp_swim_test --init_only --holdtime 20" - "--name client_group --attach_to tg_srv_grp_swim_test --rank 0,1 --verify_swim_status 'rank2=alive' --skip_shutdown --skip_check_in" - - "--name client_group --attach_to tg_srv_grp_swim_test --rank 2 --shut_only --holdtime 20" + - "--name client_group --attach_to tg_srv_grp_swim_test --rank 2 --shut_only --holdtime 30" - "--name client_group 
--attach_to tg_srv_grp_swim_test --rank 0,1 --verify_swim_status 'rank2=dead' --skip_shutdown --skip_check_in" - "--name client_group --attach_to tg_srv_grp_swim_test --rank 0,1 --verify_swim_status 'rank1=alive' --skip_shutdown --skip_check_in" - "--name client_group --attach_to tg_srv_grp_swim_test --rank 0,1 --verify_swim_status 'rank0=alive' --skip_shutdown --skip_check_in" From 3a16953849f416d028b741c95dd52826d318d863 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Thu, 31 Aug 2023 22:35:16 +0800 Subject: [PATCH 18/80] DAOS-14224 cart: call crt_bulk_free before iv_put (#12965) * DAOS-14224 cart: call crt_bulk_free before iv_put crt_bulk_free() should always be called before buffer free, otherwise retried rma may corrupt freed memory. Signed-off-by: Liang Zhen --- src/cart/crt_corpc.c | 5 ++--- src/cart/crt_iv.c | 39 +++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/cart/crt_corpc.c b/src/cart/crt_corpc.c index 57e0121fb55..25b96e85178 100644 --- a/src/cart/crt_corpc.c +++ b/src/cart/crt_corpc.c @@ -244,13 +244,12 @@ crt_corpc_free_chained_bulk(crt_bulk_t bulk_hdl) D_GOTO(out, rc); } - for (i = 0; i < seg_num; i++) - D_FREE(iovs[i].iov_buf); - rc = crt_bulk_free(bulk_hdl); if (rc != 0) D_ERROR("crt_bulk_free failed: "DF_RC"\n", DP_RC(rc)); + for (i = 0; i < seg_num; i++) + D_FREE(iovs[i].iov_buf); out: D_FREE(iovs); return rc; diff --git a/src/cart/crt_iv.c b/src/cart/crt_iv.c index 7e742c7845d..e998a195159 100644 --- a/src/cart/crt_iv.c +++ b/src/cart/crt_iv.c @@ -2398,17 +2398,16 @@ finalize_transfer_back(struct update_cb_info *cb_info, int rc) child_output->rc = rc; ivns = cb_info->uci_ivns_internal; + crt_reply_send(cb_info->uci_child_rpc); + + /* ADDREF done in crt_hdlr_iv_update */ + crt_bulk_free(cb_info->uci_bulk_hdl); iv_ops = crt_iv_ops_get(ivns, cb_info->uci_class_id); D_ASSERT(iv_ops != NULL); - iv_ops->ivo_on_put(ivns, &cb_info->uci_iv_value, cb_info->uci_user_priv); - 
crt_reply_send(cb_info->uci_child_rpc); - - /* ADDREF done in crt_hdlr_iv_update */ - crt_bulk_free(cb_info->uci_bulk_hdl); RPC_PUB_DECREF(cb_info->uci_child_rpc); /* addref in transfer_back_to_child() */ @@ -2455,12 +2454,12 @@ int transfer_back_to_child(crt_iv_key_t *key, struct update_cb_info *cb_info, &cb_info->uci_iv_value, update_rc, cb_info->uci_cb_arg); - /* Corresponding on_get() done in crt_iv_update_internal */ - iv_ops->ivo_on_put(ivns, NULL, cb_info->uci_user_priv); - if (cb_info->uci_bulk_hdl != CRT_BULK_NULL) crt_bulk_free(cb_info->uci_bulk_hdl); + /* Corresponding on_get() done in crt_iv_update_internal */ + iv_ops->ivo_on_put(ivns, NULL, cb_info->uci_user_priv); + /* addref done in crt_hdlr_iv_update */ IVNS_DECREF(cb_info->uci_ivns_internal); D_FREE(cb_info); @@ -2518,10 +2517,11 @@ handle_ivupdate_response(const struct crt_cb_info *cb_info) child_output = crt_reply_get(iv_info->uci_child_rpc); /* uci_bulk_hdl will not be set for invalidate call */ - if (iv_info->uci_bulk_hdl != CRT_BULK_NULL) + if (iv_info->uci_bulk_hdl != CRT_BULK_NULL) { + crt_bulk_free(iv_info->uci_bulk_hdl); iv_ops->ivo_on_put(iv_info->uci_ivns_internal, &iv_info->uci_iv_value, iv_info->uci_user_priv); - + } child_output->rc = output->rc; if (cb_info->cci_rc != 0) @@ -2536,7 +2536,7 @@ handle_ivupdate_response(const struct crt_cb_info *cb_info) } else { d_sg_list_t *tmp_iv_value; - if (iv_info->uci_bulk_hdl == NULL) + if (iv_info->uci_bulk_hdl == CRT_BULK_NULL) tmp_iv_value = NULL; else tmp_iv_value = &iv_info->uci_iv_value; @@ -2557,15 +2557,14 @@ handle_ivupdate_response(const struct crt_cb_info *cb_info) iv_info->uci_cb_arg, iv_info->uci_user_priv, rc); + if (iv_info->uci_bulk_hdl != CRT_BULK_NULL) + crt_bulk_free(iv_info->uci_bulk_hdl); if (rc != 0) { iv_ops->ivo_on_put(iv_info->uci_ivns_internal, tmp_iv_value, iv_info->uci_user_priv); } } - if (iv_info->uci_bulk_hdl != CRT_BULK_NULL) - crt_bulk_free(iv_info->uci_bulk_hdl); - /* addref done in crt_hdlr_iv_update */ 
IVNS_DECREF(iv_info->uci_ivns_internal); D_FREE(iv_info); @@ -2853,6 +2852,8 @@ bulk_update_transfer_done_aux(const struct crt_bulk_cb_info *info) D_ERROR("crt_ivu_rpc_issue(): "DF_RC"\n", DP_RC(rc)); D_GOTO(send_error, rc); } + rc = crt_bulk_free(cb_info->buc_bulk_hdl); + } else if (update_rc == 0) { /* If sync was bi-directional - transfer value back */ if (sync_type->ivs_flags & CRT_IV_SYNC_BIDIRECTIONAL) { @@ -2864,7 +2865,8 @@ bulk_update_transfer_done_aux(const struct crt_bulk_cb_info *info) D_GOTO(exit, rc); } - output->rc = -DER_SUCCESS; + rc = crt_bulk_free(cb_info->buc_bulk_hdl); + output->rc = rc; iv_ops->ivo_on_put(ivns_internal, &cb_info->buc_iv_value, cb_info->buc_user_priv); crt_reply_send(info->bci_bulk_desc->bd_rpc); @@ -2875,17 +2877,14 @@ bulk_update_transfer_done_aux(const struct crt_bulk_cb_info *info) } else { D_GOTO(send_error, rc = update_rc); } - - rc = crt_bulk_free(cb_info->buc_bulk_hdl); exit: return rc; send_error: - iv_ops->ivo_on_put(ivns_internal, &cb_info->buc_iv_value, - cb_info->buc_user_priv); - rc = crt_bulk_free(cb_info->buc_bulk_hdl); output->rc = rc; + iv_ops->ivo_on_put(ivns_internal, &cb_info->buc_iv_value, + cb_info->buc_user_priv); crt_reply_send(info->bci_bulk_desc->bd_rpc); RPC_PUB_DECREF(info->bci_bulk_desc->bd_rpc); From ed5eed5df43a68571afe123132a743824c02637a Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Thu, 31 Aug 2023 17:38:00 +0100 Subject: [PATCH 19/80] DAOS-14214 control: Fix potential missed call to drpc failure handlers (#12944) Refactor harness CallDrpc method to simplify and try to improve clarity of intent. Remove potential for failure handlers not to get called on a dRPC comms related error. Also provide minor fixes related to review comments on previously landed PRs 12858 and 12871. 
Signed-off-by: Tom Nabarro --- src/bio/smd.pb-c.c | 60 +++++++------ src/bio/smd.pb-c.h | 53 ++++++----- src/control/common/proto/ctl/smd.pb.go | 2 +- src/control/server/ctl_smd_rpc.go | 4 +- src/control/server/harness.go | 117 ++++++++++++++----------- src/control/server/harness_test.go | 100 ++++++++++++++++----- src/mgmt/smd.pb-c.c | 60 +++++++------ src/mgmt/smd.pb-c.h | 53 ++++++----- src/proto/ctl/smd.proto | 2 +- 9 files changed, 266 insertions(+), 185 deletions(-) diff --git a/src/bio/smd.pb-c.c b/src/bio/smd.pb-c.c index ece0953e678..4a3da682883 100644 --- a/src/bio/smd.pb-c.c +++ b/src/bio/smd.pb-c.c @@ -2570,33 +2570,39 @@ const ProtobufCMessageDescriptor ctl__smd_manage_resp__descriptor = (ProtobufCMessageInit) ctl__smd_manage_resp__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCEnumValue ctl__nvme_dev_state__enum_values_by_number[5] = { - {"UNKNOWN", "CTL__NVME_DEV_STATE__UNKNOWN", 0}, - {"NORMAL", "CTL__NVME_DEV_STATE__NORMAL", 1}, - {"NEW", "CTL__NVME_DEV_STATE__NEW", 2}, - {"EVICTED", "CTL__NVME_DEV_STATE__EVICTED", 3}, - {"UNPLUGGED", "CTL__NVME_DEV_STATE__UNPLUGGED", 4}, -}; -static const ProtobufCIntRange ctl__nvme_dev_state__value_ranges[] = {{0, 0}, {0, 5}}; -static const ProtobufCEnumValueIndex ctl__nvme_dev_state__enum_values_by_name[5] = { - {"EVICTED", 3}, {"NEW", 2}, {"NORMAL", 1}, {"UNKNOWN", 0}, {"UNPLUGGED", 4}, -}; -const ProtobufCEnumDescriptor ctl__nvme_dev_state__descriptor = { - PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, - "ctl.NvmeDevState", - "NvmeDevState", - "Ctl__NvmeDevState", - "ctl", - 5, - ctl__nvme_dev_state__enum_values_by_number, - 5, - ctl__nvme_dev_state__enum_values_by_name, - 1, - ctl__nvme_dev_state__value_ranges, - NULL, - NULL, - NULL, - NULL /* reserved[1234] */ +static const ProtobufCEnumValue ctl__nvme_dev_state__enum_values_by_number[5] = +{ + { "UNKNOWN", "CTL__NVME_DEV_STATE__UNKNOWN", 0 }, + { "NORMAL", "CTL__NVME_DEV_STATE__NORMAL", 1 }, + { "NEW", "CTL__NVME_DEV_STATE__NEW", 2 }, + { 
"EVICTED", "CTL__NVME_DEV_STATE__EVICTED", 3 }, + { "UNPLUGGED", "CTL__NVME_DEV_STATE__UNPLUGGED", 4 }, +}; +static const ProtobufCIntRange ctl__nvme_dev_state__value_ranges[] = { +{0, 0},{0, 5} +}; +static const ProtobufCEnumValueIndex ctl__nvme_dev_state__enum_values_by_name[5] = +{ + { "EVICTED", 3 }, + { "NEW", 2 }, + { "NORMAL", 1 }, + { "UNKNOWN", 0 }, + { "UNPLUGGED", 4 }, +}; +const ProtobufCEnumDescriptor ctl__nvme_dev_state__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "ctl.NvmeDevState", + "NvmeDevState", + "Ctl__NvmeDevState", + "ctl", + 5, + ctl__nvme_dev_state__enum_values_by_number, + 5, + ctl__nvme_dev_state__enum_values_by_name, + 1, + ctl__nvme_dev_state__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ }; static const ProtobufCEnumValue ctl__led_state__enum_values_by_number[5] = { diff --git a/src/bio/smd.pb-c.h b/src/bio/smd.pb-c.h index 35ce9f2b412..45d159907ca 100644 --- a/src/bio/smd.pb-c.h +++ b/src/bio/smd.pb-c.h @@ -41,27 +41,27 @@ typedef struct _Ctl__SmdManageResp__RankResp Ctl__SmdManageResp__RankResp; /* --- enums --- */ typedef enum _Ctl__NvmeDevState { - /* - * Device state is unknown, zer6o value - */ - CTL__NVME_DEV_STATE__UNKNOWN = 0, - /* - * Device is in a normal operational state - */ - CTL__NVME_DEV_STATE__NORMAL = 1, - /* - * Device is new and is not yet in-use - */ - CTL__NVME_DEV_STATE__NEW = 2, - /* - * Device is faulty and has been evicted - */ - CTL__NVME_DEV_STATE__EVICTED = 3, - /* - * Device has been physically removed - */ - CTL__NVME_DEV_STATE__UNPLUGGED = - 4 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CTL__NVME_DEV_STATE) + /* + * Device state is unknown, zero value + */ + CTL__NVME_DEV_STATE__UNKNOWN = 0, + /* + * Device is in a normal operational state + */ + CTL__NVME_DEV_STATE__NORMAL = 1, + /* + * Device is new and is not yet in-use + */ + CTL__NVME_DEV_STATE__NEW = 2, + /* + * Device is faulty and has been evicted + */ + CTL__NVME_DEV_STATE__EVICTED = 3, + /* + * Device has been physically 
removed + */ + CTL__NVME_DEV_STATE__UNPLUGGED = 4 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CTL__NVME_DEV_STATE) } Ctl__NvmeDevState; typedef enum _Ctl__LedState { /* @@ -290,13 +290,10 @@ struct _Ctl__SmdDevice */ uint64_t usable_bytes; }; -#define CTL__SMD_DEVICE__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT(&ctl__smd_device__descriptor) \ - , (char *)protobuf_c_empty_string, 0, NULL, (char *)protobuf_c_empty_string, \ - CTL__NVME_DEV_STATE__UNKNOWN, CTL__LED_STATE__OFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0 \ - } +#define CTL__SMD_DEVICE__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&ctl__smd_device__descriptor) \ + , (char *)protobuf_c_empty_string, 0,NULL, (char *)protobuf_c_empty_string, CTL__NVME_DEV_STATE__UNKNOWN, CTL__LED_STATE__OFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + struct _Ctl__SmdDevReq { diff --git a/src/control/common/proto/ctl/smd.pb.go b/src/control/common/proto/ctl/smd.pb.go index 3fc15aaa976..87d30921fca 100644 --- a/src/control/common/proto/ctl/smd.pb.go +++ b/src/control/common/proto/ctl/smd.pb.go @@ -29,7 +29,7 @@ const ( type NvmeDevState int32 const ( - NvmeDevState_UNKNOWN NvmeDevState = 0 // Device state is unknown, zer6o value + NvmeDevState_UNKNOWN NvmeDevState = 0 // Device state is unknown, zero value NvmeDevState_NORMAL NvmeDevState = 1 // Device is in a normal operational state NvmeDevState_NEW NvmeDevState = 2 // Device is new and is not yet in-use NvmeDevState_EVICTED NvmeDevState = 3 // Device is faulty and has been evicted diff --git a/src/control/server/ctl_smd_rpc.go b/src/control/server/ctl_smd_rpc.go index 7fbd367f044..6f8ba75ab97 100644 --- a/src/control/server/ctl_smd_rpc.go +++ b/src/control/server/ctl_smd_rpc.go @@ -398,12 +398,12 @@ func addManageRespIDOnFail(log logging.Logger, res *ctlpb.SmdManageResp_Result, // Retry dev-replace requests as state propagation may take some time after set-faulty call has // been made to manually trigger a faulty device state. 
func replaceDevRetryBusy(ctx context.Context, log logging.Logger, e Engine, req proto.Message) (res *ctlpb.SmdManageResp_Result, err error) { - for try := uint(0); try < uint(maxDevReplaceRetries); try++ { + for try := 0; try < maxDevReplaceRetries; try++ { res, err = sendManageReq(ctx, e, drpc.MethodReplaceStorage, req) if err != nil { return } - if daos.Status(res.Status) != daos.Busy { + if daos.Status(res.Status) != daos.Busy || try == maxDevReplaceRetries-1 { break } diff --git a/src/control/server/harness.go b/src/control/server/harness.go index d6a4460591a..88028bc658a 100644 --- a/src/control/server/harness.go +++ b/src/control/server/harness.go @@ -8,6 +8,7 @@ package server import ( "context" + "fmt" "os" "sync" @@ -76,7 +77,7 @@ type EngineHarness struct { instances []Engine started atm.Bool faultDomain *system.FaultDomain - onDrpcFailure []func(context.Context, error) + onDrpcFailure []onDrpcFailureFn } // NewEngineHarness returns an initialized *EngineHarness. @@ -146,8 +147,10 @@ func (h *EngineHarness) AddInstance(ei Engine) error { return nil } +type onDrpcFailureFn func(ctx context.Context, err error) + // OnDrpcFailure registers callbacks to be invoked on dRPC call failure. -func (h *EngineHarness) OnDrpcFailure(fns ...func(ctx context.Context, err error)) { +func (h *EngineHarness) OnDrpcFailure(fns ...onDrpcFailureFn) { h.Lock() defer h.Unlock() @@ -156,44 +159,62 @@ func (h *EngineHarness) OnDrpcFailure(fns ...func(ctx context.Context, err error // CallDrpc calls the supplied dRPC method on a managed I/O Engine instance. func (h *EngineHarness) CallDrpc(ctx context.Context, method drpc.Method, body proto.Message) (resp *drpc.Response, err error) { - defer func() { - if err == nil { - return - } - // If the context was canceled, don't trigger callbacks. - if errors.Cause(err) == context.Canceled { - return - } - // Don't trigger callbacks for these errors which can happen when - // things are still starting up. 
- if err == FaultHarnessNotStarted || err == errEngineNotReady { - return - } - - h.log.Debugf("invoking dRPC failure handlers for %s", err) - h.RLock() - defer h.RUnlock() - for _, fn := range h.onDrpcFailure { - fn(ctx, err) - } - }() - if !h.isStarted() { return nil, FaultHarnessNotStarted } - // Iterate through the managed instances, looking for - // the first one that is available to service the request. - // If the request fails, that error will be returned. - for _, i := range h.Instances() { + instances := h.Instances() + if len(instances) == 0 { + return nil, errors.New("no engine instances to service drpc call") + } + + // Iterate through the managed instances, looking for the first one that is available to + // service the request. If non-transient error is returned from CallDrpc, that error will + // be returned immediately. If a transient error is returned, continue to the next engine. + drpcErrs := make([]error, 0, len(instances)) + for _, i := range instances { resp, err = i.CallDrpc(ctx, method, body) + if err == nil { + break + } + + drpcErrs = append(drpcErrs, errors.Cause(err)) + msg := fmt.Sprintf("failure on engine instance %d: %s", i.Index(), err) switch errors.Cause(err) { case errEngineNotReady, errDRPCNotReady, FaultDataPlaneNotStarted: + h.log.Debug("drpc call transient " + msg) continue - default: - return } + + h.log.Debug("drpc call hard " + msg) + break + } + + if err == nil { + return // Request sent. + } + + var e error + hasDRPCErr := false + for _, e = range drpcErrs { + switch e { + case errDRPCNotReady, FaultDataPlaneNotStarted: + // If no engines can service request and drpc specific error has + // been returned then pass that error to the failure handlers. + hasDRPCErr = true + break + } + } + if !hasDRPCErr { + return // Don't trigger handlers on failures not related to dRPC comms. 
+ } + + h.log.Debugf("invoking dRPC failure handlers for %s", e) + h.RLock() + defer h.RUnlock() + for _, fn := range h.onDrpcFailure { + fn(ctx, e) } return @@ -205,6 +226,20 @@ type dbLeader interface { ResignLeadership(error) error } +func newOnDrpcFailureFn(log logging.Logger, db dbLeader) onDrpcFailureFn { + return func(_ context.Context, errIn error) { + if !db.IsLeader() { + return + } + + // If we cannot service a dRPC request on this node, we should resign as leader in + // order to force a new leader election. + if err := db.ResignLeadership(errIn); err != nil { + log.Errorf("failed to resign leadership after dRPC failure: %s", err) + } + } +} + // Start starts harness by setting up and starting dRPC before initiating // configured instances' processing loops. // @@ -228,27 +263,7 @@ func (h *EngineHarness) Start(ctx context.Context, db dbLeader, cfg *config.Serv ei.Run(ctx, cfg.RecreateSuperblocks) } - h.OnDrpcFailure(func(_ context.Context, errIn error) { - if !db.IsLeader() { - return - } - - switch errors.Cause(errIn) { - case errDRPCNotReady, FaultDataPlaneNotStarted: - break - default: - // Don't shutdown on other failures which are - // not related to dRPC communications. - return - } - - // If we cannot service a dRPC request on this node, - // we should resign as leader in order to force a new - // leader election. 
- if err := db.ResignLeadership(errIn); err != nil { - h.log.Errorf("failed to resign leadership after dRPC failure: %s", err) - } - }) + h.OnDrpcFailure(newOnDrpcFailureFn(h.log, db)) <-ctx.Done() h.log.Debug("shutting down harness") diff --git a/src/control/server/harness_test.go b/src/control/server/harness_test.go index 2c040fa3eb0..116b5edda0b 100644 --- a/src/control/server/harness_test.go +++ b/src/control/server/harness_test.go @@ -522,20 +522,6 @@ func TestServer_Harness_CallDrpc(t *testing.T) { }, }, }, - "one not ready, one fails": { - mics: []*MockInstanceConfig{ - { - Ready: atm.NewBool(true), - CallDrpcErr: errDRPCNotReady, - }, - { - Ready: atm.NewBool(true), - CallDrpcErr: errors.New("whoops"), - }, - }, - expErr: errors.New("whoops"), - expFailHandler: true, - }, "instance not ready": { mics: []*MockInstanceConfig{ { @@ -560,8 +546,7 @@ func TestServer_Harness_CallDrpc(t *testing.T) { Ready: atm.NewBool(true), }, }, - expErr: errors.New("whoops"), - expFailHandler: true, + expErr: errors.New("whoops"), }, "none available": { mics: []*MockInstanceConfig{ @@ -578,6 +563,79 @@ func TestServer_Harness_CallDrpc(t *testing.T) { expErr: FaultDataPlaneNotStarted, expFailHandler: true, }, + "none available; not leader": { + notLeader: true, + mics: []*MockInstanceConfig{ + { + Ready: atm.NewBool(true), + CallDrpcErr: errDRPCNotReady, + }, + { + Ready: atm.NewBool(true), + CallDrpcErr: FaultDataPlaneNotStarted, + }, + }, + expNotLeader: true, + expErr: FaultDataPlaneNotStarted, + expFailHandler: true, + }, + "none available; no drpc related errors": { + mics: []*MockInstanceConfig{ + { + Started: atm.NewBool(true), + CallDrpcErr: errEngineNotReady, + }, + { + Ready: atm.NewBool(true), + CallDrpcErr: errors.New("whoops"), + }, + }, + expErr: errors.New("whoops"), + }, + "none available; one engine not ready, one drpc not ready": { + mics: []*MockInstanceConfig{ + { + Started: atm.NewBool(true), + CallDrpcErr: errEngineNotReady, + }, + { + Ready: 
atm.NewBool(true), + CallDrpcErr: errDRPCNotReady, + }, + }, + expNotLeader: true, + expErr: errDRPCNotReady, + expFailHandler: true, + }, + "none available; one drpc not ready, one engine not ready": { + mics: []*MockInstanceConfig{ + { + Ready: atm.NewBool(true), + CallDrpcErr: errDRPCNotReady, + }, + { + Started: atm.NewBool(true), + CallDrpcErr: errEngineNotReady, + }, + }, + expNotLeader: true, + expErr: errEngineNotReady, + expFailHandler: true, + }, + "none available; one data-plane not ready, one engine not ready": { + mics: []*MockInstanceConfig{ + { + CallDrpcErr: FaultDataPlaneNotStarted, + }, + { + Started: atm.NewBool(true), + CallDrpcErr: errEngineNotReady, + }, + }, + expNotLeader: true, + expErr: errEngineNotReady, + expFailHandler: true, + }, "context canceled": { mics: []*MockInstanceConfig{ { @@ -599,15 +657,17 @@ func TestServer_Harness_CallDrpc(t *testing.T) { } } + db := &mockdb{ + isLeader: !tc.notLeader, + } + var drpcFailureInvoked atm.Bool - h.OnDrpcFailure(func(_ context.Context, err error) { + h.OnDrpcFailure(func(ctx context.Context, err error) { drpcFailureInvoked.SetTrue() + newOnDrpcFailureFn(log, db)(ctx, err) }) ctx, cancel := context.WithCancel(test.Context(t)) - db := &mockdb{ - isLeader: !tc.notLeader, - } startErr := make(chan error) go func() { diff --git a/src/mgmt/smd.pb-c.c b/src/mgmt/smd.pb-c.c index ece0953e678..4a3da682883 100644 --- a/src/mgmt/smd.pb-c.c +++ b/src/mgmt/smd.pb-c.c @@ -2570,33 +2570,39 @@ const ProtobufCMessageDescriptor ctl__smd_manage_resp__descriptor = (ProtobufCMessageInit) ctl__smd_manage_resp__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCEnumValue ctl__nvme_dev_state__enum_values_by_number[5] = { - {"UNKNOWN", "CTL__NVME_DEV_STATE__UNKNOWN", 0}, - {"NORMAL", "CTL__NVME_DEV_STATE__NORMAL", 1}, - {"NEW", "CTL__NVME_DEV_STATE__NEW", 2}, - {"EVICTED", "CTL__NVME_DEV_STATE__EVICTED", 3}, - {"UNPLUGGED", "CTL__NVME_DEV_STATE__UNPLUGGED", 4}, -}; -static const ProtobufCIntRange 
ctl__nvme_dev_state__value_ranges[] = {{0, 0}, {0, 5}}; -static const ProtobufCEnumValueIndex ctl__nvme_dev_state__enum_values_by_name[5] = { - {"EVICTED", 3}, {"NEW", 2}, {"NORMAL", 1}, {"UNKNOWN", 0}, {"UNPLUGGED", 4}, -}; -const ProtobufCEnumDescriptor ctl__nvme_dev_state__descriptor = { - PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, - "ctl.NvmeDevState", - "NvmeDevState", - "Ctl__NvmeDevState", - "ctl", - 5, - ctl__nvme_dev_state__enum_values_by_number, - 5, - ctl__nvme_dev_state__enum_values_by_name, - 1, - ctl__nvme_dev_state__value_ranges, - NULL, - NULL, - NULL, - NULL /* reserved[1234] */ +static const ProtobufCEnumValue ctl__nvme_dev_state__enum_values_by_number[5] = +{ + { "UNKNOWN", "CTL__NVME_DEV_STATE__UNKNOWN", 0 }, + { "NORMAL", "CTL__NVME_DEV_STATE__NORMAL", 1 }, + { "NEW", "CTL__NVME_DEV_STATE__NEW", 2 }, + { "EVICTED", "CTL__NVME_DEV_STATE__EVICTED", 3 }, + { "UNPLUGGED", "CTL__NVME_DEV_STATE__UNPLUGGED", 4 }, +}; +static const ProtobufCIntRange ctl__nvme_dev_state__value_ranges[] = { +{0, 0},{0, 5} +}; +static const ProtobufCEnumValueIndex ctl__nvme_dev_state__enum_values_by_name[5] = +{ + { "EVICTED", 3 }, + { "NEW", 2 }, + { "NORMAL", 1 }, + { "UNKNOWN", 0 }, + { "UNPLUGGED", 4 }, +}; +const ProtobufCEnumDescriptor ctl__nvme_dev_state__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "ctl.NvmeDevState", + "NvmeDevState", + "Ctl__NvmeDevState", + "ctl", + 5, + ctl__nvme_dev_state__enum_values_by_number, + 5, + ctl__nvme_dev_state__enum_values_by_name, + 1, + ctl__nvme_dev_state__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ }; static const ProtobufCEnumValue ctl__led_state__enum_values_by_number[5] = { diff --git a/src/mgmt/smd.pb-c.h b/src/mgmt/smd.pb-c.h index 35ce9f2b412..45d159907ca 100644 --- a/src/mgmt/smd.pb-c.h +++ b/src/mgmt/smd.pb-c.h @@ -41,27 +41,27 @@ typedef struct _Ctl__SmdManageResp__RankResp Ctl__SmdManageResp__RankResp; /* --- enums --- */ typedef enum _Ctl__NvmeDevState { - /* - * Device state is unknown, zer6o value - 
*/ - CTL__NVME_DEV_STATE__UNKNOWN = 0, - /* - * Device is in a normal operational state - */ - CTL__NVME_DEV_STATE__NORMAL = 1, - /* - * Device is new and is not yet in-use - */ - CTL__NVME_DEV_STATE__NEW = 2, - /* - * Device is faulty and has been evicted - */ - CTL__NVME_DEV_STATE__EVICTED = 3, - /* - * Device has been physically removed - */ - CTL__NVME_DEV_STATE__UNPLUGGED = - 4 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CTL__NVME_DEV_STATE) + /* + * Device state is unknown, zero value + */ + CTL__NVME_DEV_STATE__UNKNOWN = 0, + /* + * Device is in a normal operational state + */ + CTL__NVME_DEV_STATE__NORMAL = 1, + /* + * Device is new and is not yet in-use + */ + CTL__NVME_DEV_STATE__NEW = 2, + /* + * Device is faulty and has been evicted + */ + CTL__NVME_DEV_STATE__EVICTED = 3, + /* + * Device has been physically removed + */ + CTL__NVME_DEV_STATE__UNPLUGGED = 4 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CTL__NVME_DEV_STATE) } Ctl__NvmeDevState; typedef enum _Ctl__LedState { /* @@ -290,13 +290,10 @@ struct _Ctl__SmdDevice */ uint64_t usable_bytes; }; -#define CTL__SMD_DEVICE__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT(&ctl__smd_device__descriptor) \ - , (char *)protobuf_c_empty_string, 0, NULL, (char *)protobuf_c_empty_string, \ - CTL__NVME_DEV_STATE__UNKNOWN, CTL__LED_STATE__OFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0 \ - } +#define CTL__SMD_DEVICE__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&ctl__smd_device__descriptor) \ + , (char *)protobuf_c_empty_string, 0,NULL, (char *)protobuf_c_empty_string, CTL__NVME_DEV_STATE__UNKNOWN, CTL__LED_STATE__OFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + struct _Ctl__SmdDevReq { diff --git a/src/proto/ctl/smd.proto b/src/proto/ctl/smd.proto index 28d60a81616..e8616fc166f 100644 --- a/src/proto/ctl/smd.proto +++ b/src/proto/ctl/smd.proto @@ -76,7 +76,7 @@ message BioHealthResp { } enum NvmeDevState { - UNKNOWN = 0; // Device state is unknown, zer6o value + UNKNOWN = 0; // Device state is unknown, zero value NORMAL = 1; // Device is in a normal operational 
state NEW = 2; // Device is new and is not yet in-use EVICTED = 3; // Device is faulty and has been evicted From 701b1e18b62c1111244827f0a8d81fbac95fda1b Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Mon, 4 Sep 2023 15:08:15 +0800 Subject: [PATCH 20/80] DAOS-14246 vos: do not reset dae_preparing until dtx_rec_release done (#12974) Test-nvme: auto_md_on_ssd That will avoid triggering assertion for 'prepare' status check during dtx_rec_release(). Required-githooks: true Signed-off-by: Fan Yong --- src/vos/vos_common.c | 1 - src/vos/vos_dtx.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index b2472ae94f3..cf2ae1520ad 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -288,7 +288,6 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, dae = dth->dth_ent; if (dae != NULL) { if (err == 0 && unlikely(dae->dae_preparing && dae->dae_aborting)) { - dae->dae_preparing = 0; rc = vos_dtx_abort_internal(cont, dae, true); D_CDEBUG(rc != 0, DLOG_ERR, DB_IO, "Delay abort DTX "DF_DTI" (1): rc = %d\n", diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 7cfdac7835a..63b102e32f9 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -2217,6 +2217,7 @@ vos_dtx_abort_internal(struct vos_container *cont, struct vos_dtx_act_ent *dae, } rc = dtx_rec_release(cont, dae, true); + dae->dae_preparing = 0; if (rc == 0) { dae->dae_aborting = 1; rc = umem_tx_commit(umm); From 77ecdf41473a3e40cd937ce47151b53a4a4ed903 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Mon, 4 Sep 2023 20:39:22 +0900 Subject: [PATCH 21/80] DAOS-14138 rsvc: Fix a map_distd leak (#12963) The rsvc_step_down_cb function forgets to finalize map_distd when the entry state of the ds_rsvc is UP_EMPTY. This patch fixes such a map_distd leak, adds an assertion that there must be no existing map_distd when init_map_distd creates a new one, and adds a regression test that would trigger the new assertion if the leak were to happen again. 
Required-githooks: true Signed-off-by: Li Wei --- src/include/daos/common.h | 1 + src/pool/srv_pool.c | 5 +++- src/rsvc/srv.c | 59 ++++++++++++++++++------------------- src/tests/suite/daos_mgmt.c | 47 +++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 32 deletions(-) diff --git a/src/include/daos/common.h b/src/include/daos/common.h index 557655de4eb..c7af0fc6563 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -827,6 +827,7 @@ enum { #define DAOS_CONT_OPEN_FAIL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x68) #define DAOS_POOL_FAIL_MAP_REFRESH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x69) #define DAOS_CONT_G2L_FAIL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x6a) +#define DAOS_POOL_CREATE_FAIL_STEP_UP (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x6b) /** interoperability failure inject */ #define FLC_SMD_DF_VER (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x70) diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 3c223a6bd42..71e7e2d358b 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -2686,7 +2686,10 @@ ds_pool_create_handler(crt_rpc_t *rpc) */ D_DEBUG(DB_MD, DF_UUID": trying to finish stepping up\n", DP_UUID(in->pri_op.pi_uuid)); - rc = pool_svc_step_up_cb(&svc->ps_rsvc); + if (DAOS_FAIL_CHECK(DAOS_POOL_CREATE_FAIL_STEP_UP)) + rc = -DER_GRPVER; + else + rc = pool_svc_step_up_cb(&svc->ps_rsvc); if (rc != 0) { D_ASSERT(rc != DER_UNINIT); rdb_resign(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term); diff --git a/src/rsvc/srv.c b/src/rsvc/srv.c index 1d0b27aeea2..348ba7d0858 100644 --- a/src/rsvc/srv.c +++ b/src/rsvc/srv.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2022 Intel Corporation. + * (C) Copyright 2019-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -78,6 +78,7 @@ alloc_init(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, D_ASSERT(svc->s_id.iov_buf_len >= svc->s_id.iov_len); uuid_copy(svc->s_db_uuid, db_uuid); svc->s_state = DS_RSVC_DOWN; + svc->s_map_distd = ABT_THREAD_NULL; rc = rsvc_class(class)->sc_name(&svc->s_id, &svc->s_name); if (rc != 0) @@ -404,16 +405,16 @@ init_map_distd(struct ds_rsvc *svc) { int rc; + D_ASSERT(svc->s_map_distd == ABT_THREAD_NULL); svc->s_map_dist = false; svc->s_map_distd_stop = false; ds_rsvc_get(svc); get_leader(svc); - rc = dss_ult_create(map_distd, svc, DSS_XS_SELF, 0, 0, - &svc->s_map_distd); + rc = dss_ult_create(map_distd, svc, DSS_XS_SELF, 0, 0, &svc->s_map_distd); if (rc != 0) { - D_ERROR("%s: failed to start map_distd: "DF_RC"\n", svc->s_name, - DP_RC(rc)); + D_ERROR("%s: failed to start map_distd: "DF_RC"\n", svc->s_name, DP_RC(rc)); + svc->s_map_distd = ABT_THREAD_NULL; put_leader(svc); ds_rsvc_put(svc); } @@ -433,9 +434,8 @@ fini_map_distd(struct ds_rsvc *svc) { int rc; - rc = ABT_thread_join(svc->s_map_distd); - D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc)); - ABT_thread_free(&svc->s_map_distd); + rc = ABT_thread_free(&svc->s_map_distd); + D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); } static int @@ -534,40 +534,37 @@ bootstrap_self(struct ds_rsvc *svc, void *arg) static void rsvc_step_down_cb(struct rdb *db, uint64_t term, void *arg) { - struct ds_rsvc *svc = arg; + struct ds_rsvc *svc = arg; + enum ds_rsvc_state entry_state; D_DEBUG(DB_MD, "%s: stepping down from "DF_U64"\n", svc->s_name, term); ABT_mutex_lock(svc->s_mutex); - D_ASSERTF(svc->s_term == term, DF_U64" == "DF_U64"\n", svc->s_term, - term); - D_ASSERT(svc->s_state == DS_RSVC_UP_EMPTY || - svc->s_state == DS_RSVC_UP); + D_ASSERTF(svc->s_term == term, DF_U64" == "DF_U64"\n", svc->s_term, term); + entry_state = svc->s_state; + D_ASSERT(entry_state == DS_RSVC_UP_EMPTY || entry_state == DS_RSVC_UP); - if (svc->s_state == DS_RSVC_UP) { - /* Stop 
accepting new leader references. */ - change_state(svc, DS_RSVC_DRAINING); + /* Stop accepting new leader references (ds_rsvc_lookup_leader). */ + change_state(svc, DS_RSVC_DRAINING); - if (rsvc_class(svc->s_class)->sc_map_dist != NULL) - drain_map_distd(svc); + if (rsvc_class(svc->s_class)->sc_map_dist != NULL) + drain_map_distd(svc); + if (entry_state == DS_RSVC_UP) rsvc_class(svc->s_class)->sc_drain(svc); - /* TODO: Abort all in-flight RPCs we sent. */ - - /* Wait for all leader references to be released. */ - for (;;) { - if (svc->s_leader_ref == 0) - break; - D_DEBUG(DB_MD, "%s: waiting for %d leader refs\n", - svc->s_name, svc->s_leader_ref); - ABT_cond_wait(svc->s_leader_ref_cv, svc->s_mutex); - } + /* Wait for all leader references to be released. */ + for (;;) { + if (svc->s_leader_ref == 0) + break; + D_DEBUG(DB_MD, "%s: waiting for %d leader refs\n", svc->s_name, svc->s_leader_ref); + ABT_cond_wait(svc->s_leader_ref_cv, svc->s_mutex); + } + if (entry_state == DS_RSVC_UP) rsvc_class(svc->s_class)->sc_step_down(svc); - if (rsvc_class(svc->s_class)->sc_map_dist != NULL) - fini_map_distd(svc); - } + if (rsvc_class(svc->s_class)->sc_map_dist != NULL) + fini_map_distd(svc); change_state(svc, DS_RSVC_DOWN); ABT_mutex_unlock(svc->s_mutex); diff --git a/src/tests/suite/daos_mgmt.c b/src/tests/suite/daos_mgmt.c index 8659a71beed..9f43610afc6 100644 --- a/src/tests/suite/daos_mgmt.c +++ b/src/tests/suite/daos_mgmt.c @@ -440,6 +440,51 @@ get_sys_info_test(void **state) daos_mgmt_put_sys_info(info); } +/* + * A pool service who steps down from the UP_EMPTY state shall not leak + * map_distd. This is a regression test for DAOS-14138. + */ +static void +pool_create_steps_down_from_up_empty(void **state) +{ + test_arg_t *arg = *state; + uuid_t uuid; + d_rank_list_t svc; + d_rank_t rank; + int rc; + + FAULT_INJECTION_REQUIRED(); + + if (arg->myrank != 0) + return; + + print_message("setting DAOS_POOL_CREATE_FAIL_STEP_UP ... 
"); + rc = daos_debug_set_params(arg->group, 0, DMG_KEY_FAIL_LOC, + DAOS_POOL_CREATE_FAIL_STEP_UP | DAOS_FAIL_ONCE, 0, NULL); + assert_rc_equal(rc, 0); + print_message("success\n"); + + /* + * Request a single PS replica so that this replica will step up again + * after stepping down. The assertion on s_map_distd in init_map_distd + * would fail if we had leaked map_distd during the step down process. + */ + print_message("creating pool synchronously ... "); + rank = -1; + svc.rl_ranks = &rank; + svc.rl_nr = 1; + rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */, + 256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */, + NULL /* prop */, &svc, uuid); + assert_rc_equal(rc, 0); + print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid)); + + print_message("destroying pool synchronously ... "); + rc = dmg_pool_destroy(dmg_config_file, uuid, arg->group, 1); + assert_rc_equal(rc, 0); + print_message("success\n"); +} + static const struct CMUnitTest tests[] = { { "MGMT1: create/destroy pool on all tgts", pool_create_all, async_disable, test_case_teardown}, @@ -453,6 +498,8 @@ static const struct CMUnitTest tests[] = { pool_create_and_destroy_retry, async_disable, test_case_teardown}, { "MGMT6: daos_mgmt_get_sys_info", get_sys_info_test, async_disable, test_case_teardown}, + { "MGMT7: create: PS steps down from UP_EMPTY", + pool_create_steps_down_from_up_empty, async_disable, test_case_teardown}, }; static int From b5ff2943213e9304ff4c61e169bbdbffefec2406 Mon Sep 17 00:00:00 2001 From: dinghwah <48604964+dinghwah@users.noreply.github.com> Date: Tue, 5 Sep 2023 14:15:46 -0400 Subject: [PATCH 22/80] DAOS-14243 test: pool/list_verbose.py:test_fields_basic - missing rebuild_state (#12983) Description: added pool state and rebuild_state to pool list verbose verification. 
Signed-off-by: Ding Ho ding-hwa.ho@intel.com --- src/tests/ftest/pool/list_verbose.py | 57 +++++++++++++++++++++------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/src/tests/ftest/pool/list_verbose.py b/src/tests/ftest/pool/list_verbose.py index f60e01abd48..b7b8c74568d 100644 --- a/src/tests/ftest/pool/list_verbose.py +++ b/src/tests/ftest/pool/list_verbose.py @@ -23,7 +23,8 @@ class ListVerboseTest(IorTestBase): def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, nvme_imbalance, targets_disabled=0, scm_size=None, - nvme_size=None): + nvme_size=None, state=None, rebuild_state=None): + # pylint: disable=too-many-arguments """Create expected dmg pool list output to compare against the actual. Args: @@ -36,6 +37,8 @@ def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, Defaults to 0. scm_size (int, optional): SCM size to fill in the output. Defaults to None. nvme_size (int, optional): NVMe size to fill in the output. Defaults to None. + state (str, optional): Expected pool state. Defaults to None. + rebuild_state (str, optional): Expected pool rebuild state. Defaults to None. Returns: dict: Expected in the same format of actual. 
@@ -58,15 +61,15 @@ def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, return { "uuid": pool.uuid.lower(), "label": pool.label.value, + "svc_ldr": 0, "svc_reps": pool.svc_ranks, + "state": state, "targets_total": targets_total, "targets_disabled": targets_disabled, "upgrade_layout_ver": upgrade_layout_ver, "pool_layout_ver": pool_layout_ver, "query_error_msg": "", "query_status_msg": "", - "state": "Ready", - "svc_ldr": 0, "usage": [ { "tier_name": "SCM", @@ -79,8 +82,8 @@ def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, "size": nvme_size, "free": nvme_free, "imbalance": nvme_imbalance - } - ] + }], + "rebuild_state": rebuild_state } @staticmethod @@ -146,7 +149,7 @@ def verify_scm_size(self, actual, created, rank_count): threshold, diff) self.assertTrue(diff < threshold, msg) - def verify_pool_lists(self, targets_disabled, scm_size, nvme_size): + def verify_pool_lists(self, targets_disabled, scm_size, nvme_size, state, rebuild_state): """Call dmg pool list and verify. self.pool should be a list. The elements of the inputs should @@ -156,6 +159,8 @@ def verify_pool_lists(self, targets_disabled, scm_size, nvme_size): targets_disabled (list): List of targets disabled for pools. scm_size (list): List of SCM size for pools. nvme_size (list): List of NVMe size for pools. + state (list): List of pool state for pools. + rebuild_state (list): List of pool rebuild state for pools. Returns: list: a list of dictionaries containing information for each pool from the dmg @@ -193,12 +198,16 @@ def verify_pool_lists(self, targets_disabled, scm_size, nvme_size): nvme_imbalance=pool_free_data["nvme_imbalance"], targets_disabled=targets_disabled[index], scm_size=pool_free_data["scm_size"], - nvme_size=nvme_size[index])) + nvme_size=nvme_size[index], + state=state[index], + rebuild_state=rebuild_state[index])) # Sort pools by UUID. 
actual_pools.sort(key=lambda item: item.get("uuid")) expected_pools.sort(key=lambda item: item.get("uuid")) + self.log.info("actual_pools: %s", actual_pools) + self.log.info("expected_pools: %s", expected_pools) self.assertListEqual(expected_pools, actual_pools) # For convenience. @@ -247,27 +256,38 @@ def test_fields_basic(self): self.pool = [] # 1. Create first pool with a given SCM and NVMe size. + self.log_step("Create first pool") self.pool.append(self.get_pool(namespace="/run/pool_basic_1/*")) # 2. Verify the fields of pool 1. + self.log_step("Verify the field of first pool") targets_disabled = [0] scm_size = [None] nvme_size = [None] + state = ["Ready"] + rebuild_state = ["idle"] self.verify_pool_lists( - targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size) + targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, + state=state, rebuild_state=rebuild_state) # 3. Create second pool. + self.log_step("Create second pool") self.pool.append(self.get_pool(namespace="/run/pool_basic_2/*")) # 4. Verify the fields for both pools. # Fill in the expected target and size and pass them into verify_pool_lists. + self.log_step("Verify the field of second pool") targets_disabled.append(0) scm_size.append(None) nvme_size.append(None) + state.append("Ready") + rebuild_state.append("idle") self.verify_pool_lists( - targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size) + targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, + state=state, rebuild_state=rebuild_state) # 5. Exclude target 7 in rank 1 of pool 1. + self.log_step("Exclude target 7 in rank 1 of pool 1") self.pool[0].exclude(ranks=[1], tgt_idx="7") # Sizes are reduced by 1/8. @@ -275,14 +295,20 @@ def test_fields_basic(self): reduced_nvme_size = self.pool[0].nvme_size.value * 0.875 # 6. Verify the fields for both pools with expected disabled and size. 
+ self.log_step("Verify the fields for both pools with expected disabled and size") targets_disabled[0] = 1 scm_size[0] = reduced_scm_size nvme_size[0] = reduced_nvme_size + state[0] = "Degraded" + rebuild_state[0] = "busy" + self.verify_pool_lists( - targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size) + targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, + state=state, rebuild_state=rebuild_state) # 7-11. Destroy and verify until the pools are gone. while self.pool: + self.log_step("Destroy and verify until the pools are gone") self.pool[-1].destroy() self.pool.pop() @@ -292,7 +318,8 @@ def test_fields_basic(self): nvme_size.pop() self.verify_pool_lists( - targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size) + targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, + state=state, rebuild_state=rebuild_state) def verify_used_imbalance(self, storage): """Verification steps for test_used_imbalance. @@ -317,8 +344,11 @@ def verify_used_imbalance(self, storage): # 2. Verify the pool created. targets_disabled = [0] scm_size = [None] + state = ["Ready"] + rebuild_state = ["idle"] actual_pools_before = self.verify_pool_lists( - targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size) + targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, + state=state, rebuild_state=rebuild_state) # 3. Store free. free_before, _ = self.get_free_imbalance(actual_pools_before[0], storage) @@ -335,7 +365,8 @@ def verify_used_imbalance(self, storage): # 6. Verify all fields except free and imbalance. Free and imbalance are # obtained from actual. actual_pools_after = self.verify_pool_lists( - targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size) + targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, + state=state, rebuild_state=rebuild_state) # Obtain the new free and imbalance. 
free_after, imbalance_after = self.get_free_imbalance( From 65e5bfecefd7d8dba003a2752f81a386fcd6c223 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 5 Sep 2023 23:51:17 +0100 Subject: [PATCH 23/80] DAOS-13613 test: Make 2nd pool create fail with ENOSPACE (#12998) Bump system_ram_reserved to 32 to avoid OOM-killer terminating engine process. Signed-off-by: Tom Nabarro --- src/tests/ftest/control/dmg_system_reformat.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/ftest/control/dmg_system_reformat.yaml b/src/tests/ftest/control/dmg_system_reformat.yaml index 2f9a09ee3b4..7b8dbd1ad0b 100644 --- a/src/tests/ftest/control/dmg_system_reformat.yaml +++ b/src/tests/ftest/control/dmg_system_reformat.yaml @@ -8,6 +8,7 @@ server_config: engines: 0: storage: auto + system_ram_reserved: 32 pool: control_method: dmg size: 90% From 883854049a07e3b20c7a6319d5c5c94a0eb34384 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Wed, 6 Sep 2023 17:05:32 -0500 Subject: [PATCH 24/80] DAOS-13293 cart: add APIs to query protocol info from mercury (#12439) - Add crt_protocol_info_get()/crt_protocol_info_free() and crt_protocol_info struct to retrieve list of protocols - Add utest_protocol unit test - Add crt_hg_get_protocol_info()/crt_hg_free_protocol_info() - This will allow control plane to collect fabric interfaces and their supported providers. 
Signed-off-by: Jerome Soumagne --- ci/test_files_to_stash.txt | 1 + src/cart/crt_hg.c | 14 +++ src/cart/crt_hg.h | 2 + src/cart/crt_init.c | 14 +++ src/include/cart/api.h | 23 ++++- src/include/cart/types.h | 11 +++ src/tests/ftest/cart/utest/SConscript | 3 +- src/tests/ftest/cart/utest/utest_protocol.c | 98 +++++++++++++++++++++ utils/rpms/daos.spec | 2 +- utils/utest.yaml | 1 + 10 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 src/tests/ftest/cart/utest/utest_protocol.c diff --git a/ci/test_files_to_stash.txt b/ci/test_files_to_stash.txt index 60a5014bcca..8e82bdf164c 100755 --- a/ci/test_files_to_stash.txt +++ b/ci/test_files_to_stash.txt @@ -1,5 +1,6 @@ build/*/*/src/tests/ftest/cart/utest/test_linkage, build/*/*/src/tests/ftest/cart/utest/utest_hlc, +build/*/*/src/tests/ftest/cart/utest/utest_protocol, build/*/*/src/tests/ftest/cart/utest/utest_swim, build/*/*/src/gurt/tests/test_gurt, build/*/*/src/gurt/tests/test_gurt_telem_producer, diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index db659f39297..8e871e7c3a6 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -764,6 +764,20 @@ crt_hg_log(FILE *stream, const char *fmt, ...) 
return 0; } +int +crt_hg_get_protocol_info(const char *info_string, struct na_protocol_info **na_protocol_info_p) +{ + hg_return_t ret = HG_Get_na_protocol_info(info_string, na_protocol_info_p); + + return crt_hgret_2_der(ret); +} + +void +crt_hg_free_protocol_info(struct na_protocol_info *na_protocol_info) +{ + HG_Free_na_protocol_info(na_protocol_info); +} + /* to be called only in crt_init */ int crt_hg_init(void) diff --git a/src/cart/crt_hg.h b/src/cart/crt_hg.h index 7c6c49570d7..48ea4c74f41 100644 --- a/src/cart/crt_hg.h +++ b/src/cart/crt_hg.h @@ -126,6 +126,8 @@ struct crt_hg_context { }; /* crt_hg.c */ +int crt_hg_get_protocol_info(const char *info_string, struct na_protocol_info **na_protocol_info_p); +void crt_hg_free_protocol_info(struct na_protocol_info *na_protocol_info); int crt_hg_init(void); int crt_hg_fini(void); int crt_hg_ctx_init(struct crt_hg_context *hg_ctx, int provider, int idx, bool primary); diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 7fa5491a250..a9a912dbd8a 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -557,6 +557,20 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) g_prov_settings_applied[prov] = true; } +int +crt_protocol_info_get(const char *info_string, struct crt_protocol_info **protocol_info_p) +{ + static_assert(sizeof(struct crt_protocol_info) == sizeof(struct na_protocol_info), + "protocol info structs do not match"); + return crt_hg_get_protocol_info(info_string, (struct na_protocol_info **)protocol_info_p); +} + +void +crt_protocol_info_free(struct crt_protocol_info *protocol_info) +{ + crt_hg_free_protocol_info((struct na_protocol_info *)protocol_info); +} + int crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) { diff --git a/src/include/cart/api.h b/src/include/cart/api.h index 121d7207fb9..45ef8c67529 100644 --- a/src/include/cart/api.h +++ b/src/include/cart/api.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2022 Intel Corporation. 
+ * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -36,6 +36,27 @@ extern "C" { * @{ */ +/** + * Get information on protocols that are supported by underlying mercury plugins. If + * \info_string is NULL, a list of all supported protocols by all plugins will + * be returned. The returned list must be freed using crt_protocol_info_free(). + * + * \param[in] info_string NULL or "" or "" + * \param[out] protocol_info_p linked-list of protocol infos + * + * \return DER_SUCCESS on success, negative value if error +*/ +int +crt_protocol_info_get(const char *info_string, struct crt_protocol_info **protocol_info_p); + +/** + * Free protocol_info from crt_protocol_info_get(). + * + * \param[in,out] protocol_info linked-list of protocol infos +*/ +void +crt_protocol_info_free(struct crt_protocol_info *protocol_info); + /** * Initialize CRT transport layer. Must be called on both the server side and * the client side. This function is reference counted, it can be called diff --git a/src/include/cart/types.h b/src/include/cart/types.h index a4bda75fc24..0ce7dd79815 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -20,6 +20,17 @@ #include #include + +/** + * Protocol info used to query list of protocols and devices. +*/ +struct crt_protocol_info { + struct crt_protocol_info *next; /**< Pointer to the next info */ + char *class_name; /**< Name of the Mercury class */ + char *protocol_name; /**< Name of this protocol */ + char *device_name; /**< Name of associated device */ +}; + /** * Initialization options passed during crt_init() call. 
* diff --git a/src/tests/ftest/cart/utest/SConscript b/src/tests/ftest/cart/utest/SConscript index 649c7d9881e..28b48844161 100644 --- a/src/tests/ftest/cart/utest/SConscript +++ b/src/tests/ftest/cart/utest/SConscript @@ -4,7 +4,8 @@ # """Unit tests""" -TEST_SRC = ['test_linkage.cpp', 'utest_hlc.c', 'utest_swim.c', 'utest_portnumber.c'] +TEST_SRC = ['test_linkage.cpp', 'utest_hlc.c', 'utest_swim.c', + 'utest_portnumber.c', 'utest_protocol.c'] LIBPATH = [Dir('../../'), Dir('../../../gurt')] diff --git a/src/tests/ftest/cart/utest/utest_protocol.c b/src/tests/ftest/cart/utest/utest_protocol.c new file mode 100644 index 00000000000..cf3631fb4b7 --- /dev/null +++ b/src/tests/ftest/cart/utest/utest_protocol.c @@ -0,0 +1,98 @@ +/* + * (C) Copyright 2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include +#include +#include +#include +#include "gurt/debug.h" + +#include +#include + +#define NWIDTH 20 + +/* + * Mimics hg_info utility to return list of protocols but uses crt APIs. 
+ */ + +static void +print_info(const char *info_string) +{ + struct crt_protocol_info *protocol_infos = NULL, *protocol_info; + int rc; + + rc = crt_protocol_info_get(info_string, &protocol_infos); + if (rc != DER_SUCCESS) { + DL_ERROR(rc, "crt_protocol_info_get() failed"); + goto out; + } + if (protocol_infos == NULL) { + D_ERROR("No protocol found for \"%s\"\n", info_string); + rc = -DER_NOTSUPPORTED; + goto out; + } + + printf("--------------------------------------------------\n"); + printf("%-*s%*s%*s\n", 10, "Class", NWIDTH, "Protocol", NWIDTH, "Device"); + printf("--------------------------------------------------\n"); + for (protocol_info = protocol_infos; protocol_info != NULL; + protocol_info = protocol_info->next) + printf("%-*s%*s%*s\n", 10, protocol_info->class_name, NWIDTH, + protocol_info->protocol_name, NWIDTH, protocol_info->device_name); + + crt_protocol_info_free(protocol_infos); + +out: + assert_true(rc == DER_SUCCESS); +} + +static void +test_all(void **state) +{ + print_info(NULL); +} + +static void +test_tcp(void **state) +{ + print_info("tcp"); +} + +// disabled until memleak is resolved in mercury +// static void +// test_ofi_tcp(void **state) +// { +// print_info("ofi+tcp"); +// } + +static int +init_tests(void **state) +{ + return d_log_init(); +} + +static int +fini_tests(void **state) +{ + d_log_fini(); + + return 0; +} + +int +main(int argc, char *argv[]) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_all), cmocka_unit_test(test_tcp), + // cmocka_unit_test(test_ofi_tcp), + }; + + d_register_alt_assert(mock_assert); + + return cmocka_run_group_tests_name("utest_protocol", tests, init_tests, fini_tests); +} diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index f3458bf93cb..e77db2c49f8 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -367,7 +367,7 @@ mv %{buildroot}/%{conf_dir}/bash_completion.d %{buildroot}/%{_sysconfdir} sed -i -e '1s/env //' 
%{buildroot}{%{daoshome}/TESTING/ftest/{cart/cart_logtest,config_file_gen,launch,slurm_setup,util/verify_perms}.py,%{_bindir}/daos_storage_estimator.py,%{_datarootdir}/daos/control/setup_spdk.sh} # shouldn't have source files in a non-devel RPM -rm -f %{buildroot}%{daoshome}/TESTING/ftest/cart/{test_linkage.cpp,utest_{hlc,portnumber,swim}.c,wrap_cmocka.h} +rm -f %{buildroot}%{daoshome}/TESTING/ftest/cart/{test_linkage.cpp,utest_{hlc,portnumber,protocol,swim}.c,wrap_cmocka.h} %pre server getent group daos_metrics >/dev/null || groupadd -r daos_metrics diff --git a/utils/utest.yaml b/utils/utest.yaml index 8bea65c0dd8..fd7580be142 100644 --- a/utils/utest.yaml +++ b/utils/utest.yaml @@ -142,6 +142,7 @@ tests: - cmd: ["src/tests/ftest/cart/utest/test_linkage"] - cmd: ["src/tests/ftest/cart/utest/utest_hlc"] + - cmd: ["src/tests/ftest/cart/utest/utest_protocol"] - cmd: ["src/tests/ftest/cart/utest/utest_swim"] - name: storage_estimator base: "DAOS_BASE" From 61d88038a3f77be92f8b767e2d966a069579ae39 Mon Sep 17 00:00:00 2001 From: wangdi Date: Wed, 6 Sep 2023 20:57:47 -0700 Subject: [PATCH 25/80] DAOS-14208 object: do not cross stripe during parity rebuild (#12994) parity rebuild should not cross the stripe boundary and use the same epoch, otherwise it may cause data corruption for degraded fetch Signed-off-by: Di Wang --- src/object/srv_obj_migrate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index e28a4ce8d4d..51280364c2b 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -951,6 +951,7 @@ __migrate_fetch_update_parity(struct migrate_one *mrone, daos_handle_t oh, char *data; daos_size_t size; unsigned int p = obj_ec_parity_tgt_nr(&mrone->mo_oca); + daos_size_t stride_nr = obj_ec_stripe_rec_nr(&mrone->mo_oca); unsigned char *p_bufs[OBJ_EC_MAX_P] = { 0 }; struct daos_csummer *csummer = NULL; unsigned char *ptr; @@ -996,7 +997,9 @@ 
__migrate_fetch_update_parity(struct migrate_one *mrone, daos_handle_t oh, for (j = 1; j < iods[i].iod_nr; j++) { daos_recx_t *recx = &iods[i].iod_recxs[j]; - if (offset + size == recx->rx_idx) { + /* Merge the recx if there are in the same stripe */ + if (offset + size == recx->rx_idx && + offset / stride_nr == recx->rx_idx / stride_nr) { size += recx->rx_nr; parity_eph = max(ephs[i][j], parity_eph); continue; From e49defd2f43b5faf33a1659ff229c74784fc631a Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Thu, 7 Sep 2023 00:09:10 -0700 Subject: [PATCH 26/80] DAOS-13499 tools: fix datamover container create message (#12642) PR #11189 moved the container create messages to the GO code, but this isn't correct because the container isn't *always* created. The original logging issue was resolved by #12632, so the messages can be moved back to the C code where there is finer control. Signed-off-by: Dalton Bohning --- src/control/cmd/daos/container.go | 1 - src/control/cmd/daos/filesystem.go | 1 - src/tests/ftest/deployment/basic_checkout.py | 2 +- src/utils/daos_hdlr.c | 1 + 4 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/control/cmd/daos/container.go b/src/control/cmd/daos/container.go index 48a3da8bd09..d83c08600e3 100644 --- a/src/control/cmd/daos/container.go +++ b/src/control/cmd/daos/container.go @@ -1095,7 +1095,6 @@ func (cmd *containerCloneCmd) Execute(_ []string) error { } // Compat with old-style output - cmd.Infof("Successfully created container %s", C.GoString(&ap.dm_args.dst_cont[0])) cmd.Infof("Successfully copied to destination container %s", C.GoString(&ap.dm_args.dst_cont[0])) return nil diff --git a/src/control/cmd/daos/filesystem.go b/src/control/cmd/daos/filesystem.go index 02344d91cb2..a03d38ff0b9 100644 --- a/src/control/cmd/daos/filesystem.go +++ b/src/control/cmd/daos/filesystem.go @@ -102,7 +102,6 @@ func (cmd *fsCopyCmd) Execute(_ []string) error { fsType = "POSIX" } // Compat with old-style output - cmd.Infof("Successfully 
created container %s", C.GoString(&ap.dm_args.dst_cont[0])) cmd.Infof("Successfully copied to %s: %s", fsType, cmd.Dest) cmd.Infof(" Directories: %d", ap.fs_copy_stats.num_dirs) cmd.Infof(" Files: %d", ap.fs_copy_stats.num_files) diff --git a/src/tests/ftest/deployment/basic_checkout.py b/src/tests/ftest/deployment/basic_checkout.py index f6d5e465bd1..3a700f27223 100644 --- a/src/tests/ftest/deployment/basic_checkout.py +++ b/src/tests/ftest/deployment/basic_checkout.py @@ -110,7 +110,7 @@ def test_basic_checkout_dm(self): :avocado: tags=all,full_regression :avocado: tags=hw,large - :avocado: tags=deployment,datamover,fs_copy,ior,basic_checkout + :avocado: tags=deployment,datamover,daos_fs_copy,ior,basic_checkout :avocado: tags=BasicCheckoutDm,test_basic_checkout_dm """ # load ior params for dm test diff --git a/src/utils/daos_hdlr.c b/src/utils/daos_hdlr.c index eeecad507d2..cf192241ece 100644 --- a/src/utils/daos_hdlr.c +++ b/src/utils/daos_hdlr.c @@ -1570,6 +1570,7 @@ dm_connect(struct cmd_args_s *ap, DH_PERROR_DER(ap, rc, "failed to open container"); D_GOTO(err, rc); } + fprintf(ap->outstream, "Successfully created container %s\n", ca->dst_cont); } if (is_posix_copy) { rc = dfs_sys_mount(ca->dst_poh, ca->dst_coh, O_RDWR, DFS_SYS_NO_LOCK, From 8d884edd04d8a3ad3970e2376b99aa943d9f9785 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 7 Sep 2023 10:21:05 -0400 Subject: [PATCH 27/80] DAOS-13674 test: Bump pool size in container/snapshot.py (#13012) To avoid ENOSPACE failures with large single value writes the pool size has been increased in the container/snapshot.py test. 
Signed-off-by: Phil Henderson --- src/tests/ftest/container/snapshot.py | 4 ++-- src/tests/ftest/container/snapshot.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/ftest/container/snapshot.py b/src/tests/ftest/container/snapshot.py index 3fc29d209d8..4b7918f2f71 100644 --- a/src/tests/ftest/container/snapshot.py +++ b/src/tests/ftest/container/snapshot.py @@ -141,7 +141,7 @@ def test_snapshot_negativecases(self): :avocado: tags=all,daily_regression :avocado: tags=vm :avocado: tags=container,smoke,snap,snapshot - :avocado: tags=snapshot_negative,snapshotcreate_negative,test_snapshot_negativecases + :avocado: tags=Snapshot,test_snapshot_negativecases """ # DAOS-1322 Create a new container, verify snapshot state as expected @@ -315,7 +315,7 @@ def test_snapshots(self): :avocado: tags=all,full_regression :avocado: tags=vm :avocado: tags=container,smoke,snap,snapshot - :avocado: tags=snapshots,test_snapshots + :avocado: tags=Snapshot,test_snapshots """ test_data = [] diff --git a/src/tests/ftest/container/snapshot.yaml b/src/tests/ftest/container/snapshot.yaml index f82b525660f..95bbe8ca18e 100644 --- a/src/tests/ftest/container/snapshot.yaml +++ b/src/tests/ftest/container/snapshot.yaml @@ -17,7 +17,7 @@ server_config: scm_mount: /mnt/daos system_ram_reserved: 1 pool: - scm_size: 1G + scm_size: 5G control_method: dmg snapshot: dkey: "dkey" From c5aec75483dd2cea55bf05beff5de824fc854d44 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 7 Sep 2023 12:38:13 -0400 Subject: [PATCH 28/80] DAOS-14301 test: Disable fault injection tests from release testing. (#13016) Adding 'faults' tag to scrubber/csum_fault.py. 
Signed-off-by: Phil Henderson --- src/tests/ftest/scrubber/csum_fault.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/scrubber/csum_fault.py b/src/tests/ftest/scrubber/csum_fault.py index a038e1d2d37..6e1b9a968db 100644 --- a/src/tests/ftest/scrubber/csum_fault.py +++ b/src/tests/ftest/scrubber/csum_fault.py @@ -20,7 +20,7 @@ def test_scrubber_csum_fault(self): whether scrubber finds them. :avocado: tags=all,pr,daily_regression :avocado: tags=hw,medium - :avocado: tags=scrubber + :avocado: tags=scrubber,faults :avocado: tags=TestWithScrubberFault,test_scrubber_csum_fault """ From d6285840be53edc1dee711b547031c64cadafb15 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 8 Sep 2023 13:45:23 -0400 Subject: [PATCH 29/80] DAOS-14254 test: Improve test harness server file cleanup in startup (#12978) Add raising exceptions for failed server file cleanup steps during server startup and limiting mount cleanup to existing mount points. Signed-off-by: Phil Henderson --- src/tests/ftest/util/server_utils.py | 123 +++++++++++++++++---------- 1 file changed, 76 insertions(+), 47 deletions(-) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index cf8350f09ba..7b312457995 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -272,60 +272,89 @@ def clean_files(self, verbose=True): Args: verbose (bool, optional): display clean commands. Defaults to True. 
- """ - clean_commands = [] - for index, engine_params in enumerate(self.manager.job.yaml.engine_params): - scm_mount = engine_params.get_value("scm_mount") - self.log.info("Cleaning up the %s directory.", str(scm_mount)) - - # Remove the superblocks - cmd = "sudo rm -fr {}/*".format(scm_mount) - if cmd not in clean_commands: - clean_commands.append(cmd) - - # Remove the shared memory segment associated with this io server - cmd = "sudo ipcrm -M {}".format(self.D_TM_SHARED_MEMORY_KEY + index) - clean_commands.append(cmd) - - # Dismount the scm mount point - cmd = "while sudo umount {}; do continue; done".format(scm_mount) - if cmd not in clean_commands: - clean_commands.append(cmd) + Raises: + ServerFailed: if there was an error cleaning up the daos server files + """ + scm_mounts = [] + scm_lists = [] + for engine_params in self.manager.job.yaml.engine_params: + scm_mounts.append(engine_params.get_value("scm_mount")) if self.manager.job.using_dcpm: scm_list = engine_params.get_value("scm_list") if isinstance(scm_list, list): - self.log.info("Cleaning up the following device(s): %s.", ", ".join(scm_list)) - # Umount and wipefs the dcpm device - cmd_list = [ - "for dev in {}".format(" ".join(scm_list)), - "do mount=$(lsblk $dev -n -o MOUNTPOINT)", - "if [ ! 
-z $mount ]", - "then while sudo umount $mount", - "do continue", - "done", - "fi", - "sudo wipefs -a $dev", - "done" - ] - cmd = "; ".join(cmd_list) - if cmd not in clean_commands: - clean_commands.append(cmd) + scm_lists.append(scm_list) + + for index, scm_mount in enumerate(scm_mounts): + # Remove the superblocks and dismount the scm mount point + self.log.info("Cleaning up the %s scm mount.", str(scm_mount)) + self.clean_mount(self._hosts, scm_mount, verbose, index) + + for scm_list in scm_lists: + # Umount and wipefs the dcpm device + self.log.info("Cleaning up the %s dcpm devices", str(scm_list)) + command_list = [ + "for dev in {}".format(" ".join(scm_list)), + "do mount=$(lsblk $dev -n -o MOUNTPOINT)", + "if [ ! -z $mount ]", + "then while sudo umount $mount", + "do continue", + "done", + "fi", + "sudo wipefs -a $dev", + "done" + ] + command = "; ".join(command_list) + result = run_remote(self.log, self._hosts, command, verbose) + if not result.passed: + raise ServerFailed("Failed cleaning {} on {}".format(scm_list, result.failed_hosts)) if self.manager.job.using_control_metadata: # Remove the contents (superblocks) of the control plane metadata path - cmd = "sudo rm -fr {}/*".format(self.manager.job.control_metadata.path.value) - if cmd not in clean_commands: - clean_commands.append(cmd) - - if self.manager.job.control_metadata.device.value is not None: - # Dismount the control plane metadata mount point - cmd = "while sudo umount {}; do continue; done".format( - self.manager.job.control_metadata.device.value) - if cmd not in clean_commands: - clean_commands.append(cmd) - - pcmd(self._hosts, "; ".join(clean_commands), verbose) + self.log.info( + "Cleaning up the control metadata path %s", + self.manager.job.control_metadata.path.value) + self.clean_mount(self._hosts, self.manager.job.control_metadata.path.value, verbose) + + def clean_mount(self, hosts, mount, verbose=True, index=None): + """Clean the mount point by removing the superblocks and 
dismounting. + + Args: + hosts (NodeSet): the hosts on which to clean the mount point + mount (str): the mount point to clean + verbose (bool, optional): display clean commands. Defaults to True. + index (int, optional): Defaults to None. + + Raises: + ServerFailed: if there is an error cleaning the mount point + """ + self.log.debug("Checking for the existence of the %s mount point", mount) + command = "test -d {}".format(mount) + result = run_remote(self.log, hosts, command, verbose) + if result.passed: + mounted_hosts = result.passed_hosts + + # Remove the superblocks + self.log.debug("Removing the %s superblocks", mount) + command = "sudo rm -fr {}/*".format(mount) + result = run_remote(self.log, mounted_hosts, command, verbose) + if not result.passed: + raise ServerFailed( + "Failed to remove superblocks for {} on {}".format(mount, result.failed_hosts)) + + if index is not None: + # Remove the shared memory segment associated with this io server + self.log.debug("Removing the shared memory segment") + command = "sudo ipcrm -M {}".format(self.D_TM_SHARED_MEMORY_KEY + index) + run_remote(self.log, self._hosts, command, verbose) + + # Dismount the scm mount point + self.log.debug("Dismount the %s mount point", mount) + command = "while sudo umount {}; do continue; done".format(mount) + result = run_remote(self.log, mounted_hosts, command, verbose) + if not result.passed: + raise ServerFailed( + "Failed to dismount {} on {}".format(mount, result.failed_hosts)) def prepare_storage(self, user, using_dcpm=None, using_nvme=None): """Prepare the server storage. From db6ac13c819d8053e5a94541be2d6df0fcd11a2b Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 8 Sep 2023 20:57:48 +0100 Subject: [PATCH 30/80] DAOS-623 build: Make scons quiet quiet. (#12892) Update build so there's less output when quiet is used. 
Signed-off-by: Ashley Pittman --- site_scons/components/__init__.py | 8 +-- site_scons/env_modules.py | 35 +++++++------ site_scons/prereq_tools/base.py | 8 ++- site_scons/site_tools/compiler_setup.py | 5 +- site_scons/site_tools/daos_builder.py | 12 +++-- src/SConscript | 3 +- src/client/dfs/SConscript | 17 +++--- src/client/serialize/SConscript | 7 ++- src/tests/suite/SConscript | 70 +------------------------ src/tests/suite/daos_test.h | 17 ------ 10 files changed, 60 insertions(+), 122 deletions(-) diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index 1fe43214676..94154c6d2f9 100644 --- a/site_scons/components/__init__.py +++ b/site_scons/components/__init__.py @@ -58,7 +58,7 @@ def check(self, name): self.installed.append(name) return True - if not GetOption('help'): + if not GetOption('help') and not GetOption('silent'): print(f'Using build version of {name}') self.not_installed.append(name) return False @@ -90,12 +90,14 @@ def check(reqs, name, built_str, installed_str=""): def ofi_config(config): """Check ofi version""" - print('Checking for libfabric > 1.11...', end=' ') + if not GetOption('silent'): + print('Checking for libfabric > 1.11...', end=' ') code = """#include _Static_assert(FI_MAJOR_VERSION == 1 && FI_MINOR_VERSION >= 11, "libfabric must be >= 1.11");""" rc = config.TryCompile(code, ".c") - print('yes' if rc else 'no') + if not GetOption('silent'): + print('yes' if rc else 'no') return rc diff --git a/site_scons/env_modules.py b/site_scons/env_modules.py index 9d38df7e2db..df4af0a6498 100644 --- a/site_scons/env_modules.py +++ b/site_scons/env_modules.py @@ -36,7 +36,7 @@ class _env_module(): # pylint: disable=invalid-name "openmpi": ['mpi/mlnx_openmpi-x86_64', 'mpi/openmpi3-x86_64', 'gnu-openmpi', 'mpi/openmpi-x86_64']} - def __init__(self): + def __init__(self, silent=False): """Load Modules for initializing environment variables""" # Leap 15's lmod-lua doesn't include the usual module path # in it's 
MODULEPATH, for some unknown reason @@ -44,6 +44,7 @@ def __init__(self): os.path.join(os.sep, "usr", "share", "modulefiles"), os.path.join(os.sep, "etc", "modulefiles")] + os.environ.get("MODULEPATH", "").split(":")) + self._silent = silent self._module_load = self._init_mpi_module() def _module_func(self, command, *arguments): # pylint: disable=no-self-use @@ -56,7 +57,8 @@ def _module_func(self, command, *arguments): # pylint: disable=no-self-use # pylint: disable=consider-using-with try: - print(f"Going to run {cmd}") + if not self._silent: + print(' '.join(cmd)) proc = Popen(cmd, stdout=PIPE, stderr=PIPE) except OSError as error: if error.errno == errno.ENOENT: @@ -81,11 +83,11 @@ def _module_func(self, command, *arguments): # pylint: disable=no-self-use # return _mlstatus, stderr.decode() # pylint: disable=undefined-variable def _init_mpi_module(self): - """init mpi module function""" + """Init mpi module function""" return self._mpi_module def _mpi_module(self, mpi): - """attempt to load the requested module""" + """Attempt to load the requested module""" load = [] unload = [] @@ -109,16 +111,17 @@ def _mpi_module(self, mpi): self._module_func('unload', to_unload) for to_load in load: - print(f"Trying to load {to_load}") - if self._module_func('is-avail', to_load)[0] and \ - self._module_func('load', to_load)[0]: - print(f'Loaded {to_load}') + if not self._silent: + print(f"Trying to load {to_load}") + if self._module_func('is-avail', to_load)[0] and self._module_func('load', to_load)[0]: + if not self._silent: + print(f'Loaded {to_load}') return True return False def _mpi_module_old(self, mpi): - """attempt to load the requested module""" + """Attempt to load the requested module""" load = [] for key, value in self._mpi_map.items(): if key == mpi: @@ -162,7 +165,7 @@ def load_mpi(self, mpi): return True def show_avail(self): - """list available modules""" + """List available modules""" try: status, output = self._module_func('avail') if not status: @@ 
-172,12 +175,12 @@ def show_avail(self): return output def get_map(self, key): - """return the mpi map""" + """Return the mpi map""" return self._mpi_map[key] -def load_mpi(mpi): - """global function to load MPI into os.environ""" +def load_mpi(mpi, silent=False): + """Global function to load MPI into os.environ""" # On Ubuntu, MPI stacks use alternatives and need root to change their # pointer, so just verify that the desired MPI is loaded if distro.id() == "ubuntu": @@ -201,19 +204,19 @@ def load_mpi(mpi): return False if _env_module.env_module_init is None: - _env_module.env_module_init = _env_module() + _env_module.env_module_init = _env_module(silent) return _env_module.env_module_init.load_mpi(mpi) def show_avail(): - """global function to show the available modules""" + """Global function to show the available modules""" if _env_module.env_module_init is None: _env_module.env_module_init = _env_module() return _env_module.env_module_init.show_avail() def get_module_list(key): - """global function to show the modules that map to a key""" + """Global function to show the modules that map to a key""" if _env_module.env_module_init is None: _env_module.env_module_init = _env_module() return _env_module.env_module_init.get_map(key) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 9163bfb0ed7..f016e4295b0 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -1125,6 +1125,11 @@ def _parse_config(self, env, opts): return + def _print(self, msg): + if GetOption('silent'): + return + print(msg) + def has_missing_targets(self, env): """Check for expected build targets (e.g. 
libraries or headers)""" # pylint: disable=too-many-return-statements @@ -1151,7 +1156,7 @@ def has_missing_targets(self, env): print('help set') return True - print(f"Checking targets for component '{self.name}'") + self._print(f"Checking targets for component '{self.name}'") config = env.Configure() config_cb = self.key_words.get("config_cb", None) @@ -1244,7 +1249,6 @@ def configure(self): def set_environment(self, env, needed_libs): """Modify the specified construction environment to build with the external component""" - if self.skip_arch: return diff --git a/site_scons/site_tools/compiler_setup.py b/site_scons/site_tools/compiler_setup.py index 1f20d00b050..8c91ff2f535 100644 --- a/site_scons/site_tools/compiler_setup.py +++ b/site_scons/site_tools/compiler_setup.py @@ -39,8 +39,9 @@ def _base_setup(env): compiler = env['CC'] build_type = env['BUILD_TYPE'] - print(f'Setting up compile environment for {compiler}') - print(f"Build type is '{build_type}'") + if not GetOption('silent'): + print(f'Setting up compile environment for {compiler}') + print(f"Build type is '{build_type}'") prev_compiler = env.get('BSETUP', False) if prev_compiler: diff --git a/site_scons/site_tools/daos_builder.py b/site_scons/site_tools/daos_builder.py index 36676952ffd..8afd254a182 100644 --- a/site_scons/site_tools/daos_builder.py +++ b/site_scons/site_tools/daos_builder.py @@ -224,6 +224,10 @@ def _configure_mpi(self): if GetOption('help'): return None + def _print(msg): + if not GetOption('silent'): + print(msg) + env = self.Clone() env['CXX'] = None @@ -233,13 +237,13 @@ def _configure_mpi(self): return env for mpi in ['openmpi', 'mpich']: - if not load_mpi(mpi): + if not load_mpi(mpi, GetOption('silent')): continue if _find_mpicc(env): - print(f'{mpi} is installed') + _print(f'{mpi} is installed') return env - print(f'No {mpi} installed and/or loaded') - print("No MPI installed") + _print(f'No {mpi} installed and/or loaded') + _print("No MPI installed") return None diff --git 
a/src/SConscript b/src/SConscript index e440dff1eea..c4cb419e047 100644 --- a/src/SConscript +++ b/src/SConscript @@ -49,7 +49,8 @@ def read_and_save_version(env): '@Template for @': ''} out = env.Substfile(tmpl_hdr_in, SUBST_DICT=subst_dict) - print(f'generated daos version header file: {out[0].abspath}') + if not GetOption('silent'): + print(f'generated daos version header file: {out[0].abspath}') return version diff --git a/src/client/dfs/SConscript b/src/client/dfs/SConscript index a0c12efc139..38512536397 100644 --- a/src/client/dfs/SConscript +++ b/src/client/dfs/SConscript @@ -5,24 +5,29 @@ def configure_lustre(denv): """Do Lustre configure checks""" if GetOption('help') or GetOption('clean'): return denv + + def _print(msg): + if not GetOption('silent'): + print(msg) + # If Lustre installed build a Lustre-aware libduns conf = Configure(denv) gotversion = False if not conf.CheckLibWithHeader('lustreapi', 'linux/lustre/lustre_user.h', 'c'): - print("No installed Lustre version detected") + _print("No installed Lustre version detected") else: - print("Installed Lustre version detected") + _print("Installed Lustre version detected") if not conf.CheckFunc('llapi_unlink_foreign'): - print("Lustre version is not compatible") + _print("Lustre version is not compatible") else: - print("Lustre version is compatible") + _print("Lustre version is compatible") gotversion = True if gotversion is True: - print("Building with Lustre bindings.") + _print("Building with Lustre bindings.") denv.AppendUnique(CCFLAGS=['-DLUSTRE_INCLUDE']) else: - print("Not building with Lustre bindings.") + _print("Not building with Lustre bindings.") return conf.Finish() diff --git a/src/client/serialize/SConscript b/src/client/serialize/SConscript index a50ffca0ebd..83b077d7867 100644 --- a/src/client/serialize/SConscript +++ b/src/client/serialize/SConscript @@ -3,6 +3,7 @@ def scons(): """Execute build""" + Import('env') denv = env.Clone() @@ -19,11 +20,13 @@ def scons(): src = 
['daos_serialize.c'] if have_hdf5 is True: - print("Building with hdf5 bindings.") + if not GetOption('silent'): + print("Building with hdf5 bindings.") daos_serialize = denv.d_library('daos_serialize', src, LIBS=libraries) denv.Install('$PREFIX/lib64/', daos_serialize) else: - print("No installed hdf5 detected, DAOS serialization is not enabled") + if not GetOption('silent'): + print("No installed hdf5 detected, DAOS serialization is not enabled") if __name__ == "SCons.Script": diff --git a/src/tests/suite/SConscript b/src/tests/suite/SConscript index f4872c86e09..efbcba289cb 100644 --- a/src/tests/suite/SConscript +++ b/src/tests/suite/SConscript @@ -1,72 +1,4 @@ """Build test suite""" -import sys -import subprocess # nosec - -TEST_CMOCKA_SKIP = """ -#include -#include -#include -#include - -static void -test(void **state) { skip(); } - -int main(int argc, char **argv) -{ - const struct CMUnitTest tests[] = { - cmocka_unit_test(test), - cmocka_unit_test(test), - }; - return cmocka_run_group_tests(tests, NULL, NULL); -} -""" - - -# pylint: disable-next=invalid-name -def CheckCmockaSkip(context): - """Configure check for cmocka bug""" - context.Message('Checking if cmocka skip() bug is present ... ') - rc = context.TryCompile(TEST_CMOCKA_SKIP, '.c') - if rc == 0: - sys.stdout.write(" (Compile failed) assuming ") - context.Result(not rc) - return rc - rc = context.TryLink(TEST_CMOCKA_SKIP, '.c') - if rc == 0: - sys.stdout.write(" (Link failed) assuming ") - context.Result(not rc) - return rc - prog = context.lastTarget - pname = prog.get_abspath() - rc = subprocess.call(pname, env={"CMOCKA_TEST_ABORT": "1"}, shell=False, - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - # in case of abort rc is -6 instead of 134 (128+6) with shell ... 
- if rc == -6: - sys.stdout.write(" (Bug reproduced) ") - else: - if rc != 0: - sys.stdout.write(" (Other error than bug) assuming ") - else: - sys.stdout.write(" (Bug not reproduced) ") - context.Result(rc) - # return 0 means error - return not rc - - -# pylint: disable=no-member -def configure_cmocka(nenv): - """configure cmocka environment""" - if GetOption('help') or GetOption('clean'): - return nenv - conf = Configure(nenv, custom_tests={'CheckCmockaSkip': CheckCmockaSkip}) - conf.env.AppendUnique(LIBS=['cmocka']) - if not conf.CheckCmockaSkip(): - # it would be cool to be able to check exit code is effectively 134 - # (for abort() upon skip() bug) but in all error cases we should - # decide to use workaround - conf.env.AppendUnique(CCFLAGS=['-DOVERRIDE_CMOCKA_SKIP']) - print("libcmocka with broken skip(), using workaround (DAOS-1093).") - return conf.Finish() def scons(): @@ -97,7 +29,7 @@ def scons(): c_files + daos_test_tgt, LIBS=['daos_common'] + libraries) - newenv = configure_cmocka(denv.Clone()) + newenv = denv.Clone() c_files = Split("""daos_array.c daos_base_tx.c daos_capa.c daos_checksum.c daos_container.c daos_dedup.c daos_degraded.c diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index e423a61433e..49a40e2f62c 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -22,23 +22,6 @@ #include #include -#ifdef OVERRIDE_CMOCKA_SKIP -/* redefine cmocka's skip() so it will no longer abort() - * if CMOCKA_TEST_ABORT=1 - * - * it can't be redefined as a function as it must return from current context - */ -#undef skip -#define skip() \ - do { \ - const char *abort_test = getenv("CMOCKA_TEST_ABORT"); \ - if (abort_test != NULL && abort_test[0] == '1') \ - print_message("Skipped !!!\n"); \ - else \ - _skip(__FILE__, __LINE__); \ - return; \ - } while (0) -#endif #if FAULT_INJECTION #define FAULT_INJECTION_REQUIRED() do { } while (0) From d42b2ab2f088be6296910b2527de3f80b9de39ed Mon Sep 17 00:00:00 2001 From: 
Alexander Oganezov Date: Fri, 8 Sep 2023 15:06:04 -0700 Subject: [PATCH 31/80] DAOS-14328 cart: Do not print error when exceeding context limit (#13011) - Do not print an internal layer error when exceeding the context limit, as daos client layer will try to allocate as many contexts as it can in some usage models. Printing an error is not user friendly in such situations. Signed-off-by: Alexander A Oganezov --- src/cart/crt_hg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 8e871e7c3a6..e6c5fe70fb1 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -617,8 +617,9 @@ crt_provider_get_ctx_idx(bool primary, int provider) } } - D_ERROR("ctx_num %d, will exceed CRT_SRV_CONTEXT_NUM (%d) if create more context.\n", - prov_data->cpg_ctx_num, CRT_SRV_CONTEXT_NUM); + D_DEBUG(DB_ALL, "provider:%d allowed context limit = %d exceeded\n", + provider, CRT_SRV_CONTEXT_NUM); + return -1; } From ef93dbef058175e9aa4540e48964b8a53b1931dc Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sun, 10 Sep 2023 10:50:26 -0500 Subject: [PATCH 32/80] DAOS-14260 dfs: add API to readdir with a filter (#12985) This allows user to utilize the pipeline API to execute a server side find. 
Signed-off-by: Mohamad Chaarawi --- src/client/dfs/dfs.c | 406 +++++++++++++++++++++++++++++++- src/client/dfs/dfs_internal.h | 91 +++++++ src/tests/suite/dfs_unit_test.c | 125 ++++++++++ 3 files changed, 618 insertions(+), 4 deletions(-) diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 9107db9214b..3313c188c64 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -11,19 +11,15 @@ #include #include #include -#include #include #include #include #include -#include #include #include -#include #include "daos.h" #include "daos_fs.h" - #include "dfs_internal.h" /** D-key name of SB metadata */ @@ -7446,3 +7442,405 @@ dfs_obj_fix_type(dfs_t *dfs, dfs_obj_t *parent, const char *name) D_FREE(entry.value); return rc; } + +int +dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size) +{ + daos_handle_t oh; + int rc; + + if (dfs == NULL || !dfs->mounted) + return EINVAL; + if (daos_obj_id2type(oid) != DAOS_OT_ARRAY_BYTE) + return EINVAL; + + rc = daos_array_open_with_attr(dfs->coh, oid, DAOS_TX_NONE, DAOS_OO_RO, 1, + chunk_size ? 
chunk_size : dfs->attr.da_chunk_size, + &oh, NULL); + if (rc != 0) { + D_ERROR("daos_array_open() failed: "DF_RC"\n", DP_RC(rc)); + return daos_der2errno(rc); + } + + rc = daos_array_get_size(oh, DAOS_TX_NONE, size, NULL); + if (rc) { + daos_array_close(oh, NULL); + D_ERROR("daos_array_get_size() failed: "DF_RC"\n", DP_RC(rc)); + return daos_der2errno(rc); + } + + rc = daos_array_close(oh, NULL); + return daos_der2errno(rc); +} + +struct dfs_pipeline { + daos_pipeline_t pipeline; + dfs_predicate_t pred; + + mode_t constant1; + mode_t constant2; + + d_iov_t dkey_iov; + d_iov_t const1_iov; + d_iov_t const2_iov; + d_iov_t const3_iov; + + daos_filter_part_t dkey_ft; + daos_filter_part_t akey1_ft; + daos_filter_part_t akey2_ft; + daos_filter_part_t const0_ft; + daos_filter_part_t const1_ft; + daos_filter_part_t const2_ft; + daos_filter_part_t const3_ft; + daos_filter_part_t like_ft; + daos_filter_part_t ba_ft; + daos_filter_part_t eq_ft; + daos_filter_part_t gt_ft; + daos_filter_part_t and_ft; + daos_filter_part_t or_ft; + + daos_filter_t pipef; +}; + +#define DKEY_F "DAOS_FILTER_DKEY" +#define AKEY_F "DAOS_FILTER_AKEY" +#define CONST_F "DAOS_FILTER_CONST" +#define BINARY_F "DAOS_FILTER_TYPE_BINARY" +#define INT8_F "DAOS_FILTER_TYPE_UINTEGER8" +#define INT4_F "DAOS_FILTER_TYPE_UINTEGER4" +#define LIKE_F "DAOS_FILTER_FUNC_LIKE" +#define GT_F "DAOS_FILTER_FUNC_GT" +#define EQ_F "DAOS_FILTER_FUNC_EQ" +#define BA_F "DAOS_FILTER_FUNC_BITAND" +#define AND_F "DAOS_FILTER_FUNC_AND" +#define OR_F "DAOS_FILTER_FUNC_OR" +#define COND_F "DAOS_FILTER_CONDITION" + +int +dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **_dpipe) +{ + daos_size_t bin_flen = sizeof(BINARY_F) - 1; + daos_size_t dkey_flen = sizeof(DKEY_F) - 1; + daos_size_t akey_flen = sizeof(AKEY_F) - 1; + daos_size_t const_flen = sizeof(CONST_F) - 1; + daos_size_t int8_flen = sizeof(INT8_F) - 1; + daos_size_t int4_flen = sizeof(INT4_F) - 1; + daos_size_t like_flen = sizeof(LIKE_F) - 
1; + daos_size_t gt_flen = sizeof(GT_F) - 1; + daos_size_t eq_flen = sizeof(EQ_F) - 1; + daos_size_t ba_flen = sizeof(BA_F) - 1; + daos_size_t and_flen = sizeof(AND_F) - 1; + daos_size_t or_flen = sizeof(OR_F) - 1; + daos_size_t cond_flen = sizeof(COND_F) - 1; + dfs_pipeline_t *dpipe; + int rc; + + D_ALLOC_PTR(dpipe); + if (dpipe == NULL) + return ENOMEM; + + /** copy the user predicate conditions */ + memcpy(&dpipe->pred, &pred, sizeof(dfs_predicate_t)); + + daos_pipeline_init(&dpipe->pipeline); + + /** build condition for entry name */ + if (flags & DFS_FILTER_NAME) { + daos_size_t name_len; + + name_len = strnlen(dpipe->pred.dp_name, DFS_MAX_NAME); + + d_iov_set(&dpipe->dkey_ft.part_type, DKEY_F, dkey_flen); + d_iov_set(&dpipe->dkey_ft.data_type, BINARY_F, bin_flen); + dpipe->dkey_ft.data_len = DFS_MAX_NAME; + + d_iov_set(&dpipe->const0_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const0_ft.data_type, BINARY_F, bin_flen); + dpipe->const0_ft.num_constants = 1; + dpipe->const0_ft.constant = &dpipe->dkey_iov; + d_iov_set(dpipe->const0_ft.constant, dpipe->pred.dp_name, name_len); + + d_iov_set(&dpipe->like_ft.part_type, LIKE_F, like_flen); + dpipe->like_ft.num_operands = 2; + } + + /** build condition for newer than ctime */ + if (flags & DFS_FILTER_NEWER) { + d_iov_set(&dpipe->akey2_ft.part_type, AKEY_F, akey_flen); + d_iov_set(&dpipe->akey2_ft.data_type, INT8_F, int8_flen); + d_iov_set(&dpipe->akey2_ft.akey, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + dpipe->akey2_ft.data_offset = CTIME_IDX; + dpipe->akey2_ft.data_len = sizeof(time_t); + + d_iov_set(&dpipe->const3_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const3_ft.data_type, INT8_F, int8_flen); + dpipe->const3_ft.num_constants = 1; + dpipe->const3_ft.constant = &dpipe->const3_iov; + d_iov_set(dpipe->const3_ft.constant, &dpipe->pred.dp_newer, sizeof(time_t)); + + d_iov_set(&dpipe->gt_ft.part_type, GT_F, gt_flen); + dpipe->gt_ft.num_operands = 2; + } + + /** If filter on dirs is not 
requested, return all dirs so they can be traversed */ + if (!(flags & DFS_FILTER_INCLUDE_DIRS)) { + d_iov_set(&dpipe->akey1_ft.part_type, AKEY_F, akey_flen); + d_iov_set(&dpipe->akey1_ft.data_type, INT4_F, int4_flen); + d_iov_set(&dpipe->akey1_ft.akey, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + dpipe->akey1_ft.data_offset = MODE_IDX; + dpipe->akey1_ft.data_len = sizeof(mode_t); + + dpipe->constant1 = S_IFMT; + d_iov_set(&dpipe->const1_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const1_ft.data_type, INT4_F, int4_flen); + dpipe->const1_ft.num_constants = 1; + dpipe->const1_ft.constant = &dpipe->const1_iov; + d_iov_set(dpipe->const1_ft.constant, &dpipe->constant1, sizeof(mode_t)); + + dpipe->constant2 = S_IFDIR; + d_iov_set(&dpipe->const2_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const2_ft.data_type, INT4_F, int4_flen); + dpipe->const2_ft.num_constants = 1; + dpipe->const2_ft.constant = &dpipe->const2_iov; + d_iov_set(dpipe->const2_ft.constant, &dpipe->constant2, sizeof(mode_t)); + + d_iov_set(&dpipe->ba_ft.part_type, BA_F, ba_flen); + dpipe->ba_ft.num_operands = 2; + + d_iov_set(&dpipe->eq_ft.part_type, EQ_F, eq_flen); + dpipe->eq_ft.num_operands = 2; + } + + /** build final condition: IS_DIR || (entry name match && newer match) */ + + d_iov_set(&dpipe->and_ft.part_type, AND_F, and_flen); + dpipe->and_ft.num_operands = 2; + + d_iov_set(&dpipe->or_ft.part_type, OR_F, or_flen); + dpipe->or_ft.num_operands = 2; + + /** initialize and add all the parts to the pipeline */ + daos_filter_init(&dpipe->pipef); + d_iov_set(&dpipe->pipef.filter_type, COND_F, cond_flen); + + if (!(flags & DFS_FILTER_INCLUDE_DIRS)) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->or_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + + rc = daos_filter_add(&dpipe->pipef, &dpipe->eq_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->ba_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = 
daos_filter_add(&dpipe->pipef, &dpipe->akey1_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const1_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const2_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NEWER && flags & DFS_FILTER_NAME) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->and_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NAME) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->like_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->dkey_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const0_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NEWER) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->gt_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->akey2_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const3_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + rc = daos_pipeline_add(&dpipe->pipeline, &dpipe->pipef); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + + *_dpipe = dpipe; + return 0; +err: + printf("failed to create pipeline. 
rc = %d\n", rc); + D_FREE(dpipe); + return rc; +} + +int +dfs_pipeline_destroy(dfs_pipeline_t *dpipe) +{ + if (dpipe->pipeline.num_filters) + D_FREE(dpipe->pipeline.filters); + D_FREE(dpipe); + return 0; +} + +int +dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, + uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csize, + uint64_t *nr_scanned) +{ + daos_iod_t iod; + daos_key_desc_t *kds; + d_sg_list_t sgl_keys, sgl_recs; + d_iov_t iov_keys, iov_recs; + char *buf_keys = NULL, *buf_recs = NULL; + daos_recx_t recxs[4]; + uint32_t nr_iods, nr_kds, key_nr, i; + daos_size_t record_len; + int rc = 0; + + if (dfs == NULL || !dfs->mounted) + return EINVAL; + if (obj == NULL || !S_ISDIR(obj->mode)) + return ENOTDIR; + if (*nr == 0) + return 0; + if (dpipe == NULL || dirs == NULL || anchor == NULL) + return EINVAL; + + /* IOD to retrieve the mode_t and the ctime */ + iod.iod_nr = 2; + iod.iod_size = 1; + recxs[0].rx_idx = MODE_IDX; + recxs[0].rx_nr = sizeof(mode_t); + recxs[1].rx_idx = CTIME_IDX; + recxs[1].rx_nr = sizeof(time_t); + iod.iod_recxs = recxs; + iod.iod_type = DAOS_IOD_ARRAY; + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + record_len = recxs[0].rx_nr + recxs[1].rx_nr; + + if (oids) { + recxs[iod.iod_nr].rx_idx = OID_IDX; + recxs[iod.iod_nr].rx_nr = sizeof(daos_obj_id_t); + record_len += recxs[iod.iod_nr].rx_nr; + iod.iod_nr ++; + } + if (csize) { + recxs[iod.iod_nr].rx_idx = CSIZE_IDX; + recxs[iod.iod_nr].rx_nr = sizeof(daos_size_t); + record_len += recxs[iod.iod_nr].rx_nr; + iod.iod_nr ++; + } + + nr_kds = *nr; + nr_iods = 1; + + D_ALLOC_ARRAY(kds, nr_kds); + if (kds == NULL) + return ENOMEM; + + /** alloc buffer to store dkeys enumerated */ + sgl_keys.sg_nr = 1; + sgl_keys.sg_nr_out = 0; + sgl_keys.sg_iovs = &iov_keys; + D_ALLOC_ARRAY(buf_keys, nr_kds * DFS_MAX_NAME); + if (buf_keys == NULL) + D_GOTO(out, rc = ENOMEM); + d_iov_set(&iov_keys, buf_keys, nr_kds * 
DFS_MAX_NAME); + + + /** alloc buffer to store records enumerated */ + sgl_recs.sg_nr = 1; + sgl_recs.sg_nr_out = 0; + sgl_recs.sg_iovs = &iov_recs; + D_ALLOC_ARRAY(buf_recs, nr_kds * record_len); + if (buf_recs == NULL) + D_GOTO(out, rc = ENOMEM); + d_iov_set(&iov_recs, buf_recs, nr_kds * record_len); + + key_nr = 0; + *nr_scanned = 0; + while (!daos_anchor_is_eof(anchor)) { + daos_pipeline_stats_t stats = {0}; + char *ptr1; + + memset(buf_keys, 0, *nr * DFS_MAX_NAME); + + rc = daos_pipeline_run(dfs->coh, obj->oh, &dpipe->pipeline, DAOS_TX_NONE, 0, NULL, + &nr_iods, &iod, anchor, &nr_kds, kds, &sgl_keys, &sgl_recs, + NULL, NULL, &stats, NULL); + if (rc) + D_GOTO(out, rc = daos_der2errno(rc)); + + D_ASSERT(nr_iods == 1); + ptr1 = buf_keys; + + for (i = 0; i < nr_kds; i++) { + char *ptr2; + mode_t mode; + char *dkey = (char *)ptr1; + + /** set the dentry name */ + memcpy(dirs[key_nr].d_name, dkey, kds[i].kd_key_len); + dirs[key_nr].d_name[kds[i].kd_key_len] = '\0'; + + /** set the dentry type */ + ptr2 = &buf_recs[i * record_len]; + mode = *((mode_t *)ptr2); + + if (S_ISDIR(mode)) { + dirs[key_nr].d_type = DT_DIR; + } else if (S_ISREG(mode)) { + dirs[key_nr].d_type = DT_REG; + } else if (S_ISLNK(mode)) { + dirs[key_nr].d_type = DT_LNK; + } else { + D_ERROR("Invalid DFS entry type found, possible data corruption\n"); + D_GOTO(out, rc = EINVAL); + } + + /** set the OID for dentry if requested */ + if (oids) { + ptr2 += sizeof(mode_t) + sizeof(time_t); + oid_cp(&oids[key_nr], *((daos_obj_id_t *)ptr2)); + } + + /** set the chunk size for dentry if requested */ + if (csize) { + if (oids) + ptr2 += sizeof(daos_obj_id_t); + else + ptr2 += sizeof(mode_t) + sizeof(time_t); + csize[key_nr] = *((daos_size_t *)ptr2); + } + + key_nr++; + ptr1 += kds[i].kd_key_len; + } + + *nr_scanned += stats.nr_dkeys; + nr_kds = *nr - key_nr; + if (nr_kds == 0) + break; + } + *nr = key_nr; + +out: + D_FREE(kds); + D_FREE(buf_recs); + D_FREE(buf_keys); + return rc; +} diff --git 
a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index c337ec1bf42..83ac13aeaab 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -131,6 +131,97 @@ dfs_relink_root(daos_handle_t coh); int dfs_ostatx(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, daos_event_t *ev); +/** Internal pipeline readdir functionality */ + +/** DFS pipeline object */ +typedef struct dfs_pipeline dfs_pipeline_t; + +enum { + DFS_FILTER_NAME = (1 << 1), + DFS_FILTER_NEWER = (1 << 2), + DFS_FILTER_INCLUDE_DIRS = (1 << 3), +}; + +/** Predicate conditions for filter */ +typedef struct { + char dp_name[DFS_MAX_NAME]; /** name condition for entry - regex */ + time_t dp_newer; /** timestamp for newer condition */ + size_t dp_size; /** size of files - not supported for now */ +} dfs_predicate_t; + +/** + * Same as dfs_get_size() but using the OID of the file instead of the open handle. Note that the + * chunk_size of the file is also required to be passed if the file was created with a different + * chunk size than the default (passing other than 0 to dfs_open). Otherwise, 0 should be passed to + * chunk size. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] oid Object ID of the file. + * \param[in] chunk_size Chunk size of the file (pass 0 if it was created with default). + * \param[out] size Returned size of the file. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size); + +/** + * Create a pipeline object to be used during readdir with filter. Should be destroyed with + * dfs_pipeline_destroy(). + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] pred Predicate condition values (name/regex, newer timestamp, etc.). + * \param[in] flags Pipeline flags (conditions to apply). + * \param[out] dpipe Pipeline object created. + * + * \return 0 on success, errno code on failure. 
+ */ +int +dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **dpipe); + +/** + * Destroy pipeline object. + * + * \param[in] dpipe Pipeline object. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_pipeline_destroy(dfs_pipeline_t *dpipe); + +/** + * Same as dfs_readdir() but this additionally applies a filter created with dfs_pipeline_create() + * on the entries that are enumerated. This function also optionally returns the object ID of each + * dirent if requested through a pre-allocated OID input array. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] obj Opened directory object. + * \param[in] dpipe DFS pipeline filter. + * \param[in,out] + * anchor Hash anchor for the next call, it should be set to + * zeroes for the first call, it should not be changed + * by caller between calls. + * \param[in,out] + * nr [in]: number of dirents allocated in \a dirs. + * [out]: number of returned dirents. + * \param[in,out] + * dirs [in] preallocated array of dirents. + * [out]: dirents returned with d_name filled only. + * \param[in,out] + * oids [in] Optional preallocated array of object IDs. + * [out]: Object ID associated with each dirent that was read. + * \param[in,out] + * csizes [in] Optional preallocated array of sizes. + * [out]: chunk size associated with each dirent that was read. + * \param[out] Total number of entries scanned by readdir before returning. + * + * \return 0 on success, errno code on failure. 
+ */ +int +dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, + uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csizes, + uint64_t *nr_scanned); + #if defined(__cplusplus) } #endif diff --git a/src/tests/suite/dfs_unit_test.c b/src/tests/suite/dfs_unit_test.c index c1def757c4c..47845f57b20 100644 --- a/src/tests/suite/dfs_unit_test.c +++ b/src/tests/suite/dfs_unit_test.c @@ -3053,6 +3053,129 @@ dfs_test_fix_chunk_size(void **state) D_FREE(buf); } +#define NUM_ENTRIES 1024 +#define NR_ENUM 64 + +static void +dfs_test_pipeline_find(void **state) +{ + dfs_obj_t *dir1, *f1; + int i; + time_t ts = 0; + mode_t create_mode = S_IWUSR | S_IRUSR; + int create_flags = O_RDWR | O_CREAT | O_EXCL; + char *dirname = "pipeline_dir"; + int rc; + + rc = dfs_open(dfs_mt, NULL, dirname, create_mode | S_IFDIR, create_flags, + OC_SX, 0, NULL, &dir1); + assert_int_equal(rc, 0); + + for (i = 0; i < NUM_ENTRIES; i++) { + char name[24]; + + /* create 1 dir for every 100 files */ + if (i % 100 == 0) { + sprintf(name, "dir.%d", i); + rc = dfs_mkdir(dfs_mt, dir1, name, create_mode | S_IFDIR, 0); + assert_int_equal(rc, 0); + } else { + daos_obj_id_t oid; + + sprintf(name, "file.%d", i); + rc = dfs_open(dfs_mt, dir1, name, create_mode | S_IFREG, create_flags, 0, 0, + NULL, &f1); + assert_int_equal(rc, 0); + + dfs_obj2id(f1, &oid); + /* printf("File %s \t OID: %"PRIu64".%"PRIu64"\n", name, oid.hi, oid.lo); */ + + rc = dfs_release(f1); + assert_int_equal(rc, 0); + } + + if (i == NUM_ENTRIES / 2) { + sleep(1); + ts = time(NULL); + sleep(1); + } + } + + dfs_predicate_t pred = {0}; + dfs_pipeline_t *dpipe = NULL; + + strcpy(pred.dp_name, "%.6%"); + pred.dp_newer = ts; + rc = dfs_pipeline_create(dfs_mt, pred, DFS_FILTER_NAME | DFS_FILTER_NEWER, &dpipe); + assert_int_equal(rc, 0); + + + uint32_t num_split = 0, j; + + rc = dfs_obj_anchor_split(dir1, &num_split, NULL); + assert_int_equal(rc, 0); + print_message("Anchor split in %u 
parts\n", num_split); + + daos_anchor_t *anchors; + struct dirent *dents = NULL; + daos_obj_id_t *oids = NULL; + daos_size_t *csizes = NULL; + + anchors = malloc(sizeof(daos_anchor_t) * num_split); + dents = malloc (sizeof(struct dirent) * NR_ENUM); + oids = calloc(NR_ENUM, sizeof(daos_obj_id_t)); + csizes = calloc(NR_ENUM, sizeof(daos_size_t)); + + uint64_t nr_total = 0, nr_matched = 0, nr_scanned; + + for (j = 0; j < num_split; j++) { + daos_anchor_t *anchor = &anchors[j]; + uint32_t nr; + + memset(anchor, 0, sizeof(daos_anchor_t)); + + rc = dfs_obj_anchor_set(dir1, j, anchor); + assert_int_equal(rc, 0); + + while (!daos_anchor_is_eof(anchor)) { + nr = NR_ENUM; + rc = dfs_readdir_with_filter(dfs_mt, dir1, dpipe, anchor, &nr, dents, oids, + csizes, &nr_scanned); + assert_int_equal(rc, 0); + + nr_total += nr_scanned; + nr_matched += nr; + + for (i = 0; i < nr; i++) { + print_message("Name: %s\t", dents[i].d_name); + print_message("OID: %"PRIu64".%"PRIu64"\t", oids[i].hi, oids[i].lo); + print_message("CSIZE = %zu\n", csizes[i]); + if (dents[i].d_type == DT_DIR) + print_message("Type: DIR\n"); + else if (dents[i].d_type == DT_REG) + print_message("Type: FILE\n"); + else + assert(0); + } + } + } + + print_message("total entries scanned = %"PRIu64"\n", nr_total); + print_message("total entries matched = %"PRIu64"\n", nr_matched); + + free(dents); + free(anchors); + free(oids); + free(csizes); + rc = dfs_pipeline_destroy(dpipe); + assert_int_equal(rc, 0); + /** close / finalize */ + rc = dfs_release(dir1); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, dirname, true, NULL); + assert_int_equal(rc, 0); +} + static const struct CMUnitTest dfs_unit_tests[] = { { "DFS_UNIT_TEST1: DFS mount / umount", dfs_test_mount, async_disable, test_case_teardown}, @@ -3106,6 +3229,8 @@ static const struct CMUnitTest dfs_unit_tests[] = { dfs_test_relink_root, async_disable, test_case_teardown}, { "DFS_UNIT_TEST26: dfs MWC chunk size fix", dfs_test_fix_chunk_size, 
async_disable, test_case_teardown}, + { "DFS_UNIT_TEST27: dfs pipeline find", + dfs_test_pipeline_find, async_disable, test_case_teardown}, }; static int From 60004a11f664ea26fdfafebc46a2a2fe3b1ffd81 Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Sun, 10 Sep 2023 10:55:05 -0500 Subject: [PATCH 33/80] client: bump hadoop-common version from 3.3.3 to 3.3.6 (#13019) minimize possible vulnerabilities in dependent packages per SDL requirement. Signed-off-by: Lei Huang --- src/client/java/hadoop-daos/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/java/hadoop-daos/pom.xml b/src/client/java/hadoop-daos/pom.xml index 34ecdf445ac..7f8dac9f9f6 100644 --- a/src/client/java/hadoop-daos/pom.xml +++ b/src/client/java/hadoop-daos/pom.xml @@ -15,7 +15,7 @@ jar - 3.3.3 + 3.3.6 ${project.basedir}/build ${project.basedir}/install From c75710cc4cfe549c125f7ec06e513b6aad2f6dab Mon Sep 17 00:00:00 2001 From: wangdi Date: Mon, 11 Sep 2023 06:35:08 -0700 Subject: [PATCH 34/80] DAOS-14208 rebuild: several fixes for multiple shards in the same target (#13022) Checking rebuilding shard by comparing the old and new layout map, similar as drain/reintegration, since non-failure shard might be moved to other targets due to co-locate and failure domain factors. Add reclaim phase for rebuild as well. Add test to verify it. A few other fixes due to this. Increase rebuild EC timeout value. 
Signed-off-by: Di Wang --- src/object/srv_obj_migrate.c | 59 ++++-------- src/placement/jump_map.c | 96 +++++++------------ src/placement/pl_map_common.c | 3 +- src/placement/tests/jump_map_place_obj.c | 2 +- src/rebuild/scan.c | 2 +- src/rebuild/srv.c | 3 +- src/tests/ftest/daos_test/suite.yaml | 2 +- src/tests/ftest/rebuild/basic.py | 2 +- .../ftest/rebuild/container_create_race.py | 4 +- src/tests/ftest/rebuild/with_io.py | 2 +- src/tests/ftest/util/rebuild_test_base.py | 4 +- src/tests/suite/daos_rebuild_ec.c | 44 ++++++++- 12 files changed, 111 insertions(+), 112 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 51280364c2b..c3bc5472b83 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -620,7 +620,6 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, d_iov_t *csum_iov_fetch) { struct migrate_pool_tls *tls; - struct dc_object *obj; int rc = 0; tls = migrate_pool_tls_lookup(mrone->mo_pool_uuid, @@ -634,21 +633,7 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, if (daos_oclass_grp_size(&mrone->mo_oca) > 1) flags |= DIOF_TO_LEADER; - /** - * For EC data migration, let's force it to do degraded fetch, - * make sure reintegration will not fetch from the original - * shard, which might cause parity corruption. 
- */ - obj = obj_hdl2ptr(oh); - if (iods[0].iod_type != DAOS_IOD_SINGLE && - daos_oclass_is_ec(&mrone->mo_oca) && - is_ec_data_shard(obj, mrone->mo_dkey_hash, mrone->mo_oid.id_shard) && - obj_ec_parity_alive(oh, mrone->mo_dkey_hash, NULL)) - flags |= DIOF_FOR_FORCE_DEGRADE; - - obj_decref(obj); - - rc = dsc_obj_fetch(oh, mrone->mo_epoch, &mrone->mo_dkey, + rc = dsc_obj_fetch(oh, eph, &mrone->mo_dkey, iod_num, iods, sgls, NULL, flags, NULL, csum_iov_fetch); if (rc != 0) @@ -669,7 +654,7 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, csum_iov_fetch->iov_len = 0; csum_iov_fetch->iov_buf = p; - rc = dsc_obj_fetch(oh, mrone->mo_epoch, &mrone->mo_dkey, iod_num, iods, sgls, + rc = dsc_obj_fetch(oh, eph, &mrone->mo_dkey, iod_num, iods, sgls, NULL, flags, NULL, csum_iov_fetch); } @@ -1223,7 +1208,8 @@ migrate_fetch_update_single(struct migrate_one *mrone, daos_handle_t oh, static int __migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, - daos_iod_t *iods, int iod_num, daos_epoch_t update_eph, + daos_iod_t *iods, int iod_num, daos_epoch_t fetch_eph, + daos_epoch_t update_eph, uint32_t flags, struct ds_cont_child *ds_cont) { d_sg_list_t sgls[OBJ_ENUM_UNPACK_MAX_IODS]; @@ -1282,8 +1268,7 @@ __migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, p_csum_iov = &csum_iov; } - rc = mrone_obj_fetch(mrone, oh, sgls, iods, iod_num, mrone->mo_epoch, - flags, p_csum_iov); + rc = mrone_obj_fetch(mrone, oh, sgls, iods, iod_num, fetch_eph, flags, p_csum_iov); if (rc) { D_ERROR("migrate dkey "DF_KEY" failed: "DF_RC"\n", DP_KEY(&mrone->mo_dkey), DP_RC(rc)); @@ -1358,6 +1343,7 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, if (!daos_oclass_is_ec(&mrone->mo_oca)) return __migrate_fetch_update_bulk(mrone, oh, mrone->mo_iods, mrone->mo_iod_num, + mrone->mo_epoch, mrone->mo_min_epoch, DIOF_FOR_MIGRATION, ds_cont); @@ -1370,22 +1356,19 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, 
daos_handle_t oh, * this data shard. */ - if (mrone->mo_iods_num_from_parity > 0) { - daos_epoch_t min_eph = DAOS_EPOCH_MAX; + for (i = 0; i < mrone->mo_iods_num_from_parity; i++) { + for (j = 0; j < mrone->mo_iods_from_parity[i].iod_nr; j++) { + daos_iod_t iod = mrone->mo_iods_from_parity[i]; - for (i = 0; i < mrone->mo_iods_num_from_parity; i++) { - for (j = 0; j < mrone->mo_iods_from_parity[i].iod_nr; j++) - min_eph = min(min_eph, - mrone->mo_iods_update_ephs_from_parity[i][j]); + iod.iod_nr = 1; + iod.iod_recxs = &mrone->mo_iods_from_parity[i].iod_recxs[j]; + rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, + mrone->mo_iods_update_ephs_from_parity[i][j], + mrone->mo_iods_update_ephs_from_parity[i][j], + DIOF_EC_RECOV_FROM_PARITY, ds_cont); + if (rc != 0) + D_GOTO(out, rc); } - - rc = __migrate_fetch_update_bulk(mrone, oh, mrone->mo_iods_from_parity, - mrone->mo_iods_num_from_parity, - min_eph, - DIOF_FOR_MIGRATION | DIOF_EC_RECOV_FROM_PARITY, - ds_cont); - if (rc != 0) - D_GOTO(out, rc); } /* The data, rebuilt from replication, needs to keep the same epoch during rebuild, @@ -1401,6 +1384,7 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, iod.iod_nr = 1; iod.iod_recxs = &mrone->mo_iods[i].iod_recxs[j]; rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, + mrone->mo_epoch, mrone->mo_iods_update_ephs[i][j], DIOF_FOR_MIGRATION, ds_cont); if (rc < 0) { @@ -2343,10 +2327,9 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) migrate_tgt_off = obj_ec_shard_off_by_layout_ver(layout_ver, io->ui_dkey_hash, &arg->oc_attr, shard); unpack_tgt_off = obj_ec_shard_off(obj, io->ui_dkey_hash, io->ui_oid.id_shard); - if ((rc == 1 && + if (rc == 1 && (is_ec_data_shard_by_tgt_off(unpack_tgt_off, &arg->oc_attr) || - (io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) || - (tls->mpt_opc == RB_OP_EXCLUDE && io->ui_oid.id_shard == shard)) { + (io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) { 
D_DEBUG(DB_REBUILD, DF_UOID" ignore shard "DF_KEY"/%u/%d/%u/%d.\n", DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); @@ -2579,7 +2562,7 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, /* Only open with RW flag, reintegrating flag will be set, which is needed * during unpack_cb to check if parity shard alive. */ - rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RW, &oh); + rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RO, &oh); if (rc) { D_ERROR("dsc_obj_open failed: "DF_RC"\n", DP_RC(rc)); D_GOTO(out_cont, rc); diff --git a/src/placement/jump_map.c b/src/placement/jump_map.c index bbcc07f2dc1..1b57aff1719 100644 --- a/src/placement/jump_map.c +++ b/src/placement/jump_map.c @@ -734,6 +734,8 @@ get_object_layout(struct pl_jump_map *jmap, uint32_t layout_ver, struct pl_obj_l } else { if (domain != NULL) setbit(dom_cur_grp_real, domain - root); + if (pool_target_down(target)) + layout->ol_shards[k].po_rebuilding = 1; } if (is_extending != NULL && pool_target_is_up_or_drain(target)) @@ -743,7 +745,7 @@ get_object_layout(struct pl_jump_map *jmap, uint32_t layout_ver, struct pl_obj_l if (fail_tgt_cnt > 0) rc = obj_remap_shards(jmap, layout_ver, md, layout, jmop, &remap_list, out_list, - allow_status, md->omd_ver, tgts_used, dom_used, dom_full, + allow_status, allow_version, tgts_used, dom_used, dom_full, fail_tgt_cnt, is_extending, fdom_lvl); out: if (rc) @@ -1025,7 +1027,12 @@ jump_map_obj_place(struct pl_map *map, uint32_t layout_version, struct daos_obj_ return rc; } - allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; + if (mode & DAOS_OO_RO) + allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | + PO_COMP_ST_DOWN; + else + allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; + rc = obj_layout_alloc_and_get(jmap, layout_version, &jmop, md, allow_status, md->omd_ver, &layout, NULL, &is_extending); if (rc != 0) { @@ -1090,66 +1097,16 @@ jump_map_obj_place(struct pl_map 
*map, uint32_t layout_version, struct daos_obj_ * another target, Or 0 if none need to be rebuilt. */ static int -jump_map_obj_find_rebuild(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, - struct daos_obj_shard_md *shard_md, uint32_t rebuild_ver, - uint32_t *tgt_id, uint32_t *shard_idx, unsigned int array_size) -{ - struct pl_jump_map *jmap; - struct pl_obj_layout *layout; - d_list_t remap_list; - struct jm_obj_placement jmop; - daos_obj_id_t oid; - int rc; - - int idx = 0; - - D_DEBUG(DB_PL, "Finding Rebuild at version: %u\n", rebuild_ver); - - /* Caller should guarantee the pl_map is up-to-date */ - if (pl_map_version(map) < rebuild_ver) { - D_ERROR("pl_map version(%u) < rebuild version(%u)\n", - pl_map_version(map), rebuild_ver); - return -DER_INVAL; - } - - jmap = pl_map2jmap(map); - oid = md->omd_id; - - rc = jm_obj_placement_init(jmap, md, shard_md, &jmop); - if (rc) { - D_ERROR("jm_obj_placement_init failed, rc "DF_RC"\n", DP_RC(rc)); - return rc; - } - - D_INIT_LIST_HEAD(&remap_list); - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jmop, md, PO_COMP_ST_UPIN, - rebuild_ver, &layout, &remap_list, NULL); - if (rc < 0) - D_GOTO(out, rc); - - obj_layout_dump(oid, layout); - rc = remap_list_fill(map, md, shard_md, rebuild_ver, tgt_id, shard_idx, - array_size, &idx, layout, &remap_list, false); - -out: - jm_obj_placement_fini(&jmop); - remap_list_free_all(&remap_list); - if (layout != NULL) - pl_obj_layout_free(layout); - return rc < 0 ? 
rc : idx; -} - -static int -jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, - struct daos_obj_shard_md *shard_md, uint32_t reint_ver, - uint32_t *tgt_rank, uint32_t *shard_id, unsigned int array_size) +jump_map_obj_find_diff(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t reint_ver, + uint32_t old_status, uint32_t new_status, + uint32_t *tgt_rank, uint32_t *shard_id, unsigned int array_size) { struct pl_jump_map *jmap; struct pl_obj_layout *layout = NULL; struct pl_obj_layout *reint_layout = NULL; d_list_t reint_list; struct jm_obj_placement jop; - uint32_t allow_status; int rc; int idx = 0; @@ -1170,16 +1127,14 @@ jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj return rc; } - allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; D_INIT_LIST_HEAD(&reint_list); - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, allow_status, + rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, old_status, reint_ver, &layout, NULL, NULL); if (rc < 0) D_GOTO(out, rc); obj_layout_dump(md->omd_id, layout); - allow_status |= PO_COMP_ST_UP; - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, allow_status, + rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, new_status, reint_ver, &reint_layout, NULL, NULL); if (rc < 0) D_GOTO(out, rc); @@ -1200,6 +1155,27 @@ jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj return rc < 0 ? 
rc : idx; } +static int +jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t reint_ver, + uint32_t *tgt_id, uint32_t *shard_id, unsigned int array_size) +{ + return jump_map_obj_find_diff(map, layout_ver, md, shard_md, reint_ver, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | PO_COMP_ST_UP, + tgt_id, shard_id, array_size); +} + +static int +jump_map_obj_find_rebuild(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t rebuild_ver, + uint32_t *tgt_id, uint32_t *shard_id, unsigned int array_size) +{ + return jump_map_obj_find_diff(map, layout_ver, md, shard_md, rebuild_ver, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | PO_COMP_ST_DOWN, + PO_COMP_ST_UPIN, tgt_id, shard_id, array_size); +} + /** API for generic placement map functionality */ struct pl_map_ops jump_map_ops = { .o_create = jump_map_create, diff --git a/src/placement/pl_map_common.c b/src/placement/pl_map_common.c index 691d0e1e600..47f620d6635 100644 --- a/src/placement/pl_map_common.c +++ b/src/placement/pl_map_common.c @@ -327,7 +327,8 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md, * skip this shard. 
*/ if (f_shard->fs_status == PO_COMP_ST_DOWN || - f_shard->fs_status == PO_COMP_ST_DRAIN) + f_shard->fs_status == PO_COMP_ST_DRAIN || + pool_target_down(spare_tgt)) l_shard->po_rebuilding = 1; } else { l_shard->po_shard = -1; diff --git a/src/placement/tests/jump_map_place_obj.c b/src/placement/tests/jump_map_place_obj.c index fdbc08ef07e..5de8ba810c4 100644 --- a/src/placement/tests/jump_map_place_obj.c +++ b/src/placement/tests/jump_map_place_obj.c @@ -1607,7 +1607,7 @@ placement_handles_multiple_states(void **state) */ ctx.ver = ver_after_fail; jtc_scan(&ctx); - assert_int_equal(ctx.rebuild.out_nr, 1); + assert_int_equal(ctx.rebuild.out_nr, 2); /* Complete the rebuild */ ctx.ver = ver_after_reint_complete; /* Restore the version first */ diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index 8587d7b5d8c..0f8707f5aab 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -646,7 +646,7 @@ rebuild_object(struct rebuild_tgt_pool_tracker *rpt, uuid_t co_uuid, daos_unit_o rc = 0; if (myrank == target->ta_comp.co_rank && mytarget == target->ta_comp.co_index && - rpt->rt_rebuild_op != RB_OP_UPGRADE) { + (shard == oid.id_shard) && rpt->rt_rebuild_op != RB_OP_UPGRADE) { D_DEBUG(DB_REBUILD, DF_UOID" %u/%u already on the target shard\n", DP_UOID(oid), myrank, mytarget); return 0; diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 94d0b2a79bc..e16583436ce 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -1412,7 +1412,8 @@ rebuild_task_complete_schedule(struct rebuild_task *task, struct ds_pool *pool, task->dst_new_layout_version, &task->dst_tgts, retry_opc, 5); } else if (task->dst_rebuild_op == RB_OP_REINT || task->dst_rebuild_op == RB_OP_EXTEND || - task->dst_rebuild_op == RB_OP_UPGRADE) { + task->dst_rebuild_op == RB_OP_UPGRADE || task->dst_rebuild_op == RB_OP_EXCLUDE || + task->dst_rebuild_op == RB_OP_DRAIN) { /* Otherwise schedule reclaim for reintegrate/extend/upgrade. 
*/ rgt->rgt_status.rs_state = DRS_IN_PROGRESS; rc = ds_rebuild_schedule(pool, task->dst_map_ver, rgt->rgt_reclaim_epoch, diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index 20bbfcf6296..a016e1937c9 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -27,7 +27,7 @@ timeouts: test_daos_extend_simple: 3600 test_daos_oid_allocator: 640 test_daos_checksum: 500 - test_daos_rebuild_ec: 4800 + test_daos_rebuild_ec: 6400 test_daos_aggregate_ec: 200 test_daos_degraded_ec: 1900 test_daos_dedup: 220 diff --git a/src/tests/ftest/rebuild/basic.py b/src/tests/ftest/rebuild/basic.py index c6263211190..2d7b0e723c1 100644 --- a/src/tests/ftest/rebuild/basic.py +++ b/src/tests/ftest/rebuild/basic.py @@ -97,7 +97,7 @@ def run_rebuild_test(self, pool_quantity): pi_ndisabled=target_count ) status &= pool.check_rebuild_status( - rs_state=2, rs_obj_nr=rs_obj_nr[index], rs_rec_nr=rs_rec_nr[index], rs_errno=0) + rs_state=2, rs_errno=0) self.assertTrue(status, "Error confirming pool info after rebuild") # Verify the data after rebuild diff --git a/src/tests/ftest/rebuild/container_create_race.py b/src/tests/ftest/rebuild/container_create_race.py index 2607c9ef6f0..6684d89ad53 100644 --- a/src/tests/ftest/rebuild/container_create_race.py +++ b/src/tests/ftest/rebuild/container_create_race.py @@ -152,8 +152,8 @@ def test_rebuild_container_create(self): # Check for pool and rebuild info after rebuild self.log.info("=> (6) Check for pool and rebuild info after rebuild") info_checks["pi_ndisabled"] += targets - rebuild_checks["rs_obj_nr"] = ">0" - rebuild_checks["rs_rec_nr"] = ">0" + rebuild_checks["rs_obj_nr"] = ">=0" + rebuild_checks["rs_rec_nr"] = ">=0" rebuild_checks["rs_state"] = 2 self.assertTrue( self.pool.check_pool_info(**info_checks), diff --git a/src/tests/ftest/rebuild/with_io.py b/src/tests/ftest/rebuild/with_io.py index 229b3fa3ca3..7e7a1e623d4 100644 --- a/src/tests/ftest/rebuild/with_io.py +++ 
b/src/tests/ftest/rebuild/with_io.py @@ -92,7 +92,7 @@ def test_rebuild_with_io(self): pi_ndisabled=targets, # DAOS-2799 ) status &= self.pool.check_rebuild_status( - rs_state=2, rs_obj_nr=">0", rs_rec_nr=">0", rs_errno=0) + rs_state=2, rs_errno=0) self.assertTrue(status, "Error confirming pool info after rebuild") # Verify the data after rebuild diff --git a/src/tests/ftest/util/rebuild_test_base.py b/src/tests/ftest/util/rebuild_test_base.py index a4f7d845e2e..1435aa1815e 100644 --- a/src/tests/ftest/util/rebuild_test_base.py +++ b/src/tests/ftest/util/rebuild_test_base.py @@ -75,8 +75,8 @@ def update_pool_verify(self): """Update the pool verification expected values.""" self.info_checks["pi_ndisabled"] = ">0" self.rebuild_checks["rs_state"] = 2 - self.rebuild_checks["rs_obj_nr"] = ">0" - self.rebuild_checks["rs_rec_nr"] = ">0" + self.rebuild_checks["rs_obj_nr"] = ">=0" + self.rebuild_checks["rs_rec_nr"] = ">=0" def execute_pool_verify(self, msg=None): """Verify the pool info. diff --git a/src/tests/suite/daos_rebuild_ec.c b/src/tests/suite/daos_rebuild_ec.c index 0863647d845..6669d32490e 100644 --- a/src/tests/suite/daos_rebuild_ec.c +++ b/src/tests/suite/daos_rebuild_ec.c @@ -1111,6 +1111,7 @@ rebuild_ec_multiple_shards(void **state) d_rank_t rank = 2; int i, j, k; char *data; + char *verify_data; uint64_t stripe_size = 4 * CELL_SIZE; daos_recx_t recx; @@ -1118,32 +1119,69 @@ rebuild_ec_multiple_shards(void **state) return; data = (char *)malloc(stripe_size); + verify_data = (char *)malloc(stripe_size); assert_true(data != NULL); + assert_true(verify_data != NULL); + for (i = 0; i < 20; i++) + oids[i] = daos_test_oid_gen(arg->coh, OC_EC_4P2GX, 0, 0, arg->myrank); + for (k = 0; k < 3; k++) { for (i = 0; i < 20; i++) { - oids[i] = daos_test_oid_gen(arg->coh, OC_EC_4P2GX, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(data, 'a' + i, stripe_size); for (j = 5 * k; j < 5 * (k + 1); j++) { req.iod_type = DAOS_IOD_ARRAY; 
recx.rx_nr = stripe_size; recx.rx_idx = j * stripe_size; - memset(data, 'a', stripe_size); insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, data, stripe_size, &req); } ioreq_fini(&req); } + rebuild_pools_ranks(&arg, 1, &rank, 1, false); daos_cont_status_clear(arg->coh, NULL); + print_message("exclude rank %u\n", rank); rank++; + + for (i = 0; i < 20; i++) { + ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(verify_data, 'a' + i, stripe_size); + for (j = 5 * k; j < 5 * (k + 1); j++) { + req.iod_type = DAOS_IOD_ARRAY; + recx.rx_nr = stripe_size; + recx.rx_idx = j * stripe_size; + memset(data, 0, stripe_size); + lookup_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, + data, stripe_size, &req); + assert_memory_equal(verify_data, data, stripe_size); + } + ioreq_fini(&req); + } } rank = 2; - for (i = 0; i < 3; i++) { + for (k = 0; k < 3; k++) { reintegrate_pools_ranks(&arg, 1, &rank, 1, false); rank++; + + for (i = 0; i < 20; i++) { + ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(verify_data, 'a' + i, stripe_size); + for (j = 5 * k; j < 5 * (k + 1); j++) { + req.iod_type = DAOS_IOD_ARRAY; + recx.rx_nr = stripe_size; + recx.rx_idx = j * stripe_size; + memset(data, 0, stripe_size); + lookup_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, + data, stripe_size, &req); + assert_memory_equal(verify_data, data, stripe_size); + } + ioreq_fini(&req); + } } + free(verify_data); free(data); } From 92ef794e0abd2360b55ed2e713ba8bd97a561b64 Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Mon, 11 Sep 2023 12:37:24 -0400 Subject: [PATCH 35/80] DAOS-14251 control: Allow premounted empty tmpfs (#12968) In the special case where the tmpfs has already been mounted but is empty, don't skip NVMe format and configuration. Enables the use case of running daos_server in a container with an external tmpfs. 
Signed-off-by: Michael MacDonald --- src/control/cmd/daos_server/start.go | 4 +- src/control/fault/code/codes.go | 1 + src/control/server/config/server.go | 46 +++++++-------- src/control/server/config/server_legacy.go | 9 +++ src/control/server/ctl_storage_rpc.go | 22 +++++++- src/control/server/ctl_storage_rpc_test.go | 56 ++++++++++++++++++- src/control/server/harness.go | 4 +- src/control/server/instance_exec.go | 14 ++--- src/control/server/instance_storage.go | 15 +---- src/control/server/instance_storage_test.go | 13 +---- src/control/server/instance_superblock.go | 4 +- .../server/instance_superblock_test.go | 2 +- src/control/server/instance_test.go | 2 +- src/control/server/server_utils.go | 19 +++++++ src/control/server/server_utils_test.go | 37 ++++++++++-- src/control/server/storage/faults.go | 11 ++++ 16 files changed, 185 insertions(+), 74 deletions(-) diff --git a/src/control/cmd/daos_server/start.go b/src/control/cmd/daos_server/start.go index bb773d02b3a..f2d7b77feda 100644 --- a/src/control/cmd/daos_server/start.go +++ b/src/control/cmd/daos_server/start.go @@ -64,7 +64,9 @@ func (cmd *startCmd) setCLIOverrides() error { if cmd.Modules != nil { cmd.config.WithModules(*cmd.Modules) } - cmd.config.RecreateSuperblocks = cmd.RecreateSuperblocks + if cmd.RecreateSuperblocks { + cmd.Notice("--recreate-superblocks is deprecated and no longer needed to use externally-managed tmpfs") + } for _, srv := range cmd.config.Engines { if cmd.Targets > 0 { diff --git a/src/control/fault/code/codes.go b/src/control/fault/code/codes.go index eff960b6f30..89bfb32bed0 100644 --- a/src/control/fault/code/codes.go +++ b/src/control/fault/code/codes.go @@ -79,6 +79,7 @@ const ( ScmBadRegion ScmInvalidPMem ScmRamdiskLowMem + ScmRamdiskBadSize ScmConfigTierMissing ) diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index a860edae8f9..974d11161f8 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go 
@@ -40,26 +40,25 @@ const ( // See utils/config/daos_server.yml for parameter descriptions. type Server struct { // control-specific - ControlPort int `yaml:"port"` - TransportConfig *security.TransportConfig `yaml:"transport_config"` - Engines []*engine.Config `yaml:"engines"` - BdevExclude []string `yaml:"bdev_exclude,omitempty"` - DisableVFIO bool `yaml:"disable_vfio"` - DisableVMD *bool `yaml:"disable_vmd"` - EnableHotplug bool `yaml:"enable_hotplug"` - NrHugepages int `yaml:"nr_hugepages"` // total for all engines - SystemRamReserved int `yaml:"system_ram_reserved"` // total for all engines - DisableHugepages bool `yaml:"disable_hugepages"` - ControlLogMask common.ControlLogLevel `yaml:"control_log_mask"` - ControlLogFile string `yaml:"control_log_file,omitempty"` - ControlLogJSON bool `yaml:"control_log_json,omitempty"` - HelperLogFile string `yaml:"helper_log_file,omitempty"` - FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"` - RecreateSuperblocks bool `yaml:"recreate_superblocks,omitempty"` - FaultPath string `yaml:"fault_path,omitempty"` - TelemetryPort int `yaml:"telemetry_port,omitempty"` - CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"` - ClientEnvVars []string `yaml:"client_env_vars,omitempty"` + ControlPort int `yaml:"port"` + TransportConfig *security.TransportConfig `yaml:"transport_config"` + Engines []*engine.Config `yaml:"engines"` + BdevExclude []string `yaml:"bdev_exclude,omitempty"` + DisableVFIO bool `yaml:"disable_vfio"` + DisableVMD *bool `yaml:"disable_vmd"` + EnableHotplug bool `yaml:"enable_hotplug"` + NrHugepages int `yaml:"nr_hugepages"` // total for all engines + SystemRamReserved int `yaml:"system_ram_reserved"` // total for all engines + DisableHugepages bool `yaml:"disable_hugepages"` + ControlLogMask common.ControlLogLevel `yaml:"control_log_mask"` + ControlLogFile string `yaml:"control_log_file,omitempty"` + ControlLogJSON bool `yaml:"control_log_json,omitempty"` + HelperLogFile string 
`yaml:"helper_log_file,omitempty"` + FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"` + FaultPath string `yaml:"fault_path,omitempty"` + TelemetryPort int `yaml:"telemetry_port,omitempty"` + CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"` + ClientEnvVars []string `yaml:"client_env_vars,omitempty"` // duplicated in engine.Config SystemName string `yaml:"name"` @@ -87,13 +86,6 @@ func (cfg *Server) WithCoreDumpFilter(filter uint8) *Server { return cfg } -// WithRecreateSuperblocks indicates that a missing superblock should not be treated as -// an error. The server will create new superblocks as necessary. -func (cfg *Server) WithRecreateSuperblocks() *Server { - cfg.RecreateSuperblocks = true - return cfg -} - // WithSystemName sets the system name. func (cfg *Server) WithSystemName(name string) *Server { cfg.SystemName = name diff --git a/src/control/server/config/server_legacy.go b/src/control/server/config/server_legacy.go index b09092a9ca9..fba515a0c54 100644 --- a/src/control/server/config/server_legacy.go +++ b/src/control/server/config/server_legacy.go @@ -18,6 +18,8 @@ type ServerLegacy struct { EnableVMD *bool `yaml:"enable_vmd,omitempty"` // Detect outdated "servers" config, to direct users to change their config file. Servers []*engine.Config `yaml:"servers,omitempty"` + // Detect outdated "recreate_superblocks" config, to direct users to change their config file. + RecreateSuperblocks bool `yaml:"recreate_superblocks,omitempty"` } // WithEnableVMD can be used to set the state of VMD functionality, @@ -27,6 +29,13 @@ func (sl *ServerLegacy) WithEnableVMD(enabled bool) *ServerLegacy { return sl } +// WithRecreateSuperblocks indicates that a missing superblock should not be treated as +// an error. The server will create new superblocks as necessary. 
+func (sl *ServerLegacy) WithRecreateSuperblocks() *ServerLegacy { + sl.RecreateSuperblocks = true + return sl +} + func updateVMDSetting(legacyCfg ServerLegacy, srvCfg *Server) error { switch { case legacyCfg.EnableVMD == nil: diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index cf0ca43be13..1880eb4c7f9 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -623,6 +623,7 @@ type formatScmReq struct { func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatResp) (map[int]string, map[int]bool, error) { needFormat := make(map[int]bool) + emptyTmpfs := make(map[int]bool) scmCfgs := make(map[int]*storage.TierConfig) allNeedFormat := true @@ -641,6 +642,15 @@ func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatR return nil, nil, errors.Wrap(err, "retrieving SCM config") } scmCfgs[idx] = scmCfg + + // If the tmpfs was already mounted but empty, record that fact for later usage. + if scmCfg.Class == storage.ClassRam && !needs { + info, err := ei.GetStorage().GetScmUsage() + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to check SCM usage for instance %d", idx) + } + emptyTmpfs[idx] = info.TotalBytes-info.AvailBytes == 0 + } } if allNeedFormat { @@ -673,7 +683,15 @@ func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatR }, }) - skipped[idx] = true + // In the normal case, where SCM wasn't already mounted, we want + // to trigger NVMe format. In the case where SCM was mounted and + // wasn't empty, we want to skip NVMe format, as we're using + // mountedness as a proxy for already-formatted. In the special + // case where tmpfs was already mounted but empty, we will treat it + // as an indication that the NVMe format needs to occur. 
+ if !emptyTmpfs[idx] { + skipped[idx] = true + } } for formatting > 0 { @@ -708,7 +726,7 @@ func formatNvme(ctx context.Context, req formatNvmeReq, resp *ctlpb.StorageForma _, hasError := req.errored[idx] _, skipped := req.skipped[idx] if hasError || (skipped && !req.mdFormatted) { - // if scm errored or was already formatted, indicate skipping bdev format + // if scm failed to format or was already formatted, indicate skipping bdev format ret := ei.newCret(storage.NilBdevAddress, nil) ret.State.Info = fmt.Sprintf(msgNvmeFormatSkip, ei.Index()) resp.Crets = append(resp.Crets, ret) diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 76ff043af92..ba9f3de1e0a 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -1777,6 +1777,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { for name, tc := range map[string]struct { scmMounted bool // if scmMounted we emulate ext4 fs is mounted + tmpfsEmpty bool // if false, an already-mounted ramdisk is not empty superblockExists bool instancesStarted bool // engine already started sMounts []string @@ -1995,6 +1996,44 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { }, }, }, + "ram already mounted but empty": { + scmMounted: true, + tmpfsEmpty: true, + sMounts: []string{"/mnt/daos"}, + sClass: storage.ClassRam, + sSize: 6, + bClass: storage.ClassNvme, + bDevs: [][]string{{mockNvmeController0.PciAddr}}, + bmbc: &bdev.MockBackendConfig{ + ScanRes: &storage.BdevScanResponse{ + Controllers: storage.NvmeControllers{mockNvmeController0}, + }, + FormatRes: &storage.BdevFormatResponse{ + DeviceResponses: storage.BdevDeviceFormatResponses{ + mockNvmeController0.PciAddr: &storage.BdevDeviceFormatResponse{ + Formatted: true, + }, + }, + }, + }, + expResp: &ctlpb.StorageFormatResp{ + Crets: []*ctlpb.NvmeControllerResult{ + { + PciAddr: mockNvmeController0.PciAddr, + State: new(ctlpb.ResponseState), + }, + }, + Mrets: 
[]*ctlpb.ScmMountResult{ + { + Mntpoint: "/mnt/daos", + State: &ctlpb.ResponseState{ + Status: ctlpb.ResponseStatus_CTL_SUCCESS, + Info: "SCM is already formatted", + }, + }, + }, + }, + }, "ram already mounted and reformat set": { scmMounted: true, reformat: true, @@ -2247,6 +2286,19 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { GetfsStr: getFsRetStr, SourceToTarget: devToMount, } + if tc.sClass == storage.ClassRam { + total := uint64(1234) + avail := total + if !tc.tmpfsEmpty { + avail-- + } + smsc.GetfsUsageResps = []system.GetfsUsageRetval{ + { + Total: total, + Avail: avail, + }, + } + } sysProv := system.NewMockSysProvider(log, smsc) mounter := mount.NewProvider(log, sysProv) scmProv := scm.NewProvider(log, nil, sysProv, mounter) @@ -2301,7 +2353,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { // if the instance is expected to have a valid superblock, create one if tc.superblockExists { - if err := ei.createSuperblock(false); err != nil { + if err := ei.createSuperblock(); err != nil { t.Fatal(err) } } else { @@ -2332,7 +2384,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { go func(ctx context.Context, e *EngineInstance) { select { case <-ctx.Done(): - case awaitCh <- e.awaitStorageReady(ctx, false): + case awaitCh <- e.awaitStorageReady(ctx): } }(ctx, ei.(*EngineInstance)) } diff --git a/src/control/server/harness.go b/src/control/server/harness.go index 88028bc658a..f27febc1dce 100644 --- a/src/control/server/harness.go +++ b/src/control/server/harness.go @@ -62,7 +62,7 @@ type Engine interface { IsReady() bool LocalState() system.MemberState RemoveSuperblock() error - Run(context.Context, bool) + Run(context.Context) SetupRank(context.Context, ranklist.Rank, uint32) error Stop(os.Signal) error OnInstanceExit(...onInstanceExitFn) @@ -260,7 +260,7 @@ func (h *EngineHarness) Start(ctx context.Context, db dbLeader, cfg *config.Serv defer h.started.SetFalse() for _, ei := range h.Instances() { - ei.Run(ctx, cfg.RecreateSuperblocks) 
+ ei.Run(ctx) } h.OnDrpcFailure(newOnDrpcFailureFn(h.log, db)) diff --git a/src/control/server/instance_exec.go b/src/control/server/instance_exec.go index ab22cb4504f..19143782ec3 100644 --- a/src/control/server/instance_exec.go +++ b/src/control/server/instance_exec.go @@ -30,14 +30,14 @@ type EngineRunner interface { GetConfig() *engine.Config } -func (ei *EngineInstance) format(ctx context.Context, recreateSBs bool) error { +func (ei *EngineInstance) format(ctx context.Context) error { idx := ei.Index() ei.log.Debugf("instance %d: checking if storage is formatted", idx) - if err := ei.awaitStorageReady(ctx, recreateSBs); err != nil { + if err := ei.awaitStorageReady(ctx); err != nil { return err } - if err := ei.createSuperblock(recreateSBs); err != nil { + if err := ei.createSuperblock(); err != nil { return err } @@ -158,7 +158,7 @@ func (ei *EngineInstance) handleExit(ctx context.Context, exitPid int, exitErr e // will only return (if no errors are returned during setup) on I/O Engine // process exit (triggered by harness shutdown through context cancellation // or abnormal I/O Engine process termination). -func (ei *EngineInstance) startRunner(parent context.Context, recreateSBs bool) (_ chan *engine.RunnerExitInfo, err error) { +func (ei *EngineInstance) startRunner(parent context.Context) (_ chan *engine.RunnerExitInfo, err error) { ctx, cancel := context.WithCancel(parent) defer func() { if err != nil { @@ -168,7 +168,7 @@ func (ei *EngineInstance) startRunner(parent context.Context, recreateSBs bool) } }() - if err = ei.format(ctx, recreateSBs); err != nil { + if err = ei.format(ctx); err != nil { return } @@ -192,7 +192,7 @@ func (ei *EngineInstance) requestStart(ctx context.Context) { // Run starts the control loop for an EngineInstance. Engine starts are triggered by // calling requestStart() on the instance. 
-func (ei *EngineInstance) Run(ctx context.Context, recreateSBs bool) { +func (ei *EngineInstance) Run(ctx context.Context) { // Start the instance control loop. go func() { var runnerExitCh engine.RunnerExitChan @@ -212,7 +212,7 @@ func (ei *EngineInstance) Run(ctx context.Context, recreateSBs bool) { continue } - runnerExitCh, err = ei.startRunner(ctx, recreateSBs) + runnerExitCh, err = ei.startRunner(ctx) if err != nil { ei.log.Errorf("runner exited without starting process: %s", err) ei.handleExit(ctx, 0, err) diff --git a/src/control/server/instance_storage.go b/src/control/server/instance_storage.go index 7be5c570b86..2cc4f1f5443 100644 --- a/src/control/server/instance_storage.go +++ b/src/control/server/instance_storage.go @@ -76,7 +76,7 @@ func createPublishFormatRequiredFunc(publish func(*events.RASEvent), hostname st } // awaitStorageReady blocks until instance has storage available and ready to be used. -func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSuperblock bool) error { +func (ei *EngineInstance) awaitStorageReady(ctx context.Context) error { idx := ei.Index() if ei.IsStarted() { @@ -117,9 +117,6 @@ func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSupe } if !needsMetaFormat && !needsScmFormat { - if skipMissingSuperblock { - return nil - } ei.log.Debugf("instance %d: no SCM format required; checking for superblock", idx) needsSuperblock, err := ei.NeedsSuperblock() if err != nil { @@ -132,16 +129,6 @@ func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSupe ei.log.Debugf("instance %d: superblock needed", idx) } - if needsScmFormat { - cfg, err := ei.storage.GetScmConfig() - if err != nil { - return err - } - if skipMissingSuperblock { - return FaultScmUnmanaged(cfg.Scm.MountPoint) - } - } - // by this point we need superblock and possibly scm format formatType := "SCM" if !needsScmFormat { diff --git a/src/control/server/instance_storage_test.go 
b/src/control/server/instance_storage_test.go index 0a73da3458c..2bbc049bd65 100644 --- a/src/control/server/instance_storage_test.go +++ b/src/control/server/instance_storage_test.go @@ -361,7 +361,6 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { engineStarted bool needsScmFormat bool hasSB bool - skipMissingSB bool engineIndex uint32 expFmtType string expErr error @@ -370,14 +369,6 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { engineStarted: true, expErr: errStarted, }, - "needs format but skip missing superblock": { - needsScmFormat: true, - skipMissingSB: true, - expErr: FaultScmUnmanaged("/mnt/test"), - }, - "no need to format and skip missing superblock": { - skipMissingSB: true, - }, "no need to format and existing superblock": { hasSB: true, }, @@ -432,9 +423,9 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { ctx, cancel := context.WithTimeout(test.Context(t), time.Millisecond*100) defer cancel() - gotErr := engine.awaitStorageReady(ctx, tc.skipMissingSB) + gotErr := engine.awaitStorageReady(ctx) test.CmpErr(t, tc.expErr, gotErr) - if tc.expErr == errStarted || tc.skipMissingSB == true || tc.hasSB == true { + if tc.expErr == errStarted || tc.hasSB == true { return } diff --git a/src/control/server/instance_superblock.go b/src/control/server/instance_superblock.go index 11b7f3849ea..0d6ec613a8b 100644 --- a/src/control/server/instance_superblock.go +++ b/src/control/server/instance_superblock.go @@ -111,7 +111,7 @@ func (ei *EngineInstance) NeedsSuperblock() (bool, error) { } // createSuperblock creates instance superblock if needed. 
-func (ei *EngineInstance) createSuperblock(recreate bool) error { +func (ei *EngineInstance) createSuperblock() error { if ei.IsStarted() { return errors.Errorf("can't create superblock: instance %d already started", ei.Index()) } @@ -120,7 +120,7 @@ func (ei *EngineInstance) createSuperblock(recreate bool) error { if !needsSuperblock { return nil } - if err != nil && !recreate { + if err != nil { return err } diff --git a/src/control/server/instance_superblock_test.go b/src/control/server/instance_superblock_test.go index 8354f17f320..5c1220cd7dc 100644 --- a/src/control/server/instance_superblock_test.go +++ b/src/control/server/instance_superblock_test.go @@ -56,7 +56,7 @@ func TestServer_Instance_createSuperblock(t *testing.T) { } for _, e := range h.Instances() { - if err := e.(*EngineInstance).createSuperblock(false); err != nil { + if err := e.(*EngineInstance).createSuperblock(); err != nil { t.Fatal(err) } } diff --git a/src/control/server/instance_test.go b/src/control/server/instance_test.go index cb6552fc84a..d6767df30c0 100644 --- a/src/control/server/instance_test.go +++ b/src/control/server/instance_test.go @@ -248,7 +248,7 @@ func (mi *MockInstance) RemoveSuperblock() error { return mi.cfg.RemoveSuperblockErr } -func (mi *MockInstance) Run(_ context.Context, _ bool) {} +func (mi *MockInstance) Run(_ context.Context) {} func (mi *MockInstance) SetupRank(_ context.Context, _ ranklist.Rank, _ uint32) error { return mi.cfg.SetupRankErr diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index f6f02e55731..eca82a964ff 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -540,6 +540,25 @@ func checkEngineTmpfsMem(srv *server, ei *EngineInstance, mi *common.MemInfo) er memRamdisk := uint64(sc.Scm.RamdiskSize) * humanize.GiByte memAvail := uint64(mi.MemAvailableKiB) * humanize.KiByte + // In the event that tmpfs was already mounted, we need to verify that it + // is the correct size 
and that the memory usage still makes sense. + if isMounted, err := ei.storage.ScmIsMounted(); err == nil && isMounted { + usage, err := ei.storage.GetScmUsage() + if err != nil { + return errors.Wrap(err, "unable to check tmpfs usage") + } + // Ensure that the existing ramdisk is not larger than the calculated + // optimal size, in order to avoid potential OOM situations. + if usage.TotalBytes > memRamdisk { + return storage.FaultRamdiskBadSize(usage.TotalBytes, memRamdisk) + } + // Looks OK, so we can return early and bypass additional checks. + srv.log.Debugf("using existing tmpfs of size %s", humanize.IBytes(usage.TotalBytes)) + return nil + } else if err != nil { + return errors.Wrap(err, "unable to check for mounted tmpfs") + } + if err := checkMemForRamdisk(srv.log, memRamdisk, memAvail); err != nil { return err } diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index 069fac5e028..1fb0567fadd 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -753,9 +753,11 @@ func TestServer_prepBdevStorage(t *testing.T) { func TestServer_checkEngineTmpfsMem(t *testing.T) { for name, tc := range map[string]struct { - srvCfgExtra func(*config.Server) *config.Server - memAvailGiB int - expErr error + srvCfgExtra func(*config.Server) *config.Server + memAvailGiB int + tmpfsMounted bool + tmpfsSize uint64 + expErr error }{ "pmem tier; skip check": { srvCfgExtra: func(sc *config.Server) *config.Server { @@ -780,6 +782,21 @@ func TestServer_checkEngineTmpfsMem(t *testing.T) { expErr: storage.FaultRamdiskLowMem("Available", 10*humanize.GiByte, 9*humanize.GiByte, 8*humanize.GiByte), }, + "tmpfs already mounted; more than calculated": { + srvCfgExtra: func(sc *config.Server) *config.Server { + return sc.WithEngines(ramEngine(0, 10)) + }, + tmpfsMounted: true, + tmpfsSize: 11, + expErr: errors.New("ramdisk size"), + }, + "tmpfs already mounted; less than calculated": { + srvCfgExtra: 
func(sc *config.Server) *config.Server { + return sc.WithEngines(ramEngine(0, 10)) + }, + tmpfsMounted: true, + tmpfsSize: 9, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(name) @@ -799,7 +816,19 @@ func TestServer_checkEngineTmpfsMem(t *testing.T) { ec := cfg.Engines[0] runner := engine.NewRunner(log, ec) - provider := storage.MockProvider(log, 0, &ec.Storage, nil, nil, nil, nil) + sysMockCfg := &sysprov.MockSysConfig{ + IsMountedBool: tc.tmpfsMounted, + } + if tc.tmpfsMounted { + sysMockCfg.GetfsUsageResps = []sysprov.GetfsUsageRetval{ + { + Total: tc.tmpfsSize * humanize.GiByte, + }, + } + } + sysMock := sysprov.NewMockSysProvider(log, sysMockCfg) + scmMock := &storage.MockScmProvider{} + provider := storage.MockProvider(log, 0, &ec.Storage, sysMock, scmMock, nil, nil) instance := NewEngineInstance(log, provider, nil, runner) srv, err := newServer(log, cfg, &system.FaultDomain{}) diff --git a/src/control/server/storage/faults.go b/src/control/server/storage/faults.go index fb64eabc51f..cbf029c93f2 100644 --- a/src/control/server/storage/faults.go +++ b/src/control/server/storage/faults.go @@ -78,6 +78,17 @@ func FaultRamdiskLowMem(memType string, confRamdiskSize, memNeed, memHave uint64 "file if reducing the requested amount of RAM is not possible") } +// FaultRamdiskBadSize indicates that the already-mounted ramdisk is out +// of spec with the calculated ramdisk size for the engine. +func FaultRamdiskBadSize(existingSize, calcSize uint64) *fault.Fault { + return storageFault( + code.ScmRamdiskBadSize, + fmt.Sprintf("already-mounted ramdisk size %s is too far from optimal size of %s", + humanize.IBytes(existingSize), humanize.IBytes(calcSize)), + fmt.Sprintf("unmount the ramdisk and allow DAOS to manage it, or remount with size %s", + humanize.IBytes(calcSize))) +} + // FaultConfigRamdiskUnderMinMem indicates that the tmpfs size requested in config is less than // minimum allowed. 
func FaultConfigRamdiskUnderMinMem(confSize, memRamdiskMin uint64) *fault.Fault { From e3cb161381c502137a64d4cb7b2c83c4f069c5cc Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 11 Sep 2023 21:06:02 +0100 Subject: [PATCH 36/80] DAOS-623 build: Fail the build stage in Jenkins for rpm build failure. (#13014) Some debugging for failures meant that the actual result was being masked. Move the debugging to the "unsuccessful" script and re-instate the fail-on-failure behaviour. Signed-off-by: Ashley Pittman --- ci/rpm/build_unsuccessful.sh | 5 ++++- utils/rpms/packaging/rpm_chrootbuild | 12 ++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ci/rpm/build_unsuccessful.sh b/ci/rpm/build_unsuccessful.sh index 3c88a98dc56..d1d1f3606e4 100755 --- a/ci/rpm/build_unsuccessful.sh +++ b/ci/rpm/build_unsuccessful.sh @@ -8,7 +8,7 @@ mydir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" ci_envs="$mydir/../parse_ci_envs.sh" if [ -e "${ci_envs}" ]; then # at some point we want to use: shellcheck source=ci/parse_ci_envs.sh - # shellcheck disable=SC1091 + # shellcheck disable=SC1091,SC1090 source "${ci_envs}" fi @@ -25,6 +25,9 @@ if [ -d /var/cache/pbuilder/ ]; then exit 0 fi +rpm -q mock +mock --debug-config + mockroot="/var/lib/mock/$CHROOT_NAME" cat "$mockroot"/result/{root,build}.log 2>/dev/null || true diff --git a/utils/rpms/packaging/rpm_chrootbuild b/utils/rpms/packaging/rpm_chrootbuild index 73ce60c0746..9cf177e4a41 100755 --- a/utils/rpms/packaging/rpm_chrootbuild +++ b/utils/rpms/packaging/rpm_chrootbuild @@ -117,11 +117,7 @@ echo "\"\"\"" >> "$cfg_file" if [ -n "$DISTRO_VERSION" ]; then releasever_opt=("--config-opts=releasever=$DISTRO_VERSION") fi -# shellcheck disable=SC2086 -if ! 
eval mock --configdir "$mock_config_dir" -r "${CHROOT_NAME}" \ - ${repo_dels[*]} ${repo_adds[*]} --disablerepo=\*-debug* \ - "${releasever_opt[@]}" $MOCK_OPTIONS $RPM_BUILD_OPTIONS "$TARGET"; then - # Debug information for filing bugs on mock - rpm -q mock - mock --debug-config -fi + +# shellcheck disable=SC2086,SC2048,SC2294 +eval mock --configdir "$mock_config_dir" -r "${CHROOT_NAME}" ${repo_dels[*]} ${repo_adds[*]} \ + --disablerepo=\*-debug* "${releasever_opt[@]}" $MOCK_OPTIONS $RPM_BUILD_OPTIONS "$TARGET" From dedd8e14feceb6b835396739fa3bc6555f74e49c Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 12 Sep 2023 11:25:35 -0400 Subject: [PATCH 37/80] DAOS-14254 test: Fix functional test server cleanup (#13040) Resolve an issue where cleanup commands need to be run on a subset of the test hosts running servers. Signed-off-by: Phil Henderson --- src/tests/ftest/util/server_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 7b312457995..514375ccce9 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -331,7 +331,7 @@ def clean_mount(self, hosts, mount, verbose=True, index=None): self.log.debug("Checking for the existence of the %s mount point", mount) command = "test -d {}".format(mount) result = run_remote(self.log, hosts, command, verbose) - if result.passed: + if result.passed_hosts: mounted_hosts = result.passed_hosts # Remove the superblocks From a61940319d8cad8c6475113185327239d180837f Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 12 Sep 2023 20:08:59 +0100 Subject: [PATCH 38/80] DAOS-13392 test: Do not fail build on missing NLT stash. (#12941) Catch the error and continue if unstash fails, the code is written to handle this. 
Signed-off-by: Ashley Pittman --- Jenkinsfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0f257aa9dfe..ce8e946bc7b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -74,6 +74,11 @@ void job_step_update(def value) { Map nlt_test() { // groovylint-disable-next-line NoJavaUtilDate Date startDate = new Date() + try { + unstash('nltr') + } catch (e) { + print 'Unstash failed, results from NLT stage will not be included' + } sh label: 'Fault injection testing using NLT', script: './ci/docker_nlt.sh --class-name el8.fault-injection fi' List filesList = [] @@ -1104,7 +1109,6 @@ pipeline { sconsBuild(parallel_build: true, scons_args: 'PREFIX=/opt/daos TARGET_TYPE=release BUILD_TYPE=debug', build_deps: 'no')) - unstash('nltr') job_step_update(nlt_test()) recordCoverage(tools: [[parser: 'COBERTURA', pattern:'nltr.xml']], skipPublishingChecks: true, From 693821c079d82ce5f896f4f3a6a0e4776683ff46 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 13 Sep 2023 22:08:28 +0900 Subject: [PATCH 39/80] DAOS-14232 tests: Increase pool/svc.yaml timeout (#13009) Due to the recent increase of SWIM suspicion timeout, increase the test timeout of pool/svc.yaml. Signed-off-by: Li Wei --- src/tests/ftest/pool/svc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/pool/svc.yaml b/src/tests/ftest/pool/svc.yaml index 9c9f47448c0..a2811dae6ca 100644 --- a/src/tests/ftest/pool/svc.yaml +++ b/src/tests/ftest/pool/svc.yaml @@ -12,7 +12,7 @@ server_config: class: ram scm_mount: /mnt/daos system_ram_reserved: 1 -timeout: 200 +timeout: 300 pool: control_method: dmg scm_size: 134217728 From 7062dc4b754f4459f370dc270b9adb6c58fe35c2 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 13 Sep 2023 12:25:31 -0400 Subject: [PATCH 40/80] DAOS-14302 test: Skip DAOS_Drain_Simple w/o fault injection (#13039) Do not run the DAOS_Drain_Simple test when DAOS is built without fault injection. 
Also skipping the DAOS_Extend_Simple.EXTEND[7,9,11,13,15,16] and DAOS_EC.EC28 test when fault injection is disabled. Signed-off-by: Phil Henderson --- src/tests/suite/daos_drain_simple.c | 2 ++ src/tests/suite/daos_extend_simple.c | 12 ++++++++++++ src/tests/suite/daos_obj_ec.c | 2 ++ 3 files changed, 16 insertions(+) diff --git a/src/tests/suite/daos_drain_simple.c b/src/tests/suite/daos_drain_simple.c index a250d044f15..57a4772d666 100644 --- a/src/tests/suite/daos_drain_simple.c +++ b/src/tests/suite/daos_drain_simple.c @@ -1003,6 +1003,8 @@ run_daos_drain_simple_test(int rank, int size, int *sub_tests, { int rc = 0; + FAULT_INJECTION_REQUIRED(); + par_barrier(PAR_COMM_WORLD); if (sub_tests_size == 0) { sub_tests_size = ARRAY_SIZE(drain_tests); diff --git a/src/tests/suite/daos_extend_simple.c b/src/tests/suite/daos_extend_simple.c index d7e32e99a1f..c0635359527 100644 --- a/src/tests/suite/daos_extend_simple.c +++ b/src/tests/suite/daos_extend_simple.c @@ -499,6 +499,8 @@ dfs_extend_punch_kill(void **state) void dfs_extend_punch_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_PUNCH, extend_cb_internal, false); } @@ -511,6 +513,8 @@ dfs_extend_stat_kill(void **state) void dfs_extend_stat_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_STAT, extend_cb_internal, false); } @@ -523,6 +527,8 @@ dfs_extend_enumerate_kill(void **state) void dfs_extend_enumerate_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_ENUMERATE, extend_cb_internal, false); } @@ -535,6 +541,8 @@ dfs_extend_fetch_kill(void **state) void dfs_extend_fetch_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_FETCH, extend_cb_internal, false); } @@ -547,6 +555,8 @@ dfs_extend_write_kill(void **state) void dfs_extend_write_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_UPDATE, extend_cb_internal, false); 
} @@ -562,6 +572,8 @@ dfs_extend_fail_retry(void **state) dfs_attr_t attr = {}; int rc; + FAULT_INJECTION_REQUIRED(); + attr.da_props = daos_prop_alloc(1); assert_non_null(attr.da_props); attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; diff --git a/src/tests/suite/daos_obj_ec.c b/src/tests/suite/daos_obj_ec.c index 609b0ab319f..2eef576d096 100644 --- a/src/tests/suite/daos_obj_ec.c +++ b/src/tests/suite/daos_obj_ec.c @@ -2464,6 +2464,8 @@ ec_three_stripes_nvme_io(void **state) daos_recx_t recx; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 6)) return; From b3edc5eee84e2579d17cb3483472959f820b4b7e Mon Sep 17 00:00:00 2001 From: Michael Hennecke Date: Wed, 13 Sep 2023 18:52:32 +0200 Subject: [PATCH 41/80] DAOS-14309 packaging: remove daos tests RPMs from distro (#13034) Do not ship DAOS test RPMs (add all daos*tests RPMs to .rpmignore) Updated .rpmignore to retire centos7 (no longer supported), and add el9 Signed-off-by: Michael Hennecke --- .rpmignore | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/.rpmignore b/.rpmignore index d0657d759c0..86666be323b 100644 --- a/.rpmignore +++ b/.rpmignore @@ -3,28 +3,22 @@ # but should not be included in the current release # -centos7/daos-client-tests-openmpi*.rpm -centos7/daos-firmware*.rpm -centos7/daos-mofed*.rpm -centos7/daos-serialize*.rpm -centos7/daos-server-tests-openmpi*.rpm -centos7/daos-tests-internal*.rpm -centos7/ucx*.rpm - -el8/daos-client-tests-openmpi*.rpm +el8/daos-*tests*.rpm el8/daos-firmware*.rpm el8/daos-mofed*.rpm el8/daos-serialize*.rpm -el8/daos-server-tests-openmpi*.rpm -el8/daos-tests-internal*.rpm el8/ucx*.rpm -leap15/daos-client-tests-openmpi*.rpm +el9/daos-*tests*.rpm +el9/daos-firmware*.rpm +el9/daos-mofed*.rpm +el9/daos-serialize*.rpm +el9/ucx*.rpm + +leap15/daos-*tests*.rpm leap15/daos-firmware*.rpm leap15/daos-mofed*.rpm leap15/daos-serialize*.rpm -leap15/daos-server-tests-openmpi*.rpm -leap15/daos-tests-internal*.rpm 
leap15/openucx*.rpm leap15/ucx*.rpm leap15/*protobuf-c*.rpm From 9f5c089852593e9901d1915b14d28ff5c0d0d1ee Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Thu, 14 Sep 2023 14:08:54 -0400 Subject: [PATCH 42/80] DAOS-14332 control: Use gRPC metadata for interop (#13018) The interoperability checking code relies on getting the peer component from the peer certificate. When running in insecure mode, the peer component information is unavailable, and the interoperability check defaults to the most stringent requirements. This patch adds new component/version headers to the grpc client request to allow the server to perform interoperability checks without the peer certificate. Signed-off-by: Michael MacDonald --- src/control/cmd/daos_agent/main.go | 1 + src/control/cmd/dmg/main.go | 1 + src/control/common/proto/consts.go | 14 +++ src/control/lib/control/interceptors.go | 30 +++++- src/control/lib/control/mocks.go | 6 ++ src/control/lib/control/rpc.go | 26 ++++- src/control/security/grpc_authorization.go | 24 +++++ .../security/grpc_authorization_test.go | 56 +++++++++++ src/control/server/interceptors.go | 98 ++++++++++++++----- src/control/server/interceptors_test.go | 98 ++++++++++++++++++- src/control/server/server.go | 1 + src/control/server/server_utils.go | 2 +- 12 files changed, 322 insertions(+), 35 deletions(-) create mode 100644 src/control/common/proto/consts.go diff --git a/src/control/cmd/daos_agent/main.go b/src/control/cmd/daos_agent/main.go index 8f5c135b4d9..f6906a1fc83 100644 --- a/src/control/cmd/daos_agent/main.go +++ b/src/control/cmd/daos_agent/main.go @@ -257,6 +257,7 @@ func main() { ctlInvoker := control.NewClient( control.WithClientLogger(log), + control.WithClientComponent(build.ComponentAgent), ) if err := parseOpts(os.Args[1:], &opts, ctlInvoker, log); err != nil { diff --git a/src/control/cmd/dmg/main.go b/src/control/cmd/dmg/main.go index d9d1eacfa2d..a15c65867a4 100644 --- a/src/control/cmd/dmg/main.go +++ 
b/src/control/cmd/dmg/main.go @@ -302,6 +302,7 @@ func main() { ctlInvoker := control.NewClient( control.WithClientLogger(log), + control.WithClientComponent(build.ComponentAdmin), ) if err := parseOpts(os.Args[1:], &opts, ctlInvoker, log); err != nil { diff --git a/src/control/common/proto/consts.go b/src/control/common/proto/consts.go new file mode 100644 index 00000000000..039e7fe23f2 --- /dev/null +++ b/src/control/common/proto/consts.go @@ -0,0 +1,14 @@ +// +// (C) Copyright 2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package proto + +const ( + // DaosComponentHeader defines the header name used to convey the component name. + DaosComponentHeader = "x-daos-component" + // DaosVersionHeader defines the header name used to convey the component version. + DaosVersionHeader = "x-daos-version" +) diff --git a/src/control/lib/control/interceptors.go b/src/control/lib/control/interceptors.go index d3c4c3da375..d507845fc46 100644 --- a/src/control/lib/control/interceptors.go +++ b/src/control/lib/control/interceptors.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -12,8 +12,10 @@ import ( "github.com/pkg/errors" "google.golang.org/grpc" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/security" ) @@ -59,8 +61,8 @@ func streamErrorInterceptor() grpc.DialOption { } // unaryErrorInterceptor calls the specified unary RPC and returns any unwrapped errors. 
-func unaryErrorInterceptor() grpc.DialOption { - return grpc.WithUnaryInterceptor(func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { +func unaryErrorInterceptor() grpc.UnaryClientInterceptor { + return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { err := invoker(ctx, method, req, reply, cc, opts...) if err != nil { st := status.Convert(err) @@ -71,5 +73,25 @@ func unaryErrorInterceptor() grpc.DialOption { return connErrToFault(st, cc.Target()) } return nil - }) + } +} + +// unaryVersionedComponentInterceptor appends the component name and version to the +// outgoing request headers. +func unaryVersionedComponentInterceptor(comp build.Component) grpc.UnaryClientInterceptor { + return func(parent context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + // NB: The caller should specify its component, but as a fallback, we + // can make a decent guess about the calling component based on the method. + if comp == build.ComponentAny { + var err error + if comp, err = security.MethodToComponent(method); err != nil { + return errors.Wrap(err, "unable to determine component from method") + } + } + ctx := metadata.AppendToOutgoingContext(parent, + proto.DaosComponentHeader, comp.String(), + proto.DaosVersionHeader, build.DaosVersion, + ) + return invoker(ctx, method, req, reply, cc, opts...) 
+ } } diff --git a/src/control/lib/control/mocks.go b/src/control/lib/control/mocks.go index 752d597abdb..077937d26a0 100644 --- a/src/control/lib/control/mocks.go +++ b/src/control/lib/control/mocks.go @@ -21,6 +21,7 @@ import ( "google.golang.org/protobuf/reflect/protoreflect" "google.golang.org/protobuf/runtime/protoimpl" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" commonpb "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/common/proto/convert" @@ -50,6 +51,7 @@ type ( // for a MockInvoker. MockInvokerConfig struct { Sys string + Component build.Component UnaryError error UnaryResponse *UnaryResponse UnaryResponseSet []*UnaryResponse @@ -102,6 +104,10 @@ func (mi *MockInvoker) GetSystem() string { return mi.cfg.Sys } +func (mi *MockInvoker) GetComponent() build.Component { + return mi.cfg.Component +} + func (mi *MockInvoker) InvokeUnaryRPC(ctx context.Context, uReq UnaryRequest) (*UnaryResponse, error) { // Allow the test to override the timeouts set by the caller. if mi.cfg.ReqTimeout > 0 { diff --git a/src/control/lib/control/rpc.go b/src/control/lib/control/rpc.go index f5424ebaf91..e00374603c4 100644 --- a/src/control/lib/control/rpc.go +++ b/src/control/lib/control/rpc.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -19,6 +19,7 @@ import ( "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/fault" "github.com/daos-stack/daos/src/control/fault/code" @@ -88,6 +89,7 @@ type ( UnaryInvoker interface { sysGetter debugLogger + GetComponent() build.Component InvokeUnaryRPC(ctx context.Context, req UnaryRequest) (*UnaryResponse, error) InvokeUnaryRPCAsync(ctx context.Context, req UnaryRequest) (HostResponseChan, error) } @@ -122,14 +124,22 @@ type ( // Client implements the Invoker interface and should be provided to // API methods to invoke RPCs. Client struct { - config *Config - log debugLogger + config *Config + log debugLogger + component build.Component } // ClientOption defines the signature for functional Client options. ClientOption func(c *Client) ) +// WithClientComponent sets the client's component. +func WithClientComponent(comp build.Component) ClientOption { + return func(c *Client) { + c.component = comp + } +} + // WithClientLogger sets the client's debugLogger. func WithClientLogger(log debugLogger) ClientOption { return func(c *Client) { @@ -171,6 +181,11 @@ func DefaultClient() *Client { ) } +// GetComponent returns the client's component. +func (c *Client) GetComponent() build.Component { + return c.component +} + // SetConfig sets the client configuration for an // existing Client. 
func (c *Client) SetConfig(cfg *Config) { @@ -196,7 +211,10 @@ func (c *Client) Debugf(fmtStr string, args ...interface{}) { func (c *Client) dialOptions() ([]grpc.DialOption, error) { opts := []grpc.DialOption{ streamErrorInterceptor(), - unaryErrorInterceptor(), + grpc.WithChainUnaryInterceptor( + unaryErrorInterceptor(), + unaryVersionedComponentInterceptor(c.GetComponent()), + ), grpc.FailOnNonTempDialError(true), } diff --git a/src/control/security/grpc_authorization.go b/src/control/security/grpc_authorization.go index 39a3d67dc2b..de9ef2bddf5 100644 --- a/src/control/security/grpc_authorization.go +++ b/src/control/security/grpc_authorization.go @@ -6,6 +6,12 @@ package security +import ( + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" +) + // Component represents the DAOS component being granted authorization. type Component int @@ -77,6 +83,24 @@ var methodAuthorizations = map[string][]Component{ "/RaftTransport/InstallSnapshot": {ComponentServer}, } +func methodToComponent(method string, methodAuthorizations map[string][]Component) (build.Component, error) { + comps, found := methodAuthorizations[method] + if !found || len(comps) == 0 { + return build.ComponentAny, errors.Errorf("method %q does not map to a known authorized component", method) + } else if len(comps) > 1 { + // In this case, the caller must explicitly set the component and cannot + // rely on this helper to resolve it. + return build.ComponentAny, errors.Errorf("method %q maps to multiple authorized components", method) + } + + return build.Component(comps[0].String()), nil +} + +// MethodToComponent resolves a gRPC method string to a build.Component. 
+func MethodToComponent(method string) (build.Component, error) { + return methodToComponent(method, methodAuthorizations) +} + // HasAccess check if the given component has access to method given in FullMethod func (c Component) HasAccess(FullMethod string) bool { compList, ok := methodAuthorizations[FullMethod] diff --git a/src/control/security/grpc_authorization_test.go b/src/control/security/grpc_authorization_test.go index 25b43873744..ee31dcbd0e2 100644 --- a/src/control/security/grpc_authorization_test.go +++ b/src/control/security/grpc_authorization_test.go @@ -12,6 +12,9 @@ import ( "strings" "testing" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" @@ -218,3 +221,56 @@ func TestSecurity_AuthorizedRpcsAreValid(t *testing.T) { }) } } + +func TestSecurity_MethodToCompnent(t *testing.T) { + for name, tc := range map[string]struct { + method string + authMap map[string][]Component + expComp build.Component + expErr error + }{ + "method maps to an unknown component": { + method: "/unknown", + expErr: errors.New("does not map"), + }, + "method maps to 0 components": { + method: "/zero", + authMap: map[string][]Component{ + "/zero": nil, + }, + expErr: errors.New("does not map"), + }, + "method maps to 2 components": { + method: "/two", + authMap: map[string][]Component{ + "/two": {ComponentAdmin, ComponentAgent}, + }, + expErr: errors.New("multiple authorized"), + }, + "method maps to 1 component": { + method: "/one", + authMap: map[string][]Component{ + "/one": {ComponentServer}, + }, + expComp: build.ComponentServer, + }, + } { + t.Run(name, func(t *testing.T) { + var gotComp build.Component + var gotErr error + + if tc.authMap != nil { + gotComp, gotErr = methodToComponent(tc.method, tc.authMap) + } else { + gotComp, gotErr = 
MethodToComponent(tc.method) + } + + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + test.AssertEqual(t, tc.expComp, gotComp, "unexpected component") + }) + } +} diff --git a/src/control/server/interceptors.go b/src/control/server/interceptors.go index 1762b58d790..1f0aa24efb6 100644 --- a/src/control/server/interceptors.go +++ b/src/control/server/interceptors.go @@ -16,6 +16,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" "google.golang.org/grpc/status" "google.golang.org/protobuf/reflect/protoreflect" @@ -28,6 +29,10 @@ import ( "github.com/daos-stack/daos/src/control/system" ) +var ( + errNoReqMetadata = errors.New("no component/version metadata found in request") +) + func componentFromContext(ctx context.Context) (comp *security.Component, err error) { clientPeer, ok := peer.FromContext(ctx) if !ok { @@ -116,7 +121,25 @@ var selfServerComponent = func() *build.VersionedComponent { return self }() -func checkVersion(ctx context.Context, self *build.VersionedComponent, req interface{}) error { +func compVersionFromHeaders(ctx context.Context) (*build.VersionedComponent, error) { + md, hasMD := metadata.FromIncomingContext(ctx) + if !hasMD { + return nil, errNoReqMetadata + } + compName, hasName := md[proto.DaosComponentHeader] + if !hasName { + return nil, errNoReqMetadata + } + comp := build.Component(compName[0]) + compVersion, hasVersion := md[proto.DaosVersionHeader] + if !hasVersion { + return nil, errNoReqMetadata + } + + return build.NewVersionedComponent(comp, compVersion[0]) +} + +func checkVersion(ctx context.Context, log logging.Logger, self *build.VersionedComponent, req interface{}) error { // If we can't determine our own version, then there's no // checking to be done. 
if self.Version.IsZero() { @@ -127,33 +150,58 @@ func checkVersion(ctx context.Context, self *build.VersionedComponent, req inter // are most stringent for server/server communication. We have // to set a default because this security component lookup // will fail if certificates are disabled. - buildComponent := build.ComponentServer + otherComponent := build.ComponentServer + otherVersion := build.MustNewVersion("0.0.0") secComponent, err := componentFromContext(ctx) if err == nil { - buildComponent = build.Component(secComponent.String()) + otherComponent = build.Component(secComponent.String()) } isInsecure := status.Code(err) == codes.Unauthenticated - otherVersion := build.MustNewVersion("0.0.0") - if sReq, ok := req.(interface{ GetSys() string }); ok { - comps := strings.Split(sReq.GetSys(), "-") - if len(comps) > 1 { - if ver, err := build.NewVersion(comps[len(comps)-1]); err == nil { - otherVersion = ver - } + fromHeaders, err := compVersionFromHeaders(ctx) + if err != nil && err != errNoReqMetadata { + return errors.Wrap(err, "failed to extract peer component/version from headers") + } + + // Prefer the new header-based component/version mechanism. + // If we are in secure mode, verify that the component presented + // in the header matches the certificate's component. + if fromHeaders != nil { + otherVersion = fromHeaders.Version + if isInsecure { + otherComponent = fromHeaders.Component + } else if otherComponent != fromHeaders.Component { + return status.Errorf(codes.PermissionDenied, + "component mismatch (req: %q != cert: %q)", fromHeaders.Component, otherComponent) } } else { - // If the request message type does not implement GetSys(), then - // there is no version to check. We leave message compatibility - // to lower layers. - return nil - } + // If we did not receive a version via request header, then we need to fall back + // to trying to pick it out of the overloaded system name field. 
+ // + // TODO (DAOS-14336): Remove this once the compatibility window has closed (e.g. for 2.8+). + if sReq, ok := req.(interface{ GetSys() string }); ok { + comps := strings.Split(sReq.GetSys(), "-") + if len(comps) > 1 { + if ver, err := build.NewVersion(comps[len(comps)-1]); err == nil { + otherVersion = ver + } + } + } else { + // If the request message type does not implement GetSys(), then + // there is no version to check. We leave message compatibility + // to lower layers. + return nil + } - if isInsecure && !self.Version.Equals(otherVersion) { - return FaultNoCompatibilityInsecure(self.Version, otherVersion) + // If we're running without certificates and we didn't receive a component + // via headers, then we have to enforce the strictest compatibility requirements, + // i.e. exact same version. + if isInsecure && !self.Version.Equals(otherVersion) { + return FaultNoCompatibilityInsecure(self.Version, otherVersion) + } } - other, err := build.NewVersionedComponent(buildComponent, otherVersion.String()) + other, err := build.NewVersionedComponent(otherComponent, otherVersion.String()) if err != nil { other = &build.VersionedComponent{ Component: "unknown", @@ -163,18 +211,22 @@ func checkVersion(ctx context.Context, self *build.VersionedComponent, req inter } if err := build.CheckCompatibility(self, other); err != nil { + log.Errorf("%s is incompatible with %s", other, self) return FaultIncompatibleComponents(self, other) } + log.Debugf("%s is compatible with %s", other, self) return nil } -func unaryVersionInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { - if err := checkVersion(ctx, selfServerComponent, req); err != nil { - return nil, errors.Wrapf(err, "version check failed for %T", req) - } +func unaryVersionInterceptor(log logging.Logger) grpc.UnaryServerInterceptor { + return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) 
(interface{}, error) { + if err := checkVersion(ctx, log, selfServerComponent, req); err != nil { + return nil, errors.Wrapf(err, "version check failed for %T", req) + } - return handler(ctx, req) + return handler(ctx, req) + } } func unaryErrorInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { diff --git a/src/control/server/interceptors_test.go b/src/control/server/interceptors_test.go index 2dbfbca30c8..bdde5bde6e8 100644 --- a/src/control/server/interceptors_test.go +++ b/src/control/server/interceptors_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -16,12 +16,15 @@ import ( "github.com/google/go-cmp/cmp" "github.com/pkg/errors" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/logging" ) type testStatus struct { @@ -141,10 +144,96 @@ func TestServer_checkVersion(t *testing.T) { otherVersion: "2.4.0", ctx: newTestAuthCtx(test.Context(t), "agent"), }, - "non-sys msg bypasses version checks": { + "non-sys msg bypasses version checks in secure mode": { selfVersion: "2.4.0", + ctx: newTestAuthCtx(test.Context(t), "agent"), nonSysMsg: true, }, + "insecure prelease agent with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.3.108", + )), + nonSysMsg: true, + }, + "insecure 2.4.1 agent with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: 
metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.4.1", + )), + nonSysMsg: true, + }, + "insecure 2.4.0 agent with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.4.0", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 agent with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + }, + "insecure 2.4.1 dmg with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAdmin.String(), + proto.DaosVersionHeader, "2.4.1", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 dmg with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAdmin.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("not compatible"), + }, + "insecure 2.4.0 server with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentServer.String(), + proto.DaosVersionHeader, "2.4.0", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 server with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentServer.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("not compatible"), + }, + "invalid component": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, 
"banana", + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("invalid component"), + }, + "header/certificate component mismatch": { + selfVersion: "2.4.0", + ctx: newTestAuthCtx( + metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentServer.String(), + proto.DaosVersionHeader, "2.6.0"), + ), "agent"), + nonSysMsg: true, + expErr: errors.New("component mismatch"), + }, } { t.Run(name, func(t *testing.T) { ctx := test.Context(t) @@ -169,7 +258,10 @@ func TestServer_checkVersion(t *testing.T) { req = verReq } - gotErr := checkVersion(ctx, selfComp, req) + log, buf := logging.NewTestLogger(name) + test.ShowBufferOnFailure(t, buf) + + gotErr := checkVersion(ctx, log, selfComp, req) test.CmpErr(t, tc.expErr, gotErr) }) } diff --git a/src/control/server/server.go b/src/control/server/server.go index 8e5d921bb57..e4f7b5bdfa1 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -188,6 +188,7 @@ func (srv *server) createServices(ctx context.Context) (err error) { cliCfg := control.DefaultConfig() cliCfg.TransportConfig = srv.cfg.TransportConfig rpcClient := control.NewClient( + control.WithClientComponent(build.ComponentServer), control.WithConfig(cliCfg), control.WithClientLogger(srv.log)) diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index eca82a964ff..6e059aef32e 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -733,7 +733,7 @@ func getGrpcOpts(log logging.Logger, cfgTransport *security.TransportConfig, ldr unaryLoggingInterceptor(log, ldrChk), // must be first in order to properly log errors unaryErrorInterceptor, unaryStatusInterceptor, - unaryVersionInterceptor, + unaryVersionInterceptor(log), } streamInterceptors := []grpc.StreamServerInterceptor{ streamErrorInterceptor, From a03b55f78d227a838691dec4873a3d8ee3f6a99f Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Thu, 14 Sep 
2023 19:55:14 +0100 Subject: [PATCH 43/80] DAOS-14223 control: Add --meta-size to dmg pool create (#13000) Add plumbing for new pool create option to specify size of blob on meta-role SSDs. In MD-on-SSD phase II this can be significantly larger than the VOS file in tmpfs in order to to support larger md capacity with limited memory capacity, Limit PR scope by only propagating new parameter to dRPC pool create handler and only supporting the new size when manually specifying SCM and NVMe storage sizes in dmg pool create call. Further work required to: * Propagate meta size throughout create call stack * Expand meta size param support to auto-sizing create options * Consolidate and improve control-plane pool create workflow and logic Signed-off-by: Tom Nabarro --- src/control/cmd/dmg/pool.go | 37 +- src/control/cmd/dmg/pool_test.go | 34 +- src/control/common/proto/mgmt/pool.pb.go | 596 ++++++++++++----------- src/control/common/test/utils.go | 4 + src/control/lib/control/pool.go | 1 + src/control/lib/control/pool_test.go | 56 ++- src/control/server/ctl_storage.go | 3 + src/control/server/ctl_storage_rpc.go | 2 + src/control/server/engine/utils.go | 4 +- src/control/server/engine/utils_test.go | 2 +- src/control/server/mgmt_pool.go | 1 + src/control/server/mgmt_pool_test.go | 47 +- src/mgmt/pool.pb-c.c | 38 +- src/mgmt/pool.pb-c.h | 12 +- src/mgmt/srv_drpc.c | 3 +- src/mgmt/srv_internal.h | 2 +- src/mgmt/srv_pool.c | 9 +- src/mgmt/tests/mocks.c | 8 +- src/proto/mgmt/pool.proto | 2 + 19 files changed, 530 insertions(+), 331 deletions(-) diff --git a/src/control/cmd/dmg/pool.go b/src/control/cmd/dmg/pool.go index 39e1c4179c8..df6936c8a9f 100644 --- a/src/control/cmd/dmg/pool.go +++ b/src/control/cmd/dmg/pool.go @@ -199,6 +199,7 @@ type PoolCreateCmd struct { NumSvcReps uint32 `short:"v" long:"nsvc" description:"Number of pool service replicas"` ScmSize sizeFlag `short:"s" long:"scm-size" description:"Per-engine SCM allocation for DAOS pool (manual)"` NVMeSize sizeFlag 
`short:"n" long:"nvme-size" description:"Per-engine NVMe allocation for DAOS pool (manual)"` + MetaSize sizeFlag `long:"meta-size" description:"In MD-on-SSD mode specify meta blob size to be used in DAOS pool (manual)"` RankList ui.RankSetFlag `short:"r" long:"ranks" description:"Storage engine unique identifiers (ranks) for DAOS pool"` Args struct { @@ -208,11 +209,18 @@ type PoolCreateCmd struct { // Execute is run when PoolCreateCmd subcommand is activated func (cmd *PoolCreateCmd) Execute(args []string) error { - if cmd.Size.IsSet() && (cmd.ScmSize.IsSet() || cmd.NVMeSize.IsSet()) { - return errIncompatFlags("size", "scm-size", "nvme-size") - } - if !cmd.Size.IsSet() && !cmd.ScmSize.IsSet() { - return errors.New("either --size or --scm-size must be supplied") + if cmd.Size.IsSet() { + if cmd.ScmSize.IsSet() || cmd.NVMeSize.IsSet() { + return errIncompatFlags("size", "scm-size", "nvme-size") + } + if cmd.MetaSize.IsSet() { + // NOTE DAOS-14223: --meta-size value is currently not taken into account + // when storage tier sizes are auto-calculated so only + // support in manual mode. 
+ return errors.New("--meta-size can only be set if --scm-size is set") + } + } else if !cmd.ScmSize.IsSet() { + return errors.New("either --size or --scm-size must be set") } if cmd.Args.PoolLabel != "" { @@ -299,13 +307,22 @@ func (cmd *PoolCreateCmd) Execute(args []string) error { scmBytes := cmd.ScmSize.bytes nvmeBytes := cmd.NVMeSize.bytes + metaBytes := cmd.MetaSize.bytes scmRatio := cmd.updateRequest(req, scmBytes, nvmeBytes) - cmd.Infof("Creating DAOS pool with manual per-engine storage allocation: "+ - "%s SCM, %s NVMe (%0.2f%% ratio)", - humanize.Bytes(scmBytes), - humanize.Bytes(nvmeBytes), - scmRatio*100) + if metaBytes > 0 && metaBytes < scmBytes { + return errors.Errorf("--meta-size (%s) can not be smaller than --scm-size (%s)", + humanize.Bytes(metaBytes), humanize.Bytes(scmBytes)) + } + req.MetaBytes = metaBytes + + msg := fmt.Sprintf("Creating DAOS pool with manual per-engine storage allocation:"+ + " %s SCM, %s NVMe (%0.2f%% ratio)", humanize.Bytes(scmBytes), + humanize.Bytes(nvmeBytes), scmRatio*100) + if metaBytes > 0 { + msg += fmt.Sprintf(" with %s meta-blob-size", humanize.Bytes(metaBytes)) + } + cmd.Info(msg) } resp, err := control.PoolCreate(context.Background(), cmd.ctlInvoker, req) diff --git a/src/control/cmd/dmg/pool_test.go b/src/control/cmd/dmg/pool_test.go index 8f17d7e6e4f..0c53669aa10 100644 --- a/src/control/cmd/dmg/pool_test.go +++ b/src/control/cmd/dmg/pool_test.go @@ -226,7 +226,7 @@ func TestPoolCommands(t *testing.T) { "Create pool with missing size", "pool create label", "", - errors.New("must be supplied"), + errors.New("must be set"), }, { "Create pool with missing label", @@ -276,6 +276,12 @@ func TestPoolCommands(t *testing.T) { "", errors.New("may not be mixed"), }, + { + "Create pool with incompatible arguments (auto with meta-blob)", + fmt.Sprintf("pool create label --size %s --meta-size 32G", testSizeStr), + "", + errors.New("can only be set"), + }, { "Create pool with too-large tier-ratio (auto)", fmt.Sprintf("pool 
create label --size %s --tier-ratio 200", testSizeStr), @@ -355,7 +361,7 @@ func TestPoolCommands(t *testing.T) { "Create pool with incompatible arguments (-n without -s)", fmt.Sprintf("pool create label --nvme-size %s", testSizeStr), "", - errors.New("must be supplied"), + errors.New("must be set"), }, { "Create pool with minimal arguments", @@ -374,6 +380,30 @@ func TestPoolCommands(t *testing.T) { }, " "), nil, }, + { + "Create pool with manual meta blob size", + fmt.Sprintf("pool create label --scm-size %s --meta-size 1024G", + testSizeStr), + strings.Join([]string{ + printRequest(t, &control.PoolCreateReq{ + User: eUsr.Username + "@", + UserGroup: eGrp.Name + "@", + Ranks: []ranklist.Rank{}, + TierBytes: []uint64{uint64(testSize), 0}, + MetaBytes: humanize.GByte * 1024, + Properties: []*daos.PoolProperty{ + propWithVal("label", "label"), + }, + }), + }, " "), + nil, + }, + { + "Create pool with manual meta blob size smaller than scm", + "pool create label --scm-size 1026G --meta-size 1024G", + "", + errors.New("can not be smaller than"), + }, { "Create pool with manual ranks", fmt.Sprintf("pool create label --size %s --ranks 1,2", testSizeStr), diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index 00841f9fa28..60ae78baa40 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.31.0 // protoc v3.5.0 // source: mgmt/pool.proto @@ -308,13 +308,14 @@ type PoolCreateReq struct { // representing members of the tree in a breadth-first traversal order. 
// Each domain above rank consists of: (level, id, num children) // Each rank consists of: (rank number) - FaultDomains []uint32 `protobuf:"varint,7,rep,packed,name=faultDomains,proto3" json:"faultDomains,omitempty"` // Fault domain tree, minimal format - Numsvcreps uint32 `protobuf:"varint,8,opt,name=numsvcreps,proto3" json:"numsvcreps,omitempty"` // desired number of pool service replicas - Totalbytes uint64 `protobuf:"varint,9,opt,name=totalbytes,proto3" json:"totalbytes,omitempty"` // Total pool size in bytes (auto config) - Tierratio []float64 `protobuf:"fixed64,10,rep,packed,name=tierratio,proto3" json:"tierratio,omitempty"` // Ratio of storage tiers expressed as % of totalbytes (auto config) - Numranks uint32 `protobuf:"varint,11,opt,name=numranks,proto3" json:"numranks,omitempty"` // Number of target ranks to use (auto config) - Ranks []uint32 `protobuf:"varint,12,rep,packed,name=ranks,proto3" json:"ranks,omitempty"` // target ranks (manual config) - Tierbytes []uint64 `protobuf:"varint,13,rep,packed,name=tierbytes,proto3" json:"tierbytes,omitempty"` // Size in bytes of storage tiers (manual config) + FaultDomains []uint32 `protobuf:"varint,7,rep,packed,name=faultDomains,proto3" json:"faultDomains,omitempty"` // Fault domain tree, minimal format + Numsvcreps uint32 `protobuf:"varint,8,opt,name=numsvcreps,proto3" json:"numsvcreps,omitempty"` // desired number of pool service replicas + Totalbytes uint64 `protobuf:"varint,9,opt,name=totalbytes,proto3" json:"totalbytes,omitempty"` // Total pool size in bytes (auto config) + Tierratio []float64 `protobuf:"fixed64,10,rep,packed,name=tierratio,proto3" json:"tierratio,omitempty"` // Ratio of storage tiers expressed as % of totalbytes (auto config) + Numranks uint32 `protobuf:"varint,11,opt,name=numranks,proto3" json:"numranks,omitempty"` // Number of target ranks to use (auto config) + Ranks []uint32 `protobuf:"varint,12,rep,packed,name=ranks,proto3" json:"ranks,omitempty"` // target ranks (manual config) + 
Tierbytes []uint64 `protobuf:"varint,13,rep,packed,name=tierbytes,proto3" json:"tierbytes,omitempty"` // Size in bytes of storage tiers (manual config) + MetaBlobSize uint64 `protobuf:"varint,14,opt,name=meta_blob_size,json=metaBlobSize,proto3" json:"meta_blob_size,omitempty"` // Size in bytes of metadata blob on SSD (manual config) } func (x *PoolCreateReq) Reset() { @@ -440,17 +441,25 @@ func (x *PoolCreateReq) GetTierbytes() []uint64 { return nil } +func (x *PoolCreateReq) GetMetaBlobSize() uint64 { + if x != nil { + return x.MetaBlobSize + } + return 0 +} + // PoolCreateResp returns created pool uuid and ranks. type PoolCreateResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code - Leader uint32 `protobuf:"varint,2,opt,name=leader,proto3" json:"leader,omitempty"` // Current service leader - SvcReps []uint32 `protobuf:"varint,3,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // pool service replica ranks - TgtRanks []uint32 `protobuf:"varint,4,rep,packed,name=tgt_ranks,json=tgtRanks,proto3" json:"tgt_ranks,omitempty"` // pool target ranks - TierBytes []uint64 `protobuf:"varint,5,rep,packed,name=tier_bytes,json=tierBytes,proto3" json:"tier_bytes,omitempty"` // storage tiers allocated to pool + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code + Leader uint32 `protobuf:"varint,2,opt,name=leader,proto3" json:"leader,omitempty"` // Current service leader + SvcReps []uint32 `protobuf:"varint,3,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // pool service replica ranks + TgtRanks []uint32 `protobuf:"varint,4,rep,packed,name=tgt_ranks,json=tgtRanks,proto3" json:"tgt_ranks,omitempty"` // pool target ranks + TierBytes []uint64 `protobuf:"varint,5,rep,packed,name=tier_bytes,json=tierBytes,proto3" 
json:"tier_bytes,omitempty"` // storage tiers allocated to pool + MetaBlobSize uint64 `protobuf:"varint,6,opt,name=meta_blob_size,json=metaBlobSize,proto3" json:"meta_blob_size,omitempty"` // Size in bytes of metadata blob on SSD (manual config) } func (x *PoolCreateResp) Reset() { @@ -520,6 +529,13 @@ func (x *PoolCreateResp) GetTierBytes() []uint64 { return nil } +func (x *PoolCreateResp) GetMetaBlobSize() uint64 { + if x != nil { + return x.MetaBlobSize + } + return 0 +} + // PoolDestroyReq supplies pool identifier and force flag. type PoolDestroyReq struct { state protoimpl.MessageState @@ -2823,7 +2839,7 @@ var File_mgmt_pool_proto protoreflect.FileDescriptor var file_mgmt_pool_proto_rawDesc = []byte{ 0x0a, 0x0f, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x70, 0x6f, 0x6f, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x22, 0xff, 0x02, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, + 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x22, 0xa5, 0x03, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, @@ -2847,294 +2863,298 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, - 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x22, 0x97, 0x01, 0x0a, 0x0e, 0x50, 0x6f, - 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 
0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x19, 0x0a, 0x08, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, - 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x67, 0x74, 0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x74, 0x67, 0x74, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, - 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, - 0x74, 0x65, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, - 0x72, 0x6f, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x72, - 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, - 0x72, 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, - 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x22, 0xc0, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, - 0x63, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 
0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x12, 0x18, - 0x0a, 0x07, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x07, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x5f, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x0c, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x18, 0x0a, - 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x22, 0x3d, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, - 0x76, 0x69, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x74, + 0x61, 0x5f, 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x0e, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x74, 0x61, 0x42, 0x6c, 0x6f, 0x62, 0x53, 0x69, 0x7a, 0x65, 0x22, + 0xbd, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, + 0x65, 0x72, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, + 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x74, 0x67, 0x74, 
0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, + 0x52, 0x08, 0x74, 0x67, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, + 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, + 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x74, + 0x61, 0x5f, 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x74, 0x61, 0x42, 0x6c, 0x6f, 0x62, 0x53, 0x69, 0x7a, 0x65, 0x22, + 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x63, 0x75, 0x72, + 0x73, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x72, 0x65, 0x63, 0x75, + 0x72, 0x73, 0x69, 0x76, 0x65, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, + 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0x81, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, - 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 
0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, - 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, - 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, - 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, - 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x7f, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, - 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x22, 0xc0, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x12, 0x18, 0x0a, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x09, 0x52, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x64, 0x65, + 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x64, 0x65, 0x73, + 0x74, 0x72, 0x6f, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x5f, 0x64, 0x65, + 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x66, 0x6f, 0x72, + 0x63, 
0x65, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x61, 0x63, + 0x68, 0x69, 0x6e, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x61, 0x63, 0x68, + 0x69, 0x6e, 0x65, 0x22, 0x3d, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, + 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, + 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x63, 0x6f, 0x75, + 0x6e, 0x74, 0x22, 0x81, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, + 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, - 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x27, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, - 0x61, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, - 0xa6, 0x01, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, + 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, + 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 
0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x22, 0x7f, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, - 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, - 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, - 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, - 0x79, 0x74, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, - 0x61, 0x69, 0x6e, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0c, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x73, 0x22, 0x47, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, - 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, - 0x73, 0x22, 0xa3, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, - 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, - 
0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, - 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, - 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, - 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x22, 0x2d, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, - 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x20, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, - 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x83, 0x02, 0x0a, 0x0d, 0x4c, 0x69, 0x73, - 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x2e, 0x0a, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, - 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x05, 0x70, 0x6f, 0x6f, - 0x6c, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x1a, 0x86, 0x01, 0x0a, 0x04, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x12, - 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, - 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 
0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, - 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, - 0x65, 0x70, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x62, - 0x75, 0x69, 0x6c, 0x64, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0c, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x22, 0x4c, - 0x0a, 0x0b, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, + 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x22, 0x27, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0xa6, 0x01, 0x0a, 0x0d, + 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 
0x7b, 0x0a, 0x0c, - 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x37, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, - 0x72, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6f, 0x6e, - 0x74, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x1a, 0x1a, 0x0a, - 0x04, 0x43, 0x6f, 0x6e, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0xb7, 0x01, 0x0a, 0x0c, 0x50, 0x6f, - 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, - 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, - 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x32, 0x0a, 0x15, 0x69, 0x6e, 0x63, - 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, - 0x6b, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x13, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, - 0x65, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x34, 0x0a, - 0x16, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, - 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x69, - 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x22, 0xac, 0x01, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, - 0x73, 0x61, 0x67, 
0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, - 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, - 0x72, 0x65, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x03, 0x6d, 0x69, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x61, 0x78, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x03, 0x6d, 0x61, 0x78, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x12, 0x35, 0x0a, 0x0a, 0x6d, - 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, - 0x70, 0x65, 0x22, 0xbb, 0x01, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x05, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, + 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, + 0x12, 0x22, 0x0a, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x73, + 0x18, 0x06, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, + 0x61, 0x69, 0x6e, 0x73, 0x22, 0x47, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, + 0x6e, 0x64, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 
0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1d, + 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, 0x73, 0x22, 0xa3, 0x01, + 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, + 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, + 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, + 0x74, 0x65, 0x73, 0x22, 0x2d, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, + 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0x20, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x22, 0x83, 0x02, 0x0a, 0x0d, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, + 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 
0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2e, + 0x0a, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, + 0x73, 0x70, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x21, + 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x1a, 0x86, 0x01, 0x0a, 0x04, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, + 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, + 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, + 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, + 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, + 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x22, 0x4c, 0x0a, 0x0b, 0x4c, 0x69, + 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x7b, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x52, 
0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x1d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, - 0x18, 0x0a, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, - 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x02, - 0x22, 0xed, 0x04, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, - 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, - 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, - 0x61, 0x62, 0x65, 0x6c, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, - 0x69, 0x76, 0x65, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x0d, 0x52, 0x0d, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, 
0x67, 0x65, 0x74, 0x73, - 0x12, 0x29, 0x0a, 0x10, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, - 0x62, 0x6c, 0x65, 0x64, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, - 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, - 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, - 0x65, 0x55, 0x73, 0x61, 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, - 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, - 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0c, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, - 0x0e, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, - 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, - 0x67, 0x69, 0x6e, 0x65, 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x45, 0x6e, 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, - 0x6c, 0x5f, 
0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x0d, 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, - 0x72, 0x12, 0x2c, 0x0a, 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, - 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, - 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, - 0x2c, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x4a, 0x04, 0x08, - 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, - 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, - 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, - 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, - 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 
0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, - 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, + 0x12, 0x37, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x0a, 0x63, + 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x1a, 0x1a, 0x0a, 0x04, 0x43, 0x6f, 0x6e, + 0x74, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0xb7, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x32, 0x0a, 0x15, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, + 0x5f, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x13, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x45, 0x6e, 0x61, + 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x34, 0x0a, 0x16, 0x69, 0x6e, 0x63, + 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x69, 0x6e, 0x63, 
0x6c, 0x75, + 0x64, 0x65, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, + 0xac, 0x01, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, 0x67, 0x65, + 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, + 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, + 0x10, 0x0a, 0x03, 0x6d, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x6d, 0x69, + 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x61, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, + 0x6d, 0x61, 0x78, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xbb, + 0x01, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x33, 0x0a, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x72, + 0x65, 0x63, 0x6f, 0x72, 
0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x72, 0x65, + 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, + 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, + 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x02, 0x22, 0xed, 0x04, 0x0a, + 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, - 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, - 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 
0x14, 0x0a, 0x05, 0x6c, 0x61, + 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, + 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x61, + 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x29, 0x0a, 0x10, + 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, + 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, 0x0a, 0x0a, 0x74, 0x69, + 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, + 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x53, 0x74, 0x61, + 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, + 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x65, 0x6e, 0x61, + 0x62, 
0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x69, 0x73, + 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0d, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, 0x67, 0x69, 0x6e, 0x65, + 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x6e, + 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x6c, 0x61, + 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, + 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, + 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, + 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, 0x70, 0x67, 0x72, 0x61, + 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x05, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, + 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, + 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, + 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, + 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, + 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, + 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 
0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, + 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, + 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, + 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, + 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, + 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, + 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, - 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, - 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 
0x73, 0x12, - 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, - 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, - 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, - 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, - 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, - 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, - 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, - 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, - 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, - 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 
0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, - 0x70, 0x65, 0x22, 0xda, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, - 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, - 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, - 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, - 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, - 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, - 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, - 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, - 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, - 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, - 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, - 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 
0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, - 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, - 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, - 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, - 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, - 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, - 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, - 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, - 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, - 0x67, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, - 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, - 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, - 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, - 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 
0x6f, 0x33, + 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, + 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, + 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, + 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, + 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, + 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, + 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, + 0x72, 0x61, 0x6e, 0x6b, 
0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, + 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, + 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, + 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 
0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, + 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, + 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, + 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, + 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, + 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, + 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, + 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, + 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, + 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, + 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, + 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, + 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, + 
0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, + 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, + 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, + 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, + 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, + 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, + 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, + 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/test/utils.go b/src/control/common/test/utils.go index cd88b5acf25..81c17facecd 100644 --- a/src/control/common/test/utils.go +++ b/src/control/common/test/utils.go @@ -129,6 +129,10 @@ func CmpErrBool(want, got error) bool { func CmpErr(t *testing.T, want, got error) { t.Helper() + if want != nil && want.Error() == "" { + t.Fatal("comparison with empty error will always return true, don't do it") + } + if !CmpErrBool(want, got) { t.Fatalf("unexpected error\n(wanted: %v, got: %v)", want, got) } diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index b849099c511..4982a9edc61 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -248,6 +248,7 @@ type ( // manual params Ranks []ranklist.Rank TierBytes []uint64 + MetaBytes uint64 `json:"meta_blob_size"` } // PoolCreateResp contains the response from a pool create request. 
diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index 7ee31167317..4d2e85ef913 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -17,6 +17,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/pkg/errors" + "github.com/daos-stack/daos/src/control/common/proto/convert" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/fault" @@ -349,13 +350,58 @@ func TestControl_PoolEvict(t *testing.T) { } } +func strVal(s string) daos.PoolPropertyValue { + v := daos.PoolPropertyValue{} + v.SetString(s) + return v +} + +func TestControl_PoolCreateReq_Convert(t *testing.T) { + req := &PoolCreateReq{ + User: "bob", + UserGroup: "work", + NumSvcReps: 2, + TotalBytes: 1, + TierRatio: []float64{0.06, 0.94}, + NumRanks: 3, + Ranks: []ranklist.Rank{1, 2, 3}, + TierBytes: []uint64{humanize.GiByte, 10 * humanize.GiByte}, + MetaBytes: 2 * humanize.GiByte, + Properties: []*daos.PoolProperty{ + { + Name: "label", + Number: daos.PoolPropertyLabel, + Value: strVal("foo"), + }, + }, + } + reqPB := new(mgmtpb.PoolCreateReq) + if err := convert.Types(req, reqPB); err != nil { + t.Fatal(err) + } + expReqPB := &mgmtpb.PoolCreateReq{ + User: "bob", + Usergroup: "work", + Numsvcreps: 2, + Totalbytes: 1, + Tierratio: []float64{0.06, 0.94}, + Numranks: 3, + Ranks: []uint32{1, 2, 3}, + Tierbytes: []uint64{humanize.GiByte, 10 * humanize.GiByte}, + MetaBlobSize: 2 * humanize.GiByte, + Properties: []*mgmtpb.PoolProperty{ + {Number: 1, Value: &mgmtpb.PoolProperty_Strval{"foo"}}, + }, + } + + cmpOpt := cmpopts.IgnoreUnexported(mgmtpb.PoolCreateReq{}, mgmtpb.PoolProperty{}) + if diff := cmp.Diff(expReqPB, reqPB, cmpOpt); diff != "" { + t.Fatalf("Unexpected response (-want, +got):\n%s\n", diff) + } +} + func TestControl_PoolCreate(t *testing.T) { mockExt := auth.NewMockExtWithUser("poolTest", 0, 0) - strVal 
:= func(s string) daos.PoolPropertyValue { - v := daos.PoolPropertyValue{} - v.SetString(s) - return v - } for name, tc := range map[string]struct { mic *MockInvokerConfig diff --git a/src/control/server/ctl_storage.go b/src/control/server/ctl_storage.go index fd75f01263c..f4747f87513 100644 --- a/src/control/server/ctl_storage.go +++ b/src/control/server/ctl_storage.go @@ -212,6 +212,9 @@ func (cs *ControlService) scanAssignedBdevs(ctx context.Context, nsps []*ctl.Scm continue } + // NOTE DAOS-14223: This metadata size calculation won't necessarily match + // the meta blob size on SSD if --meta-size is specified in + // pool create command. md_size = mp.GetUsableBytes() / uint64(ei.GetTargetCount()) engineCfg, err := cs.getEngineCfgFromScmNsp(nsp) diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index 1880eb4c7f9..f0d0e0e58f6 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -308,6 +308,8 @@ func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdju engineTargetNb := uint64(engineCfg.TargetCount) if dev.GetRoleBits()&storage.BdevRoleMeta != 0 { + // TODO DAOS-14223: GetMetaSize() should reflect custom values set through pool + // create --meta-size option. 
clusterCount := getClusterCount(dev.GetMetaSize(), engineTargetNb, clusterSize) c.log.Tracef("Removing %d Metadata clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) diff --git a/src/control/server/engine/utils.go b/src/control/server/engine/utils.go index 56734985c22..9f3098389a9 100644 --- a/src/control/server/engine/utils.go +++ b/src/control/server/engine/utils.go @@ -141,8 +141,8 @@ var ( "CRT", "RPC", "BULK", "CORPC", "GRP", "LM", "HG", // CaRT subsystems "EXTERNAL", "ST", "IV", "CTL", } - errLogNameAllWithOther = errors.New("'all' identifier cannot be used with any other") - errLogNameAllInMasks = errors.New("'all' identifier cannot be used in log mask level assignments") + errLogNameAllWithOther = errors.New("'all' identifier can not be used with any other log identifier") + errLogNameAllInMasks = errors.New("'all' identifier can not be used in log mask level assignments") ) func isLogLevelValid(name string) bool { diff --git a/src/control/server/engine/utils_test.go b/src/control/server/engine/utils_test.go index 41b765b4c7c..f4c9aa7a49a 100644 --- a/src/control/server/engine/utils_test.go +++ b/src/control/server/engine/utils_test.go @@ -47,7 +47,7 @@ func Test_ValidateLogMasks(t *testing.T) { }, "single level; single assignment; illegal use of all": { masks: "ERR,all=DBUG", - expErr: errors.New(""), + expErr: errors.New("identifier can not be used"), }, "single level; single assignment; bad level": { masks: "ERR,mgmt=DEG", diff --git a/src/control/server/mgmt_pool.go b/src/control/server/mgmt_pool.go index 9189a7ef116..6ae1e1287da 100644 --- a/src/control/server/mgmt_pool.go +++ b/src/control/server/mgmt_pool.go @@ -299,6 +299,7 @@ func (svc *mgmtSvc) poolCreate(parent context.Context, req *mgmtpb.PoolCreateReq resp.SvcReps = ranklist.RanksToUint32(ps.Replicas) resp.TgtRanks = 
ranklist.RanksToUint32(ps.Storage.CreationRanks()) resp.TierBytes = ps.Storage.PerRankTierStorage + // TODO DAOS-14223: Store Meta-Blob-Size in sysdb. return resp, nil } diff --git a/src/control/server/mgmt_pool_test.go b/src/control/server/mgmt_pool_test.go index e484b7be19b..004dc19a08a 100644 --- a/src/control/server/mgmt_pool_test.go +++ b/src/control/server/mgmt_pool_test.go @@ -314,6 +314,7 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { targetCount int memberCount int req *mgmtpb.PoolCreateReq + drpcRet *mgmtpb.PoolCreateResp expResp *mgmtpb.PoolCreateResp expErr error }{ @@ -388,11 +389,34 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Tierbytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, TgtRanks: []uint32{0, 1}, }, }, + "successful creation with meta size": { + targetCount: 8, + req: &mgmtpb.PoolCreateReq{ + Uuid: test.MockUUID(1), + Tierbytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + Properties: testPoolLabelProp(), + }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + TgtRanks: []uint32{0, 1}, + }, + expResp: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + TgtRanks: []uint32{0, 1}, + }, + }, "successful creation minimum size": { targetCount: 8, req: &mgmtpb.PoolCreateReq{ @@ -400,6 +424,10 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Tierbytes: []uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: 
[]uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ TierBytes: []uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, TgtRanks: []uint32{0, 1}, @@ -412,9 +440,19 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Totalbytes: 100 * humanize.GiByte, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{ + (100 * humanize.GiByte * DefaultPoolScmRatio) / 2, + (100 * humanize.GiByte * DefaultPoolNvmeRatio) / 2, + }, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ - TierBytes: []uint64{((100 * humanize.GiByte) * DefaultPoolScmRatio) / 2, (100 * humanize.GiByte * DefaultPoolNvmeRatio) / 2}, - TgtRanks: []uint32{0, 1}, + TierBytes: []uint64{ + (100 * humanize.GiByte * DefaultPoolScmRatio) / 2, + (100 * humanize.GiByte * DefaultPoolNvmeRatio) / 2, + }, + TgtRanks: []uint32{0, 1}, }, }, "failed creation invalid ranks": { @@ -512,14 +550,15 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { numMembers = 2 } for i := 0; i < numMembers; i++ { - if _, err := tc.mgmtSvc.membership.Add(system.MockMember(t, uint32(i), system.MemberStateJoined)); err != nil { + mm := system.MockMember(t, uint32(i), system.MemberStateJoined) + if _, err := tc.mgmtSvc.membership.Add(mm); err != nil { t.Fatal(err) } } if tc.setupMockDrpc == nil { tc.setupMockDrpc = func(svc *mgmtSvc, err error) { - setupMockDrpcClient(tc.mgmtSvc, tc.expResp, tc.expErr) + setupMockDrpcClient(tc.mgmtSvc, tc.drpcRet, tc.expErr) } } tc.setupMockDrpc(tc.mgmtSvc, tc.expErr) diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index 912218b58bd..6b740050c98 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -1504,7 +1504,7 @@ void mgmt__pool_query_target_resp__free_unpacked assert(message->base.descriptor == &mgmt__pool_query_target_resp__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -static 
const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[13] = +static const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[14] = { { "uuid", @@ -1662,10 +1662,23 @@ static const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[1 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "meta_blob_size", + 14, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolCreateReq, meta_blob_size), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_create_req__field_indices_by_name[] = { 4, /* field[4] = acl */ 6, /* field[6] = faultDomains */ + 13, /* field[13] = meta_blob_size */ 10, /* field[10] = numranks */ 7, /* field[7] = numsvcreps */ 5, /* field[5] = properties */ @@ -1681,7 +1694,7 @@ static const unsigned mgmt__pool_create_req__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__pool_create_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 13 } + { 0, 14 } }; const ProtobufCMessageDescriptor mgmt__pool_create_req__descriptor = { @@ -1691,14 +1704,14 @@ const ProtobufCMessageDescriptor mgmt__pool_create_req__descriptor = "Mgmt__PoolCreateReq", "mgmt", sizeof(Mgmt__PoolCreateReq), - 13, + 14, mgmt__pool_create_req__field_descriptors, mgmt__pool_create_req__field_indices_by_name, 1, mgmt__pool_create_req__number_ranges, (ProtobufCMessageInit) mgmt__pool_create_req__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[5] = +static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[6] = { { "status", @@ -1760,9 +1773,22 @@ static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[ 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "meta_blob_size", + 6, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + 
offsetof(Mgmt__PoolCreateResp, meta_blob_size), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_create_resp__field_indices_by_name[] = { 1, /* field[1] = leader */ + 5, /* field[5] = meta_blob_size */ 0, /* field[0] = status */ 2, /* field[2] = svc_reps */ 3, /* field[3] = tgt_ranks */ @@ -1771,7 +1797,7 @@ static const unsigned mgmt__pool_create_resp__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__pool_create_resp__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 5 } + { 0, 6 } }; const ProtobufCMessageDescriptor mgmt__pool_create_resp__descriptor = { @@ -1781,7 +1807,7 @@ const ProtobufCMessageDescriptor mgmt__pool_create_resp__descriptor = "Mgmt__PoolCreateResp", "mgmt", sizeof(Mgmt__PoolCreateResp), - 5, + 6, mgmt__pool_create_resp__field_descriptors, mgmt__pool_create_resp__field_indices_by_name, 1, mgmt__pool_create_resp__number_ranges, diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index 1fc8a18ae76..9357267326f 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -209,10 +209,14 @@ struct _Mgmt__PoolCreateReq */ size_t n_tierbytes; uint64_t *tierbytes; + /* + * Size in bytes of metadata blob on SSD (manual config) + */ + uint64_t meta_blob_size; }; #define MGMT__POOL_CREATE_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_create_req__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0, 0, 0,NULL, 0, 0,NULL, 0,NULL } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0, 0, 0,NULL, 0, 0,NULL, 0,NULL, 0 } /* @@ -244,10 +248,14 @@ struct _Mgmt__PoolCreateResp */ size_t n_tier_bytes; uint64_t *tier_bytes; + /* + * Size in bytes of metadata blob on SSD (manual config) + */ + uint64_t meta_blob_size; }; #define 
MGMT__POOL_CREATE_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_create_resp__descriptor) \ - , 0, 0, 0,NULL, 0,NULL, 0,NULL } + , 0, 0, 0,NULL, 0,NULL, 0,NULL, 0 } /* diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index a5d089f2c11..9ef6054beda 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -497,7 +497,8 @@ ds_mgmt_drpc_pool_create(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) /* Ranks to allocate targets (in) & svc for pool replicas (out). */ rc = ds_mgmt_create_pool(pool_uuid, req->sys, "pmem", targets, req->tierbytes[DAOS_MEDIA_SCM], req->tierbytes[DAOS_MEDIA_NVME], - prop, &svc, req->n_faultdomains, req->faultdomains); + prop, &svc, req->n_faultdomains, req->faultdomains, + req->meta_blob_size); if (rc != 0) { D_ERROR("failed to create pool: "DF_RC"\n", DP_RC(rc)); goto out; diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index 9cfc0b5a0ab..6bd142022ab 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -66,7 +66,7 @@ int ds_mgmt_group_update_handler(struct mgmt_grp_up_in *in); /** srv_pool.c */ int ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, - int domains_nr, uint32_t *domains); + int domains_nr, uint32_t *domains, size_t meta_blob_size); int ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks); int ds_mgmt_evict_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks, uuid_t *handles, size_t n_handles, uint32_t destroy, uint32_t force_destroy, diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index e5f94333f1c..5f99c9ef406 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -168,16 +168,17 @@ ds_mgmt_pool_svc_create(uuid_t pool_uuid, int ntargets, const char *group, d_ran } int -ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, - d_rank_list_t *targets, size_t scm_size, size_t nvme_size, - daos_prop_t *prop, d_rank_list_t **svcp, 
- int domains_nr, uint32_t *domains) +ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, + size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, + int domains_nr, uint32_t *domains, size_t meta_blob_size) { d_rank_list_t *pg_ranks = NULL; d_rank_list_t *pg_targets = NULL; int rc; int rc_cleanup; + D_DEBUG(DB_MGMT, DF_UUID ": meta blob size %ld", DP_UUID(pool_uuid), meta_blob_size); + /* Sanity check targets versus cart's current primary group members. * If any targets not in PG, flag error before MGMT_TGT_ corpcs fail. */ diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index 4b104f19195..80f95891c8d 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -509,11 +509,9 @@ ds_mgmt_group_update_handler(struct mgmt_grp_up_in *in) } int -ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, - d_rank_list_t *targets, size_t scm_size, - size_t nvme_size, daos_prop_t *prop, - d_rank_list_t **svcp, int nr_domains, - uint32_t *domains) +ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, + size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, + int domains_nr, uint32_t *domains, size_t meta_blob_size) { return 0; } diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index 1dd9842db51..51b55b1254f 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -32,6 +32,7 @@ message PoolCreateReq { uint32 numranks = 11; // Number of target ranks to use (auto config) repeated uint32 ranks = 12; // target ranks (manual config) repeated uint64 tierbytes = 13; // Size in bytes of storage tiers (manual config) + uint64 meta_blob_size = 14; // Size in bytes of metadata blob on SSD (manual config) } // PoolCreateResp returns created pool uuid and ranks. 
@@ -41,6 +42,7 @@ message PoolCreateResp { repeated uint32 svc_reps = 3; // pool service replica ranks repeated uint32 tgt_ranks = 4; // pool target ranks repeated uint64 tier_bytes = 5; // storage tiers allocated to pool + uint64 meta_blob_size = 6; // Size in bytes of metadata blob on SSD (manual config) } // PoolDestroyReq supplies pool identifier and force flag. From 35f269b9c38ffb833f82968f8f85a1d4d3e9e118 Mon Sep 17 00:00:00 2001 From: wangdi Date: Thu, 14 Sep 2023 23:52:10 -0700 Subject: [PATCH 44/80] DAOS-14338 object: create migrate container in all cases (#13041) Since exclude/drain might change the shard location as well, so let's create the container for all casing during migration. Checking pool stopping before creating container during migration. Signed-off-by: Di Wang --- src/object/srv_obj_migrate.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index c3bc5472b83..fa7579ca516 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1484,21 +1484,31 @@ migrate_punch(struct migrate_pool_tls *tls, struct migrate_one *mrone, static int migrate_get_cont_child(struct migrate_pool_tls *tls, uuid_t cont_uuid, - struct ds_cont_child **cont_p) + struct ds_cont_child **cont_p, bool create) { struct ds_cont_child *cont_child = NULL; int rc; *cont_p = NULL; - if (tls->mpt_opc == RB_OP_EXTEND || tls->mpt_opc == RB_OP_REINT) { - /* For extend and reintegration, it may need create the container */ + if (tls->mpt_pool->spc_pool->sp_stopping) { + D_DEBUG(DB_REBUILD, DF_UUID "pool is being destroyed.\n", + DP_UUID(tls->mpt_pool_uuid)); + return 0; + } + + if (create) { + /* Since the shard might be moved different location for any pool operation, + * so it may need create the container in all cases. 
+ */ rc = ds_cont_child_open_create(tls->mpt_pool_uuid, cont_uuid, &cont_child); if (rc != 0) { - if (rc == -DER_SHUTDOWN) { + if (rc == -DER_SHUTDOWN || (cont_child && cont_child->sc_stopping)) { D_DEBUG(DB_REBUILD, DF_UUID "container is being destroyed\n", DP_UUID(cont_uuid)); rc = 0; } + if (cont_child) + ds_cont_child_put(cont_child); return rc; } } else { @@ -1532,7 +1542,7 @@ migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone, int rc; D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); - rc = migrate_get_cont_child(tls, mrone->mo_cont_uuid, &cont); + rc = migrate_get_cont_child(tls, mrone->mo_cont_uuid, &cont, true); if (rc || cont == NULL) D_GOTO(cont_put, rc); @@ -2441,7 +2451,7 @@ migrate_obj_punch_one(void *data) tls, DP_UUID(tls->mpt_pool_uuid), arg->version, arg->punched_epoch, DP_UOID(arg->oid)); - rc = migrate_get_cont_child(tls, arg->cont_uuid, &cont); + rc = migrate_get_cont_child(tls, arg->cont_uuid, &cont, true); if (rc != 0 || cont == NULL) D_GOTO(put, rc); @@ -2943,7 +2953,7 @@ migrate_obj_ult(void *data) struct ds_cont_child *cont_child = NULL; /* check again to see if the container is being destroyed. */ - migrate_get_cont_child(tls, arg->cont_uuid, &cont_child); + migrate_get_cont_child(tls, arg->cont_uuid, &cont_child, false); if (cont_child == NULL || cont_child->sc_stopping) rc = 0; From 8647532007bd3a932692f85738e00962e30a45fd Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 15 Sep 2023 08:26:09 +0100 Subject: [PATCH 45/80] DAOS-6854 dfuse: Use dfs_ostatx for aync getattr calls. (#12832) This allows stat calls to happen with both rpcs in parallel reducing wall-clock time for the query, and moves to a async interface so that it can be completed in a callback and not require a dedicated thread for the duration of the operaton. 
Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 25 +++++++++------- src/client/dfuse/ops/fgetattr.c | 51 +++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index c0a6453354d..4c4d88bbbdb 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -323,17 +323,22 @@ struct dfuse_inode_ops { }; struct dfuse_event { - fuse_req_t de_req; /**< The fuse request handle */ - daos_event_t de_ev; - size_t de_len; /**< The size returned by daos */ - d_iov_t de_iov; - d_sg_list_t de_sgl; - d_list_t de_list; - struct dfuse_eq *de_eqt; - struct dfuse_obj_hdl *de_oh; - off_t de_req_position; /**< The file position requested by fuse */ - size_t de_req_len; + fuse_req_t de_req; /**< The fuse request handle */ + daos_event_t de_ev; + size_t de_len; /**< The size returned by daos */ + d_iov_t de_iov; + d_sg_list_t de_sgl; + d_list_t de_list; + struct dfuse_eq *de_eqt; + union { + struct dfuse_obj_hdl *de_oh; + struct dfuse_inode_entry *de_ie; + }; + off_t de_req_position; /**< The file position requested by fuse */ + size_t de_req_len; void (*de_complete_cb)(struct dfuse_event *ev); + + struct stat de_attr; }; extern struct dfuse_inode_ops dfuse_dfs_ops; diff --git a/src/client/dfuse/ops/fgetattr.c b/src/client/dfuse/ops/fgetattr.c index 481c9fc56e6..6fdee73515c 100644 --- a/src/client/dfuse/ops/fgetattr.c +++ b/src/client/dfuse/ops/fgetattr.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7,11 +7,32 @@ #include "dfuse_common.h" #include "dfuse.h" +static void +dfuse_cb_getattr_cb(struct dfuse_event *ev) +{ + if (ev->de_ev.ev_error != 0) { + DFUSE_REPLY_ERR_RAW(ev->de_ie, ev->de_req, ev->de_ev.ev_error); + D_GOTO(release, 0); + } + + ev->de_attr.st_ino = ev->de_ie->ie_stat.st_ino; + + ev->de_ie->ie_stat = ev->de_attr; + + DFUSE_REPLY_ATTR(ev->de_ie, ev->de_req, &ev->de_attr); +release: + daos_event_fini(&ev->de_ev); + D_FREE(ev); +} + void dfuse_cb_getattr(fuse_req_t req, struct dfuse_inode_entry *ie) { - struct stat attr = {}; - int rc; + struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_event *ev; + uint64_t eqt_idx; + struct dfuse_eq *eqt; + int rc; if (ie->ie_unlinked) { DFUSE_TRA_DEBUG(ie, "File is unlinked, returning most recent data"); @@ -19,17 +40,29 @@ dfuse_cb_getattr(fuse_req_t req, struct dfuse_inode_entry *ie) return; } - rc = dfs_ostat(ie->ie_dfs->dfs_ns, ie->ie_obj, &attr); - if (rc != 0) - D_GOTO(err, rc); + eqt_idx = atomic_fetch_add_relaxed(&dfuse_info->di_eqt_idx, 1); + eqt = &dfuse_info->di_eqt[eqt_idx % dfuse_info->di_eq_count]; + D_ALLOC_PTR(ev); + if (ev == NULL) + D_GOTO(err, rc = ENOMEM); - attr.st_ino = ie->ie_stat.st_ino; + ev->de_req = req; + ev->de_complete_cb = dfuse_cb_getattr_cb; + ev->de_ie = ie; - ie->ie_stat = attr; + rc = daos_event_init(&ev->de_ev, eqt->de_eq, NULL); + if (rc != -DER_SUCCESS) + D_GOTO(ev, rc = daos_der2errno(rc)); + + rc = dfs_ostatx(ie->ie_dfs->dfs_ns, ie->ie_obj, &ev->de_attr, &ev->de_ev); + if (rc != 0) + D_GOTO(ev, rc); - DFUSE_REPLY_ATTR(ie, req, &attr); + sem_post(&eqt->de_sem); return; +ev: + D_FREE(ev); err: DFUSE_REPLY_ERR_RAW(ie, req, rc); } From f479b9e45f8a26ac7a1ed0f1bf57443749804592 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 15 Sep 2023 11:22:50 -0400 Subject: [PATCH 46/80] DAOS-14302 test: Fix skipping DAOS_Drain_Simple tests (#13051) Signed-off-by: Phil Henderson --- 
src/tests/suite/daos_drain_simple.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/tests/suite/daos_drain_simple.c b/src/tests/suite/daos_drain_simple.c index 57a4772d666..65bcd069de9 100644 --- a/src/tests/suite/daos_drain_simple.c +++ b/src/tests/suite/daos_drain_simple.c @@ -38,6 +38,8 @@ drain_dkeys(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -103,6 +105,8 @@ cont_open_in_drain(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -157,6 +161,8 @@ drain_akeys(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -208,6 +214,8 @@ drain_indexes(void **state) int i; int j; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -267,6 +275,7 @@ drain_snap_update_keys(void **state) char buf[256]; int buf_len = 256; + FAULT_INJECTION_REQUIRED(); if (!test_runable(arg, 4)) return; @@ -343,6 +352,8 @@ drain_snap_punch_keys(void **state) int buf_len = 256; uint32_t number; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -429,6 +440,8 @@ drain_multiple(void **state) int j; int k; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -495,6 +508,8 @@ drain_large_rec(void **state) char buffer[5000]; char v_buffer[5000]; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -543,6 +558,8 @@ drain_objects(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -569,6 +586,8 @@ drain_fail_and_retry_objects(void **state) daos_obj_id_t oids[OBJ_NR]; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -598,6 +617,8 @@ drain_then_exclude(void **state) test_arg_t *arg = *state; daos_obj_id_t oid; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -849,6 
+870,8 @@ dfs_extend_drain_common(void **state, int opc, uint32_t objclass) dfs_attr_t attr = {}; int rc; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -1003,8 +1026,6 @@ run_daos_drain_simple_test(int rank, int size, int *sub_tests, { int rc = 0; - FAULT_INJECTION_REQUIRED(); - par_barrier(PAR_COMM_WORLD); if (sub_tests_size == 0) { sub_tests_size = ARRAY_SIZE(drain_tests); From 7172771be81b0999b995c51eb69a9e44a59cb24a Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sat, 16 Sep 2023 10:06:04 -0500 Subject: [PATCH 47/80] DAOS-14110 il: create and do io against an EQ in the IL (#13007) - Use an EQ in the IL for dfs read and write for better performance and support of fork mode in fio() - add a new env variable D_IL_MAX_EQ to make the number of EQs / network contexts configurable. 64 is the default and the maximum allowed. Asking for more than that will not be an error though and the library will print a warning an set that to 64. - in case of multiple threads, each thread will create a new EQ till the max are created, then other threads will just round robin use of the EQs that are created. - When fork is called, the main thread eq is replaced with a new one from the child process (with at_fork child handler) to prevent sharing of any internal network context resources. - Update the IL VM build test to use less EQs than the default to prevent OOM since the VMs are constrained on memory. 
Signed-off-by: Mohamad Chaarawi Co-authored-by: Fan Yong --- SConstruct | 2 +- src/client/api/event.c | 12 ++-- src/client/dfs/dfs.c | 8 ++- src/client/dfuse/il/int_posix.c | 99 +++++++++++++++++++++++++++-- src/client/dfuse/il/int_read.c | 47 ++++++++++++-- src/client/dfuse/il/int_write.c | 42 ++++++++++-- src/client/dfuse/il/ioil.h | 4 +- src/common/misc.c | 16 +++-- src/include/daos/dtx.h | 1 + src/tests/ftest/dfuse/daos_build.py | 3 +- 10 files changed, 204 insertions(+), 30 deletions(-) diff --git a/SConstruct b/SConstruct index abc04dc8b64..8e59cc56c4f 100644 --- a/SConstruct +++ b/SConstruct @@ -363,7 +363,7 @@ MINIMAL_ENV = ('HOME', 'TERM', 'SSH_AUTH_SOCK', 'http_proxy', 'https_proxy', 'PK # Environment variables that are also kept when LD_PRELOAD is set. PRELOAD_ENV = ('LD_PRELOAD', 'D_LOG_FILE', 'DAOS_AGENT_DRPC_DIR', 'D_LOG_MASK', 'DD_MASK', - 'DD_SUBSYS') + 'DD_SUBSYS', 'D_IL_MAX_EQ') def scons(): diff --git a/src/client/api/event.c b/src/client/api/event.c index 85dd514da17..e6996fb6155 100644 --- a/src/client/api/event.c +++ b/src/client/api/event.c @@ -484,8 +484,13 @@ daos_event_complete(struct daos_event *ev, int rc) } if (evx->evx_status == DAOS_EVS_READY || evx->evx_status == DAOS_EVS_COMPLETED || - evx->evx_status == DAOS_EVS_ABORTED) + evx->evx_status == DAOS_EVS_ABORTED) { + if (evx->is_errno) + ev->ev_error = daos_der2errno(rc); + else + ev->ev_error = rc; goto out; + } D_ASSERT(evx->evx_status == DAOS_EVS_RUNNING); @@ -830,7 +835,7 @@ daos_eq_destroy(daos_handle_t eqh, int flags) eqx = daos_eq_lookup(eqh); if (eqx == NULL) { - D_ERROR("eqh nonexist.\n"); + D_ERROR("daos_eq_lookup() failed: "DF_RC"\n", DP_RC(-DER_NONEXIST)); return -DER_NONEXIST; } @@ -862,8 +867,7 @@ daos_eq_destroy(daos_handle_t eqh, int flags) if (eqx->eqx_ctx != NULL) { rc = crt_context_flush(eqx->eqx_ctx, 0); if (rc != 0) { - D_ERROR("failed to flush client context: "DF_RC"\n", - DP_RC(rc)); + D_ERROR("failed to flush client context: "DF_RC"\n", DP_RC(rc)); return rc; } 
} diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 3313c188c64..e502a95ff83 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -4489,7 +4489,7 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, D_ALLOC_PTR(params); if (params == NULL) - D_GOTO(err_task, rc = ENOMEM); + D_GOTO(err_task, rc = -DER_NOMEM); params->read_size = read_size; @@ -4513,10 +4513,12 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, daos_task_set_priv(task, params); rc = tse_task_register_cbs(task, NULL, NULL, 0, read_cb, NULL, 0); if (rc) - D_GOTO(err_params, rc = daos_der2errno(rc)); + D_GOTO(err_params, rc); rc = dc_task_schedule(task, true); - return daos_der2errno(rc); + if (rc) + D_GOTO(err_task, rc); + return 0; err_params: D_FREE(params); diff --git a/src/client/dfuse/il/int_posix.c b/src/client/dfuse/il/int_posix.c index b845c85c05c..93a91cd6215 100644 --- a/src/client/dfuse/il/int_posix.c +++ b/src/client/dfuse/il/int_posix.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -33,6 +33,10 @@ FOREACH_INTERCEPT(IOIL_FORWARD_DECL) +static __thread daos_handle_t ioil_eqh; + +#define IOIL_MAX_EQ 64 + struct ioil_pool { daos_handle_t iop_poh; uuid_t iop_uuid; @@ -43,13 +47,17 @@ struct ioil_pool { struct ioil_global { pthread_mutex_t iog_lock; d_list_t iog_pools_head; + daos_handle_t iog_main_eqh; + daos_handle_t iog_eqs[IOIL_MAX_EQ]; + uint16_t iog_eq_count_max; + uint16_t iog_eq_count; + uint16_t iog_eq_idx; pid_t iog_init_tid; bool iog_initialized; bool iog_no_daos; bool iog_daos_init; bool iog_show_summary; /**< Should a summary be shown at teardown */ - unsigned iog_report_count; /**< Number of operations that should be logged */ ATOMIC uint64_t iog_file_count; /**< Number of file opens intercepted */ @@ -277,6 +285,7 @@ ioil_init(void) struct rlimit rlimit; int rc; uint64_t report_count = 0; + uint64_t eq_count = 0; pthread_once(&init_links_flag, init_links); @@ -319,6 +328,18 @@ ioil_init(void) if (rc) return; + rc = d_getenv_uint64_t("D_IL_MAX_EQ", &eq_count); + if (rc != -DER_NONEXIST) { + if (eq_count > IOIL_MAX_EQ) { + DFUSE_LOG_WARNING("Max EQ count (%"PRIu64") should not exceed: %d", + eq_count, IOIL_MAX_EQ); + eq_count = IOIL_MAX_EQ; + } + ioil_iog.iog_eq_count_max = (uint16_t)eq_count; + } else { + ioil_iog.iog_eq_count_max = IOIL_MAX_EQ; + } + ioil_iog.iog_initialized = true; } @@ -377,12 +398,55 @@ ioil_fini(void) ioil_shrink_pool(pool); } - if (ioil_iog.iog_daos_init) + if (ioil_iog.iog_daos_init) { + int i; + + /** destroy EQs created by threads */ + for (i = 0; i < ioil_iog.iog_eq_count; i++) + daos_eq_destroy(ioil_iog.iog_eqs[i], 0); + /** destroy main thread eq */ + if (daos_handle_is_valid(ioil_iog.iog_main_eqh)) + daos_eq_destroy(ioil_iog.iog_main_eqh, 0); daos_fini(); + } ioil_iog.iog_daos_init = false; daos_debug_fini(); } +int +ioil_get_eqh(daos_handle_t *eqh) +{ + int rc; + + if (daos_handle_is_valid(ioil_eqh)) { + *eqh = ioil_eqh; + return 0; + } + + /** No EQ 
support requested */ + if (ioil_iog.iog_eq_count_max == 0) + return -1; + + rc = pthread_mutex_lock(&ioil_iog.iog_lock); + /** create a new EQ if the EQ pool is not full; otherwise round robin EQ use from pool */ + if (ioil_iog.iog_eq_count >= ioil_iog.iog_eq_count_max) { + ioil_eqh = ioil_iog.iog_eqs[ioil_iog.iog_eq_idx ++]; + if (ioil_iog.iog_eq_idx == ioil_iog.iog_eq_count_max) + ioil_iog.iog_eq_idx = 0; + } else { + rc = daos_eq_create(&ioil_eqh); + if (rc) { + pthread_mutex_unlock(&ioil_iog.iog_lock); + return -1; + } + ioil_iog.iog_eqs[ioil_iog.iog_eq_count] = ioil_eqh; + ioil_iog.iog_eq_count ++; + } + pthread_mutex_unlock(&ioil_iog.iog_lock); + *eqh = ioil_eqh; + return 0; +} + /* Get the object handle for the file itself */ static int fetch_dfs_obj_handle(int fd, struct fd_entry *entry) @@ -729,6 +793,20 @@ call_daos_init(int fd) return rcb; } +static void +child_hdlr(void) +{ + int rc; + + daos_dti_reset(); + ioil_eqh = DAOS_HDL_INVAL; + rc = daos_eq_create(&ioil_eqh); + if (rc) + DFUSE_LOG_WARNING("daos_eq_create() failed: "DF_RC, DP_RC(rc)); + else + ioil_iog.iog_main_eqh = ioil_eqh; +} + /* Returns true on success */ static bool check_ioctl_on_open(int fd, struct fd_entry *entry, int flags) @@ -764,10 +842,23 @@ check_ioctl_on_open(int fd, struct fd_entry *entry, int flags) rc = pthread_mutex_lock(&ioil_iog.iog_lock); D_ASSERT(rc == 0); - if (!ioil_iog.iog_daos_init) + if (!ioil_iog.iog_daos_init) { if (!call_daos_init(fd)) goto err; + if (ioil_iog.iog_eq_count_max) { + rc = daos_eq_create(&ioil_eqh); + if (rc) { + DFUSE_LOG_WARNING("daos_eq_create() failed: "DF_RC, DP_RC(rc)); + D_GOTO(err, rc = daos_der2errno(rc)); + } + ioil_iog.iog_main_eqh = ioil_eqh; + + rc = pthread_atfork(NULL, NULL, &child_hdlr); + D_ASSERT(rc == 0); + } + } + d_list_for_each_entry(pool, &ioil_iog.iog_pools_head, iop_pools) { if (uuid_compare(pool->iop_uuid, il_reply.fir_pool) != 0) continue; diff --git a/src/client/dfuse/il/int_read.c b/src/client/dfuse/il/int_read.c index 
6b5ee1fd7b5..497e39273ab 100644 --- a/src/client/dfuse/il/int_read.c +++ b/src/client/dfuse/il/int_read.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,17 +15,52 @@ static ssize_t read_bulk(char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - daos_size_t read_size = 0; - d_iov_t iov = {}; - d_sg_list_t sgl = {}; - int rc; + daos_size_t read_size = 0; + d_iov_t iov = {}; + d_sg_list_t sgl = {}; + daos_event_t ev; + daos_handle_t eqh; + int rc; DFUSE_TRA_DEBUG(entry->fd_dfsoh, "%#zx-%#zx", position, position + len - 1); sgl.sg_nr = 1; d_iov_set(&iov, (void *)buff, len); sgl.sg_iovs = &iov; - rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &read_size, NULL); + + rc = ioil_get_eqh(&eqh); + if (rc == 0) { + bool flag = false; + + rc = daos_event_init(&ev, eqh, NULL); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_init() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, + &read_size, &ev); + if (rc) + D_GOTO(out, rc); + + while (1) { + rc = daos_event_test(&ev, DAOS_EQ_NOWAIT, &flag); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_test() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + if (flag) + break; + sched_yield(); + } + rc = ev.ev_error; + } else { + rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &read_size, + NULL); + } +out: if (rc) { DFUSE_TRA_ERROR(entry->fd_dfsoh, "dfs_read() failed: %d (%s)", rc, strerror(rc)); *errcode = rc; diff --git a/src/client/dfuse/il/int_write.c b/src/client/dfuse/il/int_write.c index fc602f0a1c3..abbb573638d 100644 --- a/src/client/dfuse/il/int_write.c +++ b/src/client/dfuse/il/int_write.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. 
+ * (C) Copyright 2017-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,9 +15,11 @@ ssize_t ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - d_iov_t iov = {}; - d_sg_list_t sgl = {}; - int rc; + d_iov_t iov = {}; + d_sg_list_t sgl = {}; + daos_event_t ev; + daos_handle_t eqh; + int rc; DFUSE_TRA_DEBUG(entry->fd_dfsoh, "%#zx-%#zx", position, position + len - 1); @@ -25,7 +27,37 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en d_iov_set(&iov, (void *)buff, len); sgl.sg_iovs = &iov; - rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, NULL); + rc = ioil_get_eqh(&eqh); + if (rc == 0) { + bool flag = false; + + rc = daos_event_init(&ev, eqh, NULL); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_init() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &ev); + if (rc) + D_GOTO(out, rc); + + while (1) { + rc = daos_event_test(&ev, DAOS_EQ_NOWAIT, &flag); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_test() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + if (flag) + break; + sched_yield(); + } + rc = ev.ev_error; + } else { + rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, NULL); + } +out: if (rc) { DFUSE_TRA_ERROR(entry->fd_dfsoh, "dfs_write() failed: %d (%s)", rc, strerror(rc)); *errcode = rc; diff --git a/src/client/dfuse/il/ioil.h b/src/client/dfuse/il/ioil.h index 8c4a7205e4e..b9581b3bd77 100644 --- a/src/client/dfuse/il/ioil.h +++ b/src/client/dfuse/il/ioil.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -53,5 +53,7 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en ssize_t ioil_do_pwritev(const struct iovec *iov, int count, off_t position, struct fd_entry *entry, int *errcode); +int +ioil_get_eqh(daos_handle_t *eqh); #endif /* __IOIL_H__ */ diff --git a/src/common/misc.c b/src/common/misc.c index bc902538e1a..eeb5c4522ef 100644 --- a/src/common/misc.c +++ b/src/common/misc.c @@ -705,6 +705,8 @@ daos_crt_init_opt_get(bool server, int ctx_nr) return &daos_crt_init_opt; } +static __thread uuid_t dti_uuid; + void daos_dti_gen_unique(struct dtx_id *dti) { @@ -719,19 +721,23 @@ daos_dti_gen_unique(struct dtx_id *dti) void daos_dti_gen(struct dtx_id *dti, bool zero) { - static __thread uuid_t uuid; - if (zero) { memset(dti, 0, sizeof(*dti)); } else { - if (uuid_is_null(uuid)) - uuid_generate(uuid); + if (uuid_is_null(dti_uuid)) + uuid_generate(dti_uuid); - uuid_copy(dti->dti_uuid, uuid); + uuid_copy(dti->dti_uuid, dti_uuid); dti->dti_hlc = d_hlc_get(); } } +void +daos_dti_reset(void) +{ + memset(dti_uuid, 0, sizeof(dti_uuid)); +} + /** * daos_recx_alloc/_free to provide same log facility for recx's alloc and free * for iom->iom_recxs' usage for example. 
diff --git a/src/include/daos/dtx.h b/src/include/daos/dtx.h index 272c041dabf..14b2337ea0f 100644 --- a/src/include/daos/dtx.h +++ b/src/include/daos/dtx.h @@ -174,6 +174,7 @@ struct dtx_id { void daos_dti_gen_unique(struct dtx_id *dti); void daos_dti_gen(struct dtx_id *dti, bool zero); +void daos_dti_reset(void); static inline void daos_dti_copy(struct dtx_id *des, const struct dtx_id *src) diff --git a/src/tests/ftest/dfuse/daos_build.py b/src/tests/ftest/dfuse/daos_build.py index acb2b8cb6a5..5edd0b328df 100644 --- a/src/tests/ftest/dfuse/daos_build.py +++ b/src/tests/ftest/dfuse/daos_build.py @@ -135,9 +135,11 @@ def run_build_test(self, cache_mode, intercept=False, run_on_vms=False): # Note that run_on_vms does not tell ftest where to run, this should be set according to # the test tags so the test can run with appropriate settings. + remote_env = {} if run_on_vms: dfuse_namespace = dfuse_namespace = "/run/dfuse_vm/*" build_jobs = 6 * 2 + remote_env['D_IL_MAX_EQ'] = '6' intercept_jobs = build_jobs if intercept: @@ -189,7 +191,6 @@ def run_build_test(self, cache_mode, intercept=False, run_on_vms=False): mount_dir = self.dfuse.mount_dir.value build_dir = os.path.join(mount_dir, 'daos') - remote_env = {} remote_env['PATH'] = '{}:$PATH'.format(os.path.join(mount_dir, 'venv', 'bin')) remote_env['VIRTUAL_ENV'] = os.path.join(mount_dir, 'venv') remote_env['COVFILE'] = os.environ['COVFILE'] From f6b1ec8bea82c291ca61a2d1924ab46c403e91a9 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Mon, 18 Sep 2023 16:39:20 -0400 Subject: [PATCH 48/80] DAOS-14385 test: Fix TestPool.pool_query_delay use (#13062) When using the TestPool.pool_query_delay BasicParameter, access its value property, not the BasicParameter object itself.
Signed-off-by: Phil Henderson --- src/tests/ftest/util/test_utils_pool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 333cc2c93b2..0826ea7d864 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -729,10 +729,10 @@ def query(self, show_enabled=False, show_disabled=False): "test yaml parameter.".format( self.pool_query_timeout.value, self.identifier)) from error - if self.pool_query_delay: + if self.pool_query_delay.value: self.log.info( "Waiting %s seconds before issuing next dmg pool query", - self.pool_query_delay) + self.pool_query_delay.value) sleep(self.pool_query_delay.value) @fail_on(CommandFailure) From 0cf35608bfbde2d2f3704e061031f89432bd7618 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 19 Sep 2023 14:33:32 +0800 Subject: [PATCH 49/80] DAOS-13562 vea: add bitmap to optimize small allocation (#12678) Currently VEA uses extents to manage free space: small extents (<64MiB) are managed by a size tree, while large extents are sorted in a heap tree. To reduce metadata overhead, VEA can use a bitmap to manage free space for small allocations. In this case, each free fragment can be represented by a few bits or bytes, instead of a B+Tree leaf node. vea_stress showed it reduced heap_bytes by ~28% if all allocations could be served from the bitmap. For old pools, the bitmap feature will not be enabled unless the "upgrade" command is triggered.
Required-githooks: true Signed-off-by: Wang Shilong --- src/common/ad_mem.c | 99 +---- src/common/misc.c | 105 ++++++ src/include/daos/common.h | 21 ++ src/include/daos_srv/vea.h | 45 ++- src/vea/tests/vea_stress.c | 25 +- src/vea/tests/vea_ut.c | 428 +++++++++++++++++++--- src/vea/vea_alloc.c | 562 +++++++++++++++++++++++----- src/vea/vea_api.c | 399 ++++++++++++++------ src/vea/vea_free.c | 669 ++++++++++++++++++++++++++++------ src/vea/vea_hint.c | 4 +- src/vea/vea_init.c | 82 +++-- src/vea/vea_internal.h | 207 +++++++++-- src/vea/vea_util.c | 267 +++++++++++--- src/vos/tests/vts_aggregate.c | 9 +- src/vos/vos_pool.c | 5 + 15 files changed, 2301 insertions(+), 626 deletions(-) diff --git a/src/common/ad_mem.c b/src/common/ad_mem.c index c3454a4cd2d..675906d466e 100644 --- a/src/common/ad_mem.c +++ b/src/common/ad_mem.c @@ -34,7 +34,6 @@ static int arena_tx_publish(struct ad_arena *arena, struct ad_tx *tx); static void arena_dump(struct ad_arena *arena); static inline int group_unit_avail(const struct ad_group_df *gd); static inline int group_weight(const struct ad_group_df *gd); -static int find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits); #define ASSERT_DUMP_ARENA(cond, arena) \ do { \ @@ -129,21 +128,6 @@ static struct ad_group_spec grp_specs_large[] = { static struct ad_blob *dummy_blob; -static inline void -setbits64(uint64_t *bmap, int at, int bits) -{ - setbit_range((uint8_t *)bmap, at, at + bits - 1); -} - -static inline void -clrbits64(uint64_t *bmap, int at, int bits) -{ - clrbit_range((uint8_t *)bmap, at, at + bits - 1); -} - -#define setbit64(bm, at) setbit(((uint8_t *)bm), at) -#define clrbit64(bm, at) clrbit(((uint8_t *)bm), at) -#define isset64(bm, at) isset(((uint8_t *)bm), at) static int group_u2b(int unit, int unit_nr) @@ -1007,7 +991,7 @@ arena_find(struct ad_blob *blob, uint32_t *arena_id, struct ad_arena_df **ad_p) if (id == AD_ARENA_ANY) { int bits = 1; - id = find_bits(bd->bd_bmap, blob->bb_bmap_rsv, 
blob_bmap_size(blob), 1, &bits); + id = daos_find_bits(bd->bd_bmap, blob->bb_bmap_rsv, blob_bmap_size(blob), 1, &bits); if (id < 0) { rc = -DER_NOSPACE; D_ERROR("Blob %s is full, cannot create more arena, "DF_RC"\n", @@ -1867,83 +1851,6 @@ arena_remove_grp(struct ad_arena *arena, struct ad_group *group) arena->ar_grp_nr--; } -/** Find requested number of unused bits (neither set it @used or @reserved */ -static int -find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits) -{ - int nr_saved; - int at_saved; - int nr; - int at; - int i; - int j; - - nr = nr_saved = 0; - at = at_saved = -1; - - for (i = 0; i < bmap_sz; i++) { - uint64_t free_bits = ~used[i]; - - if (reserved) - free_bits &= ~reserved[i]; - - if (free_bits == 0) { /* no space in the current int64 */ - if (nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - nr = 0; - at = -1; - continue; - } - - j = ffsll(free_bits); - D_ASSERT(j > 0); - if (at >= 0 && j == 1) { - D_ASSERT(nr > 0); - nr++; - } else { - at = i * 64 + j - 1; - nr = 1; - } - - for (; j < 64; j++) { - if (nr == *bits) /* done */ - goto out; - - if (isset64(&free_bits, j)) { - if (at < 0) - at = i * 64 + j; - nr++; - continue; - } - - if (nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - nr = 0; - at = -1; - if ((free_bits >> j) == 0) - break; - } - if (nr == *bits) - goto out; - } - out: - if (nr == *bits || nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - - if (nr_saved >= bits_min) - *bits = nr_saved; - else - at_saved = -1; - - return at_saved; -} - /** reserve a new group within @arena */ static int arena_reserve_grp(struct ad_arena *arena, daos_size_t size, int *pos, @@ -1981,7 +1888,7 @@ arena_reserve_grp(struct ad_arena *arena, daos_size_t size, int *pos, if (bits_min > bits) bits_min = bits; - bit_at = find_bits(ad->ad_bmap, arena->ar_space_rsv, ARENA_GRP_BMSZ, bits_min, &bits); + bit_at = daos_find_bits(ad->ad_bmap, arena->ar_space_rsv, ARENA_GRP_BMSZ, bits_min, &bits); if (bit_at < 0) 
return -DER_NOSPACE; @@ -2076,7 +1983,7 @@ group_reserve_addr(struct ad_group *grp, struct ad_reserv_act *act) int b = 1; int at; - at = find_bits(gd->gd_bmap, grp->gp_bmap_rsv, GRP_UNIT_BMSZ, 1, &b); + at = daos_find_bits(gd->gd_bmap, grp->gp_bmap_rsv, GRP_UNIT_BMSZ, 1, &b); /* NB: bitmap may includes more bits than the actual number of units */ if (at < 0 || at >= gd->gd_unit_nr) return 0; diff --git a/src/common/misc.c b/src/common/misc.c index eeb5c4522ef..a3a8c7bfd6c 100644 --- a/src/common/misc.c +++ b/src/common/misc.c @@ -779,3 +779,108 @@ daos_hlc2timestamp(uint64_t hlc, time_t *ts) *ts = tspec.tv_sec; return 0; } + +/** Find requested number of unused bits (neither set it @used or @reserved */ +int +daos_find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits) +{ + int nr_saved; + int at_saved; + int nr; + int at; + int i; + int j; + + nr = nr_saved = 0; + at = at_saved = -1; + + for (i = 0; i < bmap_sz; i++) { + uint64_t free_bits = ~used[i]; + + if (reserved) + free_bits &= ~reserved[i]; + + if (free_bits == 0) { /* no space in the current int64 */ + if (nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + nr = 0; + at = -1; + continue; + } + + j = ffsll(free_bits); + D_ASSERT(j > 0); + if (at >= 0 && j == 1) { + D_ASSERT(nr > 0); + nr++; + } else { + at = i * 64 + j - 1; + nr = 1; + } + + for (; j < 64; j++) { + if (nr == *bits) /* done */ + goto out; + + if (isset64(&free_bits, j)) { + if (at < 0) + at = i * 64 + j; + nr++; + continue; + } + + if (nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + nr = 0; + at = -1; + if ((free_bits >> j) == 0) + break; + } + if (nr == *bits) + goto out; + } + out: + if (nr == *bits || nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + + if (nr_saved >= bits_min) + *bits = nr_saved; + else + at_saved = -1; + + return at_saved; +} + +int +daos_count_free_bits(uint64_t *used, int bmap_sz) +{ + int i; + int j; + int nr = 0; + + for (i = 0; i < bmap_sz; i++) { + uint64_t 
free_bits = ~used[i]; + + /* no free bits in the current int64 */ + if (free_bits == 0) + continue; + + j = ffsll(free_bits); + D_ASSERT(j > 0); + nr++; + for (; j < 64; j++) { + if (isset64(&free_bits, j)) + nr++; + if ((free_bits >> j) == 0) + break; + } + } + + return nr; +} diff --git a/src/include/daos/common.h b/src/include/daos/common.h index c7af0fc6563..78acb71858c 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -237,6 +237,27 @@ setbit_range(uint8_t *bitmap, uint32_t start, uint32_t end) setbit(bitmap, index); } +static inline void +setbits64(uint64_t *bmap, int at, int bits) +{ + setbit_range((uint8_t *)bmap, at, at + bits - 1); +} + +static inline void +clrbits64(uint64_t *bmap, int at, int bits) +{ + clrbit_range((uint8_t *)bmap, at, at + bits - 1); +} + +#define setbit64(bm, at) setbit(((uint8_t *)bm), at) +#define clrbit64(bm, at) clrbit(((uint8_t *)bm), at) +#define isset64(bm, at) isset(((uint8_t *)bm), at) + +int +daos_find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits); +int +daos_count_free_bits(uint64_t *used, int bmap_sz); + static inline unsigned int daos_power2_nbits(unsigned int val) { diff --git a/src/include/daos_srv/vea.h b/src/include/daos_srv/vea.h index bdcd6c2ad21..1b37d1c042f 100644 --- a/src/include/daos_srv/vea.h +++ b/src/include/daos_srv/vea.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -20,23 +20,6 @@ #include #include -/* Common free extent structure for both SCM & in-memory index */ -struct vea_free_extent { - uint64_t vfe_blk_off; /* Block offset of the extent */ - uint32_t vfe_blk_cnt; /* Total blocks of the extent */ - uint32_t vfe_age; /* Monotonic timestamp */ -}; - -/* Maximum extents a non-contiguous allocation can have */ -#define VEA_EXT_VECTOR_MAX 9 - -/* Allocated extent vector */ -struct vea_ext_vector { - uint64_t vev_blk_off[VEA_EXT_VECTOR_MAX]; - uint32_t vev_blk_cnt[VEA_EXT_VECTOR_MAX]; - uint32_t vev_size; /* Size of the extent vector */ -}; - /* Reserved extent(s) */ struct vea_resrvd_ext { /* Link to a list for a series of vea_reserve() calls */ @@ -49,8 +32,12 @@ struct vea_resrvd_ext { uint64_t vre_hint_seq; /* Total reserved blocks */ uint32_t vre_blk_cnt; + /* New extent allocated for bitmap */ + uint32_t vre_new_bitmap_chunk:1; /* Extent vector for non-contiguous reserve */ struct vea_ext_vector *vre_vector; + /* private pointer */ + void *vre_private; }; /* @@ -83,6 +70,8 @@ struct vea_unmap_context { bool vnc_ext_flush; }; +#define VEA_COMPAT_FEATURE_BITMAP (1 << 0) + /* Free space tracking information on SCM */ struct vea_space_df { uint32_t vsd_magic; @@ -95,8 +84,8 @@ struct vea_space_df { uint64_t vsd_tot_blks; /* Free extent tree, sorted by offset */ struct btr_root vsd_free_tree; - /* Allocated extent vector tree, for non-contiguous allocation */ - struct btr_root vsd_vec_tree; + /* Free bitmap tree, sorted by offset */ + struct btr_root vsd_bitmap_tree; }; /* VEA attributes */ @@ -116,8 +105,10 @@ struct vea_stat { uint64_t vs_resrv_hint; /* Number of hint reserve */ uint64_t vs_resrv_large; /* Number of large reserve */ uint64_t vs_resrv_small; /* Number of small reserve */ + uint64_t vs_resrv_bitmap; /* Number of bitmap reserve */ uint64_t vs_frags_large; /* Large free frags */ uint64_t vs_frags_small; /* Small free frags */ + uint64_t vs_frags_bitmap; /* 
Bitmap frags */ uint64_t vs_frags_aging; /* Aging frags */ }; @@ -148,6 +139,20 @@ int vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, struct vea_space_df *md, uint32_t blk_sz, uint32_t hdr_blks, uint64_t capacity, vea_format_callback_t cb, void *cb_data, bool force); +/** + * Upgrade VEA to support latest disk format + * + * \param vsi [IN] In-memory compound free extent index + * \param umem [IN] An instance of SCM + * \param md [IN] The allocation metadata on SCM + * \param version [IN] Version which we try to upgrade + * + * \return Zero on success, in-memory compound free extent + * index returned by @vsi; Appropriated negative + * value on error + */ +int vea_upgrade(struct vea_space_info *vsi, struct umem_instance *umem, + struct vea_space_df *md, uint32_t version); /** * Load space tracking information from SCM to initialize the in-memory compound diff --git a/src/vea/tests/vea_stress.c b/src/vea/tests/vea_stress.c index 49d56e684cd..b50f37f8e7a 100644 --- a/src/vea/tests/vea_stress.c +++ b/src/vea/tests/vea_stress.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2021-2022 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -24,6 +24,7 @@ uint64_t pool_capacity = (1024ULL << 30); /* 1TB */ unsigned int cont_per_pool = 1; unsigned int obj_per_cont = 100; unsigned int test_duration = (2 * 60); /* 2 mins */ +unsigned int upd_blks_max = 256; /* 1MB by default */ unsigned int rand_seed; bool loading_test; /* test loading pool */ @@ -40,7 +41,6 @@ enum { #define VS_RSRV_CNT_MAX 10 /* extents */ #define VS_FREE_CNT_MAX 30 /* extents */ #define VS_MERGE_CNT_MAX 10 /* extents */ -#define VS_UPD_BLKS_MAX 256 /* 1MB */ #define VS_AGG_BLKS_MAX 1024 /* 4MB */ struct vs_perf_cntr { @@ -311,7 +311,7 @@ vs_update(struct vea_stress_pool *vs_pool) rsrv_cnt = get_random_count(VS_RSRV_CNT_MAX); for (i = 0; i < rsrv_cnt; i++) { - blk_cnt = get_random_count(VS_UPD_BLKS_MAX); + blk_cnt = get_random_count(upd_blks_max); cur_ts = daos_getutime(); rc = vea_reserve(vs_pool->vsp_vsi, blk_cnt, hint, &r_list); @@ -601,10 +601,11 @@ vs_stop_run(struct vea_stress_pool *vs_pool, int rc) } fprintf(stdout, "free_blks:["DF_12U64","DF_12U64"] frags_l:"DF_12U64" frags_s:"DF_12U64" " - "frags_a:"DF_12U64" r_hint:"DF_12U64" r_large:"DF_12U64" r_small:"DF_12U64"\n", + "frags_a:"DF_12U64" frags_bitmap:"DF_12U64" r_hint:"DF_12U64" r_large:"DF_12U64" " + "r_small:"DF_12U64" r_bitmap:"DF_12U64"\n", stat.vs_free_persistent, stat.vs_free_transient, stat.vs_frags_large, - stat.vs_frags_small, stat.vs_frags_aging, stat.vs_resrv_hint, stat.vs_resrv_large, - stat.vs_resrv_small); + stat.vs_frags_small, stat.vs_frags_aging, stat.vs_frags_bitmap, + stat.vs_resrv_hint, stat.vs_resrv_large, stat.vs_resrv_small, stat.vs_resrv_bitmap); return stop; } @@ -873,6 +874,7 @@ vs_init(void) const char vs_stress_options[] = "Available options are:\n" +"-b max blocks per update\n" "-C pool capacity\n" "-c container nr\n" "-d test duration in seconds\n" @@ -932,6 +934,7 @@ vs_op2str(unsigned int op) int main(int argc, char **argv) { static struct option long_ops[] = { + { "block_max", 
required_argument, NULL, 'b' }, { "capacity", required_argument, NULL, 'C' }, { "cont_nr", required_argument, NULL, 'c' }, { "duration", required_argument, NULL, 'd' }, @@ -949,8 +952,16 @@ int main(int argc, char **argv) rand_seed = (unsigned int)(time(NULL) & 0xFFFFFFFFUL); memset(pool_file, 0, sizeof(pool_file)); - while ((rc = getopt_long(argc, argv, "C:c:d:f:H:lo:s:h", long_ops, NULL)) != -1) { + while ((rc = getopt_long(argc, argv, "b:C:c:d:f:H:lo:s:h", long_ops, NULL)) != -1) { switch (rc) { + case 'b': + upd_blks_max = strtoull(optarg, &endp, 0); + if (*endp != '\0') { + printf("invalid update max blocks\n"); + print_usage(); + return -1; + } + break; case 'C': pool_capacity = strtoul(optarg, &endp, 0); pool_capacity = val_unit(pool_capacity, *endp); diff --git a/src/vea/tests/vea_ut.c b/src/vea/tests/vea_ut.c index a16590329c3..3f6c8369550 100644 --- a/src/vea/tests/vea_ut.c +++ b/src/vea/tests/vea_ut.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -83,6 +83,8 @@ ut_load(void **state) rc = vea_load(&args->vua_umm, &args->vua_txd, args->vua_md, &unmap_ctxt, NULL, &args->vua_vsi); assert_rc_equal(rc, 0); + /* turn off bitmap feature to test legacy allocation */ + args->vua_md->vsd_compat = 0; } static void @@ -114,10 +116,12 @@ ut_query(void **state) assert_int_equal(stat.vs_free_transient, tot_blks); assert_int_equal(stat.vs_frags_large, 1); assert_int_equal(stat.vs_frags_small, 0); + assert_int_equal(stat.vs_frags_bitmap, 0); assert_int_equal(stat.vs_frags_aging, 0); assert_int_equal(stat.vs_resrv_hint, 0); assert_int_equal(stat.vs_resrv_large, 0); assert_int_equal(stat.vs_resrv_small, 0); + assert_int_equal(stat.vs_resrv_bitmap, 0); } static void @@ -172,9 +176,11 @@ ut_reserve(void **state) else assert_int_equal(ext->vre_blk_off, off_a); - rc = vea_verify_alloc(args->vua_vsi, true, off_a, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_a, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, off_a, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_a, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* update hint offset */ @@ -199,9 +205,11 @@ ut_reserve(void **state) else assert_int_equal(ext->vre_blk_off, off_b); - rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* update hint offset */ @@ -226,10 +234,12 @@ ut_reserve(void **state) assert_int_equal(ext->vre_blk_off, off_b); /* Verify transient is allocated */ - rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); /* Verify 
persistent is not allocated */ - rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* Verify statistics */ @@ -238,12 +248,135 @@ ut_reserve(void **state) assert_int_equal(stat.vs_frags_large, 1); assert_int_equal(stat.vs_frags_small, 1); + assert_int_equal(stat.vs_frags_bitmap, 0); /* 2 hint from the second reserve for io stream 0 & 1 */ assert_int_equal(stat.vs_resrv_hint, 2); /* 2 large from the first reserve for io stream 0 & 1 */ assert_int_equal(stat.vs_resrv_large, 2); /* 1 small from the reserve for io stream 2 */ assert_int_equal(stat.vs_resrv_small, 1); + /* 0 bitmap reserve */ + assert_int_equal(stat.vs_resrv_bitmap, 0); +} + +static void +ut_reserve_bitmap(void **state) +{ + struct vea_ut_args *args = *state; + uint32_t blk_cnt; + struct vea_resrvd_ext *ext; + struct vea_hint_context *h_ctxt; + d_list_t *r_list; + struct vea_stat stat; + int rc, ext_cnt; + uint32_t hdr_blks = 1; + uint64_t capacity = UT_TOTAL_BLKS; + struct vea_unmap_context unmap_ctxt = { 0 }; + uint32_t blk_cnt_stream0[3] = { 4, 32, 4}; + uint32_t blk_cnt_stream1[3] = { 1, 2, 3}; + int i; + + rc = vea_format(&args->vua_umm, &args->vua_txd, args->vua_md, 0, + hdr_blks, capacity, NULL, NULL, true); + assert_rc_equal(rc, 0); + + rc = vea_load(&args->vua_umm, &args->vua_txd, args->vua_md, &unmap_ctxt, + NULL, &args->vua_vsi); + assert_rc_equal(rc, 0); + + for (i = 0; i < IO_STREAM_CNT; i++) { + /* reset off and seq */ + args->vua_hint[i]->vhd_off = 0; + args->vua_hint[i]->vhd_seq = 0; + + rc = vea_hint_load(args->vua_hint[i], &args->vua_hint_ctxt[i]); + assert_rc_equal(rc, 0); + } + /* + * Reserve three blocks from I/O stream 0 and I/O stream 1 in + * interleaved order, the reservation from I/O stream 0 will be + * canceled later, and the reservation from I/O stream 1 will + * be published. 
+ */ + for (ext_cnt = 0; ext_cnt < 3; ext_cnt++) { + print_message("reserve extent %d from I/O stream 0\n", ext_cnt); + + r_list = &args->vua_resrvd_list[0]; + h_ctxt = args->vua_hint_ctxt[0]; + + blk_cnt = blk_cnt_stream0[ext_cnt]; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 0); + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + + print_message("reserve extent %d from I/O stream 1\n", ext_cnt); + + r_list = &args->vua_resrvd_list[1]; + h_ctxt = args->vua_hint_ctxt[1]; + + blk_cnt = blk_cnt_stream1[ext_cnt]; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 0); + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + } + + /* Reserve from I/O stream 2, it will reserve from small free extent */ + print_message("reserve extent from I/O stream 2\n"); + + r_list = &args->vua_resrvd_list[2]; + h_ctxt = args->vua_hint_ctxt[2]; + + blk_cnt = 1024; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_hint_off, VEA_HINT_OFF_INVAL); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + + /* Verify transient is allocated */ + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + 
!!ext->vre_private); + assert_rc_equal(rc, 0); + /* Verify persistent is not allocated */ + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + + /* Verify statistics */ + rc = vea_query(args->vua_vsi, NULL, &stat); + assert_rc_equal(rc, 0); + + assert_int_equal(stat.vs_frags_large, 1); + assert_int_equal(stat.vs_frags_small, 1); + /* 5 bitmaps for io stream 0 & 1 */ + assert_int_equal(stat.vs_frags_bitmap, 5); + /* 4 hint from */ + assert_int_equal(stat.vs_resrv_hint, 4); + /* 1 large from the first reserve for io stream 2 */ + assert_int_equal(stat.vs_resrv_large, 1); + assert_int_equal(stat.vs_resrv_small, 1); + /* 6 bitmap reserve */ + assert_int_equal(stat.vs_resrv_bitmap, 6); } static void @@ -269,11 +402,48 @@ ut_cancel(void **state) print_message("cancel reservation from I/O stream 0\n"); rc = vea_cancel(args->vua_vsi, h_ctxt, r_list); assert_int_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt, true); + assert_rc_equal(rc, 1); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt, false); assert_rc_equal(rc, 1); assert_int_equal(h_ctxt->vhc_off, VEA_HINT_OFF_INVAL); } +static void +ut_cancel_bitmap(void **state) +{ + + struct vea_ut_args *args = *state; + struct vea_hint_context *h_ctxt; + struct vea_resrvd_ext *ext; + d_list_t *r_list; + struct vea_stat stat; + int rc; + + r_list = &args->vua_resrvd_list[0]; + h_ctxt = args->vua_hint_ctxt[0]; + + print_message("cancel reservation from I/O stream 0\n"); + rc = vea_cancel(args->vua_vsi, h_ctxt, r_list); + assert_int_equal(rc, 0); + + d_list_for_each_entry(ext, r_list, vre_link) { + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, + ext->vre_blk_cnt, true); + assert_rc_equal(rc, 1); + + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, + ext->vre_blk_cnt, false); + assert_rc_equal(rc, 1); + } + + /* Verify statistics */ 
+ rc = vea_query(args->vua_vsi, NULL, &stat); + + /* 3 bitmaps left */ + assert_int_equal(stat.vs_frags_bitmap, 3); +} + static void ut_tx_publish(void **state) { @@ -301,9 +471,11 @@ ut_tx_publish(void **state) assert_ptr_not_equal(copy, NULL); D_INIT_LIST_HEAD(©->vre_link); + copy->vre_new_bitmap_chunk = ext->vre_new_bitmap_chunk; + copy->vre_private = ext->vre_private; copy->vre_blk_off = ext->vre_blk_off; copy->vre_blk_cnt = ext->vre_blk_cnt; - d_list_add(©->vre_link, &args->vua_alloc_list); + d_list_add_tail(©->vre_link, &args->vua_alloc_list); } print_message("publish reservation from I/O stream %d\n", i); @@ -319,10 +491,12 @@ ut_tx_publish(void **state) blk_off = copy->vre_blk_off; blk_cnt = copy->vre_blk_cnt; - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!copy->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, blk_off, + blk_cnt, !!copy->vre_private); assert_rc_equal(rc, 0); } } @@ -331,7 +505,7 @@ static void ut_free(void **state) { struct vea_ut_args *args = *state; - struct vea_resrvd_ext *ext; + struct vea_resrvd_ext *ext, *tmp; d_list_t *r_list; uint64_t blk_off; uint32_t blk_cnt, nr_flushed; @@ -346,10 +520,12 @@ ut_free(void **state) assert_rc_equal(rc, 0); /* not immediately visual for allocation */ - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); } @@ -363,13 +539,21 @@ ut_free(void **state) assert_rc_equal(rc, 0); assert_true(nr_flushed > 0); + print_message("transient free extents after flush:\n"); + vea_dump(args->vua_vsi, true); + print_message("persistent free extents 
after flush:\n"); + vea_dump(args->vua_vsi, false); + r_list = &args->vua_alloc_list; - d_list_for_each_entry(ext, r_list, vre_link) { + d_list_for_each_entry_safe(ext, tmp, r_list, vre_link) { blk_off = ext->vre_blk_off; blk_cnt = ext->vre_blk_cnt; - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); + d_list_del_init(&ext->vre_link); + D_FREE(ext); } print_message("transient free extents after migration:\n"); @@ -400,6 +584,14 @@ ut_unload(void **state) args->vua_vsi = NULL; } +static void +ut_free_bitmap(void **state) +{ + ut_free(state); + ut_hint_unload(state); + ut_unload(state); +} + static int ut_setup(struct vea_ut_args *test_args) { @@ -885,23 +1077,6 @@ ut_inval_params_set_ext_age(void **state) ut_teardown(&args); } -static void -ut_inval_params_get_ext_vector(void **state) -{ - struct vea_ut_args args; - uint64_t block_offset = 0; - uint64_t block_count = 1; - struct vea_ext_vector ext_vector; - - print_message("Testing invalid parameters to vea_get_ext_vector\n"); - ut_setup(&args); - expect_assert_failure(vea_get_ext_vector(NULL, block_offset, - block_count, &ext_vector)); - expect_assert_failure(vea_get_ext_vector(args.vua_vsi, block_offset, - block_count, NULL)); - ut_teardown(&args); -} - static void ut_free_invalid_space(void **state) { @@ -932,6 +1107,13 @@ ut_free_invalid_space(void **state) rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list); assert_int_equal(rc, 0); + print_message("transient free extents:\n"); + rc = vea_dump(args.vua_vsi, true); + assert_rc_equal(rc, 0); + print_message("persistent free extents:\n"); + rc = vea_dump(args.vua_vsi, false); + assert_rc_equal(rc, 0); + /* Try to free from I/O Stream 1, which hasn't been reserved */ r_list = &args.vua_resrvd_list[1]; h_ctxt = args.vua_hint_ctxt[1]; @@ -957,12 +1139,13 @@ print_stats(struct vea_ut_args *args, bool verbose) rc = vea_query(args->vua_vsi, 
NULL, &stat); assert_int_equal(rc, 0); print_message("free_blks:"DF_U64"/"DF_U64", frags_large:"DF_U64", " - "frags_small:"DF_U64", frags_aging:"DF_U64"\n" + "frags_small:"DF_U64", frags_bitmap:"DF_U64" frags_aging:"DF_U64"\n" "resrv_hint:"DF_U64"\nresrv_large:"DF_U64"\n" - "resrv_small:"DF_U64"\n", + "resrv_small:"DF_U64"\nresrv_bitmap:"DF_U64"\n", stat.vs_free_persistent, stat.vs_free_transient, - stat.vs_frags_large, stat.vs_frags_small, stat.vs_frags_aging, - stat.vs_resrv_hint, stat.vs_resrv_large, stat.vs_resrv_small); + stat.vs_frags_large, stat.vs_frags_small, stat.vs_frags_bitmap, + stat.vs_frags_aging, stat.vs_resrv_hint, stat.vs_resrv_large, + stat.vs_resrv_small, stat.vs_resrv_bitmap); if (verbose) vea_dump(args->vua_vsi, true); @@ -980,6 +1163,8 @@ ut_interleaved_ops(void **state) uint32_t header_blocks = 1; uint64_t capacity = ((VEA_LARGE_EXT_MB * 2) << 20); /* 128 MB */ uint32_t block_count; + d_list_t tmp_list; + struct vea_resrvd_ext *ext, *tmp; int rc; print_message("Test interleaved operations\n"); @@ -992,8 +1177,6 @@ ut_interleaved_ops(void **state) NULL, &args.vua_vsi); assert_int_equal(rc, 0); - rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); - assert_int_equal(rc, 0); /* * Do the following interleaved operations: @@ -1006,6 +1189,7 @@ ut_interleaved_ops(void **state) * 7. reserve A, reserve B, cancel A, cancel B * 8. reserve A, reserve B, cancel B, cancel A * 9. reserve A, reserve B, reserve C, publish B, publish A & C + * 10. reserve A, reserve B, reserve C, cancel A & C. publish B. 
**/ block_count = 2; r_list_a = &args.vua_resrvd_list[0]; @@ -1020,10 +1204,14 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 2 */ block_count += 2; @@ -1032,10 +1220,14 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 3 */ block_count += 2; @@ -1046,8 +1238,12 @@ ut_interleaved_ops(void **state) assert_rc_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 4 */ block_count += 2; @@ -1056,8 +1252,12 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); @@ -1070,8 +1270,12 @@ ut_interleaved_ops(void **state) assert_rc_equal(rc, 0); rc = 
vea_cancel(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 6 */ block_count += 2; @@ -1080,8 +1284,12 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); @@ -1120,12 +1328,46 @@ ut_interleaved_ops(void **state) /* Reserve C */ rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); /* Publish B */ rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_rc_equal(rc, 0); /* Publish A & C */ rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_rc_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + /* Case 10 */ + block_count = 256; + /* Reserve A */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + block_count = 260; + /* Reserve B */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + + block_count = 261; + /* Reserve C */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + + D_INIT_LIST_HEAD(&tmp_list); + d_list_for_each_entry_safe(ext, tmp, r_list_a, vre_link) { + /* move second reserve out */ + if (ext->vre_blk_cnt == 260) + d_list_move_tail(&ext->vre_link, &tmp_list); + } + /* cancel A & C */ + rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + rc = 
umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + /* Publish B */ + rc = vea_tx_publish(args.vua_vsi, h_ctxt, &tmp_list); + assert_rc_equal(rc, 0); rc = umem_tx_commit(&args.vua_umm); assert_int_equal(rc, 0); @@ -1190,6 +1432,7 @@ ut_fragmentation(void **state) D_INIT_LIST_HEAD(©->vre_link); copy->vre_blk_off = ext->vre_blk_off; copy->vre_blk_cnt = ext->vre_blk_cnt; + copy->vre_private = ext->vre_private; d_list_add(©->vre_link, &args.vua_alloc_list); } } @@ -1239,10 +1482,12 @@ ut_fragmentation(void **state) assert_rc_equal(rc, 0); /* not immediately visual for allocation */ - rc = vea_verify_alloc(args.vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args.vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args.vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args.vua_vsi, false, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); } @@ -1250,6 +1495,98 @@ ut_fragmentation(void **state) ut_teardown(&args); } +static void +ut_reclaim_unused_bitmap(void **state) +{ + struct vea_ut_args args; + struct vea_unmap_context unmap_ctxt = { 0 }; + d_list_t *r_list; + uint64_t capacity = 1llu << 27; /* 128 MiB */ + uint32_t block_size = 4096; /* use the default size */ + uint32_t header_blocks = 1; + d_list_t persist_list; + struct vea_resrvd_ext *ext, *copy; + struct vea_resrvd_ext *tmp_ext; + int rc; + + print_message("Test bitmap allocation\n"); + ut_setup(&args); + rc = vea_format(&args.vua_umm, &args.vua_txd, args.vua_md, block_size, + header_blocks, capacity, NULL, NULL, false); + assert_rc_equal(rc, 0); + + rc = vea_load(&args.vua_umm, &args.vua_txd, args.vua_md, &unmap_ctxt, + NULL, &args.vua_vsi); + assert_rc_equal(rc, 0); + + r_list = &args.vua_resrvd_list[0]; + /* keep reserving until we run out of space */ + while (rc == 0) { + rc = vea_reserve(args.vua_vsi, 8, NULL, r_list); + } + + D_INIT_LIST_HEAD(&persist_list); + d_list_for_each_entry_safe(ext, 
tmp_ext, r_list, vre_link) { + /* Copy the extents to keep to persist_list */ + D_ALLOC_PTR(copy); + assert_ptr_not_equal(copy, NULL); + + D_INIT_LIST_HEAD(©->vre_link); + copy->vre_blk_off = ext->vre_blk_off; + copy->vre_blk_cnt = ext->vre_blk_cnt; + copy->vre_private = ext->vre_private; + d_list_add(©->vre_link, &persist_list); + } + + /* Publish the ones to persist */ + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + rc = vea_tx_publish(args.vua_vsi, NULL, r_list); + assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + print_message("Fragments after filling 8 blocks:\n"); + print_stats(&args, true); + + d_list_for_each_entry_safe(ext, tmp_ext, &persist_list, vre_link) { + uint64_t blk_off = ext->vre_blk_off; + uint32_t blk_cnt = ext->vre_blk_cnt; + + rc = vea_free(args.vua_vsi, blk_off, blk_cnt); + assert_rc_equal(rc, 0); + + /* not immediately visual for allocation */ + rc = vea_verify_alloc(args.vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); + assert_rc_equal(rc, 0); + + rc = vea_verify_alloc(args.vua_vsi, false, blk_off, + blk_cnt, !!ext->vre_private); + assert_rc_equal(rc, 1); + d_list_del_init(&ext->vre_link); + D_FREE(ext); + } + + rc = 0; + D_INIT_LIST_HEAD(&persist_list); + while (rc == 0) { + rc = vea_reserve(args.vua_vsi, 16, NULL, &persist_list); + } + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + rc = vea_tx_publish(args.vua_vsi, NULL, &persist_list); + assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + print_message("Fragments after filling 16 blocks:\n"); + print_stats(&args, true); + + vea_unload(args.vua_vsi); + ut_teardown(&args); +} + static const struct CMUnitTest vea_uts[] = { { "vea_format", ut_format, NULL, NULL}, { "vea_load", ut_load, NULL, NULL}, @@ -1261,6 +1598,10 @@ static const struct CMUnitTest vea_uts[] = { { "vea_free", ut_free, NULL, NULL}, { "vea_hint_unload", 
ut_hint_unload, NULL, NULL}, { "vea_unload", ut_unload, NULL, NULL}, + { "vea_reserve_bitmap", ut_reserve_bitmap, NULL, NULL}, + { "vea_cancel_bitmap", ut_cancel_bitmap, NULL, NULL}, + { "vea_tx_publish_bitmap", ut_tx_publish, NULL, NULL}, + { "vea_free_bitmap", ut_free_bitmap, NULL, NULL}, { "vea_reserve_special", ut_reserve_special, NULL, NULL}, { "vea_inval_params_format", ut_inval_params_format, NULL, NULL}, { "vea_inval_params_load", ut_inval_params_load, NULL, NULL}, @@ -1271,11 +1612,10 @@ static const struct CMUnitTest vea_uts[] = { { "vea_inval_param_hint_load", ut_inval_params_hint_load, NULL, NULL}, { "vea_inval_param_set_ext_age", ut_inval_params_set_ext_age, NULL, NULL}, - { "vea_inval_param_get_ext_vector", ut_inval_params_get_ext_vector, - NULL, NULL}, { "vea_free_invalid_space", ut_free_invalid_space, NULL, NULL}, { "vea_interleaved_ops", ut_interleaved_ops, NULL, NULL}, - { "vea_fragmentation", ut_fragmentation, NULL, NULL} + { "vea_fragmentation", ut_fragmentation, NULL, NULL}, + { "vea_reclaim_unused_bitmap", ut_reclaim_unused_bitmap, NULL, NULL} }; int main(int argc, char **argv) diff --git a/src/vea/vea_alloc.c b/src/vea/vea_alloc.c index 6e0986c0dba..a9fd9424184 100644 --- a/src/vea/vea_alloc.c +++ b/src/vea/vea_alloc.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -10,27 +10,20 @@ #include #include "vea_internal.h" -int -compound_vec_alloc(struct vea_space_info *vsi, struct vea_ext_vector *vec) -{ - /* TODO Add in in-memory extent vector tree */ - return 0; -} - static int -compound_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe, - struct vea_entry *entry) +compound_alloc_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe, + struct vea_extent_entry *entry) { struct vea_free_extent *remain; d_iov_t key; int rc; - remain = &entry->ve_ext; + remain = &entry->vee_ext; D_ASSERT(remain->vfe_blk_cnt >= vfe->vfe_blk_cnt); D_ASSERT(remain->vfe_blk_off == vfe->vfe_blk_off); /* Remove the found free extent from compound index */ - free_class_remove(vsi, entry); + extent_free_class_remove(vsi, entry); if (remain->vfe_blk_cnt == vfe->vfe_blk_cnt) { d_iov_set(&key, &vfe->vfe_blk_off, sizeof(vfe->vfe_blk_off)); @@ -40,7 +33,7 @@ compound_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe, remain->vfe_blk_off += vfe->vfe_blk_cnt; remain->vfe_blk_cnt -= vfe->vfe_blk_cnt; - rc = free_class_add(vsi, entry); + rc = extent_free_class_add(vsi, entry); } return rc; @@ -51,7 +44,7 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { struct vea_free_extent vfe; - struct vea_entry *entry; + struct vea_extent_entry *entry; d_iov_t key, val; int rc; @@ -72,12 +65,12 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, if (rc) return (rc == -DER_NONEXIST) ? 
0 : rc; - entry = (struct vea_entry *)val.iov_buf; + entry = (struct vea_extent_entry *)val.iov_buf; /* The matching free extent isn't big enough */ - if (entry->ve_ext.vfe_blk_cnt < vfe.vfe_blk_cnt) + if (entry->vee_ext.vfe_blk_cnt < vfe.vfe_blk_cnt) return 0; - rc = compound_alloc(vsi, &vfe, entry); + rc = compound_alloc_extent(vsi, &vfe, entry); if (rc) return rc; @@ -94,82 +87,33 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, static int reserve_small(struct vea_space_info *vsi, uint32_t blk_cnt, - struct vea_resrvd_ext *resrvd) -{ - daos_handle_t btr_hdl; - struct vea_sized_class *sc; - struct vea_free_extent vfe; - struct vea_entry *entry; - d_iov_t key, val_out; - uint64_t int_key = blk_cnt; - int rc; - - /* Skip huge allocate request */ - if (blk_cnt > vsi->vsi_class.vfc_large_thresh) - return 0; - - btr_hdl = vsi->vsi_class.vfc_size_btr; - D_ASSERT(daos_handle_is_valid(btr_hdl)); - - d_iov_set(&key, &int_key, sizeof(int_key)); - d_iov_set(&val_out, NULL, 0); - - rc = dbtree_fetch(btr_hdl, BTR_PROBE_GE, DAOS_INTENT_DEFAULT, &key, NULL, &val_out); - if (rc == -DER_NONEXIST) { - return 0; - } else if (rc) { - D_ERROR("Search size class:%u failed. 
"DF_RC"\n", blk_cnt, DP_RC(rc)); - return rc; - } - - sc = (struct vea_sized_class *)val_out.iov_buf; - D_ASSERT(sc != NULL); - D_ASSERT(!d_list_empty(&sc->vsc_lru)); - - /* Get the least used item from head */ - entry = d_list_entry(sc->vsc_lru.next, struct vea_entry, ve_link); - D_ASSERT(entry->ve_sized_class == sc); - D_ASSERT(entry->ve_ext.vfe_blk_cnt >= blk_cnt); - - vfe.vfe_blk_off = entry->ve_ext.vfe_blk_off; - vfe.vfe_blk_cnt = blk_cnt; - - rc = compound_alloc(vsi, &vfe, entry); - if (rc) - return rc; - - resrvd->vre_blk_off = vfe.vfe_blk_off; - resrvd->vre_blk_cnt = blk_cnt; - inc_stats(vsi, STAT_RESRV_SMALL, 1); - - D_DEBUG(DB_IO, "["DF_U64", %u]\n", resrvd->vre_blk_off, resrvd->vre_blk_cnt); - - return rc; -} + struct vea_resrvd_ext *resrvd); +static int +reserve_size_tree(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd); -int -reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, +static int +reserve_extent(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { struct vea_free_class *vfc = &vsi->vsi_class; struct vea_free_extent vfe; - struct vea_entry *entry; + struct vea_extent_entry *entry; struct d_binheap_node *root; int rc; - /* No large free extent available */ if (d_binheap_is_empty(&vfc->vfc_heap)) - return reserve_small(vsi, blk_cnt, resrvd); + return 0; root = d_binheap_root(&vfc->vfc_heap); - entry = container_of(root, struct vea_entry, ve_node); + entry = container_of(root, struct vea_extent_entry, vee_node); - D_ASSERT(entry->ve_ext.vfe_blk_cnt > vfc->vfc_large_thresh); + D_ASSERT(entry->vee_ext.vfe_blk_cnt > vfc->vfc_large_thresh); D_DEBUG(DB_IO, "largest free extent ["DF_U64", %u]\n", - entry->ve_ext.vfe_blk_off, entry->ve_ext.vfe_blk_cnt); + entry->vee_ext.vfe_blk_off, entry->vee_ext.vfe_blk_cnt); /* The largest free extent can't satisfy huge allocate request */ - if (entry->ve_ext.vfe_blk_cnt < blk_cnt) + if (entry->vee_ext.vfe_blk_cnt < blk_cnt) return 0; /* @@ -178,16 
+122,11 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, * reserve from the small extents first, if it fails, reserve from the * largest free extent. */ - if (entry->ve_ext.vfe_blk_cnt <= (max(blk_cnt, vfc->vfc_large_thresh) * 2)) { - /* Try small extents first */ - rc = reserve_small(vsi, blk_cnt, resrvd); - if (rc != 0 || resrvd->vre_blk_cnt != 0) - return rc; - - vfe.vfe_blk_off = entry->ve_ext.vfe_blk_off; + if (entry->vee_ext.vfe_blk_cnt <= (max(blk_cnt, vfc->vfc_large_thresh) * 2)) { + vfe.vfe_blk_off = entry->vee_ext.vfe_blk_off; vfe.vfe_blk_cnt = blk_cnt; - rc = compound_alloc(vsi, &vfe, entry); + rc = compound_alloc_extent(vsi, &vfe, entry); if (rc) return rc; @@ -195,15 +134,15 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, uint32_t half_blks, tot_blks; uint64_t blk_off; - blk_off = entry->ve_ext.vfe_blk_off; - tot_blks = entry->ve_ext.vfe_blk_cnt; + blk_off = entry->vee_ext.vfe_blk_off; + tot_blks = entry->vee_ext.vfe_blk_cnt; half_blks = tot_blks >> 1; D_ASSERT(tot_blks >= (half_blks + blk_cnt)); /* Shrink the original extent to half size */ - free_class_remove(vsi, entry); - entry->ve_ext.vfe_blk_cnt = half_blks; - rc = free_class_add(vsi, entry); + extent_free_class_remove(vsi, entry); + entry->vee_ext.vfe_blk_cnt = half_blks; + rc = extent_free_class_add(vsi, entry); if (rc) return rc; @@ -213,8 +152,8 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, vfe.vfe_blk_cnt = tot_blks - half_blks - blk_cnt; vfe.vfe_age = 0; /* Not used */ - rc = compound_free(vsi, &vfe, VEA_FL_NO_MERGE | - VEA_FL_NO_ACCOUNTING); + rc = compound_free_extent(vsi, &vfe, VEA_FL_NO_MERGE | + VEA_FL_NO_ACCOUNTING); if (rc) return rc; } @@ -232,16 +171,263 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, return 0; } +static int +reserve_size_tree(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + daos_handle_t btr_hdl; + struct vea_sized_class *sc; + struct vea_free_extent vfe; + struct 
vea_extent_entry *extent_entry; + d_iov_t key, val_out; + uint64_t int_key = blk_cnt; + int rc; + + btr_hdl = vsi->vsi_class.vfc_size_btr; + D_ASSERT(daos_handle_is_valid(btr_hdl)); + + d_iov_set(&key, &int_key, sizeof(int_key)); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_fetch(btr_hdl, BTR_PROBE_GE, DAOS_INTENT_DEFAULT, &key, NULL, &val_out); + if (rc == -DER_NONEXIST) + return 0; + else if (rc) + return rc; + + sc = (struct vea_sized_class *)val_out.iov_buf; + D_ASSERT(sc != NULL); + + /* Get the least used item from head */ + extent_entry = d_list_entry(sc->vsc_extent_lru.next, struct vea_extent_entry, vee_link); + D_ASSERT(extent_entry->vee_sized_class == sc); + D_ASSERT(extent_entry->vee_ext.vfe_blk_cnt >= blk_cnt); + + vfe.vfe_blk_off = extent_entry->vee_ext.vfe_blk_off; + vfe.vfe_blk_cnt = blk_cnt; + + rc = compound_alloc_extent(vsi, &vfe, extent_entry); + if (rc) + return rc; + resrvd->vre_blk_off = vfe.vfe_blk_off; + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = NULL; + inc_stats(vsi, STAT_RESRV_SMALL, 1); + + return 0; +} + +static int +reserve_bitmap_chunk(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + int rc; + + /* Get hint offset */ + hint_get(vsi->vsi_bitmap_hint_context, &resrvd->vre_hint_off); + + /* Reserve from hint offset */ + if (resrvd->vre_hint_off != VEA_HINT_OFF_INVAL) { + rc = reserve_hint(vsi, blk_cnt, resrvd); + if (rc != 0) + return rc; + else if (resrvd->vre_blk_cnt != 0) + goto done; + } + + if (blk_cnt >= vsi->vsi_class.vfc_large_thresh) + goto extent; + + rc = reserve_size_tree(vsi, blk_cnt, resrvd); + if (rc) + return rc; + + if (resrvd->vre_blk_cnt > 0) + goto done; + +extent: + rc = reserve_extent(vsi, blk_cnt, resrvd); + if (resrvd->vre_blk_cnt <= 0) + return -DER_NOSPACE; +done: + D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); + D_ASSERT(resrvd->vre_blk_cnt == blk_cnt); + dec_stats(vsi, STAT_FREE_EXTENT_BLKS, blk_cnt); + + /* Update hint offset */ + 
hint_update(vsi->vsi_bitmap_hint_context, resrvd->vre_blk_off + blk_cnt, + &resrvd->vre_hint_seq); + return rc; +} + +#define LARGE_EXT_FREE_BLKS ((32UL << 30) / VEA_BLK_SZ) + +static inline uint32_t +get_bitmap_chunk_blks(struct vea_space_info *vsi, uint32_t blk_cnt) +{ + uint32_t chunk_blks = VEA_BITMAP_MIN_CHUNK_BLKS; + + D_ASSERT(blk_cnt <= VEA_MAX_BITMAP_CLASS); + chunk_blks *= blk_cnt; + + D_ASSERT(chunk_blks <= VEA_BITMAP_MAX_CHUNK_BLKS); + /* + * Always try to allocate large bitmap chunk if there + * is enough free extent blocks. + */ + if (vsi->vsi_stat[STAT_FREE_EXTENT_BLKS] >= LARGE_EXT_FREE_BLKS) { + int times = VEA_BITMAP_MAX_CHUNK_BLKS / chunk_blks; + + if (times > 1) + chunk_blks *= times; + } + + /* should be aligned with 64 bits */ + D_ASSERT(chunk_blks % (blk_cnt * 64) == 0); + + return chunk_blks; +} + +static inline int +get_bitmap_sz(uint32_t chunk_blks, uint16_t class) +{ + int bits = chunk_blks / class; + + D_ASSERT(chunk_blks % class == 0); + D_ASSERT(bits % 64 == 0); + + return bits / 64; +} + +static int +reserve_bitmap(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + struct vea_bitmap_entry *bitmap_entry, *tmp_entry; + struct vea_bitmap_entry *entry; + int rc; + struct vea_free_bitmap *vfb; + struct vea_free_bitmap new_vfb = { 0 }; + int bits = 1; + uint32_t chunk_blks; + int bitmap_sz; + d_list_t *list_head; + + if (!is_bitmap_feature_enabled(vsi)) + return 0; + + if (blk_cnt > VEA_MAX_BITMAP_CLASS) + return 0; + + D_ASSERT(blk_cnt > 0); + /* reserve from bitmap */ + d_list_for_each_entry_safe(bitmap_entry, tmp_entry, + &vsi->vsi_class.vfc_bitmap_lru[blk_cnt - 1], vbe_link) { + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == blk_cnt); + /* Only assert in server mode */ + if (vsi->vsi_unmap_ctxt.vnc_ext_flush) + D_ASSERT(bitmap_entry->vbe_published_state != VEA_BITMAP_STATE_PUBLISHING); + rc = daos_find_bits(vfb->vfb_bitmaps, NULL, vfb->vfb_bitmap_sz, 1, &bits); + if (rc < 0) { + 
d_list_del_init(&bitmap_entry->vbe_link); + continue; + } + + D_ASSERT(rc * blk_cnt + blk_cnt <= vfb->vfb_blk_cnt); + resrvd->vre_blk_off = vfb->vfb_blk_off + (rc * blk_cnt); + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)bitmap_entry; + setbits64(vfb->vfb_bitmaps, rc, 1); + rc = 0; + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + return 0; + } + + list_head = &vsi->vsi_class.vfc_bitmap_empty[blk_cnt - 1]; + if (!d_list_empty(list_head)) { + bitmap_entry = d_list_entry(list_head->next, struct vea_bitmap_entry, + vbe_link); + if (vsi->vsi_unmap_ctxt.vnc_ext_flush) + D_ASSERT(bitmap_entry->vbe_published_state != VEA_BITMAP_STATE_PUBLISHING); + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == blk_cnt); + resrvd->vre_blk_off = vfb->vfb_blk_off; + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)bitmap_entry; + setbits64(vfb->vfb_bitmaps, 0, 1); + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + d_list_move_tail(&bitmap_entry->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[blk_cnt - 1]); + return 0; + } + + chunk_blks = get_bitmap_chunk_blks(vsi, blk_cnt); + bitmap_sz = get_bitmap_sz(chunk_blks, blk_cnt); + rc = reserve_bitmap_chunk(vsi, chunk_blks, resrvd); + if (resrvd->vre_blk_cnt <= 0) + return 0; + + resrvd->vre_new_bitmap_chunk = 1; + + new_vfb.vfb_blk_off = resrvd->vre_blk_off; + new_vfb.vfb_class = blk_cnt; + new_vfb.vfb_blk_cnt = chunk_blks; + new_vfb.vfb_bitmap_sz = bitmap_sz; + rc = bitmap_entry_insert(vsi, &new_vfb, VEA_BITMAP_STATE_NEW, + &entry, VEA_FL_NO_ACCOUNTING); + if (rc) + return rc; + + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)entry; + + D_DEBUG(DB_IO, "["DF_U64", %u]\n", resrvd->vre_blk_off, resrvd->vre_blk_cnt); + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, chunk_blks); + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + + return rc; +} + +static int +reserve_small(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + int rc; + + /* Skip huge allocate request */ + if (blk_cnt >= 
vsi->vsi_class.vfc_large_thresh) + return 0; + + rc = reserve_bitmap(vsi, blk_cnt, resrvd); + if (rc || resrvd->vre_blk_cnt > 0) + return rc; + + return reserve_size_tree(vsi, blk_cnt, resrvd); +} + int -reserve_vector(struct vea_space_info *vsi, uint32_t blk_cnt, +reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { - /* TODO reserve extent vector for non-contiguous allocation */ - return -DER_NOSPACE; + struct vea_free_class *vfc = &vsi->vsi_class; + int rc; + + /* No large free extent available */ + if (d_binheap_is_empty(&vfc->vfc_heap)) + return reserve_small(vsi, blk_cnt, resrvd); + + if (blk_cnt < vsi->vsi_class.vfc_large_thresh) { + rc = reserve_small(vsi, blk_cnt, resrvd); + if (rc || resrvd->vre_blk_cnt > 0) + return rc; + } + + return reserve_extent(vsi, blk_cnt, resrvd); } -int -persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe) +static int +persistent_alloc_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe) { struct vea_free_extent *found, frag = {0}; daos_handle_t btr_hdl; @@ -328,3 +514,183 @@ persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe) return 0; } + +int +bitmap_tx_add_ptr(struct umem_instance *vsi_umem, uint64_t *bitmap, + uint32_t bit_at, uint32_t bits_nr) +{ + uint32_t bitmap_off = bit_at / 8; + uint32_t bitmap_sz = 0; + + if (bit_at % 8) + bitmap_sz = 1; + + if (bits_nr > (bit_at % 8)) + bitmap_sz += (bits_nr - (bit_at % 8) + 7) / 8; + + return umem_tx_add_ptr(vsi_umem, (char *)bitmap + bitmap_off, bitmap_sz); +} + +int +bitmap_set_range(struct umem_instance *vsi_umem, struct vea_free_bitmap *bitmap, + uint64_t blk_off, uint32_t blk_cnt, bool clear) +{ + uint32_t bit_at, bits_nr; + int rc; + + if (blk_off < bitmap->vfb_blk_off || + blk_off + blk_cnt > bitmap->vfb_blk_off + bitmap->vfb_blk_cnt) { + D_ERROR("range ["DF_U64", %u] is not within bitmap ["DF_U64", %u]\n", + blk_off, blk_cnt, bitmap->vfb_blk_off, bitmap->vfb_blk_cnt); + return 
-DER_INVAL; + } + + bit_at = blk_off - bitmap->vfb_blk_off; + if (bit_at % bitmap->vfb_class != 0) { + D_ERROR("invalid block offset: "DF_U64" which is not times of %u\n", + blk_off, bitmap->vfb_class); + return -DER_INVAL; + } + if (blk_cnt % bitmap->vfb_class != 0) { + D_ERROR("invalid block count: %u which is not times of %u\n", + blk_cnt, bitmap->vfb_class); + return -DER_INVAL; + } + bit_at /= bitmap->vfb_class; + bits_nr = blk_cnt / bitmap->vfb_class; + if (clear) { + if (!isset_range((uint8_t *)bitmap->vfb_bitmaps, + bit_at, bit_at + bits_nr - 1)) { + D_ERROR("bitmap already cleared in the range.\n"); + return -DER_INVAL; + } + } else { + if (!isclr_range((uint8_t *)bitmap->vfb_bitmaps, + bit_at, bit_at + bits_nr - 1)) { + D_ERROR("bitmap already set in the range.["DF_U64", %u]\n", + blk_off, blk_cnt); + return -DER_INVAL; + } + } + + if (vsi_umem) { + rc = bitmap_tx_add_ptr(vsi_umem, bitmap->vfb_bitmaps, bit_at, bits_nr); + if (rc) + return rc; + } + + D_ASSERT(bit_at + bits_nr <= bitmap->vfb_bitmap_sz * 64); + if (clear) + clrbits64(bitmap->vfb_bitmaps, bit_at, bits_nr); + else + setbits64(bitmap->vfb_bitmaps, bit_at, bits_nr); + + return 0; +} + +static void +new_chunk_commit_cb(void *data, bool noop) +{ + struct vea_bitmap_entry *bitmap_entry = (struct vea_bitmap_entry *)data; + + if (noop) + return; + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_PUBLISHED; +} + +static void +new_chunk_abort_cb(void *data, bool noop) +{ + struct vea_bitmap_entry *bitmap_entry = (struct vea_bitmap_entry *)data; + + if (noop) + return; + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_NEW; +} + +int +persistent_alloc(struct vea_space_info *vsi, struct vea_free_entry *vfe) +{ + struct vea_bitmap_entry *bitmap_entry = vfe->vfe_bitmap; + + if (bitmap_entry == NULL) + return persistent_alloc_extent(vsi, &vfe->vfe_ext); + + D_ASSERT(bitmap_entry != NULL); + + /* if this bitmap is new */ + if (bitmap_entry->vbe_published_state == VEA_BITMAP_STATE_NEW) { + 
d_iov_t key, val, val_out; + struct vea_free_bitmap *bitmap; + int rc; + struct vea_free_extent extent; + daos_handle_t btr_hdl = vsi->vsi_md_bitmap_btr; + rc = umem_tx_begin(vsi->vsi_umem, vsi->vsi_txd); + if (rc != 0) + return rc; + + rc = umem_tx_add_callback(vsi->vsi_umem, vsi->vsi_txd, UMEM_STAGE_ONABORT, + new_chunk_abort_cb, bitmap_entry); + if (rc) { + D_ERROR("add chunk abort callback failed. "DF_RC"\n", DP_RC(rc)); + goto out; + } + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_PUBLISHING; + + rc = umem_tx_add_callback(vsi->vsi_umem, vsi->vsi_txd, UMEM_STAGE_ONCOMMIT, + new_chunk_commit_cb, bitmap_entry); + if (rc) { + D_ERROR("add chunk commit callback failed. "DF_RC"\n", DP_RC(rc)); + goto out; + } + + extent = vfe->vfe_ext; + extent.vfe_blk_off = bitmap_entry->vbe_bitmap.vfb_blk_off; + extent.vfe_blk_cnt = bitmap_entry->vbe_bitmap.vfb_blk_cnt; + rc = persistent_alloc_extent(vsi, &extent); + if (rc) + goto out; + + D_ALLOC(bitmap, alloc_free_bitmap_size(bitmap_entry->vbe_bitmap.vfb_bitmap_sz)); + if (!bitmap) { + rc = -DER_NOMEM; + goto out; + } + + D_ASSERT(vfe->vfe_ext.vfe_blk_cnt != 0); + bitmap->vfb_blk_off = extent.vfe_blk_off; + bitmap->vfb_class = bitmap_entry->vbe_bitmap.vfb_class; + bitmap->vfb_blk_cnt = bitmap_entry->vbe_bitmap.vfb_blk_cnt; + bitmap->vfb_bitmap_sz = bitmap_entry->vbe_bitmap.vfb_bitmap_sz; + rc = bitmap_set_range(NULL, bitmap, vfe->vfe_ext.vfe_blk_off, + vfe->vfe_ext.vfe_blk_cnt, false); + if (rc) { + D_FREE(bitmap); + goto out; + } + /* Add to persistent bitmap tree */ + D_ASSERT(daos_handle_is_valid(btr_hdl)); + d_iov_set(&key, &bitmap->vfb_blk_off, sizeof(bitmap->vfb_blk_off)); + d_iov_set(&val, bitmap, alloc_free_bitmap_size(bitmap->vfb_bitmap_sz)); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_upsert(btr_hdl, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &key, + &val, &val_out); + D_FREE(bitmap); + if (rc) + D_ERROR("Insert persistent bitmap failed. 
"DF_RC"\n", DP_RC(rc)); + else + bitmap_entry->vbe_md_bitmap = (struct vea_free_bitmap *)val_out.iov_buf; +out: + /* Commit/Abort transaction on success/error */ + rc = rc ? umem_tx_abort(vsi->vsi_umem, rc) : umem_tx_commit(vsi->vsi_umem); + + return rc; + } + + return bitmap_set_range(vsi->vsi_umem, vfe->vfe_bitmap->vbe_md_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, false); +} diff --git a/src/vea/vea_api.c b/src/vea/vea_api.c index ffad7b1b870..a5530a8e5f2 100644 --- a/src/vea/vea_api.c +++ b/src/vea/vea_api.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -14,7 +14,7 @@ static void erase_md(struct umem_instance *umem, struct vea_space_df *md) { struct umem_attr uma = {0}; - daos_handle_t free_btr, vec_btr; + daos_handle_t free_btr, bitmap_btr; int rc; uma.uma_id = umem->umm_id; @@ -27,15 +27,54 @@ erase_md(struct umem_instance *umem, struct vea_space_df *md) DP_RC(rc)); } - rc = dbtree_open_inplace(&md->vsd_vec_tree, &uma, &vec_btr); + rc = dbtree_open_inplace(&md->vsd_bitmap_tree, &uma, &bitmap_btr); if (rc == 0) { - rc = dbtree_destroy(vec_btr, NULL); + rc = dbtree_destroy(bitmap_btr, NULL); if (rc) - D_ERROR("destroy vector tree error: "DF_RC"\n", + D_ERROR("destroy bitmap tree error: "DF_RC"\n", DP_RC(rc)); } } +int +vea_upgrade(struct vea_space_info *vsi, struct umem_instance *umem, + struct vea_space_df *md, uint32_t version) +{ + int rc; + uint64_t offset; + d_iov_t key, val; + struct vea_hint_df dummy; + + if (version < 3) + return 0; + + /* Start transaction to initialize allocation metadata */ + rc = umem_tx_begin(umem, NULL); + if (rc != 0) + return rc; + + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, sizeof(offset)); + d_iov_set(&val, &dummy, sizeof(dummy)); + memset(&dummy, 0, sizeof(dummy)); + rc = dbtree_update(vsi->vsi_md_bitmap_btr, &key, &val); + if (rc) { + D_ERROR("upgrade to insert 
bitmap hint failed: "DF_RC"\n", + DP_RC(rc)); + goto out; + } + + rc = umem_tx_add_ptr(umem, md, sizeof(*md)); + if (rc != 0) + goto out; + + md->vsd_compat |= VEA_COMPAT_FEATURE_BITMAP; + +out: + /* Commit/Abort transaction on success/error */ + return rc ? umem_tx_abort(umem, rc) : umem_tx_commit(umem); +} + /* * Initialize the space tracking information on SCM and the header of the * block device. @@ -48,9 +87,11 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, { struct vea_free_extent free_ext; struct umem_attr uma; - uint64_t tot_blks; - daos_handle_t free_btr, vec_btr; + uint64_t tot_blks, offset; + daos_handle_t free_btr, bitmap_btr; + struct vea_hint_df dummy; d_iov_t key, val; + daos_handle_t md_bitmap_btr = DAOS_HDL_INVAL; int rc; D_ASSERT(umem != NULL); @@ -108,14 +149,15 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) return rc; - free_btr = vec_btr = DAOS_HDL_INVAL; + free_btr = bitmap_btr = DAOS_HDL_INVAL; rc = umem_tx_add_ptr(umem, md, sizeof(*md)); if (rc != 0) goto out; md->vsd_magic = VEA_MAGIC; - md->vsd_compat = 0; + /* Todo only enable bitmap for large pool size */ + md->vsd_compat = VEA_COMPAT_FEATURE_BITMAP; md->vsd_blk_sz = blk_sz; md->vsd_tot_blks = tot_blks; md->vsd_hdr_blks = hdr_blks; @@ -141,26 +183,59 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) goto out; - /* Create extent vector tree */ - rc = dbtree_create_inplace(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, - &md->vsd_vec_tree, &vec_btr); + /* Create bitmap tree */ + rc = dbtree_create_inplace(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, + &md->vsd_bitmap_tree, &bitmap_btr); + if (rc != 0) + goto out; + + /* Open bitmap tree */ + uma.uma_id = umem->umm_id; + uma.uma_pool = umem->umm_pool; + rc = dbtree_open_inplace(&md->vsd_bitmap_tree, &uma, + &md_bitmap_btr); if (rc != 0) goto out; + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, 
sizeof(offset)); + d_iov_set(&val, &dummy, sizeof(dummy)); + memset(&dummy, 0, sizeof(dummy)); + rc = dbtree_update(md_bitmap_btr, &key, &val); + if (rc) + goto out; out: if (daos_handle_is_valid(free_btr)) dbtree_close(free_btr); - if (daos_handle_is_valid(vec_btr)) - dbtree_close(vec_btr); + if (daos_handle_is_valid(bitmap_btr)) + dbtree_close(bitmap_btr); + if (daos_handle_is_valid(md_bitmap_btr)) + dbtree_close(md_bitmap_btr); /* Commit/Abort transaction on success/error */ return rc ? umem_tx_abort(umem, rc) : umem_tx_commit(umem); } +static int +destroy_free_bitmap_agg(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) +{ + struct vea_bitmap_entry *vbe; + + vbe = (struct vea_bitmap_entry *)val->iov_buf; + if (daos_handle_is_valid(vbe->vbe_agg_btr)) { + dbtree_destroy(vbe->vbe_agg_btr, NULL); + vbe->vbe_agg_btr = DAOS_HDL_INVAL; + } + + return 0; +} + /* Free the memory footprint created by vea_load(). */ void vea_unload(struct vea_space_info *vsi) { + int rc; + D_ASSERT(vsi != NULL); unload_space_info(vsi); @@ -170,10 +245,14 @@ vea_unload(struct vea_space_info *vsi) vsi->vsi_free_btr = DAOS_HDL_INVAL; } - /* Destroy the in-memory extent vector tree */ - if (daos_handle_is_valid(vsi->vsi_vec_btr)) { - dbtree_destroy(vsi->vsi_vec_btr, NULL); - vsi->vsi_vec_btr = DAOS_HDL_INVAL; + /* Destroy the in-memory bitmap tree */ + if (daos_handle_is_valid(vsi->vsi_bitmap_btr)) { + rc = dbtree_iterate(vsi->vsi_bitmap_btr, DAOS_INTENT_DEFAULT, + false, destroy_free_bitmap_agg, NULL); + if (rc) + D_ERROR("Failed to destroy free bitmap aggregation btr: "DF_RC"\n", DP_RC(rc)); + dbtree_destroy(vsi->vsi_bitmap_btr, NULL); + vsi->vsi_bitmap_btr = DAOS_HDL_INVAL; } /* Destroy the in-memory aggregation tree */ @@ -218,11 +297,11 @@ vea_load(struct umem_instance *umem, struct umem_tx_stage_data *txd, vsi->vsi_txd = txd; vsi->vsi_md = md; vsi->vsi_md_free_btr = DAOS_HDL_INVAL; - vsi->vsi_md_vec_btr = DAOS_HDL_INVAL; + vsi->vsi_md_bitmap_btr = DAOS_HDL_INVAL; 
vsi->vsi_free_btr = DAOS_HDL_INVAL; + vsi->vsi_bitmap_btr = DAOS_HDL_INVAL; D_INIT_LIST_HEAD(&vsi->vsi_agg_lru); vsi->vsi_agg_btr = DAOS_HDL_INVAL; - vsi->vsi_vec_btr = DAOS_HDL_INVAL; vsi->vsi_flush_time = 0; vsi->vsi_flush_scheduled = false; vsi->vsi_unmap_ctxt = *unmap_ctxt; @@ -240,15 +319,15 @@ vea_load(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) goto error; - /* Create in-memory extent vector tree */ + /* Create in-memory aggregation tree */ rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, - &vsi->vsi_vec_btr); + &vsi->vsi_agg_btr); if (rc != 0) goto error; - /* Create in-memory aggregation tree */ - rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, - &vsi->vsi_agg_btr); + /* Create in-memory bitmap tree */ + rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, NULL, + &vsi->vsi_bitmap_btr); if (rc != 0) goto error; @@ -283,8 +362,7 @@ aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t * half-and-half then reserve from the latter half. (lookup vfc_heap). Otherwise; * 3. Try to reserve from some small free extent (<= VEA_LARGE_EXT_MB) in best-fit, * if it fails, reserve from the largest free extent. (lookup vfc_size_btr) - * 4. Repeat the search in 3rd step to reserve an extent vector. (vsi_vec_btr) - * 5. Fail reserve with ENOMEM if all above attempts fail. + * 4. Fail reserve with ENOMEM if all above attempts fail. 
*/ int vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, @@ -294,10 +372,14 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, uint32_t nr_flushed; bool force = false; int rc = 0; + bool try_hint = true; D_ASSERT(vsi != NULL); D_ASSERT(resrvd_list != NULL); + if (is_bitmap_feature_enabled(vsi) && blk_cnt <= VEA_MAX_BITMAP_CLASS) + try_hint = false; + D_ALLOC_PTR(resrvd); if (resrvd == NULL) return -DER_NOMEM; @@ -306,17 +388,20 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, resrvd->vre_hint_off = VEA_HINT_OFF_INVAL; /* Get hint offset */ - hint_get(hint, &resrvd->vre_hint_off); + if (try_hint) + hint_get(hint, &resrvd->vre_hint_off); /* Trigger aging extents flush */ aging_flush(vsi, force, MAX_FLUSH_FRAGS, &nr_flushed); retry: /* Reserve from hint offset */ - rc = reserve_hint(vsi, blk_cnt, resrvd); - if (rc != 0) - goto error; - else if (resrvd->vre_blk_cnt != 0) - goto done; + if (try_hint) { + rc = reserve_hint(vsi, blk_cnt, resrvd); + if (rc != 0) + goto error; + else if (resrvd->vre_blk_cnt != 0) + goto done; + } /* Reserve from the largest extent or a small extent */ rc = reserve_single(vsi, blk_cnt, resrvd); @@ -325,27 +410,28 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, else if (resrvd->vre_blk_cnt != 0) goto done; - /* Reserve extent vector as the last resort */ - rc = reserve_vector(vsi, blk_cnt, resrvd); - - if (rc == -DER_NOSPACE && !force) { + rc = -DER_NOSPACE; + if (!force) { force = true; trigger_aging_flush(vsi, force, MAX_FLUSH_FRAGS * 10, &nr_flushed); if (nr_flushed == 0) goto error; goto retry; - } else if (rc != 0) { + } else { goto error; } done: - D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); D_ASSERT(resrvd->vre_blk_cnt == blk_cnt); - dec_stats(vsi, STAT_FREE_BLKS, blk_cnt); - - /* Update hint offset */ - hint_update(hint, resrvd->vre_blk_off + blk_cnt, - &resrvd->vre_hint_seq); + /* Update hint offset if allocation is from extent */ + if (resrvd->vre_private) { + dec_stats(vsi, 
STAT_FREE_BITMAP_BLKS, blk_cnt); + } else { + dec_stats(vsi, STAT_FREE_EXTENT_BLKS, blk_cnt); + D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); + hint_update(hint, resrvd->vre_blk_off + blk_cnt, + &resrvd->vre_hint_seq); + } d_list_add_tail(&resrvd->vre_link, resrvd_list); @@ -355,67 +441,130 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, return rc; } +static int +process_free_entry(struct vea_space_info *vsi, struct vea_free_entry *vfe, bool publish) +{ + uint32_t expected_type = vfe->vfe_bitmap ? VEA_FREE_ENTRY_BITMAP : VEA_FREE_ENTRY_EXTENT; + + if (!publish) { + int type = free_type(vsi, vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, NULL); + + if (type < 0) + return type; + + if (type != expected_type) { + D_ERROR("mismatch free entry type expected: %d, but got: %d\n", + expected_type, type); + return -DER_INVAL; + } + return compound_free(vsi, vfe, 0); + } + + return persistent_alloc(vsi, vfe); +} + static int process_resrvd_list(struct vea_space_info *vsi, struct vea_hint_context *hint, d_list_t *resrvd_list, bool publish) { struct vea_resrvd_ext *resrvd, *tmp; - struct vea_free_extent vfe; + struct vea_free_entry vfe; uint64_t seq_max = 0, seq_min = 0; uint64_t off_c = 0, off_p = 0; unsigned int seq_cnt = 0; int rc = 0; + uint32_t entry_type; + void *private = NULL; + uint64_t bitmap_seq_max = 0, bitmap_seq_min = 0; + uint64_t bitmap_off_c = 0, bitmap_off_p = 0; + unsigned int bitmap_seq_cnt = 0; + struct vea_hint_context *bitmap_hint = vsi->vsi_bitmap_hint_context; if (d_list_empty(resrvd_list)) return 0; - vfe.vfe_blk_off = 0; - vfe.vfe_blk_cnt = 0; - vfe.vfe_age = 0; /* Not used */ + vfe.vfe_ext.vfe_blk_off = 0; + vfe.vfe_ext.vfe_blk_cnt = 0; + vfe.vfe_ext.vfe_age = 0; /* Not used */ + vfe.vfe_bitmap = NULL; d_list_for_each_entry(resrvd, resrvd_list, vre_link) { + struct vea_bitmap_entry *bitmap_entry; + rc = verify_resrvd_ext(resrvd); if (rc) goto error; + entry_type = resrvd->vre_private ? 
+ VEA_FREE_ENTRY_BITMAP : VEA_FREE_ENTRY_EXTENT; + + bitmap_entry = (struct vea_bitmap_entry *)resrvd->vre_private; /* Reserved list is sorted by hint sequence */ - if (seq_min == 0) { - seq_min = resrvd->vre_hint_seq; - off_c = resrvd->vre_hint_off; - } else if (hint != NULL) { - D_ASSERT(seq_min < resrvd->vre_hint_seq); + /* use bitmap entry chunk offset */ + if (resrvd->vre_new_bitmap_chunk) { + D_ASSERT(bitmap_entry != NULL); + D_ASSERT(entry_type == VEA_FREE_ENTRY_BITMAP); + if (bitmap_seq_min == 0) { + bitmap_seq_min = resrvd->vre_hint_seq; + bitmap_off_c = resrvd->vre_hint_off; + } else { + D_ASSERT(bitmap_seq_min < resrvd->vre_hint_seq); + } + bitmap_seq_cnt++; + bitmap_seq_max = resrvd->vre_hint_seq; + bitmap_off_p = resrvd->vre_blk_off + bitmap_entry->vbe_bitmap.vfb_blk_cnt; + } else if (entry_type == VEA_FREE_ENTRY_EXTENT) { + if (seq_min == 0) { + seq_min = resrvd->vre_hint_seq; + off_c = resrvd->vre_hint_off; + } else if (hint != NULL) { + D_ASSERT(seq_min < resrvd->vre_hint_seq); + } + + seq_cnt++; + seq_max = resrvd->vre_hint_seq; + off_p = resrvd->vre_blk_off + resrvd->vre_blk_cnt; } - seq_cnt++; - seq_max = resrvd->vre_hint_seq; - off_p = resrvd->vre_blk_off + resrvd->vre_blk_cnt; - - if (vfe.vfe_blk_off + vfe.vfe_blk_cnt == resrvd->vre_blk_off) { - vfe.vfe_blk_cnt += resrvd->vre_blk_cnt; + if (private == resrvd->vre_private && + vfe.vfe_ext.vfe_blk_off + vfe.vfe_ext.vfe_blk_cnt == resrvd->vre_blk_off) { + vfe.vfe_ext.vfe_blk_cnt += resrvd->vre_blk_cnt; continue; } - if (vfe.vfe_blk_cnt != 0) { - rc = publish ? 
persistent_alloc(vsi, &vfe) : - compound_free(vsi, &vfe, 0); + if (vfe.vfe_ext.vfe_blk_cnt != 0) { + rc = process_free_entry(vsi, &vfe, publish); if (rc) goto error; } - vfe.vfe_blk_off = resrvd->vre_blk_off; - vfe.vfe_blk_cnt = resrvd->vre_blk_cnt; + vfe.vfe_ext.vfe_blk_off = resrvd->vre_blk_off; + vfe.vfe_ext.vfe_blk_cnt = resrvd->vre_blk_cnt; + vfe.vfe_bitmap = bitmap_entry; + private = resrvd->vre_private; } - if (vfe.vfe_blk_cnt != 0) { - rc = publish ? persistent_alloc(vsi, &vfe) : - compound_free(vsi, &vfe, 0); + if (vfe.vfe_ext.vfe_blk_cnt != 0) { + rc = process_free_entry(vsi, &vfe, publish); if (rc) goto error; } + if (seq_cnt == 0) + goto bitmap_publish; + rc = publish ? hint_tx_publish(vsi->vsi_umem, hint, off_p, seq_min, seq_max, seq_cnt) : hint_cancel(hint, off_c, seq_min, seq_max, seq_cnt); +bitmap_publish: + if (rc || bitmap_seq_cnt == 0) + goto error; + + rc = publish ? hint_tx_publish(vsi->vsi_umem, bitmap_hint, bitmap_off_p, + bitmap_seq_min, bitmap_seq_max, bitmap_seq_cnt) : + hint_cancel(bitmap_hint, bitmap_off_c, bitmap_seq_min, + bitmap_seq_max, bitmap_seq_cnt); + error: d_list_for_each_entry_safe(resrvd, tmp, resrvd_list, vre_link) { d_list_del_init(&resrvd->vre_link); @@ -457,40 +606,6 @@ vea_tx_publish(struct vea_space_info *vsi, struct vea_hint_context *hint, return process_resrvd_list(vsi, hint, resrvd_list, true); } -struct free_commit_cb_arg { - struct vea_space_info *fca_vsi; - struct vea_free_extent fca_vfe; -}; - -static void -free_commit_cb(void *data, bool noop) -{ - struct free_commit_cb_arg *fca = data; - int rc; - - /* Transaction aborted, only need to free callback arg */ - if (noop) - goto free; - - /* - * Aggregated free will be executed on outermost transaction - * commit. - * - * If it fails, the freed space on persistent free tree won't - * be added in in-memory free tree, hence the space won't be - * visible for allocation until the tree sync up on next server - * restart. 
Such temporary space leak is tolerable, what we must - * avoid is the contrary case: in-memory tree update succeeds - * but persistent tree update fails, which risks data corruption. - */ - rc = aggregated_free(fca->fca_vsi, &fca->fca_vfe); - - D_CDEBUG(rc, DLOG_ERR, DB_IO, "Aggregated free on vsi:%p rc %d\n", - fca->fca_vsi, rc); -free: - D_FREE(fca); -} - /* * Free allocated extent. * @@ -515,10 +630,10 @@ vea_free(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt) return -DER_NOMEM; fca->fca_vsi = vsi; - fca->fca_vfe.vfe_blk_off = blk_off; - fca->fca_vfe.vfe_blk_cnt = blk_cnt; + fca->fca_vfe.vfe_ext.vfe_blk_off = blk_off; + fca->fca_vfe.vfe_ext.vfe_blk_cnt = blk_cnt; - rc = verify_free_entry(NULL, &fca->fca_vfe); + rc = verify_free_entry(NULL, &fca->fca_vfe.vfe_ext); if (rc) goto error; @@ -571,16 +686,6 @@ vea_set_ext_age(struct vea_space_info *vsi, uint64_t blk_off, uint64_t age) return 0; } -/* Convert an extent into an allocated extent vector. */ -int -vea_get_ext_vector(struct vea_space_info *vsi, uint64_t blk_off, - uint32_t blk_cnt, struct vea_ext_vector *ext_vector) -{ - D_ASSERT(vsi != NULL); - D_ASSERT(ext_vector != NULL); - return 0; -} - /* Load persistent hint data and initialize in-memory hint context */ int vea_hint_load(struct vea_hint_df *phd, struct vea_hint_context **thc) @@ -609,8 +714,8 @@ vea_hint_unload(struct vea_hint_context *thc) } static int -count_free_persistent(daos_handle_t ih, d_iov_t *key, d_iov_t *val, - void *arg) +count_free_extent_persistent(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) { struct vea_free_extent *vfe; uint64_t *off, *free_blks = arg; @@ -629,16 +734,53 @@ count_free_persistent(daos_handle_t ih, d_iov_t *key, d_iov_t *val, return 0; } +static int +count_free_bitmap_persistent(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) +{ + struct vea_free_bitmap *vfb; + uint64_t *off, *free_blks = arg; + int rc; + + off = (uint64_t *)key->iov_buf; + if (*off == 
VEA_BITMAP_CHUNK_HINT_KEY) + return 0; + + vfb = (struct vea_free_bitmap *)val->iov_buf; + rc = verify_bitmap_entry(vfb); + if (rc != 0) + return rc; + + D_ASSERT(free_blks != NULL); + *free_blks += bitmap_free_blocks(vfb); + + return 0; +} + static int count_free_transient(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) { - struct vea_entry *ve; + struct vea_extent_entry *ve; uint64_t *free_blks = arg; - ve = (struct vea_entry *)val->iov_buf; + ve = (struct vea_extent_entry *)val->iov_buf; D_ASSERT(free_blks != NULL); - *free_blks += ve->ve_ext.vfe_blk_cnt; + *free_blks += ve->vee_ext.vfe_blk_cnt; + + return 0; +} + +static int +count_free_bitmap_transient(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) +{ + struct vea_bitmap_entry *vbe; + uint64_t *free_blks = arg; + + vbe = (struct vea_bitmap_entry *)val->iov_buf; + D_ASSERT(free_blks != NULL); + *free_blks += bitmap_free_blocks(&vbe->vbe_bitmap); return 0; } @@ -660,7 +802,8 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, attr->va_hdr_blks = vsd->vsd_hdr_blks; attr->va_large_thresh = vsi->vsi_class.vfc_large_thresh; attr->va_tot_blks = vsd->vsd_tot_blks; - attr->va_free_blks = vsi->vsi_stat[STAT_FREE_BLKS]; + attr->va_free_blks = vsi->vsi_stat[STAT_FREE_EXTENT_BLKS] + + vsi->vsi_stat[STAT_FREE_BITMAP_BLKS]; } if (stat != NULL) { @@ -668,7 +811,13 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, stat->vs_free_persistent = 0; rc = dbtree_iterate(vsi->vsi_md_free_btr, DAOS_INTENT_DEFAULT, - false, count_free_persistent, + false, count_free_extent_persistent, + (void *)&stat->vs_free_persistent); + if (rc != 0) + return rc; + + rc = dbtree_iterate(vsi->vsi_md_bitmap_btr, DAOS_INTENT_DEFAULT, + false, count_free_bitmap_persistent, (void *)&stat->vs_free_persistent); if (rc != 0) return rc; @@ -680,11 +829,19 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, if (rc != 0) return rc; + rc = dbtree_iterate(vsi->vsi_bitmap_btr, DAOS_INTENT_DEFAULT, + false, 
count_free_bitmap_transient, + (void *)&stat->vs_free_transient); + if (rc != 0) + return rc; + stat->vs_resrv_hint = vsi->vsi_stat[STAT_RESRV_HINT]; stat->vs_resrv_large = vsi->vsi_stat[STAT_RESRV_LARGE]; stat->vs_resrv_small = vsi->vsi_stat[STAT_RESRV_SMALL]; + stat->vs_resrv_bitmap = vsi->vsi_stat[STAT_RESRV_BITMAP]; stat->vs_frags_large = vsi->vsi_stat[STAT_FRAGS_LARGE]; stat->vs_frags_small = vsi->vsi_stat[STAT_FRAGS_SMALL]; + stat->vs_frags_bitmap = vsi->vsi_stat[STAT_FRAGS_BITMAP]; stat->vs_frags_aging = vsi->vsi_stat[STAT_FRAGS_AGING]; } diff --git a/src/vea/vea_free.c b/src/vea/vea_free.c index 53fa8492a91..f82fd299bd4 100644 --- a/src/vea/vea_free.c +++ b/src/vea/vea_free.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7,6 +7,7 @@ #include #include +#include #include "vea_internal.h" enum vea_free_type { @@ -15,33 +16,97 @@ enum vea_free_type { VEA_TYPE_PERSIST, }; +int +free_type(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt, + struct vea_bitmap_entry **bitmap_entry) +{ + int type = VEA_FREE_ENTRY_BITMAP; + struct vea_free_bitmap *found; + daos_handle_t btr_hdl = vsi->vsi_bitmap_btr; + d_iov_t key_in, key_out, val; + uint64_t found_end, vfe_end; + int rc, opc = BTR_PROBE_LE; + struct vea_bitmap_entry *entry = NULL; + + if (blk_cnt > VEA_BITMAP_MAX_CHUNK_BLKS) { + type = VEA_FREE_ENTRY_EXTENT; + goto out; + } + + D_ASSERT(daos_handle_is_valid(btr_hdl)); + /* Fetch the in-tree record */ + d_iov_set(&key_in, &blk_off, sizeof(blk_off)); + d_iov_set(&key_out, NULL, sizeof(blk_off)); + d_iov_set(&val, NULL, 0); + + rc = dbtree_fetch(btr_hdl, opc, DAOS_INTENT_DEFAULT, &key_in, &key_out, &val); + if (rc == -DER_NONEXIST) + return VEA_FREE_ENTRY_EXTENT; + + if (rc) { + D_ERROR("failed to search range ["DF_U64", %u] int bitmap tree\n", + blk_off, blk_cnt); + return rc; + } + + entry = (struct vea_bitmap_entry *)val.iov_buf; + 
found = &entry->vbe_bitmap; + rc = verify_bitmap_entry(found); + if (rc) { + D_ERROR("verify bitmap failed in free_type\n"); + return rc; + } + + found_end = found->vfb_blk_off + found->vfb_blk_cnt - 1; + vfe_end = blk_off + blk_cnt - 1; + D_ASSERT(blk_off >= found->vfb_blk_off); + if (blk_off <= found_end) { + if (vfe_end <= found_end) { + if (bitmap_entry) + *bitmap_entry = entry; + return VEA_FREE_ENTRY_BITMAP; + } + + D_CRIT("["DF_U64", %u] should not cross bitmap tree\n", + found->vfb_blk_off, found->vfb_blk_cnt); + return -DER_INVAL; + } else { + type = VEA_FREE_ENTRY_EXTENT; + } +out: + return type; +} + void -free_class_remove(struct vea_space_info *vsi, struct vea_entry *entry) +extent_free_class_remove(struct vea_space_info *vsi, struct vea_extent_entry *entry) { struct vea_free_class *vfc = &vsi->vsi_class; - struct vea_sized_class *sc = entry->ve_sized_class; - uint32_t blk_cnt = entry->ve_ext.vfe_blk_cnt; + struct vea_sized_class *sc = entry->vee_sized_class; + uint32_t blk_cnt; if (sc == NULL) { + blk_cnt = entry->vee_ext.vfe_blk_cnt; D_ASSERTF(blk_cnt > vfc->vfc_large_thresh, "%u <= %u", blk_cnt, vfc->vfc_large_thresh); - D_ASSERT(d_list_empty(&entry->ve_link)); + D_ASSERT(d_list_empty(&entry->vee_link)); - d_binheap_remove(&vfc->vfc_heap, &entry->ve_node); + d_binheap_remove(&vfc->vfc_heap, &entry->vee_node); dec_stats(vsi, STAT_FRAGS_LARGE, 1); } else { d_iov_t key; - uint64_t int_key = blk_cnt; int rc; + blk_cnt = entry->vee_ext.vfe_blk_cnt; D_ASSERTF(blk_cnt > 0 && blk_cnt <= vfc->vfc_large_thresh, "%u > %u", blk_cnt, vfc->vfc_large_thresh); D_ASSERT(daos_handle_is_valid(vfc->vfc_size_btr)); - d_list_del_init(&entry->ve_link); - entry->ve_sized_class = NULL; + d_list_del_init(&entry->vee_link); + entry->vee_sized_class = NULL; /* Remove the sized class when it's empty */ - if (d_list_empty(&sc->vsc_lru)) { + if (d_list_empty(&sc->vsc_extent_lru)) { + uint64_t int_key = blk_cnt; + d_iov_set(&key, &int_key, sizeof(int_key)); rc = 
dbtree_delete(vfc->vfc_size_btr, BTR_PROBE_EQ, &key, NULL); if (rc) @@ -52,32 +117,16 @@ free_class_remove(struct vea_space_info *vsi, struct vea_entry *entry) } } -int -free_class_add(struct vea_space_info *vsi, struct vea_entry *entry) +static int +find_or_create_sized_class(struct vea_space_info *vsi, uint64_t int_key, + struct vea_sized_class **ret_sc) { struct vea_free_class *vfc = &vsi->vsi_class; daos_handle_t btr_hdl = vfc->vfc_size_btr; - uint32_t blk_cnt = entry->ve_ext.vfe_blk_cnt; d_iov_t key, val, val_out; - uint64_t int_key = blk_cnt; - struct vea_sized_class dummy, *sc; + struct vea_sized_class dummy, *sc = NULL; int rc; - D_ASSERT(entry->ve_sized_class == NULL); - D_ASSERT(d_list_empty(&entry->ve_link)); - - /* Add to heap if it's a large free extent */ - if (blk_cnt > vfc->vfc_large_thresh) { - rc = d_binheap_insert(&vfc->vfc_heap, &entry->ve_node); - if (rc != 0) { - D_ERROR("Failed to insert heap: %d\n", rc); - return rc; - } - - inc_stats(vsi, STAT_FRAGS_LARGE, 1); - return 0; - } - /* Add to a sized class */ D_ASSERT(daos_handle_is_valid(btr_hdl)); d_iov_set(&key, &int_key, sizeof(int_key)); @@ -88,50 +137,119 @@ free_class_add(struct vea_space_info *vsi, struct vea_entry *entry) /* Found an existing sized class */ sc = (struct vea_sized_class *)val_out.iov_buf; D_ASSERT(sc != NULL); - D_ASSERT(!d_list_empty(&sc->vsc_lru)); } else if (rc == -DER_NONEXIST) { /* Create a new sized class */ + memset(&dummy, 0, sizeof(dummy)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); rc = dbtree_upsert(btr_hdl, BTR_PROBE_BYPASS, DAOS_INTENT_UPDATE, &key, &val, &val_out); if (rc != 0) { - D_ERROR("Insert size class:%u failed. "DF_RC"\n", - blk_cnt, DP_RC(rc)); + D_ERROR("Insert size class:%llu failed. 
"DF_RC"\n", + (unsigned long long)int_key, DP_RC(rc)); return rc; } sc = (struct vea_sized_class *)val_out.iov_buf; D_ASSERT(sc != NULL); - D_INIT_LIST_HEAD(&sc->vsc_lru); + D_INIT_LIST_HEAD(&sc->vsc_extent_lru); } else { - D_ERROR("Lookup size class:%u failed. "DF_RC"\n", blk_cnt, DP_RC(rc)); + D_ERROR("Lookup size class:%llu failed. "DF_RC"\n", + (unsigned long long)int_key, DP_RC(rc)); return rc; } + *ret_sc = sc; + + return rc; +} + +int +extent_free_class_add(struct vea_space_info *vsi, struct vea_extent_entry *entry) +{ + struct vea_free_class *vfc = &vsi->vsi_class; + uint64_t int_key; + struct vea_sized_class *sc; + int rc; + + D_ASSERT(entry->vee_sized_class == NULL); + D_ASSERT(d_list_empty(&entry->vee_link)); + + int_key = entry->vee_ext.vfe_blk_cnt; + /* Add to heap if it's a free extent */ + if (int_key > vfc->vfc_large_thresh) { + rc = d_binheap_insert(&vfc->vfc_heap, &entry->vee_node); + if (rc != 0) { + D_ERROR("Failed to insert heap: %d\n", rc); + return rc; + } + inc_stats(vsi, STAT_FRAGS_LARGE, 1); + return 0; + } + + rc = find_or_create_sized_class(vsi, int_key, &sc); + if (rc) + return rc; - entry->ve_sized_class = sc; - d_list_add_tail(&entry->ve_link, &sc->vsc_lru); + entry->vee_sized_class = sc; + d_list_add_tail(&entry->vee_link, &sc->vsc_extent_lru); inc_stats(vsi, STAT_FRAGS_SMALL, 1); return 0; } static void -undock_entry(struct vea_space_info *vsi, struct vea_entry *entry, - unsigned int type) +bitmap_free_class_add(struct vea_space_info *vsi, struct vea_bitmap_entry *entry, + int flags) +{ + uint64_t int_key; + int free_blks; + + D_ASSERT(d_list_empty(&entry->vbe_link)); + + int_key = entry->vbe_bitmap.vfb_class; + D_ASSERT(int_key <= VEA_MAX_BITMAP_CLASS && int_key > 0); + + free_blks = bitmap_free_blocks(&entry->vbe_bitmap); + if (!(flags & VEA_FL_NO_ACCOUNTING)) + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, free_blks); + if (free_blks >= int_key) { + if (free_blks == entry->vbe_bitmap.vfb_blk_cnt) + d_list_add(&entry->vbe_link, + 
&vsi->vsi_class.vfc_bitmap_empty[int_key - 1]); + else + d_list_add(&entry->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[int_key - 1]); + } + inc_stats(vsi, STAT_FRAGS_BITMAP, 1); +} + +static void +undock_extent_entry(struct vea_space_info *vsi, struct vea_extent_entry *entry, + unsigned int type) { if (type == VEA_TYPE_PERSIST) return; D_ASSERT(entry != NULL); if (type == VEA_TYPE_COMPOUND) { - free_class_remove(vsi, entry); + extent_free_class_remove(vsi, entry); } else { - d_list_del_init(&entry->ve_link); + d_list_del_init(&entry->vee_link); dec_stats(vsi, STAT_FRAGS_AGING, 1); } } +static void +undock_free_entry(struct vea_space_info *vsi, struct vea_free_entry *entry, + unsigned int type) +{ + if (type == VEA_TYPE_PERSIST || type == VEA_TYPE_COMPOUND) + return; + + d_list_del_init(&entry->vfe_link); + dec_stats(vsi, STAT_FRAGS_AGING, 1); +} + #define LARGE_AGING_FRAG_BLKS 8192 static inline bool @@ -141,27 +259,20 @@ is_aging_frag_large(struct vea_free_extent *vfe) } static inline void -dock_aging_entry(struct vea_space_info *vsi, struct vea_entry *entry) +dock_aging_entry(struct vea_space_info *vsi, struct vea_free_entry *entry) { - d_list_add_tail(&entry->ve_link, &vsi->vsi_agg_lru); + d_list_add_tail(&entry->vfe_link, &vsi->vsi_agg_lru); inc_stats(vsi, STAT_FRAGS_AGING, 1); } static int -dock_entry(struct vea_space_info *vsi, struct vea_entry *entry, unsigned int type) +dock_extent_entry(struct vea_space_info *vsi, struct vea_extent_entry *entry, unsigned int type) { - int rc = 0; D_ASSERT(entry != NULL); - if (type == VEA_TYPE_COMPOUND) { - rc = free_class_add(vsi, entry); - } else { - D_ASSERT(type == VEA_TYPE_AGGREGATE); - D_ASSERT(d_list_empty(&entry->ve_link)); - dock_aging_entry(vsi, entry); - } + D_ASSERT(type == VEA_TYPE_COMPOUND); - return rc; + return extent_free_class_add(vsi, entry); } /* @@ -175,26 +286,17 @@ dock_entry(struct vea_space_info *vsi, struct vea_entry *entry, unsigned int typ */ static int merge_free_ext(struct vea_space_info *vsi, 
struct vea_free_extent *ext_in, - unsigned int type, unsigned int flags) + unsigned int type, unsigned int flags, daos_handle_t btr_hdl) { struct vea_free_extent *ext, *neighbor = NULL; struct vea_free_extent merged = *ext_in; - struct vea_entry *entry, *neighbor_entry = NULL; - daos_handle_t btr_hdl; + struct vea_extent_entry *extent_entry, *neighbor_extent_entry = NULL; + struct vea_free_entry *free_entry, *neighbor_free_entry = NULL; d_iov_t key, key_out, val; uint64_t *off; bool fetch_prev = true, large_prev = false; int rc, del_opc = BTR_PROBE_BYPASS; - if (type == VEA_TYPE_COMPOUND) - btr_hdl = vsi->vsi_free_btr; - else if (type == VEA_TYPE_PERSIST) - btr_hdl = vsi->vsi_md_free_btr; - else if (type == VEA_TYPE_AGGREGATE) - btr_hdl = vsi->vsi_agg_btr; - else - return -DER_INVAL; - D_ASSERT(daos_handle_is_valid(btr_hdl)); d_iov_set(&key, &ext_in->vfe_blk_off, sizeof(ext_in->vfe_blk_off)); d_iov_set(&key_out, NULL, 0); @@ -215,7 +317,7 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, return rc; } repeat: - d_iov_set(&key_out, NULL, 0); + d_iov_set(&key_out, NULL, sizeof(ext_in->vfe_blk_off)); d_iov_set(&val, NULL, 0); if (fetch_prev) { @@ -249,11 +351,17 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, } if (type == VEA_TYPE_PERSIST) { - entry = NULL; + extent_entry = NULL; + free_entry = NULL; ext = (struct vea_free_extent *)val.iov_buf; + } else if (type == VEA_TYPE_COMPOUND) { + free_entry = NULL; + extent_entry = (struct vea_extent_entry *)val.iov_buf; + ext = &extent_entry->vee_ext; } else { - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; + extent_entry = NULL; + free_entry = (struct vea_free_entry *)val.iov_buf; + ext = &free_entry->vfe_ext; } off = (uint64_t *)key_out.iov_buf; @@ -297,7 +405,8 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, merged.vfe_blk_cnt += ext->vfe_blk_cnt; neighbor = ext; - neighbor_entry = entry; + neighbor_extent_entry = 
extent_entry; + neighbor_free_entry = free_entry; } else { merged.vfe_blk_cnt += ext->vfe_blk_cnt; @@ -306,7 +415,10 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, * adjacent extent. */ if (neighbor != NULL) { - undock_entry(vsi, entry, type); + if (extent_entry) + undock_extent_entry(vsi, extent_entry, type); + else if (free_entry) + undock_free_entry(vsi, free_entry, type); rc = dbtree_delete(btr_hdl, del_opc, &key_out, NULL); if (rc) { D_ERROR("Failed to delete: %d\n", rc); @@ -314,7 +426,8 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, } } else { neighbor = ext; - neighbor_entry = entry; + neighbor_extent_entry = extent_entry; + neighbor_free_entry = free_entry; } } } @@ -335,7 +448,10 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, return rc; } } else { - undock_entry(vsi, neighbor_entry, type); + if (neighbor_extent_entry) + undock_extent_entry(vsi, neighbor_extent_entry, type); + else if (neighbor_free_entry) + undock_free_entry(vsi, neighbor_free_entry, type); } /* Adjust in-tree offset & length */ @@ -344,24 +460,123 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, if (type == VEA_TYPE_AGGREGATE || type == VEA_TYPE_COMPOUND) { neighbor->vfe_age = merged.vfe_age; - rc = dock_entry(vsi, neighbor_entry, type); - if (rc < 0) - return rc; + if (neighbor_extent_entry) { + rc = dock_extent_entry(vsi, neighbor_extent_entry, type); + if (rc < 0) + return rc; + } else if (neighbor_free_entry) { + D_ASSERT(type == VEA_TYPE_AGGREGATE); + D_ASSERT(d_list_empty(&neighbor_free_entry->vfe_link)); + dock_aging_entry(vsi, neighbor_free_entry); + } } return 1; } -/* Free extent to in-memory compound index */ +/* insert bitmap entry to in-memory index */ int -compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, - unsigned int flags) +bitmap_entry_insert(struct vea_space_info *vsi, struct vea_free_bitmap *vfb, + int state, struct 
vea_bitmap_entry **ret_entry, unsigned int flags) { - struct vea_entry *entry, dummy; + struct vea_bitmap_entry *entry, *dummy; + d_iov_t key, val, val_out; + int rc, ret; + struct umem_attr uma; + int dummy_size = sizeof(*dummy) + (vfb->vfb_bitmap_sz << 3); + + D_ALLOC(dummy, dummy_size); + if (!dummy) + return -DER_NOMEM; + + memset(dummy, 0, sizeof(*dummy)); + dummy->vbe_bitmap = *vfb; + dummy->vbe_agg_btr = DAOS_HDL_INVAL; + if (state == VEA_BITMAP_STATE_NEW) + setbits64(dummy->vbe_bitmap.vfb_bitmaps, 0, 1); + else + memcpy(dummy->vbe_bitmap.vfb_bitmaps, vfb->vfb_bitmaps, vfb->vfb_bitmap_sz << 3); + dummy->vbe_published_state = state; + + /* Add to in-memory bitmap tree */ + D_ASSERT(daos_handle_is_valid(vsi->vsi_free_btr)); + d_iov_set(&key, &dummy->vbe_bitmap.vfb_blk_off, sizeof(dummy->vbe_bitmap.vfb_blk_off)); + d_iov_set(&val, dummy, dummy_size); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_upsert(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &key, + &val, &val_out); + D_FREE(dummy); + if (rc != 0) { + D_ERROR("Insert bitmap failed. "DF_RC" %llu\n", DP_RC(rc), + (unsigned long long)vfb->vfb_blk_off); + return rc; + } + + memset(&uma, 0, sizeof(uma)); + uma.uma_id = UMEM_CLASS_VMEM; + + D_ASSERT(val_out.iov_buf != NULL); + entry = (struct vea_bitmap_entry *)val_out.iov_buf; + rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, + &entry->vbe_agg_btr); + if (rc != 0) + goto error; + + D_INIT_LIST_HEAD(&entry->vbe_link); + D_ASSERT(entry->vbe_bitmap.vfb_class == vfb->vfb_class); + + bitmap_free_class_add(vsi, entry, flags); + if (ret_entry) + *ret_entry = entry; + return rc; + +error: + ret = dbtree_delete(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (ret) + D_ERROR("Failed to clean bitmap failed. 
"DF_RC" "DF_U64"\n", + DP_RC(rc), vfb->vfb_blk_off); + return rc; +} + +static int +bitmap_entry_remove(struct vea_space_info *vsi, struct vea_bitmap_entry *bitmap, + unsigned int flags) +{ + d_iov_t key; + int rc; + + rc = dbtree_destroy(bitmap->vbe_agg_btr, NULL); + if (rc) { + D_ERROR("Failed to destroy bitmap agg tree. "DF_RC" "DF_U64"\n", + DP_RC(rc), bitmap->vbe_bitmap.vfb_blk_off); + return rc; + } + bitmap->vbe_agg_btr = DAOS_HDL_INVAL; + + if (!(flags & VEA_FL_NO_ACCOUNTING)) + dec_stats(vsi, STAT_FREE_BITMAP_BLKS, bitmap->vbe_bitmap.vfb_blk_cnt); + d_list_del_init(&bitmap->vbe_link); + dec_stats(vsi, STAT_FRAGS_BITMAP, 1); + + d_iov_set(&key, &bitmap->vbe_bitmap.vfb_blk_off, sizeof(bitmap->vbe_bitmap.vfb_blk_off)); + rc = dbtree_delete(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) + D_ERROR("Failed to clean bitmap failed. "DF_RC" "DF_U64"\n", + DP_RC(rc), bitmap->vbe_bitmap.vfb_blk_off); + + return rc; +} + +int +compound_free_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe, + unsigned int flags) +{ + struct vea_extent_entry *entry, dummy; d_iov_t key, val, val_out; int rc; - rc = merge_free_ext(vsi, vfe, VEA_TYPE_COMPOUND, flags); + rc = merge_free_ext(vsi, vfe, VEA_TYPE_COMPOUND, flags, vsi->vsi_free_btr); if (rc < 0) { return rc; } else if (rc > 0) { @@ -370,12 +585,12 @@ compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, } memset(&dummy, 0, sizeof(dummy)); - D_INIT_LIST_HEAD(&dummy.ve_link); - dummy.ve_ext = *vfe; + D_INIT_LIST_HEAD(&dummy.vee_link); + dummy.vee_ext = *vfe; /* Add to in-memory free extent tree */ D_ASSERT(daos_handle_is_valid(vsi->vsi_free_btr)); - d_iov_set(&key, &dummy.ve_ext.vfe_blk_off, sizeof(dummy.ve_ext.vfe_blk_off)); + d_iov_set(&key, &dummy.vee_ext.vfe_blk_off, sizeof(dummy.vee_ext.vfe_blk_off)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); @@ -387,27 +602,81 @@ compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, } 
D_ASSERT(val_out.iov_buf != NULL); - entry = (struct vea_entry *)val_out.iov_buf; - D_INIT_LIST_HEAD(&entry->ve_link); + entry = (struct vea_extent_entry *)val_out.iov_buf; + D_INIT_LIST_HEAD(&entry->vee_link); - rc = free_class_add(vsi, entry); + rc = extent_free_class_add(vsi, entry); accounting: if (!rc && !(flags & VEA_FL_NO_ACCOUNTING)) - inc_stats(vsi, STAT_FREE_BLKS, vfe->vfe_blk_cnt); + inc_stats(vsi, STAT_FREE_EXTENT_BLKS, vfe->vfe_blk_cnt); return rc; } -/* Free extent to persistent free tree */ +/* Free entry to in-memory compound index */ int -persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) +compound_free(struct vea_space_info *vsi, struct vea_free_entry *vfe, + unsigned int flags) +{ + int rc; + struct vea_bitmap_entry *found = vfe->vfe_bitmap; + + if (found == NULL) + return compound_free_extent(vsi, &vfe->vfe_ext, flags); + + rc = bitmap_set_range(NULL, &found->vbe_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, true); + if (rc) + return rc; + + if (!(flags & VEA_FL_NO_ACCOUNTING)) + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, vfe->vfe_ext.vfe_blk_cnt); + + /* if bitmap is not published and clear, then remove it */ + if (found->vbe_published_state == VEA_BITMAP_STATE_NEW) { + if (is_bitmap_empty(found->vbe_bitmap.vfb_bitmaps, + found->vbe_bitmap.vfb_bitmap_sz)) { + struct vea_free_extent ext; + + ext.vfe_blk_cnt = found->vbe_bitmap.vfb_blk_cnt; + ext.vfe_blk_off = found->vbe_bitmap.vfb_blk_off; + rc = bitmap_entry_remove(vsi, found, flags); + if (rc) + return rc; + return compound_free_extent(vsi, &ext, flags); + } + } + + if (is_bitmap_empty(found->vbe_bitmap.vfb_bitmaps, + found->vbe_bitmap.vfb_bitmap_sz)) { + if (d_list_empty(&found->vbe_link)) + d_list_add_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_empty[found->vbe_bitmap.vfb_class - 1]); + else + d_list_move_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_empty[found->vbe_bitmap.vfb_class - 1]); + return 0; + } + + if (d_list_empty(&found->vbe_link)) 
{ + D_ASSERT(found->vbe_bitmap.vfb_class <= VEA_MAX_BITMAP_CLASS); + d_list_add_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[found->vbe_bitmap.vfb_class - 1]); + } + + return 0; +} + +/* Free extent to persistent free tree */ +static int +persistent_free_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe) { struct vea_free_extent dummy; d_iov_t key, val; daos_handle_t btr_hdl = vsi->vsi_md_free_btr; int rc; - rc = merge_free_ext(vsi, vfe, VEA_TYPE_PERSIST, 0); + rc = merge_free_ext(vsi, vfe, VEA_TYPE_PERSIST, 0, vsi->vsi_md_free_btr); if (rc < 0) return rc; else if (rc > 0) @@ -428,41 +697,70 @@ persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) return rc; } +int +persistent_free(struct vea_space_info *vsi, struct vea_free_entry *vfe) +{ + int type; + + D_ASSERT(umem_tx_inprogress(vsi->vsi_umem) || + vsi->vsi_umem->umm_id == UMEM_CLASS_VMEM); + D_ASSERT(vfe->vfe_ext.vfe_blk_off != VEA_HINT_OFF_INVAL); + type = free_type(vsi, vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, + &vfe->vfe_bitmap); + if (type < 0) + return type; + + if (vfe->vfe_bitmap == NULL) + return persistent_free_extent(vsi, &vfe->vfe_ext); + + D_ASSERT(type == VEA_FREE_ENTRY_BITMAP); + + D_ASSERT(vfe->vfe_ext.vfe_blk_cnt > 0 && + vfe->vfe_ext.vfe_blk_cnt < vsi->vsi_class.vfc_large_thresh); + return bitmap_set_range(vsi->vsi_umem, vfe->vfe_bitmap->vbe_md_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, true); +} + /* Free extent to the aggregate free tree */ int -aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) +aggregated_free(struct vea_space_info *vsi, struct vea_free_entry *vfe) { - struct vea_entry *entry, dummy; + struct vea_free_entry *entry, dummy; d_iov_t key, val, val_out; daos_handle_t btr_hdl = vsi->vsi_agg_btr; int rc; - vfe->vfe_age = get_current_age(); - rc = merge_free_ext(vsi, vfe, VEA_TYPE_AGGREGATE, 0); + /* free entry bitmap */ + if (vfe->vfe_bitmap == NULL) + btr_hdl = vsi->vsi_agg_btr; + else 
+ btr_hdl = vfe->vfe_bitmap->vbe_agg_btr; + + vfe->vfe_ext.vfe_age = get_current_age(); + rc = merge_free_ext(vsi, &vfe->vfe_ext, VEA_TYPE_AGGREGATE, 0, btr_hdl); if (rc < 0) return rc; else if (rc > 0) - return 0; /* extent merged in tree */ + return 0; /* entry merged in tree */ - memset(&dummy, 0, sizeof(dummy)); - D_INIT_LIST_HEAD(&dummy.ve_link); - dummy.ve_ext = *vfe; + dummy = *vfe; + D_INIT_LIST_HEAD(&dummy.vfe_link); /* Add to in-memory aggregate free extent tree */ D_ASSERT(daos_handle_is_valid(btr_hdl)); - d_iov_set(&key, &dummy.ve_ext.vfe_blk_off, sizeof(dummy.ve_ext.vfe_blk_off)); + d_iov_set(&key, &dummy.vfe_ext.vfe_blk_off, sizeof(dummy.vfe_ext.vfe_blk_off)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); rc = dbtree_upsert(btr_hdl, BTR_PROBE_BYPASS, DAOS_INTENT_UPDATE, &key, &val, &val_out); if (rc) { - D_ERROR("Insert aging extent failed. "DF_RC"\n", DP_RC(rc)); + D_ERROR("Insert aging entry failed. "DF_RC"\n", DP_RC(rc)); return rc; } D_ASSERT(val_out.iov_buf != NULL); - entry = (struct vea_entry *)val_out.iov_buf; - D_INIT_LIST_HEAD(&entry->ve_link); + entry = (struct vea_free_entry *)val_out.iov_buf; + D_INIT_LIST_HEAD(&entry->vfe_link); dock_aging_entry(vsi, entry); return 0; @@ -474,35 +772,48 @@ aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) static int flush_internal(struct vea_space_info *vsi, bool force, uint32_t cur_time, d_sg_list_t *unmap_sgl) { - struct vea_entry *entry, *tmp; + struct vea_free_entry *entry, *tmp; struct vea_free_extent vfe; + struct vea_free_entry free_entry; d_iov_t *unmap_iov; int i, rc = 0; + d_iov_t key; + struct vea_bitmap_entry *bitmap; + struct vea_bitmap_entry **flush_bitmaps; + daos_handle_t btr_hdl; D_ASSERT(umem_tx_none(vsi->vsi_umem)); D_ASSERT(unmap_sgl->sg_nr_out == 0); - d_list_for_each_entry_safe(entry, tmp, &vsi->vsi_agg_lru, ve_link) { - d_iov_t key; + D_ALLOC_ARRAY(flush_bitmaps, MAX_FLUSH_FRAGS); + if (!flush_bitmaps) + return -DER_NOMEM; - vfe = 
entry->ve_ext; + d_list_for_each_entry_safe(entry, tmp, &vsi->vsi_agg_lru, vfe_link) { + vfe = entry->vfe_ext; if (!force && cur_time < (vfe.vfe_age + EXPIRE_INTVL)) break; /* Remove entry from aggregate LRU list */ - d_list_del_init(&entry->ve_link); + d_list_del_init(&entry->vfe_link); dec_stats(vsi, STAT_FRAGS_AGING, 1); + bitmap = entry->vfe_bitmap; + if (bitmap) + btr_hdl = bitmap->vbe_agg_btr; + else + btr_hdl = vsi->vsi_agg_btr; /* Remove entry from aggregate tree, entry will be freed on deletion */ d_iov_set(&key, &vfe.vfe_blk_off, sizeof(vfe.vfe_blk_off)); - D_ASSERT(daos_handle_is_valid(vsi->vsi_agg_btr)); - rc = dbtree_delete(vsi->vsi_agg_btr, BTR_PROBE_EQ, &key, NULL); + D_ASSERT(daos_handle_is_valid(btr_hdl)); + rc = dbtree_delete(btr_hdl, BTR_PROBE_EQ, &key, NULL); if (rc) { D_ERROR("Remove ["DF_U64", %u] from aggregated tree error: "DF_RC"\n", vfe.vfe_blk_off, vfe.vfe_blk_cnt, DP_RC(rc)); break; } + flush_bitmaps[unmap_sgl->sg_nr_out] = bitmap; /* Unmap callback may yield, so we can't call it directly in this tight loop */ unmap_sgl->sg_nr_out++; unmap_iov = &unmap_sgl->sg_iovs[unmap_sgl->sg_nr_out - 1]; @@ -533,15 +844,18 @@ flush_internal(struct vea_space_info *vsi, bool force, uint32_t cur_time, d_sg_l for (i = 0; i < unmap_sgl->sg_nr_out; i++) { unmap_iov = &unmap_sgl->sg_iovs[i]; - vfe.vfe_blk_off = (uint64_t)unmap_iov->iov_buf; - vfe.vfe_blk_cnt = unmap_iov->iov_len; - vfe.vfe_age = cur_time; + free_entry.vfe_ext.vfe_blk_off = (uint64_t)unmap_iov->iov_buf; + free_entry.vfe_ext.vfe_blk_cnt = unmap_iov->iov_len; + free_entry.vfe_ext.vfe_age = cur_time; + free_entry.vfe_bitmap = flush_bitmaps[i]; - rc = compound_free(vsi, &vfe, 0); + rc = compound_free(vsi, &free_entry, 0); if (rc) D_ERROR("Compound free ["DF_U64", %u] error: "DF_RC"\n", - vfe.vfe_blk_off, vfe.vfe_blk_cnt, DP_RC(rc)); + free_entry.vfe_ext.vfe_blk_off, free_entry.vfe_ext.vfe_blk_cnt, + DP_RC(rc)); } + D_FREE(flush_bitmaps); return rc; } @@ -562,6 +876,127 @@ need_aging_flush(struct 
vea_space_info *vsi, uint32_t cur_time, bool force) return true; } +void +free_commit_cb(void *data, bool noop) +{ + struct free_commit_cb_arg *fca = data; + int rc; + + /* Transaction aborted, only need to free callback arg */ + if (noop) + goto free; + + /* + * Aggregated free will be executed on outermost transaction + * commit. + * + * If it fails, the freed space on persistent free tree won't + * be added in in-memory free tree, hence the space won't be + * visible for allocation until the tree sync up on next server + * restart. Such temporary space leak is tolerable, what we must + * avoid is the contrary case: in-memory tree update succeeds + * but persistent tree update fails, which risks data corruption. + */ + rc = aggregated_free(fca->fca_vsi, &fca->fca_vfe); + + D_CDEBUG(rc, DLOG_ERR, DB_IO, "Aggregated free on vsi:%p rc %d\n", + fca->fca_vsi, rc); +free: + D_FREE(fca); +} + +static int +reclaim_unused_bitmap(struct vea_space_info *vsi, uint32_t nr_reclaim, uint32_t *nr_reclaimed) +{ + int i; + struct vea_bitmap_entry *bitmap_entry, *tmp_entry; + struct vea_free_bitmap *vfb; + d_iov_t key; + int rc = 0; + struct free_commit_cb_arg *fca; + struct umem_instance *umem = vsi->vsi_umem; + int nr = 0; + uint64_t blk_off; + uint32_t blk_cnt; + + for (i = 0; i < VEA_MAX_BITMAP_CLASS; i++) { + d_list_for_each_entry_safe(bitmap_entry, tmp_entry, + &vsi->vsi_class.vfc_bitmap_empty[i], vbe_link) { + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == i + 1); + D_ASSERT(is_bitmap_empty(vfb->vfb_bitmaps, vfb->vfb_bitmap_sz)); + d_list_del_init(&bitmap_entry->vbe_link); + D_ALLOC_PTR(fca); + if (!fca) + return -DER_NOMEM; + + blk_off = vfb->vfb_blk_off; + blk_cnt = vfb->vfb_blk_cnt; + fca->fca_vsi = vsi; + fca->fca_vfe.vfe_ext.vfe_blk_off = blk_off; + fca->fca_vfe.vfe_ext.vfe_blk_cnt = blk_cnt; + fca->fca_vfe.vfe_ext.vfe_age = 0; /* not used */ + + rc = umem_tx_begin(umem, vsi->vsi_txd); + if (rc != 0) { + D_FREE(fca); + return rc; + } + + /* + * Even 
in-memory bitmap failed to remove from tree, it is ok + * because this bitmap chunk has been removed from allocation LRU list. + */ + d_iov_set(&key, &fca->fca_vfe.vfe_ext.vfe_blk_off, + sizeof(fca->fca_vfe.vfe_ext.vfe_blk_off)); + dbtree_destroy(bitmap_entry->vbe_agg_btr, NULL); + rc = dbtree_delete(fca->fca_vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from bitmap tree " + "error: "DF_RC"\n", fca->fca_vfe.vfe_ext.vfe_blk_off, + fca->fca_vfe.vfe_ext.vfe_blk_cnt, DP_RC(rc)); + goto abort; + } + dec_stats(fca->fca_vsi, STAT_FRAGS_BITMAP, 1); + dec_stats(fca->fca_vsi, STAT_FREE_BITMAP_BLKS, blk_cnt); + + d_iov_set(&key, &blk_off, sizeof(blk_off)); + rc = dbtree_delete(vsi->vsi_md_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from persistent bitmap " + "tree error: "DF_RC"\n", blk_off, blk_cnt, DP_RC(rc)); + goto abort; + } + /* call persistent_free_extent instead */ + rc = persistent_free(vsi, &fca->fca_vfe); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from persistent " + "extent tree error: "DF_RC"\n", blk_off, + blk_cnt, DP_RC(rc)); + goto abort; + } + rc = umem_tx_add_callback(umem, vsi->vsi_txd, UMEM_STAGE_ONCOMMIT, + free_commit_cb, fca); + if (rc == 0) + fca = NULL; +abort: + D_FREE(fca); + /* Commit/Abort transaction on success/error */ + rc = rc ? 
umem_tx_abort(umem, rc) : umem_tx_commit(umem); + if (rc) + return rc; + nr++; + if (nr >= nr_reclaim) + goto out; + } + } + +out: + if (nr_reclaimed) + *nr_reclaimed = nr; + return rc; +} + int trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t *nr_flushed) @@ -597,6 +1032,10 @@ trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, } d_sgl_fini(&unmap_sgl, false); + + rc = reclaim_unused_bitmap(vsi, MAX_FLUSH_FRAGS, NULL); + if (rc) + goto out; out: if (nr_flushed != NULL) *nr_flushed = tot_flushed; diff --git a/src/vea/vea_hint.c b/src/vea/vea_hint.c index 65c923476b5..83f2a13e1e2 100644 --- a/src/vea/vea_hint.c +++ b/src/vea/vea_hint.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -54,7 +54,7 @@ hint_cancel(struct vea_hint_context *hint, uint64_t off, uint64_t seq_min, */ hint->vhc_off = off; return 0; - } else if (hint->vhc_seq > seq_max) { + } else if (hint->vhc_seq >= seq_max) { /* * Subsequent reserve detected, abort hint cancel. It could * result in un-allocated holes on out of order hint cancels, diff --git a/src/vea/vea_init.c b/src/vea/vea_init.c index d237c46af70..adf8258c2f3 100644 --- a/src/vea/vea_init.c +++ b/src/vea/vea_init.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -25,13 +25,13 @@ destroy_free_class(struct vea_free_class *vfc) static bool heap_node_cmp(struct d_binheap_node *a, struct d_binheap_node *b) { - struct vea_entry *nodea, *nodeb; + struct vea_extent_entry *nodea, *nodeb; - nodea = container_of(a, struct vea_entry, ve_node); - nodeb = container_of(b, struct vea_entry, ve_node); + nodea = container_of(a, struct vea_extent_entry, vee_node); + nodeb = container_of(b, struct vea_extent_entry, vee_node); /* Max heap, the largest free extent is heap root */ - return nodea->ve_ext.vfe_blk_cnt > nodeb->ve_ext.vfe_blk_cnt; + return nodea->vee_ext.vfe_blk_cnt > nodeb->vee_ext.vfe_blk_cnt; } static struct d_binheap_ops heap_ops = { @@ -45,6 +45,7 @@ create_free_class(struct vea_free_class *vfc, struct vea_space_df *md) { struct umem_attr uma; int rc; + int i; vfc->vfc_size_btr = DAOS_HDL_INVAL; rc = d_binheap_create_inplace(DBH_FT_NOLOCK, 0, NULL, &heap_ops, @@ -60,9 +61,17 @@ create_free_class(struct vea_free_class *vfc, struct vea_space_df *md) /* Create in-memory sized free extent tree */ rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, NULL, &vfc->vfc_size_btr); - if (rc != 0) + if (rc != 0) { destroy_free_class(vfc); + goto out; + } + + for (i = 0; i < VEA_MAX_BITMAP_CLASS; i++) { + D_INIT_LIST_HEAD(&vfc->vfc_bitmap_lru[i]); + D_INIT_LIST_HEAD(&vfc->vfc_bitmap_empty[i]); + } +out: return rc; } @@ -74,9 +83,14 @@ unload_space_info(struct vea_space_info *vsi) vsi->vsi_md_free_btr = DAOS_HDL_INVAL; } - if (daos_handle_is_valid(vsi->vsi_md_vec_btr)) { - dbtree_close(vsi->vsi_md_vec_btr); - vsi->vsi_md_vec_btr = DAOS_HDL_INVAL; + if (daos_handle_is_valid(vsi->vsi_md_bitmap_btr)) { + dbtree_close(vsi->vsi_md_bitmap_btr); + vsi->vsi_md_bitmap_btr = DAOS_HDL_INVAL; + } + + if (vsi->vsi_bitmap_hint_context) { + vea_hint_unload(vsi->vsi_bitmap_hint_context); + vsi->vsi_bitmap_hint_context = NULL; } } @@ -96,7 +110,7 @@ load_free_entry(daos_handle_t 
ih, d_iov_t *key, d_iov_t *val, void *arg) if (rc != 0) return rc; - rc = compound_free(vsi, vfe, VEA_FL_NO_MERGE); + rc = compound_free_extent(vsi, vfe, VEA_FL_NO_MERGE); if (rc != 0) return rc; @@ -104,22 +118,28 @@ load_free_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) } static int -load_vec_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) +load_bitmap_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) { - struct vea_ext_vector *vec; + struct vea_free_bitmap *vfb; struct vea_space_info *vsi; + struct vea_bitmap_entry *bitmap_entry; uint64_t *off; int rc; vsi = (struct vea_space_info *)arg; off = (uint64_t *)key->iov_buf; - vec = (struct vea_ext_vector *)val->iov_buf; + if (*off == VEA_BITMAP_CHUNK_HINT_KEY) + return 0; - rc = verify_vec_entry(off, vec); + vfb = (struct vea_free_bitmap *)val->iov_buf; + rc = verify_bitmap_entry(vfb); if (rc != 0) return rc; - return compound_vec_alloc(vsi, vec); + rc = bitmap_entry_insert(vsi, vfb, VEA_BITMAP_STATE_PUBLISHED, &bitmap_entry, 0); + bitmap_entry->vbe_md_bitmap = vfb; + + return rc; } int @@ -127,6 +147,9 @@ load_space_info(struct vea_space_info *vsi) { struct umem_attr uma = {0}; int rc; + struct vea_hint_df *df; + uint64_t offset; + d_iov_t key, val; D_ASSERT(vsi->vsi_umem != NULL); D_ASSERT(vsi->vsi_md != NULL); @@ -141,10 +164,9 @@ load_space_info(struct vea_space_info *vsi) if (rc != 0) goto error; - /* Open SCM extent vector tree */ - D_ASSERT(daos_handle_is_inval(vsi->vsi_md_vec_btr)); - rc = dbtree_open_inplace(&vsi->vsi_md->vsd_vec_tree, &uma, - &vsi->vsi_md_vec_btr); + /* Open SCM bitmap tree */ + rc = dbtree_open_inplace(&vsi->vsi_md->vsd_bitmap_tree, &uma, + &vsi->vsi_md_bitmap_btr); if (rc != 0) goto error; @@ -154,12 +176,28 @@ load_space_info(struct vea_space_info *vsi) if (rc != 0) goto error; - /* Build up in-memory extent vector tree */ - rc = dbtree_iterate(vsi->vsi_md_vec_btr, DAOS_INTENT_DEFAULT, false, - load_vec_entry, (void *)vsi); + /* Build up 
in-memory bitmap tree */ + rc = dbtree_iterate(vsi->vsi_md_bitmap_btr, DAOS_INTENT_DEFAULT, false, + load_bitmap_entry, (void *)vsi); if (rc != 0) goto error; + if (!is_bitmap_feature_enabled(vsi)) + return 0; + + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, sizeof(offset)); + d_iov_set(&val, NULL, 0); + rc = dbtree_fetch(vsi->vsi_md_bitmap_btr, BTR_PROBE_EQ, DAOS_INTENT_DEFAULT, + &key, NULL, &val); + if (rc) + goto error; + + df = (struct vea_hint_df *)val.iov_buf; + rc = vea_hint_load(df, &vsi->vsi_bitmap_hint_context); + if (rc) + goto error; + return 0; error: unload_space_info(vsi); diff --git a/src/vea/vea_internal.h b/src/vea/vea_internal.h index 3a5ac97fde0..e0880bde951 100644 --- a/src/vea/vea_internal.h +++ b/src/vea/vea_internal.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,12 +11,39 @@ #include #include #include +#include #include #define VEA_MAGIC (0xea201804) #define VEA_BLK_SZ (4 * 1024) /* 4K */ #define VEA_TREE_ODR 20 +/* Common free extent structure for both SCM & in-memory index */ +struct vea_free_extent { + uint64_t vfe_blk_off; /* Block offset of the extent */ + uint32_t vfe_blk_cnt; /* Total blocks of the extent */ + uint32_t vfe_age; /* Monotonic timestamp */ +}; + +/* Min bitmap allocation class */ +#define VEA_MIN_BITMAP_CLASS 1 +/* Max bitmap allocation class */ +#define VEA_MAX_BITMAP_CLASS 64 + +/* Bitmap chunk size */ +#define VEA_BITMAP_MIN_CHUNK_BLKS 256 /* 1MiB */ +#define VEA_BITMAP_MAX_CHUNK_BLKS (VEA_MAX_BITMAP_CLASS * 256) /* 64 MiB */ + + +/* Common free bitmap structure for both SCM & in-memory index */ +struct vea_free_bitmap { + uint64_t vfb_blk_off; /* Block offset of the bitmap */ + uint32_t vfb_blk_cnt; /* Block count of the bitmap */ + uint16_t vfb_class; /* Allocation class of bitmap */ + uint16_t vfb_bitmap_sz; /* Bitmap size*/ + uint64_t vfb_bitmaps[0]; /* Bitmaps of this 
chunk */ +}; + /* Per I/O stream hint context */ struct vea_hint_context { struct vea_hint_df *vhc_pd; @@ -27,18 +54,55 @@ struct vea_hint_context { }; /* Free extent informat stored in the in-memory compound free extent index */ -struct vea_entry { +struct vea_extent_entry { /* * Always keep it as first item, since vfe_blk_off is the direct key * of DBTREE_CLASS_IV */ - struct vea_free_extent ve_ext; - /* Link to one of vsc_lru or vsi_agg_lru */ - d_list_t ve_link; + struct vea_free_extent vee_ext; + /* Link to one of vsc_extent_lru */ + d_list_t vee_link; /* Back reference to sized tree entry */ - struct vea_sized_class *ve_sized_class; + struct vea_sized_class *vee_sized_class; /* Link to vfc_heap */ - struct d_binheap_node ve_node; + struct d_binheap_node vee_node; +}; + +enum { + VEA_BITMAP_STATE_PUBLISHED, + VEA_BITMAP_STATE_PUBLISHING, + VEA_BITMAP_STATE_NEW, +}; + +/* Bitmap entry */ +struct vea_bitmap_entry { + /* Link to one of vfc_bitmap_lru[] */ + d_list_t vbe_link; + /* Bitmap published state */ + int vbe_published_state; + /* + * Free entries sorted by offset, for coalescing the just recent + * free blocks inside this bitmap chunk. 
+ */ + daos_handle_t vbe_agg_btr; + /* Point to persistent free bitmap entry */ + struct vea_free_bitmap *vbe_md_bitmap; + /* free bitmap, always keep it as last item*/ + struct vea_free_bitmap vbe_bitmap; +}; + +enum { + VEA_FREE_ENTRY_EXTENT, + VEA_FREE_ENTRY_BITMAP, +}; + +/* freed entry stored in aggregation tree */ +struct vea_free_entry { + struct vea_free_extent vfe_ext; + /* Back pointer bitmap entry */ + struct vea_bitmap_entry *vfe_bitmap; + /* Link to one vsi_agg_lru */ + d_list_t vfe_link; }; #define VEA_LARGE_EXT_MB 64 /* Large extent threshold in MB */ @@ -47,9 +111,10 @@ struct vea_entry { /* Value entry of sized free extent tree (vfc_size_btr) */ struct vea_sized_class { /* Small extents LRU list */ - d_list_t vsc_lru; + d_list_t vsc_extent_lru; }; +#define VEA_BITMAP_CHUNK_HINT_KEY (~(0ULL)) /* * Large free extents (>VEA_LARGE_EXT_MB) are tracked in max a heap, small * free extents (<= VEA_LARGE_EXT_MB) are tracked in a size tree. @@ -61,6 +126,10 @@ struct vea_free_class { daos_handle_t vfc_size_btr; /* Size threshold for large extent */ uint32_t vfc_large_thresh; + /* Bitmap LRU list for different bitmap allocation class*/ + d_list_t vfc_bitmap_lru[VEA_MAX_BITMAP_CLASS]; + /* Empty bitmap list for different allocation class */ + d_list_t vfc_bitmap_empty[VEA_MAX_BITMAP_CLASS]; }; enum { @@ -68,21 +137,27 @@ enum { STAT_RESRV_HINT = 0, /* Number of large reserve */ STAT_RESRV_LARGE = 1, - /* Number of small reserve */ + /* Number of small extents reserve */ STAT_RESRV_SMALL = 2, + /* Number of bitmap reserve */ + STAT_RESRV_BITMAP = 3, /* Max reserve type */ - STAT_RESRV_TYPE_MAX = 3, + STAT_RESRV_TYPE_MAX = 4, /* Number of large(> VEA_LARGE_EXT_MB) free frags available for allocation */ - STAT_FRAGS_LARGE = 3, - /* Number of small free frags available for allocation */ - STAT_FRAGS_SMALL = 4, + STAT_FRAGS_LARGE = 4, + /* Number of small free extent frags available for allocation */ + STAT_FRAGS_SMALL = 5, /* Number of frags in aging buffer (to be 
unmapped) */ - STAT_FRAGS_AGING = 5, + STAT_FRAGS_AGING = 6, + /* Number of bitmaps */ + STAT_FRAGS_BITMAP = 7, /* Max frag type */ - STAT_FRAGS_TYPE_MAX = 3, - /* Number of blocks available for allocation */ - STAT_FREE_BLKS = 6, - STAT_MAX = 7, + STAT_FRAGS_TYPE_MAX = 4, + /* Number of extent blocks available for allocation */ + STAT_FREE_EXTENT_BLKS = 8, + /* Number of bitmap blocks available for allocation */ + STAT_FREE_BITMAP_BLKS = 9, + STAT_MAX = 10, }; struct vea_metrics { @@ -91,6 +166,8 @@ struct vea_metrics { struct d_tm_node_t *vm_free_blks; }; +#define MAX_FLUSH_FRAGS 256 + /* In-memory compound index */ struct vea_space_info { /* Instance for the pmemobj pool on SCM */ @@ -106,18 +183,20 @@ struct vea_space_info { struct vea_space_df *vsi_md; /* Open handles for the persistent free extent tree */ daos_handle_t vsi_md_free_btr; - /* Open handles for the persistent extent vector tree */ - daos_handle_t vsi_md_vec_btr; + /* Open handles for the persistent bitmap tree */ + daos_handle_t vsi_md_bitmap_btr; /* Free extent tree sorted by offset, for all free extents. */ daos_handle_t vsi_free_btr; - /* Extent vector tree, for non-contiguous allocation */ - daos_handle_t vsi_vec_btr; + /* Bitmap tree, for small allocation */ + daos_handle_t vsi_bitmap_btr; + /* Hint context for bitmap chunk allocation */ + struct vea_hint_context *vsi_bitmap_hint_context; /* Index for searching free extent by size & age */ struct vea_free_class vsi_class; - /* LRU to aggergate just recent freed extents */ + /* LRU to aggergate just recent freed extents or bitmap blocks */ d_list_t vsi_agg_lru; /* - * Free extent tree sorted by offset, for coalescing the just recent + * Free entries sorted by offset, for coalescing the just recent * free extents. 
*/ daos_handle_t vsi_agg_btr; @@ -132,6 +211,11 @@ struct vea_space_info { bool vsi_flush_scheduled; }; +struct free_commit_cb_arg { + struct vea_space_info *fca_vsi; + struct vea_free_entry fca_vfe; +}; + static inline uint32_t get_current_age(void) { @@ -146,6 +230,46 @@ enum vea_free_flags { VEA_FL_NO_ACCOUNTING = (1 << 1), }; +static inline bool +is_bitmap_feature_enabled(struct vea_space_info *vsi) +{ + return vsi->vsi_md->vsd_compat & VEA_COMPAT_FEATURE_BITMAP; +} + +static inline int +alloc_free_bitmap_size(uint16_t bitmap_sz) +{ + return sizeof(struct vea_free_bitmap) + (bitmap_sz << 3); +} + +static inline uint32_t +bitmap_free_blocks(struct vea_free_bitmap *vfb) +{ + uint32_t free_blocks; + int diff; + + int free_bits = daos_count_free_bits(vfb->vfb_bitmaps, vfb->vfb_bitmap_sz); + + free_blocks = free_bits * vfb->vfb_class; + diff = vfb->vfb_bitmap_sz * 64 * vfb->vfb_class - vfb->vfb_blk_cnt; + + D_ASSERT(diff == 0); + + return free_blocks; +} + +static inline bool +is_bitmap_empty(uint64_t *bitmap, int bitmap_sz) +{ + int i; + + for (i = 0; i < bitmap_sz; i++) + if (bitmap[i]) + return false; + + return true; +} + /* vea_init.c */ void destroy_free_class(struct vea_free_class *vfc); int create_free_class(struct vea_free_class *vfc, struct vea_space_df *md); @@ -154,36 +278,45 @@ int load_space_info(struct vea_space_info *vsi); /* vea_util.c */ int verify_free_entry(uint64_t *off, struct vea_free_extent *vfe); -int verify_vec_entry(uint64_t *off, struct vea_ext_vector *vec); +int verify_bitmap_entry(struct vea_free_bitmap *vfb); int ext_adjacent(struct vea_free_extent *cur, struct vea_free_extent *next); int verify_resrvd_ext(struct vea_resrvd_ext *resrvd); int vea_dump(struct vea_space_info *vsi, bool transient); int vea_verify_alloc(struct vea_space_info *vsi, bool transient, - uint64_t off, uint32_t cnt); + uint64_t off, uint32_t cnt, bool is_bitmap); void dec_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr); void inc_stats(struct 
vea_space_info *vsi, unsigned int type, uint64_t nr); /* vea_alloc.c */ -int compound_vec_alloc(struct vea_space_info *vsi, struct vea_ext_vector *vec); int reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd); int reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd); -int reserve_vector(struct vea_space_info *vsi, uint32_t blk_cnt, - struct vea_resrvd_ext *resrvd); -int persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe); +int persistent_alloc(struct vea_space_info *vsi, struct vea_free_entry *vfe); +int +bitmap_tx_add_ptr(struct umem_instance *vsi_umem, uint64_t *bitmap, + uint32_t bit_at, uint32_t bits_nr); +int +bitmap_set_range(struct umem_instance *vsi_umem, struct vea_free_bitmap *bitmap, + uint64_t blk_off, uint32_t blk_cnt, bool clear); /* vea_free.c */ -#define MAX_FLUSH_FRAGS 256 -void free_class_remove(struct vea_space_info *vsi, struct vea_entry *entry); -int free_class_add(struct vea_space_info *vsi, struct vea_entry *entry); -int compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, - unsigned int flags); -int persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe); -int aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe); +void extent_free_class_remove(struct vea_space_info *vsi, struct vea_extent_entry *entry); +int extent_free_class_add(struct vea_space_info *vsi, struct vea_extent_entry *entry); +int compound_free_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe, + unsigned int flags); +int compound_free(struct vea_space_info *vsi, struct vea_free_entry *vfe, unsigned int flags); +int persistent_free(struct vea_space_info *vsi, struct vea_free_entry *vfe); +int aggregated_free(struct vea_space_info *vsi, struct vea_free_entry *vfe); int trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t *nr_flushed); int schedule_aging_flush(struct vea_space_info 
*vsi); +int bitmap_entry_insert(struct vea_space_info *vsi, struct vea_free_bitmap *vfb, + int state, struct vea_bitmap_entry **ret_entry, unsigned int flags); +int free_type(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt, + struct vea_bitmap_entry **bitmap_entry); +void +free_commit_cb(void *data, bool noop); /* vea_hint.c */ void hint_get(struct vea_hint_context *hint, uint64_t *off); diff --git a/src/vea/vea_util.c b/src/vea/vea_util.c index c7452cc2ebf..21c11e3daa4 100644 --- a/src/vea/vea_util.c +++ b/src/vea/vea_util.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -35,35 +35,38 @@ verify_free_entry(uint64_t *off, struct vea_free_extent *vfe) } int -verify_vec_entry(uint64_t *off, struct vea_ext_vector *vec) +verify_bitmap_entry(struct vea_free_bitmap *vfb) { - int i; - uint64_t prev_off = 0; + D_ASSERT(vfb != NULL); + if (vfb->vfb_blk_off == VEA_HINT_OFF_INVAL) { + D_CRIT("corrupted bitmap entry, off == VEA_HINT_OFF_INVAL(%d)\n", + VEA_HINT_OFF_INVAL); + return -DER_INVAL; + } - D_ASSERT(vec != NULL); - if (vec->vev_size == 0 || vec->vev_size > VEA_EXT_VECTOR_MAX) { - D_CRIT("corrupted vector entry, sz: %u\n", vec->vev_size); + if (vfb->vfb_class < VEA_MIN_BITMAP_CLASS || vfb->vfb_class > VEA_MAX_BITMAP_CLASS) { + D_CRIT("corrupted bitmap entry, class: %u is out of [%u, %u]\n", + vfb->vfb_class, VEA_MIN_BITMAP_CLASS, VEA_MAX_BITMAP_CLASS); return -DER_INVAL; } - if (off != NULL && *off != vec->vev_blk_off[0]) { - D_CRIT("corrupted vector entry, off: "DF_U64" != "DF_U64"\n", - *off, vec->vev_blk_off[0]); + if (vfb->vfb_blk_cnt < VEA_BITMAP_MIN_CHUNK_BLKS || + vfb->vfb_blk_cnt > VEA_BITMAP_MAX_CHUNK_BLKS) { + D_CRIT("corrupted bitmap entry, chunk size: %u is out of [%u, %u]\n", + vfb->vfb_blk_cnt, VEA_BITMAP_MIN_CHUNK_BLKS, VEA_BITMAP_MAX_CHUNK_BLKS); return -DER_INVAL; } - for (i = 0; i < vec->vev_size; i++) { - if 
(vec->vev_blk_off[i] <= prev_off) { - D_CRIT("corrupted vector entry[%d]," - " "DF_U64" <= "DF_U64"\n", - i, vec->vev_blk_off[i], prev_off); - return -DER_INVAL; - } - if (vec->vev_blk_cnt[i] == 0) { - D_CRIT("corrupted vector entry[%d], %u\n", - i, vec->vev_blk_cnt[i]); - return -DER_INVAL; - } + if (vfb->vfb_blk_cnt % VEA_BITMAP_MIN_CHUNK_BLKS) { + D_CRIT("coruppted bitmap entry, chunk size: %u should be times of %u\n", + vfb->vfb_blk_cnt, VEA_BITMAP_MIN_CHUNK_BLKS); + return -DER_INVAL; + } + + if (vfb->vfb_bitmap_sz * 64 * vfb->vfb_class < vfb->vfb_blk_cnt) { + D_CRIT("corrupted bitmap entry, bitmap size: %u could not cover chunk size: %u\n", + vfb->vfb_bitmap_sz, vfb->vfb_blk_cnt); + return -DER_INVAL; } return 0; @@ -102,28 +105,25 @@ verify_resrvd_ext(struct vea_resrvd_ext *resrvd) } else if (resrvd->vre_blk_cnt == 0) { D_CRIT("invalid blk_cnt %u\n", resrvd->vre_blk_cnt); return -DER_INVAL; - } else if (resrvd->vre_vector != NULL) { - /* Vector allocation isn't supported yet. */ - D_CRIT("vector isn't NULL?\n"); - return -DER_NOSYS; } return 0; } -int -vea_dump(struct vea_space_info *vsi, bool transient) +static int +vea_dump_bitmap(struct vea_space_info *vsi, bool transient) { - struct vea_free_extent *ext; - daos_handle_t ih, btr_hdl; - d_iov_t key, val; - uint64_t *off; - int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; + struct vea_free_bitmap *bitmap; + struct vea_bitmap_entry *entry; + daos_handle_t ih, btr_hdl; + d_iov_t key, val; + uint64_t *off; + int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; if (transient) - btr_hdl = vsi->vsi_free_btr; + btr_hdl = vsi->vsi_bitmap_btr; else - btr_hdl = vsi->vsi_md_free_btr; + btr_hdl = vsi->vsi_md_bitmap_btr; D_ASSERT(daos_handle_is_valid(btr_hdl)); rc = dbtree_iter_prepare(btr_hdl, BTR_ITER_EMBEDDED, &ih); @@ -132,6 +132,7 @@ vea_dump(struct vea_space_info *vsi, bool transient) rc = dbtree_iter_probe(ih, opc, DAOS_INTENT_DEFAULT, NULL, NULL); + D_PRINT("Bitmaps:"); while (rc == 0) { d_iov_set(&key, NULL, 0); 
d_iov_set(&val, NULL, 0); @@ -140,15 +141,76 @@ vea_dump(struct vea_space_info *vsi, bool transient) break; off = (uint64_t *)key.iov_buf; + if (*off == VEA_BITMAP_CHUNK_HINT_KEY) + goto next; + if (transient) { - struct vea_entry *entry; + entry = (struct vea_bitmap_entry *)val.iov_buf; + bitmap = &entry->vbe_bitmap; + } else { + bitmap = (struct vea_free_bitmap *)val.iov_buf; + + } + rc = verify_bitmap_entry(bitmap); + if (rc != 0) { + D_ERROR("dump failed???\n"); + break; + } + + D_PRINT("["DF_U64", %u]", bitmap->vfb_blk_off, bitmap->vfb_blk_cnt); + print_cnt++; + if (print_cnt % 10 == 0) + D_PRINT("\n"); + else + D_PRINT(" "); +next: + rc = dbtree_iter_next(ih); + } + + D_PRINT("\n"); + dbtree_iter_finish(ih); + + return rc = -DER_NONEXIST ? 0 : rc; + - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; +} + +static int +vea_dump_extent(struct vea_space_info *vsi, bool transient) +{ + struct vea_free_extent *ext; + struct vea_extent_entry *entry; + daos_handle_t ih, btr_hdl; + d_iov_t key, val; + uint64_t *off; + int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; + + if (transient) + btr_hdl = vsi->vsi_free_btr; + else + btr_hdl = vsi->vsi_md_free_btr; + D_ASSERT(daos_handle_is_valid(btr_hdl)); + rc = dbtree_iter_prepare(btr_hdl, BTR_ITER_EMBEDDED, &ih); + if (rc) + return rc; + + rc = dbtree_iter_probe(ih, opc, DAOS_INTENT_DEFAULT, NULL, NULL); + + D_PRINT("Free extents:"); + while (rc == 0) { + d_iov_set(&key, NULL, 0); + d_iov_set(&val, NULL, 0); + rc = dbtree_iter_fetch(ih, &key, &val, NULL); + if (rc != 0) + break; + + off = (uint64_t *)key.iov_buf; + if (transient) { + entry = (struct vea_extent_entry *)val.iov_buf; + ext = &entry->vee_ext; } else { ext = (struct vea_free_extent *)val.iov_buf; } - rc = verify_free_entry(off, ext); if (rc != 0) break; @@ -169,6 +231,18 @@ vea_dump(struct vea_space_info *vsi, bool transient) return rc = -DER_NONEXIST ? 
0 : rc; } +int +vea_dump(struct vea_space_info *vsi, bool transient) +{ + int rc; + + rc = vea_dump_bitmap(vsi, transient); + if (rc) + return rc; + + return vea_dump_extent(vsi, transient); +} + /** * Check if two extents are overlapping. * returns 0 - Non-overlapping @@ -189,27 +263,70 @@ ext_overlapping(struct vea_free_extent *ext1, struct vea_free_extent *ext2) return -DER_INVAL; } -/** - * Verify if an extent is allocated in persistent or transient metadata. - * - * \param vsi [IN] In-memory compound index - * \param transient [IN] Persistent or transient - * \param off [IN] Block offset of extent - * \param cnt [IN] Block count of extent - * - * \return 0 - Allocated - * 1 - Not allocated - * Negative value on error - */ -int -vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, - uint32_t cnt) +static int +verify_alloc_bitmap(struct vea_space_info *vsi, bool transient, uint64_t off, + uint32_t cnt) { - struct vea_free_extent vfe, *ext; daos_handle_t btr_hdl; d_iov_t key, key_out, val; - uint64_t *key_off; int rc, opc = BTR_PROBE_LE; + struct vea_free_bitmap *vfb; + + if (transient) + btr_hdl = vsi->vsi_bitmap_btr; + else + btr_hdl = vsi->vsi_md_bitmap_btr; + + D_ASSERT(daos_handle_is_valid(btr_hdl)); + d_iov_set(&key, &off, sizeof(off)); + + d_iov_set(&key_out, NULL, 0); + d_iov_set(&val, NULL, 0); + rc = dbtree_fetch(btr_hdl, opc, DAOS_INTENT_DEFAULT, &key, &key_out, + &val); + /* bitmap not allocated */ + if (rc == -DER_NONEXIST) + return 1; + + if (rc) + return rc; + + if (transient) { + struct vea_bitmap_entry *entry; + + entry = (struct vea_bitmap_entry *)val.iov_buf; + vfb = &entry->vbe_bitmap; + } else { + vfb = (struct vea_free_bitmap *)val.iov_buf; + } + + rc = verify_bitmap_entry(vfb); + if (rc != 0) { + D_ERROR("verify bitmap alloc failed\n"); + return rc; + } + + /* not in the bitmap range */ + if (off + cnt <= vfb->vfb_blk_off || off >= vfb->vfb_blk_off + vfb->vfb_blk_cnt) + return 1; + + if (isset_range((uint8_t 
*)vfb->vfb_bitmaps, + (off - vfb->vfb_blk_off) / vfb->vfb_class, + (off - vfb->vfb_blk_off + cnt - 1) / vfb->vfb_class)) + return 0; + + return 1; +} + + +static int +verify_alloc_extent(struct vea_space_info *vsi, bool transient, uint64_t off, uint32_t cnt) +{ + struct vea_free_extent vfe, *ext; + daos_handle_t btr_hdl; + d_iov_t key, key_out, val; + uint64_t *key_off; + int rc, opc = BTR_PROBE_LE; /* Sanity check on input parameters */ vfe.vfe_blk_off = off; @@ -243,10 +360,10 @@ vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, key_off = (uint64_t *)key_out.iov_buf; if (transient) { - struct vea_entry *entry; + struct vea_extent_entry *entry; - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; + entry = (struct vea_extent_entry *)val.iov_buf; + ext = &entry->vee_ext; } else { ext = (struct vea_free_extent *)val.iov_buf; } @@ -267,6 +384,29 @@ vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, return rc; } +/** + * Verify if an extent is allocated in persistent or transient metadata. 
+ * + * \param vsi [IN] In-memory compound index + * \param transient [IN] Persistent or transient + * \param off [IN] Block offset of extent + * \param cnt [IN] Block count of extent + * \param is_bitmap [IN] Bitmap or extent + * + * \return 0 - Allocated + * 1 - Not allocated + * Negative value on error + */ +int +vea_verify_alloc(struct vea_space_info *vsi, bool transient, + uint64_t off, uint32_t cnt, bool is_bitmap) +{ + if (!is_bitmap) + return verify_alloc_extent(vsi, transient, off, cnt); + + return verify_alloc_bitmap(vsi, transient, off, cnt); +} + void vea_metrics_free(void *data) { @@ -283,6 +423,8 @@ rsrv_type2str(int rsrv_type) return "large"; case STAT_RESRV_SMALL: return "small"; + case STAT_RESRV_BITMAP: + return "bitmap"; default: return "unknown"; } @@ -298,6 +440,8 @@ frags_type2str(int frags_type) return "small"; case STAT_FRAGS_AGING: return "aging"; + case STAT_FRAGS_BITMAP: + return "bitmap"; default: return "unknown"; } @@ -366,6 +510,7 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de case STAT_RESRV_HINT: case STAT_RESRV_LARGE: case STAT_RESRV_SMALL: + case STAT_RESRV_BITMAP: D_ASSERT(!dec && nr == 1); vsi->vsi_stat[type] += nr; if (metrics && metrics->vm_rsrv[type]) @@ -373,6 +518,7 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de break; case STAT_FRAGS_LARGE: case STAT_FRAGS_SMALL: + case STAT_FRAGS_BITMAP: case STAT_FRAGS_AGING: D_ASSERT(nr == 1 && type >= STAT_FRAGS_LARGE); if (dec) { @@ -385,7 +531,8 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de if (metrics && metrics->vm_frags[frag_idx]) d_tm_set_gauge(metrics->vm_frags[frag_idx], vsi->vsi_stat[type]); break; - case STAT_FREE_BLKS: + case STAT_FREE_EXTENT_BLKS: + case STAT_FREE_BITMAP_BLKS: if (dec) { D_ASSERTF(vsi->vsi_stat[type] >= nr, "free:"DF_U64" < rsrvd:"DF_U64"\n", vsi->vsi_stat[type], nr); diff --git a/src/vos/tests/vts_aggregate.c b/src/vos/tests/vts_aggregate.c index 
2b2b92082af..67ff1539e83 100644 --- a/src/vos/tests/vts_aggregate.c +++ b/src/vos/tests/vts_aggregate.c @@ -1840,13 +1840,14 @@ print_space_info(vos_pool_info_t *pi, char *desc) VERBOSE_MSG(" NVMe allocator statistics:\n"); VERBOSE_MSG(" free_p: "DF_U64", \tfree_t: "DF_U64", " "\tfrags_large: "DF_U64", \tfrags_small: "DF_U64", " - "\tfrags_aging: "DF_U64"\n", + "\tfrags_aging: "DF_U64" \tfrags_bitmap: "DF_U64"\n", stat->vs_free_persistent, stat->vs_free_transient, stat->vs_frags_large, stat->vs_frags_small, - stat->vs_frags_aging); + stat->vs_frags_aging, stat->vs_frags_bitmap); VERBOSE_MSG(" resrv_hit: "DF_U64", \tresrv_large: "DF_U64", " - "\tresrv_small: "DF_U64"\n", stat->vs_resrv_hint, - stat->vs_resrv_large, stat->vs_resrv_small); + "\tresrv_small: "DF_U64", \tresrv_bitmap: "DF_U64"\n", + stat->vs_resrv_hint, stat->vs_resrv_large, + stat->vs_resrv_small, stat->vs_resrv_bitmap); } static int diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index b2b82d1494b..171235b7ceb 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -1424,6 +1424,11 @@ vos_pool_upgrade(daos_handle_t poh, uint32_t version) "Invalid pool upgrade version %d, current version is %d\n", version, pool_df->pd_version); + rc = vea_upgrade(pool->vp_vea_info, &pool->vp_umm, &pool_df->pd_vea_df, + pool_df->pd_version); + if (rc) + return rc; + rc = umem_tx_begin(&pool->vp_umm, NULL); if (rc != 0) return rc; From 6afb094004556ea900af60f7f469fd321d59f607 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Tue, 19 Sep 2023 23:02:09 -0500 Subject: [PATCH 50/80] DAOS-14409 build: use libfabric pkgconfig files (#13066) Let mercury use libfabric pkg-config instead of hard-coded lib path Signed-off-by: Jerome Soumagne --- site_scons/components/__init__.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index 94154c6d2f9..3044a6b58f6 100644 --- a/site_scons/components/__init__.py +++ 
b/site_scons/components/__init__.py @@ -134,6 +134,7 @@ def define_mercury(reqs): libs=['fabric'], config_cb=ofi_config, headers=['rdma/fabric.h'], + pkgconfig='libfabric', package='libfabric-devel' if inst(reqs, 'ofi') else None, patch_rpath=['lib'], build_env={'CFLAGS': "-fstack-usage"}) @@ -186,12 +187,6 @@ def define_mercury(reqs): else: mercury_build.append('-DMERCURY_ENABLE_DEBUG:BOOL=OFF') - mercury_build.extend(check(reqs, - 'ofi', - ['-DOFI_INCLUDE_DIR:PATH=$OFI_PREFIX/include', - '-DOFI_LIBRARY:FILEPATH=$OFI_PREFIX/lib/libfabric.so'], - [])) - reqs.define('mercury', retriever=GitRepoRetriever('https://github.com/mercury-hpc/mercury.git', True), commands=[mercury_build, From f7fe80fafca98bd6d47d38b98e1a4ba160c4067e Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 20 Sep 2023 07:20:24 -0500 Subject: [PATCH 51/80] DAOS-623 build: fix build when lustre APIs are installed (#13069) Signed-off-by: Mohamad Chaarawi --- src/client/dfs/duns.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/client/dfs/duns.c b/src/client/dfs/duns.c index cad9790507c..6c0e089bd58 100644 --- a/src/client/dfs/duns.c +++ b/src/client/dfs/duns.c @@ -859,7 +859,7 @@ duns_link_lustre_path(const char *pool, const char *cont, daos_cont_layout_t typ { char str[DUNS_MAX_XATTR_LEN + 1]; int len; - int rc, rc2; + int rc; /* XXX if liblustreapi is not binded, do it now ! */ if (liblustre_binded == false && liblustre_notfound == false) { @@ -1247,7 +1247,9 @@ duns_link_cont(daos_handle_t poh, const char *cont, const char *path) #ifdef LUSTRE_INCLUDE struct statfs fs; char *dir, *dirp; + size_t path_len; + path_len = strnlen(path, PATH_MAX); D_STRNDUP(dir, path, path_len); if (dir == NULL) D_GOTO(out_cont, rc = ENOMEM); From 478eb165c03dbad7a3bb117fd52d972db4ed849a Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 21 Sep 2023 16:26:06 -0400 Subject: [PATCH 52/80] DAOS-14203 test - Adding debug info. 
(#12960) Display memory info before and after server format. Signed-off-by: Phil Henderson --- src/tests/ftest/util/server_utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 514375ccce9..87716bb0465 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -474,6 +474,14 @@ def support_collect_log(self, **kwargs): return run_remote( self.log, self._hosts, cmd.with_exports, timeout=self.collect_log_timeout.value) + def display_memory_info(self): + """Display server hosts memory info.""" + self.log.debug("#" * 80) + self.log.debug(" Collection debug memory info") + run_remote(self.log, self._hosts, "free -m") + run_remote(self.log, self._hosts, "ps -eo size,pid,user,command --sort -size | head -n 6") + self.log.debug("#" * 80) + def detect_format_ready(self, reformat=False): """Detect when all the daos_servers are ready for storage format. @@ -666,11 +674,14 @@ def start(self): self.prepare() # Start the servers and wait for them to be ready for storage format + self.display_memory_info() self.detect_format_ready() # Collect storage and network information from the servers. + self.display_memory_info() self.information.collect_storage_information() self.information.collect_network_information() + self.display_memory_info() # Format storage and wait for server to change ownership self.log.info(" Formatting hosts: <%s>", self.dmg.hostlist) From d016a2bdb583bb1754006410563ff328c822c731 Mon Sep 17 00:00:00 2001 From: wangdi Date: Thu, 21 Sep 2023 20:55:37 -0700 Subject: [PATCH 53/80] DAOS-13503 gurt: add memory metrics (#11956) - Add memory metrics for DTX and VOS memory usage. - Update error message for PARTIAL update. - Add total memory usage track. - Only enable it by D_MEMORY_TRACK=1 - Add memory allocation(-m) information to telemetry. 
Signed-off-by: Di Wang --- src/cart/README.env | 5 ++ src/dtx/dtx_common.c | 21 +++++- src/dtx/dtx_internal.h | 2 + src/dtx/dtx_srv.c | 17 ++++- src/engine/init.c | 1 - src/engine/srv.c | 50 +++++++++++++ src/engine/srv_internal.h | 10 +++ src/engine/srv_metrics.c | 2 +- src/gurt/misc.c | 90 ++++++++++++++++++++-- src/gurt/telemetry.c | 99 +++++++++++++++++++++++++ src/include/daos_errno.h | 4 +- src/include/daos_srv/dtx_srv.h | 7 +- src/include/gurt/common.h | 4 + src/include/gurt/telemetry_common.h | 14 +++- src/include/gurt/telemetry_producer.h | 3 +- src/tests/ftest/util/telemetry_utils.py | 10 +++ src/utils/daos_metrics/daos_metrics.c | 10 ++- src/vos/lru_array.c | 32 +++++++- src/vos/lru_array.h | 6 +- src/vos/tests/vts_io.c | 4 +- src/vos/tests/vts_ts.c | 4 +- src/vos/vos_common.c | 60 +++++++++++++-- src/vos/vos_container.c | 9 ++- src/vos/vos_dtx.c | 11 ++- src/vos/vos_internal.h | 2 + src/vos/vos_obj_cache.c | 9 ++- src/vos/vos_tls.h | 3 + src/vos/vos_ts.c | 34 ++++++++- src/vos/vos_ts.h | 8 +- 29 files changed, 483 insertions(+), 48 deletions(-) diff --git a/src/cart/README.env b/src/cart/README.env index 8a8cca74f85..edfbb39c561 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -80,6 +80,11 @@ This file lists the environment variables used in CaRT. by default, and can be specified using DD_SUBSYS, for example: "DD_SUBSYS=RPC,BULK,CORPC,GRP,LM,HG,PMIX,ST,IV" or also "DD_SUBSYS=all". + . D_MEMORY_TRACK + User can enable memory track for daos engine by D_MEMORY_TRACK=1. With the + environment, all of allocations inside DAOS (by D_ALLOC) will be tracked, and + total allocated bytes per xstream can be shown through metrics. + . CRT_TIMEOUT Set it as integer in the range of (0, 3600] to set the global timeout value of all RPCs (second). 
Without setting it or set it as any other value will diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index a23bc3094c7..9f59060f87e 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -1114,6 +1114,7 @@ dtx_leader_begin(daos_handle_t coh, struct dtx_id *dti, struct dtx_memberships *mbs, struct dtx_leader_handle **p_dlh) { struct dtx_leader_handle *dlh; + struct dtx_tls *tls = dtx_tls_get(); struct dtx_handle *dth; int rc; int i; @@ -1151,10 +1152,12 @@ dtx_leader_begin(daos_handle_t coh, struct dtx_id *dti, DP_DTI(dti), sub_modification_cnt, dth->dth_ver, DP_UOID(*leader_oid), dti_cos_cnt, tgt_cnt, flags, DP_RC(rc)); - if (rc != 0) + if (rc != 0) { D_FREE(dlh); - else + } else { *p_dlh = dlh; + d_tm_inc_gauge(tls->dt_dtx_leader_total, 1); + } return rc; } @@ -1178,6 +1181,17 @@ dtx_leader_wait(struct dtx_leader_handle *dlh) return dlh->dlh_result; }; +void +dtx_entry_put(struct dtx_entry *dte) +{ + if (--(dte->dte_refs) == 0) { + struct dtx_tls *tls = dtx_tls_get(); + + d_tm_dec_gauge(tls->dt_dtx_entry_total, 1); + D_FREE(dte); + } +} + /** * Stop the leader thandle. * @@ -1192,6 +1206,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul { struct ds_cont_child *cont = coh->sch_cont; struct dtx_handle *dth = &dlh->dlh_handle; + struct dtx_tls *tls = dtx_tls_get(); struct dtx_entry *dte; struct dtx_memberships *mbs; size_t size; @@ -1308,6 +1323,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul dte->dte_ver = dth->dth_ver; dte->dte_refs = 1; dte->dte_mbs = mbs; + d_tm_inc_gauge(tls->dt_dtx_entry_total, 1); /* Use the new created @dte instead of dth->dth_dte that will be * released after dtx_leader_end(). 
@@ -1419,6 +1435,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul D_FREE(dth->dth_oid_array); D_FREE(dlh); + d_tm_dec_gauge(tls->dt_dtx_leader_total, 1); return result; } diff --git a/src/dtx/dtx_internal.h b/src/dtx/dtx_internal.h index 3abaa197b1d..a38c747a61d 100644 --- a/src/dtx/dtx_internal.h +++ b/src/dtx/dtx_internal.h @@ -160,6 +160,8 @@ struct dtx_pool_metrics { */ struct dtx_tls { struct d_tm_node_t *dt_committable; + struct d_tm_node_t *dt_dtx_leader_total; + struct d_tm_node_t *dt_dtx_entry_total; uint64_t dt_agg_gen; uint32_t dt_batched_ult_cnt; }; diff --git a/src/dtx/dtx_srv.c b/src/dtx/dtx_srv.c index 095c3d7fa20..9ea25a9dcd0 100644 --- a/src/dtx/dtx_srv.c +++ b/src/dtx/dtx_srv.c @@ -39,6 +39,22 @@ dtx_tls_init(int tags, int xs_id, int tgt_id) D_WARN("Failed to create DTX committable metric: " DF_RC"\n", DP_RC(rc)); + rc = d_tm_add_metric(&tls->dt_dtx_leader_total, D_TM_GAUGE, + "total number of leader dtx in cache", "entry", + "mem/dtx/dtx_leader_handle_%u/tgt_%u", + sizeof(struct dtx_leader_handle), tgt_id); + if (rc != DER_SUCCESS) + D_WARN("Failed to create DTX leader metric: " DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->dt_dtx_entry_total, D_TM_GAUGE, + "total number of dtx entry in cache", "entry", + "mem/dtx/dtx_entry_%u/tgt_%u", + sizeof(struct dtx_entry), tgt_id); + if (rc != DER_SUCCESS) + D_WARN("Failed to create DTX entry metric: " DF_RC"\n", + DP_RC(rc)); + return tls; } @@ -105,7 +121,6 @@ dtx_metrics_alloc(const char *path, int tgt_id) D_WARN("Failed to create DTX RPC cnt metric for %s: " DF_RC"\n", dtx_opc_to_str(opc), DP_RC(rc)); } - return metrics; } diff --git a/src/engine/init.c b/src/engine/init.c index 874fbf62ebf..5e90d4ec248 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -678,7 +678,6 @@ server_init(int argc, char *argv[]) DP_RC(rc)); metrics = &dss_engine_metrics; - /** Report timestamp when engine was started */ d_tm_record_timestamp(metrics->started_time); diff 
--git a/src/engine/srv.c b/src/engine/srv.c index 1be1aa2d9f1..d7f1acdbe80 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "drpc_internal.h" #include "srv_internal.h" @@ -353,6 +354,7 @@ wait_all_exited(struct dss_xstream *dx, struct dss_module_info *dmi) D_DEBUG(DB_TRACE, "XS(%d) drained ULTs.\n", dx->dx_xs_id); } +#define D_MEMORY_TRACK_ENV "D_MEMORY_TRACK" /* * The server handler ULT first sets CPU affinity, initialize the per-xstream * TLS, CRT(comm) context, NVMe context, creates the long-run ULTs (GC & NVMe @@ -366,12 +368,18 @@ dss_srv_handler(void *arg) struct dss_thread_local_storage *dtc; struct dss_module_info *dmi; int rc; + bool track_mem = false; bool signal_caller = true; rc = dss_xstream_set_affinity(dx); if (rc) goto signal; + d_getenv_bool(D_MEMORY_TRACK_ENV, &track_mem); + if (unlikely(track_mem)) + d_set_alloc_track_cb(dss_mem_total_alloc_track, dss_mem_total_free_track, + &dx->dx_mem_stats); + /* initialize xstream-local storage */ dtc = dss_tls_init(dx->dx_tag, dx->dx_xs_id, dx->dx_tgt_id); if (dtc == NULL) { @@ -643,6 +651,46 @@ dss_xstream_free(struct dss_xstream *dx) D_FREE(dx); } +static void +dss_mem_stats_init(struct mem_stats *stats, int xs_id) +{ + int rc; + + rc = d_tm_add_metric(&stats->ms_total_usage, D_TM_GAUGE, + "Total memory usage", "byte", "mem/total_mem/xs_%u", xs_id); + if (rc) + D_WARN("Failed to create memory telemetry: "DF_RC"\n", DP_RC(rc)); + + rc = d_tm_add_metric(&stats->ms_mallinfo, D_TM_MEMINFO, + "Total memory arena", "", "mem/meminfo/xs_%u", xs_id); + if (rc) + D_WARN("Failed to create memory telemetry: "DF_RC"\n", DP_RC(rc)); + stats->ms_current = 0; +} + +void +dss_mem_total_alloc_track(void *arg, daos_size_t bytes) +{ + struct mem_stats *stats = arg; + + D_ASSERT(arg != NULL); + + d_tm_inc_gauge(stats->ms_total_usage, bytes); + /* Only retrieve mallocinfo every 10 allocation */ + if ((stats->ms_current++ % 10) == 0) + 
d_tm_record_meminfo(stats->ms_mallinfo); +} + +void +dss_mem_total_free_track(void *arg, daos_size_t bytes) +{ + struct mem_stats *stats = arg; + + D_ASSERT(arg != NULL); + + d_tm_dec_gauge(stats->ms_total_usage, bytes); +} + /** * Start one xstream. * @@ -735,6 +783,8 @@ dss_start_one_xstream(hwloc_cpuset_t cpus, int tag, int xs_id) D_GOTO(out_dx, rc); } + dss_mem_stats_init(&dx->dx_mem_stats, xs_id); + /** start XS, ABT rank 0 is reserved for the primary xstream */ rc = ABT_xstream_create_with_rank(dx->dx_sched, xs_id + 1, &dx->dx_xstream); diff --git a/src/engine/srv_internal.h b/src/engine/srv_internal.h index b3e0ca7ee9e..4fbe5d386d7 100644 --- a/src/engine/srv_internal.h +++ b/src/engine/srv_internal.h @@ -54,6 +54,12 @@ struct sched_info { unsigned int si_stop:1; }; +struct mem_stats { + struct d_tm_node_t *ms_total_usage; /* Total memory usage (bytes) */ + struct d_tm_node_t *ms_mallinfo; /* memory allocate information */ + uint64_t ms_current; +}; + /** Per-xstream configuration data */ struct dss_xstream { char dx_name[DSS_XS_NAME_LEN]; @@ -80,6 +86,7 @@ struct dss_xstream { bool dx_main_xs; /* true for main XS */ bool dx_comm; /* true with cart context */ bool dx_dsc_started; /* DSC progress ULT started */ + struct mem_stats dx_mem_stats; /* memory usages stats on this xstream */ #ifdef ULT_MMAP_STACK /* per-xstream pool/list of free stacks */ struct stack_pool *dx_sp; @@ -95,6 +102,7 @@ struct engine_metrics { struct d_tm_node_t *rank_id; struct d_tm_node_t *dead_rank_events; struct d_tm_node_t *last_event_time; + struct d_tm_node_t *meminfo; }; extern struct engine_metrics dss_engine_metrics; @@ -150,6 +158,8 @@ void dss_dump_ABT_state(FILE *fp); void dss_xstreams_open_barrier(void); struct dss_xstream *dss_get_xstream(int stream_id); int dss_xstream_cnt(void); +void dss_mem_total_alloc_track(void *arg, daos_size_t bytes); +void dss_mem_total_free_track(void *arg, daos_size_t bytes); /* srv_metrics.c */ int dss_engine_metrics_init(void); diff --git 
a/src/engine/srv_metrics.c b/src/engine/srv_metrics.c index ef302241ca9..0be06a4733c 100644 --- a/src/engine/srv_metrics.c +++ b/src/engine/srv_metrics.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2021-2022 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ diff --git a/src/gurt/misc.c b/src/gurt/misc.c index ca40fcc194a..ef3a2e91709 100644 --- a/src/gurt/misc.c +++ b/src/gurt/misc.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,20 @@ /* state buffer for DAOS rand and srand calls, NOT thread safe */ static struct drand48_data randBuffer = {0}; +d_alloc_track_cb_t d_alloc_track_cb; +d_alloc_track_cb_t d_free_track_cb; +static __thread void *track_arg; + +void +d_set_alloc_track_cb(d_alloc_track_cb_t alloc_cb, d_alloc_track_cb_t free_cb, void *arg) +{ + d_alloc_track_cb = alloc_cb; + d_free_track_cb = free_cb; + track_arg = arg; + + D_INFO("memory track is enabled for the engine.\n"); +} + void d_srand(long int seedval) { @@ -49,6 +64,12 @@ d_rand() void d_free(void *ptr) { + if (unlikely(track_arg != NULL)) { + size_t size = malloc_usable_size(ptr); + + d_free_track_cb(track_arg, size); + } + free(ptr); } @@ -87,25 +108,62 @@ d_free(void *ptr) void * d_calloc(size_t count, size_t eltsize) { - return calloc(count, eltsize); + void *ptr; + + ptr = calloc(count, eltsize); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } void * d_malloc(size_t size) { - return malloc(size); + void *ptr; + + ptr = malloc(size); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, size); + } + + return ptr; } void * d_realloc(void *ptr, size_t size) { - return realloc(ptr, size); + void *new_ptr; + + if (unlikely(track_arg != NULL)) { + size_t old_size = malloc_usable_size(ptr); + + new_ptr = realloc(ptr, size); + if (new_ptr != NULL) { + d_free_track_cb(track_arg, 
old_size); + d_alloc_track_cb(track_arg, size); + } + } else { + new_ptr = realloc(ptr, size); + } + return new_ptr; } char * d_strndup(const char *s, size_t n) { - return strndup(s, n); + char *ptr; + + ptr = strndup(s, n); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } int @@ -118,6 +176,11 @@ d_asprintf(char **strp, const char *fmt, ...) rc = vasprintf(strp, fmt, ap); va_end(ap); + if (unlikely(track_arg != NULL)) { + if (rc > 0 && *strp != NULL) + d_alloc_track_cb(track_arg, (size_t)rc); + } + return rc; } @@ -143,16 +206,31 @@ d_asprintf2(int *_rc, const char *fmt, ...) char * d_realpath(const char *path, char *resolved_path) { - return realpath(path, resolved_path); + char *ptr; + + ptr = realpath(path, resolved_path); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } void * d_aligned_alloc(size_t alignment, size_t size, bool zero) { - void *buf = aligned_alloc(alignment, size); + void *buf; + + buf = aligned_alloc(alignment, size); + if (unlikely(track_arg != NULL)) { + if (buf != NULL) + d_alloc_track_cb(track_arg, size); + } if (!zero || buf == NULL) return buf; + memset(buf, 0, size); return buf; } diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index e34abe19ad0..a5e34fab2d4 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -933,6 +934,27 @@ d_tm_print_timestamp(time_t *clk, char *name, int format, int opt_fields, } } +static void +d_tm_print_meminfo(struct d_tm_meminfo_t *meminfo, char *name, int format, + int opt_fields, FILE *stream) +{ + if ((name == NULL) || (stream == NULL)) + return; + + if (format == D_TM_CSV) { + fprintf(stream, "%s", name); + if (opt_fields & D_TM_INCLUDE_TYPE) + fprintf(stream, ",arena,ordblks,uordblks,fordblks"); + fprintf(stream, ",%zu,%zu,%zu,%zu", 
meminfo->arena, meminfo->ordblks, + meminfo->uordblks, meminfo->fordblks); + } else { + if (opt_fields & D_TM_INCLUDE_TYPE) + fprintf(stream, "type: arena,ordblks,uordblks,fordblks,"); + fprintf(stream, "%s:%zu,%zu,%zu,%zu", name, meminfo->arena, + meminfo->ordblks, meminfo->uordblks, meminfo->fordblks); + } +} + /** * Prints the time snapshot \a tms with \a name to the \a stream provided * @@ -1147,6 +1169,9 @@ d_tm_print_metadata(char *desc, char *units, int format, FILE *stream) } } +static int +d_tm_get_meminfo(struct d_tm_context *ctx, struct d_tm_meminfo_t *meminfo, + struct d_tm_node_t *node); /** * Prints a single \a node. * Used as a convenience function to demonstrate usage for the client @@ -1179,6 +1204,7 @@ d_tm_print_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *name = NULL; char *desc = NULL; char *units = NULL; + struct d_tm_meminfo_t meminfo; bool stats_printed = false; bool show_timestamp = false; bool show_meta = false; @@ -1247,6 +1273,14 @@ d_tm_print_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, } d_tm_print_timestamp(&clk, name, format, opt_fields, stream); break; + case D_TM_MEMINFO: + rc = d_tm_get_meminfo(ctx, &meminfo, node); + if (rc != DER_SUCCESS) { + fprintf(stream, "Error on meminfo read: %d\n", rc); + break; + } + d_tm_print_meminfo(&meminfo, name, format, opt_fields, stream); + break; case D_TM_TIMER_SNAPSHOT: case (D_TM_TIMER_SNAPSHOT | D_TM_CLOCK_REALTIME): case (D_TM_TIMER_SNAPSHOT | D_TM_CLOCK_PROCESS_CPUTIME): @@ -1745,6 +1779,41 @@ d_tm_record_timestamp(struct d_tm_node_t *metric) d_tm_node_unlock(metric); } +/** + * Record the current meminfo + * + * \param[in] metric Pointer to the metric + */ +void +d_tm_record_meminfo(struct d_tm_node_t *metric) +{ +#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 33) + struct mallinfo2 mi; + + mi = mallinfo2(); +#else + struct mallinfo mi; + + mi = mallinfo(); +#endif + + if (metric == NULL) + return; + + if (metric->dtn_type != 
D_TM_MEMINFO) { + D_ERROR("Failed to record meminfo on item %s not a " + "meminfo. Operation mismatch: " DF_RC "\n", + metric->dtn_name, DP_RC(-DER_OP_NOT_PERMITTED)); + return; + } + d_tm_node_lock(metric); + metric->dtn_metric->dtm_data.meminfo.arena = mi.arena; + metric->dtn_metric->dtm_data.meminfo.ordblks = mi.ordblks; + metric->dtn_metric->dtm_data.meminfo.uordblks = mi.uordblks; + metric->dtn_metric->dtm_data.meminfo.fordblks = mi.fordblks; + d_tm_node_unlock(metric); +} + /** * Read and store a high resolution timer snapshot value * @@ -2977,6 +3046,36 @@ d_tm_get_timestamp(struct d_tm_context *ctx, time_t *val, return DER_SUCCESS; } +static int +d_tm_get_meminfo(struct d_tm_context *ctx, struct d_tm_meminfo_t *meminfo, + struct d_tm_node_t *node) +{ + struct d_tm_metric_t *metric_data = NULL; + struct d_tm_shmem_hdr *shmem = NULL; + int rc; + + if (ctx == NULL || meminfo == NULL || node == NULL) + return -DER_INVAL; + + rc = validate_node_ptr(ctx, node, &shmem); + if (rc != 0) + return rc; + + if (node->dtn_type != D_TM_MEMINFO) + return -DER_OP_NOT_PERMITTED; + + metric_data = conv_ptr(shmem, node->dtn_metric); + if (metric_data != NULL) { + d_tm_node_lock(node); + *meminfo = metric_data->dtm_data.meminfo; + d_tm_node_unlock(node); + } else { + return -DER_METRIC_NOT_FOUND; + } + return DER_SUCCESS; +} + + /** * Client function to read the specified high resolution timer. 
* diff --git a/src/include/daos_errno.h b/src/include/daos_errno.h index 25b517c9f21..86709a6bd94 100644 --- a/src/include/daos_errno.h +++ b/src/include/daos_errno.h @@ -205,12 +205,10 @@ extern "C" { ACTION(DER_NVME_IO, NVMe I / O error) \ ACTION(DER_NO_CERT, Unable to access one or more certificates) \ ACTION(DER_BAD_CERT, Invalid x509 certificate) \ - ACTION(DER_VOS_PARTIAL_UPDATE, VOS partial update error) \ + ACTION(DER_VOS_PARTIAL_UPDATE, Same epoch partial overwrite of VOS array value disallowed) \ ACTION(DER_CHKPT_BUSY, Page is temporarily read only due to checkpointing) \ ACTION(DER_DIV_BY_ZERO, Division by zero) -/* clang-format on */ - /** Defines the gurt error codes */ #define D_FOREACH_ERR_RANGE(ACTION) \ ACTION(GURT, 1000) \ diff --git a/src/include/daos_srv/dtx_srv.h b/src/include/daos_srv/dtx_srv.h index 05cc162b19e..d0b2352783a 100644 --- a/src/include/daos_srv/dtx_srv.h +++ b/src/include/daos_srv/dtx_srv.h @@ -305,12 +305,7 @@ dtx_entry_get(struct dtx_entry *dte) return dte; } -static inline void -dtx_entry_put(struct dtx_entry *dte) -{ - if (--(dte->dte_refs) == 0) - D_FREE(dte); -} +void dtx_entry_put(struct dtx_entry *dte); static inline bool dtx_is_valid_handle(const struct dtx_handle *dth) diff --git a/src/include/gurt/common.h b/src/include/gurt/common.h index c6a8f241b26..cfce1a490ec 100644 --- a/src/include/gurt/common.h +++ b/src/include/gurt/common.h @@ -506,6 +506,10 @@ int d_getenv_uint64_t(const char *env, uint64_t *val); int d_write_string_buffer(struct d_string_buffer_t *buf, const char *fmt, ...); void d_free_string(struct d_string_buffer_t *buf); +typedef void (*d_alloc_track_cb_t)(void *arg, size_t size); + +void d_set_alloc_track_cb(d_alloc_track_cb_t alloc_cb, d_alloc_track_cb_t free_cb, void *arg); + #if !defined(container_of) /* given a pointer @ptr to the field @member embedded into type (usually * struct) @type, return pointer to the embedding instance of @type. 
diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h index 8852a1764cf..983ec2553f2 100644 --- a/src/include/gurt/telemetry_common.h +++ b/src/include/gurt/telemetry_common.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2022 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -139,6 +139,7 @@ enum { D_TM_CLOCK_PROCESS_CPUTIME = 0x100, D_TM_CLOCK_THREAD_CPUTIME = 0x200, D_TM_LINK = 0x400, + D_TM_MEMINFO = 0x800, D_TM_ALL_NODES = (D_TM_DIRECTORY | \ D_TM_COUNTER | \ D_TM_TIMESTAMP | \ @@ -146,7 +147,8 @@ enum { D_TM_DURATION | \ D_TM_GAUGE | \ D_TM_STATS_GAUGE | \ - D_TM_LINK) + D_TM_LINK | \ + D_TM_MEMINFO) }; enum { @@ -203,10 +205,18 @@ struct d_tm_histogram_t { int dth_value_multiplier; }; +struct d_tm_meminfo_t { + uint64_t arena; + uint64_t ordblks; + uint64_t uordblks; + uint64_t fordblks; +}; + struct d_tm_metric_t { union data { uint64_t value; struct timespec tms[2]; + struct d_tm_meminfo_t meminfo; } dtm_data; struct d_tm_stats_t *dtm_stats; struct d_tm_histogram_t *dtm_histogram; diff --git a/src/include/gurt/telemetry_producer.h b/src/include/gurt/telemetry_producer.h index de85ea11932..5cd323637d4 100644 --- a/src/include/gurt/telemetry_producer.h +++ b/src/include/gurt/telemetry_producer.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -12,6 +12,7 @@ void d_tm_set_counter(struct d_tm_node_t *metric, uint64_t value); void d_tm_inc_counter(struct d_tm_node_t *metric, uint64_t value); void d_tm_record_timestamp(struct d_tm_node_t *metric); +void d_tm_record_meminfo(struct d_tm_node_t *metric); void d_tm_take_timer_snapshot(struct d_tm_node_t *metric, int clk_id); void d_tm_mark_duration_start(struct d_tm_node_t *metric, int clk_id); void d_tm_mark_duration_end(struct d_tm_node_t *metric); diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index 2f3defae916..cc1cee34127 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -557,6 +557,14 @@ class TelemetryUtils(): ENGINE_NVME_RELIABILITY_METRICS +\ ENGINE_NVME_CRIT_WARN_METRICS +\ ENGINE_NVME_INTEL_VENDOR_METRICS + ENGINE_MEM_USAGE_METRICS = [ + "engine_mem_vos_dtx_cmt_ent_48", + "engine_mem_vos_vos_obj_360", + "engine_mem_vos_vos_lru_size", + "engine_mem_dtx_dtx_leader_handle_336", + "engine_mem_dtx_dtx_entry_40"] + ENGINE_MEM_TOTAL_USAGE_METRICS = [ + "engine_mem_total_mem"] def __init__(self, dmg, servers): """Create a TelemetryUtils object. @@ -587,6 +595,8 @@ def get_all_server_metrics_names(self, server, with_pools=False): all_metrics_names.extend(self.ENGINE_NET_METRICS) all_metrics_names.extend(self.ENGINE_RANK_METRICS) all_metrics_names.extend(self.ENGINE_DMABUFF_METRICS) + all_metrics_names.extend(self.ENGINE_MEM_USAGE_METRICS) + all_metrics_names.extend(self.ENGINE_MEM_TOTAL_USAGE_METRICS) if with_pools: all_metrics_names.extend(self.ENGINE_POOL_METRICS) all_metrics_names.extend(self.ENGINE_CONTAINER_METRICS) diff --git a/src/utils/daos_metrics/daos_metrics.c b/src/utils/daos_metrics/daos_metrics.c index b2f99e4abd2..8a8190d5203 100644 --- a/src/utils/daos_metrics/daos_metrics.c +++ b/src/utils/daos_metrics/daos_metrics.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2021 Intel Corporation. 
+ * (C) Copyright 2021-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -93,6 +93,7 @@ main(int argc, char **argv) {"path", required_argument, NULL, 'p'}, {"delay", required_argument, NULL, 'D'}, {"meta", no_argument, NULL, 'M'}, + {"meminfo", no_argument, NULL, 'm'}, {"type", no_argument, NULL, 'T'}, {"read", no_argument, NULL, 'r'}, {"reset", no_argument, NULL, 'e'}, @@ -100,7 +101,7 @@ main(int argc, char **argv) {NULL, 0, NULL, 0} }; - opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MTrhe", + opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MmTrhe", long_options, NULL); if (opt == -1) break; @@ -136,6 +137,9 @@ main(int argc, char **argv) case 'M': show_meta = true; break; + case 'm': + filter |= D_TM_MEMINFO; + break; case 'T': show_type = true; break; @@ -160,7 +164,7 @@ main(int argc, char **argv) ops |= D_TM_ITER_READ; if (filter == 0) - filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | + filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO | D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE; ctx = d_tm_open(srv_idx); diff --git a/src/vos/lru_array.c b/src/vos/lru_array.c index b94ff873a51..186026c5ba9 100644 --- a/src/vos/lru_array.c +++ b/src/vos/lru_array.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,6 +11,7 @@ */ #define D_LOGFAC DD_FAC(vos) #include "lru_array.h" +#include "vos_internal.h" /** Internal converter for real index to entity index in sub array */ #define ent2idx(array, sub, ent_idx) \ @@ -63,6 +64,24 @@ fini_cb(struct lru_array *array, struct lru_sub *sub, struct lru_entry *entry, array->la_cbs.lru_on_fini(entry->le_payload, real_idx, array->la_arg); } +static void +alloc_cb(struct lru_array *array, daos_size_t size) +{ + if (array->la_cbs.lru_on_alloc == NULL) + return; + + array->la_cbs.lru_on_alloc(array->la_arg, size); +} + +static void +free_cb(struct lru_array *array, daos_size_t size) +{ + if (array->la_cbs.lru_on_free == NULL) + return; + + array->la_cbs.lru_on_free(array->la_arg, size); +} + int lrua_array_alloc_one(struct lru_array *array, struct lru_sub *sub) { @@ -78,6 +97,8 @@ lrua_array_alloc_one(struct lru_array *array, struct lru_sub *sub) if (sub->ls_table == NULL) return -DER_NOMEM; + alloc_cb(array, rec_size * nr_ents); + /** Add newly allocated ones to head of list */ d_list_del(&sub->ls_link); d_list_add(&sub->ls_link, &array->la_free_sub); @@ -283,6 +304,7 @@ lrua_array_alloc(struct lru_array **arrayp, uint32_t nr_ent, uint32_t nr_arrays, if (cbs != NULL) array->la_cbs = *cbs; + alloc_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * nr_arrays); /** Only allocate one sub array, add the rest to free list */ D_INIT_LIST_HEAD(&array->la_free_sub); D_INIT_LIST_HEAD(&array->la_unused_sub); @@ -294,6 +316,7 @@ lrua_array_alloc(struct lru_array **arrayp, uint32_t nr_ent, uint32_t nr_arrays, rc = lrua_array_alloc_one(array, &array->la_sub[0]); if (rc != 0) { + free_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * nr_arrays); D_FREE(array); return rc; } @@ -312,6 +335,10 @@ array_free_one(struct lru_array *array, struct lru_sub *sub) fini_cb(array, sub, &sub->ls_table[idx], idx); D_FREE(sub->ls_table); + + free_cb(array, + (sizeof(struct lru_entry) + 
array->la_payload_size) * + (array->la_idx_mask + 1)); } void @@ -323,13 +350,14 @@ lrua_array_free(struct lru_array *array) if (array == NULL) return; - for (i = 0; i < array->la_array_nr; i++) { sub = &array->la_sub[i]; if (sub->ls_table != NULL) array_free_one(array, sub); } + free_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * array->la_array_nr); + D_FREE(array); } diff --git a/src/vos/lru_array.h b/src/vos/lru_array.h index af9705ea72a..7a620c23b87 100644 --- a/src/vos/lru_array.h +++ b/src/vos/lru_array.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -22,6 +22,10 @@ struct lru_callbacks { void (*lru_on_init)(void *entry, uint32_t idx, void *arg); /** Called on finalization of an entry */ void (*lru_on_fini)(void *entry, uint32_t idx, void *arg); + /** Called on allocation of any LRU entries */ + void (*lru_on_alloc)(void *arg, daos_size_t size); + /** Called on free of any LRU entries */ + void (*lru_on_free)(void *arg, daos_size_t size); }; struct lru_entry { diff --git a/src/vos/tests/vts_io.c b/src/vos/tests/vts_io.c index aaa5911e3ca..2a23baacd6b 100644 --- a/src/vos/tests/vts_io.c +++ b/src/vos/tests/vts_io.c @@ -240,8 +240,8 @@ teardown_io(void **state) int rc; if (table) { - vos_ts_table_free(&table); - rc = vos_ts_table_alloc(&table); + vos_ts_table_free(&table, NULL); + rc = vos_ts_table_alloc(&table, NULL); if (rc != 0) { printf("Fatal error, table couldn't be reallocated\n"); exit(rc); diff --git a/src/vos/tests/vts_ts.c b/src/vos/tests/vts_ts.c index 60302ffe262..f882496dc17 100644 --- a/src/vos/tests/vts_ts.c +++ b/src/vos/tests/vts_ts.c @@ -235,7 +235,7 @@ alloc_ts_cache(void **state) if (ts_table != NULL) ts_arg->old_table = ts_table; - rc = vos_ts_table_alloc(&ts_table); + rc = vos_ts_table_alloc(&ts_table, NULL); if (rc != 0) { print_message("Can't allocate timestamp table: "DF_RC"\n", DP_RC(rc)); @@ -757,7 +757,7 
@@ ts_test_fini(void **state) vos_ts_set_free(ts_arg->ta_ts_set); ts_table = vos_ts_table_get(true); - vos_ts_table_free(&ts_table); + vos_ts_table_free(&ts_table, NULL); vos_ts_table_set(ts_arg->old_table); D_FREE(ts_arg); diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index cf2ae1520ad..45252f9da0e 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -408,7 +408,7 @@ vos_tls_fini(int tags, void *data) umem_fini_txd(&tls->vtl_txd); if (tls->vtl_ts_table) - vos_ts_table_free(&tls->vtl_ts_table); + vos_ts_table_free(&tls->vtl_ts_table, tls); D_FREE(tls); } @@ -419,7 +419,28 @@ vos_standalone_tls_fini(void) vos_tls_fini(DAOS_TGT_TAG, self_mode.self_tls); self_mode.self_tls = NULL; } +} + +void +vos_lru_alloc_track(void *arg, daos_size_t size) +{ + struct vos_tls *tls = arg; + + if (tls == NULL || tls->vtl_lru_alloc_size == NULL) + return; + d_tm_inc_gauge(tls->vtl_lru_alloc_size, size); +} + +void +vos_lru_free_track(void *arg, daos_size_t size) +{ + struct vos_tls *tls = arg; + + if (tls == NULL || tls->vtl_lru_alloc_size == NULL) + return; + + d_tm_dec_gauge(tls->vtl_lru_alloc_size, size); } static void * @@ -464,17 +485,13 @@ vos_tls_init(int tags, int xs_id, int tgt_id) } if (tags & DAOS_TGT_TAG) { - rc = vos_ts_table_alloc(&tls->vtl_ts_table); + rc = vos_ts_table_alloc(&tls->vtl_ts_table, tls); if (rc) { D_ERROR("Error in creating timestamp table: %d\n", rc); goto failed; } } - if (tgt_id < 0) - /** skip sensor setup on standalone vos & sys xstream */ - return tls; - rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, "Number of committed entries kept around for reply" " reconstruction", "entries", @@ -482,6 +499,37 @@ vos_tls_init(int tags, int xs_id, int tgt_id) if (rc) D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", DP_RC(rc)); + if (tgt_id >= 0) { + rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, + "Number of committed entries kept around for reply" + " reconstruction", "entries", + 
"io/dtx/committed/tgt_%u", tgt_id); + if (rc) + D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->vtl_dtx_cmt_ent_cnt, D_TM_GAUGE, + "Number of committed entries", "entry", + "mem/vos/dtx_cmt_ent_%u/tgt_%u", + sizeof(struct vos_dtx_cmt_ent), tgt_id); + if (rc) + D_WARN("Failed to create committed cnt: "DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE, + "Number of cached vos object", "entry", + "mem/vos/vos_obj_%u/tgt_%u", + sizeof(struct vos_object), tgt_id); + if (rc) + D_WARN("Failed to create vos obj cnt: "DF_RC"\n", DP_RC(rc)); + + } + + rc = d_tm_add_metric(&tls->vtl_lru_alloc_size, D_TM_GAUGE, + "Active DTX table LRU size", "byte", + "mem/vos/vos_lru_size/tgt_%d", tgt_id); + if (rc) + D_WARN("Failed to create LRU alloc size: "DF_RC"\n", DP_RC(rc)); return tls; failed: diff --git a/src/vos/vos_container.c b/src/vos/vos_container.c index 19a10d6acac..93cc62ceeb5 100644 --- a/src/vos/vos_container.c +++ b/src/vos/vos_container.c @@ -314,6 +314,11 @@ vos_cont_create(daos_handle_t poh, uuid_t co_uuid) return rc; } +static const struct lru_callbacks lru_cont_cbs = { + .lru_on_alloc = vos_lru_alloc_track, + .lru_on_free = vos_lru_free_track, +}; + /** * Open a container within a VOSP */ @@ -395,8 +400,8 @@ vos_cont_open(daos_handle_t poh, uuid_t co_uuid, daos_handle_t *coh) rc = lrua_array_alloc(&cont->vc_dtx_array, DTX_ARRAY_LEN, DTX_ARRAY_NR, sizeof(struct vos_dtx_act_ent), - LRU_FLAG_REUSE_UNIQUE, - NULL, NULL); + LRU_FLAG_REUSE_UNIQUE, &lru_cont_cbs, + vos_tls_get(cont->vc_pool->vp_sysdb)); if (rc != 0) { D_ERROR("Failed to create DTX active array: rc = "DF_RC"\n", DP_RC(rc)); diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 63b102e32f9..4eefa622b7a 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -756,6 +756,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t daos_epoch_t cmt_time, struct vos_dtx_cmt_ent **dce_p, struct 
vos_dtx_act_ent **dae_p, bool *rm_cos, bool *fatal) { + struct vos_tls *tls = vos_tls_get(false); struct vos_dtx_act_ent *dae = NULL; struct vos_dtx_cmt_ent *dce = NULL; d_iov_t kiov; @@ -820,6 +821,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t if (dce == NULL) D_GOTO(out, rc = -DER_NOMEM); + d_tm_inc_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); DCE_CMT_TIME(dce) = cmt_time; if (dae != NULL) { DCE_XID(dce) = DAE_XID(dae); @@ -2471,6 +2473,7 @@ vos_dtx_aggregate(daos_handle_t coh) cont->vc_dtx_committed_count--; cont->vc_pool->vp_dtx_committed_count--; d_tm_dec_gauge(tls->vtl_committed, 1); + d_tm_dec_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); } if (epoch != cont_df->cd_newest_aggregated) { @@ -3136,6 +3139,11 @@ vos_dtx_rsrvd_fini(struct dtx_handle *dth) } } +static const struct lru_callbacks lru_dtx_cache_cbs = { + .lru_on_alloc = vos_lru_alloc_track, + .lru_on_free = vos_lru_free_track, +}; + int vos_dtx_cache_reset(daos_handle_t coh, bool force) { @@ -3170,7 +3178,8 @@ vos_dtx_cache_reset(daos_handle_t coh, bool force) lrua_array_free(cont->vc_dtx_array); rc = lrua_array_alloc(&cont->vc_dtx_array, DTX_ARRAY_LEN, DTX_ARRAY_NR, - sizeof(struct vos_dtx_act_ent), LRU_FLAG_REUSE_UNIQUE, NULL, NULL); + sizeof(struct vos_dtx_act_ent), LRU_FLAG_REUSE_UNIQUE, + &lru_dtx_cache_cbs, vos_tls_get(false)); if (rc != 0) { D_ERROR("Failed to re-create DTX active array for "DF_UUID": "DF_RC"\n", DP_UUID(cont->vc_id), DP_RC(rc)); diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 67f4980b66f..2bee64673bf 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -1726,4 +1726,6 @@ int vos_oi_upgrade_layout_ver(struct vos_container *cont, daos_unit_oid_t oid, uint32_t layout_ver); +void vos_lru_free_track(void *arg, daos_size_t size); +void vos_lru_alloc_track(void *arg, daos_size_t size); #endif /* __VOS_INTERNAL_H__ */ diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c index 826c53f06a5..11e55e9d156 100644 --- 
a/src/vos/vos_obj_cache.c +++ b/src/vos/vos_obj_cache.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -58,11 +58,13 @@ obj_lop_alloc(void *key, unsigned int ksize, void *args, struct vos_object *obj; struct obj_lru_key *lkey; struct vos_container *cont; + struct vos_tls *tls; int rc; cont = (struct vos_container *)args; D_ASSERT(cont != NULL); + tls = vos_tls_get(cont->vc_pool->vp_sysdb); lkey = (struct obj_lru_key *)key; D_ASSERT(lkey != NULL); @@ -74,7 +76,7 @@ obj_lop_alloc(void *key, unsigned int ksize, void *args, D_GOTO(failed, rc = -DER_NOMEM); init_object(obj, lkey->olk_oid, cont); - + d_tm_inc_gauge(tls->vtl_obj_cnt, 1); *llink_p = &obj->obj_llink; rc = 0; failed: @@ -123,10 +125,13 @@ static void obj_lop_free(struct daos_llink *llink) { struct vos_object *obj; + struct vos_tls *tls; D_DEBUG(DB_TRACE, "lru free callback for vos_obj_cache\n"); obj = container_of(llink, struct vos_object, obj_llink); + tls = vos_tls_get(obj->obj_cont->vc_pool->vp_sysdb); + d_tm_dec_gauge(tls->vtl_obj_cnt, 1); clean_object(obj); D_FREE(obj); } diff --git a/src/vos/vos_tls.h b/src/vos/vos_tls.h index 96c9a3e0c6d..981cce10be5 100644 --- a/src/vos/vos_tls.h +++ b/src/vos/vos_tls.h @@ -63,6 +63,9 @@ struct vos_tls { bool vtl_hash_set; }; struct d_tm_node_t *vtl_committed; + struct d_tm_node_t *vtl_obj_cnt; + struct d_tm_node_t *vtl_dtx_cmt_ent_cnt; + struct d_tm_node_t *vtl_lru_alloc_size; }; struct bio_xs_context *vos_xsctxt_get(void); diff --git a/src/vos/vos_ts.c b/src/vos/vos_ts.c index 9e47d100097..4018c2e685e 100644 --- a/src/vos/vos_ts.c +++ b/src/vos/vos_ts.c @@ -99,13 +99,29 @@ static void init_entry(void *payload, uint32_t idx, void *arg) entry->te_info = info; } +static void vos_lru_ts_alloc(void *arg, daos_size_t size) +{ + struct vos_ts_info *info = arg; + + vos_lru_alloc_track(info->ti_tls, size); +} + +static void vos_lru_ts_free(void *arg, 
daos_size_t size) +{ + struct vos_ts_info *info = arg; + + vos_lru_free_track(info->ti_tls, size); +} + static const struct lru_callbacks lru_cbs = { .lru_on_evict = evict_entry, .lru_on_init = init_entry, + .lru_on_alloc = vos_lru_ts_alloc, + .lru_on_free = vos_lru_ts_free, }; int -vos_ts_table_alloc(struct vos_ts_table **ts_tablep) +vos_ts_table_alloc(struct vos_ts_table **ts_tablep, struct vos_tls *tls) { struct vos_ts_entry *entry; struct vos_ts_table *ts_table; @@ -129,6 +145,11 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) goto free_table; } + if (tls != NULL) + d_tm_inc_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + AKEY_MISS_SIZE)); + ts_table->tt_ts_rl = vos_start_epoch; ts_table->tt_ts_rh = vos_start_epoch; uuid_clear(ts_table->tt_tx_rl.dti_uuid); @@ -140,6 +161,7 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) info->ti_type = i; info->ti_count = type_counts[i]; info->ti_table = ts_table; + info->ti_tls = tls; switch (i) { case VOS_TS_TYPE_OBJ: miss_size = OBJ_MISS_SIZE; @@ -192,6 +214,10 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) cleanup: for (i = 0; i < VOS_TS_TYPE_COUNT; i++) lrua_array_free(ts_table->tt_type_info[i].ti_array); + if (tls != NULL) + d_tm_dec_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + AKEY_MISS_SIZE)); D_FREE(ts_table->tt_misses); free_table: D_FREE(ts_table); @@ -200,7 +226,7 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) } void -vos_ts_table_free(struct vos_ts_table **ts_tablep) +vos_ts_table_free(struct vos_ts_table **ts_tablep, struct vos_tls *tls) { struct vos_ts_table *ts_table = *ts_tablep; int i; @@ -208,6 +234,10 @@ vos_ts_table_free(struct vos_ts_table **ts_tablep) for (i = 0; i < VOS_TS_TYPE_COUNT; i++) lrua_array_free(ts_table->tt_type_info[i].ti_array); + if (tls != NULL) + d_tm_dec_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + 
AKEY_MISS_SIZE)); D_FREE(ts_table->tt_misses); D_FREE(ts_table); diff --git a/src/vos/vos_ts.h b/src/vos/vos_ts.h index 2772fab2ce2..379f59f819c 100644 --- a/src/vos/vos_ts.h +++ b/src/vos/vos_ts.h @@ -27,6 +27,8 @@ struct vos_ts_info { struct vos_ts_table *ti_table; /** Negative entries for this type */ struct vos_ts_entry *ti_misses; + /** TLS for tracking memory usage */ + struct vos_tls *ti_tls; /** Type identifier */ uint32_t ti_type; /** Mask for negative entry cache */ @@ -620,20 +622,22 @@ vos_ts_peek_entry(uint32_t *idx, uint32_t type, struct vos_ts_entry **entryp, /** Allocate thread local timestamp cache. Set the initial global times * * \param[in,out] ts_table Thread local table pointer + * \param[in] tls TLS to track memory usage. * * \return -DER_NOMEM Not enough memory available * 0 Success */ int -vos_ts_table_alloc(struct vos_ts_table **ts_table); +vos_ts_table_alloc(struct vos_ts_table **ts_table, struct vos_tls *tls); /** Free the thread local timestamp cache and reset pointer to NULL * * \param[in,out] ts_table Thread local table pointer + * \param[in] tls TLS to track memory usage. 
*/ void -vos_ts_table_free(struct vos_ts_table **ts_table); +vos_ts_table_free(struct vos_ts_table **ts_table, struct vos_tls *tls); /** Allocate a timestamp set * From ae40bce6cef56900c8f21f3f3c42de0f588b721e Mon Sep 17 00:00:00 2001 From: Michael Hennecke Date: Fri, 22 Sep 2023 10:58:01 +0200 Subject: [PATCH 54/80] DAOS-9355 doc: mkdocs for 2.5 (#13082) edit mkdocs.yml for 2.5 Signed-off-by: Michael Hennecke --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 41027df9a6f..82c3c8125d8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,5 +1,5 @@ # Project Information -site_name: DAOS v2.5 - master +site_name: DAOS v2.5 site_description: Distributed Asynchronous Object Storage site_author: DAOS Project From 281b4fad37a339ddfe37e7d25883d900f4360867 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 22 Sep 2023 14:04:22 +0100 Subject: [PATCH 55/80] DAOS-14225 control: Prevent duplicate call to SetRank (#13058) Remove legacy workaround where SetRank is called directly as a special case when rank 0 is bootstrapping the DAOS system as this creates a window where SetRank can be called a second time. The localJoin flag is removed from JoinResp and SetRank will return immediately if rank has already been set as ready. 
Signed-off-by: Tom Nabarro --- src/control/common/proto/logging.go | 2 +- src/control/common/proto/mgmt/svc.pb.go | 169 +++++++++++------------- src/control/server/instance.go | 25 ++-- src/control/server/mgmt_system.go | 16 --- src/control/server/mgmt_system_test.go | 2 - src/mgmt/svc.pb-c.c | 26 +--- src/mgmt/svc.pb-c.h | 6 +- src/proto/mgmt/svc.proto | 3 +- 8 files changed, 103 insertions(+), 146 deletions(-) diff --git a/src/control/common/proto/logging.go b/src/control/common/proto/logging.go index a2edc22c67f..624e58fb459 100644 --- a/src/control/common/proto/logging.go +++ b/src/control/common/proto/logging.go @@ -136,7 +136,7 @@ func Debug(msg proto.Message) string { fmt.Fprintf(&bld, " %s:%s", p.Label, p.State) } case *mgmtpb.JoinResp: - fmt.Fprintf(&bld, "%T rank:%d (state:%s, local:%t) map:%d", m, m.Rank, m.State, m.LocalJoin, m.MapVersion) + fmt.Fprintf(&bld, "%T rank:%d (state:%s) map:%d", m, m.Rank, m.State, m.MapVersion) case *mgmtpb.GetAttachInfoResp: msRanks := ranklist.RankSetFromRanks(ranklist.RanksFromUint32(m.MsRanks)) uriRanks := ranklist.NewRankSet() diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index 74d11533864..e6988dca637 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.31.0 // protoc v3.5.0 // source: mgmt/svc.proto @@ -342,7 +342,6 @@ type JoinResp struct { Rank uint32 `protobuf:"varint,2,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank assigned. State JoinResp_State `protobuf:"varint,3,opt,name=state,proto3,enum=mgmt.JoinResp_State" json:"state,omitempty"` // Server state in the system map. 
FaultDomain string `protobuf:"bytes,4,opt,name=faultDomain,proto3" json:"faultDomain,omitempty"` // Fault domain for the instance - LocalJoin bool `protobuf:"varint,5,opt,name=localJoin,proto3" json:"localJoin,omitempty"` // Join processed locally. MapVersion uint32 `protobuf:"varint,6,opt,name=map_version,json=mapVersion,proto3" json:"map_version,omitempty"` // Join processed in this version of the system map. } @@ -406,13 +405,6 @@ func (x *JoinResp) GetFaultDomain() string { return "" } -func (x *JoinResp) GetLocalJoin() bool { - if x != nil { - return x.LocalJoin - } - return false -} - func (x *JoinResp) GetMapVersion() uint32 { if x != nil { return x.MapVersion @@ -1159,7 +1151,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x03, 0x69, 0x64, 0x78, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x22, 0xdd, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, + 0x22, 0xd0, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, @@ -1167,85 +1159,84 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, - 0x4a, 0x6f, 0x69, 0x6e, 0x18, 
0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, - 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, - 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, - 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, - 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, 0x0f, 0x4c, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x25, 0x0a, - 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, - 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, - 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, - 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, - 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 
0x0d, 0x43, 0x6c, 0x69, 0x65, - 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, - 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, - 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, - 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, - 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, - 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, - 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, - 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, - 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, - 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, - 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, - 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x19, 0x0a, - 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x09, 0x52, - 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, 0x47, 0x65, 0x74, - 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, - 0x72, 
0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, - 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, - 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, - 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, - 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, - 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, - 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, - 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, - 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, - 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, - 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, - 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, - 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, - 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x41, 0x0a, 0x0a, - 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 
0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1f, - 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, - 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, - 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, - 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, - 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, - 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, - 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, - 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, + 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, + 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, + 0x10, 0x01, 0x4a, 0x04, 0x08, 0x05, 0x10, 0x06, 0x52, 0x09, 0x6c, 0x6f, 
0x63, 0x61, 0x6c, 0x4a, + 0x6f, 0x69, 0x6e, 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, + 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, + 0x0f, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, + 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, + 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, + 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, + 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, + 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, + 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, + 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, + 0x65, 0x72, 
0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, + 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, + 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, + 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, + 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, + 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, + 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, + 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, + 0x12, 0x19, 0x0a, 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, + 0x28, 0x09, 0x52, 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, + 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, + 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, + 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, + 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, + 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 
0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, + 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, + 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, + 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, + 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, + 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, + 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, + 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, + 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, + 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, + 0x41, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, + 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 
0x74, 0x6f, + 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, + 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, + 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, + 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, + 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, + 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, + 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, + 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, + 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, + 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/server/instance.go b/src/control/server/instance.go index 14f53cf3b5b..4583c86f170 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -178,10 +178,10 @@ func (ei *EngineInstance) removeSocket() error { return nil } -func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, bool, uint32, error) { +func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, uint32, error) { superblock := ei.getSuperblock() if superblock == nil { - return ranklist.NilRank, false, 0, errors.New("nil superblock while determining rank") + return ranklist.NilRank, 0, errors.New("nil superblock while determining rank") } r := ranklist.NilRank @@ -200,11 +200,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready 
*srvpb.Notify }) if err != nil { ei.log.Errorf("join failed: %s", err) - return ranklist.NilRank, false, 0, err + return ranklist.NilRank, 0, err } switch resp.State { case system.MemberStateAdminExcluded, system.MemberStateExcluded: - return ranklist.NilRank, resp.LocalJoin, 0, errors.Errorf("rank %d excluded", resp.Rank) + return ranklist.NilRank, 0, errors.Errorf("rank %d excluded", resp.Rank) } r = ranklist.Rank(resp.Rank) @@ -218,11 +218,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify superblock.URI = ready.GetUri() ei.setSuperblock(superblock) if err := ei.WriteSuperblock(); err != nil { - return ranklist.NilRank, resp.LocalJoin, 0, err + return ranklist.NilRank, 0, err } } - return r, resp.LocalJoin, resp.MapVersion, nil + return r, resp.MapVersion, nil } func (ei *EngineInstance) updateFaultDomainInSuperblock() error { @@ -259,21 +259,20 @@ func (ei *EngineInstance) handleReady(ctx context.Context, ready *srvpb.NotifyRe ei.log.Error(err.Error()) // nonfatal } - r, localJoin, mapVersion, err := ei.determineRank(ctx, ready) + r, mapVersion, err := ei.determineRank(ctx, ready) if err != nil { return err } - // If the join was already processed because it ran on the same server, - // skip the rest of these steps. 
- if localJoin { - return nil - } - return ei.SetupRank(ctx, r, mapVersion) } func (ei *EngineInstance) SetupRank(ctx context.Context, rank ranklist.Rank, map_version uint32) error { + if ei.IsReady() { + ei.log.Errorf("SetupRank called on an already set-up instance %d", ei.Index()) + return nil + } + if err := ei.callSetRank(ctx, rank, map_version); err != nil { return errors.Wrap(err, "SetRank failed") } diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index 8374b0b9e2e..620db09bf11 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -178,22 +178,6 @@ func (svc *mgmtSvc) join(ctx context.Context, req *mgmtpb.JoinReq, peerAddr *net MapVersion: joinResponse.MapVersion, } - // If the rank is local to the MS leader, then we need to wire up at least - // one in order to perform a CaRT group update. - if common.IsLocalAddr(peerAddr) && req.Idx == 0 { - resp.LocalJoin = true - - srvs := svc.harness.Instances() - if len(srvs) == 0 { - return nil, errors.New("invalid Join request (index 0 doesn't exist?!?)") - } - srv := srvs[0] - - if err := srv.SetupRank(ctx, joinResponse.Member.Rank, joinResponse.MapVersion); err != nil { - return nil, errors.Wrap(err, "SetupRank on local instance failed") - } - } - return resp, nil } diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index 375b77c3efb..0ac1112c4ba 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -1967,7 +1967,6 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, - LocalJoin: false, MapVersion: 2, }, }, @@ -1993,7 +1992,6 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, - LocalJoin: true, MapVersion: 2, }, }, diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index cfd562891e0..c3900429dfe 100644 --- a/src/mgmt/svc.pb-c.c 
+++ b/src/mgmt/svc.pb-c.c @@ -1010,7 +1010,7 @@ const ProtobufCEnumDescriptor mgmt__join_resp__state__descriptor = mgmt__join_resp__state__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = +static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = { { "status", @@ -1060,18 +1060,6 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, - { - "localJoin", - 5, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_BOOL, - 0, /* quantifier_offset */ - offsetof(Mgmt__JoinResp, localjoin), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, { "map_version", 6, @@ -1087,16 +1075,16 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = }; static const unsigned mgmt__join_resp__field_indices_by_name[] = { 3, /* field[3] = faultDomain */ - 4, /* field[4] = localJoin */ - 5, /* field[5] = map_version */ + 4, /* field[4] = map_version */ 1, /* field[1] = rank */ 2, /* field[2] = state */ 0, /* field[0] = status */ }; -static const ProtobufCIntRange mgmt__join_resp__number_ranges[1 + 1] = +static const ProtobufCIntRange mgmt__join_resp__number_ranges[2 + 1] = { { 1, 0 }, - { 0, 6 } + { 6, 4 }, + { 0, 5 } }; const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = { @@ -1106,10 +1094,10 @@ const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = "Mgmt__JoinResp", "mgmt", sizeof(Mgmt__JoinResp), - 6, + 5, mgmt__join_resp__field_descriptors, mgmt__join_resp__field_indices_by_name, - 1, mgmt__join_resp__number_ranges, + 2, mgmt__join_resp__number_ranges, (ProtobufCMessageInit) mgmt__join_resp__init, NULL,NULL,NULL /* reserved[123] */ }; diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 55acb283028..c1d61ef44fb 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -163,10 +163,6 @@ struct _Mgmt__JoinResp * Fault 
domain for the instance */ char *faultdomain; - /* - * Join processed locally. - */ - protobuf_c_boolean localjoin; /* * Join processed in this version of the system map. */ @@ -174,7 +170,7 @@ struct _Mgmt__JoinResp }; #define MGMT__JOIN_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__join_resp__descriptor) \ - , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0, 0 } + , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0 } struct _Mgmt__LeaderQueryReq diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 400452837ce..668a9905bfd 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -44,6 +44,8 @@ message JoinReq { } message JoinResp { + reserved 5; + reserved "localJoin"; int32 status = 1; // DAOS error code uint32 rank = 2; // Server rank assigned. enum State { @@ -52,7 +54,6 @@ message JoinResp { } State state = 3; // Server state in the system map. string faultDomain = 4; // Fault domain for the instance - bool localJoin = 5; // Join processed locally. uint32 map_version = 6; // Join processed in this version of the system map. 
} From 02774331ecc9e4959e28e66ba18d74dde5456fb1 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 22 Sep 2023 09:19:37 -0500 Subject: [PATCH 56/80] DAOS-14391 il: reduce eq count and build jobs for vm build test (#13070) Signed-off-by: Mohamad Chaarawi --- src/tests/ftest/dfuse/daos_build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/dfuse/daos_build.py b/src/tests/ftest/dfuse/daos_build.py index 5edd0b328df..d1afe8366b2 100644 --- a/src/tests/ftest/dfuse/daos_build.py +++ b/src/tests/ftest/dfuse/daos_build.py @@ -138,8 +138,8 @@ def run_build_test(self, cache_mode, intercept=False, run_on_vms=False): remote_env = {} if run_on_vms: dfuse_namespace = dfuse_namespace = "/run/dfuse_vm/*" - build_jobs = 6 * 2 - remote_env['D_IL_MAX_EQ'] = '6' + build_jobs = 6 + remote_env['D_IL_MAX_EQ'] = '2' intercept_jobs = build_jobs if intercept: From 527b38a8c8a2890e8741543fda12638cc130d0ac Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Mon, 25 Sep 2023 16:57:14 +0100 Subject: [PATCH 57/80] DAOS-12425 control: Add opt to auto-format on server start-up (#13074) Add daos_server start --auto-format option to enable engines to start without having to trigger a storage format with an externally executed dmg storage format command. If the flag is detected the server will call the format API during its start-up routine and if the server config file used is valid the engines will be started after format completes. Signed-off-by: Tom Nabarro --- src/control/cmd/daos_server/start.go | 15 +++++++++------ src/control/server/config/server.go | 3 +++ src/control/server/server.go | 8 ++++++++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/control/cmd/daos_server/start.go b/src/control/cmd/daos_server/start.go index f2d7b77feda..962d370db96 100644 --- a/src/control/cmd/daos_server/start.go +++ b/src/control/cmd/daos_server/start.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2022 Intel Corporation. 
+// (C) Copyright 2019-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -27,13 +27,14 @@ type startCmd struct { Port uint16 `short:"p" long:"port" description:"Port for the gRPC management interfect to listen on"` MountPath string `short:"s" long:"storage" description:"Storage path"` Modules *string `short:"m" long:"modules" description:"List of server modules to load"` - Targets uint16 `short:"t" long:"targets" description:"number of targets to use (default use all cores)"` - NrXsHelpers *uint16 `short:"x" long:"xshelpernr" description:"number of helper XS per VOS target"` - FirstCore uint16 `short:"f" long:"firstcore" default:"0" description:"index of first core for service thread"` + Targets uint16 `short:"t" long:"targets" description:"Number of targets to use (default use all cores)"` + NrXsHelpers *uint16 `short:"x" long:"xshelpernr" description:"Number of helper XS per VOS target"` + FirstCore uint16 `short:"f" long:"firstcore" default:"0" description:"Index of first core for service thread"` Group string `short:"g" long:"group" description:"Server group name"` SocketDir string `short:"d" long:"socket_dir" description:"Location for all daos_server & daos_engine sockets"` - Insecure bool `short:"i" long:"insecure" description:"allow for insecure connections"` - RecreateSuperblocks bool `long:"recreate-superblocks" description:"recreate missing superblocks rather than failing"` + Insecure bool `short:"i" long:"insecure" description:"Allow for insecure connections"` + RecreateSuperblocks bool `long:"recreate-superblocks" description:"Recreate missing superblocks rather than failing"` + AutoFormat bool `long:"auto-format" description:"Automatically format storage on server start to bring-up engines without requiring dmg storage format command"` } func (cmd *startCmd) setCLIOverrides() error { @@ -161,5 +162,7 @@ func (cmd *startCmd) Execute(args []string) error { return err } + cmd.config.AutoFormat = cmd.AutoFormat + return 
cmd.start(cmd.Logger, cmd.config) } diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index 974d11161f8..f9deaffad9a 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -78,6 +78,9 @@ type Server struct { // Legacy config file parameters stored in a separate struct. Legacy ServerLegacy `yaml:",inline"` + + // Behavior flags + AutoFormat bool `yaml:"-"` } // WithCoreDumpFilter sets the core dump filter written to /proc/self/coredump_filter. diff --git a/src/control/server/server.go b/src/control/server/server.go index e4f7b5bdfa1..c8812e3b987 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -467,6 +467,14 @@ func (srv *server) start(ctx context.Context) error { }() srv.mgmtSvc.startAsyncLoops(ctx) + + if srv.cfg.AutoFormat { + srv.log.Notice("--auto flag set on server start so formatting storage now") + if _, err := srv.ctlSvc.StorageFormat(ctx, &ctlpb.StorageFormatReq{}); err != nil { + return errors.WithMessage(err, "attempting to auto format") + } + } + return errors.Wrapf(srv.harness.Start(ctx, srv.sysdb, srv.cfg), "%s harness exited", build.ControlPlaneName) } From 448bb0f5758b167adca288816ee9bfd0df4ef117 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 26 Sep 2023 15:36:00 +0100 Subject: [PATCH 58/80] DAOS-14052 test: Update cart_logtest usage in launch.py (#13005) Update log_test to not check for descriptor consistency when parsing ftest logs. As ftest debug masks change on live systems these tests aren't valid and will fail if RPCs are in flight during the change. 
Signed-off-by: Ashley Pittman ashley.m.pittman@intel.com> --- src/tests/ftest/cart/util/cart_logtest.py | 249 +++++++++++++--------- src/tests/ftest/launch.py | 2 +- 2 files changed, 154 insertions(+), 97 deletions(-) diff --git a/src/tests/ftest/cart/util/cart_logtest.py b/src/tests/ftest/cart/util/cart_logtest.py index 1266b63dcff..57527b3c546 100755 --- a/src/tests/ftest/cart/util/cart_logtest.py +++ b/src/tests/ftest/cart/util/cart_logtest.py @@ -226,6 +226,7 @@ def __init__(self, log_iter, quiet=False): self.fi_location = None self.skip_suffixes = [] self._tracers = [] + self.ftest_mode = False # Records on number, type and frequency of logging. self.log_locs = Counter() @@ -340,11 +341,6 @@ def _check_pid_from_log_file(self, pid, abort_on_warning, leak_wf, show_memleaks warnings_mode = False server_shutdown = False - regions = OrderedDict() - memsize = HwmCounter() - - old_regions = {} - error_files = set() have_debug = False @@ -354,10 +350,20 @@ def _check_pid_from_log_file(self, pid, abort_on_warning, leak_wf, show_memleaks cb_list = [] if not self.quiet: - cb_list.append((RpcReporting(), ('hg', 'rpc'))) + rpc_r = RpcReporting() + if self.ftest_mode: + rpc_r.dynamic_level = True + + cb_list.append((rpc_r, ('hg', 'rpc'))) for tracer in self._tracers: cb_list.append((tracer[0], tracer[1])) + if not self.ftest_mode: + mem_r = MemReporting() + mem_r.wf = leak_wf + mem_r.show_memleaks = show_memleaks + cb_list.append((mem_r, None)) + for line in self._li.new_iter(pid=pid, stateful=True): for (cbe, facs) in cb_list: if facs is None or line.fac in facs: @@ -457,8 +463,8 @@ def _check_pid_from_log_file(self, pid, abort_on_warning, leak_wf, show_memleaks err_count += 1 if line.parent not in active_desc: show_line(line, 'error', 'add with bad parent') - if line.parent in regions: - show_line(regions[line.parent], 'NORMAL', + if not self.ftest_mode and line.parent in mem_r.regions: + show_line(mem_r.regions[line.parent], 'NORMAL', 'used as parent without 
registering') err_count += 1 active_desc[desc] = line @@ -480,81 +486,37 @@ def _check_pid_from_log_file(self, pid, abort_on_warning, leak_wf, show_memleaks if desc in active_rpcs: del active_rpcs[desc] else: - show_line(line, 'NORMAL', 'invalid rpc remove') - err_count += 1 + if not self.ftest_mode: + show_line(line, 'NORMAL', 'invalid rpc remove') + err_count += 1 else: if have_debug and desc not in active_desc and desc not in active_rpcs: show_line(line, 'NORMAL', 'inactive desc') - if line.descriptor in regions: - show_line(regions[line.descriptor], 'NORMAL', + if not self.ftest_mode and line.descriptor in mem_r.regions: + show_line(mem_r.regions[line.descriptor], 'NORMAL', 'Used as descriptor without registering') error_files.add(line.filename) err_count += 1 - elif len(line._fields) > 2: - # is_calloc() doesn't work on truncated output so only test if - # there are more than two fields to work with. + else: non_trace_lines += 1 - if line.is_calloc(): - pointer = line.calloc_pointer() - if pointer in regions: - # Report both the old and new allocation points here. - show_line(regions[pointer], 'NORMAL', - 'new allocation seen for same pointer (old)') - show_line(line, 'NORMAL', - 'new allocation seen for same pointer (new)') - err_count += 1 - regions[pointer] = line - memsize.add(line.calloc_size()) - elif line.is_free(): - pointer = line.free_pointer() - # If a pointer is freed then automatically remove the - # descriptor - if pointer in active_desc: - del active_desc[pointer] - if pointer in regions: - memsize.subtract(regions[pointer].calloc_size()) - old_regions[pointer] = [regions[pointer], line] - del regions[pointer] - elif pointer != '(nil)': - # Logs no longer contain free(NULL) however old logs might so continue - # to handle this case. 
- if pointer in old_regions: - if show_line(old_regions[pointer][0], 'ERROR', - 'double-free allocation point'): - print(f'Memory address is {pointer}') - - show_line(old_regions[pointer][1], 'ERROR', '1st double-free location') - show_line(line, 'ERROR', '2nd double-free location') - else: - show_line(line, 'HIGH', 'free of unknown memory') - err_count += 1 - elif line.is_realloc(): - (new_pointer, old_pointer) = line.realloc_pointers() - (new_size, old_size) = line.realloc_sizes() - if new_pointer != '(nil)' and old_pointer != '(nil)': - if old_pointer not in regions: - show_line(line, 'HIGH', 'realloc of unknown memory') - else: - # Use calloc_size() here as the memory might not - # come from a realloc() call. - exp_sz = regions[old_pointer].calloc_size() - if old_size not in (0, exp_sz, new_size): - show_line(line, 'HIGH', 'realloc used invalid old size') - memsize.subtract(exp_sz) - regions[new_pointer] = line - memsize.add(new_size) - if old_pointer not in (new_pointer, '(nil)'): - if old_pointer in regions: - old_regions[old_pointer] = [regions[old_pointer], line] - del regions[old_pointer] - else: - show_line(line, 'NORMAL', 'realloc of unknown memory') - err_count += 1 + if len(line._fields) > 2: + if line.is_free(): + pointer = line.free_pointer() + # If a pointer is freed then automatically remove the descriptor + if pointer in active_desc: + del active_desc[pointer] + + if not self.ftest_mode: + mem_r.active_desc = active_desc del active_desc['root'] for (cbe, _) in cb_list: cbe.report() + if not self.ftest_mode: + active_desc = mem_r.active_desc + err_count += mem_r.err_count + # This isn't currently used anyway. 
# if not have_debug: # print('DEBUG not enabled, No log consistency checking possible') @@ -565,29 +527,9 @@ def _check_pid_from_log_file(self, pid, abort_on_warning, leak_wf, show_memleaks if not self.quiet: print("Pid {}, {} lines total, {} trace ({:.2f}%)".format( pid, total_lines, trace_lines, p_trace)) - if fi_count and memsize.count: + if fi_count and mem_r.memsize.count: print("Number of faults injected {} {:.2f}%".format( - fi_count, (fi_count / memsize.count) * 100)) - - if memsize.has_data(): - print("Memsize: {}".format(memsize)) - - lost_memory = False - if show_memleaks: - for (_, line) in list(regions.items()): - if line.is_calloc(): - pointer = line.calloc_pointer() - else: - assert line.is_realloc() - (pointer, _) = line.realloc_pointers() - if pointer in active_desc: - if show_line(line, 'NORMAL', 'descriptor not freed', custom=leak_wf): - print(f'Memory address is {pointer}') - del active_desc[pointer] - else: - if show_line(line, 'NORMAL', 'memory not freed', custom=leak_wf): - print(f'Memory address is {pointer}') - lost_memory = True + fi_count, (fi_count / mem_r.memsize.count) * 100)) if active_desc: for (_, line) in list(active_desc.items()): @@ -599,7 +541,7 @@ def _check_pid_from_log_file(self, pid, abort_on_warning, leak_wf, show_memleaks show_line(line, 'NORMAL', 'rpc not deregistered') if error_files or err_count: raise LogError() - if lost_memory: + if not self.ftest_mode and mem_r.lost_memory: raise NotAllFreed() if warnings_strict: raise WarningStrict() @@ -607,6 +549,96 @@ def _check_pid_from_log_file(self, pid, abort_on_warning, leak_wf, show_memleaks raise WarningMode() +class MemReporting(): + """Class for checking memory allocations""" + + def __init__(self): + self.memsize = HwmCounter() + self.regions = {} + self._old_regions = {} + self.err_count = 0 + self.wf = None + self.lost_memory = False + self.show_memleaks = True + self.active_desc = None + + def add_line(self, line): + """Parse an output line""" + err_count = 0 + if 
line.is_calloc(): + pointer = line.calloc_pointer() + if pointer in self.regions: + # Report both the old and new allocation points here. + show_line(self.regions[pointer], 'NORMAL', + 'new allocation seen for same pointer (old)') + show_line(line, 'NORMAL', 'new allocation seen for same pointer (new)') + err_count += 1 + self.regions[pointer] = line + self.memsize.add(line.calloc_size()) + elif line.is_free(): + pointer = line.free_pointer() + if pointer in self.regions: + self.memsize.subtract(self.regions[pointer].calloc_size()) + self._old_regions[pointer] = [self.regions[pointer], line] + del self.regions[pointer] + elif pointer != '(nil)': + # Logs no longer contain free(NULL) however old logs might so continue to handle + # this case. + if pointer in self._old_regions: + if show_line(self._old_regions[pointer][0], 'ERROR', + 'double-free allocation point'): + print(f'Memory address is {pointer}') + + show_line(self._old_regions[pointer][1], 'ERROR', '1st double-free location') + show_line(line, 'ERROR', '2nd double-free location') + else: + show_line(line, 'HIGH', 'free of unknown memory') + err_count += 1 + elif line.is_realloc(): + (new_pointer, old_pointer) = line.realloc_pointers() + (new_size, old_size) = line.realloc_sizes() + if new_pointer != '(nil)' and old_pointer != '(nil)': + if old_pointer not in self.regions: + show_line(line, 'HIGH', 'realloc of unknown memory') + else: + # Use calloc_size() here as the memory might not come from a realloc() call. 
+ exp_sz = self.regions[old_pointer].calloc_size() + if old_size not in (0, exp_sz, new_size): + show_line(line, 'HIGH', 'realloc used invalid old size') + self.memsize.subtract(exp_sz) + self.regions[new_pointer] = line + self.memsize.add(new_size) + if old_pointer not in (new_pointer, '(nil)'): + if old_pointer in self.regions: + self._old_regions[old_pointer] = [self.regions[old_pointer], line] + del self.regions[old_pointer] + else: + show_line(line, 'NORMAL', 'realloc of unknown memory') + err_count += 1 + self.err_count += err_count + + def report(self): + """Report the results""" + if self.memsize.has_data(): + print("Memsize: {}".format(self.memsize)) + + if self.show_memleaks: + for (_, line) in list(self.regions.items()): + if line.is_calloc(): + pointer = line.calloc_pointer() + else: + assert line.is_realloc() + (pointer, _) = line.realloc_pointers() + if pointer in self.active_desc: + if show_line(line, 'NORMAL', 'descriptor not freed', custom=self.wf): + print(f'Memory address is {pointer}') + del self.active_desc[pointer] + else: + if show_line(line, 'NORMAL', 'memory not freed', custom=self.wf): + print(f'Memory address is {pointer}') + self.lost_memory = True + + class RpcReporting(): """Class for reporting a summary of RPC states""" @@ -625,6 +657,7 @@ def __init__(self): self._c_states = {} self._c_state_names = set() self._current_opcodes = {} + self.dynamic_level = False def add_line(self, line): """Parse a output line""" @@ -666,9 +699,18 @@ def add_line(self, line): if rpc_state == 'ALLOCATED': self._current_opcodes[rpc] = opcode else: - opcode = self._current_opcodes[rpc] + try: + opcode = self._current_opcodes[rpc] + except KeyError: + if not self.dynamic_level: + raise + opcode = 'unknown' if rpc_state == 'DEALLOCATED': - del self._current_opcodes[rpc] + try: + del self._current_opcodes[rpc] + except KeyError: + if not self.dynamic_level: + raise if opcode not in self._op_state_counters: self._op_state_counters[opcode] = {'ALLOCATED': 0, 
@@ -733,6 +775,7 @@ def run(): parser = argparse.ArgumentParser() parser.add_argument('--dfuse', help='Summarise dfuse I/O', action='store_true') parser.add_argument('--warnings', action='store_true') + parser.add_argument('--ftest-mode', action='store_true') parser.add_argument('file', help='input file') args = parser.parse_args() try: @@ -747,7 +790,21 @@ def run(): # the encoding on, in which case this second attempt would fail with # an out-of-memory error. log_iter = cart_logparse.LogIter(args.file, check_encoding=True) + + # ftest mode is called from launch.py for logs after functional testing. + # It logs everything to a output file, and does not perform memory leak or double-free checks. + if args.ftest_mode: + in_file = args.file + if in_file.endswith('.bz2'): + in_file = args.file[:-4] + out_fd = open(f'{in_file}.cart_logtest', 'w') # pylint: disable=consider-using-with + real_stdout = sys.stdout + sys.stdout = out_fd + print(f'Logging to {in_file}.cart_logtest', file=real_stdout) + test_iter = LogTest(log_iter) + if args.ftest_mode: + test_iter.ftest_mode = True if args.dfuse: test_iter.check_dfuse_io() else: diff --git a/src/tests/ftest/launch.py b/src/tests/ftest/launch.py index 03da8e3718b..1b944aef468 100755 --- a/src/tests/ftest/launch.py +++ b/src/tests/ftest/launch.py @@ -2761,7 +2761,7 @@ def _cart_log_test(self, hosts, source, pattern, depth): logger.debug("-" * 80) logger.debug("Running %s on %s files on %s", cart_logtest, source_files, hosts) other = ["-print0", "|", "xargs", "-0", "-r0", "-n1", "-I", "%", "sh", "-c", - f"'{cart_logtest} % > %.cart_logtest 2>&1'"] + f"'{cart_logtest} --ftest-mode %'"] result = run_remote( logger, hosts, find_command(source, pattern, depth, other), timeout=4800) if not result.passed: From 0a3805d0ed355c933e1b22fd30528bf660679984 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Wed, 27 Sep 2023 09:03:36 -0700 Subject: [PATCH 59/80] DAOS-9653 control:Improve pool creation error message (#13056) Updated the error 
message in case of wrong value or none is provided. Code is trying to pass the command line value to ParseFloat(). If it's a string it returns ErrSyntax like "strconv.ParseFloat: parsing "": invalid syntax". Which is little bit more detailed coding error. So instead returning the ErrSyntax, it's wrapped with descriptive message. Signed-off-by: Samir Raval --- src/control/cmd/daos/flags.go | 2 +- src/control/cmd/daos/flags_test.go | 2 +- src/control/cmd/dmg/pool.go | 4 ++-- src/control/cmd/dmg/pool_test.go | 4 ++++ 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/control/cmd/daos/flags.go b/src/control/cmd/daos/flags.go index aa1018ed5a0..43975840719 100644 --- a/src/control/cmd/daos/flags.go +++ b/src/control/cmd/daos/flags.go @@ -99,7 +99,7 @@ func (f *ChunkSizeFlag) UnmarshalFlag(fv string) error { size, err := humanize.ParseBytes(fv) if err != nil { - return err + return errors.Errorf("invalid chunk-size %q", fv) } f.Size = C.uint64_t(size) diff --git a/src/control/cmd/daos/flags_test.go b/src/control/cmd/daos/flags_test.go index 51eb197becd..56fd5e99e02 100644 --- a/src/control/cmd/daos/flags_test.go +++ b/src/control/cmd/daos/flags_test.go @@ -227,7 +227,7 @@ func TestFlags_ChunkSizeFlag(t *testing.T) { }, "not a size": { arg: "snausages", - expErr: errors.New("ParseFloat"), + expErr: errors.New("invalid chunk-size \"snausages\""), }, // TODO: More validation of allowed sizes? 
} { diff --git a/src/control/cmd/dmg/pool.go b/src/control/cmd/dmg/pool.go index df6936c8a9f..1734de168dd 100644 --- a/src/control/cmd/dmg/pool.go +++ b/src/control/cmd/dmg/pool.go @@ -92,7 +92,7 @@ func (trf *tierRatioFlag) UnmarshalFlag(fv string) error { for _, trStr := range strings.Split(fv, ",") { tr, err := strconv.ParseFloat(strings.TrimSpace(strings.Trim(trStr, "%")), 64) if err != nil { - return errors.Wrapf(err, "invalid tier ratio %s", trStr) + return errors.Errorf("invalid tier ratio %q", trStr) } trf.ratios = append(trf.ratios, roundFloatTo(tr, 2)/100) } @@ -137,7 +137,7 @@ func (sf *sizeFlag) UnmarshalFlag(fv string) (err error) { sf.bytes, err = humanize.ParseBytes(fv) if err != nil { - return errors.Wrapf(err, "invalid size %q", fv) + return errors.Errorf("invalid size %q", fv) } return nil diff --git a/src/control/cmd/dmg/pool_test.go b/src/control/cmd/dmg/pool_test.go index 0c53669aa10..ae49324f214 100644 --- a/src/control/cmd/dmg/pool_test.go +++ b/src/control/cmd/dmg/pool_test.go @@ -42,6 +42,10 @@ func Test_Dmg_PoolTierRatioFlag(t *testing.T) { "empty": { expErr: errors.New("no tier ratio specified"), }, + "invalid": { + input: "ABCD", + expErr: errors.New("invalid tier ratio \"ABCD\""), + }, "less than 100%": { input: "10,80", expErr: errors.New("must add up to"), From abcafb8b668adcdf6028c55f3efaa8669ad19df1 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Wed, 27 Sep 2023 11:22:17 -0700 Subject: [PATCH 60/80] DAOS-13759 control: Update support collect-log tool. (#12906) - Added sysctl output to the log - Added list of rpms on servers - Added system Environment (printenv) - Add timestamp for log folder and zip archive - Added final summary in case any failure, It will print the failed command and host name with error failure. - Updated permission of the log folder, to avoid the log access by other users. 
Signed-off-by: Samir Raval --- src/control/cmd/daos_agent/support.go | 5 +- src/control/cmd/daos_server/support.go | 4 +- src/control/cmd/dmg/pretty/printers.go | 41 +++++++- src/control/cmd/dmg/pretty/printers_test.go | 98 ++++++++++++++++++- src/control/cmd/dmg/support.go | 28 +++--- src/control/common/proto/mgmt/pool.pb.go | 2 +- src/control/lib/control/pool.go | 2 +- src/control/lib/support/log.go | 7 +- src/mgmt/pool.pb-c.h | 2 +- src/proto/mgmt/pool.proto | 2 +- .../daos_server_support_collect_log.py | 1 + .../ftest/control/dmg_support_collect_log.py | 1 + src/tests/ftest/util/server_utils.py | 2 +- src/tests/ftest/util/support_test_base.py | 23 +++-- 14 files changed, 187 insertions(+), 31 deletions(-) diff --git a/src/control/cmd/daos_agent/support.go b/src/control/cmd/daos_agent/support.go index ae9e0d29443..c25e3734454 100644 --- a/src/control/cmd/daos_agent/support.go +++ b/src/control/cmd/daos_agent/support.go @@ -10,6 +10,7 @@ import ( "fmt" "os" "path/filepath" + "time" "github.com/daos-stack/daos/src/control/common/cmdutil" "github.com/daos-stack/daos/src/control/lib/support" @@ -54,8 +55,10 @@ func (cmd *collectLogCmd) Execute(_ []string) error { } if cmd.TargetFolder == "" { - cmd.TargetFolder = filepath.Join(os.TempDir(), "daos_support_client_logs") + folderName := fmt.Sprintf("daos_support_client_logs_%s", time.Now().Format(time.RFC3339)) + cmd.TargetFolder = filepath.Join(os.TempDir(), folderName) } + cmd.Infof("Support Logs will be copied to %s", cmd.TargetFolder) progress.Steps = 100 / progress.Total diff --git a/src/control/cmd/daos_server/support.go b/src/control/cmd/daos_server/support.go index 49d42f3bd46..9e52e2cb167 100644 --- a/src/control/cmd/daos_server/support.go +++ b/src/control/cmd/daos_server/support.go @@ -10,6 +10,7 @@ import ( "fmt" "os" "path/filepath" + "time" "github.com/daos-stack/daos/src/control/common/cmdutil" "github.com/daos-stack/daos/src/control/lib/support" @@ -52,7 +53,8 @@ func (cmd *collectLogCmd) Execute(_ 
[]string) error {
 	}
 
 	if cmd.TargetFolder == "" {
-		cmd.TargetFolder = filepath.Join(os.TempDir(), "daos_support_server_logs")
+		folderName := fmt.Sprintf("daos_support_server_logs_%s", time.Now().Format(time.RFC3339))
+		cmd.TargetFolder = filepath.Join(os.TempDir(), folderName)
 	}
 	cmd.Infof("Support logs will be copied to %s", cmd.TargetFolder)
 
diff --git a/src/control/cmd/dmg/pretty/printers.go b/src/control/cmd/dmg/pretty/printers.go
index c0b9d33cb89..997dee952f4 100644
--- a/src/control/cmd/dmg/pretty/printers.go
+++ b/src/control/cmd/dmg/pretty/printers.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2020-2022 Intel Corporation.
+// (C) Copyright 2020-2023 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -14,6 +14,7 @@ import (
 
 	"github.com/pkg/errors"
 
+	"github.com/daos-stack/daos/src/control/common"
 	"github.com/daos-stack/daos/src/control/fault"
 	"github.com/daos-stack/daos/src/control/lib/control"
 	"github.com/daos-stack/daos/src/control/lib/txtfmt"
@@ -155,3 +156,41 @@ func PrintResponseErrors(resp hostErrorsGetter, out io.Writer, opts ...PrintConf
 
 	return nil
 }
+
+// UpdateErrorSummary generates a human-readable representation of the supplied
+// HostErrorsMap summary struct and writes it to the supplied io.Writer.
+func UpdateErrorSummary(resp hostErrorsGetter, cmd string, out io.Writer, opts ...PrintConfigOption) error {
+	if common.InterfaceIsNil(resp) {
+		return errors.Errorf("nil %T", resp)
+	}
+
+	if len(resp.GetHostErrors()) > 0 {
+		setTitle := "Hosts"
+		cmdTitle := "Command"
+		errTitle := "Error"
+
+		tablePrint := txtfmt.NewTableFormatter(setTitle, cmdTitle, errTitle)
+		tablePrint.InitWriter(out)
+		table := []txtfmt.TableRow{}
+
+		for _, errStr := range resp.GetHostErrors().Keys() {
+			errHosts := getPrintHosts(resp.GetHostErrors()[errStr].HostSet.RangedString(), opts...)
+			row := txtfmt.TableRow{setTitle: errHosts}
+
+			// Unpack the root cause error. If it's a fault,
+			// just print the description.
+ hostErr := errors.Cause(resp.GetHostErrors()[errStr].HostError) + row[cmdTitle] = cmd + row[errTitle] = hostErr.Error() + if f, ok := hostErr.(*fault.Fault); ok { + row[errTitle] = f.Description + } + + table = append(table, row) + } + + tablePrint.Format(table) + } + + return nil +} diff --git a/src/control/cmd/dmg/pretty/printers_test.go b/src/control/cmd/dmg/pretty/printers_test.go index 22cf9c1f302..c39abf5693a 100644 --- a/src/control/cmd/dmg/pretty/printers_test.go +++ b/src/control/cmd/dmg/pretty/printers_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -13,6 +13,7 @@ import ( "github.com/google/go-cmp/cmp" + "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/control" ) @@ -107,3 +108,98 @@ host1:1,host2:2 whoops }) } } + +func TestControl_UpdateErrorSummary(t *testing.T) { + for name, tc := range map[string]struct { + resp *control.CollectLogResp + cmd string + expStdout string + expErr error + }{ + "nil response": { + resp: nil, + cmd: "empty", + expStdout: ``, + expErr: errors.New("nil *control.CollectLogResp"), + }, + "empty response": { + resp: new(control.CollectLogResp), + cmd: "empty", + expStdout: ``, + expErr: nil, + }, + "one host error": { + cmd: "hostname", + resp: &control.CollectLogResp{ + HostErrorsResp: control.MockHostErrorsResp(t, + &control.MockHostError{ + Hosts: "host1", + Error: "command not found", + }), + }, + expStdout: ` +Hosts Command Error +----- ------- ----- +host1 hostname command not found +`, + expErr: nil, + }, + "Two host, same error": { + cmd: "hostname", + resp: &control.CollectLogResp{ + HostErrorsResp: control.MockHostErrorsResp(t, + &control.MockHostError{ + Hosts: "host1", + Error: "command not found", + }, + &control.MockHostError{ + Hosts: "host2", + Error: "command not found", + }), + }, + expStdout: ` +Hosts Command Error +----- ------- 
----- +host[1-2] hostname command not found +`, + expErr: nil, + }, + "Two host, different error": { + cmd: "hostname", + resp: &control.CollectLogResp{ + HostErrorsResp: control.MockHostErrorsResp(t, + &control.MockHostError{ + Hosts: "host1", + Error: "command not found", + }, + &control.MockHostError{ + Hosts: "host2", + Error: "command not available", + }), + }, + expStdout: ` +Hosts Command Error +----- ------- ----- +host1 hostname command not found +host2 hostname command not available +`, + expErr: nil, + }, + } { + t.Run(name, func(t *testing.T) { + var out strings.Builder + + err := UpdateErrorSummary(tc.resp, tc.cmd, &out) + + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + if diff := cmp.Diff(strings.TrimLeft(tc.expStdout, "\n"), out.String()); diff != "" { + t.Fatalf("unexpected print output (-want, +got):\n%s\n", diff) + } + + }) + } +} diff --git a/src/control/cmd/dmg/support.go b/src/control/cmd/dmg/support.go index 11bf13303b2..c18aa28966a 100644 --- a/src/control/cmd/dmg/support.go +++ b/src/control/cmd/dmg/support.go @@ -12,6 +12,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/daos-stack/daos/src/control/cmd/dmg/pretty" "github.com/daos-stack/daos/src/control/common/cmdutil" @@ -32,6 +33,7 @@ type collectLogCmd struct { hostListCmd cmdutil.JSONOutputCmd support.CollectLogSubCmd + bld strings.Builder } // gRPC call to initiate the rsync and copy the logs to Admin (central location). 
@@ -52,11 +54,9 @@ func (cmd *collectLogCmd) rsyncLog() error { return err } if len(resp.GetHostErrors()) > 0 { - var bld strings.Builder - if err := pretty.PrintResponseErrors(resp, &bld); err != nil { + if err := pretty.UpdateErrorSummary(resp, "rsync", &cmd.bld); err != nil { return err } - cmd.Info(bld.String()) return resp.Errors() } @@ -81,11 +81,9 @@ func (cmd *collectLogCmd) archLogsOnServer() error { return err } if len(resp.GetHostErrors()) > 0 { - var bld strings.Builder - if err := pretty.PrintResponseErrors(resp, &bld); err != nil { + if err := pretty.UpdateErrorSummary(resp, "archive", &cmd.bld); err != nil { return err } - cmd.Info(bld.String()) return resp.Errors() } @@ -127,8 +125,10 @@ func (cmd *collectLogCmd) Execute(_ []string) error { progress.Steps = 100 / progress.Total // Default TargetFolder location where logs will be copied. + // Included Date and time stamp to the log folder. if cmd.TargetFolder == "" { - cmd.TargetFolder = filepath.Join(os.TempDir(), "daos_support_server_logs") + folderName := fmt.Sprintf("daos_support_server_logs_%s", time.Now().Format(time.RFC3339)) + cmd.TargetFolder = filepath.Join(os.TempDir(), folderName) } cmd.Infof("Support logs will be copied to %s", cmd.TargetFolder) if err := os.Mkdir(cmd.TargetFolder, 0700); err != nil && !os.IsExist(err) { @@ -166,11 +166,10 @@ func (cmd *collectLogCmd) Execute(_ []string) error { return err } if len(resp.GetHostErrors()) > 0 { - var bld strings.Builder - if err := pretty.PrintResponseErrors(resp, &bld); err != nil { + if err := pretty.UpdateErrorSummary(resp, logCmd, &cmd.bld); err != nil { return err } - cmd.Info(bld.String()) + if cmd.Stop { return resp.Errors() } @@ -193,7 +192,6 @@ func (cmd *collectLogCmd) Execute(_ []string) error { err := support.CollectSupportLog(cmd.Logger, params) if err != nil { - fmt.Println(err) if cmd.Stop { return err } @@ -235,5 +233,13 @@ func (cmd *collectLogCmd) Execute(_ []string) error { return cmd.OutputJSON(nil, err) } + // Print 
the support command summary. + if len(cmd.bld.String()) == 0 { + fmt.Println("Summary : All Commands Successfully Executed") + } else { + fmt.Println("Summary :") + cmd.Info(cmd.bld.String()) + } + return nil } diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index 60ae78baa40..ddfd6d76f96 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -78,7 +78,7 @@ const ( PoolServiceState_Creating PoolServiceState = 0 // pool service is being created PoolServiceState_Ready PoolServiceState = 1 // pool service is ready to be used PoolServiceState_Destroying PoolServiceState = 2 // pool service is being destroyed - PoolServiceState_Degraded PoolServiceState = 3 // pool service is being Degraded + PoolServiceState_Degraded PoolServiceState = 3 // pool service is degraded PoolServiceState_Unknown PoolServiceState = 4 // pool service is Unknown state ) diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index 4982a9edc61..d4fe1a2d077 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -626,7 +626,7 @@ func PoolQuery(ctx context.Context, rpcClient UnaryInvoker, req *PoolQueryReq) ( return pqr, err } -// Update the pool state +// UpdateState update the pool state. func (pqr *PoolQueryResp) UpdateState() error { // Update the state as Ready if DAOS return code is 0. 
if pqr.Status == 0 { diff --git a/src/control/lib/support/log.go b/src/control/lib/support/log.go index e8d10dbd94e..fa21077dab3 100644 --- a/src/control/lib/support/log.go +++ b/src/control/lib/support/log.go @@ -94,6 +94,9 @@ var SystemCmd = []string{ "ps axf", "top -bcn1 -w512", "lspci -D", + "sysctl -a", + "printenv", + "rpm -qa --qf '(%{INSTALLTIME:date}): %{NAME}-%{VERSION}\n'", } var ServerLog = []string{ @@ -239,7 +242,7 @@ func ArchiveLogs(log logging.Logger, opts ...CollectLogsParams) error { // write to the the .tar.gz tarFileName := fmt.Sprintf("%s.tar.gz", opts[0].TargetFolder) log.Debugf("Archiving the log folder %s", tarFileName) - fileToWrite, err := os.OpenFile(tarFileName, os.O_CREATE|os.O_RDWR, os.FileMode(0755)) + fileToWrite, err := os.OpenFile(tarFileName, os.O_CREATE|os.O_RDWR, os.FileMode(0600)) if err != nil { return err } @@ -269,7 +272,7 @@ func createFolder(target string, log logging.Logger) error { if _, err := os.Stat(target); err != nil { log.Debugf("Log folder is not Exists, so creating %s", target) - if err := os.MkdirAll(target, 0777); err != nil { + if err := os.MkdirAll(target, 0700); err != nil { return err } } diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index 9357267326f..a8c0a5efadc 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -127,7 +127,7 @@ typedef enum _Mgmt__PoolServiceState { */ MGMT__POOL_SERVICE_STATE__Destroying = 2, /* - * pool service is being Degraded + * pool service is degraded */ MGMT__POOL_SERVICE_STATE__Degraded = 3, /* diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index 51b55b1254f..41533d2fdd1 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -212,7 +212,7 @@ enum PoolServiceState { Creating = 0; // pool service is being created Ready = 1; // pool service is ready to be used Destroying = 2; // pool service is being destroyed - Degraded = 3 ; // pool service is being Degraded + Degraded = 3 ; // pool service is degraded Unknown = 4 ; 
// pool service is Unknown state } diff --git a/src/tests/ftest/control/daos_server_support_collect_log.py b/src/tests/ftest/control/daos_server_support_collect_log.py index 542b8220df1..6858cb92b12 100644 --- a/src/tests/ftest/control/daos_server_support_collect_log.py +++ b/src/tests/ftest/control/daos_server_support_collect_log.py @@ -25,6 +25,7 @@ def test_daos_server_support_collect_log(self): :avocado: tags=DaosSupportCollectLogTest,test_daos_server_support_collect_log """ self.log_hosts = self.hostlist_servers + self.run_user = 'daos_server' # Create the custom log data which will be collected via support collect-log, # Later verify the data file is archived as part of collection. self.create_custom_log("Server_Support_Logs") diff --git a/src/tests/ftest/control/dmg_support_collect_log.py b/src/tests/ftest/control/dmg_support_collect_log.py index 8d95da510f8..24d537329cb 100644 --- a/src/tests/ftest/control/dmg_support_collect_log.py +++ b/src/tests/ftest/control/dmg_support_collect_log.py @@ -25,6 +25,7 @@ def test_dmg_support_collect_log(self): :avocado: tags=DmgSupportCollectLogTest,test_dmg_support_collect_log """ self.log_hosts = self.hostlist_servers + self.run_user = 'daos_server' # Create the custom log data which will be collected via support collect-log, # Later verify the dame data file is archived as part of collection. 
self.create_custom_log("Support_Custom_Dir") diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 87716bb0465..05ff15a1ac3 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -466,7 +466,7 @@ def support_collect_log(self, **kwargs): """ cmd = DaosServerCommand(self.manager.job.command_path) - cmd.sudo = False + cmd.run_user = "daos_server" cmd.debug.value = False cmd.config.value = get_default_config_file("server") self.log.info("Support collect-log on servers: %s", str(cmd)) diff --git a/src/tests/ftest/util/support_test_base.py b/src/tests/ftest/util/support_test_base.py index 27676f36a95..d2d8f8e2dd7 100644 --- a/src/tests/ftest/util/support_test_base.py +++ b/src/tests/ftest/util/support_test_base.py @@ -7,7 +7,7 @@ from datetime import datetime from control_test_base import ControlTestBase -from run_utils import run_remote +from run_utils import run_remote, command_as_user class SupportTestBase(ControlTestBase): @@ -21,6 +21,7 @@ def __init__(self, *args, **kwargs): self.custom_log_file = None self.custom_log_data = None self.log_hosts = None + self.run_user = 'root' self.extract_dir = os.path.join(self.base_test_dir, "extracted_support_logs") def create_custom_log(self, folder_name): @@ -54,10 +55,12 @@ def verify_custom_log_data(self): """Verify custom log files is collected and part of archive. 
""" - read_filedata = "find {} -name {} | xargs cat".format( + getfilename = "find {} -name {}".format( self.extract_dir, os.path.basename(self.custom_log_file)) + findcmd = command_as_user(getfilename, self.run_user) + readfiledata = command_as_user("xargs cat", self.run_user) - result = run_remote(self.log, self.log_hosts, read_filedata) + result = run_remote(self.log, self.log_hosts, findcmd + "|" + readfiledata) if not result.passed: self.fail("Failed to read the custom log file {} ".format(result)) @@ -77,14 +80,15 @@ def extract_logs(self, tar_gz_filename): """ # Create the new extract directory - cmd = "mkdir -p {}".format(self.extract_dir) - result = run_remote(self.log, self.log_hosts, cmd) + command = command_as_user("mkdir -p {}".format(self.extract_dir), self.run_user) + result = run_remote(self.log, self.log_hosts, command) if not result.passed: - self.fail("cmd {} failed, result:{}".format(cmd, result)) + self.fail("cmd {} failed, result:{}".format(command, result)) # Extract The tar.gz file to newly created directory - cmd = "tar -xf {} -C {}".format(tar_gz_filename, self.extract_dir) - result = run_remote(self.log, self.log_hosts, cmd) + command = command_as_user("tar -xf {} -C {}".format(tar_gz_filename, self.extract_dir), + self.run_user) + result = run_remote(self.log, self.log_hosts, command) if not result.passed: self.fail("Failed to extract the {} file, result:{}".format(tar_gz_filename, result)) @@ -106,7 +110,8 @@ def validate_server_log_files(self): # Verify server log files are collected. 
for log_file in log_files: list_file = "ls -lsaRt {} | grep {}".format(self.extract_dir, log_file) - result = run_remote(self.log, self.log_hosts, list_file) + command = command_as_user(list_file, self.run_user) + result = run_remote(self.log, self.log_hosts, command) if not result.passed: self.fail("Failed to list the {} file from extracted folder{}".format( result, self.extract_dir)) From 65f0b9fe2acd397a27513969c74204c3dea00acc Mon Sep 17 00:00:00 2001 From: Colin Howes <16161867+chowes@users.noreply.github.com> Date: Thu, 28 Sep 2023 01:12:39 -0700 Subject: [PATCH 61/80] DAOSGCP-213 dfuse: Ignore ENOENT errors on file descriptors (#12950) libfuse supports opening /dev/fuse and passing the file descriptor as the mountpoint. In some cases, realpath may not work for these file descriptors, and so we should ignore ENOENT errors and instead check that we can get file descriptor attributes from the given path. Signed-off-by: Colin Howes --- src/client/dfuse/dfuse_main.c | 45 +++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/client/dfuse/dfuse_main.c b/src/client/dfuse/dfuse_main.c index 27a0e0be092..26f3cf82d09 100644 --- a/src/client/dfuse/dfuse_main.c +++ b/src/client/dfuse/dfuse_main.c @@ -7,8 +7,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -348,6 +350,35 @@ show_help(char *name) name, DAOS_VERSION); } +/* + * Checks whether a mountpoint path is a valid file descriptor. + * + * Returns the file descriptor on success, -1 on failure. 
+ */ +static int +check_fd_mountpoint(const char *mountpoint) +{ + int fd = -1; + int len = 0; + int fd_flags; + int res; + + res = sscanf(mountpoint, "/dev/fd/%u%n", &fd, &len); + if (res != 1) { + return -1; + } + if (len != strnlen(mountpoint, NAME_MAX)) { + return -1; + } + + fd_flags = fcntl(fd, F_GETFD); + if (fd_flags == -1) { + return -1; + } + + return fd; +} + int main(int argc, char **argv) { @@ -627,8 +658,18 @@ main(int argc, char **argv) duns_destroy_attr(&duns_attr); } else if (rc == ENOENT) { - printf("Mount point does not exist\n"); - D_GOTO(out_daos, rc = daos_errno2der(rc)); + /* In order to allow FUSE daemons to run without privileges, libfuse + * allows the caller to open /dev/fuse and pass the file descriptor by + * specifying /dev/fd/N as the mountpoint. In some cases, realpath may + * fail for these paths. + */ + int fd = check_fd_mountpoint(dfuse_info->di_mountpoint); + if (fd < 0) { + DFUSE_TRA_WARNING(dfuse_info, "Mount point is not a valid file descriptor"); + printf("Mount point does not exist\n"); + D_GOTO(out_daos, rc = daos_errno2der(rc)); + } + DFUSE_LOG_INFO("Mounting FUSE file descriptor %d", fd); } else if (rc == ENOTCONN) { printf("Stale mount point, run fusermount3 and retry\n"); D_GOTO(out_daos, rc = daos_errno2der(rc)); From 88807c372542bc2a0f6cd7325a162b0612c3aab5 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 28 Sep 2023 08:07:07 -0500 Subject: [PATCH 62/80] DAOS-623 dfs: make the dfs pipeline api public with caveat (#13080) - Similar to the daos_pipeline API, make the DFS filter API public with a comment in the header that it should not be used in production. - Free bulk handles in pipeline API to avoid leaks. 
Signed-off-by: Mohamad Chaarawi --- src/client/dfs/dfs_internal.h | 91 -------------------------------- src/include/daos_fs.h | 98 +++++++++++++++++++++++++++++++++++ src/pipeline/cli_pipeline.c | 10 ++++ 3 files changed, 108 insertions(+), 91 deletions(-) diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 83ac13aeaab..c337ec1bf42 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -131,97 +131,6 @@ dfs_relink_root(daos_handle_t coh); int dfs_ostatx(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, daos_event_t *ev); -/** Internal pipeline readdir functionality */ - -/** DFS pipeline object */ -typedef struct dfs_pipeline dfs_pipeline_t; - -enum { - DFS_FILTER_NAME = (1 << 1), - DFS_FILTER_NEWER = (1 << 2), - DFS_FILTER_INCLUDE_DIRS = (1 << 3), -}; - -/** Predicate conditions for filter */ -typedef struct { - char dp_name[DFS_MAX_NAME]; /** name condition for entry - regex */ - time_t dp_newer; /** timestamp for newer condition */ - size_t dp_size; /** size of files - not supported for now */ -} dfs_predicate_t; - -/** - * Same as dfs_get_size() but using the OID of the file instead of the open handle. Note that the - * chunk_size of the file is also required to be passed if the file was created with a different - * chunk size than the default (passing other than 0 to dfs_open). Otherwise, 0 should be passed to - * chunk size. - * - * \param[in] dfs Pointer to the mounted file system. - * \param[in] oid Object ID of the file. - * \param[in] chunk_size Chunk size of the file (pass 0 if it was created with default). - * \param[out] size Returned size of the file. - * - * \return 0 on success, errno code on failure. - */ -int -dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size); - -/** - * Create a pipeline object to be used during readdir with filter. Should be destroyed with - * dfs_pipeline_destroy(). - * - * \param[in] dfs Pointer to the mounted file system. 
- * \param[in] pred Predicate condition values (name/regex, newer timestamp, etc.). - * \param[in] flags Pipeline flags (conditions to apply). - * \param[out] dpipe Pipeline object created. - * - * \return 0 on success, errno code on failure. - */ -int -dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **dpipe); - -/** - * Destroy pipeline object. - * - * \param[in] dpipe Pipeline object. - * - * \return 0 on success, errno code on failure. - */ -int -dfs_pipeline_destroy(dfs_pipeline_t *dpipe); - -/** - * Same as dfs_readdir() but this additionally applies a filter created with dfs_pipeline_create() - * on the entries that are enumerated. This function also optionally returns the object ID of each - * dirent if requested through a pre-allocated OID input array. - * - * \param[in] dfs Pointer to the mounted file system. - * \param[in] obj Opened directory object. - * \param[in] dpipe DFS pipeline filter. - * \param[in,out] - * anchor Hash anchor for the next call, it should be set to - * zeroes for the first call, it should not be changed - * by caller between calls. - * \param[in,out] - * nr [in]: number of dirents allocated in \a dirs. - * [out]: number of returned dirents. - * \param[in,out] - * dirs [in] preallocated array of dirents. - * [out]: dirents returned with d_name filled only. - * \param[in,out] - * oids [in] Optional preallocated array of object IDs. - * [out]: Object ID associated with each dirent that was read. - * \param[in,out] - * csizes [in] Optional preallocated array of sizes. - * [out]: chunk size associated with each dirent that was read. - * \param[out] Total number of entries scanned by readdir before returning. - * - * \return 0 on success, errno code on failure. 
- */ -int -dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, - uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csizes, - uint64_t *nr_scanned); - #if defined(__cplusplus) } #endif diff --git a/src/include/daos_fs.h b/src/include/daos_fs.h index ab8568a2974..95433bf966c 100644 --- a/src/include/daos_fs.h +++ b/src/include/daos_fs.h @@ -1167,6 +1167,104 @@ enum { int dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char *name); +/* + * The Pipeline DFS API (everything under this comment) is under heavy development and should not be + * used in production. The API is subject to change. + */ + +/** DFS pipeline object */ +typedef struct dfs_pipeline dfs_pipeline_t; + +enum { + DFS_FILTER_NAME = (1 << 1), + DFS_FILTER_NEWER = (1 << 2), + DFS_FILTER_INCLUDE_DIRS = (1 << 3), +}; + +/** Predicate conditions for filter */ +typedef struct { + /** name condition for entry - regex */ + char dp_name[DFS_MAX_NAME]; + /** timestamp for newer condition */ + time_t dp_newer; + /** size of files - not supported for now */ + size_t dp_size; +} dfs_predicate_t; + +/** + * Same as dfs_get_size() but using the OID of the file instead of the open handle. Note that the + * chunk_size of the file is also required to be passed if the file was created with a different + * chunk size than the default (passing other than 0 to dfs_open). Otherwise, 0 should be passed to + * chunk size. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] oid Object ID of the file. + * \param[in] chunk_size Chunk size of the file (pass 0 if it was created with default). + * \param[out] size Returned size of the file. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size); + +/** + * Create a pipeline object to be used during readdir with filter. Should be destroyed with + * dfs_pipeline_destroy(). 
+ * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] pred Predicate condition values (name/regex, newer timestamp, etc.). + * \param[in] flags Pipeline flags (conditions to apply). + * \param[out] dpipe Pipeline object created. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **dpipe); + +/** + * Destroy pipeline object. + * + * \param[in] dpipe Pipeline object. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_pipeline_destroy(dfs_pipeline_t *dpipe); + +/** + * Same as dfs_readdir() but this additionally applies a filter created with dfs_pipeline_create() + * on the entries that are enumerated. This function also optionally returns the object ID of each + * dirent if requested through a pre-allocated OID input array. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] obj Opened directory object. + * \param[in] dpipe DFS pipeline filter. + * \param[in,out] + * anchor Hash anchor for the next call, it should be set to + * zeroes for the first call, it should not be changed + * by caller between calls. + * \param[in,out] + * nr [in]: number of dirents allocated in \a dirs. + * [out]: number of returned dirents. + * \param[in,out] + * dirs [in] preallocated array of dirents. + * [out]: dirents returned with d_name filled only. + * \param[in,out] + * oids [in] Optional preallocated array of object IDs. + * [out]: Object ID associated with each dirent that was read. + * \param[in,out] + * csizes [in] Optional preallocated array of sizes. + * [out]: chunk size associated with each dirent that was read. + * \param[out] nr_scanned + * Total number of entries scanned by readdir before returning. + * + * \return 0 on success, errno code on failure. 
+ */ +int +dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, + uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csizes, + uint64_t *nr_scanned); + #if defined(__cplusplus) } #endif /* __cplusplus */ diff --git a/src/pipeline/cli_pipeline.c b/src/pipeline/cli_pipeline.c index c008aa01f4c..b30d8366b79 100644 --- a/src/pipeline/cli_pipeline.c +++ b/src/pipeline/cli_pipeline.c @@ -119,6 +119,7 @@ pipeline_shard_run_cb(tse_task_t *task, void *data) struct pipeline_run_cb_args *cb_args; daos_pipeline_run_t *api_args; struct pipeline_run_out *pro; /** received data from srv */ + struct pipeline_run_in *pri; int opc; int ret = task->dt_result; int rc = 0; @@ -132,6 +133,7 @@ pipeline_shard_run_cb(tse_task_t *task, void *data) api_args = cb_args->api_args; rpc = cb_args->rpc; opc = opc_get(rpc->cr_opc); + pri = (struct pipeline_run_in *)crt_req_get(rpc); if (ret != 0) { D_ERROR("RPC %d failed, " DF_RC "\n", opc, DP_RC(ret)); @@ -247,6 +249,14 @@ pipeline_shard_run_cb(tse_task_t *task, void *data) *api_args->anchor = pro->pro_anchor; out: + if (pri->pri_kds_bulk) + crt_bulk_free(pri->pri_kds_bulk); + if (pri->pri_iods_bulk) + crt_bulk_free(pri->pri_iods_bulk); + if (pri->pri_sgl_keys_bulk) + crt_bulk_free(pri->pri_sgl_keys_bulk); + if (pri->pri_sgl_recx_bulk) + crt_bulk_free(pri->pri_sgl_recx_bulk); crt_req_decref(rpc); tse_task_list_del(task); tse_task_decref(task); From 9961d263fc0f2c8fda8d3bd64ed0e49f0a6481a2 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 28 Sep 2023 16:44:59 +0100 Subject: [PATCH 63/80] Revert "DAOS-14225 control: Prevent duplicate call to SetRank (#13058)" (#13104) This reverts commit 281b4fad37a339ddfe37e7d25883d900f4360867. 
--- src/control/common/proto/logging.go | 2 +- src/control/common/proto/mgmt/svc.pb.go | 169 +++++++++++++----------- src/control/server/instance.go | 25 ++-- src/control/server/mgmt_system.go | 16 +++ src/control/server/mgmt_system_test.go | 2 + src/mgmt/svc.pb-c.c | 26 +++- src/mgmt/svc.pb-c.h | 6 +- src/proto/mgmt/svc.proto | 3 +- 8 files changed, 146 insertions(+), 103 deletions(-) diff --git a/src/control/common/proto/logging.go b/src/control/common/proto/logging.go index 624e58fb459..a2edc22c67f 100644 --- a/src/control/common/proto/logging.go +++ b/src/control/common/proto/logging.go @@ -136,7 +136,7 @@ func Debug(msg proto.Message) string { fmt.Fprintf(&bld, " %s:%s", p.Label, p.State) } case *mgmtpb.JoinResp: - fmt.Fprintf(&bld, "%T rank:%d (state:%s) map:%d", m, m.Rank, m.State, m.MapVersion) + fmt.Fprintf(&bld, "%T rank:%d (state:%s, local:%t) map:%d", m, m.Rank, m.State, m.LocalJoin, m.MapVersion) case *mgmtpb.GetAttachInfoResp: msRanks := ranklist.RankSetFromRanks(ranklist.RanksFromUint32(m.MsRanks)) uriRanks := ranklist.NewRankSet() diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index e6988dca637..74d11533864 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.28.1 // protoc v3.5.0 // source: mgmt/svc.proto @@ -342,6 +342,7 @@ type JoinResp struct { Rank uint32 `protobuf:"varint,2,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank assigned. State JoinResp_State `protobuf:"varint,3,opt,name=state,proto3,enum=mgmt.JoinResp_State" json:"state,omitempty"` // Server state in the system map. 
FaultDomain string `protobuf:"bytes,4,opt,name=faultDomain,proto3" json:"faultDomain,omitempty"` // Fault domain for the instance + LocalJoin bool `protobuf:"varint,5,opt,name=localJoin,proto3" json:"localJoin,omitempty"` // Join processed locally. MapVersion uint32 `protobuf:"varint,6,opt,name=map_version,json=mapVersion,proto3" json:"map_version,omitempty"` // Join processed in this version of the system map. } @@ -405,6 +406,13 @@ func (x *JoinResp) GetFaultDomain() string { return "" } +func (x *JoinResp) GetLocalJoin() bool { + if x != nil { + return x.LocalJoin + } + return false +} + func (x *JoinResp) GetMapVersion() uint32 { if x != nil { return x.MapVersion @@ -1151,7 +1159,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x03, 0x69, 0x64, 0x78, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x22, 0xd0, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, + 0x22, 0xdd, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, @@ -1159,84 +1167,85 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 
0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, - 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, - 0x10, 0x01, 0x4a, 0x04, 0x08, 0x05, 0x10, 0x06, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, - 0x6f, 0x69, 0x6e, 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, - 0x0f, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, - 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, - 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, - 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, - 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, - 0x63, 0x61, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, - 0x63, 0x61, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, - 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, - 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, - 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, - 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, - 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 
0x74, 0x12, 0x1a, 0x0a, 0x08, - 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, - 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, - 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, - 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, - 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, - 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, - 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, - 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, - 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, - 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, - 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, - 0x12, 0x19, 0x0a, 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, - 0x28, 0x09, 0x52, 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, - 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, - 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, - 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, - 0x67, 
0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, - 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, - 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, - 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, - 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, - 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, - 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, - 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x73, 0x79, 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, - 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, - 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, - 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, - 0x41, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, - 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 
0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, - 0x6b, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, - 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, - 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, - 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, - 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, - 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, - 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, - 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, - 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, - 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, - 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, + 0x4a, 0x6f, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, + 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, + 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 
0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, + 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, 0x0f, 0x4c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x25, 0x0a, + 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, + 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, + 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, + 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, + 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, + 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, + 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, + 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, + 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, + 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 
0x66, + 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, + 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, + 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, + 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, + 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, + 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, + 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, + 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x19, 0x0a, + 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x09, 0x52, + 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, 0x47, 0x65, 0x74, + 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, + 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, + 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, + 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, + 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 
0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, + 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, + 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, + 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, + 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, + 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, + 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, + 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, + 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, + 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, + 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x41, 0x0a, 0x0a, + 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, + 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1f, + 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, + 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 
0x74, 0x6f, 0x72, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, + 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, + 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, + 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, + 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, + 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, } var ( diff --git a/src/control/server/instance.go b/src/control/server/instance.go index 4583c86f170..14f53cf3b5b 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -178,10 +178,10 @@ func (ei *EngineInstance) removeSocket() error { return nil } -func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, uint32, error) { +func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, bool, uint32, error) { superblock := ei.getSuperblock() if superblock == nil { - return ranklist.NilRank, 0, errors.New("nil superblock while determining rank") + return ranklist.NilRank, false, 0, errors.New("nil superblock while determining rank") } r := ranklist.NilRank @@ -200,11 +200,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready 
*srvpb.Notify }) if err != nil { ei.log.Errorf("join failed: %s", err) - return ranklist.NilRank, 0, err + return ranklist.NilRank, false, 0, err } switch resp.State { case system.MemberStateAdminExcluded, system.MemberStateExcluded: - return ranklist.NilRank, 0, errors.Errorf("rank %d excluded", resp.Rank) + return ranklist.NilRank, resp.LocalJoin, 0, errors.Errorf("rank %d excluded", resp.Rank) } r = ranklist.Rank(resp.Rank) @@ -218,11 +218,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify superblock.URI = ready.GetUri() ei.setSuperblock(superblock) if err := ei.WriteSuperblock(); err != nil { - return ranklist.NilRank, 0, err + return ranklist.NilRank, resp.LocalJoin, 0, err } } - return r, resp.MapVersion, nil + return r, resp.LocalJoin, resp.MapVersion, nil } func (ei *EngineInstance) updateFaultDomainInSuperblock() error { @@ -259,20 +259,21 @@ func (ei *EngineInstance) handleReady(ctx context.Context, ready *srvpb.NotifyRe ei.log.Error(err.Error()) // nonfatal } - r, mapVersion, err := ei.determineRank(ctx, ready) + r, localJoin, mapVersion, err := ei.determineRank(ctx, ready) if err != nil { return err } + // If the join was already processed because it ran on the same server, + // skip the rest of these steps. 
+ if localJoin { + return nil + } + return ei.SetupRank(ctx, r, mapVersion) } func (ei *EngineInstance) SetupRank(ctx context.Context, rank ranklist.Rank, map_version uint32) error { - if ei.IsReady() { - ei.log.Errorf("SetupRank called on an already set-up instance %d", ei.Index()) - return nil - } - if err := ei.callSetRank(ctx, rank, map_version); err != nil { return errors.Wrap(err, "SetRank failed") } diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index 620db09bf11..8374b0b9e2e 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -178,6 +178,22 @@ func (svc *mgmtSvc) join(ctx context.Context, req *mgmtpb.JoinReq, peerAddr *net MapVersion: joinResponse.MapVersion, } + // If the rank is local to the MS leader, then we need to wire up at least + // one in order to perform a CaRT group update. + if common.IsLocalAddr(peerAddr) && req.Idx == 0 { + resp.LocalJoin = true + + srvs := svc.harness.Instances() + if len(srvs) == 0 { + return nil, errors.New("invalid Join request (index 0 doesn't exist?!?)") + } + srv := srvs[0] + + if err := srv.SetupRank(ctx, joinResponse.Member.Rank, joinResponse.MapVersion); err != nil { + return nil, errors.Wrap(err, "SetupRank on local instance failed") + } + } + return resp, nil } diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index 0ac1112c4ba..375b77c3efb 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -1967,6 +1967,7 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, + LocalJoin: false, MapVersion: 2, }, }, @@ -1992,6 +1993,7 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, + LocalJoin: true, MapVersion: 2, }, }, diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index c3900429dfe..cfd562891e0 100644 --- a/src/mgmt/svc.pb-c.c 
+++ b/src/mgmt/svc.pb-c.c @@ -1010,7 +1010,7 @@ const ProtobufCEnumDescriptor mgmt__join_resp__state__descriptor = mgmt__join_resp__state__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = +static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = { { "status", @@ -1060,6 +1060,18 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "localJoin", + 5, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_BOOL, + 0, /* quantifier_offset */ + offsetof(Mgmt__JoinResp, localjoin), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, { "map_version", 6, @@ -1075,16 +1087,16 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = }; static const unsigned mgmt__join_resp__field_indices_by_name[] = { 3, /* field[3] = faultDomain */ - 4, /* field[4] = map_version */ + 4, /* field[4] = localJoin */ + 5, /* field[5] = map_version */ 1, /* field[1] = rank */ 2, /* field[2] = state */ 0, /* field[0] = status */ }; -static const ProtobufCIntRange mgmt__join_resp__number_ranges[2 + 1] = +static const ProtobufCIntRange mgmt__join_resp__number_ranges[1 + 1] = { { 1, 0 }, - { 6, 4 }, - { 0, 5 } + { 0, 6 } }; const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = { @@ -1094,10 +1106,10 @@ const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = "Mgmt__JoinResp", "mgmt", sizeof(Mgmt__JoinResp), - 5, + 6, mgmt__join_resp__field_descriptors, mgmt__join_resp__field_indices_by_name, - 2, mgmt__join_resp__number_ranges, + 1, mgmt__join_resp__number_ranges, (ProtobufCMessageInit) mgmt__join_resp__init, NULL,NULL,NULL /* reserved[123] */ }; diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index c1d61ef44fb..55acb283028 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -163,6 +163,10 @@ struct _Mgmt__JoinResp * Fault 
domain for the instance */ char *faultdomain; + /* + * Join processed locally. + */ + protobuf_c_boolean localjoin; /* * Join processed in this version of the system map. */ @@ -170,7 +174,7 @@ struct _Mgmt__JoinResp }; #define MGMT__JOIN_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__join_resp__descriptor) \ - , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0 } + , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0, 0 } struct _Mgmt__LeaderQueryReq diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 668a9905bfd..400452837ce 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -44,8 +44,6 @@ message JoinReq { } message JoinResp { - reserved 5; - reserved "localJoin"; int32 status = 1; // DAOS error code uint32 rank = 2; // Server rank assigned. enum State { @@ -54,6 +52,7 @@ message JoinResp { } State state = 3; // Server state in the system map. string faultDomain = 4; // Fault domain for the instance + bool localJoin = 5; // Join processed locally. uint32 map_version = 6; // Join processed in this version of the system map. } From fbcb99abf7860734686fe59c2ec5d057495120ec Mon Sep 17 00:00:00 2001 From: wangdi Date: Fri, 29 Sep 2023 08:38:31 -0700 Subject: [PATCH 64/80] DAOS-14352 object: right epoch to fetch the data (#13049) If the epoch is higher than EC aggregate boundary, then it should use stable epoch to fetch the data, since the data could be aggregated independently on parity and data shard, so using stable epoch could make sure the consistency view during rebuild. And also EC aggregation should already aggregate the parity, so there should not be any partial update on the parity as well. Otherwise there might be partial update on this rebuilding shard, so let's use the epoch from the parity shard to fetch the data here, which will make sure partial update will not be fetched here. 
And also EC aggregation is being disabled at the moment, so there should not be any vos aggregation impact this process as well. Signed-off-by: Di Wang --- src/object/srv_obj_migrate.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index fa7579ca516..cd2b9018af1 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1350,22 +1350,38 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, /* For EC object, if the migration include both extent from parity rebuild * and extent from replicate rebuild, let rebuild the extent with parity first, * then extent from replication. - * - * Since the parity shard epoch should be higher or equal to the data shard epoch, - * so let's use the minimum epochs of all parity shards as the update epoch of - * this data shard. */ - for (i = 0; i < mrone->mo_iods_num_from_parity; i++) { for (j = 0; j < mrone->mo_iods_from_parity[i].iod_nr; j++) { daos_iod_t iod = mrone->mo_iods_from_parity[i]; + daos_epoch_t fetch_eph; iod.iod_nr = 1; iod.iod_recxs = &mrone->mo_iods_from_parity[i].iod_recxs[j]; - rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, - mrone->mo_iods_update_ephs_from_parity[i][j], - mrone->mo_iods_update_ephs_from_parity[i][j], - DIOF_EC_RECOV_FROM_PARITY, ds_cont); + + /* If the epoch is higher than EC aggregate boundary, then + * it should use stable epoch to fetch the data, since + * the data could be aggregated independently on parity + * and data shard, so using stable epoch could make sure + * the consistency view during rebuild. And also EC aggregation + * should already aggregate the parity, so there should not + * be any partial update on the parity as well. 
+ * + * Otherwise there might be partial update on this rebuilding + * shard, so let's use the epoch from the parity shard to fetch + * the data here, which will make sure partial update will not + * be fetched here. And also EC aggregation is being disabled + * at the moment, so there should not be any vos aggregation + * impacting this process as well. + */ + if (ds_cont->sc_ec_agg_eph_boundary > + mrone->mo_iods_update_ephs_from_parity[i][j]) + fetch_eph = mrone->mo_epoch; + else + fetch_eph = mrone->mo_iods_update_ephs_from_parity[i][j]; + rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, fetch_eph, + mrone->mo_iods_update_ephs_from_parity[i][j], + DIOF_EC_RECOV_FROM_PARITY, ds_cont); if (rc != 0) D_GOTO(out, rc); } From ed692fe5702c4585f409c3094b84936095eb09f0 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 29 Sep 2023 18:09:37 +0100 Subject: [PATCH 65/80] DAOS-14438 mgmt: Improve error reporting on uuid_parse failure. (#13093) This function returns -1 rather than a daos errno so treat it as such. Do not use the result in DF_RC as it will print DER_UNKNOWN. Put the call in the if statement itself rather than assigning rc. Use the new logging macros to log the error properly. Do not log the invalid uuid. 
Signed-off-by: Ashley Pittman --- src/engine/drpc_client.c | 9 ++- src/mgmt/srv_drpc.c | 141 ++++++++++++++++++++------------------- 2 files changed, 76 insertions(+), 74 deletions(-) diff --git a/src/engine/drpc_client.c b/src/engine/drpc_client.c index 9be829e0f11..54c98d602ec 100644 --- a/src/engine/drpc_client.c +++ b/src/engine/drpc_client.c @@ -387,11 +387,10 @@ ds_pool_find_bylabel(d_const_string_t label, uuid_t pool_uuid, D_GOTO(out_resp, rc = frsp->status); } - rc = uuid_parse(frsp->uuid, pool_uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", frsp->uuid, - DP_RC(rc)); - D_GOTO(out_resp, rc = -DER_IO); + if (uuid_parse(frsp->uuid, pool_uuid) != 0) { + rc = -DER_IO; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out_resp; } ranks = uint32_array_to_rank_list(frsp->svcreps, diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index 9ef6054beda..11cabbf5990 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -471,11 +471,10 @@ ds_mgmt_drpc_pool_create(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_GOTO(out, rc = -DER_NOMEM); } - rc = uuid_parse(req->uuid, pool_uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->uuid, - DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->uuid, pool_uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } D_DEBUG(DB_MGMT, DF_UUID": creating pool\n", DP_UUID(pool_uuid)); @@ -558,11 +557,10 @@ ds_mgmt_drpc_pool_destroy(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to destroy pool %s\n", req->id); - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->id, - DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } /* @@ -630,11 +628,10 @@ ds_mgmt_drpc_pool_evict(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to 
evict pool connections %s\n", req->id); - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->id, - DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -648,11 +645,10 @@ ds_mgmt_drpc_pool_evict(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_GOTO(out, rc = -DER_NOMEM); } for (i = 0; i < req->n_handles; i++) { - rc = uuid_parse(req->handles[i], handles[i]); - if (rc != 0) { - D_ERROR("Unable to parse handle UUID %s: " - DF_RC"\n", req->id, DP_RC(rc)); - D_GOTO(out_free, rc = -DER_INVAL); + if (uuid_parse(req->handles[i], handles[i]) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Handle UUID is invalid"); + goto out_free; } } n_handles = req->n_handles; @@ -704,11 +700,10 @@ pool_change_target_state(char *id, d_rank_list_t *svc_ranks, size_t n_targetidx, int rc, i; num_addrs = (n_targetidx > 0) ? 
n_targetidx : 1; - rc = uuid_parse(id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", id, - DP_RC(rc)); - return -DER_INVAL; + if (uuid_parse(id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + return rc; } rc = pool_target_addr_list_alloc(num_addrs, &target_addr_list); @@ -871,10 +866,9 @@ ds_mgmt_drpc_pool_extend(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) nvme_bytes = req->tierbytes[DAOS_MEDIA_NVME]; } - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->id, - DP_RC(rc)); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); D_GOTO(out, rc = -DER_INVAL); } @@ -1003,10 +997,10 @@ void ds_mgmt_drpc_pool_set_prop(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) return; } - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Couldn't parse '%s' to UUID\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } D_INFO(DF_UUID": received request to set pool properties\n", @@ -1072,11 +1066,10 @@ ds_mgmt_drpc_pool_upgrade(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to upgrade pool %s\n", req->id); - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->id, - DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1222,10 +1215,10 @@ void ds_mgmt_drpc_pool_get_prop(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) return; } - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Couldn't parse '%s' to UUID\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is 
invalid"); + goto out; } D_INFO(DF_UUID": received request to get pool properties\n", @@ -1392,8 +1385,9 @@ ds_mgmt_drpc_pool_get_acl(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to get ACL for pool %s\n", req->id); if (uuid_parse(req->id, pool_uuid) != 0) { - D_ERROR("Couldn't parse '%s' to UUID\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1444,8 +1438,9 @@ get_params_from_modify_acl_req(Drpc__Call *drpc_req, uuid_t uuid_out, } if (uuid_parse(req->id, uuid_out) != 0) { - D_ERROR("Couldn't parse UUID\n"); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "UUID is invalid"); + goto out; } rc = daos_acl_from_strs((const char **)req->entries, req->n_entries, acl_out); @@ -1561,8 +1556,9 @@ ds_mgmt_drpc_pool_delete_acl(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) } if (uuid_parse(req->id, pool_uuid) != 0) { - D_ERROR("Couldn't parse UUID\n"); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1622,8 +1618,9 @@ ds_mgmt_drpc_pool_list_cont(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) /* resp.containers, n_containers are NULL/0 */ if (uuid_parse(req->id, req_uuid) != 0) { - D_ERROR("Failed to parse pool uuid %s\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1764,8 +1761,9 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to query DAOS pool %s\n", req->id); if (uuid_parse(req->id, uuid) != 0) { - D_ERROR("Failed to parse pool uuid %s\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; 
} svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1874,8 +1872,9 @@ ds_mgmt_drpc_pool_query_targets(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to query DAOS pool %s, %zu targets\n", req->id, req->n_targets); if (uuid_parse(req->id, uuid) != 0) { - D_ERROR("Failed to parse pool uuid %s\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -2134,11 +2133,10 @@ ds_mgmt_drpc_bio_health_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) ctl__bio_health_resp__init(resp); if (strlen(req->dev_uuid) != 0) { - rc = uuid_parse(req->dev_uuid, uuid); - if (rc != 0) { - D_ERROR("Unable to parse device UUID %s: "DF_RC"\n", - req->dev_uuid, DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->dev_uuid, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Device UUID is invalid"); + goto out; } } else uuid_clear(uuid); /* need to set uuid = NULL */ @@ -2283,8 +2281,9 @@ ds_mgmt_drpc_dev_set_faulty(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) ctl__dev_manage_resp__init(resp); if (uuid_parse(req->uuid, dev_uuid) != 0) { - D_ERROR("Device UUID (%s) is invalid\n", req->uuid); - D_GOTO(pack_resp, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Device UUID is invalid"); + goto pack_resp; } rc = ds_mgmt_dev_set_faulty(dev_uuid, resp); @@ -2380,13 +2379,15 @@ ds_mgmt_drpc_dev_replace(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) resp->device->uuid = NULL; if (uuid_parse(req->old_dev_uuid, old_uuid) != 0) { - D_ERROR("Old device UUID (%s) is invalid\n", req->old_dev_uuid); - D_GOTO(pack_resp, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Old device UUID is invalid"); + goto pack_resp; } if (uuid_parse(req->new_dev_uuid, new_uuid) != 0) { - D_ERROR("New device UUID (%s) is invalid\n", req->new_dev_uuid); - D_GOTO(pack_resp, rc = -DER_INVAL); + rc = -DER_INVAL; + 
DL_ERROR(rc, "New device UUID is invalid"); + goto pack_resp; } /* TODO DAOS-6283: Implement no-reint device replacement option */ @@ -2449,13 +2450,15 @@ ds_mgmt_drpc_cont_set_owner(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to change container owner\n"); if (uuid_parse(req->contuuid, cont_uuid) != 0) { - D_ERROR("Container UUID is invalid\n"); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Container UUID is invalid"); + goto out; } if (uuid_parse(req->pooluuid, pool_uuid) != 0) { - D_ERROR("Pool UUID is invalid\n"); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); From b58e1cc55967dd1f9de292c4891db8e44bb5fb68 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Mon, 2 Oct 2023 22:38:09 +0800 Subject: [PATCH 66/80] DAOS-14417 vea: reclaim bitmaps might yield for MD-on-SSD (#13096) Reclaiming unused bitmaps might yield for MD-on-SSD, so we need to pick up the empty lists first to make sure those lists (to be reclaimed) cannot be allocated by the reserve ULT Signed-off-by: Wang Shilong --- src/vea/vea_free.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/vea/vea_free.c b/src/vea/vea_free.c index f82fd299bd4..17877bc277b 100644 --- a/src/vea/vea_free.c +++ b/src/vea/vea_free.c @@ -909,7 +909,7 @@ static int reclaim_unused_bitmap(struct vea_space_info *vsi, uint32_t nr_reclaim, uint32_t *nr_reclaimed) { int i; - struct vea_bitmap_entry *bitmap_entry, *tmp_entry; + struct vea_bitmap_entry *bitmap_entry; struct vea_free_bitmap *vfb; d_iov_t key; int rc = 0; @@ -920,12 +920,11 @@ reclaim_unused_bitmap(struct vea_space_info *vsi, uint32_t nr_reclaim, uint32_t uint32_t blk_cnt; for (i = 0; i < VEA_MAX_BITMAP_CLASS; i++) { - d_list_for_each_entry_safe(bitmap_entry, tmp_entry, - &vsi->vsi_class.vfc_bitmap_empty[i], vbe_link) { + while ((bitmap_entry = 
d_list_pop_entry(&vsi->vsi_class.vfc_bitmap_empty[i], + struct vea_bitmap_entry, vbe_link))) { vfb = &bitmap_entry->vbe_bitmap; D_ASSERT(vfb->vfb_class == i + 1); D_ASSERT(is_bitmap_empty(vfb->vfb_bitmaps, vfb->vfb_bitmap_sz)); - d_list_del_init(&bitmap_entry->vbe_link); D_ALLOC_PTR(fca); if (!fca) return -DER_NOMEM; From 6e745b257e3b4b71900fb9707156d0788d4a134f Mon Sep 17 00:00:00 2001 From: Ravindran Padmanabhan Date: Mon, 2 Oct 2023 12:59:41 -0700 Subject: [PATCH 67/80] DAOS-14441 test: Perform exclude or drain during/after extend rebuild process. (#13095) Summary: Add new test cases to existing OSA tests (exclude or drain during/after the extend operation respectively). Add the tests to offline and online OSA tests. Drain feature is not supported when extend rebuild happens. It can work only after extend rebuild is completed. It is not the case for exclude [ie: we can perform an exclude when extend rebuild happens]. Signed-off-by: Padmanabhan --- src/tests/ftest/osa/offline_extend.py | 46 ++++++++++++++++++++++++++- src/tests/ftest/osa/online_extend.py | 45 +++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/osa/offline_extend.py b/src/tests/ftest/osa/offline_extend.py index 0f8bcc35fe5..0c3d65046be 100644 --- a/src/tests/ftest/osa/offline_extend.py +++ b/src/tests/ftest/osa/offline_extend.py @@ -3,6 +3,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ +from time import sleep from osa_utils import OSAUtils from test_utils_pool import add_pool from dmg_utils import check_system_query_status @@ -29,7 +30,8 @@ def setUp(self): self.test_oclass = None self.dmg_command.exit_status_exception = True - def run_offline_extend_test(self, num_pool, data=False, oclass=None): + def run_offline_extend_test(self, num_pool, data=False, oclass=None, + exclude_or_drain=None): """Run the offline extend without data. 
Args: @@ -37,6 +39,7 @@ def run_offline_extend_test(self, num_pool, data=False, oclass=None): data (bool) : whether pool has no data or to create some data in pool. Defaults to False. oclass (list) : list of daos object class (eg: "RP_2G8") + exclude_or_drain (str): Pass "exclude" or "drain" string. Defaults to None. """ # Create a pool pool = {} @@ -95,6 +98,19 @@ def run_offline_extend_test(self, num_pool, data=False, oclass=None): if self.test_during_aggregation is True and (num_pool > 1): self.delete_extra_container(self.pool) output = self.pool.extend(rank_val) + self.log.info(output) + if exclude_or_drain == "exclude": + self.pool.wait_for_rebuild_to_start() + # Give a 4 second delay so that some objects are moved + # as part of rebuild operation. + sleep(4) + self.log.info("Exclude rank 3 while rebuild is happening") + output = self.pool.exclude("3") + elif exclude_or_drain == "drain": + # Drain cannot be performed while extend rebuild is happening. + self.print_and_assert_on_rebuild_failure(output) + self.log.info("Drain rank 3 after extend rebuild is completed") + output = self.pool.drain("3") self.print_and_assert_on_rebuild_failure(output) free_space_after_extend = self.pool.get_total_free_space(refresh=True) @@ -202,3 +218,31 @@ def test_osa_offline_extend_after_snapshot(self): self.test_with_snapshot = self.params.get("test_with_snapshot", '/run/snapshot/*') self.log.info("Offline Extend Testing: After taking snapshot") self.run_offline_extend_test(1, data=True) + + def test_osa_offline_extend_exclude_during_rebuild(self): + """Test ID: DAOS-14441. + + Test Description: Validate Offline extend after rebuild is started + and a rank is excluded. 
+ + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=osa,osa_extend,offline_extend + :avocado: tags=OSAOfflineExtend,test_osa_offline_extend_exclude_during_rebuild + """ + self.log.info("Offline Extend Testing: Exclude during Rebuild") + self.run_offline_extend_test(1, data=True, exclude_or_drain="exclude") + + def test_osa_offline_extend_drain_after_rebuild(self): + """Test ID: DAOS-14441. + + Test Description: Validate Offline extend after rebuild is started + and a rank is drained. + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=osa,osa_extend,offline_extend + :avocado: tags=OSAOfflineExtend,test_osa_offline_extend_drain_after_rebuild + """ + self.log.info("Offline Extend Testing: Drain after rebuild") + self.run_offline_extend_test(1, data=True, exclude_or_drain="drain") diff --git a/src/tests/ftest/osa/online_extend.py b/src/tests/ftest/osa/online_extend.py index a2d6569c528..cb5931b33db 100644 --- a/src/tests/ftest/osa/online_extend.py +++ b/src/tests/ftest/osa/online_extend.py @@ -44,7 +44,8 @@ def daos_racer_thread(self): self.daos_racer.get_params(self) self.daos_racer.run() - def run_online_extend_test(self, num_pool, racer=False, oclass=None, app_name="ior"): + def run_online_extend_test(self, num_pool, racer=False, oclass=None, app_name="ior", + exclude_or_drain=None): """Run the Online extend without data. Args: @@ -52,6 +53,7 @@ def run_online_extend_test(self, num_pool, racer=False, oclass=None, app_name="i racer (bool): Run the testing along with daos_racer. Defaults to False. oclass (str): Object Class (eg: RP_2G1, etc). Default to None. app_name (str): App (ior or mdtest) to run during the testing. Defaults to ior. + exclude_or_drain (str): Pass "exclude" or "drain" string. Defaults to None. 
""" # Pool dictionary pool = {} @@ -111,6 +113,19 @@ def run_online_extend_test(self, num_pool, racer=False, oclass=None, app_name="i # Get initial total free space (scm+nvme) initial_free_space = self.pool.get_total_free_space(refresh=True) output = self.pool.extend(self.ranks) + self.log.info(output) + if exclude_or_drain == "exclude": + self.pool.wait_for_rebuild_to_start() + # Give a 4 minute delay so that some objects are moved + # as part of rebuild operation. + time.sleep(4) + self.log.info("Exclude rank 3 while rebuild is happening") + output = self.pool.exclude("3") + elif exclude_or_drain == "drain": + # Drain cannot be performed while extend rebuild is happening. + self.print_and_assert_on_rebuild_failure(output) + self.log.info("Drain rank 3 after extend rebuild is completed") + output = self.pool.drain("3") self.print_and_assert_on_rebuild_failure(output) free_space_after_extend = self.pool.get_total_free_space(refresh=True) @@ -213,3 +228,31 @@ def test_osa_online_extend_with_aggregation(self): self.test_during_aggregation = self.params.get("test_with_aggregation", '/run/aggregation/*') self.run_online_extend_test(1) + + def test_osa_online_extend_exclude_during_rebuild(self): + """Test ID: DAOS-14441. + + Test Description: Validate Online extend after rebuild is started + and a rank is excluded. + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=osa,osa_extend,online_extend + :avocado: tags=OSAOnlineExtend,test_osa_online_extend_exclude_during_rebuild + """ + self.log.info("Online Extend Testing: Exclude during Rebuild") + self.run_online_extend_test(1, exclude_or_drain="exclude") + + def test_osa_online_extend_drain_after_rebuild(self): + """Test ID: DAOS-14441. + + Test Description: Validate Online extend after rebuild is completed + and a rank is drained. 
+ + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=osa,osa_extend,online_extend + :avocado: tags=OSAOnlineExtend,test_osa_online_extend_drain_after_rebuild + """ + self.log.info("Online Extend Testing: Drain after rebuild") + self.run_online_extend_test(1, exclude_or_drain="drain") From f0a9d3e67f8262dd32909b86e57224b45c0ecd2c Mon Sep 17 00:00:00 2001 From: Colin Howes <16161867+chowes@users.noreply.github.com> Date: Tue, 3 Oct 2023 09:35:11 -0700 Subject: [PATCH 68/80] DAOS-14420 control: use hostname -s instead of -d (#13086) The test runs hostname -d and checks that the result is a substring of os.Hostname(), which is not the case in our environment. From the man page, hostname -s should give us the hostname cut at the first dot, so I would expect this to be portable. Signed-off-by: Colin Howes --- src/control/lib/support/log_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/control/lib/support/log_test.go b/src/control/lib/support/log_test.go index 9641dcd0ce4..120659049d9 100644 --- a/src/control/lib/support/log_test.go +++ b/src/control/lib/support/log_test.go @@ -236,7 +236,7 @@ func TestSupport_cpOutputToFile(t *testing.T) { "Check valid Command with option": { target: targetTestDir, cmd: "hostname", - option: "-d", + option: "-s", expResult: hostName, expErr: nil, }, From 2fbe122b3a75b341f4461d81fceef50fb6b5bdf4 Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Tue, 3 Oct 2023 13:41:06 -0400 Subject: [PATCH 69/80] DAOS-11552 doc: Document Interoperability Policy (#13027) Formally document the policy with a reference table in the Admin Guide. 
Signed-off-by: Michael MacDonald --- docs/admin/administration.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/docs/admin/administration.md b/docs/admin/administration.md index 6c52325ad84..a65ef4a85ce 100644 --- a/docs/admin/administration.md +++ b/docs/admin/administration.md @@ -960,3 +960,28 @@ DAOS v2.2 client connections to pools which were created by DAOS v2.4 will be rejected. DAOS v2.4 client should work with DAOS v2.4 and DAOS v2.2 server. To upgrade all pools to latest format after software upgrade, run `dmg pool upgrade ` + +### Interoperability Matrix + +The following table is intended to visually depict the interoperability +policies for all major components in a DAOS system. + + +||Server
(daos_server)|Engine
(daos_engine)|Agent
(daos_agent)|Client
(libdaos)|Admin
(dmg)| +|:---|:---:|:---:|:---:|:---:|:---:| +|Server|x.y.z|x.y.z|x.(y±1)|n/a|x.y| +|Engine|x.y.z|x.y.z|n/a|x.(y±1)|n/a| +|Agent|x.(y±1)|n/a|n/a|x.y.z|n/a| +|Client|n/a|x.(y±1)|x.y.z|n/a|n/a| +|Admin|x.y|n/a|n/a|n/a|n/a| + +Key: + * x.y.z: Major.Minor.Patch must be equal + * x.y: Major.Minor must be equal + * x.(y±1): Major must be equal, Minor must be equal or -1/+1 release version + * n/a: Components do not communicate + +Examples: + * daos_server 2.4.0 is only compatible with daos_engine 2.4.0 + * daos_agent 2.6.0 is compatible with daos_server 2.4.0 (2.5 is a development version) + * dmg 2.4.1 is compatible with daos_server 2.4.0 From bfa979e2e170388e7e7bdf4de1a062d283c41680 Mon Sep 17 00:00:00 2001 From: Colin Howes <16161867+chowes@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:15:15 -0700 Subject: [PATCH 70/80] DAOS-10942 utils: chmod should ignore unsupported bits (#12949) The setuid, setgid, and sticky bit can cause fatal errors when the datamover tool sets file permissions after copying a file, since these are not supported by DFS. We can just ignore this bit when calling dfs_chmod. 
Signed-off-by: Colin Howes --- src/control/cmd/daos/filesystem.go | 12 ++++-- src/utils/daos_hdlr.c | 61 +++++++++++++++--------------- src/utils/daos_hdlr.h | 13 ++++--- 3 files changed, 46 insertions(+), 40 deletions(-) diff --git a/src/control/cmd/daos/filesystem.go b/src/control/cmd/daos/filesystem.go index a03d38ff0b9..e7fcf828a45 100644 --- a/src/control/cmd/daos/filesystem.go +++ b/src/control/cmd/daos/filesystem.go @@ -44,9 +44,10 @@ type fsCmd struct { type fsCopyCmd struct { daosCmd - Source string `long:"src" short:"s" description:"copy source" required:"1"` - Dest string `long:"dst" short:"d" description:"copy destination" required:"1"` - Preserve string `long:"preserve-props" short:"m" description:"preserve container properties, requires HDF5 library" required:"0"` + Source string `long:"src" short:"s" description:"copy source" required:"1"` + Dest string `long:"dst" short:"d" description:"copy destination" required:"1"` + Preserve string `long:"preserve-props" short:"m" description:"preserve container properties, requires HDF5 library" required:"0"` + IgnoreUnsup bool `long:"ignore-unsupported" description:"ignore unsupported filesystem features when copying to DFS" required:"0"` } func (cmd *fsCopyCmd) Execute(_ []string) error { @@ -64,6 +65,7 @@ func (cmd *fsCopyCmd) Execute(_ []string) error { ap.preserve_props = C.CString(cmd.Preserve) defer freeString(ap.preserve_props) } + ap.ignore_unsup = C.bool(cmd.IgnoreUnsup) ap.fs_op = C.FS_COPY rc := C.fs_copy_hdlr(ap) @@ -107,6 +109,10 @@ func (cmd *fsCopyCmd) Execute(_ []string) error { cmd.Infof(" Files: %d", ap.fs_copy_stats.num_files) cmd.Infof(" Links: %d", ap.fs_copy_stats.num_links) + if ap.fs_copy_stats.num_chmod_enotsup > 0 { + return errors.New(fmt.Sprintf("Copy completed successfully, but %d files had unsupported mode bits that could not be applied. 
Run with --ignore-unsupported to suppress this warning.", ap.fs_copy_stats.num_chmod_enotsup)) + } + return nil } diff --git a/src/utils/daos_hdlr.c b/src/utils/daos_hdlr.c index cf192241ece..431a92df867 100644 --- a/src/utils/daos_hdlr.c +++ b/src/utils/daos_hdlr.c @@ -523,11 +523,19 @@ file_close(struct cmd_args_s *ap, struct file_dfs *file_dfs, const char *file) } static int -file_chmod(struct cmd_args_s *ap, struct file_dfs *file_dfs, const char *path, - mode_t mode) +file_chmod(struct cmd_args_s *ap, struct file_dfs *file_dfs, const char *path, mode_t mode, + bool ignore_unsup, uint64_t *num_chmod_enotsup) { int rc = 0; + /* Unset any unsupported mode bits. We track these errors so they can + * be surfaced to the user at the end of the copy operation. + */ + if (!ignore_unsup && mode & (S_ISVTX | S_ISGID | S_ISUID)) { + (*num_chmod_enotsup)++; + } + mode &= ~(S_ISVTX | S_ISGID | S_ISUID); + if (file_dfs->type == POSIX) { rc = chmod(path, mode); /* POSIX returns -1 on error and sets errno @@ -547,12 +555,9 @@ file_chmod(struct cmd_args_s *ap, struct file_dfs *file_dfs, const char *path, } static int -fs_copy_file(struct cmd_args_s *ap, - struct file_dfs *src_file_dfs, - struct file_dfs *dst_file_dfs, - struct stat *src_stat, - const char *src_path, - const char *dst_path) +fs_copy_file(struct cmd_args_s *ap, struct file_dfs *src_file_dfs, struct file_dfs *dst_file_dfs, + struct stat *src_stat, const char *src_path, const char *dst_path, bool ignore_unsup, + uint64_t *num_chmod_enotsup) { int src_flags = O_RDONLY; int dst_flags = O_CREAT | O_TRUNC | O_WRONLY; @@ -603,7 +608,8 @@ fs_copy_file(struct cmd_args_s *ap, } /* set perms on destination to original source perms */ - rc = file_chmod(ap, dst_file_dfs, dst_path, src_stat->st_mode); + rc = file_chmod(ap, dst_file_dfs, dst_path, src_stat->st_mode, ignore_unsup, + num_chmod_enotsup); if (rc != 0) { rc = daos_errno2der(rc); DH_PERROR_DER(ap, rc, "updating dst file permissions failed"); @@ -704,12 +710,8 @@ 
fs_copy_symlink(struct cmd_args_s *ap, } static int -fs_copy_dir(struct cmd_args_s *ap, - struct file_dfs *src_file_dfs, - struct file_dfs *dst_file_dfs, - struct stat *src_stat, - const char *src_path, - const char *dst_path, +fs_copy_dir(struct cmd_args_s *ap, struct file_dfs *src_file_dfs, struct file_dfs *dst_file_dfs, + struct stat *src_stat, const char *src_path, const char *dst_path, bool ignore_unsup, struct fs_copy_stats *num) { DIR *src_dir = NULL; @@ -783,9 +785,9 @@ fs_copy_dir(struct cmd_args_s *ap, switch (next_src_stat.st_mode & S_IFMT) { case S_IFREG: - rc = fs_copy_file(ap, src_file_dfs, dst_file_dfs, - &next_src_stat, next_src_path, - next_dst_path); + rc = fs_copy_file(ap, src_file_dfs, dst_file_dfs, &next_src_stat, + next_src_path, next_dst_path, ignore_unsup, + &num->num_chmod_enotsup); if ((rc != 0) && (rc != -DER_EXIST)) D_GOTO(out, rc); num->num_files++; @@ -800,7 +802,7 @@ fs_copy_dir(struct cmd_args_s *ap, break; case S_IFDIR: rc = fs_copy_dir(ap, src_file_dfs, dst_file_dfs, &next_src_stat, - next_src_path, next_dst_path, num); + next_src_path, next_dst_path, ignore_unsup, num); if ((rc != 0) && (rc != -DER_EXIST)) D_GOTO(out, rc); num->num_dirs++; @@ -815,7 +817,8 @@ fs_copy_dir(struct cmd_args_s *ap, } /* set original source perms on directories after copying */ - rc = file_chmod(ap, dst_file_dfs, dst_path, src_stat->st_mode); + rc = file_chmod(ap, dst_file_dfs, dst_path, src_stat->st_mode, ignore_unsup, + &num->num_chmod_enotsup); if (rc != 0) { rc = daos_errno2der(rc); DH_PERROR_DER(ap, rc, "updating destination permissions failed on '%s'", dst_path); @@ -842,12 +845,8 @@ fs_copy_dir(struct cmd_args_s *ap, } static int -fs_copy(struct cmd_args_s *ap, - struct file_dfs *src_file_dfs, - struct file_dfs *dst_file_dfs, - const char *src_path, - const char *dst_path, - struct fs_copy_stats *num) +fs_copy(struct cmd_args_s *ap, struct file_dfs *src_file_dfs, struct file_dfs *dst_file_dfs, + const char *src_path, const char *dst_path, bool 
ignore_unsup, struct fs_copy_stats *num) { int rc = 0; struct stat src_stat; @@ -902,14 +901,14 @@ fs_copy(struct cmd_args_s *ap, switch (src_stat.st_mode & S_IFMT) { case S_IFREG: - rc = fs_copy_file(ap, src_file_dfs, dst_file_dfs, &src_stat, src_path, - dst_path); + rc = fs_copy_file(ap, src_file_dfs, dst_file_dfs, &src_stat, src_path, dst_path, + ignore_unsup, &num->num_chmod_enotsup); if (rc == 0) num->num_files++; break; case S_IFDIR: - rc = fs_copy_dir(ap, src_file_dfs, dst_file_dfs, &src_stat, src_path, - dst_path, num); + rc = fs_copy_dir(ap, src_file_dfs, dst_file_dfs, &src_stat, src_path, dst_path, + ignore_unsup, num); if (rc == 0) num->num_dirs++; break; @@ -1869,7 +1868,7 @@ fs_copy_hdlr(struct cmd_args_s *ap) D_GOTO(out, rc); } - rc = fs_copy(ap, &src_file_dfs, &dst_file_dfs, src_str, dst_str, num); + rc = fs_copy(ap, &src_file_dfs, &dst_file_dfs, src_str, dst_str, ap->ignore_unsup, num); if (rc != 0) { DH_PERROR_DER(ap, rc, "fs copy failed"); D_GOTO(out_disconnect, rc); diff --git a/src/utils/daos_hdlr.h b/src/utils/daos_hdlr.h index e576f2ba751..1f25c0ccf49 100644 --- a/src/utils/daos_hdlr.h +++ b/src/utils/daos_hdlr.h @@ -72,9 +72,10 @@ enum sh_op { }; struct fs_copy_stats { - uint64_t num_dirs; - uint64_t num_files; - uint64_t num_links; + uint64_t num_dirs; + uint64_t num_files; + uint64_t num_links; + uint64_t num_chmod_enotsup; }; struct dm_args { @@ -91,8 +92,7 @@ struct dm_args { uint32_t cont_prop_oid; uint32_t cont_prop_layout; uint64_t cont_layout; - uint64_t cont_oid; - + uint64_t cont_oid; }; /* cmd_args_s: consolidated result of parsing command-line arguments @@ -141,7 +141,8 @@ struct cmd_args_s { /* Container datamover related */ struct dm_args *dm_args; /* datamover arguments */ struct fs_copy_stats *fs_copy_stats; /* fs copy stats */ - bool fs_copy_posix; /* fs copy to POSIX */ + bool ignore_unsup; /* ignore unsupported filesystem features */ + bool fs_copy_posix; /* fs copy to POSIX */ FILE *outstream; /* normal output stream */ 
FILE *errstream; /* errors stream */ From 3188558363ac3f45f359fab42d9989c8f4f1f038 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 4 Oct 2023 06:40:06 +0900 Subject: [PATCH 71/80] DAOS-11955 pool: Ensure a PS is inside pool (#13046) * DAOS-11955 pool: Ensure a PS is inside its pool It was found that a PS leader may enter ds_pool_plan_svc_reconfs with itself being an undesirable replica. This may lead to an assertion failure at "move n replicas from undesired to to_remove" in ds_pool_plan_svc_reconfs. Moreover, such a PS leader may be outside of the pool group, making it incapable of performing many duties that involve collective communication. This patch therefore ensures that a PS leader will remove undesirable PS replicas synchronously before committing a pool map modification that introduces new undesirable PS replicas. (If we were to keep an undesirable PS replica, it might become a PS leader.) - Extend and clean up pool_svc_sched. * Allow pool_svc_reconf_ult to return an error, so that we can fail a pool map modification if its synchronous PS replica removal fails. * Allow pool_svc_reconf_ult to get an argument, so that we can tell pool_svc_reconf_ult whether we want a synchronous remove-only run or an asynchronous add-remove run. * Move pool_svc_sched.{psc_svc_rf,psc_force_notify} up to pool_svc. - Prevent pool_svc_step_up_cb from canceling in-progress reconfigurations by comparing pool map versions for which the reconfigurations are scheduled. - Rename POOL_GROUP_MAP_STATUS to POOL_GROUP_MAP_STATES so that we are consistent with the pool_map module. 
Signed-off-by: Li Wei --- src/pool/srv_internal.h | 13 +- src/pool/srv_pool.c | 322 +++++++++++++++++++++++++++++++--------- src/pool/srv_target.c | 2 +- src/pool/srv_util.c | 181 ++++++++++------------ 4 files changed, 338 insertions(+), 180 deletions(-) diff --git a/src/pool/srv_internal.h b/src/pool/srv_internal.h index a7b9a55bd86..c6936527970 100644 --- a/src/pool/srv_internal.h +++ b/src/pool/srv_internal.h @@ -16,8 +16,17 @@ #include #include -/* Map status of ranks that make up the pool group */ -#define POOL_GROUP_MAP_STATUS (PO_COMP_ST_UP | PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN) +/* Map states of ranks that make up the pool group */ +#define POOL_GROUP_MAP_STATES (PO_COMP_ST_UP | PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN) + +/* Map states of ranks that make up the pool service */ +#define POOL_SVC_MAP_STATES (PO_COMP_ST_UP | PO_COMP_ST_UPIN) + +/* + * Since we want all PS replicas to belong to the pool group, + * POOL_SVC_MAP_STATES must be a subset of POOL_GROUP_MAP_STATES. + */ +D_CASSERT((POOL_SVC_MAP_STATES & POOL_GROUP_MAP_STATES) == POOL_SVC_MAP_STATES); /** * Global pool metrics diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 71e7e2d358b..0fc06e0d739 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -65,12 +65,12 @@ struct pool_svc_events { /* Pool service schedule state */ struct pool_svc_sched { - int psc_svc_rf; - bool psc_force_notify; /* for pool_svc_step_up_cb */ ABT_mutex psc_mutex; /* only for psc_cv */ ABT_cond psc_cv; bool psc_in_progress; bool psc_canceled; + void *psc_arg; + int psc_rc; }; static int @@ -89,10 +89,10 @@ sched_init(struct pool_svc_sched *sched) return dss_abterr2der(rc); } - sched->psc_svc_rf = -1; - sched->psc_force_notify = false; sched->psc_in_progress = false; sched->psc_canceled = false; + sched->psc_arg = NULL; + sched->psc_rc = 0; return 0; } @@ -104,10 +104,12 @@ sched_fini(struct pool_svc_sched *sched) } static void -sched_begin(struct pool_svc_sched *sched) +sched_begin(struct pool_svc_sched 
*sched, void *arg) { sched->psc_in_progress = true; sched->psc_canceled = false; + sched->psc_arg = arg; + sched->psc_rc = 0; } static void @@ -118,20 +120,32 @@ sched_end(struct pool_svc_sched *sched) } static void -sched_cancel_and_wait(struct pool_svc_sched *sched) +sched_cancel(struct pool_svc_sched *sched) +{ + if (sched->psc_in_progress) + sched->psc_canceled = true; +} + +static void +sched_wait(struct pool_svc_sched *sched) { /* * The CV requires a mutex. We don't otherwise need it for ULTs within * the same xstream. */ ABT_mutex_lock(sched->psc_mutex); - if (sched->psc_in_progress) - sched->psc_canceled = true; while (sched->psc_in_progress) ABT_cond_wait(sched->psc_cv, sched->psc_mutex); ABT_mutex_unlock(sched->psc_mutex); } +static void +sched_cancel_and_wait(struct pool_svc_sched *sched) +{ + sched_cancel(sched); + sched_wait(sched); +} + /* Pool service */ struct pool_svc { struct ds_rsvc ps_rsvc; @@ -144,8 +158,9 @@ struct pool_svc { struct ds_pool *ps_pool; struct pool_svc_events ps_events; uint32_t ps_global_version; + int ps_svc_rf; + bool ps_force_notify;/* MS of PS membership */ struct pool_svc_sched ps_reconf_sched; - /* Check all containers RF for the pool */ struct pool_svc_sched ps_rfcheck_sched; /* The global pool map version on all pool targets */ @@ -1035,6 +1050,8 @@ pool_svc_alloc_cb(d_iov_t *id, struct ds_rsvc **rsvc) uuid_copy(svc->ps_uuid, id->iov_buf); D_INIT_LIST_HEAD(&svc->ps_events.pse_queue); svc->ps_events.pse_handler = ABT_THREAD_NULL; + svc->ps_svc_rf = -1; + svc->ps_force_notify = false; rc = ABT_rwlock_create(&svc->ps_lock); if (rc != ABT_SUCCESS) { @@ -1536,9 +1553,9 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, svc_rf_entry = daos_prop_entry_get(*prop, DAOS_PROP_PO_SVC_REDUN_FAC); D_ASSERT(svc_rf_entry != NULL); if (daos_prop_is_set(svc_rf_entry)) - svc->ps_reconf_sched.psc_svc_rf = svc_rf_entry->dpe_val; + svc->ps_svc_rf = svc_rf_entry->dpe_val; else - svc->ps_reconf_sched.psc_svc_rf = -1; + 
svc->ps_svc_rf = -1; out_lock: ABT_rwlock_unlock(svc->ps_lock); @@ -1637,9 +1654,10 @@ pool_svc_check_node_status(struct pool_svc *svc) D_PRINT(fmt, ## __VA_ARGS__); \ } while (0) -static void pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, - void (*func)(void *)); -static void pool_svc_reconf_ult(void *arg); +static int pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, + void (*func)(void *), void *arg); +static int pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, + uint32_t map_version_for, bool sync_remove); static void pool_svc_rfcheck_ult(void *arg); static int @@ -1653,7 +1671,6 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) daos_prop_t *prop = NULL; bool cont_svc_up = false; bool events_initialized = false; - bool svc_scheduled = false; d_rank_t rank = dss_self_rank(); int rc; @@ -1700,10 +1717,22 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) * Just in case the previous leader didn't finish the last series of * reconfigurations or the last MS notification. 
*/ - svc->ps_reconf_sched.psc_force_notify = true; - pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult); - pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult); - svc_scheduled = true; + svc->ps_force_notify = true; + rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */); + if (rc == -DER_OP_CANCELED) { + DL_INFO(rc, DF_UUID": not scheduling pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + } else if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to schedule pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + goto out; + } + + rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, NULL /* arg */); + if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to schedule RF check", DP_UUID(svc->ps_uuid)); + goto out; + } rc = ds_pool_iv_prop_update(svc->ps_pool, prop); if (rc) { @@ -1742,11 +1771,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) if (rc != 0) { if (events_initialized) fini_events(svc); - if (svc_scheduled) { - sched_cancel_and_wait(&svc->ps_reconf_sched); - sched_cancel_and_wait(&svc->ps_rfcheck_sched); - } - + sched_cancel_and_wait(&svc->ps_rfcheck_sched); + sched_cancel_and_wait(&svc->ps_reconf_sched); if (cont_svc_up) ds_cont_svc_step_down(svc->ps_cont_svc); if (svc->ps_pool != NULL) @@ -5581,27 +5607,47 @@ ds_pool_svc_delete_acl(uuid_t pool_uuid, d_rank_list_t *ranks, return rc; } +struct pool_svc_reconf_arg { + struct pool_map *sca_map; + uint32_t sca_map_version_for; + bool sca_sync_remove; +}; + +/* Must be used with pool_svc.ps_reconf_sched (see container_of below). 
*/ static void -pool_svc_reconf_ult(void *arg) -{ - struct pool_svc *svc = arg; - struct pool_svc_sched *reconf = &svc->ps_reconf_sched; - d_rank_list_t *current; - d_rank_list_t *to_add; - d_rank_list_t *to_remove; - d_rank_list_t *new; - int rc; +pool_svc_reconf_ult(void *varg) +{ + struct pool_svc_sched *reconf = varg; + struct pool_svc_reconf_arg *arg = reconf->psc_arg; + struct pool_svc *svc; + struct pool_map *map; + d_rank_list_t *current; + d_rank_list_t *to_add; + d_rank_list_t *to_remove; + d_rank_list_t *new; + int rc; + + svc = container_of(reconf, struct pool_svc, ps_reconf_sched); + + if (arg->sca_map == NULL) + map = svc->ps_pool->sp_map; + else + map = arg->sca_map; D_DEBUG(DB_MD, DF_UUID": begin\n", DP_UUID(svc->ps_uuid)); - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out; + } /* When there are pending events, the pool map may be unstable. */ - while (events_pending(svc)) { + while (!arg->sca_sync_remove && events_pending(svc)) { dss_sleep(3000 /* ms */); - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out; + } } rc = rdb_get_ranks(svc->ps_rsvc.s_db, ¤t); @@ -5611,10 +5657,12 @@ pool_svc_reconf_ult(void *arg) goto out; } - ABT_rwlock_rdlock(svc->ps_pool->sp_lock); - rc = ds_pool_plan_svc_reconfs(reconf->psc_svc_rf, svc->ps_pool->sp_map, current, - dss_self_rank(), &to_add, &to_remove); - ABT_rwlock_unlock(svc->ps_pool->sp_lock); + if (arg->sca_map == NULL) + ABT_rwlock_rdlock(svc->ps_pool->sp_lock); + rc = ds_pool_plan_svc_reconfs(svc->ps_svc_rf, map, current, dss_self_rank(), &to_add, + &to_remove); + if (arg->sca_map == NULL) + ABT_rwlock_unlock(svc->ps_pool->sp_lock); if (rc != 0) { D_ERROR(DF_UUID": cannot plan pool service reconfigurations: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); @@ -5622,7 +5670,7 @@ pool_svc_reconf_ult(void *arg) } D_DEBUG(DB_MD, DF_UUID": svc_rf=%d current=%u to_add=%u to_remove=%u\n", - DP_UUID(svc->ps_uuid), reconf->psc_svc_rf, 
current->rl_nr, to_add->rl_nr, + DP_UUID(svc->ps_uuid), svc->ps_svc_rf, current->rl_nr, to_add->rl_nr, to_remove->rl_nr); /* @@ -5633,14 +5681,17 @@ pool_svc_reconf_ult(void *arg) * of the two calls returns an error, we still need to report any * membership changes to the MS. */ - if (to_add->rl_nr > 0) + if (!arg->sca_sync_remove && to_add->rl_nr > 0) { ds_rsvc_add_replicas_s(&svc->ps_rsvc, to_add, ds_rsvc_get_md_cap()); - if (reconf->psc_canceled) - goto out_to_add_remove; - if (to_add->rl_nr > to_remove->rl_nr) - to_remove->rl_nr = 0; - else - to_remove->rl_nr -= to_add->rl_nr; + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; + goto out_to_add_remove; + } + if (to_add->rl_nr > to_remove->rl_nr) + to_remove->rl_nr = 0; + else + to_remove->rl_nr -= to_add->rl_nr; + } if (to_remove->rl_nr > 0) { d_rank_list_t *tmp; @@ -5669,26 +5720,35 @@ pool_svc_reconf_ult(void *arg) d_rank_list_sort(current); d_rank_list_sort(new); - if (reconf->psc_force_notify || !d_rank_list_identical(new, current)) { + if (svc->ps_force_notify || !d_rank_list_identical(new, current)) { + int rc_tmp; + /* * Send RAS event to control-plane over dRPC to indicate * change in pool service replicas. */ - rc = ds_notify_pool_svc_update(&svc->ps_uuid, new, svc->ps_rsvc.s_term); - if (rc == 0) - reconf->psc_force_notify = false; + rc_tmp = ds_notify_pool_svc_update(&svc->ps_uuid, new, svc->ps_rsvc.s_term); + if (rc_tmp == 0) + svc->ps_force_notify = false; else - D_ERROR(DF_UUID": replica update notify failure: "DF_RC"\n", - DP_UUID(svc->ps_uuid), DP_RC(rc)); + DL_ERROR(rc_tmp, DF_UUID": replica update notify failure", + DP_UUID(svc->ps_uuid)); } d_rank_list_free(new); } - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out_to_add_remove; + } - /* Ignore the return value of this ds_rsvc_dist_stop call. 
*/ - if (to_remove->rl_nr > 0) + /* + * Don't attempt to destroy any removed replicas in the "synchronous + * remove" mode, so that we don't delay pool_svc_update_map_internal + * for too long. Ignore the return value of this ds_rsvc_dist_stop + * call. + */ + if (!arg->sca_sync_remove && to_remove->rl_nr > 0) ds_rsvc_dist_stop(svc->ps_rsvc.s_class, &svc->ps_rsvc.s_id, to_remove, NULL /* excluded */, svc->ps_rsvc.s_term, true /* destroy */); @@ -5698,14 +5758,17 @@ pool_svc_reconf_ult(void *arg) out_cur: d_rank_list_free(current); out: + /* Do not yield between the D_FREE and the sched_end. */ + D_FREE(reconf->psc_arg); + reconf->psc_rc = rc; sched_end(reconf); ABT_cond_broadcast(reconf->psc_cv); - D_DEBUG(DB_MD, DF_UUID": end\n", DP_UUID(svc->ps_uuid)); + D_DEBUG(DB_MD, DF_UUID": end: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); } -static void -pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, - void (*func)(void *)) +static int +pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, void (*func)(void *), + void *arg) { enum ds_rsvc_state state; int rc; @@ -5720,13 +5783,13 @@ pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, if (state == DS_RSVC_DRAINING) { D_DEBUG(DB_MD, DF_UUID": end: service %s\n", DP_UUID(svc->ps_uuid), ds_rsvc_state_str(state)); - return; + return -DER_OP_CANCELED; } D_ASSERT(&svc->ps_reconf_sched == sched || &svc->ps_rfcheck_sched == sched); sched_cancel_and_wait(sched); - sched_begin(sched); + sched_begin(sched, arg); /* * An extra svc leader reference is not required, because @@ -5734,14 +5797,16 @@ pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, * * ULT tracking is achieved through sched, not a ULT handle. 
*/ - rc = dss_ult_create(func, svc, DSS_XS_SELF, 0, 0, NULL /* ult */); + rc = dss_ult_create(func, sched, DSS_XS_SELF, 0, 0, NULL /* ult */); if (rc != 0) { D_ERROR(DF_UUID": failed to create ULT: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); sched_end(sched); + return rc; } D_DEBUG(DB_MD, DF_UUID": end: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); + return 0; } static int pool_find_all_targets_by_addr(struct pool_map *map, @@ -5771,11 +5836,12 @@ cont_rf_check_cb(uuid_t pool_uuid, uuid_t cont_uuid, struct rdb_tx *tx, void *ar return 0; } +/* Must be used with pool_svc.ps_rfcheck_sched (see container_of below). */ static void pool_svc_rfcheck_ult(void *arg) { - struct pool_svc *svc = arg; - int rc; + struct pool_svc *svc = container_of(arg, struct pool_svc, ps_rfcheck_sched); + int rc; do { /* retry until some one stop the pool svc(rc == 1) or succeed */ @@ -5795,6 +5861,69 @@ pool_svc_rfcheck_ult(void *arg) ABT_cond_broadcast(svc->ps_rfcheck_sched.psc_cv); } +/* + * If map is NULL, map_version_for must be provided, and svc->ps_pool->sp_map + * will be used during reconfiguration; otherwise, map_version_for is ignored. + */ +static int +pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, uint32_t map_version_for, + bool sync_remove) +{ + struct pool_svc_reconf_arg *reconf_arg; + uint32_t v; + int rc; + + if (map == NULL) + v = map_version_for; + else + v = pool_map_get_version(map); + + if (svc->ps_reconf_sched.psc_in_progress) { + uint32_t v_in_progress; + + /* Safe to access psc_arg as long as we don't yield. 
*/ + reconf_arg = svc->ps_reconf_sched.psc_arg; + if (reconf_arg->sca_map == NULL) + v_in_progress = reconf_arg->sca_map_version_for; + else + v_in_progress = pool_map_get_version(reconf_arg->sca_map); + if (v_in_progress >= v) { + D_DEBUG(DB_MD, DF_UUID": stale request: v_in_progress=%u v=%u\n", + DP_UUID(svc->ps_uuid), v_in_progress, v); + return -DER_OP_CANCELED; + } + } + + D_ALLOC_PTR(reconf_arg); + if (reconf_arg == NULL) + return -DER_NOMEM; + reconf_arg->sca_map = map; + reconf_arg->sca_map_version_for = v; + reconf_arg->sca_sync_remove = sync_remove; + + /* + * If successful, this call passes the ownership of reconf_arg to + * pool_svc_reconf_ult. + */ + rc = pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult, reconf_arg); + if (rc != 0) { + D_FREE(reconf_arg); + return rc; + } + + if (sync_remove) { + sched_wait(&svc->ps_reconf_sched); + + rc = svc->ps_reconf_sched.psc_rc; + if (rc != 0) { + DL_ERROR(rc, DF_UUID": pool service reconfigurator", DP_UUID(svc->ps_uuid)); + return rc; + } + } + + return 0; +} + /* * Perform an update to the pool map of \a svc. * @@ -5834,6 +5963,7 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, uint32_t map_version_before; uint32_t map_version; struct pool_buf *map_buf = NULL; + struct pool_domain *node; bool updated = false; int rc; @@ -5903,13 +6033,13 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, } } } + /* * Attempt to modify the temporary pool map and save its versions * before and after. If the version hasn't changed, we are done. 
*/ map_version_before = pool_map_get_version(map); - rc = ds_pool_map_tgts_update(map, tgts, opc, exclude_rank, tgt_map_ver, - true); + rc = ds_pool_map_tgts_update(map, tgts, opc, exclude_rank, tgt_map_ver, true); if (rc != 0) D_GOTO(out_map, rc); map_version = pool_map_get_version(map); @@ -5918,6 +6048,35 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, if (map_version == map_version_before) D_GOTO(out_map, rc = 0); + /* + * If the map modification affects myself, leave it to a new PS leader + * if there's another PS replica, or reject it. + */ + node = pool_map_find_node_by_rank(map, dss_self_rank()); + if (node == NULL || !(node->do_comp.co_status & POOL_SVC_MAP_STATES)) { + d_rank_list_t *replicas; + + rc = rdb_get_ranks(svc->ps_rsvc.s_db, &replicas); + if (replicas->rl_nr == 1) { + D_ERROR(DF_UUID": rejecting rank exclusion: self removal requested\n", + DP_UUID(svc->ps_uuid)); + rc = -DER_INVAL; + } else { + /* + * The handling is unreliable, for we may become a new + * PS leader again; a more reliable implementation + * requires the currently unavailable Raft leadership + * transfer support. + */ + D_INFO(DF_UUID": resigning PS leadership: self removal requested\n", + DP_UUID(svc->ps_uuid)); + rdb_resign(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term); + rc = -DER_NOTLEADER; + } + d_rank_list_free(replicas); + goto out_map; + } + /* Write the new pool map. */ rc = pool_buf_extract(map, &map_buf); if (rc != 0) @@ -5926,6 +6085,17 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, if (rc != 0) goto out_map_buf; + /* + * Remove all undesired PS replicas (if any) before committing map, so + * that the set of PS replicas remains a subset of the pool groups. 
+ */ + rc = pool_svc_schedule_reconf(svc, map, 0 /* map_version_for */, true /* sync_remove */); + if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to remove undesired pool service replicas", + DP_UUID(svc->ps_uuid)); + goto out_map; + } + rc = rdb_tx_commit(&tx); if (rc != 0) { D_DEBUG(DB_MD, DF_UUID": failed to commit: "DF_RC"\n", @@ -5951,9 +6121,17 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, ds_rsvc_request_map_dist(&svc->ps_rsvc); - pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult); - if (opc == POOL_EXCLUDE) - pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult); + rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */); + if (rc != 0) + DL_INFO(rc, DF_UUID": failed to schedule pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + + if (opc == POOL_EXCLUDE) { + rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, + NULL /* arg */); + if (rc != 0) + DL_INFO(rc, DF_UUID": failed to schedule RF check", DP_UUID(svc->ps_uuid)); + } out_map_buf: pool_buf_free(map_buf); @@ -6841,7 +7019,7 @@ ds_pool_ranks_get_handler(crt_rpc_t *rpc) D_GOTO(out, rc = -DER_INVAL); /* Get available ranks */ - rc = ds_pool_get_ranks(in->prgi_op.pi_uuid, POOL_GROUP_MAP_STATUS, &out_ranks); + rc = ds_pool_get_ranks(in->prgi_op.pi_uuid, POOL_GROUP_MAP_STATES, &out_ranks); if (rc != 0) { D_ERROR(DF_UUID ": get ranks failed, " DF_RC "\n", DP_UUID(in->prgi_op.pi_uuid), DP_RC(rc)); diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index 8d07e66d9ea..8c8eddaa561 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -1376,7 +1376,7 @@ update_pool_group(struct ds_pool *pool, struct pool_map *map) D_DEBUG(DB_MD, DF_UUID": %u -> %u\n", DP_UUID(pool->sp_uuid), version, pool_map_get_version(map)); - rc = map_ranks_init(map, POOL_GROUP_MAP_STATUS, &ranks); + rc = map_ranks_init(map, POOL_GROUP_MAP_STATES, &ranks); if (rc != 0) return rc; diff --git 
a/src/pool/srv_util.c b/src/pool/srv_util.c index 3657b84d647..75beb2bc8d8 100644 --- a/src/pool/srv_util.c +++ b/src/pool/srv_util.c @@ -90,7 +90,7 @@ ds_pool_map_rank_up(struct pool_map *map, d_rank_t rank) return false; D_ASSERTF(rc == 1, "%d\n", rc); - return node->do_comp.co_status & POOL_GROUP_MAP_STATUS; + return node->do_comp.co_status & POOL_GROUP_MAP_STATES; } int @@ -332,12 +332,12 @@ compute_svc_reconf_objective(int svc_rf, d_rank_list_t *replicas) } /* - * Find n ranks with states in nodes but not in blacklist_0 or blacklist_1, and - * append them to list. Return the number of ranks appended or an error. + * Find n ranks with states in nodes but not in blacklist, and append them to + * list. Return the number of ranks appended or an error. */ static int find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnodes, - d_rank_list_t *blacklist_0, d_rank_list_t *blacklist_1, d_rank_list_t *list) + d_rank_list_t *blacklist, d_rank_list_t *list) { int n_appended = 0; int i; @@ -349,9 +349,7 @@ find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnode for (i = 0; i < nnodes; i++) { if (!(nodes[i].do_comp.co_status & states)) continue; - if (d_rank_list_find(blacklist_0, nodes[i].do_comp.co_rank, NULL /* idx */)) - continue; - if (d_rank_list_find(blacklist_1, nodes[i].do_comp.co_rank, NULL /* idx */)) + if (d_rank_list_find(blacklist, nodes[i].do_comp.co_rank, NULL /* idx */)) continue; rc = d_rank_list_append(list, nodes[i].do_comp.co_rank); if (rc != 0) @@ -370,7 +368,10 @@ find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnode * caller is responsible for freeing \a to_add_out and \a to_remove_out with * d_rank_list_free. * - * We desire replicas in UP or UPIN states. + * We desire replicas in POOL_SVC_MAP_STATES. The \a self replica must be in a + * desired state in \a map, or this function will return -DER_INVAL. 
All + * undesired replicas, if any, will be appended to \a to_remove, so that no + * replica is outside the pool group. * * If removals are necessary, we only append desired replicas to \a * to_remove_out after all undesired replicas have already been appended to the @@ -392,12 +393,10 @@ int ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replicas, d_rank_t self, d_rank_list_t **to_add_out, d_rank_list_t **to_remove_out) { - const pool_comp_state_t desired_states = PO_COMP_ST_UP | PO_COMP_ST_UPIN; struct pool_domain *nodes = NULL; int nnodes; int objective; d_rank_list_t *desired = NULL; - d_rank_list_t *undesired = NULL; d_rank_list_t *to_add = NULL; d_rank_list_t *to_remove = NULL; int i; @@ -409,93 +408,56 @@ ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replic objective = compute_svc_reconf_objective(svc_rf, replicas); desired = d_rank_list_alloc(0); - undesired = d_rank_list_alloc(0); to_add = d_rank_list_alloc(0); to_remove = d_rank_list_alloc(0); - if (desired == NULL || undesired == NULL || to_add == NULL || to_remove == NULL) { + if (desired == NULL || to_add == NULL || to_remove == NULL) { rc = -DER_NOMEM; goto out; } - /* Classify replicas into desired and undesired. */ + /* Classify replicas into desired and to_remove. */ for (i = 0; i < replicas->rl_nr; i++) { + d_rank_t rank = replicas->rl_ranks[i]; d_rank_list_t *list; int j; for (j = 0; j < nnodes; j++) - if (nodes[j].do_comp.co_rank == replicas->rl_ranks[i]) + if (nodes[j].do_comp.co_rank == rank) break; if (j == nnodes) /* not found (hypothetical) */ - list = undesired; - else if (nodes[j].do_comp.co_status & desired_states) + list = to_remove; + else if (nodes[j].do_comp.co_status & POOL_SVC_MAP_STATES) list = desired; else - list = undesired; - rc = d_rank_list_append(list, replicas->rl_ranks[i]); + list = to_remove; + if (rank == self && list == to_remove) { + D_ERROR("self undesired: state=%x\n", + j < nnodes ? 
nodes[j].do_comp.co_status : -1); + rc = -DER_INVAL; + goto out; + } + rc = d_rank_list_append(list, rank); if (rc != 0) goto out; } - D_DEBUG(DB_MD, "desired=%u undesired=%u objective=%d\n", desired->rl_nr, undesired->rl_nr, + D_DEBUG(DB_MD, "desired=%u undesired=%u objective=%d\n", desired->rl_nr, to_remove->rl_nr, objective); - /* - * If we have too many replicas, remove undesired ones (if any) before - * desired ones. - */ - while (desired->rl_nr + undesired->rl_nr > objective) { - rc = move_rank_except_for(self, undesired, to_remove); - if (rc == -DER_NONEXIST) - break; - else if (rc != 0) - goto out; - } - while (desired->rl_nr + undesired->rl_nr > objective) { - rc = move_rank_except_for(self, desired, to_remove); - D_ASSERT(rc != -DER_NONEXIST); - if (rc != 0) - goto out; - } - - /* If necessary, add more replicas towards the objective. */ - if (desired->rl_nr + undesired->rl_nr < objective) { - rc = find_ranks(objective - desired->rl_nr - undesired->rl_nr, desired_states, - nodes, nnodes, desired, undesired, to_add); - if (rc < 0) - goto out; - /* Copy the new ones to desired. */ - for (i = 0; i < to_add->rl_nr; i++) { - rc = d_rank_list_append(desired, to_add->rl_ranks[i]); + if (desired->rl_nr > objective) { + /* Too many replicas, remove one by one. */ + do { + rc = move_rank_except_for(self, desired, to_remove); + D_ASSERT(rc != -DER_NONEXIST); if (rc != 0) goto out; - } - } - - /* - * If there are undesired ones, try to replace as many of them as - * possible. - */ - if (undesired->rl_nr > 0) { - int n; - - rc = find_ranks(undesired->rl_nr, desired_states, nodes, nnodes, desired, undesired, - to_add); + } while (desired->rl_nr > objective); + } else if (desired->rl_nr < objective) { + /* Too few replicas, add some. */ + rc = find_ranks(objective - desired->rl_nr, POOL_SVC_MAP_STATES, nodes, nnodes, + desired, to_add); if (rc < 0) goto out; - n = rc; - /* Copy the n replacements to desired. 
*/ - for (i = 0; i < n; i++) { - rc = d_rank_list_append(desired, to_add->rl_ranks[i]); - if (rc != 0) - goto out; - } - /* Move n replicas from undesired to to_remove. */ - for (i = 0; i < n; i++) { - rc = move_rank_except_for(self, undesired, to_remove); - D_ASSERT(rc != -DER_NONEXIST); - if (rc != 0) - goto out; - } } rc = 0; @@ -507,7 +469,6 @@ ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replic d_rank_list_free(to_remove); d_rank_list_free(to_add); } - d_rank_list_free(undesired); d_rank_list_free(desired); return rc; } @@ -546,10 +507,6 @@ testu_rank_sets_belong(d_rank_list_t *x, d_rank_t *y_ranks, int y_ranks_len) static struct pool_map * testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_down_ranks) { - d_rank_list_t ranks_list = { - .rl_ranks = ranks, - .rl_nr = n_ranks - }; struct pool_buf *map_buf; struct pool_map *map; uint32_t *domains; @@ -567,8 +524,7 @@ testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_ domains[3 + i] = i; rc = gen_pool_buf(NULL /* map */, &map_buf, 1 /* map_version */, n_domains, - n_ranks, n_ranks * 1 /* ntargets */, domains, &ranks_list, - 1 /* dss_tgt_nr */); + n_ranks, n_ranks * 1 /* ntargets */, domains, 1 /* dss_tgt_nr */); D_ASSERT(rc == 0); rc = pool_map_create(map_buf, 1, &map); @@ -590,7 +546,8 @@ testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_ static void testu_plan_svc_reconfs(int svc_rf, d_rank_t ranks[], int n_ranks, d_rank_t down_ranks[], int n_down_ranks, d_rank_t replicas_ranks[], int n_replicas_ranks, - d_rank_t self, d_rank_list_t **to_add, d_rank_list_t **to_remove) + d_rank_t self, int expected_rc, d_rank_list_t **to_add, + d_rank_list_t **to_remove) { struct pool_map *map; d_rank_list_t replicas_list; @@ -602,7 +559,7 @@ testu_plan_svc_reconfs(int svc_rf, d_rank_t ranks[], int n_ranks, d_rank_t down_ replicas_list.rl_nr = n_replicas_ranks; rc = ds_pool_plan_svc_reconfs(svc_rf, map, 
&replicas_list, self, to_add, to_remove); - D_ASSERT(rc == 0); + D_ASSERTF(rc == expected_rc, "rc=%d expected_rc=%d\n", rc, expected_rc); pool_map_decref(map); } @@ -614,10 +571,11 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_list_t *to_add; d_rank_list_t *to_remove; -#define call_testu_plan_svc_reconfs \ +#define call_testu_plan_svc_reconfs(expected_rc) \ testu_plan_svc_reconfs(svc_rf, ranks, ARRAY_SIZE(ranks), down_ranks, \ ARRAY_SIZE(down_ranks), replicas_ranks, \ - ARRAY_SIZE(replicas_ranks), self, &to_add, &to_remove); + ARRAY_SIZE(replicas_ranks), self, expected_rc, &to_add, \ + &to_remove); #define call_d_rank_list_free \ d_rank_list_free(to_add); \ @@ -630,7 +588,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {}; d_rank_t replicas_ranks[] = {0, 1, 2, 3, 4}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 0); @@ -638,6 +596,16 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } + /* The PS leader itself must not be undesired. */ + { + int svc_rf = 1; + d_rank_t ranks[] = {0, 1, 2}; + d_rank_t down_ranks[] = {0}; + d_rank_t replicas_ranks[] = {0, 1, 2}; + + call_testu_plan_svc_reconfs(-DER_INVAL) + } + /* One lonely replica. 
*/ { int svc_rf = 0; @@ -645,7 +613,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {}; d_rank_t replicas_ranks[] = {0}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 0); @@ -661,7 +629,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -678,7 +646,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -695,7 +663,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1, 2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -704,17 +672,19 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } - /* A PS holds its ground when there's no replacement. */ + /* A PS removes the down rank even when there's no replacement. 
*/ { int svc_rf = 1; d_rank_t ranks[] = {0, 1, 2}; d_rank_t down_ranks[] = {2}; d_rank_t replicas_ranks[] = {0, 1, 2}; + d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); - D_ASSERT(to_remove->rl_nr == 0); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } @@ -728,7 +698,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t expected_to_add_candidates[] = {3, 4}; d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 1); D_ASSERT(testu_rank_sets_belong(to_add, expected_to_add_candidates, @@ -750,7 +720,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1, 2, 3}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -767,7 +737,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0, 1, 2}; d_rank_t expected_to_remove[] = {1, 2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, @@ -776,19 +746,21 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } - /* A PS keeps down ranks while growing. */ + /* A PS removes down ranks while growing. 
*/ { int svc_rf = 2; - d_rank_t ranks[] = {0, 1, 2, 3, 4}; + d_rank_t ranks[] = {0, 1, 2, 3, 4, 5}; d_rank_t down_ranks[] = {2}; d_rank_t replicas_ranks[] = {0, 1, 2}; - d_rank_t expected_to_add[] = {3, 4}; + d_rank_t expected_to_add[] = {3, 4, 5}; + d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); - D_ASSERT(to_remove->rl_nr == 0); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } @@ -802,7 +774,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t expected_to_remove_candidates[] = {1, 2, 3, 4, 5, 6, 7, 8}; d_rank_list_t tmp; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 4); @@ -822,15 +794,14 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {1, 3, 5, 7}; d_rank_t replicas_ranks[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; d_rank_t expected_to_add[] = {9}; - d_rank_t expected_to_remove_candidates[] = {1, 3, 5, 7}; + d_rank_t expected_to_remove[] = {1, 3, 5, 7}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); - D_ASSERT(to_remove->rl_nr == 3); - D_ASSERT(testu_rank_sets_belong(to_remove, expected_to_remove_candidates, - ARRAY_SIZE(expected_to_remove_candidates))); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } From 7229422791e82c1a2c3bf2ed8d149af13241e643 Mon Sep 17 00:00:00 2001 From: wangdi Date: Thu, 5 Oct 2023 01:08:14 -0700 Subject: [PATCH 72/80] DAOS-14450 rebuild: Only print target buffers if there are any (#13127) Only show tgts_buf if there are real targets in task structure. 
Signed-off-by: Di Wang --- src/rebuild/srv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index e16583436ce..5b1c2089266 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -1093,7 +1093,7 @@ rebuild_debug_print_queue() * This only accumulates the targets in a single task, so it doesn't * need to be very big. 200 bytes is enough for ~30 5-digit target ids */ - char tgts_buf[200]; + char tgts_buf[200] = { 0 }; int i; /* Position in stack buffer where str data should be written next */ size_t tgts_pos; @@ -1121,7 +1121,7 @@ rebuild_debug_print_queue() } D_DEBUG(DB_REBUILD, DF_UUID" op=%s ver=%u tgts=%s\n", DP_UUID(task->dst_pool_uuid), RB_OP_STR(task->dst_rebuild_op), - task->dst_map_ver, tgts_buf); + task->dst_map_ver, task->dst_tgts.pti_number > 0 ? tgts_buf : "None"); } } From 454b1c70b739db6a322f7c141b23cb5a6d55b507 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Thu, 5 Oct 2023 14:48:00 -0700 Subject: [PATCH 73/80] DAOS-623 doc: fix githook config command (#13114) Fix the githook config command. Signed-off-by: Dalton Bohning --- utils/githooks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/githooks/README.md b/utils/githooks/README.md index 0db2a5cb653..55ccb56eec6 100644 --- a/utils/githooks/README.md +++ b/utils/githooks/README.md @@ -15,7 +15,7 @@ To enable these standard githooks requires a two step process: Configure your core.hookspath as follows (Recommended): ```sh -git config.hookspath utils/githooks +git config core.hookspath utils/githooks ``` Additionally, one can copy the files into an already configured path. From 2a1e7a558b3f46b25a103b2ff32da9ff8385110d Mon Sep 17 00:00:00 2001 From: Bruno Faccini Date: Fri, 6 Oct 2023 14:45:07 +0200 Subject: [PATCH 74/80] DAOS-6412 doc: fix Lustre dynamic tuneable name (#13112) The old Lustre dynamic tuneable name had been mistakenly left in documentation (daos_enable vs now foreign_symlink_enable). 
Signed-off-by: Bruno Faccini --- docs/admin/tiering_uns.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/admin/tiering_uns.md b/docs/admin/tiering_uns.md index ca249e5c5fa..d856d0a153f 100644 --- a/docs/admin/tiering_uns.md +++ b/docs/admin/tiering_uns.md @@ -31,7 +31,8 @@ The current state of work can be summarized as follows : pools/containers as `/` relative paths. - `daos` foreign support is enabled at mount time with the `symlink=` option - present or dynamically, through the `llite.*.daos_enable` setting. + present or dynamically, through the `llite.*.foreign_symlink_enable` Lustre + dynamic tuneable. ### Building and using a DAOS-aware Lustre version From d6da0fac54a78963673580e132bebece2d7077a1 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard-intel@users.noreply.github.com> Date: Fri, 6 Oct 2023 17:08:35 +0200 Subject: [PATCH 75/80] DAOS-14306 test: Fix pool create all tests (#13115) Information on PMEM usage in /sys are asynchronously updated when a pool is destroyed. This PR adds a timeout to let them be updated after deleting a pool. 
Signed-off-by: Cedric Koch-Hofer --- src/tests/ftest/util/pool_create_all_base.py | 45 ++++++++++++++++++-- utils/ansible/ftest/vars/Rocky8.yml | 7 +-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/tests/ftest/util/pool_create_all_base.py b/src/tests/ftest/util/pool_create_all_base.py index 2ab16d42485..666f905b76a 100644 --- a/src/tests/ftest/util/pool_create_all_base.py +++ b/src/tests/ftest/util/pool_create_all_base.py @@ -4,6 +4,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ import sys +import time from avocado.core.exceptions import TestFail @@ -51,8 +52,8 @@ def get_usable_bytes(self): host_size += len(host_storage["hosts"].split(',')) scm_bytes = 0 - for scm_devices in host_storage["storage"]["scm_namespaces"]: - scm_bytes += scm_devices["mount"]["usable_bytes"] + for scm_device in host_storage["storage"]["scm_namespaces"]: + scm_bytes += scm_device["mount"]["usable_bytes"] scm_engine_bytes = min(scm_engine_bytes, scm_bytes) if host_storage["storage"]["nvme_devices"] is None: @@ -74,6 +75,26 @@ def get_usable_bytes(self): return host_size * scm_engine_bytes, host_size * nvme_engine_bytes + def find_hosts_low_scm(self, min_ratio): + """Returns list of hosts with available SCM storage below min_ratio. + + Args: + min_ratio (int): Minimal storage of available SCM. + + Returns: + list: List of host storage tuple without enough available SCM storage. + """ + result = self.dmg.storage_query_usage() + hosts = [] + for host_storage in result["response"]["HostStorage"].values(): + for scm_device in host_storage["storage"]["scm_namespaces"]: + avail_ratio = scm_device["mount"]["avail_bytes"] * 100 + avail_ratio /= scm_device["mount"]["total_bytes"] + if avail_ratio < min_ratio: + hosts.append((host_storage["hosts"], scm_device["mount"]["path"], avail_ratio)) + + return hosts + def check_pool_full_storage(self, scm_delta_bytes, nvme_delta_bytes=None, ranks=None): """Check the creation of one pool with all the storage capacity. 
@@ -200,7 +221,6 @@ def check_pool_recycling(self, pool_count, scm_delta_bytes, nvme_delta_bytes=Non nvme_delta_bytes (int, optional): Allowed difference of the NVMe pool storage. Defaults to None. """ - self.add_pool_qty(pool_count, namespace="/run/pool/*", create=False) first_pool_size = None @@ -215,6 +235,23 @@ def check_pool_recycling(self, pool_count, scm_delta_bytes, nvme_delta_bytes=Non "Pool %d created: scm_size=%d, nvme_size=%d", index, *pool_size) self.pool[index].destroy() + self.log.info("Checking SCM available storage") + timeout = 3 + while timeout > 0: + hosts = self.find_hosts_low_scm(90) + if not hosts: + break + for it in hosts: + self.log.info( + "Find hosts without enough available SCM: " + "timeout=%is, hosts=%s, path=%s, ratio=%f%%", + timeout, *it) + time.sleep(1) + timeout -= 1 + self.assertNotEqual( + 0, timeout, + "Destroying pool did not restore available SCM storage space") + if first_pool_size is None: first_pool_size = pool_size continue @@ -254,7 +291,7 @@ def check_pool_distribution(self, scm_delta_bytes, nvme_delta_bytes=None): engine. Defaults to None. 
""" self.log.info("Retrieving available size") - result = self.server_managers[0].dmg.storage_query_usage() + result = self.dmg.storage_query_usage() scm_used_bytes = [sys.maxsize, 0] if nvme_delta_bytes is not None: diff --git a/utils/ansible/ftest/vars/Rocky8.yml b/utils/ansible/ftest/vars/Rocky8.yml index a64b0413c5a..aecb5478787 100644 --- a/utils/ansible/ftest/vars/Rocky8.yml +++ b/utils/ansible/ftest/vars/Rocky8.yml @@ -8,12 +8,13 @@ daos_base_deps: - bash - gdb - glibc + - jq + - lbzip2 + - mercury - platform-python - python36 - - tree - systemd - - lbzip2 - - jq + - tree daos_dev_deps: rpms: From 5953189d6feb34d35142a240818a3ab2b9f3c289 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Fri, 6 Oct 2023 08:10:51 -0700 Subject: [PATCH 76/80] DAOS-14469 gurt: Add D_LOG_FILE_APPEND_RANK env (#13120) - Add new D_LOG_FILE_APPEND_RANK environment variable - When set, will cause a first crt_rank_self_set() invocation to rename a log file and append rank to it. Required-githooks: true Signed-off-by: Alexander A Oganezov --- src/cart/README.env | 7 ++++ src/cart/crt_group.c | 1 + src/cart/crt_init.c | 2 +- src/gurt/dlog.c | 44 +++++++++++++++++++++++-- src/include/gurt/debug.h | 4 +++ src/include/gurt/dlog.h | 1 + src/tests/ftest/cart/util/cart_utils.py | 2 +- src/tests/ftest/daos_test/rebuild.yaml | 2 ++ src/tests/ftest/daos_test/suite.yaml | 2 ++ src/tests/ftest/launch.py | 1 + src/tests/ftest/util/daos_core_base.py | 1 + 11 files changed, 63 insertions(+), 4 deletions(-) diff --git a/src/cart/README.env b/src/cart/README.env index edfbb39c561..58df8cf2baf 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -26,6 +26,13 @@ This file lists the environment variables used in CaRT. in D_LOG_FILE. If not defined or setting to zero value will result in this feature being disabled. + . D_LOG_FILE_APPEND_RANK + Set this to a non-zero value to create separate log files based upon the + process cart rank. 
When enabled this feature will rename the existing log file + and append rank to the filename upon the first invocation of rank setting. + Subsequent settings of the rank within the process are treated as noops. + As ranks are only set on the server, this is a server-only feature. + . D_LOG_SIZE DAOS debug logs (both server and client) have a 1GB file size limit by default. When this limit is reached, the current log file is closed and diff --git a/src/cart/crt_group.c b/src/cart/crt_group.c index fe539944c90..7f6e79f6470 100644 --- a/src/cart/crt_group.c +++ b/src/cart/crt_group.c @@ -2608,6 +2608,7 @@ crt_rank_self_set(d_rank_t rank, uint32_t group_version_min) } } + d_log_rank_setup(rank); unlock: D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); out: diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index a9a912dbd8a..7a4f83c57c8 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -72,7 +72,7 @@ dump_envariables(void) "FI_UNIVERSE_SIZE", "CRT_ENABLE_MEM_PIN", "FI_OFI_RXM_USE_SRX", "D_LOG_FLUSH", "CRT_MRC_ENABLE", "CRT_SECONDARY_PROVIDER", "D_PROVIDER_AUTH_KEY", "D_PORT_AUTO_ADJUST", - "D_POLL_TIMEOUT"}; + "D_POLL_TIMEOUT", "D_LOG_FILE_APPEND_RANK"}; D_INFO("-- ENVARS: --\n"); for (i = 0; i < ARRAY_SIZE(envars); i++) { diff --git a/src/gurt/dlog.c b/src/gurt/dlog.c index 3a720e3c942..9dc94ab7914 100644 --- a/src/gurt/dlog.c +++ b/src/gurt/dlog.c @@ -83,6 +83,8 @@ struct d_log_state { int stdout_isatty; /* non-zero if stdout is a tty */ int stderr_isatty; /* non-zero if stderr is a tty */ int flush_pri; /* flush priority */ + bool append_rank; /* append rank to the log filename */ + bool rank_appended; /* flag to indicate if rank is already appended */ #ifdef DLOG_MUTEX pthread_mutex_t clogmux; /* protect clog in threaded env */ #endif @@ -882,7 +884,6 @@ d_log_open(char *tag, int maxfac_hint, int default_mask, int stderr_mask, env = getenv(D_LOG_FILE_APPEND_PID_ENV); if (logfile != NULL && env != NULL) { if (strcmp(env, "0") != 0) { - /* Append pid/tgid to 
log file name */ rc = asprintf(&buffer, "%s.%d", logfile, getpid()); if (buffer != NULL && rc != -1) logfile = buffer; @@ -893,6 +894,10 @@ d_log_open(char *tag, int maxfac_hint, int default_mask, int stderr_mask, } } + env = getenv(D_LOG_FILE_APPEND_RANK_ENV); + if (env && strcmp(env, "0") != 0) + mst.append_rank = true; + /* quick sanity check (mst.tag is non-null if already open) */ if (d_log_xst.tag || !tag || (maxfac_hint < 0) || (default_mask & ~DLOG_PRIMASK) || @@ -1032,10 +1037,45 @@ d_log_open(char *tag, int maxfac_hint, int default_mask, int stderr_mask, if (buffer) free(buffer); if (newtag) - free(newtag); /* was never installed */ + free(newtag); /* was never installed */ return -1; } +void d_log_rank_setup(int rank) +{ + char *filename = NULL; + int rc; + + clog_lock(); + if (!mst.append_rank || !mst.log_file) + goto unlock; + + if (mst.rank_appended == true) + goto unlock; + + /* Note: Can't use D_* allocation macros for mst.log_file */ + rc = asprintf(&filename, "%s.rank=%d", mst.log_file, rank); + if (filename == NULL || rc == -1) { + fprintf(stderr, "Failed to asprintf for file=%s rank=%d\n", + mst.log_file, rank); + goto unlock; + } + + rc = rename(mst.log_file, filename); + if (rc) { + dlog_print_err(errno, "failed to rename log file\n"); + free(filename); + goto unlock; + } + + free(mst.log_file); + mst.log_file = filename; + mst.rank_appended = true; + +unlock: + clog_unlock(); +} + /* * d_log_close: close off an clog and release any allocated resources * (e.g. 
as part of an orderly shutdown, after all worker threads have diff --git a/src/include/gurt/debug.h b/src/include/gurt/debug.h index 1dbe4cc9b3d..f590dae543e 100644 --- a/src/include/gurt/debug.h +++ b/src/include/gurt/debug.h @@ -51,6 +51,10 @@ extern void (*d_alt_assert)(const int, const char*, const char*, const int); /**< Env to specify log file pid append to filename*/ #define D_LOG_FILE_APPEND_PID_ENV "D_LOG_FILE_APPEND_PID" +/**< Env to specify rank to append to the log filename */ +#define D_LOG_FILE_APPEND_RANK_ENV "D_LOG_FILE_APPEND_RANK" + +/**< Env to specify log truncation option */ #define D_LOG_TRUNCATE_ENV "D_LOG_TRUNCATE" /**< Env to specify flush priority */ diff --git a/src/include/gurt/dlog.h b/src/include/gurt/dlog.h index 821bd8cbe9c..fd9106597bd 100644 --- a/src/include/gurt/dlog.h +++ b/src/include/gurt/dlog.h @@ -357,6 +357,7 @@ void d_log_fini(void); */ void d_log_close(void); +void d_log_rank_setup(int rank); /** * Reapplies the masks set in D_LOG_MASK. Can be called after adding new * log facilities to ensure that the mask is set appropriately for the diff --git a/src/tests/ftest/cart/util/cart_utils.py b/src/tests/ftest/cart/util/cart_utils.py index cb6712fb55a..c9222c47874 100644 --- a/src/tests/ftest/cart/util/cart_utils.py +++ b/src/tests/ftest/cart/util/cart_utils.py @@ -230,7 +230,7 @@ def get_env(self): output_filename_path = os.path.join(log_path, log_dir, log_filename).replace(";", "_") env = " --output-filename {!s}".format(output_filename_path) env += " -x D_LOG_FILE={!s}".format(log_file) - env += " -x D_LOG_FILE_APPEND_PID=1" + env += " -x D_LOG_FILE_APPEND_PID=1 -x D_LOG_FILE_APPEND_RANK=1 " env += yaml_envs diff --git a/src/tests/ftest/daos_test/rebuild.yaml b/src/tests/ftest/daos_test/rebuild.yaml index 52d9b893c03..6c1c3f21c4b 100644 --- a/src/tests/ftest/daos_test/rebuild.yaml +++ b/src/tests/ftest/daos_test/rebuild.yaml @@ -23,6 +23,7 @@ server_config: env_vars: - DD_MASK=group_metadata_only,io,epc,rebuild - 
D_LOG_FILE_APPEND_PID=1 + - D_LOG_FILE_APPEND_RANK=1 storage: auto 1: pinned_numa_node: 1 @@ -34,6 +35,7 @@ server_config: env_vars: - DD_MASK=group_metadata_only,io,epc,rebuild - D_LOG_FILE_APPEND_PID=1 + - D_LOG_FILE_APPEND_RANK=1 storage: auto transport_config: allow_insecure: false diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index a016e1937c9..7f5f444a32a 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -51,6 +51,7 @@ server_config: env_vars: - DD_MASK=mgmt,io,md,epc,rebuild,any - D_LOG_FILE_APPEND_PID=1 + - D_LOG_FILE_APPEND_RANK=1 - D_LOG_FLUSH=DEBUG - FI_LOG_LEVEL=warn - D_LOG_STDERR_IN_LOG=1 @@ -65,6 +66,7 @@ server_config: env_vars: - DD_MASK=mgmt,io,md,epc,rebuild,any - D_LOG_FILE_APPEND_PID=1 + - D_LOG_FILE_APPEND_RANK=1 - D_LOG_FLUSH=DEBUG - FI_LOG_LEVEL=warn - D_LOG_STDERR_IN_LOG=1 diff --git a/src/tests/ftest/launch.py b/src/tests/ftest/launch.py index 1b944aef468..ca958280e0d 100755 --- a/src/tests/ftest/launch.py +++ b/src/tests/ftest/launch.py @@ -1083,6 +1083,7 @@ def _set_test_environment(self, servers, clients, list_tests, provider, insecure os.environ["DAOS_TEST_SHARED_DIR"], "daos_test", "apps") os.environ["D_LOG_FILE"] = os.path.join(os.environ["DAOS_TEST_LOG_DIR"], "daos.log") os.environ["D_LOG_FILE_APPEND_PID"] = "1" + os.environ["D_LOG_FILE_APPEND_RANK"] = "1" # Assign the default value for transport configuration insecure mode os.environ["DAOS_INSECURE_MODE"] = str(insecure_mode) diff --git a/src/tests/ftest/util/daos_core_base.py b/src/tests/ftest/util/daos_core_base.py index cc560e3e0fd..428c5290230 100644 --- a/src/tests/ftest/util/daos_core_base.py +++ b/src/tests/ftest/util/daos_core_base.py @@ -86,6 +86,7 @@ def start_server_managers(self, force=False): env_dict["CRT_CTX_SHARE_ADDR"] = "1" env_dict["COVFILE"] = "/tmp/test.cov" env_dict["D_LOG_FILE_APPEND_PID"] = "1" + env_dict["D_LOG_FILE_APPEND_RANK"] = "1" if "CRT_CTX_NUM" not in env_dict 
or \ int(env_dict["CRT_CTX_NUM"]) < int(targets): env_dict["CRT_CTX_NUM"] = str(targets) From d441ebc7a4d73c3a6b9c56b128e510bdabe8c592 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 6 Oct 2023 12:43:01 -0400 Subject: [PATCH 77/80] DAOS-3672 test: Verify pool space usage via sys commands (#13045) Automating a manual test that verifies the pool space usage with system commands. Signed-off-by: Phil Henderson --- src/tests/ftest/pool/verify_space.py | 350 +++++++++++++++++++ src/tests/ftest/pool/verify_space.yaml | 41 +++ src/tests/ftest/util/apricot/apricot/test.py | 7 +- src/tests/ftest/util/command_utils.py | 17 +- src/tests/ftest/util/general_utils.py | 38 +- src/tests/ftest/util/server_utils.py | 2 +- src/tests/ftest/util/test_utils_pool.py | 4 + 7 files changed, 429 insertions(+), 30 deletions(-) create mode 100644 src/tests/ftest/pool/verify_space.py create mode 100644 src/tests/ftest/pool/verify_space.yaml diff --git a/src/tests/ftest/pool/verify_space.py b/src/tests/ftest/pool/verify_space.py new file mode 100644 index 00000000000..9f392349296 --- /dev/null +++ b/src/tests/ftest/pool/verify_space.py @@ -0,0 +1,350 @@ +""" + (C) Copyright 2023 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import os +import re + +from apricot import TestWithServers + +from exception_utils import CommandFailure +from general_utils import human_to_bytes +from ior_utils import run_ior +from job_manager_utils import get_job_manager +from run_utils import run_remote + + +def compare_initial(rank, pool_size): + """Determine if all of the pool size is greater than or equal to the available. 
+ + Args: + rank (int): server rank + pool_size (list): list of pool_size dictionaries + + Returns: + bool: is all of the pool size is greater than or equal to the available + """ + current_size = human_to_bytes(pool_size[-1]['data'][rank]['size']) + current_avail = human_to_bytes(pool_size[-1]['data'][rank]['avail']) + return current_size >= current_avail + + +def compare_equal(rank, pool_size): + """Determine if the previous value equal the current value. + + Args: + rank (int): server rank + pool_size (list): list of pool_size dictionaries + + Returns: + bool: does the previous value equal the current value + """ + previous_avail = human_to_bytes(pool_size[-2]['data'][rank]['avail']) + current_avail = human_to_bytes(pool_size[-1]['data'][rank]['avail']) + return previous_avail == current_avail + + +def compare_reduced(rank, pool_size): + """Determine if the previous value is greater than the current value. + + Args: + rank (int): server rank + pool_size (list): list of pool_size dictionaries + + Returns: + bool: does the previous value equal the current value + """ + previous_avail = human_to_bytes(pool_size[-2]['data'][rank]['avail']) + current_avail = human_to_bytes(pool_size[-1]['data'][rank]['avail']) + return previous_avail > current_avail + + +class VerifyPoolSpace(TestWithServers): + """Verify pool space with system commands. + + :avocado: recursive + """ + + def _query_pool_size(self, description, pools): + """Query the pool size for the specified pools. + + Args: + description (str): pool description + pools (list): list of pools to query + """ + self.log_step(f'Query pool information for {description}') + for pool in pools: + pool.query() + + def _create_pools(self, description, namespaces): + """Create the specified number of pools. 
+ + Args: + description (str): pool description + namespaces (list): pool namespaces + + Returns: + list: a list of created pools + """ + pools = [] + self.log_step(' '.join(['Create', description]), True) + for item in namespaces: + namespace = os.path.join(os.sep, 'run', '_'.join(['pool', 'rank', str(item)]), '*') + pools.append(self.get_pool(namespace=namespace)) + self._query_pool_size(description, pools) + return pools + + def _write_data(self, description, ior_kwargs, container, block_size): + """Write data using ior to the specified pool and container. + + Args: + description (str): pool description + ior_kwargs (dict): arguments to use to run ior + container (TestContainer): the container in which to write data + block_size (str): block size to use with the ior + """ + self.log_step(f'Writing data ({block_size} block size) to a container in {description}') + ior_kwargs['pool'] = container.pool + ior_kwargs['container'] = container + ior_kwargs['ior_params']['block_size'] = block_size + try: + run_ior(**ior_kwargs) + except CommandFailure as error: + self.fail(f'IOR write to {description} failed, {error}') + + def _get_system_pool_size(self, description, scm_mounts): + """Get the pool size information from the df system command. 
+ + Args: + description (str): pool description + scm_mounts (list): mount points used by the engine ranks + + Returns: + dict: the df command information per server rank + """ + system_pool_size = {} + self.log_step(f'Collect system-level DAOS mount information for {description}') + fields = ('source', 'size', 'used', 'avail', 'pcent', 'target') + command = f"df -h --output={','.join(fields)} | grep -E '{'|'.join(scm_mounts)}'" + result = run_remote(self.log, self.server_managers[0].hosts, command, stderr=True) + if not result.passed: + self.fail('Error collecting system level daos mount information') + for data in result.output: + for line in data.stdout: + info = re.split(r'\s+', line) + if len(info) >= len(fields): + for rank in self.server_managers[0].get_host_ranks(data.hosts): + system_pool_size[rank] = { + field: info[index] for index, field in enumerate(fields)} + if len(system_pool_size) != len(self.server_managers[0].hosts): + self.fail(f'Error obtaining system pool data for all hosts: {system_pool_size}') + return system_pool_size + + def _compare_system_pool_size(self, pool_size, compare_methods): + """Compare the pool size information from the system command. 
+ + Args: + pool_size (list): the list of pool size information + compare_methods (list): a list of compare methods to execute per rank + """ + self.log.info('Verifying system reported pool size for %s', pool_size[-1]['label']) + self.log.debug( + ' Rank Mount Previous (Avail/Size) Current (Avail/Size) Compare Status') + self.log.debug( + ' ---- ---------- --------------------- --------------------- ------- ------') + overall = True + for rank in sorted(pool_size[-1]['data'].keys()): + status = compare_methods[rank](rank, pool_size) + current = pool_size[-1]['data'][rank] + if len(pool_size) > 1: + previous = pool_size[-2]['data'][rank] + else: + previous = {'size': 'None', 'avail': 'None'} + if compare_methods[rank] is compare_initial: + compare = 'cA Step %s: %s", self._test_step, message) self._test_step += 1 diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index f990d80b25b..df4b1a72a58 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -4,14 +4,15 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ # pylint: disable=too-many-lines -from logging import getLogger +import contextlib from datetime import datetime from getpass import getuser +import json +from logging import getLogger +import os import re -import time import signal -import os -import json +import time from avocado.utils import process from ClusterShell.NodeSet import NodeSet @@ -168,6 +169,14 @@ def with_exports(self): """ return command_as_user(self.with_bind, self.run_user, self.env) + @contextlib.contextmanager + def no_exception(self): + """Temporarily disable raising exceptions for failed commands.""" + original_value = self.exit_status_exception + self.exit_status_exception = False + yield + self.exit_status_exception = original_value + def run(self, raise_exception=None): """Run the command. 
diff --git a/src/tests/ftest/util/general_utils.py b/src/tests/ftest/util/general_utils.py index 4e5b345dde2..6174ed7a325 100644 --- a/src/tests/ftest/util/general_utils.py +++ b/src/tests/ftest/util/general_utils.py @@ -147,32 +147,24 @@ def human_to_bytes(size): DaosTestError: when an invalid human readable size value is provided Returns: - int: value translated to bytes. + int|float: value translated to bytes. """ - conversion_sizes = ("", "k", "m", "g", "t", "p", "e") - conversion = { - 1000: ["{}b".format(item) for item in conversion_sizes], - 1024: ["{}ib".format(item) for item in conversion_sizes], - } - match = re.findall(r"([0-9.]+)\s*([a-zA-Z]+|)", size) + conversion = {} + for index, unit in enumerate(('', 'k', 'm', 'g', 't', 'p', 'e')): + conversion[unit] = 1000 ** index + conversion[f'{unit}b'] = 1000 ** index + conversion[f'{unit}ib'] = 1024 ** index try: - multiplier = 1 - if match[0][1]: - multiplier = -1 - unit = match[0][1].lower() - for item, units in conversion.items(): - if unit in units: - multiplier = item ** units.index(unit) - break - if multiplier == -1: - raise DaosTestError( - "Invalid unit detected, not in {}: {}".format( - conversion[1000] + conversion[1024][1:], unit)) - value = float(match[0][0]) * multiplier - except IndexError as error: - raise DaosTestError( - "Invalid human readable size format: {}".format(size)) from error + match = re.findall(r'([0-9.]+)\s*([a-zA-Z]+|)', str(size)) + number = match[0][0] + unit = match[0][1].lower() + except (TypeError, IndexError) as error: + raise DaosTestError(f'Invalid human readable size format: {size}') from error + try: + value = float(number) * conversion[unit] + except KeyError as error: + raise DaosTestError(f'Invalid unit detected, not in {conversion.keys()}: {unit}') from error return int(value) if value.is_integer() else value diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 05ff15a1ac3..1e9af207308 100644 --- 
a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -346,7 +346,7 @@ def clean_mount(self, hosts, mount, verbose=True, index=None): # Remove the shared memory segment associated with this io server self.log.debug("Removing the shared memory segment") command = "sudo ipcrm -M {}".format(self.D_TM_SHARED_MEMORY_KEY + index) - run_remote(self.log, self._hosts, command, verbose) + run_remote(self.log, mounted_hosts, command, verbose) # Dismount the scm mount point self.log.debug("Dismount the %s mount point", mount) diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 0826ea7d864..627599bdec6 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -330,6 +330,10 @@ def dmg(self, value): raise TypeError("Invalid 'dmg' object type: {}".format(type(value))) self._dmg = value + def no_exception(self): + """Temporarily disable raising exceptions for failed commands.""" + return self.dmg.no_exception() + def skip_cleanup(self): """Prevent pool from being removed during cleanup. From aa7ecb7c0286a01ab5e0411924fd897392014df5 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 6 Oct 2023 12:57:22 -0700 Subject: [PATCH 78/80] DAOS-12859 test: use pool and container labels (pass 2) (#12241) - Update tests to use labels instead of UUIDs. 
- Add ExecutableCommand.temp_exit_status_exception - Add ExecutableCommand.temp_run_user - Add TestContainer.get_acl - Add TestContainer.get_attr - Add TestContainer.overwrite_acl - Add TestContainer.list_attrs - Add TestContainer.set_owner - Cleanup ContSecurityTestBase - Cleanup PoolSecurityTestBase Signed-off-by: Dalton Bohning --- src/tests/ftest/container/query_attribute.py | 96 ++---- src/tests/ftest/security/cont_acl.py | 47 ++- src/tests/ftest/security/cont_create_acl.py | 40 +-- src/tests/ftest/security/cont_delete_acl.py | 50 ++-- src/tests/ftest/security/cont_get_acl.py | 60 ++-- .../ftest/security/cont_overwrite_acl.py | 75 ++--- src/tests/ftest/security/cont_update_acl.py | 89 ++---- src/tests/ftest/security/pool_acl.py | 8 +- src/tests/ftest/security/pool_groups.py | 5 +- .../ftest/server/multiengine_persocket.py | 11 +- src/tests/ftest/util/command_utils.py | 12 + .../ftest/util/cont_security_test_base.py | 277 +++--------------- src/tests/ftest/util/daos_utils.py | 25 +- src/tests/ftest/util/general_utils.py | 14 - .../ftest/util/pool_security_test_base.py | 107 ++++--- src/tests/ftest/util/test_utils_container.py | 158 ++++++++-- src/tests/ftest/util/test_utils_pool.py | 10 +- 17 files changed, 410 insertions(+), 674 deletions(-) diff --git a/src/tests/ftest/container/query_attribute.py b/src/tests/ftest/container/query_attribute.py index a9dd78d00aa..7a8cdd19501 100644 --- a/src/tests/ftest/container/query_attribute.py +++ b/src/tests/ftest/container/query_attribute.py @@ -55,12 +55,6 @@ class ContainerQueryAttributeTest(TestWithServers): :avocado: recursive """ - def __init__(self, *args, **kwargs): - """Initialize a ContainerQueryAttribute object.""" - super().__init__(*args, **kwargs) - self.expected_cont_uuid = None - self.daos_cmd = None - def test_container_query_attr(self): """JIRA ID: DAOS-4640 @@ -74,25 +68,23 @@ def test_container_query_attr(self): :avocado: tags=all,full_regression :avocado: tags=vm :avocado: tags=container,daos_cmd - 
:avocado: tags=cont_query_attr,test_container_query_attr + :avocado: tags=ContainerQueryAttributeTest,test_container_query_attr """ # Create a pool and a container. - self.add_pool() - self.add_container(pool=self.pool) - - self.daos_cmd = self.get_daos_command() + pool = self.get_pool() + container = self.get_container(pool) # Call daos container query, obtain pool and container UUID, and # compare against those used when creating the pool and the container. - kwargs = { - "pool": self.pool.uuid, - "cont": self.container.uuid - } - data = self.daos_cmd.container_query(**kwargs)['response'] + data = container.query()['response'] actual_pool_uuid = data['pool_uuid'] actual_cont_uuid = data['container_uuid'] - self.assertEqual(actual_pool_uuid, self.pool.uuid.lower()) - self.assertEqual(actual_cont_uuid, self.container.uuid.lower()) + self.assertEqual( + actual_pool_uuid, pool.uuid.lower(), + 'pool UUID from cont query does not match pool create') + self.assertEqual( + actual_cont_uuid, container.uuid.lower(), + 'container UUID from cont query does not match cont create') # Prepare attr-value pairs. Use the test_strings in value for the first # 7 and in attr for the next 7. @@ -111,12 +103,9 @@ def test_container_query_attr(self): expected_attrs = [] for attr_value in attr_values: - self.daos_cmd.container_set_attr( - pool=actual_pool_uuid, cont=actual_cont_uuid, - attrs={attr_value[0]: attr_value[1]}) + container.set_attr(attrs={attr_value[0]: attr_value[1]}) - kwargs["attr"] = attr_value[0] - data = self.daos_cmd.container_get_attr(**kwargs)['response'] + data = container.get_attr(attr_value[0])['response'] actual_val = base64.b64decode(data["value"]).decode() if attr_value[1] in escape_to_not: @@ -141,15 +130,8 @@ def test_container_query_attr(self): # Verify that attr-lists works with test_strings. 
expected_attrs.sort() - kwargs = { - "pool": actual_pool_uuid, - "cont": actual_cont_uuid - } - data = self.daos_cmd.container_list_attrs(**kwargs)['response'] - actual_attrs = list(data) - actual_attrs.sort() - self.log.debug(str(actual_attrs)) - self.assertEqual(actual_attrs, expected_attrs) + actual_attrs = sorted(list(container.list_attrs()['response'])) + self.assertEqual(actual_attrs, expected_attrs, 'list-attrs does not match set-attr') def test_container_query_attrs(self): """JIRA ID: DAOS-4640 @@ -164,25 +146,23 @@ def test_container_query_attrs(self): :avocado: tags=all,full_regression :avocado: tags=vm :avocado: tags=container,daos_cmd - :avocado: tags=cont_query_attr,test_container_query_attrs + :avocado: tags=ContainerQueryAttributeTest,test_container_query_attrs """ # Create a pool and a container. - self.add_pool() - self.add_container(pool=self.pool) - - self.daos_cmd = self.get_daos_command() + pool = self.get_pool() + container = self.get_container(pool) # Call daos container query, obtain pool and container UUID, and # compare against those used when creating the pool and the container. - kwargs = { - "pool": self.pool.uuid, - "cont": self.container.uuid - } - data = self.daos_cmd.container_query(**kwargs)['response'] + data = container.query()['response'] actual_pool_uuid = data['pool_uuid'] actual_cont_uuid = data['container_uuid'] - self.assertEqual(actual_pool_uuid, self.pool.uuid.lower()) - self.assertEqual(actual_cont_uuid, self.container.uuid.lower()) + self.assertEqual( + actual_pool_uuid, pool.uuid.lower(), + 'pool UUID from cont query does not match pool create') + self.assertEqual( + actual_cont_uuid, container.uuid.lower(), + 'container UUID from cont query does not match cont create') # Prepare attr-value pairs. Use the test_strings in value for the first # 7 and in attr for the next 7. 
@@ -200,17 +180,10 @@ def test_container_query_attrs(self): errors = [] # bulk-set all attributes - self.daos_cmd.container_set_attr( - pool=actual_pool_uuid, cont=actual_cont_uuid, - attrs=attr_values) - - attrs = [] - for attr in attr_values: - attrs.append(attr) + container.set_attr(attrs=attr_values) # bulk-get all attributes - kwargs["attrs"] = attrs - data = self.daos_cmd.container_get_attrs(**kwargs)['response'] + data = container.get_attr(list(attr_values))['response'] for attr_resp in data: key = attr_resp["name"] @@ -247,24 +220,17 @@ def test_list_attrs_long(self): :avocado: tags=all,full_regression :avocado: tags=vm :avocado: tags=container,daos_cmd - :avocado: tags=cont_list_attrs,test_list_attrs_long + :avocado: tags=ContainerQueryAttributeTest,test_list_attrs_long """ # Create a pool and a container. - self.add_pool() - self.add_container(pool=self.pool) - - self.daos_cmd = self.get_daos_command() + pool = self.get_pool() + container = self.get_container(pool) expected_attrs = {"attr" + str(idx): "val" + str(idx) for idx in range(50)} - self.container.set_attr(attrs=expected_attrs) + container.set_attr(attrs=expected_attrs) - kwargs = { - "pool": self.pool.uuid, - "cont": self.container.uuid - } - response = self.daos_cmd.container_list_attrs(**kwargs)['response'] - actual_attr_names = sorted(list(response)) + actual_attr_names = sorted(list(container.list_attrs()['response'])) expected_attr_names = sorted(expected_attrs.keys()) self.assertEqual( actual_attr_names, expected_attr_names, "Unexpected output from list_attrs") diff --git a/src/tests/ftest/security/cont_acl.py b/src/tests/ftest/security/cont_acl.py index 1d8c21ff0cb..dd3a6a69733 100644 --- a/src/tests/ftest/security/cont_acl.py +++ b/src/tests/ftest/security/cont_acl.py @@ -91,26 +91,25 @@ def test_container_user_acl(self): " base_acl_entries= %s\n", base_acl_entries) self.add_pool() secTestBase.create_acl_file(acl_file_name, base_acl_entries) - self.container_uuid = 
self.create_container_with_daos( - self.pool, None, acl_file_name) + self.container = self.create_container_with_daos(self.pool, None, acl_file_name) # (3)Verify container permissions rw, rw-attribute permission_type = "attribute" self.log.info("(3)==>Verify container permission %s", permission_type) - self.update_container_acl( - secTestBase.acl_entry(test_user_type, test_user, "rw")) + with self.container.no_exception(): + self.container.update_acl(entry=secTestBase.acl_entry(test_user_type, test_user, "rw")) self.verify_cont_rw_attribute( - "write", "pass", attribute_name, attribute_value) + self.container, "write", "pass", attribute_name, attribute_value) self.setup_container_acl_and_permission( - test_user_type, test_user, permission_type, cont_permission) + self.container, test_user_type, test_user, permission_type, cont_permission) self.log.info( "(3.1)Verify container_attribute: write, expect: %s", expect_write) self.verify_cont_rw_attribute( - "write", expect_write, attribute_name, attribute_value) + self.container, "write", expect_write, attribute_name, attribute_value) self.log.info( "(3.2)Verify container_attribute: read, expect: %s", expect_read) - self.verify_cont_rw_attribute("read", expect_read, attribute_name) + self.verify_cont_rw_attribute(self.container, "read", expect_read, attribute_name) # (4)Verify container permissions tT rw-property permission_type = "property" @@ -119,16 +118,17 @@ def test_container_user_acl(self): "(4.1)Update container-acl %s, %s, permission_type: %s with %s", test_user_type, test_user, permission_type, cont_permission) self.setup_container_acl_and_permission( - test_user_type, test_user, permission_type, cont_permission) + self.container, test_user_type, test_user, permission_type, cont_permission) self.log.info("(4.2)Verify container_attribute: read, expect: %s", expect_read) - self.verify_cont_rw_property("read", expect_read) + self.verify_cont_rw_property(self.container, "read", expect_read) 
self.log.info("(4.3)Verify container_attribute: write, expect: %s", expect_write) - self.verify_cont_rw_property("write", expect_write, property_name, property_value) + self.verify_cont_rw_property( + self.container, "write", expect_write, property_name, property_value) # Update container label so it is in-sync if expect_write == "pass" and property_name.lower() == "label": self.container.label.update(property_value) self.log.info("(4.4)Verify container_attribute: read, expect: %s", expect_read) - self.verify_cont_rw_property("read", expect_read) + self.verify_cont_rw_property(self.container, "read", expect_read) # (5)Verify container permissions aA, rw-acl permission_type = "acl" @@ -138,13 +138,13 @@ def test_container_user_acl(self): test_user_type, test_user, permission_type, cont_permission) expect = "pass" # User who created the container has full acl access. self.setup_container_acl_and_permission( - test_user_type, test_user, permission_type, cont_permission) + self.container, test_user_type, test_user, permission_type, cont_permission) self.log.info("(5.2)Verify container_acl: write, expect: %s", expect) self.verify_cont_rw_acl( - "write", expect, secTestBase.acl_entry( + self.container, "write", expect, secTestBase.acl_entry( test_user_type, test_user, cont_permission)) self.log.info("(5.3)Verify container_acl: read, expect: %s", expect) - self.verify_cont_rw_acl("read", expect) + self.verify_cont_rw_acl(self.container, "read", expect) # (6)Verify container permission o, set-owner self.log.info("(6)==>Verify container permission o, set-owner") @@ -157,11 +157,10 @@ def test_container_user_acl(self): " %s with %s", test_user_type, test_user, permission_type, cont_permission) self.setup_container_acl_and_permission( - test_user_type, test_user, permission_type, cont_permission) - self.log.info("(6.2)Verify container_ownership: write, expect: %s", - expect) + self.container, test_user_type, test_user, permission_type, cont_permission) + 
self.log.info("(6.2)Verify container_ownership: write, expect: %s", expect) self.verify_cont_set_owner( - expect, new_test_user + "@", new_test_group + "@") + self.container, expect, new_test_user + "@", new_test_group + "@") # Verify container permission A acl-write after set container # to a different owner. @@ -170,7 +169,7 @@ def test_container_user_acl(self): expect = "deny" self.log.info("(6.3)Verify container_acl write after changed " "ownership: expect: %s", expect) - self.verify_cont_rw_acl("write", expect, + self.verify_cont_rw_acl(self.container, "write", expect, secTestBase.acl_entry( test_user_type, test_user, cont_permission)) @@ -188,12 +187,12 @@ def test_container_user_acl(self): p_permission = "rct" if cont_permission == "": expect = "deny" - self.update_container_acl(secTestBase.acl_entry(test_user_type, - test_user, - c_permission)) + with self.container.no_exception(): + self.container.update_acl( + entry=secTestBase.acl_entry(test_user_type, test_user, c_permission)) self.update_pool_acl_entry( "update", secTestBase.acl_entry("user", "OWNER", p_permission)) - self.verify_cont_delete(expect) + self.verify_cont_delete(self.container, expect) if expect == "pass": # Container deleted self.container = None diff --git a/src/tests/ftest/security/cont_create_acl.py b/src/tests/ftest/security/cont_create_acl.py index d8712307b10..136adeade77 100644 --- a/src/tests/ftest/security/cont_create_acl.py +++ b/src/tests/ftest/security/cont_create_acl.py @@ -42,18 +42,18 @@ def test_container_basics(self): # Getting the default ACL list expected_acl = generate_acl_file("default", acl_args) - # 1. Create a pool and obtain its UUID + # 1. Create a pool self.log.info("===> Creating a pool with no ACL file passed") - pool_uuid = self.create_pool_with_dmg() + pool = self.get_pool() # 2. 
Create a container with no ACL file passed self.log.info("===> Creating a container with no ACL file passed") - self.container_uuid = self.create_container_with_daos(self.pool) + container = self.create_container_with_daos(pool) - if not self.container_uuid: + if not container: self.fail(" An expected container could not be created") - cont_acl = self.get_container_acl_list(pool_uuid, self.container_uuid) + cont_acl = self.get_container_acl_list(container) if not self.compare_acl_lists(cont_acl, expected_acl): self.fail(" ACL permissions mismatch:\n\t \ Container ACL: {}\n\tExpected ACL: {}".format(cont_acl, expected_acl)) @@ -62,12 +62,9 @@ def test_container_basics(self): # 3. Destroy the container self.log.info("===> Destroying the container") - result = self.destroy_containers(self.container) + result = self.destroy_containers(container) if result: - self.fail(" Unable to destroy container '{}'".format( - self.container_uuid)) - else: - self.container_uuid = None + self.fail(" Unable to destroy container {}".format(str(container))) # Create a valid ACL file self.log.info("===> Generating a valid ACL file") @@ -75,13 +72,12 @@ def test_container_basics(self): # 4. Create a container with a valid ACL file passed self.log.info("===> Creating a container with an ACL file passed") - self.container_uuid = self.create_container_with_daos( - self.pool, "valid") + container = self.create_container_with_daos(pool, "valid") - if not self.container_uuid: + if not container: self.fail(" An expected container could not be created") - cont_acl = self.get_container_acl_list(pool_uuid, self.container_uuid, True) + cont_acl = self.get_container_acl_list(container, True) if not self.compare_acl_lists(cont_acl, expected_acl): self.fail(" ACL permissions mismatch:\n\t \ Container ACL: {}\n\tExpected ACL: {}".format(cont_acl, expected_acl)) @@ -90,12 +86,9 @@ def test_container_basics(self): # 5. 
Destroy the container self.log.info("===> Destroying the container") - result = self.destroy_containers(self.container) + result = self.destroy_containers(container) if result: - self.fail(" Unable to destroy container '{}'".format( - self.container_uuid)) - else: - self.container_uuid = None + self.fail(" Unable to destroy container {}".format(str(container))) # Create an invalid ACL file self.log.info("===> Generating an invalid ACL file") @@ -103,13 +96,10 @@ def test_container_basics(self): # 6. Create a container with an invalid ACL file passed self.log.info("===> Creating a container with invalid ACL file passed") - self.container_uuid = self.create_container_with_daos( - self.pool, "invalid") + container = self.create_container_with_daos(pool, "invalid") - if self.container_uuid: - self.fail( - " Did not expect the container '{}' to be created".format( - self.container_uuid)) + if container: + self.fail("Did not expect the container {} to be created".format(str(container))) # 7. Cleanup environment self.log.info("===> Cleaning the environment") diff --git a/src/tests/ftest/security/cont_delete_acl.py b/src/tests/ftest/security/cont_delete_acl.py index 5c4ba8c2af1..3415c7e2b6a 100644 --- a/src/tests/ftest/security/cont_delete_acl.py +++ b/src/tests/ftest/security/cont_delete_acl.py @@ -20,13 +20,10 @@ class DeleteContainerACLTest(ContSecurityTestBase): def setUp(self): """Set up each test case.""" super().setUp() - self.daos_cmd = self.get_daos_command() - self.prepare_pool() - self.add_container(self.pool) - + self.pool = self.get_pool() + self.container = self.get_container(self.pool) # Get list of ACL entries - cont_acl = self.get_container_acl_list( - self.pool.uuid, self.container.uuid) + cont_acl = self.get_container_acl_list(self.container) # Get principals self.principals_table = {} @@ -48,17 +45,13 @@ def test_acl_delete_invalid_inputs(self): # Get list of invalid ACL principal values invalid_principals = self.params.get("invalid_principals", 
"/run/*") - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False - # Check for failure on invalid inputs. test_errs = [] for principal in invalid_principals: - self.daos_cmd.container_delete_acl( - self.pool.uuid, - self.container.uuid, - principal) - test_errs.extend(self.error_handling(self.daos_cmd.result, "-1003")) + # Disable raising an exception if the daos command fails + with self.container.no_exception(): + result = self.container.delete_acl(principal) + test_errs.extend(self.error_handling(result, "-1003")) if test_errs: self.fail("container delete-acl command expected to fail: \ {}".format("\n".join(test_errs))) @@ -77,14 +70,11 @@ def test_delete_valid_acl(self): :avocado: tags=DeleteContainerACLTest,test_delete_valid_acl """ for principal, entry in self.principals_table.items(): - self.daos_cmd.container_delete_acl( - self.pool.uuid, - self.container.uuid, - principal) - if entry in self.daos_cmd.result.stdout_text: + result = self.container.delete_acl(principal) + if entry in result.stdout_text: self.fail( "Found acl that was to be deleted in output: {}".format( - self.daos_cmd.result.stdout_text)) + result.stdout_text)) def test_cont_delete_acl_no_perm(self): """ @@ -99,24 +89,18 @@ def test_cont_delete_acl_no_perm(self): :avocado: tags=DeleteContainerACLTest,test_cont_delete_acl_no_perm """ # Let's give access to the pool to the root user - self.get_dmg_command().pool_update_acl( - self.pool.uuid, entry="A::EVERYONE@:rw") - - # The root user shouldn't have access to deleting container ACL entries - self.daos_cmd.sudo = True - - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False + self.pool.update_acl(False, entry="A::EVERYONE@:rw") # Let's check that we can't run as root (or other user) and delete # entries if no permissions are set for that user. 
test_errs = [] for principal in self.principals_table: - self.daos_cmd.container_delete_acl( - self.pool.uuid, - self.container.uuid, - principal) - test_errs.extend(self.error_handling(self.daos_cmd.result, "-1001")) + # The root user shouldn't have access to deleting container ACL entries + with self.container.as_user('root'): + # Disable raising an exception if the daos command fails + with self.container.no_exception(): + result = self.container.delete_acl(principal) + test_errs.extend(self.error_handling(result, "-1001")) if test_errs: self.fail("container delete-acl command expected to fail: \ {}".format("\n".join(test_errs))) diff --git a/src/tests/ftest/security/cont_get_acl.py b/src/tests/ftest/security/cont_get_acl.py index 20fd5cb6950..86a675fab5d 100644 --- a/src/tests/ftest/security/cont_get_acl.py +++ b/src/tests/ftest/security/cont_get_acl.py @@ -20,17 +20,9 @@ class GetContainerACLTest(ContSecurityTestBase): :avocado: recursive """ - def setUp(self): - """Set up each test case.""" - super().setUp() - self.daos_cmd = self.get_daos_command() - self.prepare_pool() - self.add_container(self.pool) - @fail_on(CommandFailure) def test_get_acl_valid(self): - """ - JIRA ID: DAOS-3705 + """JIRA ID: DAOS-3705 Test Description: Test that container get-acl command performs as expected with valid inputs and verify that we can't overwrite @@ -41,35 +33,29 @@ def test_get_acl_valid(self): :avocado: tags=security,container,container_acl,daos_cmd :avocado: tags=GetContainerACLTest,test_get_acl_valid """ + self.pool = self.get_pool() + self.container = self.get_container(self.pool) + test_errs = [] for verbose in [True, False]: for outfile in self.params.get("valid_out_filename", "/run/*"): path_to_file = os.path.join( self.tmp, "{}_{}".format(outfile, verbose)) - # Enable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False - self.daos_cmd.container_get_acl( - self.pool.uuid, - self.container.uuid, - verbose=verbose, - 
outfile=path_to_file) + # Disable raising an exception if the daos command fails + with self.container.no_exception(): + self.container.get_acl(verbose, path_to_file) # Verify consistency of acl obtained through the file file_acl = read_acl_file(path_to_file) - self.acl_file_diff(file_acl) + self.acl_file_diff(self.container, file_acl) # Let's verify that we can't overwrite an already existing file # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False - self.daos_cmd.container_get_acl( - self.pool.uuid, - self.container.uuid, - verbose=verbose, - outfile=path_to_file) + with self.container.no_exception(): + self.container.get_acl(verbose, path_to_file) test_errs.extend( - self.error_handling( - self.daos_cmd.result, "ile exists")) + self.error_handling(self.container.daos.result, "file exists")) if test_errs: self.fail("container get-acl command expected to fail: \ @@ -87,24 +73,20 @@ def test_cont_get_acl_no_perm(self): :avocado: tags=security,container,container_acl,daos_cmd :avocado: tags=GetContainerACLTest,test_cont_get_acl_no_perm """ - # Let's give access to the pool to the root user - self.get_dmg_command().pool_update_acl( - self.pool.uuid, entry="A::EVERYONE@:rw") + self.pool = self.get_pool() + self.container = self.get_container(self.pool) - # The root user shouldn't have access to getting container ACL entries - self.daos_cmd.sudo = True - - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False + # Let's give access to the pool to the root user + self.pool.update_acl(False, entry="A::EVERYONE@:rw") # Let's check that we can't run as root (or other user) and get # acl information if no permissions are set for that user. 
- test_errs = [] - self.daos_cmd.container_get_acl( - self.pool.uuid, - self.container.uuid, - outfile="outfile.txt") - test_errs.extend(self.error_handling(self.daos_cmd.result, "-1001")) + # The root user shouldn't have access to getting container ACL entries + with self.container.as_user('root'): + # Disable raising an exception if the daos command fails + with self.container.no_exception(): + self.container.get_acl(outfile="outfile.txt") + test_errs = self.error_handling(self.container.daos.result, "-1001") if test_errs: self.fail("container get-acl command expected to fail: \ diff --git a/src/tests/ftest/security/cont_overwrite_acl.py b/src/tests/ftest/security/cont_overwrite_acl.py index 786f66c96bf..9a494c58ebf 100644 --- a/src/tests/ftest/security/cont_overwrite_acl.py +++ b/src/tests/ftest/security/cont_overwrite_acl.py @@ -24,13 +24,11 @@ def setUp(self): """Set up each test case.""" super().setUp() self.acl_filename = "test_overwrite_acl_file.txt" - self.daos_cmd = self.get_daos_command() - self.prepare_pool() - self.add_container(self.pool) + self.pool = self.get_pool() + self.container = self.get_container(self.pool) # List of ACL entries - self.cont_acl = self.get_container_acl_list( - self.pool.uuid, self.container.uuid) + self.cont_acl = self.get_container_acl_list(self.container) def test_acl_overwrite_invalid_inputs(self): """ @@ -48,23 +46,18 @@ def test_acl_overwrite_invalid_inputs(self): # Get list of invalid ACL principal values invalid_acl_filename = self.params.get("invalid_acl_filename", "/run/*") - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False - # Check for failure on invalid inputs test_errs = [] for acl_file in invalid_acl_filename: # Run overwrite command - self.daos_cmd.container_overwrite_acl( - self.pool.uuid, - self.container.uuid, - acl_file) - test_errs.extend(self.error_handling( - self.daos_cmd.result, "no such file or directory")) + # Disable raising an exception if the 
daos command fails + with self.container.no_exception(): + result = self.container.overwrite_acl(acl_file) + test_errs.extend(self.error_handling(result, "no such file or directory")) # Check that the acl file was unchanged - self.acl_file_diff(self.cont_acl) + self.acl_file_diff(self.container, self.cont_acl) if test_errs: self.fail("container overwrite-acl command expected to fail: \ @@ -87,9 +80,6 @@ def test_overwrite_invalid_acl_file(self): "invalid_acl_file_content", "/run/*") path_to_file = os.path.join(self.tmp, self.acl_filename) - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False - test_errs = [] for content in invalid_file_content: create_acl_file(path_to_file, content) @@ -98,14 +88,13 @@ def test_overwrite_invalid_acl_file(self): exp_err = "no entries" # Run overwrite command - self.daos_cmd.container_overwrite_acl( - self.pool.uuid, - self.container.uuid, - path_to_file) - test_errs.extend(self.error_handling(self.daos_cmd.result, exp_err)) + # Disable raising an exception if the daos command fails + with self.container.no_exception(): + result = self.container.overwrite_acl(path_to_file) + test_errs.extend(self.error_handling(result, exp_err)) # Check that the acl file was unchanged - self.acl_file_diff(self.cont_acl) + self.acl_file_diff(self.container, self.cont_acl) if test_errs: self.fail("container overwrite-acl command expected to fail: \ @@ -127,19 +116,15 @@ def test_overwrite_valid_acl_file(self): valid_file_acl = self.params.get("valid_acl_file", "/run/*") path_to_file = os.path.join(self.tmp, self.acl_filename) - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False - - # Run overwrite command, test will fail if command fails. 
for content in valid_file_acl: create_acl_file(path_to_file, content) - self.daos_cmd.container_overwrite_acl( - self.pool.uuid, - self.container.uuid, - path_to_file) + # Run overwrite command + # Disable raising an exception if the daos command fails + with self.container.no_exception(): + self.container.overwrite_acl(path_to_file) # Check that the acl file was unchanged - self.acl_file_diff(content) + self.acl_file_diff(self.container, content) def test_cont_overwrite_acl_no_perm(self): """ @@ -157,29 +142,23 @@ def test_cont_overwrite_acl_no_perm(self): path_to_file = os.path.join(self.tmp, self.acl_filename) # Let's give access to the pool to the root user - self.get_dmg_command().pool_update_acl( - self.pool.uuid, entry="A::EVERYONE@:rw") - - # The root user shouldn't have access to deleting container ACL entries - self.daos_cmd.sudo = True - - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False + self.pool.update_acl(False, entry="A::EVERYONE@:rw") # Let's check that we can't run as root (or other user) and overwrite # entries if no permissions are set for that user. test_errs = [] for content in valid_file_content: create_acl_file(path_to_file, content) - self.daos_cmd.container_overwrite_acl( - self.pool.uuid, - self.container.uuid, - path_to_file) - test_errs.extend(self.error_handling(self.daos_cmd.result, "-1001")) + # Run overwrite command + # The root user shouldn't have access to deleting container ACL entries + with self.container.as_user('root'): + # Disable raising an exception if the daos command fails + with self.container.no_exception(): + result = self.container.overwrite_acl(path_to_file) + test_errs.extend(self.error_handling(result, "-1001")) # Check that the acl was unchanged. 
- post_test_acls = self.get_container_acl_list( - self.pool.uuid, self.container.uuid) + post_test_acls = self.get_container_acl_list(self.container) if not self.compare_acl_lists(self.cont_acl, post_test_acls): self.fail("Previous ACL:\n{} Post command ACL:{}".format( self.cont_acl, post_test_acls)) diff --git a/src/tests/ftest/security/cont_update_acl.py b/src/tests/ftest/security/cont_update_acl.py index 21aa1bf31f2..c0f971dd260 100644 --- a/src/tests/ftest/security/cont_update_acl.py +++ b/src/tests/ftest/security/cont_update_acl.py @@ -21,13 +21,11 @@ def setUp(self): """Set up each test case.""" super().setUp() self.acl_filename = "test_acl_file.txt" - self.daos_cmd = self.get_daos_command() - self.prepare_pool() - self.add_container(self.pool) + self.pool = self.get_pool() + self.container = self.get_container(self.pool) # List of ACL entries - self.cont_acl = self.get_container_acl_list( - self.pool.uuid, self.container.uuid) + self.cont_acl = self.get_container_acl_list(self.container) def test_acl_update_invalid_inputs(self): """ @@ -46,34 +44,25 @@ def test_acl_update_invalid_inputs(self): invalid_entries = self.params.get("invalid_acl_entries", "/run/*") invalid_filename = self.params.get("invalid_acl_filename", "/run/*") - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False - test_errs = [] for entry in invalid_entries: - # Run update command - self.daos_cmd.container_update_acl( - self.pool.uuid, - self.container.uuid, - entry=entry) - test_errs.extend(self.error_handling(self.daos_cmd.result, "-1003")) + with self.container.no_exception(): + self.container.update_acl(entry=entry) + test_errs.extend(self.error_handling(self.container.daos.result, "-1003")) # Check that the acl file was unchanged - self.acl_file_diff(self.cont_acl) + self.acl_file_diff(self.container, self.cont_acl) for acl_file in invalid_filename: - # Run update command - self.daos_cmd.container_update_acl( - self.pool.uuid, - 
self.container.uuid, - acl_file=acl_file) + with self.container.no_exception(): + self.container.update_acl(acl_file=acl_file) test_errs.extend(self.error_handling( - self.daos_cmd.result, "no such file or directory")) + self.container.daos.result, "no such file or directory")) # Check that the acl file was unchanged - self.acl_file_diff(self.cont_acl) + self.acl_file_diff(self.container, self.cont_acl) if test_errs: self.fail("container update-acl command expected to fail: \ @@ -94,9 +83,6 @@ def test_update_invalid_acl(self): "invalid_acl_file_content", "/run/*") path_to_file = os.path.join(self.tmp, self.acl_filename) - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False - test_errs = [] for content in invalid_file_content: create_acl_file(path_to_file, content) @@ -105,14 +91,12 @@ def test_update_invalid_acl(self): exp_err = "no entries" # Run update command - self.daos_cmd.container_update_acl( - self.pool.uuid, - self.container.uuid, - acl_file=path_to_file) - test_errs.extend(self.error_handling(self.daos_cmd.result, exp_err)) + with self.container.no_exception(): + self.container.update_acl(acl_file=path_to_file) + test_errs.extend(self.error_handling(self.container.daos.result, exp_err)) # Check that the acl file was unchanged - self.acl_file_diff(self.cont_acl) + self.acl_file_diff(self.container, self.cont_acl) if test_errs: self.fail("container update-acl command expected to fail: \ @@ -139,39 +123,30 @@ def test_update_acl_file(self): create_acl_file(path_to_file, self.cont_acl + ace_to_add) # Run update command - self.daos_cmd.container_update_acl( - self.pool.uuid, - self.container.uuid, - acl_file=path_to_file) + self.container.update_acl(acl_file=path_to_file) # Verify that the entry added did not affect any other entry - self.acl_file_diff(self.cont_acl + ace_to_add) + self.acl_file_diff(self.container, self.cont_acl + ace_to_add) # Let's add a file with existing principals and verify overridden 
values ace_to_add_2 = ["A:G:my_great_test@:rwd", "A::my_new_principal@:rw"] create_acl_file(path_to_file, ace_to_add_2) # Run update command - self.daos_cmd.container_update_acl( - self.pool.uuid, - self.container.uuid, - acl_file=path_to_file) + self.container.update_acl(acl_file=path_to_file) # Verify that the ACL file is now composed of the updated ACEs - self.acl_file_diff(self.cont_acl + ace_to_add_2) + self.acl_file_diff(self.container, self.cont_acl + ace_to_add_2) # Lastly, let's add a file that contains only new principals ace_to_add_3 = ["A:G:new_new_principal@:rwd", "A::last_one@:rw"] create_acl_file(path_to_file, ace_to_add_3) # Run update command - self.daos_cmd.container_update_acl( - self.pool.uuid, - self.container.uuid, - acl_file=path_to_file) + self.container.update_acl(acl_file=path_to_file) # Verify that the ACL file is now composed of the updated ACEs - self.acl_file_diff(self.cont_acl + ace_to_add_2 + ace_to_add_3) + self.acl_file_diff(self.container, self.cont_acl + ace_to_add_2 + ace_to_add_3) def test_update_cont_acl_no_perm(self): """ @@ -189,28 +164,22 @@ def test_update_cont_acl_no_perm(self): path_to_file = os.path.join(self.tmp, self.acl_filename) # Let's give access to the pool to the root user - self.get_dmg_command().pool_update_acl( - self.pool.uuid, entry="A::EVERYONE@:rw") - - # The root user shouldn't have access to updating container ACL entries - self.daos_cmd.sudo = True - - # Disable raising an exception if the daos command fails - self.daos_cmd.exit_status_exception = False + self.pool.update_acl(False, entry="A::EVERYONE@:rw") # Let's check that we can't run as root (or other user) and update # entries if no permissions are set for that user. 
test_errs = [] for content in valid_file_content: create_acl_file(path_to_file, content) - self.daos_cmd.container_update_acl( - self.pool.uuid, - self.container.uuid, - acl_file=path_to_file) - test_errs.extend(self.error_handling(self.daos_cmd.result, "-1001")) + # Disable raising an exception if the daos command fails + with self.container.no_exception(): + # The root user shouldn't have access to updating container ACL entries + with self.container.as_user('root'): + self.container.update_acl(acl_file=path_to_file) + test_errs.extend(self.error_handling(self.container.daos.result, "-1001")) # Check that the acl file was unchanged - self.acl_file_diff(self.cont_acl) + self.acl_file_diff(self.container, self.cont_acl) if test_errs: self.fail("container update-acl command expected to fail: \ diff --git a/src/tests/ftest/security/pool_acl.py b/src/tests/ftest/security/pool_acl.py index a60ad76ffd6..d46635083b4 100644 --- a/src/tests/ftest/security/pool_acl.py +++ b/src/tests/ftest/security/pool_acl.py @@ -7,7 +7,7 @@ import pwd import grp -import security_test_base as secTestBase +from security_test_base import acl_entry from pool_security_test_base import PoolSecurityTestBase PERMISSIONS = ["", "r", "w", "rw"] @@ -53,11 +53,9 @@ def test_daos_pool_acl_enforcement(self): user_types = ["owner", "user", "ownergroup", "group", "everyone"] default_acl_entries = ["A::OWNER@:", - secTestBase.acl_entry("user", current_user, "", - PERMISSIONS), + acl_entry("user", current_user, "", PERMISSIONS), "A:G:GROUP@:", - secTestBase.acl_entry("group", current_group, "", - PERMISSIONS), + acl_entry("group", current_group, "", PERMISSIONS), "A::EVERYONE@:"] test_acl_entries = ["", "", "", "", ""] diff --git a/src/tests/ftest/security/pool_groups.py b/src/tests/ftest/security/pool_groups.py index 2fab929b9a7..fe02295b75d 100644 --- a/src/tests/ftest/security/pool_groups.py +++ b/src/tests/ftest/security/pool_groups.py @@ -6,7 +6,7 @@ import os import grp -import security_test_base as 
secTestBase +from security_test_base import acl_entry from pool_security_test_base import PoolSecurityTestBase PERMISSIONS = ["", "r", "w", "rw"] @@ -49,8 +49,7 @@ def test_daos_pool_acl_groups(self): read, write = self.params.get( "pg_read_write", "/run/pool_acl/primary_secondary_group_test/*") acl_entries = ["", "", "", - secTestBase.acl_entry("group", current_group, primary_grp_perm, - PERMISSIONS), ""] + acl_entry("group", current_group, primary_grp_perm, PERMISSIONS), ""] if primary_grp_perm.lower() == "none": primary_grp_perm = "" if primary_grp_perm not in PERMISSIONS: diff --git a/src/tests/ftest/server/multiengine_persocket.py b/src/tests/ftest/server/multiengine_persocket.py index 299908f1725..f838610e91c 100644 --- a/src/tests/ftest/server/multiengine_persocket.py +++ b/src/tests/ftest/server/multiengine_persocket.py @@ -281,21 +281,14 @@ def test_multiengines_per_socket(self): self.log.info("===(%s)===Container create and attributes test", step) self.add_container(self.pool) self.container.open() - daos_cmd = self.get_daos_command() num_attributes = self.params.get("num_attributes", '/run/attrtests/*') attr_dict = self.create_data_set(num_attributes) try: self.container.container.set_attr(data=attr_dict) - data = daos_cmd.container_list_attrs( - pool=self.pool.uuid, - cont=self.container.uuid, - verbose=False) + data = self.container.list_attrs(verbose=False) self.verify_list_attr(attr_dict, data['response']) - data = daos_cmd.container_list_attrs( - pool=self.pool.uuid, - cont=self.container.uuid, - verbose=True) + data = self.container.list_attrs(verbose=True) self.verify_get_attr(attr_dict, data['response']) except DaosApiError as excep: self.log.info(excep) diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index df4b1a72a58..0744dc21ad9 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -177,6 +177,18 @@ def no_exception(self): yield self.exit_status_exception = 
original_value + @contextlib.contextmanager + def as_user(self, user): + """Temporarily run commands as a different user. + + Args: + user (str): the user to temporarily run as + """ + original_value = self.run_user + self.run_user = user + yield + self.run_user = original_value + def run(self, raise_exception=None): """Run the command. diff --git a/src/tests/ftest/util/cont_security_test_base.py b/src/tests/ftest/util/cont_security_test_base.py index d99216e83e9..6bfb1ef9801 100644 --- a/src/tests/ftest/util/cont_security_test_base.py +++ b/src/tests/ftest/util/cont_security_test_base.py @@ -8,15 +8,12 @@ import grp import re -from avocado import fail_on from avocado.core.exceptions import TestFail from apricot import TestWithServers -from daos_utils import DaosCommand -from exception_utils import CommandFailure import general_utils from general_utils import DaosTestError -import security_test_base as secTestBase +from security_test_base import acl_entry class ContSecurityTestBase(TestWithServers): @@ -33,87 +30,49 @@ def __init__(self, *args, **kwargs): """Initialize a ContSecurityTestBase object.""" super().__init__(*args, **kwargs) self.dmg = None - self.daos_tool = None - self.user_uid = None - self.user_gid = None - self.current_user = None - self.current_group = None - self.container_uuid = None - - def setUp(self): - """Set up each test case.""" - super().setUp() self.user_uid = os.geteuid() self.user_gid = os.getegid() self.current_user = pwd.getpwuid(self.user_uid)[0] self.current_group = grp.getgrgid(self.user_uid)[0] - self.co_prop = self.params.get("container_properties", - "/run/container/*") - self.dmg = self.get_dmg_command() - self.daos_tool = DaosCommand(self.bin) - - @fail_on(CommandFailure) - def create_pool_with_dmg(self): - """Create a pool with the dmg tool. - - Obtains the pool uuid from the operation's result - - Returns: - pool_uuid (str): Pool UUID, randomly generated. 
- """ - self.prepare_pool() - pool_uuid = self.pool.pool.get_uuid_str() - return pool_uuid + def setUp(self): + """Set up each test case.""" + super().setUp() + self.co_prop = self.params.get("container_properties", "/run/container/*") + self.dmg = self.get_dmg_command() def create_container_with_daos(self, pool, acl_type=None, acl_file=None): """Create a container with the daos tool. - Also, obtains the container uuid from the operation's result. - Args: pool (TestPool): Pool object. acl_type (str, optional): valid or invalid. + acl_file (str, optional): acl file Returns: - container_uuid: Container UUID created. + TestContainer: the new container """ - file_name = None - get_acl_file = None expected_acl_types = [None, "valid", "invalid"] if acl_file is None: if acl_type not in expected_acl_types: - self.fail( - " Invalid '{}' acl type passed.".format(acl_type)) + self.fail("Invalid '{}' acl type passed.".format(acl_type)) if acl_type: - get_acl_file = "acl_{}.txt".format(acl_type) - file_name = os.path.join(self.tmp, get_acl_file) - else: - get_acl_file = "" - else: - file_name = acl_file + acl_file = os.path.join(self.tmp, "acl_{}.txt".format(acl_type)) try: - self.container = self.get_container(pool, create=False, daos_command=self.daos_tool) - self.container.create(acl_file=file_name) - container_uuid = self.container.uuid + return self.get_container(pool, acl_file=acl_file) except TestFail as error: if acl_type != "invalid": - raise DaosTestError( - "Could not create expected container ") from error - container_uuid = None + raise DaosTestError("Could not create expected container ") from error + return None - return container_uuid - - def get_container_acl_list(self, pool_uuid, container_uuid, - verbose=False, outfile=None): + def get_container_acl_list(self, container, verbose=False, outfile=None): """Get daos container acl list by daos container get-acl. Args: - pool_uuid (str): Pool uuid. - container_uuid (str): Container uuid. 
+ container (TestContainer): the container. verbose (bool, optional): Verbose mode. outfile (str, optional): Write ACL to file @@ -121,17 +80,7 @@ def get_container_acl_list(self, pool_uuid, container_uuid, cont_permission_list: daos container acl list. """ - if not general_utils.check_uuid_format(pool_uuid): - self.fail( - " Invalid Pool UUID '{}' provided.".format(pool_uuid)) - - if not general_utils.check_uuid_format(container_uuid): - self.fail( - " Invalid Container UUID '{}' provided.".format( - container_uuid)) - - result = self.daos_tool.container_get_acl(pool_uuid, container_uuid, - verbose, outfile) + result = container.get_acl(verbose, outfile) cont_permission_list = [] for line in result.stdout_text.splitlines(): @@ -147,150 +96,6 @@ def get_container_acl_list(self, pool_uuid, container_uuid, cont_permission_list.append(line) return cont_permission_list - def overwrite_container_acl(self, acl_file): - """Overwrite existing container acl-entries with acl_file. - - Args: - acl_file (str): acl filename. - - Return: - result (str): daos_tool.container_overwrite_acl. - """ - self.daos_tool.exit_status_exception = False - result = self.daos_tool.container_overwrite_acl( - self.pool.uuid, self.container_uuid, acl_file) - return result - - def update_container_acl(self, entry): - """Update container acl entry. - - Args: - entry (str): acl entry to be updated. - - Return: - result (str): daos_tool.container_update_acl. - """ - self.daos_tool.exit_status_exception = False - result = self.daos_tool.container_update_acl( - self.pool.uuid, self.container_uuid, entry=entry) - return result - - def destroy_test_container(self, pool, container): - """Test container destroy/delete. - - Args: - pool (str): pool label or UUID. - container (str): container label or UUID. - - Return: - result (str): daos_tool.container_destroy result. 
- """ - self.daos_tool.exit_status_exception = False - return self.daos_tool.container_destroy(pool, container, True) - - def set_container_attribute( - self, pool_uuid, container_uuid, attr, value): - """Write/Set container attribute. - - Args: - pool_uuid (str): pool uuid. - container_uuid (str): container uuid. - attr (str): container attribute. - value (str): container attribute value to be set. - - Return: - result (str): daos_tool.container_set_attr result. - """ - self.daos_tool.exit_status_exception = False - result = self.daos_tool.container_set_attr( - pool_uuid, container_uuid, attrs={attr: value}) - return result - - def get_container_attribute( - self, pool_uuid, container_uuid, attr): - """Get container attribute. - - Args: - pool_uuid (str): pool uuid. - container_uuid (str): container uuid. - attr (str): container attribute. - - Return: - CmdResult: Object that contains exit status, stdout, and other - information. - """ - self.daos_tool.exit_status_exception = False - self.daos_tool.container_get_attr( - pool_uuid, container_uuid, attr) - return self.daos_tool.result - - def list_container_attribute( - self, pool_uuid, container_uuid): - """List container attribute. - - Args: - pool_uuid (str): pool uuid. - container_uuid (str): container uuid. - - Return: - result (str): daos_tool.container_list_attrs result. - """ - self.daos_tool.exit_status_exception = False - result = self.daos_tool.container_list_attrs( - pool_uuid, container_uuid) - return result - - def set_container_property( - self, pool_uuid, container_uuid, prop, value): - """Write/Set container property. - - Args: - pool_uuid (str): pool uuid. - container_uuid (str): container uuid. - prop (str): container property name. - value (str): container property value to be set. - - Return: - result (str): daos_tool.container_set_prop result. 
- """ - self.daos_tool.exit_status_exception = False - result = self.daos_tool.container_set_prop( - pool_uuid, container_uuid, prop, value) - return result - - def get_container_property(self, pool_uuid, container_uuid): - """Get container property. - - Args: - pool_uuid (str): pool uuid. - container_uuid (str): container uuid. - - Return: - result (str): daos_tool.container_get_prop. - """ - self.daos_tool.exit_status_exception = False - result = self.daos_tool.container_get_prop( - pool_uuid, container_uuid) - return result - - def set_container_owner( - self, pool_uuid, container_uuid, user, group): - """Set container owner. - - Args: - pool_uuid (str): pool uuid. - container_uuid (str): container uuid. - user (str): container user-name to be set owner to. - group (str): container group-name to be set owner to. - - Return: - result (str): daos_tool.container_set_owner. - """ - self.daos_tool.exit_status_exception = False - result = self.daos_tool.container_set_owner( - pool_uuid, container_uuid, user, group) - return result - def compare_acl_lists(self, get_acl_list, expected_list): """Compare two permission lists. @@ -304,16 +109,7 @@ def compare_acl_lists(self, get_acl_list, expected_list): """ self.log.info(" ===> get-acl ACL: %s", get_acl_list) self.log.info(" ===> Expected ACL: %s", expected_list) - - exp_list = expected_list[:] - if len(get_acl_list) != len(exp_list): - return False - for acl in get_acl_list: - if acl in exp_list: - exp_list.remove(acl) - else: - return False - return True + return sorted(get_acl_list) == sorted(expected_list) def get_base_acl_entries(self, test_user): """Get container acl entries per cont enforcement order for test_user. 
@@ -327,39 +123,39 @@ def get_base_acl_entries(self, test_user): """ if test_user == "OWNER": base_acl_entries = [ - secTestBase.acl_entry("user", "OWNER", ""), - secTestBase.acl_entry("user", self.current_user, ""), - secTestBase.acl_entry("group", "GROUP", "rwcdtTaAo"), - secTestBase.acl_entry("group", self.current_group, "rwcdtTaAo"), - secTestBase.acl_entry("user", "EVERYONE", "rwcdtTaAo")] + acl_entry("user", "OWNER", ""), + acl_entry("user", self.current_user, ""), + acl_entry("group", "GROUP", "rwcdtTaAo"), + acl_entry("group", self.current_group, "rwcdtTaAo"), + acl_entry("user", "EVERYONE", "rwcdtTaAo")] elif test_user == "user": base_acl_entries = [ "", - secTestBase.acl_entry("user", self.current_user, ""), - secTestBase.acl_entry("group", "GROUP", "rwcdtTaAo"), - secTestBase.acl_entry("group", self.current_group, ""), - secTestBase.acl_entry("user", "EVERYONE", "rwcdtTaAo")] + acl_entry("user", self.current_user, ""), + acl_entry("group", "GROUP", "rwcdtTaAo"), + acl_entry("group", self.current_group, ""), + acl_entry("user", "EVERYONE", "rwcdtTaAo")] elif test_user == "group": base_acl_entries = [ "", "", - secTestBase.acl_entry("group", "GROUP", ""), - secTestBase.acl_entry("group", self.current_group, ""), - secTestBase.acl_entry("user", "EVERYONE", "rwcdtTaAo")] + acl_entry("group", "GROUP", ""), + acl_entry("group", self.current_group, ""), + acl_entry("user", "EVERYONE", "rwcdtTaAo")] elif test_user == "GROUP": base_acl_entries = [ "", "", "", - secTestBase.acl_entry("group", self.current_group, ""), - secTestBase.acl_entry("user", "EVERYONE", "rwcdtTaAo")] + acl_entry("group", self.current_group, ""), + acl_entry("user", "EVERYONE", "rwcdtTaAo")] elif test_user == "EVERYONE": base_acl_entries = [ "", "", "", "", - secTestBase.acl_entry("user", "EVERYONE", "")] + acl_entry("user", "EVERYONE", "")] else: base_acl_entries = ["", "", "", "", ""] return base_acl_entries @@ -405,7 +201,7 @@ def error_handling(self, results, err_msg): unexpected error: 
{}".format(results.command, results)) return test_errs - def acl_file_diff(self, prev_acl, flag=True): + def acl_file_diff(self, container, prev_acl, flag=True): """Compare current content of acl-file with helper function. If provided prev_acl file information is different from current acl @@ -413,6 +209,7 @@ def acl_file_diff(self, prev_acl, flag=True): fail in the case that the acl contents are found to have no difference. Args: + container (TestContainer): container for which to compare acl file. prev_acl (list): list of acl entries within acl-file. Defaults to True. flag (bool, optional): if True, test will fail when acl-file @@ -420,8 +217,6 @@ def acl_file_diff(self, prev_acl, flag=True): contents are same. Defaults to True. """ - current_acl = self.get_container_acl_list( - self.pool.uuid, self.container.uuid) + current_acl = self.get_container_acl_list(container) if self.compare_acl_lists(prev_acl, current_acl) != flag: - self.fail("Previous ACL:\n{} \nPost command ACL:\n{}".format( - prev_acl, current_acl)) + self.fail("Previous ACL:\n{} \nPost command ACL:\n{}".format(prev_acl, current_acl)) diff --git a/src/tests/ftest/util/daos_utils.py b/src/tests/ftest/util/daos_utils.py index c1d9b30d5ea..429945f8b00 100644 --- a/src/tests/ftest/util/daos_utils.py +++ b/src/tests/ftest/util/daos_utils.py @@ -623,7 +623,7 @@ def container_get_attr(self, pool, cont, attr, sys_name=None): Args: pool (str): pool UUID or label cont (str): container UUID or label - attr (str): attribute name + attr (str/list): single attribute name or list of names sys_name (str, optional): DAOS system name context for servers. Defaults to None. @@ -634,30 +634,11 @@ def container_get_attr(self, pool, cont, attr, sys_name=None): CommandFailure: if the daos get-attr command fails. 
""" + if isinstance(attr, (list, tuple)): + attr = list_to_str(attr, ",") return self._get_json_result( ("container", "get-attr"), pool=pool, cont=cont, attr=attr, sys_name=sys_name) - def container_get_attrs(self, pool, cont, attrs, sys_name=None): - """Call daos container get-attr for multiple attributes. - - Args: - pool (str): Pool UUID. - cont (str): Container UUID. - attrs (list): Attribute names. - sys_name (str, optional): DAOS system name context for servers. - Defaults to None. - - Returns: - dict: the daos json command output converted to a python dictionary - - Raises: - CommandFailure: if the daos get-attr command fails. - - """ - return self._get_json_result( - ("container", "get-attr"), pool=pool, cont=cont, - attr=list_to_str(attrs, ","), sys_name=sys_name) - def container_list_attrs(self, pool, cont, sys_name=None, verbose=False): """Call daos container list-attrs. diff --git a/src/tests/ftest/util/general_utils.py b/src/tests/ftest/util/general_utils.py index 6174ed7a325..e724682f67b 100644 --- a/src/tests/ftest/util/general_utils.py +++ b/src/tests/ftest/util/general_utils.py @@ -889,20 +889,6 @@ def get_log_file(name): return os.path.join(os.environ.get("DAOS_TEST_LOG_DIR", "/tmp"), name) -def check_uuid_format(uuid): - """Check for a correct UUID format. - - Args: - uuid (str): Pool or Container UUID. - - Returns: - bool: status of valid or invalid uuid - - """ - pattern = re.compile("([0-9a-fA-F-]+)") - return bool(len(uuid) == 36 and pattern.match(uuid)) - - def get_numeric_list(numeric_range): """Convert a string of numeric ranges into an expanded list of integers. 
diff --git a/src/tests/ftest/util/pool_security_test_base.py b/src/tests/ftest/util/pool_security_test_base.py index 11ef268e561..4dc4250af77 100644 --- a/src/tests/ftest/util/pool_security_test_base.py +++ b/src/tests/ftest/util/pool_security_test_base.py @@ -146,21 +146,22 @@ def verify_daos_pool_cont_result(self, result, action, expect, err_code): " =Test Passed on verify_daos_pool %s expected error of %s.\n", action, expect) - def verify_cont_rw_attribute(self, action, expect, attribute, value=None): + def verify_cont_rw_attribute(self, container, action, expect, attribute, value=None): """verify container rw attribute. Args: + container (TestContainer): container to verify. action (str): daos pool read or write. expect (str): expecting pass or deny. attribute (str): Container attribute to be verified. value (str optional): Container attribute value to write. """ if action.lower() == "write": - result = self.set_container_attribute( - self.pool.uuid, self.container_uuid, attribute, value) + with container.no_exception(): + result = container.set_attr(attrs={attribute: value}) elif action.lower() == "read": - result = self.get_container_attribute( - self.pool.uuid, self.container_uuid, attribute) + with container.no_exception(): + result = container.get_attr(attr=attribute) else: self.fail( "##In verify_cont_rw_attribute, " @@ -170,60 +171,58 @@ def verify_cont_rw_attribute(self, action, expect, attribute, value=None): action, result) self.verify_daos_pool_cont_result(result, action, expect, DENY_ACCESS) - def verify_cont_rw_property( - self, action, expect, cont_property=None, value=None): + def verify_cont_rw_property(self, container, action, expect, cont_property=None, value=None): """verify container rw property. Args: + container (TestContainer): container to verify. action (str): daos container read or write. expect (str): expecting pass or deny. cont_property (str optional): Container property to be verified. 
value (str optional): Container property value to write. """ if action.lower() == "write": - result = self.set_container_property( - self.pool.uuid, self.container_uuid, cont_property, value) + with container.no_exception(): + result = container.set_prop(prop=cont_property, value=value) elif action.lower() == "read": - result = self.get_container_property( - self.pool.uuid, self.container_uuid) + with container.no_exception(): + result = container.get_prop() else: - self.fail( - "##In verify_cont_rw_property, " - "invalid action: {}".format(action)) + self.fail("##In verify_cont_rw_property, invalid action: {}".format(action)) self.log.info( - " In verify_cont_rw_property %s.\n =daos_cmd.run() result:\n%s", - action, result) + " In verify_cont_rw_property %s.\n =daos_cmd.run() result:\n%s", action, result) self.verify_daos_pool_cont_result(result, action, expect, DENY_ACCESS) - def verify_cont_set_owner(self, expect, user, group): + def verify_cont_set_owner(self, container, expect, user, group): """verify container set owner. Args: + container (TestContainer): container to verify. expect (str): expecting pass or deny. user (str): New user to be set. group (str): New group to be set. """ action = "set" - result = self.set_container_owner( - self.pool.uuid, self.container_uuid, user, group) + with container.no_exception(): + result = container.set_owner(user, group) self.log.info( - " In verify_cont_set_owner %s.\n =daos_cmd.run() result:\n%s", - action, result) + " In verify_cont_set_owner %s.\n =daos_cmd.run() result:\n%s", action, result) self.verify_daos_pool_cont_result(result, action, expect, DENY_ACCESS) - def verify_cont_rw_acl(self, action, expect, entry=None): + def verify_cont_rw_acl(self, container, action, expect, entry=None): """verify container rw acl. Args: + container (TestContainer): container to verify action (str): daos container read or write. expect (str): expecting pass or deny. entry (str optional): New ace entry to be write. 
""" if action.lower() == "write": - result = self.update_container_acl(entry) + with container.no_exception(): + result = container.update_acl(entry=entry) elif action.lower() == "read": - result = self.get_container_acl_list( - self.pool.uuid, self.container_uuid) + result = self.get_container_acl_list(container) else: self.fail( "##In verify_cont_rw_acl, invalid action: {}".format(action)) @@ -245,39 +244,37 @@ def verify_cont_test_result(self, result, expect): """ if expect.lower() == 'pass': if DENY_ACCESS in result: - self.fail( - "##Test Fail on verify_cont_test_result, expected Pass, " - "but Failed.") + self.fail("##Test Fail on verify_cont_test_result, expected Pass, but Failed.") else: - self.log.info( - " =Test Passed on verify_cont_test_result Succeed.\n") + self.log.info(" =Test Passed on verify_cont_test_result Succeed.\n") elif DENY_ACCESS not in result: self.fail( - "##Test Fail on verify_cont_test_result, expected Failure of " - "-1001, but Passed.") + "##Test Fail on verify_cont_test_result, expected Failure of -1001, but Passed.") else: self.log.info( - " =Test Passed on verify_cont_test_result expected error of " - "denial error -1001.\n") + " =Test Passed on verify_cont_test_result expected error of denial error -1001.\n") - def verify_cont_delete(self, expect): + def verify_cont_delete(self, container, expect): """verify container delete. Args: + container (TestContainer): container to verify. expect (str): expecting pass or deny. 
""" action = "cont_delete" - result = self.destroy_test_container(self.pool.identifier, self.container.identifier) + with container.daos.no_exception(): + result = container.daos.container_destroy( + container.pool.identifier, container.identifier, True) self.log.info( - " In verify_cont_delete %s.\n =destroy_test_container() result:" - "\n%s", action, result) + " In verify_cont_delete %s.\n =container.destroy() result:\n%s", action, result) self.verify_daos_pool_cont_result(result, action, expect, DENY_ACCESS) def setup_container_acl_and_permission( - self, user_type, user_name, perm_type, perm_action): + self, container, user_type, user_name, perm_type, perm_action): """Setup container acl and permissions. Args: + container (TestContainer): container to setup. user_type (str): Container user_type. user_name (str): Container user_name. perm_type (str): Container permission type: @@ -303,12 +300,13 @@ def setup_container_acl_and_permission( self.log.info( "At setup_container_acl_and_permission, setup %s, %s, %s, with %s", user_type, user_name, perm_type, permission) - result = self.update_container_acl( - secTestBase.acl_entry(user_type, user_name, permission)) + with container.no_exception(): + result = container.update_acl( + entry=secTestBase.acl_entry(user_type, user_name, permission)) if result.stderr_text: self.fail( "##setup_container_acl_and_permission, fail on " - "update_container_acl, expected Pass, but Failed.") + "container.update_acl, expected Pass, but Failed.") def verify_pool_readwrite(self, pool, action, expect='Pass'): """Verify client is able to perform read or write on a pool. 
@@ -324,15 +322,14 @@ def verify_pool_readwrite(self, pool, action, expect='Pass'): """ deny_access = '-1001' daos_cmd = self.get_daos_command() - daos_cmd.exit_status_exception = False - if action.lower() == "write": - container = self.get_container(pool, create=False, daos_command=daos_cmd) - result = container.create() - elif action.lower() == "read": - result = daos_cmd.pool_query(pool.identifier) - else: - self.fail( - "##In verify_pool_readwrite, invalid action: {}".format(action)) + with daos_cmd.no_exception(): + if action.lower() == "write": + container = self.get_container(pool, create=False, daos_command=daos_cmd) + result = container.create() + elif action.lower() == "read": + result = daos_cmd.pool_query(pool.identifier) + else: + self.fail("##In verify_pool_readwrite, invalid action: {}".format(action)) self.log.info( " In verify_pool_readwrite %s.\n =daos_cmd.run() result:\n%s", action, result) @@ -502,18 +499,14 @@ def pool_acl_verification(self, current_user_acl, read, write, acl_file) # (3)Create a pool with acl - self.add_pool(create=False) - self.pool.scm_size.update(scm_size) - self.pool.acl_file.update(acl_file) - self.pool.dmg.exit_status_exception = True - self.pool.create() + self.pool = self.get_pool(connect=False, scm_size=scm_size, acl_file=acl_file) self.log.info(" (2)dmg= %s", self.pool.dmg) self.log.info(" (3)Create a pool with acl") # (4)Verify the pool create status self.log.info(" (4)dmg.run() result=\n%s", self.pool.dmg.result) if "ERR" in self.pool.dmg.result.stderr_text: - self.fail("##(4)Unable to parse pool uuid and svc.") + self.fail("##(4)Unexpected error from pool create.") # (5)Get the pool's acl list # dmg pool get-acl diff --git a/src/tests/ftest/util/test_utils_container.py b/src/tests/ftest/util/test_utils_container.py index 8a535e59b17..6152870ab8f 100644 --- a/src/tests/ftest/util/test_utils_container.py +++ b/src/tests/ftest/util/test_utils_container.py @@ -273,6 +273,7 @@ def __init__(self, pool, daos_command=None, 
label_generator=None): self.file_oclass = BasicParameter(None) self.chunk_size = BasicParameter(None) self.properties = BasicParameter(None) + self.acl_file = BasicParameter(None) self.daos_timeout = BasicParameter(None) self.label = BasicParameter(None, "TestContainer") self.label_generator = label_generator @@ -313,6 +314,18 @@ def identifier(self): return self.label.value return self.uuid + def no_exception(self): + """Temporarily disable raising exceptions for failed commands.""" + return self.daos.no_exception() + + def as_user(self, user): + """Temporarily run commands as a different user. + + Args: + user (str): the user to temporarily run as + """ + return self.daos.as_user(user) + def get_params(self, test): """Get values for all of the command params from the yaml file. @@ -337,12 +350,11 @@ def get_params(self, test): @fail_on(DaosApiError) @fail_on(CommandFailure) - def create(self, con_in=None, acl_file=None): + def create(self, con_in=None): """Create a container. Args: con_in (optional): to be defined. Defaults to None. - acl_file (str, optional): path of the ACL file. Defaults to None. Returns: dict: the daos json command output converted to a python dictionary @@ -397,7 +409,7 @@ def create(self, con_in=None, acl_file=None): "file_oclass": self.file_oclass.value, "chunk_size": self.chunk_size.value, "properties": self.properties.value, - "acl_file": acl_file, + "acl_file": self.acl_file.value, "label": self.label.value } @@ -905,23 +917,53 @@ def check(self, *args, **kwargs): return self.daos.container_check( pool=self.pool.identifier, cont=self.identifier, *args, **kwargs) - @fail_on(CommandFailure) - def set_prop(self, *args, **kwargs): - """Set container properties by calling daos container set-prop. + def delete_acl(self, *args, **kwargs): + """Set container properties by calling daos container delete-acl. 
Args: - args (tuple, optional): positional arguments to DaosCommand.container_set_prop - kwargs (dict, optional): named arguments to DaosCommand.container_set_prop + args (tuple, optional): positional arguments to DaosCommand.container_delete_acl + kwargs (dict, optional): named arguments to DaosCommand.container_delete_acl Returns: - str: JSON output of daos container set-prop. + CmdResult: Object that contains exit status, stdout, and other information. Raises: CommandFailure: Raised from the daos command call. """ - return self.daos.container_set_prop( - pool=self.pool.identifier, cont=self.identifier, *args, **kwargs) + return self.daos.container_delete_acl( + self.pool.identifier, self.identifier, *args, **kwargs) + + def get_acl(self, *args, **kwargs): + """Call daos container get-acl. + + Args: + args (tuple, optional): args to pass to container_get_acl + kwargs (dict, optional): keyword args to pass to container_get_acl + + Returns: + CmdResult: Object that contains exit status, stdout, and other information. + + Raises: + CommandFailure: Raised from the daos command call. + + """ + return self.daos.container_get_acl( + self.pool.identifier, self.identifier, *args, **kwargs) + + def get_attr(self, *args, **kwargs): + """Call daos container get-attr. + + Args: + args (tuple, optional): positional arguments to DaosCommand.container_get_attr + kwargs (dict, optional): named arguments to DaosCommand.container_get_attr + + Returns: + str: JSON output of daos container get-attr. + + """ + return self.daos.container_get_attr( + self.pool.identifier, self.identifier, *args, **kwargs) @fail_on(CommandFailure) def get_prop(self, *args, **kwargs): @@ -977,6 +1019,23 @@ def verify_prop(self, expected_props): return False return True + def list_attrs(self, *args, **kwargs): + """Get container properties by calling daos container list-attrs. 
+ + Args: + args (tuple, optional): positional arguments to DaosCommand.container_list_attrs + kwargs (dict, optional): named arguments to DaosCommand.container_list_attrs + + Returns: + str: JSON output of daos container list-attrs + + Raises: + CommandFailure: Raised from the daos command call + + """ + return self.daos.container_list_attrs( + self.pool.identifier, self.identifier, *args, **kwargs) + @fail_on(CommandFailure) def list_snaps(self): """Get container properties by calling daos container list-snaps. @@ -990,6 +1049,23 @@ def list_snaps(self): """ return self.daos.container_list_snaps(pool=self.pool.identifier, cont=self.identifier) + def overwrite_acl(self, *args, **kwargs): + """Call daos container overwrite-acl. + + Args: + args (tuple, optional): args to pass to overwrite_acl + kwargs (dict, optional): keyword args to pass to overwrite_acl + + Returns: + CmdResult: Object that contains exit status, stdout, and other information. + + Raises: + CommandFailure: Raised from the daos command call. + + """ + return self.daos.container_overwrite_acl( + self.pool.identifier, self.identifier, *args, **kwargs) + @fail_on(CommandFailure) def query(self, *args, **kwargs): """Call daos container query. @@ -1008,34 +1084,68 @@ def query(self, *args, **kwargs): return self.daos.container_query( pool=self.pool.identifier, cont=self.identifier, *args, **kwargs) + def set_attr(self, *args, **kwargs): + """Call daos container set-attr. + + Args: + args (tuple, optional): positional arguments to DaosCommand.container_set_attr + kwargs (dict, optional): named arguments to DaosCommand.container_set_attr + + Returns: + CmdResult: Object that contains exit status, stdout, and other information. + + """ + return self.daos.container_set_attr( + pool=self.pool.identifier, cont=self.identifier, *args, **kwargs) + + def set_owner(self, *args, **kwargs): + """Set container properties by calling daos container set-owner. 
+ + Args: + args (tuple, optional): positional arguments to DaosCommand.container_set_owner + kwargs (dict, optional): named arguments to DaosCommand.container_set_owner + + Returns: + CmdResult: Object that contains exit status, stdout, and other information. + + Raises: + CommandFailure: Raised from the daos command call. + + """ + return self.daos.container_set_owner( + self.pool.identifier, self.identifier, *args, **kwargs) + @fail_on(CommandFailure) - def update_acl(self, entry=None, acl_file=None): - """Update container acl by calling daos container update-acl. + def set_prop(self, *args, **kwargs): + """Set container properties by calling daos container set-prop. Args: - entry (bool, optional): Add or modify a single ACL entry - acl_file (str, optional): Input file containing ACL + args (tuple, optional): positional arguments to DaosCommand.container_set_prop + kwargs (dict, optional): named arguments to DaosCommand.container_set_prop Returns: - str: JSON output of daos container update-acl. + str: JSON output of daos container set-prop. Raises: CommandFailure: Raised from the daos command call. """ - return self.daos.container_update_acl( - pool=self.pool.identifier, cont=self.identifier, entry=entry, acl_file=acl_file) + return self.daos.container_set_prop( + pool=self.pool.identifier, cont=self.identifier, *args, **kwargs) - def set_attr(self, *args, **kwargs): - """Call daos container set-attr. + def update_acl(self, *args, **kwargs): + """Call daos container update-acl. Args: - args (tuple, optional): positional arguments to DaosCommand.container_set_attr - kwargs (dict, optional): named arguments to DaosCommand.container_set_attr + args (tuple, optional): args to pass to container_update_acl + kwargs (dict, optional): keyword args to pass to container_update_acl Returns: - CmdResult: Object that contains exit status, stdout, and other information. + str: JSON output of daos container update-acl. 
+ + Raises: + CommandFailure: Raised from the daos command call. """ - return self.daos.container_set_attr( + return self.daos.container_update_acl( pool=self.pool.identifier, cont=self.identifier, *args, **kwargs) diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 627599bdec6..6f8b5e1d275 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -448,8 +448,8 @@ def connect(self, permission=2): if self.pool and not self.connected: kwargs = {"flags": permission} self.log.info( - "Connecting to pool %s with permission %s (flag: %s)", - self.uuid, permission, kwargs["flags"]) + "Connecting to %s with permission %s (flag: %s)", + str(self), permission, kwargs["flags"]) self._call_method(self.pool.connect, kwargs) self.connected = True return True @@ -465,7 +465,7 @@ def disconnect(self): """ if self.pool and self.connected: - self.log.info("Disconnecting from pool %s", self.uuid) + self.log.info("Disconnecting from %s", str(self)) self._call_method(self.pool.disconnect, {}) self.connected = False return True @@ -527,7 +527,7 @@ def delete_acl(self, principal): CmdResult: Object that contains exit status, stdout, and other information. 
""" - return self.dmg.pool_delete_acl(pool=self.identifier, principal=principal) + return self.dmg.pool_delete_acl(self.identifier, principal=principal) @fail_on(CommandFailure) def drain(self, rank, tgt_idx=None): @@ -1097,7 +1097,7 @@ def display_pool_daos_space(self, msg=None): for key in sorted(daos_space.keys()) for index, item in enumerate(daos_space[key])] self.log.info( - "Pool %s space%s:\n %s", self.uuid, + "%s space%s:\n %s", str(self), " " + msg if isinstance(msg, str) else "", "\n ".join(sizes)) def pool_percentage_used(self): From 443ff8557f10c7e1f0e79094384588720dba02a1 Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Fri, 6 Oct 2023 18:05:12 -0400 Subject: [PATCH 79/80] DAOS-14018 pool: dup operation detection metadata layout (#13078) This is the first of multiple patches that will implement a duplicate (retry) RPC/operation detection feature for pool/container service metadata RPCs. With this change, a new pool/container service root-level KVS (svc_ops) is added so that (eventually, after subsequent patches) a recent history of client opss handled can be kept. And so that duplicate ops can be detected and handled appropriately. A boolean (svc_ops_enabled) is also added to the pool rdb. For old layout pools, or those with the prior rdb default size 128 MiB, svc_ops_enabled will be false, since the amount of history to be kept in this design is too large for that rdb size. Related to this, the default value of DAOS_MD_CAP is increased to 1024 MiB so that new pools created will have sufficient rdb capacity. A debug log message is shown during step up to reveal (especially for pool start/create) if the feature is enabled. Because of the layout change, pool and container upgrade logic is also changed to enable migration from older layouts to this latest version (e.g., DAOS v2.4 pools at global version 2 -> 3). 
Also, some comments and minor logic changes in the container create/destroy handling execution flows are added, mostly as a TODO, and to show how duplicate ops scenarios can/will be handled. Finally, an independent enhancement to pool service distributed start is included in this patch. When pool service membership is changed to add new replicas, the existing rdb size is provided as the size argument, rather than rely on the DAOS_MD_CAP environment variable value that of course could have changed between server/engine starts with an administrator's edits of the daos_server.yml file. Signed-off-by: Kenneth Cain Co-authored-by: Li Wei --- src/container/srv_container.c | 121 +++++++++++++++++-------- src/container/srv_internal.h | 1 + src/container/srv_layout.c | 2 + src/container/srv_layout.h | 17 +++- src/include/daos/pool.h | 2 +- src/include/daos_srv/container.h | 4 +- src/include/daos_srv/control.h | 6 +- src/include/daos_srv/pool.h | 11 +++ src/include/daos_srv/rdb.h | 4 +- src/pool/srv_layout.c | 2 + src/pool/srv_layout.h | 16 +++- src/pool/srv_pool.c | 148 ++++++++++++++++++++++++++++--- src/rdb/rdb.c | 23 ++++- src/rsvc/srv.c | 2 +- 14 files changed, 294 insertions(+), 65 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 78e9f73ab9b..431fad52971 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -102,37 +102,47 @@ cont_svc_init(struct cont_svc *svc, const uuid_t pool_uuid, uint64_t id, /* cs_root */ rc = rdb_path_init(&svc->cs_root); if (rc != 0) - D_GOTO(err_lock, rc); + goto err_lock; rc = rdb_path_push(&svc->cs_root, &rdb_path_root_key); if (rc != 0) - D_GOTO(err_root, rc); + goto err_root; /* cs_uuids */ rc = rdb_path_clone(&svc->cs_root, &svc->cs_uuids); if (rc != 0) - D_GOTO(err_root, rc); + goto err_root; rc = rdb_path_push(&svc->cs_uuids, &ds_cont_prop_cuuids); if (rc != 0) - D_GOTO(err_uuids, rc); + goto err_uuids; /* cs_conts */ rc = rdb_path_clone(&svc->cs_root, 
&svc->cs_conts); if (rc != 0) - D_GOTO(err_uuids, rc); + goto err_uuids; rc = rdb_path_push(&svc->cs_conts, &ds_cont_prop_conts); if (rc != 0) - D_GOTO(err_conts, rc); + goto err_conts; /* cs_hdls */ rc = rdb_path_clone(&svc->cs_root, &svc->cs_hdls); if (rc != 0) - D_GOTO(err_conts, rc); + goto err_conts; rc = rdb_path_push(&svc->cs_hdls, &ds_cont_prop_cont_handles); if (rc != 0) - D_GOTO(err_hdls, rc); + goto err_hdls; + + /* cs_ops */ + rc = rdb_path_clone(&svc->cs_root, &svc->cs_ops); + if (rc != 0) + goto err_hdls; + rc = rdb_path_push(&svc->cs_ops, &ds_cont_prop_svc_ops); + if (rc != 0) + goto err_svcops; return 0; +err_svcops: + rdb_path_fini(&svc->cs_ops); err_hdls: rdb_path_fini(&svc->cs_hdls); err_conts: @@ -150,6 +160,7 @@ cont_svc_init(struct cont_svc *svc, const uuid_t pool_uuid, uint64_t id, static void cont_svc_fini(struct cont_svc *svc) { + rdb_path_fini(&svc->cs_ops); rdb_path_fini(&svc->cs_hdls); rdb_path_fini(&svc->cs_conts); rdb_path_fini(&svc->cs_uuids); @@ -544,8 +555,8 @@ get_nhandles(struct rdb_tx *tx, struct d_hash_table *nhc, struct cont *cont, enu /* check if container exists by UUID and (if applicable) non-default label */ static int -cont_existence_check(struct rdb_tx *tx, struct cont_svc *svc, - uuid_t puuid, uuid_t cuuid, char *clabel) +cont_create_existence_check(struct rdb_tx *tx, struct cont_svc *svc, uuid_t puuid, uuid_t cuuid, + char *clabel, bool dup_op) { d_iov_t key; d_iov_t val; @@ -568,6 +579,11 @@ cont_existence_check(struct rdb_tx *tx, struct cont_svc *svc, D_DEBUG(DB_MD, DF_CONT": no label, lookup by UUID "DF_UUIDF " "DF_RC"\n", DP_CONT(puuid, cuuid), DP_UUID(cuuid), DP_RC(rc)); + + /* UUID found is an "already exists" error if this is a new (not a retry) RPC */ + if (may_exist && !dup_op) + return -DER_EXIST; + return rc; } @@ -597,6 +613,11 @@ cont_existence_check(struct rdb_tx *tx, struct cont_svc *svc, DP_UUID(match_cuuid)); return -DER_INVAL; } + + /* UUID and label found and match. 
Error if this is a new (not a retry) RPC */ + if (!dup_op) + return -DER_EXIST; + return 0; } @@ -998,8 +1019,8 @@ cont_prop_write(struct rdb_tx *tx, const rdb_path_t *kvs, daos_prop_t *prop, } static int -cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, - struct cont_svc *svc, crt_rpc_t *rpc) +cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, bool dup_op, + crt_rpc_t *rpc) { struct cont_create_in *in = crt_req_get(rpc); daos_prop_t *prop_dup = NULL; @@ -1064,8 +1085,8 @@ cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, } /* Check if a container with this UUID and label already exists */ - rc = cont_existence_check(tx, svc, pool_hdl->sph_pool->sp_uuid, - in->cci_op.ci_uuid, lbl); + rc = cont_create_existence_check(tx, svc, pool_hdl->sph_pool->sp_uuid, in->cci_op.ci_uuid, + lbl, dup_op); if (rc != -DER_NONEXIST) { if (rc == 0) D_DEBUG(DB_MD, DF_CONT": container already exists\n", @@ -5136,20 +5157,23 @@ static int cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, crt_rpc_t *rpc, int cont_proto_ver) { - struct cont_op_in *in = crt_req_get(rpc); - struct cont_open_bylabel_in *olbl_in = NULL; - struct cont_open_bylabel_out *olbl_out = NULL; - struct cont_destroy_bylabel_in *dlbl_in = NULL; - struct rdb_tx tx; - crt_opcode_t opc = opc_get(rpc->cr_opc); - struct cont *cont = NULL; - struct cont_pool_metrics *metrics; - bool update_mtime = false; - int rc; + struct cont_op_in *in = crt_req_get(rpc); + struct cont_open_bylabel_in *olbl_in = NULL; + struct cont_open_bylabel_out *olbl_out = NULL; + struct cont_destroy_bylabel_in *dlbl_in = NULL; + struct rdb_tx tx; + crt_opcode_t opc = opc_get(rpc->cr_opc); + struct cont *cont = NULL; + struct cont_pool_metrics *metrics; + bool update_mtime = false; + bool dup_op = false; + const char *clbl = NULL; + char cuuid[37]; + int rc; rc = rdb_tx_begin(svc->cs_rsvc->s_db, svc->cs_rsvc->s_term, &tx); if (rc != 0) - D_GOTO(out, rc); + goto out; /* TODO: 
Implement per-container locking. */ if (opc == CONT_QUERY || opc == CONT_ATTR_GET || @@ -5158,18 +5182,24 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, else ABT_rwlock_wrlock(svc->cs_lock); + /* TODO: add client-provided metadata RPC key, lookup in cs_ops KVS, assign dup_op */ + switch (opc) { case CONT_CREATE: - rc = cont_create(&tx, pool_hdl, svc, rpc); + rc = cont_create(&tx, pool_hdl, svc, dup_op, rpc); if (likely(rc == 0)) { metrics = pool_hdl->sph_pool->sp_metrics[DAOS_CONT_MODULE]; d_tm_inc_counter(metrics->create_total, 1); } + if (dup_op) + goto out_lock; + break; case CONT_OPEN_BYLABEL: olbl_in = crt_req_get(rpc); olbl_out = crt_reply_get(rpc); rc = cont_lookup_bylabel(&tx, svc, olbl_in->coli_label, &cont); + /* TODO: idempotent rc=0 return for dup_op case. */ if (rc != 0) goto out_lock; /* NB: call common cont_op_with_cont() same as CONT_OPEN case */ @@ -5177,11 +5207,30 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, uuid_copy(olbl_out->colo_uuid, cont->c_uuid); break; case CONT_DESTROY_BYLABEL: - dlbl_in = crt_req_get(rpc); - rc = cont_lookup_bylabel(&tx, svc, dlbl_in->cdli_label, &cont); - if (rc != 0) + case CONT_DESTROY: + if (opc == CONT_DESTROY_BYLABEL) { + dlbl_in = crt_req_get(rpc); + clbl = dlbl_in->cdli_label; + rc = cont_lookup_bylabel(&tx, svc, dlbl_in->cdli_label, &cont); + } else { + uuid_unparse(in->ci_uuid, cuuid); + rc = cont_lookup(&tx, svc, in->ci_uuid, &cont); + } + if (rc == -DER_NONEXIST && dup_op) { + D_DEBUG(DB_MD, DF_UUID ":%s: do not destroy already-destroyed container\n", + DP_UUID(pool_hdl->sph_pool->sp_uuid), clbl ? clbl : cuuid); + rc = 0; + goto out_lock; + } else if (rc == 0 && dup_op) { + /* original rpc destroyed container. But another one was created! */ + D_DEBUG(DB_MD, + DF_UUID ":%s: do not destroy already-destroyed " + "(and since recreated!) container\n", + DP_UUID(pool_hdl->sph_pool->sp_uuid), clbl ? 
clbl : cuuid); + goto out_contref; + } else if (rc != 0) { goto out_lock; - /* NB: call common cont_op_with_cont() same as CONT_DESTROY */ + } rc = cont_op_with_cont(&tx, pool_hdl, cont, rpc, &update_mtime, cont_proto_ver); break; default: @@ -5192,6 +5241,7 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, } if (rc != 0) goto out_contref; + /* TODO: assign cs_ops value rc=0 */ /* Update container metadata modified times as applicable * NB: this is a NOOP if the pool has not been upgraded to the layout containing mdtimes. @@ -5200,12 +5250,13 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, if (rc != 0) goto out_contref; + /* TODO: insert client RPC key (UUID + timestamp) and value (rc) in cs_ops */ + rc = rdb_tx_commit(&tx); if (rc != 0) - D_ERROR(DF_CONT": rpc=%p opc=%u hdl="DF_UUID" rdb_tx_commit " - "failed: "DF_RC"\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, in->ci_uuid), - rpc, opc, DP_UUID(in->ci_hdl), DP_RC(rc)); + D_ERROR(DF_CONT ": rpc=%p opc=%u hdl=" DF_UUID " rdb_tx_commit failed: " DF_RC "\n", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->ci_uuid), rpc, opc, + DP_UUID(in->ci_hdl), DP_RC(rc)); out_contref: if (cont) @@ -5215,7 +5266,7 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, rdb_tx_end(&tx); out: /* Propagate new snapshot list by IV */ - if (rc == 0) { + if (!dup_op && (rc == 0)) { if (opc == CONT_SNAP_CREATE || opc == CONT_SNAP_DESTROY) ds_cont_update_snap_iv(svc, in->ci_uuid); else if (opc == CONT_PROP_SET) diff --git a/src/container/srv_internal.h b/src/container/srv_internal.h index 4f34183521b..beb7a8a3395 100644 --- a/src/container/srv_internal.h +++ b/src/container/srv_internal.h @@ -86,6 +86,7 @@ struct cont_svc { rdb_path_t cs_uuids; /* container UUIDs KVS */ rdb_path_t cs_conts; /* container KVS */ rdb_path_t cs_hdls; /* container handle KVS */ + rdb_path_t cs_ops; /* metadata ops KVS */ struct ds_pool *cs_pool; /* Manage the EC aggregation epoch */ diff --git 
a/src/container/srv_layout.c b/src/container/srv_layout.c index 3846b52f65b..359e6574034 100644 --- a/src/container/srv_layout.c +++ b/src/container/srv_layout.c @@ -17,6 +17,8 @@ RDB_STRING_KEY(ds_cont_prop_, cuuids); RDB_STRING_KEY(ds_cont_prop_, conts); RDB_STRING_KEY(ds_cont_prop_, cont_handles); RDB_STRING_KEY(ds_cont_prop_, oit_oids); +RDB_STRING_KEY(ds_cont_prop_, svc_ops); +RDB_STRING_KEY(ds_cont_prop_, svc_ops_enabled); /* Container properties KVS */ RDB_STRING_KEY(ds_cont_prop_, ghce); diff --git a/src/container/srv_layout.h b/src/container/srv_layout.h index 0ccda8831f2..643e36bb70c 100644 --- a/src/container/srv_layout.h +++ b/src/container/srv_layout.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,6 +15,7 @@ * for ds_cont: * * Root KVS (GENERIC): + * Container UUIDs KVS (GENERIC): * Container KVS (GENERIC): * Container property KVS (GENERIC): * Snapshot KVS (INTEGER) @@ -22,6 +23,7 @@ * Handle index KVS (GENERIC) * ... (more container property KVSs) * Container handle KVS (GENERIC) + * Service ops KVS (GENERIC) - NB used by both pool and container modules * * The version of the whole layout is defined by ds_pool_prop_global_version. */ @@ -40,8 +42,8 @@ * * extern d_iov_t ds_cont_prop_new_key; comment_on_value_type * - * Note 1. The "new_key" name in ds_cont_prop_new_key must not appear in the - * root KVS in src/pool/srv_layout.h, that is, there must not be a + * Note 1. The "new_key" name in ds_cont_prop_new_key must not appear (with very few exceptions) + * in the root KVS in src/pool/srv_layout.h, that is, there must usually not be a * ds_pool_prop_new_key, because the two root KVSs are the same RDB KVS. * * Note 2. 
The comment_on_value_type shall focus on the value type only; @@ -51,6 +53,8 @@ extern d_iov_t ds_cont_prop_cuuids; /* container UUIDs KVS */ extern d_iov_t ds_cont_prop_conts; /* container KVS */ extern d_iov_t ds_cont_prop_cont_handles; /* container handle KVS */ +extern d_iov_t ds_cont_prop_svc_ops; /* service ops KVS - common to pool, container */ +extern d_iov_t ds_cont_prop_svc_ops_enabled; /* uint32_t - common to pool, container */ /* Please read the IMPORTANT notes above before adding new keys. */ /* @@ -156,6 +160,13 @@ struct container_hdl { uint64_t ch_sec_capas; }; +/* + * Service ops KVS (RDB_KVS_GENERIC) + * + * Each key is a client UUID and HLC timestamp, defined in struct svc_op_key. + * Each value represents the result of handling that RPC, defined in struct svc_op_val. + */ + extern daos_prop_t cont_prop_default; extern daos_prop_t cont_prop_default_v0; diff --git a/src/include/daos/pool.h b/src/include/daos/pool.h index 48d8fdb0209..370f626f1f0 100644 --- a/src/include/daos/pool.h +++ b/src/include/daos/pool.h @@ -68,7 +68,7 @@ /* * Version 1 corresponds to 2.2 (aggregation optimizations) * Version 2 corresponds to 2.4 (dynamic evtree, checksum scrubbing) - * Version 3 corresponds to 2.6 (root embedded values) + * Version 3 corresponds to 2.6 (root embedded values, pool service operations tracking KVS) */ #define DAOS_POOL_GLOBAL_VERSION 3 diff --git a/src/include/daos_srv/container.h b/src/include/daos_srv/container.h index 7f0a5d3390d..e267dd4aad3 100644 --- a/src/include/daos_srv/container.h +++ b/src/include/daos_srv/container.h @@ -25,8 +25,8 @@ void ds_cont_wrlock_metadata(struct cont_svc *svc); void ds_cont_rdlock_metadata(struct cont_svc *svc); void ds_cont_unlock_metadata(struct cont_svc *svc); -int ds_cont_init_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, - const uuid_t pool_uuid); +int + ds_cont_init_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, const uuid_t pool_uuid); int ds_cont_svc_init(struct cont_svc **svcp, const 
uuid_t pool_uuid, uint64_t id, struct ds_rsvc *rsvc); void ds_cont_svc_fini(struct cont_svc **svcp); diff --git a/src/include/daos_srv/control.h b/src/include/daos_srv/control.h index bd87c809e9c..5b39171767a 100644 --- a/src/include/daos_srv/control.h +++ b/src/include/daos_srv/control.h @@ -29,10 +29,10 @@ dpdk_cli_override_opts; #define NVME_DEV_FL_INUSE (1 << 1) /* Used by DAOS (present in SMD) */ #define NVME_DEV_FL_FAULTY (1 << 2) /* Faulty state has been assigned */ -/** Env defining the size of a metadata pmem pool/file in MiBs */ +/** Env defining the size of a metadata pmem pool/file allocated during pool create, in MiBs */ #define DAOS_MD_CAP_ENV "DAOS_MD_CAP" -/** Default size of a metadata pmem pool/file (128 MiB) */ -#define DEFAULT_DAOS_MD_CAP_SIZE (1ul << 27) +/** Default size of a metadata pmem pool/file (1024 MiB) */ +#define DEFAULT_DAOS_MD_CAP_SIZE (1ul << 30) /** Utility macros */ #define CHK_FLAG(x, m) ((x & m) == m) diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index 8b67f1ae83c..4d55e328cc7 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -171,6 +171,17 @@ struct ds_pool_child { void *spc_metrics[DAOS_NR_MODULE]; }; +struct svc_op_key { + uint64_t mdk_client_time; + uuid_t mdk_client_id; + /* TODO: add a (cart) opcode to the key? */ +}; + +struct svc_op_val { + int mdv_rc; + char mdv_resvd[62]; +}; + struct ds_pool_child *ds_pool_child_lookup(const uuid_t uuid); struct ds_pool_child *ds_pool_child_get(struct ds_pool_child *child); void ds_pool_child_put(struct ds_pool_child *child); diff --git a/src/include/daos_srv/rdb.h b/src/include/daos_srv/rdb.h index d6e2181b2d8..a32f4deec37 100644 --- a/src/include/daos_srv/rdb.h +++ b/src/include/daos_srv/rdb.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -164,6 +164,8 @@ int rdb_campaign(struct rdb *db); bool rdb_is_leader(struct rdb *db, uint64_t *term); int rdb_get_leader(struct rdb *db, uint64_t *term, d_rank_t *rank); int rdb_get_ranks(struct rdb *db, d_rank_list_t **ranksp); +int + rdb_get_size(struct rdb *db, size_t *sizep); int rdb_add_replicas(struct rdb *db, d_rank_list_t *replicas); int rdb_remove_replicas(struct rdb *db, d_rank_list_t *replicas); int rdb_ping(struct rdb *db, uint64_t caller_term); diff --git a/src/pool/srv_layout.c b/src/pool/srv_layout.c index 62c1145a474..98f434389c4 100644 --- a/src/pool/srv_layout.c +++ b/src/pool/srv_layout.c @@ -26,6 +26,8 @@ RDB_STRING_KEY(ds_pool_prop_, owner); RDB_STRING_KEY(ds_pool_prop_, owner_group); RDB_STRING_KEY(ds_pool_prop_, connectable); RDB_STRING_KEY(ds_pool_prop_, nhandles); +RDB_STRING_KEY(ds_pool_prop_, svc_ops); +RDB_STRING_KEY(ds_pool_prop_, svc_ops_enabled); /** pool handle KVS */ RDB_STRING_KEY(ds_pool_prop_, handles); diff --git a/src/pool/srv_layout.h b/src/pool/srv_layout.h index 7423ecf6fac..1043aeca5d3 100644 --- a/src/pool/srv_layout.h +++ b/src/pool/srv_layout.h @@ -16,6 +16,7 @@ * Root KVS (GENERIC): * Pool handle KVS (GENERIC) * Pool user attribute KVS (GENERIC) + * Service ops KVS (GENERIC) - NB used by both pool and container modules * * The version of the whole layout is stored in ds_pool_prop_global_version. */ @@ -39,9 +40,9 @@ * * extern d_iov_t ds_pool_prop_new_key; comment_on_value_type * - * Note 1. The "new_key" name in ds_pool_prop_new_key must not appear in the - * root KVS in src/container/srv_layout.h, that is, there must not be a - * ds_cont_prop_new_key, because the two root KVSs are the same RDB KVS. + * Note 1. The "new_key" name in ds_pool_prop_new_key must not appear (with very few exceptions) + * in the root KVS in src/container/srv_layout.h, that is, there must not usually be + * a ds_cont_prop_new_key, because the two root KVSs are the same RDB KVS. 
* * Note 2. The comment_on_value_type shall focus on the value type only; * usage shall be described above in this comment following existing @@ -78,6 +79,8 @@ extern d_iov_t ds_pool_prop_checkpoint_mode; /* uint32_t */ extern d_iov_t ds_pool_prop_checkpoint_freq; /* uint32_t */ extern d_iov_t ds_pool_prop_checkpoint_thresh; /* uint32_t */ extern d_iov_t ds_pool_prop_reint_mode; /* uint32_t */ +extern d_iov_t ds_pool_prop_svc_ops; /* service ops KVS - common to pool, container */ +extern d_iov_t ds_pool_prop_svc_ops_enabled; /* uint32_t - common to pool, container */ /* Please read the IMPORTANT notes above before adding new keys. */ /* @@ -107,6 +110,13 @@ struct pool_hdl_v0 { * array. Sizes of keys (or values) may vary. */ +/* + * Service ops KVS (RDB_KVS_GENERIC) + * + * Each key is a client UUID and HLC timestamp, defined in struct svc_op_key. + * Each value represents the result of handling that RPC, defined in struct svc_op_val. + */ + extern daos_prop_t pool_prop_default; /** diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 0fc06e0d739..a0ebdd3cde4 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -38,7 +38,10 @@ #include "srv_layout.h" #include "srv_pool_map.h" -#define DAOS_POOL_GLOBAL_VERSION_WITH_HDL_CRED 1 +#define DAOS_POOL_GLOBAL_VERSION_WITH_HDL_CRED 1 +#define DAOS_POOL_GLOBAL_VERSION_WITH_SVC_OPS_KVS 3 + +#define DUP_OP_MIN_RDB_SIZE (1 << 30) /* Pool service crt event */ struct pool_svc_event { @@ -676,6 +679,8 @@ init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, co struct rdb_kvs_attr attr; int ntargets = nnodes * dss_tgt_nr; uint32_t upgrade_global_version = DAOS_POOL_GLOBAL_VERSION; + uint32_t svc_ops_enabled = 0; + uint64_t rdb_size; int rc; struct daos_prop_entry *entry; @@ -749,8 +754,28 @@ init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, co /* Create pool user attributes KVS */ rc = rdb_tx_create_kvs(tx, kvs, &ds_pool_attr_user, &attr); - if (rc != 0) + if (rc 
!= 0) { D_ERROR("failed to create user attr KVS, "DF_RC"\n", DP_RC(rc)); + goto out_map_buf; + } + + /* Create pool service operations KVS */ + rc = rdb_tx_create_kvs(tx, kvs, &ds_pool_prop_svc_ops, &attr); + if (rc != 0) { + D_ERROR("failed to create service ops KVS, " DF_RC "\n", DP_RC(rc)); + goto out_map_buf; + } + + /* Determine if duplicate service operations detection will be enabled */ + rc = rdb_get_size(tx->dt_db, &rdb_size); + if (rc != 0) + goto out_map_buf; + if (rdb_size >= DUP_OP_MIN_RDB_SIZE) + svc_ops_enabled = 1; + d_iov_set(&value, &svc_ops_enabled, sizeof(svc_ops_enabled)); + rc = rdb_tx_update(tx, kvs, &ds_pool_prop_svc_ops_enabled, &value); + if (rc != 0) + D_ERROR("failed to set svc_ops_enabled, " DF_RC "\n", DP_RC(rc)); out_map_buf: pool_buf_free(map_buf); @@ -1473,7 +1498,10 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, { struct rdb_tx tx; d_iov_t value; - bool version_exists = false; + bool version_exists = false; + bool rdb_size_ok = false; + uint32_t svc_ops_enabled = 0; + uint64_t rdb_size; struct daos_prop_entry *svc_rf_entry; int rc; @@ -1557,6 +1585,28 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, else svc->ps_svc_rf = -1; + /* Check if duplicate operations detection is enabled, for informative debug log */ + rc = rdb_get_size(svc->ps_rsvc.s_db, &rdb_size); + if (rc != 0) + goto out_lock; + rdb_size_ok = (rdb_size >= DUP_OP_MIN_RDB_SIZE); + + d_iov_set(&value, &svc_ops_enabled, sizeof(svc_ops_enabled)); + rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_svc_ops_enabled, &value); + if (rc == -DER_NONEXIST) { + D_DEBUG(DB_MD, DF_UUID ": duplicate ops detection is disabled due to old layout\n", + DP_UUID(svc->ps_uuid)); + rc = 0; + } else if (rc != 0) { + D_ERROR(DF_UUID ": failed to lookup svc_ops_enabled: " DF_RC "\n", + DP_UUID(svc->ps_uuid), DP_RC(rc)); + goto out_lock; + } + + D_DEBUG(DB_MD, DF_UUID ": duplicate ops detection %s (rdb size: " DF_U64 " %s %u)\n", + 
DP_UUID(svc->ps_uuid), svc_ops_enabled ? "enabled" : "disabled", rdb_size, + rdb_size_ok ? ">=" : "<", DUP_OP_MIN_RDB_SIZE); + out_lock: ABT_rwlock_unlock(svc->ps_lock); rdb_tx_end(&tx); @@ -2568,11 +2618,14 @@ pool_prop_read(struct rdb_tx *tx, const struct pool_svc *svc, uint64_t bits, if (bits & DAOS_PO_QUERY_PROP_REINT_MODE) { d_iov_set(&value, &val32, sizeof(val32)); rc = rdb_tx_lookup(tx, &svc->ps_root, &ds_pool_prop_reint_mode, &value); - if (rc == -DER_NONEXIST && global_ver < 2) { /* needs to be upgraded */ + /* NB: would test global_ver < 2, but on master branch, code added after v3 bump. */ + if (rc == -DER_NONEXIST && global_ver < 3) { /* needs to be upgraded */ rc = 0; val32 = DAOS_PROP_PO_REINT_MODE_DEFAULT; prop->dpp_entries[idx].dpe_flags |= DAOS_PROP_ENTRY_NOT_SET; } else if (rc != 0) { + D_ERROR(DF_UUID ": DAOS_PROP_PO_REINT_MODE missing from the pool\n", + DP_UUID(svc->ps_uuid)); D_GOTO(out_prop, rc); } D_ASSERT(idx < nr); @@ -4593,6 +4646,7 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc, size_t hdl_uuids_size; int n_hdl_uuids = 0; uint32_t connectable; + uint32_t svc_ops_enabled = 0; if (rpc) { rc = find_hdls_to_evict(tx, svc, &hdl_uuids, &hdl_uuids_size, @@ -4818,6 +4872,63 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc, need_commit = true; } + /* Upgrade for the pool/container service operations KVS */ + D_DEBUG(DB_MD, DF_UUID ": check ds_pool_prop_svc_ops\n", DP_UUID(pool_uuid)); + + d_iov_set(&value, NULL, 0); + rc = rdb_tx_lookup(tx, &svc->ps_root, &ds_pool_prop_svc_ops, &value); + if (rc && rc != -DER_NONEXIST) { + D_ERROR(DF_UUID ": failed to lookup service ops KVS: %d\n", DP_UUID(pool_uuid), rc); + D_GOTO(out_free, rc); + } else if (rc == -DER_NONEXIST) { + struct rdb_kvs_attr attr; + + D_DEBUG(DB_MD, DF_UUID ": creating service ops KVS\n", DP_UUID(pool_uuid)); + attr.dsa_class = RDB_KVS_GENERIC; + attr.dsa_order = 16; + rc = rdb_tx_create_kvs(tx, &svc->ps_root, &ds_pool_prop_svc_ops, &attr); + if (rc != 0) { 
+ D_ERROR(DF_UUID ": failed to create service ops KVS: %d\n", + DP_UUID(pool_uuid), rc); + D_GOTO(out_free, rc); + } + need_commit = true; + } + + /* And enable the new service operations KVS only if rdb is large enough */ + D_DEBUG(DB_MD, DF_UUID ": check ds_pool_prop_svc_ops_enabled\n", DP_UUID(pool_uuid)); + d_iov_set(&value, &svc_ops_enabled, sizeof(svc_ops_enabled)); + rc = rdb_tx_lookup(tx, &svc->ps_root, &ds_pool_prop_svc_ops_enabled, &value); + if (rc && rc != -DER_NONEXIST) { + D_ERROR(DF_UUID ": failed to lookup service ops enabled boolean: %d\n", + DP_UUID(pool_uuid), rc); + D_GOTO(out_free, rc); + } else if (rc == -DER_NONEXIST) { + uint64_t rdb_nbytes; + + D_DEBUG(DB_MD, DF_UUID ": creating service ops enabled boolean\n", + DP_UUID(pool_uuid)); + + rc = rdb_get_size(tx->dt_db, &rdb_nbytes); + if (rc != 0) + D_GOTO(out_free, rc); + if (rdb_nbytes >= DUP_OP_MIN_RDB_SIZE) + svc_ops_enabled = 1; + rc = rdb_tx_update(tx, &svc->ps_root, &ds_pool_prop_svc_ops_enabled, &value); + if (rc != 0) { + D_ERROR(DF_UUID ": set svc_ops_enabled=%d failed, " DF_RC "\n", + DP_UUID(pool_uuid), svc_ops_enabled, DP_RC(rc)); + D_GOTO(out_free, rc); + } + D_DEBUG(DB_MD, + DF_UUID ": duplicate RPC detection %s (rdb size: " DF_U64 " %s %u)\n", + DP_UUID(pool_uuid), svc_ops_enabled ? "enabled" : "disabled", rdb_nbytes, + svc_ops_enabled ? ">=" : "<", DUP_OP_MIN_RDB_SIZE); + need_commit = true; + } + + D_DEBUG(DB_MD, DF_UUID ": need_commit=%s\n", DP_UUID(pool_uuid), + need_commit ? 
"true" : "false"); if (need_commit) { daos_prop_t *prop = NULL; @@ -5617,15 +5728,16 @@ struct pool_svc_reconf_arg { static void pool_svc_reconf_ult(void *varg) { - struct pool_svc_sched *reconf = varg; - struct pool_svc_reconf_arg *arg = reconf->psc_arg; - struct pool_svc *svc; - struct pool_map *map; - d_rank_list_t *current; - d_rank_list_t *to_add; - d_rank_list_t *to_remove; - d_rank_list_t *new; - int rc; + struct pool_svc_sched *reconf = varg; + struct pool_svc_reconf_arg *arg = reconf->psc_arg; + struct pool_svc *svc; + struct pool_map *map; + d_rank_list_t *current; + d_rank_list_t *to_add; + d_rank_list_t *to_remove; + d_rank_list_t *new; + uint64_t rdb_nbytes = 0; + int rc; svc = container_of(reconf, struct pool_svc, ps_reconf_sched); @@ -5657,6 +5769,14 @@ pool_svc_reconf_ult(void *varg) goto out; } + /* If adding replicas, get the correct rdb size (do not trust DAOS_MD_CAP). */ + rc = rdb_get_size(svc->ps_rsvc.s_db, &rdb_nbytes); + if (rc != 0) { + D_ERROR(DF_UUID ": failed to get rdb size: " DF_RC "\n", DP_UUID(svc->ps_uuid), + DP_RC(rc)); + goto out_cur; + } + if (arg->sca_map == NULL) ABT_rwlock_rdlock(svc->ps_pool->sp_lock); rc = ds_pool_plan_svc_reconfs(svc->ps_svc_rf, map, current, dss_self_rank(), &to_add, @@ -5682,7 +5802,7 @@ pool_svc_reconf_ult(void *varg) * membership changes to the MS. */ if (!arg->sca_sync_remove && to_add->rl_nr > 0) { - ds_rsvc_add_replicas_s(&svc->ps_rsvc, to_add, ds_rsvc_get_md_cap()); + ds_rsvc_add_replicas_s(&svc->ps_rsvc, to_add, rdb_nbytes); if (reconf->psc_canceled) { rc = -DER_OP_CANCELED; goto out_to_add_remove; diff --git a/src/rdb/rdb.c b/src/rdb/rdb.c index cdfed7dd618..f8a543ab8cf 100644 --- a/src/rdb/rdb.c +++ b/src/rdb/rdb.c @@ -50,8 +50,9 @@ rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t siz struct rdb *db; int rc; - D_DEBUG(DB_MD, DF_UUID": creating db %s with %u replicas: caller_term="DF_X64"\n", - DP_UUID(uuid), path, replicas == NULL ? 
0 : replicas->rl_nr, caller_term); + D_DEBUG(DB_MD, + DF_UUID ": creating db %s with %u replicas: caller_term=" DF_X64 " size=" DF_U64, + DP_UUID(uuid), path, replicas == NULL ? 0 : replicas->rl_nr, caller_term, size); /* * Create and open a VOS pool. RDB pools specify VOS_POF_SMALL for @@ -770,6 +771,24 @@ rdb_get_ranks(struct rdb *db, d_rank_list_t **ranksp) return rdb_raft_get_ranks(db, ranksp); } +int +rdb_get_size(struct rdb *db, uint64_t *sizep) +{ + int rc; + struct vos_pool_space vps; + + rc = vos_pool_query_space(db->d_uuid, &vps); + if (rc != 0) { + D_ERROR(DF_DB ": failed to query vos pool space: " DF_RC "\n", DP_DB(db), + DP_RC(rc)); + return rc; + } + + *sizep = SCM_TOTAL(&vps); + + return rc; +} + /** Implementation of the RDB pool checkpoint ULT. The ULT * is only active if DAOS is using MD on SSD. */ diff --git a/src/rsvc/srv.c b/src/rsvc/srv.c index 348ba7d0858..0182f37fc4c 100644 --- a/src/rsvc/srv.c +++ b/src/rsvc/srv.c @@ -1428,7 +1428,7 @@ ds_rsvc_get_md_cap(void) v = getenv(DAOS_MD_CAP_ENV); /* in MB */ if (v == NULL) return size_default; - n = atoi(v); /* FIXME DAOS-9846 */ + n = atoi(v); if (n < size_default >> 20) { D_ERROR("metadata capacity too low; using %zu MB\n", size_default >> 20); From 875e23304ecb84241bfae23e7c5a5e74e9ddffeb Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Sat, 7 Oct 2023 08:34:27 +0100 Subject: [PATCH 80/80] DAOS-13216 dfuse: Add a pre-read feature for non-cached files. (#12015) When the kernel cache is in use but a file is not cached then pre-read the file on open. This works for files up to the read buffer size (1Mb) and is enabled based on the I/O pattern of the last file closed in the same directory. 
Signed-off-by: Ashley Pittman
---
 src/client/dfuse/dfuse.h      |  70 ++++++++++++++--
 src/client/dfuse/dfuse_core.c |   4 +-
 src/client/dfuse/ops/create.c |   2 +
 src/client/dfuse/ops/open.c   |  62 +++++++++++++-
 src/client/dfuse/ops/read.c   | 153 +++++++++++++++++++++++++++++++++-
 utils/node_local_test.py      |  64 ++++++++++++++
 6 files changed, 341 insertions(+), 14 deletions(-)

diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h
index 4c4d88bbbdb..369ab21fa09 100644
--- a/src/client/dfuse/dfuse.h
+++ b/src/client/dfuse/dfuse.h
@@ -89,15 +89,59 @@ dfuse_launch_fuse(struct dfuse_info *dfuse_info, struct fuse_args *args);
 
 struct dfuse_inode_entry;
 
+/* Preread.
+ *
+ * DFuse can start a pre-read of the file on open, then when reads do occur they can happen directly
+ * from the buffer. For 'linear reads' of a file this means that the read can be triggered sooner
+ * and performed as one dfs request. Make use of the pre-read code to only use this for trivial
+ * reads, if a file is not read linearly or it's written to then back off to the regular behavior,
+ * which will likely use the kernel cache.
+ *
+ * Pre-read is enabled when:
+ * Caching is enabled
+ * The file is not cached
+ * The file is small enough to fit in one buffer (1mb)
+ * The previous file from the same directory was read linearly.
+ * Similar to the READDIR_PLUS_AUTO logic this feature is enabled based on the I/O pattern of the
+ * most recent access to the parent directory, general I/O workloads or interception library use are
+ * unlikely to trigger this code however something that is reading the entire contents of a
The new descriptor includes a + * lock which is locked by open before it replies to the kernel request and unlocked by the dfs read + * callback. Read requests then take the lock to ensure the dfs read is complete and reply directly + * with the data in the buffer. + * + * This works up to the buffer size, the pre-read tries to read the expected file size is smaller + * then dfuse will detect this and back off to regular read, however it will not detect if the file + * has grown in size. + * + * A dfuse_event is hung off this new descriptor and these come from the same pool as regular reads, + * this buffer is kept as long as it's needed but released as soon as possible, either on error or + * when EOF is returned to the kernel. If it's still present on release then it's freed then. + */ +struct dfuse_read_ahead { + pthread_mutex_t dra_lock; + struct dfuse_event *dra_ev; + int dra_rc; +}; + /** what is returned as the handle for fuse fuse_file_info on create/open/opendir */ struct dfuse_obj_hdl { /** pointer to dfs_t */ dfs_t *doh_dfs; /** the DFS object handle. Not created for directories. */ dfs_obj_t *doh_obj; + + struct dfuse_read_ahead *doh_readahead; + /** the inode entry for the file */ struct dfuse_inode_entry *doh_ie; + struct dfuse_inode_entry *doh_parent_dir; + /** readdir handle. 
*/ struct dfuse_readdir_hdl *doh_rd; @@ -335,9 +379,11 @@ struct dfuse_event { struct dfuse_inode_entry *de_ie; }; off_t de_req_position; /**< The file position requested by fuse */ - size_t de_req_len; + union { + size_t de_req_len; + size_t de_readahead_len; + }; void (*de_complete_cb)(struct dfuse_event *ev); - struct stat de_attr; }; @@ -680,6 +726,8 @@ struct fuse_lowlevel_ops dfuse_ops; #define DFUSE_REPLY_ENTRY(inode, req, entry) \ do { \ int __rc; \ + DFUSE_TRA_DEBUG(inode, "Returning entry inode %#lx mode %#o size %#zx", \ + (entry).attr.st_ino, (entry).attr.st_mode, (entry).attr.st_size); \ if ((entry).attr_timeout > 0) { \ (inode)->ie_stat = (entry).attr; \ dfuse_mcache_set_time(inode); \ @@ -793,6 +841,13 @@ struct dfuse_inode_entry { /** File has been unlinked from daos */ bool ie_unlinked; + + /** Last file closed in this directory was read linearly. Directories only. + * + * Set on close() of a file in the directory to the value of linear_read from the fh. + * Checked on open of a file to determine if pre-caching is used. 
+ */ + ATOMIC bool ie_linear_read; }; static inline struct dfuse_inode_entry * @@ -847,21 +902,21 @@ dfuse_cache_evict_dir(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *i void dfuse_mcache_set_time(struct dfuse_inode_entry *ie); -/* Set the cache as invalid */ +/* Set the metadata cache as invalid */ void dfuse_mcache_evict(struct dfuse_inode_entry *ie); -/* Check the cache setting against a given timeout, and return time left */ +/* Check the metadata cache setting against a given timeout, and return time left */ bool dfuse_mcache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout); /* Data caching functions */ -/* Mark the cache as up-to-date from now */ +/* Mark the data cache as up-to-date from now */ void dfuse_dcache_set_time(struct dfuse_inode_entry *ie); -/* Set the cache as invalid */ +/* Set the data cache as invalid */ void dfuse_dcache_evict(struct dfuse_inode_entry *ie); @@ -873,6 +928,9 @@ dfuse_cache_evict(struct dfuse_inode_entry *ie); bool dfuse_dcache_get_valid(struct dfuse_inode_entry *ie, double max_age); +void +dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh); + int check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, char *attr, daos_size_t len); diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index c3ebda252e9..cc346df619d 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -1171,7 +1171,7 @@ dfuse_read_event_reset(void *arg) int rc; if (ev->de_iov.iov_buf == NULL) { - D_ALLOC(ev->de_iov.iov_buf, DFUSE_MAX_READ); + D_ALLOC_NZ(ev->de_iov.iov_buf, DFUSE_MAX_READ); if (ev->de_iov.iov_buf == NULL) return false; @@ -1195,7 +1195,7 @@ dfuse_write_event_reset(void *arg) int rc; if (ev->de_iov.iov_buf == NULL) { - D_ALLOC(ev->de_iov.iov_buf, DFUSE_MAX_READ); + D_ALLOC_NZ(ev->de_iov.iov_buf, DFUSE_MAX_READ); if (ev->de_iov.iov_buf == NULL) return false; diff --git a/src/client/dfuse/ops/create.c 
b/src/client/dfuse/ops/create.c index 8aa4222053c..5a544279920 100644 --- a/src/client/dfuse/ops/create.c +++ b/src/client/dfuse/ops/create.c @@ -111,6 +111,8 @@ dfuse_cb_create(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na DFUSE_TRA_DEBUG(parent, "Parent:%#lx " DF_DE, parent->ie_stat.st_ino, DP_DE(name)); + atomic_store_relaxed(&parent->ie_linear_read, false); + /* O_LARGEFILE should always be set on 64 bit systems, and in fact is * defined to 0 so IOF defines LARGEFILE to the value that O_LARGEFILE * would otherwise be using and check that is set. diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index 1928b2b41af..2ac6a9e247c 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -15,6 +15,7 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) struct dfuse_obj_hdl *oh; struct fuse_file_info fi_out = {0}; int rc; + bool prefetch = false; ie = dfuse_inode_lookup(dfuse_info, ino); if (!ie) { @@ -29,6 +30,7 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) DFUSE_TRA_UP(oh, ie, "open handle"); dfuse_open_handle_init(dfuse_info, oh, ie); + oh->doh_parent_dir = dfuse_inode_lookup(dfuse_info, ie->ie_parent); /* Upgrade fd permissions from O_WRONLY to O_RDWR if wb caching is * enabled so the kernel can do read-modify-write @@ -52,10 +54,16 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) if (fi->flags & O_DIRECT) fi_out.direct_io = 1; - if (atomic_load_relaxed(&ie->ie_open_count) > 0) { - fi_out.keep_cache = 1; - } else if (dfuse_dcache_get_valid(ie, ie->ie_dfs->dfc_data_timeout)) { + /* If the file is already open or (potentially) in cache then allow any existing + * kernel cache to be used. If not then use pre-read. + * This should mean that pre-read is only used on the first read, and on files + * which pre-existed in the container. 
+ */ + if (atomic_load_relaxed(&ie->ie_open_count) > 0 || + dfuse_dcache_get_valid(ie, ie->ie_dfs->dfc_data_timeout)) { fi_out.keep_cache = 1; + } else { + prefetch = true; } } else { fi_out.direct_io = 1; @@ -84,9 +92,23 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) atomic_fetch_add_relaxed(&ie->ie_open_count, 1); + /* Enable this for files up to the max read size. */ + if (prefetch && oh->doh_parent_dir && + atomic_load_relaxed(&oh->doh_parent_dir->ie_linear_read) && ie->ie_stat.st_size > 0 && + ie->ie_stat.st_size <= DFUSE_MAX_READ) { + D_ALLOC_PTR(oh->doh_readahead); + if (oh->doh_readahead) { + D_MUTEX_INIT(&oh->doh_readahead->dra_lock, 0); + D_MUTEX_LOCK(&oh->doh_readahead->dra_lock); + } + } + dfuse_inode_decref(dfuse_info, ie); DFUSE_REPLY_OPEN(oh, req, &fi_out); + if (oh->doh_readahead) + dfuse_pre_read(dfuse_info, oh); + return; err: dfuse_inode_decref(dfuse_info, ie); @@ -108,6 +130,26 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) DFUSE_TRA_DEBUG(oh, "Closing %d", oh->doh_caching); + if (oh->doh_readahead) { + struct dfuse_event *ev; + + /* Grab this lock first to ensure that the read cb has been completed. The + * callback might register an error and release ev so do not read it's value + * until after this has completed. + */ + D_MUTEX_LOCK(&oh->doh_readahead->dra_lock); + D_MUTEX_UNLOCK(&oh->doh_readahead->dra_lock); + + ev = oh->doh_readahead->dra_ev; + + D_MUTEX_DESTROY(&oh->doh_readahead->dra_lock); + if (ev) { + daos_event_fini(&ev->de_ev); + d_slab_release(ev->de_eqt->de_read_slab, ev); + } + D_FREE(oh->doh_readahead); + } + /* If the file was read from then set the data cache time for future use, however if the * file was written to then evict the metadata cache. 
* The problem here is that if the file was written to then the contents will be in the @@ -121,8 +163,9 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) if (atomic_load_relaxed(&oh->doh_write_count) != 0) { if (oh->doh_caching) { if (il_calls == 0) { - DFUSE_TRA_DEBUG(oh, "Evicting metadata cache"); + DFUSE_TRA_DEBUG(oh, "Evicting metadata cache, setting data cache"); dfuse_mcache_evict(oh->doh_ie); + dfuse_dcache_set_time(oh->doh_ie); } else { DFUSE_TRA_DEBUG(oh, "Evicting cache"); dfuse_cache_evict(oh->doh_ie); @@ -151,7 +194,18 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) DFUSE_REPLY_ZERO(oh, req); else DFUSE_REPLY_ERR_RAW(oh, req, rc); + if (oh->doh_parent_dir) { + bool use_linear_read = false; + if (oh->doh_linear_read && oh->doh_linear_read_eof) + use_linear_read = true; + + DFUSE_TRA_DEBUG(oh->doh_parent_dir, "Setting linear_read to %d", use_linear_read); + + atomic_store_relaxed(&oh->doh_parent_dir->ie_linear_read, use_linear_read); + + dfuse_inode_decref(dfuse_info, oh->doh_parent_dir); + } if (oh->doh_evict_on_close) { rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, oh->doh_ie->ie_parent, oh->doh_ie->ie_name, diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 2e5ba497bb5..7a5bbedc0d2 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -30,7 +30,7 @@ dfuse_cb_read_complete(struct dfuse_event *ev) if (ev->de_len == 0) { DFUSE_TRA_DEBUG(oh, "%#zx-%#zx requested (EOF)", ev->de_req_position, - ev->de_req_position + ev->de_iov.iov_buf_len - 1); + ev->de_req_position + ev->de_req_len - 1); DFUSE_REPLY_BUFQ(oh, ev->de_req, ev->de_iov.iov_buf, ev->de_len); D_GOTO(release, 0); @@ -51,6 +51,55 @@ dfuse_cb_read_complete(struct dfuse_event *ev) d_slab_release(ev->de_eqt->de_read_slab, ev); } +static bool +dfuse_readahead_reply(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) +{ + size_t reply_len; + + if 
(oh->doh_readahead->dra_rc) { + DFUSE_REPLY_ERR_RAW(oh, req, oh->doh_readahead->dra_rc); + return true; + } + + if (!oh->doh_linear_read || oh->doh_readahead->dra_ev == NULL) { + DFUSE_TRA_DEBUG(oh, "Readahead disabled"); + return false; + } + + if (oh->doh_linear_read_pos != position) { + DFUSE_TRA_DEBUG(oh, "disabling readahead"); + daos_event_fini(&oh->doh_readahead->dra_ev->de_ev); + d_slab_release(oh->doh_readahead->dra_ev->de_eqt->de_read_slab, + oh->doh_readahead->dra_ev); + oh->doh_readahead->dra_ev = NULL; + return false; + } + + oh->doh_linear_read_pos = position + len; + if (position + len >= oh->doh_readahead->dra_ev->de_readahead_len) { + oh->doh_linear_read_eof = true; + } + + /* At this point there is a buffer of known length that contains the data, and a read + * request. + * If the attempted read is bigger than the data then it will be truncated. + * It the atttempted read is smaller than the buffer it will be met in full. + */ + + if (position + len < oh->doh_readahead->dra_ev->de_readahead_len) { + reply_len = len; + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, position + reply_len - 1); + } else { + /* The read will be truncated */ + reply_len = oh->doh_readahead->dra_ev->de_readahead_len - position; + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read %#zx-%#zx not read (truncated)", position, + position + reply_len - 1, position + reply_len, position + len - 1); + } + + DFUSE_REPLY_BUFQ(oh, req, oh->doh_readahead->dra_ev->de_iov.iov_buf + position, reply_len); + return true; +} + void dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct fuse_file_info *fi) { @@ -66,12 +115,36 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct DFUSE_TRA_DEBUG(oh, "Returning EOF early without round trip %#zx", position); oh->doh_linear_read_eof = false; oh->doh_linear_read = false; + + if (oh->doh_readahead) { + D_MUTEX_LOCK(&oh->doh_readahead->dra_lock); + ev = oh->doh_readahead->dra_ev; + + 
oh->doh_readahead->dra_ev = NULL; + D_MUTEX_UNLOCK(&oh->doh_readahead->dra_lock); + + if (ev) { + daos_event_fini(&ev->de_ev); + d_slab_release(ev->de_eqt->de_read_slab, ev); + } + } DFUSE_REPLY_BUFQ(oh, req, NULL, 0); return; } - eqt_idx = atomic_fetch_add_relaxed(&dfuse_info->di_eqt_idx, 1); + if (oh->doh_readahead) { + bool replied; + + D_MUTEX_LOCK(&oh->doh_readahead->dra_lock); + replied = dfuse_readahead_reply(req, len, position, oh); + D_MUTEX_UNLOCK(&oh->doh_readahead->dra_lock); + if (replied) { + return; + } + } + + eqt_idx = atomic_fetch_add_relaxed(&dfuse_info->di_eqt_idx, 1); eqt = &dfuse_info->di_eqt[eqt_idx % dfuse_info->di_eq_count]; ev = d_slab_acquire(eqt->de_read_slab); @@ -131,3 +204,79 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct d_slab_release(eqt->de_read_slab, ev); } } + +static void +dfuse_cb_pre_read_complete(struct dfuse_event *ev) +{ + struct dfuse_obj_hdl *oh = ev->de_oh; + + oh->doh_readahead->dra_rc = ev->de_ev.ev_error; + + if (ev->de_ev.ev_error != 0) { + oh->doh_readahead->dra_rc = ev->de_ev.ev_error; + daos_event_fini(&ev->de_ev); + d_slab_release(ev->de_eqt->de_read_slab, ev); + oh->doh_readahead->dra_ev = NULL; + } + + /* If the length is not as expected then the file has been modified since the last stat so + * discard this cache and use regular reads. Note that this will only detect files which + * have shrunk in size, not grown. 
+ */ + if (ev->de_len != ev->de_readahead_len) { + daos_event_fini(&ev->de_ev); + d_slab_release(ev->de_eqt->de_read_slab, ev); + oh->doh_readahead->dra_ev = NULL; + } + + D_MUTEX_UNLOCK(&oh->doh_readahead->dra_lock); +} + +void +dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh) +{ + struct dfuse_eq *eqt; + int rc; + struct dfuse_event *ev; + uint64_t eqt_idx; + size_t len = oh->doh_ie->ie_stat.st_size; + + eqt_idx = atomic_fetch_add_relaxed(&dfuse_info->di_eqt_idx, 1); + eqt = &dfuse_info->di_eqt[eqt_idx % dfuse_info->di_eq_count]; + + ev = d_slab_acquire(eqt->de_read_slab); + if (ev == NULL) + D_GOTO(err, rc = ENOMEM); + + ev->de_iov.iov_len = len; + ev->de_req = 0; + ev->de_sgl.sg_nr = 1; + ev->de_oh = oh; + ev->de_readahead_len = len; + ev->de_req_position = 0; + + ev->de_complete_cb = dfuse_cb_pre_read_complete; + oh->doh_readahead->dra_ev = ev; + + rc = dfs_read(oh->doh_dfs, oh->doh_obj, &ev->de_sgl, 0, &ev->de_len, &ev->de_ev); + if (rc != 0) { + D_GOTO(err, rc); + return; + } + + /* Send a message to the async thread to wake it up and poll for events */ + sem_post(&eqt->de_sem); + + /* Now ensure there are more descriptors for the next request */ + d_slab_restock(eqt->de_read_slab); + + return; +err: + oh->doh_readahead->dra_rc = rc; + if (ev) { + daos_event_fini(&ev->de_ev); + d_slab_release(eqt->de_read_slab, ev); + oh->doh_readahead->dra_ev = NULL; + } + D_MUTEX_UNLOCK(&oh->doh_readahead->dra_lock); +} diff --git a/utils/node_local_test.py b/utils/node_local_test.py index ec7c5ce3cf0..51872dca987 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -22,6 +22,7 @@ import stat import errno import argparse +import random import threading import functools import traceback @@ -2145,6 +2146,69 @@ def test_read(self): print(data) assert data == 'test' + def test_pre_read(self): + """Test the pre-read code. + + Test reading a file which is previously unknown to fuse with caching on. 
This should go + into the pre_read code and load the file contents automatically after the open call. + """ + dfuse = DFuse(self.server, self.conf, container=self.container) + dfuse.start(v_hint='pre_read_0') + + with open(join(dfuse.dir, 'file0'), 'w') as fd: + fd.write('test') + + with open(join(dfuse.dir, 'file1'), 'w') as fd: + fd.write('test') + + with open(join(dfuse.dir, 'file2'), 'w') as fd: + fd.write('testing') + + raw_data0 = ''.join(random.choices(['d', 'a', 'o', 's'], k=1024 * 1024)) # nosec + with open(join(dfuse.dir, 'file3'), 'w') as fd: + fd.write(raw_data0) + + raw_data1 = ''.join(random.choices(['d', 'a', 'o', 's'], k=(1024 * 1024) - 1)) # nosec + with open(join(dfuse.dir, 'file4'), 'w') as fd: + fd.write(raw_data1) + + if dfuse.stop(): + self.fatal_errors = True + + dfuse = DFuse(self.server, self.conf, caching=True, container=self.container) + dfuse.start(v_hint='pre_read_1') + + with open(join(dfuse.dir, 'file0'), 'r') as fd: + data0 = fd.read() + + with open(join(dfuse.dir, 'file1'), 'r') as fd: + data1 = fd.read(16) + + with open(join(dfuse.dir, 'file2'), 'r') as fd: + data2 = fd.read(2) + + with open(join(dfuse.dir, 'file3'), 'r') as fd: + data3 = fd.read() + + with open(join(dfuse.dir, 'file4'), 'r') as fd: + data4 = fd.read() + data5 = fd.read() + + # This should not use the pre-read feature, to be validated via the logs. + with open(join(dfuse.dir, 'file4'), 'r') as fd: + data6 = fd.read() + + if dfuse.stop(): + self.fatal_errors = True + print(data0) + assert data0 == 'test' + assert data1 == 'test' + assert data2 == 'te' + assert raw_data0 == data3 + assert raw_data1 == data4 + assert len(data5) == 0 + assert raw_data1 == data6 + def test_two_mounts(self): """Create two mounts, and check that a file created in one can be read from the other""" dfuse0 = DFuse(self.server,