diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 5605a35b8641..fd64c7cdda75 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -146,6 +146,7 @@ struct spa_aux_vdev { vdev_t **sav_vdevs; /* devices */ int sav_count; /* number devices */ boolean_t sav_sync; /* sync the device list */ + boolean_t sav_label_sync; /* sync aux vdev labels */ nvlist_t **sav_pending; /* pending device additions */ uint_t sav_npending; /* # pending devices */ }; diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 638572996c3a..d3982a11f4ca 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -6755,6 +6755,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_spares.sav_sync = B_TRUE; + spa->spa_spares.sav_label_sync = B_TRUE; } if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { @@ -6770,6 +6771,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_l2cache.sav_sync = B_TRUE; + spa->spa_l2cache.sav_label_sync = B_TRUE; } /* diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index ed592514fded..aae4f65ab688 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -1007,6 +1007,56 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, return (state == POOL_STATE_ACTIVE); } +static nvlist_t * +vdev_aux_label_generate(vdev_t *vd, boolean_t reason_spare) +{ + nvlist_t *label; + + /* + * For inactive hot spares and level 2 ARC devices, we generate + * a special label that identifies as a mutually shared hot + * spare or l2cache device. It is written on addition or removal + * of a hot spare or l2cache vdev (in which case we want to revert + * the labels), and when aux labels are re-synced after import. 
+ */ + VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, + spa_version(vd->vdev_spa)) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, reason_spare ? + POOL_STATE_SPARE : POOL_STATE_L2CACHE) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + + /* + * This is merely to facilitate reporting the ashift of the + * cache device through zdb. The actual retrieval of the + * ashift (in vdev_alloc()) uses the nvlist + * spa->spa_l2cache.sav_config (populated in + * spa_ld_open_aux_vdevs()). + */ + if (!reason_spare) { + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, + vd->vdev_ashift) == 0); + } + + /* + * Add path information to help find it during pool import + */ + if (vd->vdev_path != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PATH, + vd->vdev_path) == 0); + } + if (vd->vdev_devid != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_DEVID, + vd->vdev_devid) == 0); + } + if (vd->vdev_physpath != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PHYS_PATH, + vd->vdev_physpath) == 0); + } + return (label); +} + /* * Initialize a vdev label. We check to make sure each leaf device is not in * use, and writable. We put down an initial label which we will later @@ -1121,49 +1171,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * be written again with a meaningful txg by spa_sync(). */ if (reason_spare || reason_l2cache) { - /* - * For inactive hot spares and level 2 ARC devices, we generate - * a special label that identifies as a mutually shared hot - * spare or l2cache device. We write the label in case of - * addition or removal of hot spare or l2cache vdev (in which - * case we want to revert the labels). 
- */ - VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, - spa_version(spa)) == 0); - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, - reason_spare ? POOL_STATE_SPARE : POOL_STATE_L2CACHE) == 0); - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, - vd->vdev_guid) == 0); - - /* - * This is merely to facilitate reporting the ashift of the - * cache device through zdb. The actual retrieval of the - * ashift (in vdev_alloc()) uses the nvlist - * spa->spa_l2cache->sav_config (populated in - * spa_ld_open_aux_vdevs()). - */ - if (reason_l2cache) { - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, - vd->vdev_ashift) == 0); - } - - /* - * Add path information to help find it during pool import - */ - if (vd->vdev_path != NULL) { - VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PATH, - vd->vdev_path) == 0); - } - if (vd->vdev_devid != NULL) { - VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_DEVID, - vd->vdev_devid) == 0); - } - if (vd->vdev_physpath != NULL) { - VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PHYS_PATH, - vd->vdev_physpath) == 0); - } + label = vdev_aux_label_generate(vd, reason_spare); /* * When spare or l2cache (aux) vdev is added during pool @@ -1900,6 +1908,8 @@ vdev_label_sync(zio_t *zio, uint64_t *good_writes, abd_t *vp_abd; char *buf; size_t buflen; + vdev_t *pvd = vd->vdev_parent; + boolean_t spare_in_use = B_FALSE; for (int c = 0; c < vd->vdev_children; c++) { vdev_label_sync(zio, good_writes, @@ -1920,10 +1930,17 @@ vdev_label_sync(zio_t *zio, uint64_t *good_writes, if (vd->vdev_ops == &vdev_draid_spare_ops) return; + if (pvd && pvd->vdev_ops == &vdev_spare_ops) + spare_in_use = B_TRUE; + /* * Generate a label describing the top-level config to which we belong. 
+ */ - label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE); + if ((vd->vdev_isspare && !spare_in_use) || vd->vdev_isl2cache) { + label = vdev_aux_label_generate(vd, vd->vdev_isspare); + } else { + label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE); + } vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE); abd_zero(vp_abd, sizeof (vdev_phys_t)); @@ -1973,6 +1990,24 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) zio_nowait(vio); } + /* + * Aux vdev (spare/l2cache) paths may have changed during import + */ + spa_aux_vdev_t *sav[2] = {&spa->spa_spares, &spa->spa_l2cache}; + for (int i = 0; i < 2; i++) { + for (int v = 0; v < sav[i]->sav_count; v++) { + uint64_t *good_writes; + if (!sav[i]->sav_label_sync) + continue; + good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); + zio_t *vio = zio_null(zio, spa, NULL, + vdev_label_sync_ignore_done, good_writes, flags); + vdev_label_sync(vio, good_writes, sav[i]->sav_vdevs[v], + l, txg, flags); + zio_nowait(vio); + } + } + error = zio_wait(zio); /* @@ -1983,6 +2018,15 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) zio_flush(zio, vd); + for (int i = 0; i < 2; i++) { + if (!sav[i]->sav_label_sync) + continue; + for (int v = 0; v < sav[i]->sav_count; v++) + zio_flush(zio, sav[i]->sav_vdevs[v]); + if (l == 1) + sav[i]->sav_label_sync = B_FALSE; + } + (void) zio_wait(zio); return (error);