commit 4b64cd356c8f3d242c23059aff6c6636ac9e290f Author: Andriy Gapon Date: Sun Nov 18 23:43:08 2012 +0200 zfs roopool: add support for multi-vdev configurations diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index 94b25d4..66ccf4a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -3755,54 +3755,114 @@ out: #else -extern int -vdev_geom_read_pool_label(const char *name, nvlist_t **config); +extern int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs, + uint64_t *count); static nvlist_t * spa_generate_rootconf(const char *name) { + nvlist_t **configs, **tops; nvlist_t *config; - nvlist_t *nvtop, *nvroot; + nvlist_t *best_cfg, *nvtop, *nvroot; + uint64_t *holes; + uint64_t best_txg; uint64_t nchildren; uint64_t pgid; + uint64_t count; + uint64_t i; + uint_t nholes; - if (vdev_geom_read_pool_label(name, &config) != 0) + if (vdev_geom_read_pool_label(name, &configs, &count) != 0) return (NULL); + ASSERT3U(count, !=, 0); + best_txg = 0; + for (i = 0; i < count; i++) { + uint64_t txg; + + VERIFY(nvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG, + &txg) == 0); + if (txg > best_txg) { + best_txg = txg; + best_cfg = configs[i]; + } + } + /* * Multi-vdev root pool configuration discovery is not supported yet. */ nchildren = 0; - nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, &nchildren); - if (nchildren != 1) { - nvlist_free(config); - return (NULL); + VERIFY(nvlist_lookup_uint64(best_cfg, ZPOOL_CONFIG_VDEV_CHILDREN, + &nchildren) == 0); + holes = NULL; + nvlist_lookup_uint64_array(best_cfg, ZPOOL_CONFIG_HOLE_ARRAY, + &holes, &nholes); + + tops = kmem_alloc(nchildren * sizeof(void *), KM_SLEEP | KM_ZERO); + for (i = 0; i < nchildren; i++) { + if (i >= count) + break; + if (configs[i] == NULL) + continue; + VERIFY(nvlist_lookup_nvlist(configs[i], ZPOOL_CONFIG_VDEV_TREE, + &nvtop) == 0); + nvlist_dup(nvtop, &tops[i], KM_SLEEP); + } + for (i = 0; holes != NULL && i < nholes; i++) { + if (i >= nchildren) + continue; + if (tops[holes[i]] != NULL) + continue; + nvlist_alloc(&tops[holes[i]], NV_UNIQUE_NAME, KM_SLEEP); + VERIFY(nvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE, + VDEV_TYPE_HOLE) == 0); + VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID, + holes[i]) == 0); + VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID, + 0) == 0); + } + for (i = 0; i < nchildren; i++) { + if (tops[i] != NULL) + continue; + nvlist_alloc(&tops[i], NV_UNIQUE_NAME, KM_SLEEP); + VERIFY(nvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE, + VDEV_TYPE_MISSING) == 0); + VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID, + i) == 0); + VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID, + 0) == 0); } /* - * Add this top-level vdev to the child array. + * Create pool config based on the best vdev config. */ - VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvtop) == 0); - VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &pgid) == 0); + nvlist_dup(best_cfg, &config, KM_SLEEP); /* * Put this pool's top-level vdevs into a root vdev. */ + VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &pgid) == 0); VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &nvtop, 1) == 0); + tops, nchildren) == 0); /* * Replace the existing vdev_tree with the new root vdev in * this pool's configuration (remove the old, add the new). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); + + for (i = 0; i < count; i++) + nvlist_free(configs[i]); + kmem_free(configs, count * sizeof(void *)); + for (i = 0; i < nchildren; i++) + nvlist_free(tops[i]); + kmem_free(tops, nchildren * sizeof(void *)); nvlist_free(nvroot); return (config); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index ecab3a1..d764393 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -274,27 +274,74 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) return (*config == NULL ? ENOENT : 0); } -static int -vdev_geom_check_config(nvlist_t *config, const char *name, uint64_t *best_txg) +static void +resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id) { - uint64_t vdev_guid; - uint64_t txg; + nvlist_t **new_configs; + uint64_t i; + + if (id < *count) + return; + new_configs = kmem_alloc((id + 1) * sizeof(nvlist_t *), + KM_SLEEP | KM_ZERO); + for (i = 0; i < *count; i++) + new_configs[i] = (*configs)[i]; + if (*configs != NULL) + kmem_free(*configs, *count * sizeof(void *)); + *configs = new_configs; + *count = id + 1; +} + +static void +process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg, + const char *name, uint64_t* known_pool_guid) +{ + nvlist_t *vdev_tree; + uint64_t pool_guid; + uint64_t vdev_guid, known_guid; + uint64_t id, txg, known_txg; char *pname; + int i; - if (nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || + if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 || strcmp(pname, name) != 0) - return (ENOENT); + goto ignore; - ZFS_LOG(1, "found pool: %s", pname); + if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) + goto ignore; - txg = 0; - nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg); - if (txg <= *best_txg) - return (ENOENT); - *best_txg = txg; - ZFS_LOG(1, "txg: %ju", (uintmax_t)*best_txg); + if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0) + goto ignore; - return (0); + if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) + goto ignore; + + if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0) + goto ignore; + + VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); + + if (*known_pool_guid != 0) { + if (pool_guid != *known_pool_guid) + goto ignore; + } else + *known_pool_guid = pool_guid; + + resize_configs(configs, count, id); + + if ((*configs)[id] != NULL) { + VERIFY(nvlist_lookup_uint64((*configs)[id], + ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0); + if (txg <= known_txg) + goto ignore; + nvlist_free((*configs)[id]); + } + + (*configs)[id] = cfg; + return; + +ignore: + nvlist_free(cfg); } static int @@ -321,14 +368,15 @@ vdev_geom_dettach_taster(struct g_consumer *cp) } int -vdev_geom_read_pool_label(const char *name, nvlist_t **config) +vdev_geom_read_pool_label(const char *name, + nvlist_t ***configs, uint64_t *count) { struct g_class *mp; struct g_geom *gp, *zgp; struct g_provider *pp; struct g_consumer *zcp; nvlist_t *vdev_cfg; - uint64_t best_txg; + uint64_t pool_guid; int error; DROP_GIANT(); @@ -339,8 +387,9 @@ vdev_geom_read_pool_label(const char *name, nvlist_t **config) zgp->orphan = vdev_geom_taste_orphan; zcp = g_new_consumer(zgp); - best_txg = 0; - *config = NULL; + *configs = NULL; + *count = 0; + pool_guid = 0; LIST_FOREACH(mp, &g_classes, class) { if (mp == &zfs_vdev_class) continue; @@ -360,14 +409,8 @@ vdev_geom_read_pool_label(const char *name, nvlist_t **config) continue; ZFS_LOG(1, "successfully read vdev config"); - error = vdev_geom_check_config(vdev_cfg, name, - &best_txg); - if (error != 0) { - nvlist_free(vdev_cfg); - continue; - } - nvlist_free(*config); - *config = vdev_cfg; + process_vdev_config(configs, count, + vdev_cfg, name, &pool_guid); } } } @@ -376,7 +419,8 @@ vdev_geom_read_pool_label(const char *name, nvlist_t **config) g_destroy_geom(zgp); g_topology_unlock(); PICKUP_GIANT(); - return (*config == NULL ? ENOENT : 0); + + return (*count > 0 ? 0 : ENOENT); } static uint64_t