FreeBSD ZFS
The Zettabyte File System

spa_config.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 
00022 /*
00023  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
00024  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
00025  * Copyright (c) 2012 by Delphix. All rights reserved.
00026  */
00027 
00028 #include <sys/zfs_context.h>
00029 #include <sys/fm/fs/zfs.h>
00030 #include <sys/spa.h>
00031 #include <sys/spa_impl.h>
00032 #include <sys/nvpair.h>
00033 #include <sys/uio.h>
00034 #include <sys/fs/zfs.h>
00035 #include <sys/vdev_impl.h>
00036 #include <sys/zfs_ioctl.h>
00037 #include <sys/utsname.h>
00038 #include <sys/sunddi.h>
00039 #include <sys/zfeature.h>
00040 #ifdef _KERNEL
00041 #include <sys/kobj.h>
00042 #include <sys/zone.h>
00043 #endif
00044 
/*
 * Generation number of the pool configuration set.  It is bumped at the
 * end of every spa_config_sync() and compared against the caller's copy
 * in spa_all_configs() to decide whether anything needs to be returned.
 */
00063 static uint64_t spa_config_generation = 1;
00064 
/*
 * Path of the configuration cache file that spa_config_load() opens.
 * Initialised to ZPOOL_CACHE.
 */
00069 const char *spa_config_path = ZPOOL_CACHE;
00070 
00076 void
00077 spa_config_load(void)
00078 {
00079         void *buf = NULL;
00080         nvlist_t *nvlist, *child;
00081         nvpair_t *nvpair;
00082         char *pathname;
00083         struct _buf *file;
00084         uint64_t fsize;
00085 
00086         /*
00087          * Open the configuration file.
00088          */
00089         pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
00090 
00091         (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path);
00092 
00093         file = kobj_open_file(pathname);
00094 
00095         kmem_free(pathname, MAXPATHLEN);
00096 
00097         if (file == (struct _buf *)-1)
00098                 return;
00099 
00100         if (kobj_get_filesize(file, &fsize) != 0)
00101                 goto out;
00102 
00103         buf = kmem_alloc(fsize, KM_SLEEP);
00104 
00105         /*
00106          * Read the nvlist from the file.
00107          */
00108         if (kobj_read_file(file, buf, fsize, 0) < 0)
00109                 goto out;
00110 
00111         /*
00112          * Unpack the nvlist.
00113          */
00114         if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0)
00115                 goto out;
00116 
00117         /*
00118          * Iterate over all elements in the nvlist, creating a new spa_t for
00119          * each one with the specified configuration.
00120          */
00121         mutex_enter(&spa_namespace_lock);
00122         nvpair = NULL;
00123         while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {
00124                 if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
00125                         continue;
00126 
00127                 VERIFY(nvpair_value_nvlist(nvpair, &child) == 0);
00128 
00129                 if (spa_lookup(nvpair_name(nvpair)) != NULL)
00130                         continue;
00131                 (void) spa_add(nvpair_name(nvpair), child, NULL);
00132         }
00133         mutex_exit(&spa_namespace_lock);
00134 
00135         nvlist_free(nvlist);
00136 
00137 out:
00138         if (buf != NULL)
00139                 kmem_free(buf, fsize);
00140 
00141         kobj_close_file(file);
00142 }
00143 
00144 static int
00145 spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
00146 {
00147         size_t buflen;
00148         char *buf;
00149         vnode_t *vp;
00150         int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX;
00151         char *temp;
00152         int err;
00153 
00154         /*
00155          * If the nvlist is empty (NULL), then remove the old cachefile.
00156          */
00157         if (nvl == NULL) {
00158                 err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
00159                 return (err);
00160         }
00161 
00162         /*
00163          * Pack the configuration into a buffer.
00164          */
00165         VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0);
00166 
00167         buf = kmem_alloc(buflen, KM_SLEEP);
00168         temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
00169 
00170         VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR,
00171             KM_SLEEP) == 0);
00172 
00173         /*
00174          * Write the configuration to disk.  We need to do the traditional
00175          * 'write to temporary file, sync, move over original' to make sure we
00176          * always have a consistent view of the data.
00177          */
00178         (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path);
00179 
00180         err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0);
00181         if (err == 0) {
00182                 if ((err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
00183                     0, RLIM64_INFINITY, kcred, NULL)) == 0 &&
00184                     (err = VOP_FSYNC(vp, FSYNC, kcred, NULL)) == 0) {
00185                         err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
00186                 }
00187                 (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
00188         }
00189 
00190         (void) vn_remove(temp, UIO_SYSSPACE, RMFILE);
00191 
00192         kmem_free(buf, buflen);
00193         kmem_free(temp, MAXPATHLEN);
00194         return (err);
00195 }
00196 
00201 void
00202 spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
00203 {
00204         spa_config_dirent_t *dp, *tdp;
00205         nvlist_t *nvl;
00206         boolean_t ccw_failure;
00207         int error;
00208 
00209         ASSERT(MUTEX_HELD(&spa_namespace_lock));
00210 
00211         if (rootdir == NULL || !(spa_mode_global & FWRITE))
00212                 return;
00213 
00214         /*
00215          * Iterate over all cachefiles for the pool, past or present.  When the
00216          * cachefile is changed, the new one is pushed onto this list, allowing
00217          * us to update previous cachefiles that no longer contain this pool.
00218          */
00219         ccw_failure = B_FALSE;
00220         for (dp = list_head(&target->spa_config_list); dp != NULL;
00221             dp = list_next(&target->spa_config_list, dp)) {
00222                 spa_t *spa = NULL;
00223                 if (dp->scd_path == NULL)
00224                         continue;
00225 
00226                 /*
00227                  * Iterate over all pools, adding any matching pools to 'nvl'.
00228                  */
00229                 nvl = NULL;
00230                 while ((spa = spa_next(spa)) != NULL) {
00231                         if (spa == target && removing)
00232                                 continue;
00233 
00234                         mutex_enter(&spa->spa_props_lock);
00235                         tdp = list_head(&spa->spa_config_list);
00236                         if (spa->spa_config == NULL ||
00237                             tdp->scd_path == NULL ||
00238                             strcmp(tdp->scd_path, dp->scd_path) != 0) {
00239                                 mutex_exit(&spa->spa_props_lock);
00240                                 continue;
00241                         }
00242 
00243                         if (nvl == NULL)
00244                                 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
00245                                     KM_SLEEP) == 0);
00246 
00247                         VERIFY(nvlist_add_nvlist(nvl, spa->spa_name,
00248                             spa->spa_config) == 0);
00249                         mutex_exit(&spa->spa_props_lock);
00250                 }
00251 
00252                 error = spa_config_write(dp, nvl);
00253                 if (error != 0) {
00254         
00255                         printf("ZFS ERROR: Update of cache file %s failed: "
00256                             "Errno %d\n", dp->scd_path, error);
00257                         ccw_failure = B_TRUE;
00258                 }
00259 
00260                 nvlist_free(nvl);
00261         }
00262 
00263         if (ccw_failure) {
00264                 /*
00265                  * Keep trying so that configuration data is 
00266                  * written if/when any temporary filesystem
00267                  * resource issues are resolved.
00268                  */
00269                 target->spa_ccw_fail_time = ddi_get_lbolt64();
00270                 spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
00271                 zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
00272                     target, NULL, NULL, 0, 0);
00273         } else {
00274                 /*
00275                  * Do not rate limit future attempts to update
00276                  * the config cache.
00277                  */
00278                 target->spa_ccw_fail_time = 0;
00279         }
00280 
00281         /*
00282          * Remove any config entries older than the current one.
00283          */
00284         dp = list_head(&target->spa_config_list);
00285         while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
00286                 list_remove(&target->spa_config_list, tdp);
00287                 if (tdp->scd_path != NULL)
00288                         spa_strfree(tdp->scd_path);
00289                 kmem_free(tdp, sizeof (spa_config_dirent_t));
00290         }
00291 
00292         spa_config_generation++;
00293 
00294         if (postsysevent)
00295                 spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
00296 }
00297 
00304 nvlist_t *
00305 spa_all_configs(uint64_t *generation)
00306 {
00307         nvlist_t *pools;
00308         spa_t *spa = NULL;
00309 
00310         if (*generation == spa_config_generation)
00311                 return (NULL);
00312 
00313         VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0);
00314 
00315         mutex_enter(&spa_namespace_lock);
00316         while ((spa = spa_next(spa)) != NULL) {
00317                 if (INGLOBALZONE(curthread) ||
00318                     zone_dataset_visible(spa_name(spa), NULL)) {
00319                         mutex_enter(&spa->spa_props_lock);
00320                         VERIFY(nvlist_add_nvlist(pools, spa_name(spa),
00321                             spa->spa_config) == 0);
00322                         mutex_exit(&spa->spa_props_lock);
00323                 }
00324         }
00325         *generation = spa_config_generation;
00326         mutex_exit(&spa_namespace_lock);
00327 
00328         return (pools);
00329 }
00330 
00331 void
00332 spa_config_set(spa_t *spa, nvlist_t *config)
00333 {
00334         mutex_enter(&spa->spa_props_lock);
00335         if (spa->spa_config != NULL)
00336                 nvlist_free(spa->spa_config);
00337         spa->spa_config = config;
00338         mutex_exit(&spa->spa_props_lock);
00339 }
00340 
00347 nvlist_t *
00348 spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
00349 {
00350         nvlist_t *config, *nvroot;
00351         vdev_t *rvd = spa->spa_root_vdev;
00352         unsigned long hostid = 0;
00353         boolean_t locked = B_FALSE;
00354         uint64_t split_guid;
00355 
00356         if (vd == NULL) {
00357                 vd = rvd;
00358                 locked = B_TRUE;
00359                 spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
00360         }
00361 
00362         ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
00363             (SCL_CONFIG | SCL_STATE));
00364 
00365         /*
00366          * If txg is -1, report the current value of spa->spa_config_txg.
00367          */
00368         if (txg == -1ULL)
00369                 txg = spa->spa_config_txg;
00370 
00371         VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
00372 
00373         VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
00374             spa_version(spa)) == 0);
00375         VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
00376             spa_name(spa)) == 0);
00377         VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
00378             spa_state(spa)) == 0);
00379         VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
00380             txg) == 0);
00381         VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
00382             spa_guid(spa)) == 0);
00383         VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
00384             ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);
00385 
00386 
00387 #ifdef  _KERNEL
00388         hostid = zone_get_hostid(NULL);
00389 #else   /* _KERNEL */
00390         /*
00391          * We're emulating the system's hostid in userland, so we can't use
00392          * zone_get_hostid().
00393          */
00394         (void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
00395 #endif  /* _KERNEL */
00396         if (hostid != 0) {
00397                 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
00398                     hostid) == 0);
00399         }
00400         VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
00401             utsname.nodename) == 0);
00402 
00403         if (vd != rvd) {
00404                 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
00405                     vd->vdev_top->vdev_guid) == 0);
00406                 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
00407                     vd->vdev_guid) == 0);
00408                 if (vd->vdev_isspare)
00409                         VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
00410                             1ULL) == 0);
00411                 if (vd->vdev_islog)
00412                         VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
00413                             1ULL) == 0);
00414                 vd = vd->vdev_top;              /* label contains top config */
00415         } else {
00416                 /*
00417                  * Only add the (potentially large) split information
00418                  * in the mos config, and not in the vdev labels
00419                  */
00420                 if (spa->spa_config_splitting != NULL)
00421                         VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
00422                             spa->spa_config_splitting) == 0);
00423         }
00424 
00425         /*
00426          * Add the top-level config.  We even add this on pools which
00427          * don't support holes in the namespace.
00428          */
00429         vdev_top_config_generate(spa, config);
00430 
00431         /*
00432          * If we're splitting, record the original pool's guid.
00433          */
00434         if (spa->spa_config_splitting != NULL &&
00435             nvlist_lookup_uint64(spa->spa_config_splitting,
00436             ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
00437                 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
00438                     split_guid) == 0);
00439         }
00440 
00441         nvroot = vdev_config_generate(spa, vd, getstats, 0);
00442         VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
00443         nvlist_free(nvroot);
00444 
00445         /*
00446          * Store what's necessary for reading the MOS in the label.
00447          */
00448         VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
00449             spa->spa_label_features) == 0);
00450 
00451         if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
00452                 ddt_histogram_t *ddh;
00453                 ddt_stat_t *dds;
00454                 ddt_object_t *ddo;
00455 
00456                 ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
00457                 ddt_get_dedup_histogram(spa, ddh);
00458                 VERIFY(nvlist_add_uint64_array(config,
00459                     ZPOOL_CONFIG_DDT_HISTOGRAM,
00460                     (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
00461                 kmem_free(ddh, sizeof (ddt_histogram_t));
00462 
00463                 ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
00464                 ddt_get_dedup_object_stats(spa, ddo);
00465                 VERIFY(nvlist_add_uint64_array(config,
00466                     ZPOOL_CONFIG_DDT_OBJ_STATS,
00467                     (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
00468                 kmem_free(ddo, sizeof (ddt_object_t));
00469 
00470                 dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
00471                 ddt_get_dedup_stats(spa, dds);
00472                 VERIFY(nvlist_add_uint64_array(config,
00473                     ZPOOL_CONFIG_DDT_STATS,
00474                     (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
00475                 kmem_free(dds, sizeof (ddt_stat_t));
00476         }
00477 
00478         if (locked)
00479                 spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
00480 
00481         return (config);
00482 }
00483 
00489 void
00490 spa_config_update(spa_t *spa, int what)
00491 {
00492         vdev_t *rvd = spa->spa_root_vdev;
00493         uint64_t txg;
00494         int c;
00495 
00496         ASSERT(MUTEX_HELD(&spa_namespace_lock));
00497 
00498         spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
00499         txg = spa_last_synced_txg(spa) + 1;
00500         if (what == SPA_CONFIG_UPDATE_POOL) {
00501                 vdev_config_dirty(rvd);
00502         } else {
00503                 /*
00504                  * If we have top-level vdevs that were added but have
00505                  * not yet been prepared for allocation, do that now.
00506                  * (It's safe now because the config cache is up to date,
00507                  * so it will be able to translate the new DVAs.)
00508                  * See comments in spa_vdev_add() for full details.
00509                  */
00510                 for (c = 0; c < rvd->vdev_children; c++) {
00511                         vdev_t *tvd = rvd->vdev_child[c];
00512                         if (tvd->vdev_ms_array == 0)
00513                                 vdev_metaslab_set_size(tvd);
00514                         vdev_expand(tvd, txg);
00515                 }
00516         }
00517         spa_config_exit(spa, SCL_ALL, FTAG);
00518 
00519         /*
00520          * Wait for the mosconfig to be regenerated and synced.
00521          */
00522         txg_wait_synced(spa->spa_dsl_pool, txg);
00523 
00524         /*
00525          * Update the global config cache to reflect the new mosconfig.
00526          */
00527         if (!spa->spa_is_root)
00528                 spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL);
00529 
00530         if (what == SPA_CONFIG_UPDATE_POOL)
00531                 spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
00532 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines