FreeBSD ZFS
The Zettabyte File System

vdev_geom.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 /*
00022  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
00023  * All rights reserved.
00024  *
00025  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
00026  */
00027 
00028 #include <sys/zfs_context.h>
00029 #include <sys/param.h>
00030 #include <sys/kernel.h>
00031 #include <sys/bio.h>
00032 #include <sys/disk.h>
00033 #include <sys/spa.h>
00034 #include <sys/spa_impl.h>
00035 #include <sys/vdev_impl.h>
00036 #include <sys/fs/zfs.h>
00037 #include <sys/zio.h>
00038 #include <geom/geom.h>
00039 #include <geom/geom_int.h>
00040 
00048 struct g_class zfs_vdev_class = {
00049         .name = "ZFS::VDEV",
00050         .version = G_VERSION,
00051 };
00052 
00053 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
00054 
00055 SYSCTL_DECL(_vfs_zfs_vdev);
00063 static int vdev_geom_bio_flush_disable = 0;
00064 TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
00065 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
00066     &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
00074 static int vdev_geom_bio_delete_disable = 0;
00075 TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
00076 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
00077     &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
00078 
00079 static void
00080 vdev_geom_orphan(struct g_consumer *cp)
00081 {
00082         vdev_t *vd;
00083 
00084         g_topology_assert();
00085 
00086         vd = cp->private;
00087         if (vd == NULL) {
00088                 /* Vdev close in progress.  Ignore the event. */
00089                 return;
00090         }
00091 
00092         /*
00093          * Orphan callbacks occur from the GEOM event thread.
00094          * Concurrent with this call, new I/O requests may be
00095          * working their way through GEOM about to find out
00096          * (only once executed by the g_down thread) that we've
00097          * been orphaned from our disk provider.  These I/Os
00098          * must be retired before we can detach our consumer.
00099          * This is most easily achieved by acquiring the
00100          * SPA ZIO configuration lock as a writer, but doing
00101          * so with the GEOM topology lock held would cause
00102          * a lock order reversal.  Instead, rely on the SPA's
00103          * async removal support to invoke a close on this
00104          * vdev once it is safe to do so.
00105          */
00106         zfs_post_remove(vd->vdev_spa, vd);
00107         vd->vdev_remove_wanted = B_TRUE;
00108         spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
00109 }
00110 
00111 static void
00112 vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
00113 {
00114         vdev_t *vd;
00115         spa_t *spa;
00116         char *physpath;
00117         int error, physpath_len;
00118 
00119         g_topology_assert();
00120 
00121         if (strcmp(attr, "GEOM::physpath") != 0)
00122                 return;
00123 
00124         if (g_access(cp, 1, 0, 0) != 0)
00125                 return;
00126 
00127         /*
00128          * Record/Update physical path information for this device.
00129          */
00130         vd = cp->private;
00131         spa = vd->vdev_spa;
00132         physpath_len = MAXPATHLEN;
00133         physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
00134         error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
00135         g_access(cp, -1, 0, 0);
00136         if (error == 0) {
00137                 char *old_physpath;
00138 
00139                 old_physpath = vd->vdev_physpath;
00140                 vd->vdev_physpath = spa_strdup(physpath);
00141                 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
00142 
00143                 if (old_physpath != NULL) {
00144                         int held_lock;
00145 
00146                         held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER);
00147                         if (held_lock == 0) {
00148                                 g_topology_unlock();
00149                                 spa_config_enter(spa, SCL_STATE, FTAG,
00150                                     RW_WRITER);
00151                         }
00152 
00153                         spa_strfree(old_physpath);
00154 
00155                         if (held_lock == 0) {
00156                                 spa_config_exit(spa, SCL_STATE, FTAG);
00157                                 g_topology_lock();
00158                         }
00159                 }
00160         }
00161         g_free(physpath);
00162 }
00163 
00164 static struct g_consumer *
00165 vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
00166 {
00167         struct g_geom *gp;
00168         struct g_consumer *cp;
00169         int error;
00170 
00171         g_topology_assert();
00172 
00173         ZFS_LOG(1, "Attaching to %s.", pp->name);
00174         /* Do we have geom already? No? Create one. */
00175         LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
00176                 if (gp->flags & G_GEOM_WITHER)
00177                         continue;
00178                 if (strcmp(gp->name, "zfs::vdev") != 0)
00179                         continue;
00180                 break;
00181         }
00182         if (gp == NULL) {
00183                 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
00184                 gp->orphan = vdev_geom_orphan;
00185                 gp->attrchanged = vdev_geom_attrchanged;
00186                 cp = g_new_consumer(gp);
00187                 error = g_attach(cp, pp);
00188                 if (error != 0) {
00189                         printf("%s(%d): g_attach failed: %d\n", __func__,
00190                                __LINE__, error);
00191                         g_wither_geom(gp, ENXIO);
00192                         return (NULL);
00193                 }
00194                 error = g_access(cp, 1, 0, 1);
00195                 if (error != 0) {
00196                         printf("%s(%d): g_access failed: %d\n", __func__,
00197                                __LINE__, error);
00198                         g_wither_geom(gp, ENXIO);
00199                         return (NULL);
00200                 }
00201                 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
00202         } else {
00203                 /* Check if we are already connected to this provider. */
00204                 LIST_FOREACH(cp, &gp->consumer, consumer) {
00205                         if (cp->provider == pp) {
00206                                 ZFS_LOG(1, "Provider %s already in use by ZFS. "
00207                                     "Failing attach.", pp->name);
00208                                 return (NULL);
00209                         }
00210                 }
00211                 cp = g_new_consumer(gp);
00212                 error = g_attach(cp, pp);
00213                 if (error != 0) {
00214                         printf("%s(%d): g_attach failed: %d\n",
00215                                __func__, __LINE__, error);
00216                         g_destroy_consumer(cp);
00217                         return (NULL);
00218                 }
00219                 error = g_access(cp, 1, 0, 1);
00220                 if (error != 0) {
00221                         printf("%s(%d): g_access failed: %d\n",
00222                                __func__, __LINE__, error);
00223                         g_detach(cp);
00224                         g_destroy_consumer(cp);
00225                         return (NULL);
00226                 }
00227                 ZFS_LOG(1, "Created consumer for %s.", pp->name);
00228         }
00229 
00230         cp->private = vd;
00231         vd->vdev_tsd = cp;
00232 
00233         /* Fetch initial physical path information for this device. */
00234         vdev_geom_attrchanged(cp, "GEOM::physpath");
00235         
00236         return (cp);
00237 }
00238 
00239 static void
00240 vdev_geom_close_locked(vdev_t *vd)
00241 {
00242         struct g_geom *gp;
00243         struct g_consumer *cp;
00244 
00245         g_topology_assert();
00246 
00247         cp = vd->vdev_tsd;
00248         if (cp == NULL)
00249                 return;
00250 
00251         ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
00252         KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
00253         KASSERT(cp->private == vd, ("%s: cp->private is not vd", __func__));
00254         vd->vdev_tsd = NULL;
00255         cp->private = NULL;
00256 
00257         gp = cp->geom;
00258         g_access(cp, -1, 0, -1);
00259         /* Destroy consumer on last close. */
00260         if (cp->acr == 0 && cp->ace == 0) {
00261                 ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
00262                 if (cp->acw > 0)
00263                         g_access(cp, 0, -cp->acw, 0);
00264                 g_detach(cp);
00265                 g_destroy_consumer(cp);
00266         }
00267         /* Destroy geom if there are no consumers left. */
00268         if (LIST_EMPTY(&gp->consumer)) {
00269                 ZFS_LOG(1, "Destroyed geom %s.", gp->name);
00270                 g_wither_geom(gp, ENXIO);
00271         }
00272 }
00273 
00274 static void
00275 nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
00276 {
00277         nvpair_t *elem = NULL;
00278 
00279         *vguid = 0;
00280         *pguid = 0;
00281         while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
00282                 if (nvpair_type(elem) != DATA_TYPE_UINT64)
00283                         continue;
00284 
00285                 if (strcmp(nvpair_name(elem), ZPOOL_CONFIG_POOL_GUID) == 0) {
00286                         VERIFY(nvpair_value_uint64(elem, pguid) == 0);
00287                 } else if (strcmp(nvpair_name(elem), ZPOOL_CONFIG_GUID) == 0) {
00288                         VERIFY(nvpair_value_uint64(elem, vguid) == 0);
00289                 }
00290 
00291                 if (*pguid != 0 && *vguid != 0)
00292                         break;
00293         }
00294 }
00295 
00296 static int
00297 vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
00298 {
00299         struct bio *bp;
00300         u_char *p;
00301         off_t off, maxio;
00302         int error;
00303 
00304         ASSERT((offset % cp->provider->sectorsize) == 0);
00305         ASSERT((size % cp->provider->sectorsize) == 0);
00306 
00307         bp = g_alloc_bio();
00308         off = offset;
00309         offset += size;
00310         p = data;
00311         maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
00312         error = 0;
00313 
00314         for (; off < offset; off += maxio, p += maxio, size -= maxio) {
00315                 bzero(bp, sizeof(*bp));
00316                 bp->bio_cmd = cmd;
00317                 bp->bio_done = NULL;
00318                 bp->bio_offset = off;
00319                 bp->bio_length = MIN(size, maxio);
00320                 bp->bio_data = p;
00321                 g_io_request(bp, cp);
00322                 error = biowait(bp, "vdev_geom_io");
00323                 if (error != 0)
00324                         break;
00325         }
00326 
00327         g_destroy_bio(bp);
00328         return (error);
00329 }
00330 
00331 static int
00332 vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
00333 {
00334         struct g_provider *pp;
00335         vdev_label_t *label;
00336         char *p, *buf;
00337         size_t buflen;
00338         uint64_t psize, state, txg;
00339         off_t offset, size;
00340         int error, l, len;
00341 
00342         g_topology_assert_not();
00343 
00344         pp = cp->provider;
00345         ZFS_LOG(1, "Reading config from %s...", pp->name);
00346 
00347         psize = pp->mediasize;
00348         psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
00349 
00350         size = sizeof(*label) + pp->sectorsize -
00351             ((sizeof(*label) - 1) % pp->sectorsize) - 1;
00352 
00353         label = kmem_alloc(size, KM_SLEEP);
00354         buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
00355 
00356         *config = NULL;
00357         for (l = 0; l < VDEV_LABELS; l++) {
00358 
00359                 offset = vdev_label_offset(psize, l, 0);
00360                 if ((offset % pp->sectorsize) != 0)
00361                         continue;
00362 
00363                 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
00364                         continue;
00365                 buf = label->vl_vdev_phys.vp_nvlist;
00366 
00367                 if (nvlist_unpack(buf, buflen, config, 0) != 0)
00368                         continue;
00369 
00370                 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
00371                     &state) != 0 || state == POOL_STATE_DESTROYED ||
00372                     state > POOL_STATE_L2CACHE) {
00373                         nvlist_free(*config);
00374                         *config = NULL;
00375                         continue;
00376                 }
00377 
00378                 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
00379                     (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
00380                     &txg) != 0 || txg == 0)) {
00381                         nvlist_free(*config);
00382                         *config = NULL;
00383                         continue;
00384                 }
00385 
00386                 break;
00387         }
00388 
00389         kmem_free(label, size);
00390         return (*config == NULL ? ENOENT : 0);
00391 }
00392 
00393 static int
00394 vdev_geom_check_config(nvlist_t *config, const char *name, uint64_t *best_txg)
00395 {
00396         uint64_t vdev_guid;
00397         uint64_t txg;
00398         char *pname;
00399 
00400         if (nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
00401             strcmp(pname, name) != 0)
00402                 return (ENOENT);
00403 
00404         ZFS_LOG(1, "found pool: %s", pname);
00405 
00406         txg = 0;
00407         nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg);
00408         if (txg <= *best_txg)
00409                 return (ENOENT);
00410         *best_txg = txg;
00411         ZFS_LOG(1, "txg: %ju", (uintmax_t)*best_txg);
00412 
00413         return (0);
00414 }
00415 
00416 static int
00417 vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
00418 {
00419         int error;
00420 
00421         if (pp->flags & G_PF_WITHER)
00422                 return (EINVAL);
00423         if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
00424                 return (EINVAL);
00425         g_attach(cp, pp);
00426         error = g_access(cp, 1, 0, 0);
00427         if (error != 0)
00428                 g_detach(cp);
00429         return (error);
00430 }
00431 
/*
 * Undo vdev_geom_attach_taster(): drop the read reference taken on the
 * tasting consumer and detach it from its provider.
 */
static void
vdev_geom_detach_taster(struct g_consumer *cp)
{

        /* The read reference has to be released before detaching. */
        g_access(cp, -1, 0, 0);
        g_detach(cp);
}
00438 
00439 static void
00440 vdev_geom_taste_orphan(struct g_consumer *cp)
00441 {
00442 
00443         KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
00444             cp->provider->name));
00445 }
00446 
00447 int
00448 vdev_geom_read_pool_label(const char *name, nvlist_t **config)
00449 {
00450         struct g_class *mp;
00451         struct g_geom *gp, *zgp;
00452         struct g_provider *pp;
00453         struct g_consumer *zcp;
00454         nvlist_t *vdev_cfg;
00455         uint64_t best_txg;
00456         int error;
00457 
00458         DROP_GIANT();
00459         g_topology_lock();
00460 
00461         zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
00462         /* This orphan function should be never called. */
00463         zgp->orphan = vdev_geom_taste_orphan;
00464         zcp = g_new_consumer(zgp);
00465 
00466         best_txg = 0;
00467         *config = NULL;
00468         LIST_FOREACH(mp, &g_classes, class) {
00469                 if (mp == &zfs_vdev_class)
00470                         continue;
00471                 LIST_FOREACH(gp, &mp->geom, geom) {
00472                         if (gp->flags & G_GEOM_WITHER)
00473                                 continue;
00474                         LIST_FOREACH(pp, &gp->provider, provider) {
00475                                 if (pp->flags & G_PF_WITHER)
00476                                         continue;
00477                                 if (vdev_geom_attach_taster(zcp, pp) != 0)
00478                                         continue;
00479                                 g_topology_unlock();
00480                                 error = vdev_geom_read_config(zcp, &vdev_cfg);
00481                                 g_topology_lock();
00482                                 vdev_geom_detach_taster(zcp);
00483                                 if (error)
00484                                         continue;
00485                                 ZFS_LOG(1, "successfully read vdev config");
00486 
00487                                 error = vdev_geom_check_config(vdev_cfg, name,
00488                                     &best_txg);
00489                                 if (error != 0) {
00490                                         nvlist_free(vdev_cfg);
00491                                         continue;
00492                                 }
00493                                 nvlist_free(*config);
00494                                 *config = vdev_cfg;
00495                         }
00496                 }
00497         }
00498 
00499         g_destroy_consumer(zcp);
00500         g_destroy_geom(zgp);
00501         g_topology_unlock();
00502         PICKUP_GIANT();
00503         return (*config == NULL ? ENOENT : 0);
00504 }
00505 
00506 static void
00507 vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
00508 {
00509         nvlist_t *config;
00510 
00511         g_topology_assert_not();
00512         *pguid = 0;
00513         *vguid = 0;
00514 
00515         if (vdev_geom_read_config(cp, &config) == 0) {
00516                 nvlist_get_guids(config, pguid, vguid);
00517                 nvlist_free(config);
00518         }
00519 }
00520 
00521 static struct g_consumer *
00522 vdev_geom_attach_by_guids(vdev_t *vd)
00523 {
00524         struct g_class *mp;
00525         struct g_geom *gp, *zgp;
00526         struct g_provider *pp;
00527         struct g_consumer *cp, *zcp;
00528         uint64_t pguid;
00529         uint64_t vguid;
00530 
00531         g_topology_assert();
00532 
00533         zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
00534         /* This orphan function should be never called. */
00535         zgp->orphan = vdev_geom_taste_orphan;
00536         zcp = g_new_consumer(zgp);
00537 
00538         cp = NULL;
00539         LIST_FOREACH(mp, &g_classes, class) {
00540                 if (mp == &zfs_vdev_class)
00541                         continue;
00542                 LIST_FOREACH(gp, &mp->geom, geom) {
00543                         if (gp->flags & G_GEOM_WITHER)
00544                                 continue;
00545                         LIST_FOREACH(pp, &gp->provider, provider) {
00546                                 if (vdev_geom_attach_taster(zcp, pp) != 0)
00547                                         continue;
00548                                 g_topology_unlock();
00549                                 vdev_geom_read_guids(zcp, &pguid, &vguid);
00550                                 g_topology_lock();
00551                                 vdev_geom_detach_taster(zcp);
00552                                 if (pguid != spa_guid(vd->vdev_spa) ||
00553                                     vguid != vd->vdev_guid)
00554                                         continue;
00555                                 cp = vdev_geom_attach(pp, vd);
00556                                 if (cp == NULL) {
00557                                         printf("ZFS WARNING: Unable to attach "
00558                                             "to %s.\n", pp->name);
00559                                         continue;
00560                                 }
00561                                 break;
00562                         }
00563                         if (cp != NULL)
00564                                 break;
00565                 }
00566                 if (cp != NULL)
00567                         break;
00568         }
00569 end:
00570         g_destroy_consumer(zcp);
00571         g_destroy_geom(zgp);
00572         return (cp);
00573 }
00574 
00575 static struct g_consumer *
00576 vdev_geom_open_by_guids(vdev_t *vd)
00577 {
00578         struct g_consumer *cp;
00579         char *buf;
00580         size_t len;
00581 
00582         g_topology_assert();
00583 
00584         ZFS_LOG(1, "Searching by guids [%ju:%ju].",
00585             (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
00586         cp = vdev_geom_attach_by_guids(vd);
00587         if (cp != NULL) {
00588                 len = strlen(cp->provider->name) + strlen("/dev/") + 1;
00589                 buf = kmem_alloc(len, KM_SLEEP);
00590 
00591                 snprintf(buf, len, "/dev/%s", cp->provider->name);
00592                 spa_strfree(vd->vdev_path);
00593                 vd->vdev_path = buf;
00594 
00595                 ZFS_LOG(1, "Attach by guids [%ju:%ju] succeeded, provider %s.",
00596                     (uintmax_t)spa_guid(vd->vdev_spa),
00597                     (uintmax_t)vd->vdev_guid, vd->vdev_path);
00598         } else {
00599                 ZFS_LOG(1, "Search by guids [%ju:%ju] failed.",
00600                     (uintmax_t)spa_guid(vd->vdev_spa),
00601                     (uintmax_t)vd->vdev_guid);
00602         }
00603 
00604         return (cp);
00605 }
00606 
00607 static struct g_consumer *
00608 vdev_geom_open_by_path(vdev_t *vd, int check_guid)
00609 {
00610         struct g_provider *pp;
00611         struct g_consumer *cp;
00612         uint64_t pguid, vguid;
00613 
00614         g_topology_assert();
00615 
00616         ZFS_LOG(1, "Opening by path %s%s", vd->vdev_path,
00617             check_guid ? " with GUID verification." : "");
00618 
00619         cp = NULL;
00620         pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
00621         if (pp != NULL) {
00622                 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
00623                 cp = vdev_geom_attach(pp, vd);
00624                 if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
00625                     pp->sectorsize <= VDEV_PAD_SIZE) {
00626                         g_topology_unlock();
00627                         vdev_geom_read_guids(cp, &pguid, &vguid);
00628                         g_topology_lock();
00629                         if (pguid != spa_guid(vd->vdev_spa) ||
00630                             vguid != vd->vdev_guid) {
00631                                 vdev_geom_close_locked(vd);
00632                                 cp = NULL;
00633                                 ZFS_LOG(1, "guid mismatch for provider %s: "
00634                                     "%ju:%ju != %ju:%ju.", vd->vdev_path,
00635                                     (uintmax_t)spa_guid(vd->vdev_spa),
00636                                     (uintmax_t)vd->vdev_guid,
00637                                     (uintmax_t)pguid, (uintmax_t)vguid);
00638                         } else {
00639                                 ZFS_LOG(1, "guids match for provider %s.",
00640                                     vd->vdev_path);
00641                         }
00642                 }
00643         }
00644 
00645         return (cp);
00646 }
00647 
00648 static int
00649 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
00650     uint64_t *ashift)
00651 {
00652         struct g_provider *pp;
00653         struct g_consumer *cp;
00654         size_t bufsize;
00655         int error;
00656 
00657         /*
00658          * We must have a pathname, and it must be absolute.
00659          */
00660         if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
00661                 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
00662                 return (EINVAL);
00663         }
00664 
00665         vd->vdev_tsd = NULL;
00666 
00667         DROP_GIANT();
00668         g_topology_lock();
00669         error = 0;
00670 
00671         /*
00672          * Try using the recorded path for this device, but only
00673          * accept it if its label data contains the expected GUIDs.
00674          */
00675         cp = vdev_geom_open_by_path(vd, 1);
00676         if (cp == NULL) {
00677                 /*
00678                  * The device at vd->vdev_path doesn't have the
00679                  * expected GUIDs. The disks might have merely
00680                  * moved around so try all other GEOM providers
00681                  * to find one with the right GUIDs.
00682                  */
00683                 cp = vdev_geom_open_by_guids(vd);
00684         }
00685 
00686         if (cp == NULL &&
00687             ((vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
00688               vd->vdev_spa->spa_load_state == SPA_LOAD_NONE) ||
00689              vd->vdev_spa->spa_splitting_newspa == B_TRUE)) {
00690                 /*
00691                  * We are dealing with a vdev that hasn't been previosly
00692                  * opened (since boot), and we are not loading an
00693                  * existing pool configuration (e.g. this operations is
00694                  * an add of a vdev to new or * existing pool) or we are
00695                  * in the process of splitting a pool.  Find the GEOM
00696                  * provider by its name, ignoring GUID mismatches.
00697                  *
00698                  * XXPOLICY: It would be safer to only allow a device
00699                  *           that is unlabeled or labeled but missing
00700                  *           GUID information to be opened in this fashion.
00701                  */
00702                 cp = vdev_geom_open_by_path(vd, 0);
00703         }
00704 
00705         if (cp == NULL) {
00706                 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
00707                 error = ENOENT;
00708         } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
00709             !ISP2(cp->provider->sectorsize)) {
00710                 ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
00711                     vd->vdev_path);
00712                 vdev_geom_close_locked(vd);
00713                 error = EINVAL;
00714                 cp = NULL;
00715         } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
00716                 int i;
00717 
00718                 for (i = 0; i < 5; i++) {
00719                         error = g_access(cp, 0, 1, 0);
00720                         if (error == 0)
00721                                 break;
00722                         g_topology_unlock();
00723                         tsleep(vd, 0, "vdev", hz / 2);
00724                         g_topology_lock();
00725                 }
00726                 if (error != 0) {
00727                         printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
00728                             vd->vdev_path, error);
00729                         vdev_geom_close_locked(vd);
00730                         cp = NULL;
00731                 }
00732         }
00733 
00734         g_topology_unlock();
00735         PICKUP_GIANT();
00736 
00737         if (cp == NULL) {
00738                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
00739                 return (error);
00740         }
00741         pp = cp->provider;
00742 
00743         /*
00744          * Determine the actual size of the device.
00745          */
00746         *max_psize = *psize = pp->mediasize;
00747 
00748         /*
00749          * Determine the device's minimum transfer size.
00750          */
00751         *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
00752 
00753         /*
00754          * Clear the nowritecache settings, so that on a vdev_reopen()
00755          * we will try again.
00756          */
00757         vd->vdev_nowritecache = B_FALSE;
00758 
00759         return (0);
00760 }
00761 
00762 static void
00763 vdev_geom_close(vdev_t *vd)
00764 {
00765         g_topology_lock();
00766         vdev_geom_close_locked(vd);
00767         g_topology_unlock();
00768 }
00769 
00770 static void
00771 vdev_geom_io_intr(struct bio *bp)
00772 {
00773         vdev_t *vd;
00774         zio_t *zio;
00775 
00776         zio = bp->bio_caller1;
00777         vd = zio->io_vd;
00778         zio->io_error = bp->bio_error;
00779         if (zio->io_error == 0 && bp->bio_resid != 0)
00780                 zio->io_error = EIO;
00781         if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
00782                 /*
00783                  * If we get ENOTSUP, we know that no future
00784                  * attempts will ever succeed.  In this case we
00785                  * set a persistent bit so that we don't bother
00786                  * with the ioctl in the future.
00787                  */
00788                 vd->vdev_nowritecache = B_TRUE;
00789         }
00790         if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) {
00791                 /*
00792                  * If we get ENOTSUP, we know that no future
00793                  * attempts will ever succeed.  In this case we
00794                  * set a persistent bit so that we don't bother
00795                  * with the ioctl in the future.
00796                  */
00797                 vd->vdev_notrim = B_TRUE;
00798         }
00799         if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
00800                 /*
00801                  * If provider's error is set we assume it is being
00802                  * removed.
00803                  */
00804                 if (bp->bio_to->error != 0) {
00805                         /*
00806                          * We post the resource as soon as possible, instead of
00807                          * when the async removal actually happens, because the
00808                          * DE is using this information to discard previous I/O
00809                          * errors.
00810                          */
00811                         /* XXX: zfs_post_remove() can sleep. */
00812                         zfs_post_remove(zio->io_spa, vd);
00813                         vd->vdev_remove_wanted = B_TRUE;
00814                         spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
00815                 } else if (!vd->vdev_delayed_close) {
00816                         vd->vdev_delayed_close = B_TRUE;
00817                 }
00818         }
00819         g_destroy_bio(bp);
00820         zio_interrupt(zio);
00821 }
00822 
00823 static int
00824 vdev_geom_io_start(zio_t *zio)
00825 {
00826         vdev_t *vd;
00827         struct g_consumer *cp;
00828         struct bio *bp;
00829         int error;
00830 
00831         vd = zio->io_vd;
00832 
00833         if (zio->io_type == ZIO_TYPE_IOCTL) {
00834                 /* XXPOLICY */
00835                 if (!vdev_readable(vd)) {
00836                         zio->io_error = ENXIO;
00837                         return (ZIO_PIPELINE_CONTINUE);
00838                 }
00839 
00840                 switch (zio->io_cmd) {
00841                 case DKIOCFLUSHWRITECACHE:
00842                         if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
00843                                 break;
00844                         if (vd->vdev_nowritecache) {
00845                                 zio->io_error = ENOTSUP;
00846                                 break;
00847                         }
00848                         goto sendreq;
00849                 case DKIOCTRIM:
00850                         if (vdev_geom_bio_delete_disable)
00851                                 break;
00852                         if (vd->vdev_notrim) {
00853                                 zio->io_error = ENOTSUP;
00854                                 break;
00855                         }
00856                         goto sendreq;
00857                 default:
00858                         zio->io_error = ENOTSUP;
00859                 }
00860 
00861                 return (ZIO_PIPELINE_CONTINUE);
00862         }
00863 sendreq:
00864         cp = vd->vdev_tsd;
00865         if (cp == NULL) {
00866                 zio->io_error = ENXIO;
00867                 return (ZIO_PIPELINE_CONTINUE);
00868         }
00869         bp = g_alloc_bio();
00870         bp->bio_caller1 = zio;
00871         switch (zio->io_type) {
00872         case ZIO_TYPE_READ:
00873         case ZIO_TYPE_WRITE:
00874                 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
00875                 bp->bio_data = zio->io_data;
00876                 bp->bio_offset = zio->io_offset;
00877                 bp->bio_length = zio->io_size;
00878                 break;
00879         case ZIO_TYPE_IOCTL:
00880                 switch (zio->io_cmd) {
00881                 case DKIOCFLUSHWRITECACHE:
00882                         bp->bio_cmd = BIO_FLUSH;
00883                         bp->bio_flags |= BIO_ORDERED;
00884                         bp->bio_data = NULL;
00885                         bp->bio_offset = cp->provider->mediasize;
00886                         bp->bio_length = 0;
00887                         break;
00888                 case DKIOCTRIM:
00889                         bp->bio_cmd = BIO_DELETE;
00890                         bp->bio_data = NULL;
00891                         bp->bio_offset = zio->io_offset;
00892                         bp->bio_length = zio->io_size;
00893                         break;
00894                 }
00895                 break;
00896         }
00897         bp->bio_done = vdev_geom_io_intr;
00898 
00899         g_io_request(bp, cp);
00900 
00901         return (ZIO_PIPELINE_STOP);
00902 }
00903 
00904 static void
00905 vdev_geom_io_done(zio_t *zio)
00906 {
00907 }
00908 
00909 static void
00910 vdev_geom_hold(vdev_t *vd)
00911 {
00912 }
00913 
00914 static void
00915 vdev_geom_rele(vdev_t *vd)
00916 {
00917 }
00918 
00923 vdev_ops_t vdev_geom_ops = {
00924         vdev_geom_open,
00925         vdev_geom_close,
00926         vdev_default_asize,
00927         vdev_geom_io_start,
00928         vdev_geom_io_done,
00929         NULL,
00930         vdev_geom_hold,
00931         vdev_geom_rele,
00932         VDEV_TYPE_DISK,         /* name of this vdev type */
00933         B_TRUE                  /* leaf vdev */
00934 };
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines