FreeBSD ZFS
The Zettabyte File System

zvol.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 /*
00022  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
00023  *
00024  * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
00025  * All rights reserved.
00026  */
00027 
00028 /* Portions Copyright 2010 Robert Milkowski */
00029 /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
00030 
00050 #include <sys/types.h>
00051 #include <sys/param.h>
00052 #include <sys/kernel.h>
00053 #include <sys/errno.h>
00054 #include <sys/uio.h>
00055 #include <sys/bio.h>
00056 #include <sys/buf.h>
00057 #include <sys/kmem.h>
00058 #include <sys/conf.h>
00059 #include <sys/cmn_err.h>
00060 #include <sys/stat.h>
00061 #include <sys/zap.h>
00062 #include <sys/spa.h>
00063 #include <sys/zio.h>
00064 #include <sys/dmu_traverse.h>
00065 #include <sys/dnode.h>
00066 #include <sys/dsl_dataset.h>
00067 #include <sys/dsl_prop.h>
00068 #include <sys/dkio.h>
00069 #include <sys/byteorder.h>
00070 #include <sys/sunddi.h>
00071 #include <sys/dirent.h>
00072 #include <sys/policy.h>
00073 #include <sys/fs/zfs.h>
00074 #include <sys/zfs_ioctl.h>
00075 #include <sys/zil.h>
00076 #include <sys/refcount.h>
00077 #include <sys/zfs_znode.h>
00078 #include <sys/zfs_rlock.h>
00079 #include <sys/vdev_impl.h>
00080 #include <sys/zvol.h>
00081 #include <sys/zil_impl.h>
00082 #include <geom/geom.h>
00083 
00084 #include "zfs_namecheck.h"
00085 
/*
 * GEOM class under which all zvol providers are exported; zvols appear
 * in the GEOM tree as "ZFS::ZVOL" geoms (one provider per volume).
 */
struct g_class zfs_zvol_class = {
        .name = "ZFS::ZVOL",
        .version = G_VERSION,
};

DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
00092 
/* Soft-state for ZFS device minors -- presumably shared with zfs_ioctl.c; verify. */
void *zfsdev_state;
/* Ownership tag passed to dmu_objset_own()/dmu_buf_hold() for zvol holds. */
static char *zvol_tag = "zvol_tag";

/* Name of the ZAP attribute recording the dump-device size. */
#define ZVOL_DUMPSIZE           "dumpsize"

/* Count of instantiated zvol minors; updated under spa_namespace_lock. */
static uint32_t zvol_minors;
00105 
/*
 * One physically contiguous run of volume blocks, built by
 * zvol_get_lbas()/zvol_map_block() when mapping the volume's layout.
 */
typedef struct zvol_extent {
        list_node_t     ze_node;        /* linkage in zv_extents */
        dva_t           ze_dva;         /* DVA of the first block in the run */
        uint64_t        ze_nblks;       /* number of consecutive blocks */
} zvol_extent_t;
00111 
/*
 * Per-minor state for one zvol.  Hung off the GEOM provider's
 * pp->private pointer; looked up via zvol_minor_lookup().
 */
typedef struct zvol_state {
        char            zv_name[MAXPATHLEN]; /* dataset name of this volume */
        uint64_t        zv_volsize;     /* logical volume size in bytes */
        uint64_t        zv_volblocksize; /* volume block size (cached from DMU) */
        struct g_provider *zv_provider; /* GEOM provider exported for this zvol */
        uint8_t         zv_min_bs;      /* minimum addressable block shift (DEV_BSHIFT) */
        uint8_t         zv_flags;       /* ZVOL_* flags below */
        objset_t        *zv_objset;     /* objset handle; NULL while not open */
        uint32_t        zv_total_opens; /* total open count across all openers */
        zilog_t         *zv_zilog;      /* ZIL handle, valid while open */
        list_t          zv_extents;     /* list of zvol_extent_t (physical map) */
        znode_t         zv_znode;       /* dummy znode: carries the range-lock AVL */
        dmu_buf_t       *zv_dbuf;       /* bonus buffer held on ZVOL_OBJ while open */
        int             zv_state;       /* worker run state -- TODO confirm meaning */
        struct bio_queue_head zv_queue; /* pending bios; presumably drained by zvol_geom_worker() */
        struct mtx      zv_queue_mtx;   /* protects zv_queue */
} zvol_state_t;
00132 
/*
 * zvol specific flags
 */
#define ZVOL_RDONLY     0x1     /* volume may not be written */
#define ZVOL_DUMPIFIED  0x2     /* volume is configured as a dump device */
#define ZVOL_EXCL       0x4     /* volume is open exclusively (FEXCL) */
#define ZVOL_WCE        0x8     /* write cache enabled -- not referenced here; verify */
00140 
/* Upper bound on a single DMU access issued by the zvol I/O path. */
int zvol_maxphys = DMU_MAX_ACCESS/2;

extern int zfs_set_prop_nvlist(const char *, zprop_source_t,
    nvlist_t *, nvlist_t **);
static int zvol_remove_zv(zvol_state_t *);
static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);
static int zvol_dumpify(zvol_state_t *zv);
static int zvol_dump_fini(zvol_state_t *zv);
static int zvol_dump_init(zvol_state_t *zv, boolean_t resize);

/* GEOM glue: provider lifecycle, access control, and the bio worker. */
static zvol_state_t *zvol_geom_create(const char *name);
static void zvol_geom_run(zvol_state_t *zv);
static void zvol_geom_destroy(zvol_state_t *zv);
static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace);
static void zvol_geom_start(struct bio *bp);
static void zvol_geom_worker(void *arg);
00160 
/*
 * Propagate a changed zv_volsize to the device layer.  On FreeBSD this
 * resizes the GEOM provider; the Solaris branch updates the devinfo
 * "Size"/"Nblocks" properties and invalidates specfs's cached sizes.
 *
 * NOTE(review): the #ifdef sun branch references maj/min/volsize, which
 * are not in this function's scope -- it appears to predate a signature
 * change and would not compile with "sun" defined; verify upstream.
 */
static void
zvol_size_changed(zvol_state_t *zv)
{
#ifdef sun
        dev_t dev = makedevice(maj, min);

        VERIFY(ddi_prop_update_int64(dev, zfs_dip,
            "Size", volsize) == DDI_SUCCESS);
        VERIFY(ddi_prop_update_int64(dev, zfs_dip,
            "Nblocks", lbtodb(volsize)) == DDI_SUCCESS);

        /* Notify specfs to invalidate the cached size */
        spec_size_invalidate(dev, VBLK);
        spec_size_invalidate(dev, VCHR);
#else   /* !sun */
        struct g_provider *pp;

        pp = zv->zv_provider;
        if (pp == NULL)
                return;
        /* g_resize_provider() requires the GEOM topology lock. */
        g_topology_lock();
        g_resize_provider(pp, zv->zv_volsize);
        g_topology_unlock();
#endif  /* !sun */
}
00186 
/*
 * Validate a proposed volume size against the volume block size.
 *
 * Returns 0 when the size is acceptable, EINVAL when it is zero or not
 * a multiple of the block size, or EOVERFLOW (32-bit kernels only) when
 * it cannot be represented as a device offset.
 */
int
zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
{
        /* Zero-length volumes and partial trailing blocks are rejected. */
        if (volsize == 0 || (volsize % blocksize) != 0)
                return (EINVAL);

#ifdef _ILP32
        if (volsize - 1 > SPEC_MAXOFFSET_T)
                return (EOVERFLOW);
#endif
        return (0);
}
00202 
00203 int
00204 zvol_check_volblocksize(uint64_t volblocksize)
00205 {
00206         if (volblocksize < SPA_MINBLOCKSIZE ||
00207             volblocksize > SPA_MAXBLOCKSIZE ||
00208             !ISP2(volblocksize))
00209                 return (EDOM);
00210 
00211         return (0);
00212 }
00213 
00214 int
00215 zvol_get_stats(objset_t *os, nvlist_t *nv)
00216 {
00217         int error;
00218         dmu_object_info_t doi;
00219         uint64_t val;
00220 
00221         error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
00222         if (error)
00223                 return (error);
00224 
00225         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
00226 
00227         error = dmu_object_info(os, ZVOL_OBJ, &doi);
00228 
00229         if (error == 0) {
00230                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
00231                     doi.doi_data_block_size);
00232         }
00233 
00234         return (error);
00235 }
00236 
/*
 * Find the zvol_state_t for the named volume by scanning every geom of
 * zfs_zvol_class.  Returns NULL if no provider carries that name.
 *
 * The final expression relies on LIST_FOREACH semantics: gp is NULL
 * when the loop ran to completion without a match, while a "break" on
 * a name match leaves gp non-NULL and zv pointing at the winner.
 */
static zvol_state_t *
zvol_minor_lookup(const char *name)
{
        struct g_provider *pp;
        struct g_geom *gp;
        zvol_state_t *zv = NULL;

        /* Caller serializes lookups against minor creation/removal. */
        ASSERT(MUTEX_HELD(&spa_namespace_lock));

        g_topology_lock();
        LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) {
                pp = LIST_FIRST(&gp->provider);
                if (pp == NULL)
                        continue;
                zv = pp->private;
                if (zv == NULL)
                        continue;
                if (strcmp(zv->zv_name, name) == 0)
                        break;
        }
        g_topology_unlock();

        return (gp != NULL ? zv : NULL);
}
00261 
/* Callback state shared between zvol_get_lbas() and zvol_map_block(). */
struct maparg {
        zvol_state_t    *ma_zv;         /* volume whose extent list is being built */
        uint64_t        ma_blks;        /* level-0 blocks visited so far */
};
00267 
/*
 * traverse_dataset() callback: record the physical (DVA) location of
 * each level-0 data block of ZVOL_OBJ in the zvol's extent list,
 * coalescing blocks that are physically adjacent on the same vdev.
 * Returns EFRAGS on gang blocks, which are not contiguous and cannot
 * be described by an extent.
 */
/*ARGSUSED*/
static int
zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
        struct maparg *ma = arg;
        zvol_extent_t *ze;
        int bs = ma->ma_zv->zv_volblocksize;

        /* Only level-0 data blocks of the volume object are of interest. */
        if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
                return (0);

        /* Pre-order traversal must visit block ids strictly in sequence. */
        VERIFY3U(ma->ma_blks, ==, zb->zb_blkid);
        ma->ma_blks++;

        /* Abort immediately if we have encountered gang blocks */
        if (BP_IS_GANG(bp))
                return (EFRAGS);

        /*
         * See if the block is at the end of the previous extent.
         */
        ze = list_tail(&ma->ma_zv->zv_extents);
        if (ze &&
            DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) &&
            DVA_GET_OFFSET(BP_IDENTITY(bp)) ==
            DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) {
                ze->ze_nblks++;
                return (0);
        }

        dprintf_bp(bp, "%s", "next blkptr:");

        /* start a new extent */
        ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP);
        ze->ze_dva = bp->blk_dva[0];    /* structure assignment */
        ze->ze_nblks = 1;
        list_insert_tail(&ma->ma_zv->zv_extents, ze);
        return (0);
}
00308 
00309 static void
00310 zvol_free_extents(zvol_state_t *zv)
00311 {
00312         zvol_extent_t *ze;
00313 
00314         while (ze = list_head(&zv->zv_extents)) {
00315                 list_remove(&zv->zv_extents, ze);
00316                 kmem_free(ze, sizeof (zvol_extent_t));
00317         }
00318 }
00319 
00320 static int
00321 zvol_get_lbas(zvol_state_t *zv)
00322 {
00323         objset_t *os = zv->zv_objset;
00324         struct maparg   ma;
00325         int             err;
00326 
00327         ma.ma_zv = zv;
00328         ma.ma_blks = 0;
00329         zvol_free_extents(zv);
00330 
00331         /* commit any in-flight changes before traversing the dataset */
00332         txg_wait_synced(dmu_objset_pool(os), 0);
00333         err = traverse_dataset(dmu_objset_ds(os), 0,
00334             TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma);
00335         if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) {
00336                 zvol_free_extents(zv);
00337                 return (err ? err : EIO);
00338         }
00339 
00340         return (0);
00341 }
00342 
/*
 * Objset-creation callback for DMU_OST_ZVOL: create the two objects a
 * zvol consists of -- ZVOL_OBJ (the data object, claimed with the
 * requested volblocksize) and ZVOL_ZAP_OBJ (property ZAP holding
 * "size").  The volsize/volblocksize properties are consumed here and
 * stripped from nvprops so the generic property-setting pass skips them.
 */
/* ARGSUSED */
void
zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
{
        zfs_creat_t *zct = arg;
        nvlist_t *nvprops = zct->zct_props;
        int error;
        uint64_t volblocksize, volsize;

        /* volsize is mandatory; volblocksize falls back to the default. */
        VERIFY(nvlist_lookup_uint64(nvprops,
            zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
        if (nvlist_lookup_uint64(nvprops,
            zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
                volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);

        /*
         * These properties must be removed from the list so the generic
         * property setting step won't apply to them.
         */
        VERIFY(nvlist_remove_all(nvprops,
            zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
        (void) nvlist_remove_all(nvprops,
            zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));

        /* Failures below are programming errors; checked on DEBUG only. */
        error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
            DMU_OT_NONE, 0, tx);
        ASSERT(error == 0);

        error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
            DMU_OT_NONE, 0, tx);
        ASSERT(error == 0);

        error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
        ASSERT(error == 0);
}
00378 
/*
 * Replay a TX_WRITE ZIL record: redo the write of lr_length bytes at
 * lr_offset into ZVOL_OBJ.  The payload immediately follows the
 * lr_write_t header.  Returns 0 or a dmu_tx_assign() error.
 */
static int
zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
{
        objset_t *os = zv->zv_objset;
        char *data = (char *)(lr + 1);  /* data follows lr_write_t */
        uint64_t offset, length;
        dmu_tx_t *tx;
        int error;

        if (byteswap)
                byteswap_uint64_array(lr, sizeof (*lr));

        offset = lr->lr_offset;
        length = lr->lr_length;

        /* If it's a dmu_sync() block, write the whole block */
        if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
                uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
                if (length < blocksize) {
                        /* Round down to the block start and cover it all. */
                        offset -= offset % blocksize;
                        length = blocksize;
                }
        }

        tx = dmu_tx_create(os);
        dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length);
        error = dmu_tx_assign(tx, TXG_WAIT);
        if (error) {
                dmu_tx_abort(tx);
        } else {
                dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
                dmu_tx_commit(tx);
        }

        return (error);
}
00419 
/*
 * Catch-all replay handler: zvols only ever log TX_WRITE records, so
 * any other transaction type found in the ZIL is unsupported.
 */
/* ARGSUSED */
static int
zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
{
        return (ENOTSUP);
}
00426 
/*
 * Replay vector indexed by ZIL transaction type.  Only TX_WRITE is
 * meaningful for a zvol; every other slot rejects the record.
 * (Slot comments follow the standard lr type order -- verify against
 * the TX_* definitions in zil.h.)
 */
zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
        zvol_replay_err,        /* 0 no such transaction type */
        zvol_replay_err,        /* TX_CREATE */
        zvol_replay_err,        /* TX_MKDIR */
        zvol_replay_err,        /* TX_MKXATTR */
        zvol_replay_err,        /* TX_SYMLINK */
        zvol_replay_err,        /* TX_REMOVE */
        zvol_replay_err,        /* TX_RMDIR */
        zvol_replay_err,        /* TX_LINK */
        zvol_replay_err,        /* TX_RENAME */
        zvol_replay_write,      /* TX_WRITE */
        zvol_replay_err,        /* TX_TRUNCATE */
        zvol_replay_err,        /* TX_SETATTR */
        zvol_replay_err,        /* TX_ACL */
        zvol_replay_err,        /* TX_CREATE_ACL */
        zvol_replay_err,        /* TX_CREATE_ATTR */
        zvol_replay_err,        /* TX_CREATE_ACL_ATTR */
        zvol_replay_err,        /* TX_MKDIR_ACL */
        zvol_replay_err,        /* TX_MKDIR_ATTR */
        zvol_replay_err,        /* TX_MKDIR_ACL_ATTR */
        zvol_replay_err,        /* TX_WRITE2 */
};
00453 
#ifdef sun
/*
 * Map a zvol dataset name to its minor number.  Returns 0 and stores
 * the minor through *minor on success, -1 if no such zvol exists.
 * Solaris-only: FreeBSD identifies zvols by GEOM provider instead.
 */
int
zvol_name2minor(const char *name, minor_t *minor)
{
        zvol_state_t *zv;

        mutex_enter(&spa_namespace_lock);
        zv = zvol_minor_lookup(name);
        if (minor && zv)
                *minor = zv->zv_minor;
        mutex_exit(&spa_namespace_lock);
        return (zv ? 0 : -1);
}
#endif  /* sun */
00468 
/*
 * Create the device minor (on FreeBSD: the GEOM provider and its worker)
 * for the named zvol.  Returns EEXIST if the minor already exists, or an
 * error from dmu_objset_own()/zap_lookup().  On success the objset is
 * disowned again before returning; the real open happens later in
 * zvol_first_open() when the provider is first accessed.
 */
int
zvol_create_minor(const char *name)
{
        zfs_soft_state_t *zs;
        zvol_state_t *zv;
        objset_t *os;
        dmu_object_info_t doi;
        uint64_t volsize;
        int error;

        ZFS_LOG(1, "Creating ZVOL %s...", name);

        mutex_enter(&spa_namespace_lock);

        if (zvol_minor_lookup(name) != NULL) {
                mutex_exit(&spa_namespace_lock);
                return (EEXIST);
        }

        /* lie and say we're read-only */
        error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os);

        if (error) {
                mutex_exit(&spa_namespace_lock);
                return (error);
        }

#ifdef sun
        if ((minor = zfsdev_minor_alloc()) == 0) {
                dmu_objset_disown(os, FTAG);
                mutex_exit(&spa_namespace_lock);
                return (ENXIO);
        }

        if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) {
                dmu_objset_disown(os, FTAG);
                mutex_exit(&spa_namespace_lock);
                return (EAGAIN);
        }
        (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME,
            (char *)name);

        (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor);

        if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
            minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
                ddi_soft_state_free(zfsdev_state, minor);
                dmu_objset_disown(os, FTAG);
                mutex_exit(&spa_namespace_lock);
                return (EAGAIN);
        }

        (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor);

        if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK,
            minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
                ddi_remove_minor_node(zfs_dip, chrbuf);
                ddi_soft_state_free(zfsdev_state, minor);
                dmu_objset_disown(os, FTAG);
                mutex_exit(&spa_namespace_lock);
                return (EAGAIN);
        }

        zs = ddi_get_soft_state(zfsdev_state, minor);
        zs->zss_type = ZSST_ZVOL;
        zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
#else   /* !sun */

        error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
        if (error) {
                /* Trips DEBUG kernels; production unwinds gracefully. */
                ASSERT(error == 0);
                /*
                 * NOTE(review): the objset was owned with FTAG above but
                 * is disowned with zvol_tag here -- the tags must match
                 * for the DMU's ownership accounting; verify upstream.
                 */
                dmu_objset_disown(os, zvol_tag);
                mutex_exit(&spa_namespace_lock);
                return (error);
        }

        DROP_GIANT();
        g_topology_lock();
        zv = zvol_geom_create(name);
        zv->zv_volsize = volsize;
        zv->zv_provider->mediasize = zv->zv_volsize;

#endif  /* !sun */

        (void) strlcpy(zv->zv_name, name, MAXPATHLEN);
        zv->zv_min_bs = DEV_BSHIFT;
        zv->zv_objset = os;
        if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
                zv->zv_flags |= ZVOL_RDONLY;
        mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
        avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
            sizeof (rl_t), offsetof(rl_t, r_node));
        list_create(&zv->zv_extents, sizeof (zvol_extent_t),
            offsetof(zvol_extent_t, ze_node));
        /* get and cache the blocksize */
        error = dmu_object_info(os, ZVOL_OBJ, &doi);
        ASSERT(error == 0);
        zv->zv_volblocksize = doi.doi_data_block_size;

        /* Replay (or discard, if replay is disabled) any pending ZIL records. */
        if (spa_writeable(dmu_objset_spa(os))) {
                if (zil_replay_disable)
                        zil_destroy(dmu_objset_zil(os), B_FALSE);
                else
                        zil_replay(os, zv, zvol_replay_vector);
        }
        dmu_objset_disown(os, FTAG);
        zv->zv_objset = NULL;

        zvol_minors++;

        mutex_exit(&spa_namespace_lock);

        zvol_geom_run(zv);

        g_topology_unlock();
        PICKUP_GIANT();

        ZFS_LOG(1, "ZVOL %s created.", name);

        return (0);
}
00593 
/*
 * Tear down a single zvol's state and GEOM provider.  Returns EBUSY if
 * the volume is still open, 0 on success.  Callers hold both
 * spa_namespace_lock and the GEOM topology lock.
 */
static int
zvol_remove_zv(zvol_state_t *zv)
{
#ifdef sun
        minor_t minor = zv->zv_minor;
#endif

        ASSERT(MUTEX_HELD(&spa_namespace_lock));
        if (zv->zv_total_opens != 0)
                return (EBUSY);

        ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);

#ifdef sun
        (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor);
        ddi_remove_minor_node(zfs_dip, nmbuf);
#endif  /* sun */

        /* Release the range-lock state created in zvol_create_minor(). */
        avl_destroy(&zv->zv_znode.z_range_avl);
        mutex_destroy(&zv->zv_znode.z_range_lock);

        zvol_geom_destroy(zv);

        zvol_minors--;
        return (0);
}
00623 
00624 int
00625 zvol_remove_minor(const char *name)
00626 {
00627         zvol_state_t *zv;
00628         int rc;
00629 
00630         mutex_enter(&spa_namespace_lock);
00631         if ((zv = zvol_minor_lookup(name)) == NULL) {
00632                 mutex_exit(&spa_namespace_lock);
00633                 return (ENXIO);
00634         }
00635         g_topology_lock();
00636         rc = zvol_remove_zv(zv);
00637         g_topology_unlock();
00638         mutex_exit(&spa_namespace_lock);
00639         return (rc);
00640 }
00641 
/*
 * Open the backing objset the first time a zvol is accessed: own the
 * objset, read the persisted size, hold the bonus buffer on ZVOL_OBJ,
 * open the ZIL, notify GEOM of the size, and recompute the read-only
 * flag from the "readonly" property / snapshot state / pool writability.
 * Returns 0 or an error, releasing the objset hold on failure.
 */
int
zvol_first_open(zvol_state_t *zv)
{
        objset_t *os;
        uint64_t volsize;
        int error;
        uint64_t readonly;

        /* lie and say we're read-only */
        error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE,
            zvol_tag, &os);
        if (error)
                return (error);

        error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
        if (error) {
                /* A missing "size" attribute is unexpected: trip DEBUG kernels. */
                ASSERT(error == 0);
                dmu_objset_disown(os, zvol_tag);
                return (error);
        }
        zv->zv_objset = os;
        error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
        if (error) {
                /* NOTE(review): zv_objset stays set to the disowned os here;
                 * confirm callers never read it after a failed open. */
                dmu_objset_disown(os, zvol_tag);
                return (error);
        }
        zv->zv_volsize = volsize;
        zv->zv_zilog = zil_open(os, zvol_get_data);
        zvol_size_changed(zv);

        VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly,
            NULL) == 0);
        if (readonly || dmu_objset_is_snapshot(os) ||
            !spa_writeable(dmu_objset_spa(os)))
                zv->zv_flags |= ZVOL_RDONLY;
        else
                zv->zv_flags &= ~ZVOL_RDONLY;
        return (error);
}
00681 
/*
 * Undo zvol_first_open() when the last open goes away: close the ZIL,
 * release the bonus buffer, sync/evict cached data, and disown the
 * objset.
 */
void
zvol_last_close(zvol_state_t *zv)
{
        zil_close(zv->zv_zilog);
        zv->zv_zilog = NULL;

        dmu_buf_rele(zv->zv_dbuf, zvol_tag);
        zv->zv_dbuf = NULL;

        /*
         * Evict cached data
         */
        /* Dirty writable volumes are synced first so nothing is lost. */
        if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
            !(zv->zv_flags & ZVOL_RDONLY))
                txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
        (void) dmu_objset_evict_dbufs(zv->zv_objset);

        dmu_objset_disown(zv->zv_objset, zvol_tag);
        zv->zv_objset = NULL;
}
00702 
#ifdef sun
/*
 * Thick-provision the whole volume, one transaction of up to
 * SPA_MAXBLOCKSIZE bytes at a time.  Returns ENOSPC if the pool lacks
 * space; on a mid-stream failure the range written so far is freed and
 * the error returned.  On success, waits for the txg to sync so the
 * blocks really exist on disk.  (Solaris dump-device path.)
 */
int
zvol_prealloc(zvol_state_t *zv)
{
        objset_t *os = zv->zv_objset;
        dmu_tx_t *tx;
        uint64_t refd, avail, usedobjs, availobjs;
        uint64_t resid = zv->zv_volsize;
        uint64_t off = 0;

        /* Check the space usage before attempting to allocate the space */
        dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs);
        if (avail < zv->zv_volsize)
                return (ENOSPC);

        /* Free old extents if they exist */
        zvol_free_extents(zv);

        while (resid != 0) {
                int error;
                uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE);

                tx = dmu_tx_create(os);
                dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
                error = dmu_tx_assign(tx, TXG_WAIT);
                if (error) {
                        dmu_tx_abort(tx);
                        /* Roll back everything preallocated so far. */
                        (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off);
                        return (error);
                }
                dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx);
                dmu_tx_commit(tx);
                off += bytes;
                resid -= bytes;
        }
        txg_wait_synced(dmu_objset_pool(os), 0);

        return (0);
}
#endif  /* sun */
00743 
00744 int
00745 zvol_update_volsize(objset_t *os, uint64_t volsize)
00746 {
00747         dmu_tx_t *tx;
00748         int error;
00749 
00750         ASSERT(MUTEX_HELD(&spa_namespace_lock));
00751 
00752         tx = dmu_tx_create(os);
00753         dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
00754         error = dmu_tx_assign(tx, TXG_WAIT);
00755         if (error) {
00756                 dmu_tx_abort(tx);
00757                 return (error);
00758         }
00759 
00760         error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1,
00761             &volsize, tx);
00762         dmu_tx_commit(tx);
00763 
00764         if (error == 0)
00765                 error = dmu_free_long_range(os,
00766                     ZVOL_OBJ, volsize, DMU_OBJECT_END);
00767         return (error);
00768 }
00769 
00770 void
00771 zvol_remove_minors(const char *name)
00772 {
00773         struct g_geom *gp, *gptmp;
00774         struct g_provider *pp;
00775         zvol_state_t *zv;
00776         size_t namelen;
00777 
00778         namelen = strlen(name);
00779 
00780         DROP_GIANT();
00781         mutex_enter(&spa_namespace_lock);
00782         g_topology_lock();
00783 
00784         LIST_FOREACH_SAFE(gp, &zfs_zvol_class.geom, geom, gptmp) {
00785                 pp = LIST_FIRST(&gp->provider);
00786                 if (pp == NULL)
00787                         continue;
00788                 zv = pp->private;
00789                 if (zv == NULL)
00790                         continue;
00791                 if (strcmp(zv->zv_name, name) == 0 ||
00792                     (strncmp(zv->zv_name, name, namelen) == 0 &&
00793                      zv->zv_name[namelen] == '/')) {
00794                         (void) zvol_remove_zv(zv);
00795                 }
00796         }
00797 
00798         g_topology_unlock();
00799         mutex_exit(&spa_namespace_lock);
00800         PICKUP_GIANT();
00801 }
00802 
/*
 * Change the size of the named volume: validate the new size against the
 * block size, persist it via zvol_update_volsize(), and -- when a live
 * minor exists -- update the in-core size and notify GEOM.  Under
 * ZVOL_DUMP, a dumpified volume is re-dumpified and rolled back to the
 * old size on failure.  Returns 0 or the first error encountered.
 *
 * "maj" is used only by the Solaris LUN-expansion event code and is
 * ignored on FreeBSD.
 */
int
zvol_set_volsize(const char *name, major_t maj, uint64_t volsize)
{
        zvol_state_t *zv = NULL;
        objset_t *os;
        int error;
        dmu_object_info_t doi;
        uint64_t old_volsize = 0ULL;
        uint64_t readonly;

        mutex_enter(&spa_namespace_lock);
        /* zv may be NULL: the property can be set with no minor present. */
        zv = zvol_minor_lookup(name);
        if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
                mutex_exit(&spa_namespace_lock);
                return (error);
        }

        if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 ||
            (error = zvol_check_volsize(volsize,
            doi.doi_data_block_size)) != 0)
                goto out;

        VERIFY(dsl_prop_get_integer(name, "readonly", &readonly,
            NULL) == 0);
        if (readonly) {
                error = EROFS;
                goto out;
        }

        error = zvol_update_volsize(os, volsize);
        /*
         * Reinitialize the dump area to the new size. If we
         * failed to resize the dump area then restore it back to
         * its original size.
         */
        if (zv && error == 0) {
#ifdef ZVOL_DUMP
                if (zv->zv_flags & ZVOL_DUMPIFIED) {
                        old_volsize = zv->zv_volsize;
                        zv->zv_volsize = volsize;
                        if ((error = zvol_dumpify(zv)) != 0 ||
                            (error = dumpvp_resize()) != 0) {
                                (void) zvol_update_volsize(os, old_volsize);
                                zv->zv_volsize = old_volsize;
                                error = zvol_dumpify(zv);
                        }
                }
#endif  /* ZVOL_DUMP */
                if (error == 0) {
                        zv->zv_volsize = volsize;
                        zvol_size_changed(zv);
                }
        }

#ifdef sun
        /*
         * Generate a LUN expansion event.
         */
        if (zv && error == 0) {
                sysevent_id_t eid;
                nvlist_t *attr;
                char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

                (void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV,
                    zv->zv_minor);

                VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
                VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);

                (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
                    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);

                nvlist_free(attr);
                kmem_free(physpath, MAXPATHLEN);
        }
#endif  /* sun */

out:
        dmu_objset_rele(os, FTAG);

        mutex_exit(&spa_namespace_lock);

        return (error);
}
00887 
/*
 * GEOM access handler: open the zvol provider "count" times with mode
 * "flag".  The first open performs the real objset open via
 * zvol_first_open(); read-only and exclusive-open (FEXCL) semantics are
 * enforced, and a failed check rolls the first-open back.
 */
/*ARGSUSED*/
static int
zvol_open(struct g_provider *pp, int flag, int count)
{
        zvol_state_t *zv;
        int err = 0;
        boolean_t locked = B_FALSE;

        /*
         * Protect against recursively entering spa_namespace_lock
         * when spa_open() is used for a pool on a (local) ZVOL(s).
         * This is needed since we replaced upstream zfsdev_state_lock
         * with spa_namespace_lock in the ZVOL code.
         * We are using the same trick as spa_open().
         * Note that calls in zvol_first_open which need to resolve
         * pool name to a spa object will enter spa_open()
         * recursively, but that function already has all the
         * necessary protection.
         */
        if (!MUTEX_HELD(&spa_namespace_lock)) {
                mutex_enter(&spa_namespace_lock);
                locked = B_TRUE;
        }

        zv = pp->private;
        if (zv == NULL) {
                if (locked)
                        mutex_exit(&spa_namespace_lock);
                return (ENXIO);
        }

        if (zv->zv_total_opens == 0)
                err = zvol_first_open(zv);
        if (err) {
                if (locked)
                        mutex_exit(&spa_namespace_lock);
                return (err);
        }
        /* Writes to a read-only volume are refused up front. */
        if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
                err = EROFS;
                goto out;
        }
        if (zv->zv_flags & ZVOL_EXCL) {
                err = EBUSY;
                goto out;
        }
#ifdef FEXCL
        if (flag & FEXCL) {
                /* Exclusive open requires that nobody else has it open. */
                if (zv->zv_total_opens != 0) {
                        err = EBUSY;
                        goto out;
                }
                zv->zv_flags |= ZVOL_EXCL;
        }
#endif

        zv->zv_total_opens += count;
        if (locked)
                mutex_exit(&spa_namespace_lock);

        return (err);
out:
        /* A check failed after a successful first open: close it again. */
        if (zv->zv_total_opens == 0)
                zvol_last_close(zv);
        if (locked)
                mutex_exit(&spa_namespace_lock);
        return (err);
}
00956 
/*
 * GEOM close handler: drop "count" opens, clear the exclusive flag, and
 * release the underlying objset when the last open goes away.  Returns
 * ENXIO if the provider has no zvol attached, otherwise 0.
 */
/*ARGSUSED*/
static int
zvol_close(struct g_provider *pp, int flag, int count)
{
        zvol_state_t *zv;
        int error = 0;
        boolean_t locked = B_FALSE;

        /* See comment in zvol_open(). */
        if (!MUTEX_HELD(&spa_namespace_lock)) {
                mutex_enter(&spa_namespace_lock);
                locked = B_TRUE;
        }

        zv = pp->private;
        if (zv == NULL) {
                if (locked)
                        mutex_exit(&spa_namespace_lock);
                return (ENXIO);
        }

        if (zv->zv_flags & ZVOL_EXCL) {
                /* An exclusive holder is by definition the only opener. */
                ASSERT(zv->zv_total_opens == 1);
                zv->zv_flags &= ~ZVOL_EXCL;
        }

        /*
         * If the open count is zero, this is a spurious close.
         * That indicates a bug in the kernel / DDI framework.
         */
        ASSERT(zv->zv_total_opens != 0);

        /*
         * You may get multiple opens, but only one close.
         */
        zv->zv_total_opens -= count;

        if (zv->zv_total_opens == 0)
                zvol_last_close(zv);

        if (locked)
                mutex_exit(&spa_namespace_lock);
        return (error);
}
01001 
01002 static void
01003 zvol_get_done(zgd_t *zgd, int error)
01004 {
01005         if (zgd->zgd_db)
01006                 dmu_buf_rele(zgd->zgd_db, zgd);
01007 
01008         zfs_range_unlock(zgd->zgd_rl);
01009 
01010         if (error == 0 && zgd->zgd_bp)
01011                 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
01012 
01013         kmem_free(zgd, sizeof (zgd_t));
01014 }
01015 
/*
 * Get data to generate a TX_WRITE intent log record.  Called by the ZIL
 * when committing a TX_WRITE itx whose data was not embedded in the log
 * record.  Returns 0 on success (the data is either copied into 'buf'
 * or an indirect dmu_sync() has been issued), otherwise an errno.
 */
static int
zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zvol_state_t *zv = arg;
	objset_t *os = zv->zv_objset;
	uint64_t object = ZVOL_OBJ;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;	/* length of user data */
	blkptr_t *bp = &lr->lr_blkptr;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error;

	ASSERT(zio != NULL);
	ASSERT(size != 0);

	zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_zilog = zv->zv_zilog;
	/* Keep writers out of the range while the data is read/synced. */
	zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) {	/* immediate write */
		error = dmu_read(os, object, offset, size, buf,
		    DMU_READ_NO_PREFETCH);
	} else {
		/* Indirect: sync the whole block containing the write. */
		size = zv->zv_volblocksize;
		offset = P2ALIGN(offset, size);
		error = dmu_buf_hold(os, object, offset, zgd, &db,
		    DMU_READ_NO_PREFETCH);
		if (error == 0) {
			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			/*
			 * On success dmu_sync() takes ownership of zgd and
			 * invokes zvol_get_done() itself on completion.
			 */
			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zvol_get_done, zgd);

			if (error == 0)
				return (0);
		}
	}

	/* All failure paths (and the immediate path) clean up here. */
	zvol_get_done(zgd, error);

	return (error);
}
01073 
/*
 * Largest write (in bytes) whose data is stored inside the intent log
 * record itself; larger block-aligned writes go indirect via
 * dmu_sync().  Tunable.
 */
ssize_t zvol_immediate_write_sz = 32768;

/*
 * Log a zvol write to the intent log, splitting it into one itx per
 * chunk.
 *
 *   zv    - zvol being written
 *   tx    - assigned transaction the write was performed under
 *   off   - starting byte offset of the write
 *   resid - number of bytes written
 *   sync  - B_TRUE if the itx must commit synchronously
 */
static void
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
    boolean_t sync)
{
	uint32_t blocksize = zv->zv_volblocksize;
	zilog_t *zilog = zv->zv_zilog;
	boolean_t slogging;
	ssize_t immediate_write_sz;

	/* During replay the write is already in the log. */
	if (zil_replaying(zilog, tx))
		return;

	/* logbias=throughput disables immediate writes entirely. */
	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
	    ? 0 : zvol_immediate_write_sz;

	slogging = spa_has_slogs(zilog->zl_spa) &&
	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);

	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		ssize_t len;
		itx_wr_state_t write_state;

		/*
		 * Unlike zfs_log_write() we can be called with
		 * up to DMU_MAX_ACCESS/2 (5MB) writes.
		 */
		if (blocksize > immediate_write_sz && !slogging &&
		    resid >= blocksize && off % blocksize == 0) {
			write_state = WR_INDIRECT; /* uses dmu_sync */
			len = blocksize;
		} else if (sync) {
			write_state = WR_COPIED;
			len = MIN(ZIL_MAX_LOG_DATA, resid);
		} else {
			write_state = WR_NEED_COPY;
			len = MIN(ZIL_MAX_LOG_DATA, resid);
		}

		/* For WR_COPIED the data lives directly after the record. */
		itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
		    (write_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		/*
		 * If copying the data into the itx fails, fall back to an
		 * itx without embedded data (WR_NEED_COPY).
		 */
		if (write_state == WR_COPIED && dmu_read(zv->zv_objset,
		    ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
			zil_itx_destroy(itx);
			itx = zil_itx_create(TX_WRITE, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			write_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = write_state;
		if (write_state == WR_NEED_COPY)
			itx->itx_sod += len;	/* account deferred data */
		lr->lr_foid = ZVOL_OBJ;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zv;
		itx->itx_sync = sync;

		zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
01150 
01151 #ifdef sun
01152 static int
01153 zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size,
01154     boolean_t doread, boolean_t isdump)
01155 {
01156         vdev_disk_t *dvd;
01157         int c;
01158         int numerrors = 0;
01159 
01160         for (c = 0; c < vd->vdev_children; c++) {
01161                 ASSERT(vd->vdev_ops == &vdev_mirror_ops ||
01162                     vd->vdev_ops == &vdev_replacing_ops ||
01163                     vd->vdev_ops == &vdev_spare_ops);
01164                 int err = zvol_dumpio_vdev(vd->vdev_child[c],
01165                     addr, offset, size, doread, isdump);
01166                 if (err != 0) {
01167                         numerrors++;
01168                 } else if (doread) {
01169                         break;
01170                 }
01171         }
01172 
01173         if (!vd->vdev_ops->vdev_op_leaf)
01174                 return (numerrors < vd->vdev_children ? 0 : EIO);
01175 
01176         if (doread && !vdev_readable(vd))
01177                 return (EIO);
01178         else if (!doread && !vdev_writeable(vd))
01179                 return (EIO);
01180 
01181         dvd = vd->vdev_tsd;
01182         ASSERT3P(dvd, !=, NULL);
01183         offset += VDEV_LABEL_START_SIZE;
01184 
01185         if (ddi_in_panic() || isdump) {
01186                 ASSERT(!doread);
01187                 if (doread)
01188                         return (EIO);
01189                 return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
01190                     lbtodb(size)));
01191         } else {
01192                 return (vdev_disk_physio(dvd->vd_lh, addr, size, offset,
01193                     doread ? B_READ : B_WRITE));
01194         }
01195 }
01196 
/*
 * Perform raw I/O (used by the dump path) directly against the vdevs
 * backing a dumpified zvol, bypassing the DMU.
 *
 * NOTE(review): the extent walk below assumes 'offset' lies within the
 * precomputed zv_extents list; otherwise list_next() would eventually
 * return NULL and be dereferenced -- confirm callers always pass
 * offsets below the volume size.
 */
static int
zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
    boolean_t doread, boolean_t isdump)
{
	vdev_t *vd;
	int error;
	zvol_extent_t *ze;
	spa_t *spa = dmu_objset_spa(zv->zv_objset);

	/* Must be sector aligned, and not straddle a block boundary. */
	if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) ||
	    P2BOUNDARY(offset, size, zv->zv_volblocksize)) {
		return (EINVAL);
	}
	ASSERT(size <= zv->zv_volblocksize);

	/* Locate the extent this belongs to */
	ze = list_head(&zv->zv_extents);
	while (offset >= ze->ze_nblks * zv->zv_volblocksize) {
		offset -= ze->ze_nblks * zv->zv_volblocksize;
		ze = list_next(&zv->zv_extents, ze);
	}

	/* In panic context the config lock cannot be taken normally. */
	if (!ddi_in_panic())
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

	vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva));
	offset += DVA_GET_OFFSET(&ze->ze_dva);
	error = zvol_dumpio_vdev(vd, addr, offset, size, doread, isdump);

	if (!ddi_in_panic())
		spa_config_exit(spa, SCL_STATE, FTAG);

	return (error);
}
01232 #endif  /* sun */
01233 
01234 int
01235 zvol_strategy(struct bio *bp)
01236 {
01237         zvol_state_t *zv = bp->bio_to->private;
01238         uint64_t off, volsize;
01239         size_t resid;
01240         char *addr;
01241         objset_t *os;
01242         rl_t *rl;
01243         int error = 0;
01244         boolean_t doread = (bp->bio_cmd == BIO_READ);
01245         boolean_t sync;
01246 
01247         if (zv == NULL) {
01248                 g_io_deliver(bp, ENXIO);
01249                 return (0);
01250         }
01251 
01252         if (bp->bio_cmd != BIO_READ && (zv->zv_flags & ZVOL_RDONLY)) {
01253                 g_io_deliver(bp, EROFS);
01254                 return (0);
01255         }
01256 
01257         off = bp->bio_offset;
01258         volsize = zv->zv_volsize;
01259 
01260         os = zv->zv_objset;
01261         ASSERT(os != NULL);
01262 
01263         addr = bp->bio_data;
01264         resid = bp->bio_length;
01265 
01266         if (resid > 0 && (off < 0 || off >= volsize)) {
01267                 g_io_deliver(bp, EIO);
01268                 return (0);
01269         }
01270 
01271         sync = !doread && zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
01272 
01273         /*
01274          * There must be no buffer changes when doing a dmu_sync() because
01275          * we can't change the data whilst calculating the checksum.
01276          */
01277         rl = zfs_range_lock(&zv->zv_znode, off, resid,
01278             doread ? RL_READER : RL_WRITER);
01279 
01280         while (resid != 0 && off < volsize) {
01281                 size_t size = MIN(resid, zvol_maxphys);
01282                 if (doread) {
01283                         error = dmu_read(os, ZVOL_OBJ, off, size, addr,
01284                             DMU_READ_PREFETCH);
01285                 } else {
01286                         dmu_tx_t *tx = dmu_tx_create(os);
01287                         dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
01288                         error = dmu_tx_assign(tx, TXG_WAIT);
01289                         if (error) {
01290                                 dmu_tx_abort(tx);
01291                         } else {
01292                                 dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
01293                                 zvol_log_write(zv, tx, off, size, sync);
01294                                 dmu_tx_commit(tx);
01295                         }
01296                 }
01297                 if (error) {
01298                         /* convert checksum errors into IO errors */
01299                         if (error == ECKSUM)
01300                                 error = EIO;
01301                         break;
01302                 }
01303                 off += size;
01304                 addr += size;
01305                 resid -= size;
01306         }
01307         zfs_range_unlock(rl);
01308 
01309         bp->bio_completed = bp->bio_length - resid;
01310         if (bp->bio_completed < bp->bio_length)
01311                 bp->bio_error = (off > volsize ? EINVAL : error);
01312 
01313         if (sync)
01314                 zil_commit(zv->zv_zilog, ZVOL_OBJ);
01315         g_io_deliver(bp, 0);
01316 
01317         return (0);
01318 }
01319 
01320 #ifdef sun
01321 
01329 void
01330 zvol_minphys(struct buf *bp)
01331 {
01332         if (bp->b_bcount > zvol_maxphys)
01333                 bp->b_bcount = zvol_maxphys;
01334 }
01335 
01336 int
01337 zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
01338 {
01339         minor_t minor = getminor(dev);
01340         zvol_state_t *zv;
01341         int error = 0;
01342         uint64_t size;
01343         uint64_t boff;
01344         uint64_t resid;
01345 
01346         zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
01347         if (zv == NULL)
01348                 return (ENXIO);
01349 
01350         boff = ldbtob(blkno);
01351         resid = ldbtob(nblocks);
01352 
01353         VERIFY3U(boff + resid, <=, zv->zv_volsize);
01354 
01355         while (resid) {
01356                 size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff);
01357                 error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE);
01358                 if (error)
01359                         break;
01360                 boff += size;
01361                 addr += size;
01362                 resid -= size;
01363         }
01364 
01365         return (error);
01366 }
01367 
01368 /*ARGSUSED*/
01369 int
01370 zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
01371 {
01372         minor_t minor = getminor(dev);
01373         zvol_state_t *zv;
01374         uint64_t volsize;
01375         rl_t *rl;
01376         int error = 0;
01377 
01378         zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
01379         if (zv == NULL)
01380                 return (ENXIO);
01381 
01382         volsize = zv->zv_volsize;
01383         if (uio->uio_resid > 0 &&
01384             (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
01385                 return (EIO);
01386 
01387         if (zv->zv_flags & ZVOL_DUMPIFIED) {
01388                 error = physio(zvol_strategy, NULL, dev, B_READ,
01389                     zvol_minphys, uio);
01390                 return (error);
01391         }
01392 
01393         rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
01394             RL_READER);
01395         while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
01396                 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
01397 
01398                 /* don't read past the end */
01399                 if (bytes > volsize - uio->uio_loffset)
01400                         bytes = volsize - uio->uio_loffset;
01401 
01402                 error =  dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
01403                 if (error) {
01404                         /* convert checksum errors into IO errors */
01405                         if (error == ECKSUM)
01406                                 error = EIO;
01407                         break;
01408                 }
01409         }
01410         zfs_range_unlock(rl);
01411         return (error);
01412 }
01413 
01414 /*ARGSUSED*/
01415 int
01416 zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
01417 {
01418         minor_t minor = getminor(dev);
01419         zvol_state_t *zv;
01420         uint64_t volsize;
01421         rl_t *rl;
01422         int error = 0;
01423         boolean_t sync;
01424 
01425         zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
01426         if (zv == NULL)
01427                 return (ENXIO);
01428 
01429         volsize = zv->zv_volsize;
01430         if (uio->uio_resid > 0 &&
01431             (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
01432                 return (EIO);
01433 
01434         if (zv->zv_flags & ZVOL_DUMPIFIED) {
01435                 error = physio(zvol_strategy, NULL, dev, B_WRITE,
01436                     zvol_minphys, uio);
01437                 return (error);
01438         }
01439 
01440         sync = !(zv->zv_flags & ZVOL_WCE) ||
01441             (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
01442 
01443         rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
01444             RL_WRITER);
01445         while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
01446                 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
01447                 uint64_t off = uio->uio_loffset;
01448                 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
01449 
01450                 if (bytes > volsize - off)      /* don't write past the end */
01451                         bytes = volsize - off;
01452 
01453                 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
01454                 error = dmu_tx_assign(tx, TXG_WAIT);
01455                 if (error) {
01456                         dmu_tx_abort(tx);
01457                         break;
01458                 }
01459                 error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
01460                 if (error == 0)
01461                         zvol_log_write(zv, tx, off, bytes, sync);
01462                 dmu_tx_commit(tx);
01463 
01464                 if (error)
01465                         break;
01466         }
01467         zfs_range_unlock(rl);
01468         if (sync)
01469                 zil_commit(zv->zv_zilog, ZVOL_OBJ);
01470         return (error);
01471 }
01472 
/*
 * Synthesize an EFI label describing the zvol as a single reserved
 * partition and copy the requested LBA(s) out to the caller.
 *
 *   arg  - user pointer to a dk_efi_t describing the request
 *   flag - ioctl flag passed through to ddi_copyin/ddi_copyout
 *   vs   - volume size in bytes
 *   bs   - log2 of the sector size
 *
 * Returns 0, EFAULT on copyin/copyout failure, or EINVAL for
 * unsupported LBAs.
 */
int
zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
{
	struct uuid uuid = EFI_RESERVED;
	efi_gpe_t gpe = { 0 };
	uint32_t crc;
	dk_efi_t efi;
	int length;
	char *ptr;

	if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag))
		return (EFAULT);
	ptr = (char *)(uintptr_t)efi.dki_data_64;
	length = efi.dki_length;
	/*
	 * Some clients may attempt to request a PMBR for the
	 * zvol.  Currently this interface will return EINVAL to
	 * such requests.  These requests could be supported by
	 * adding a check for lba == 0 and consing up an appropriate
	 * PMBR.
	 */
	if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0)
		return (EINVAL);

	/* One partition covering LBA 34 through the last usable sector. */
	gpe.efi_gpe_StartingLBA = LE_64(34ULL);
	gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1);
	UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid);

	if (efi.dki_lba == 1) {		/* LBA 1: the GPT header */
		efi_gpt_t gpt = { 0 };

		gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE);
		gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT);
		gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt));
		gpt.efi_gpt_MyLBA = LE_64(1ULL);
		gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL);
		gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1);
		gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL);
		gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1);
		gpt.efi_gpt_SizeOfPartitionEntry =
		    LE_32(sizeof (efi_gpe_t));
		/* CRC of the entry array first, then of the header itself. */
		CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table);
		gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
		CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table);
		gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
		if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length),
		    flag))
			return (EFAULT);
		ptr += sizeof (gpt);
		length -= sizeof (gpt);
	}
	/* LBA 2 (or the remainder of the buffer): the partition entry. */
	if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe),
	    length), flag))
		return (EFAULT);
	return (0);
}
01529 
01539 int
01540 zvol_get_volume_params(minor_t minor, uint64_t *blksize,
01541     uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
01542     void **rl_hdl, void **bonus_hdl)
01543 {
01544         zvol_state_t *zv;
01545 
01546         zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
01547         if (zv == NULL)
01548                 return (ENXIO);
01549         if (zv->zv_flags & ZVOL_DUMPIFIED)
01550                 return (ENXIO);
01551 
01552         ASSERT(blksize && max_xfer_len && minor_hdl &&
01553             objset_hdl && zil_hdl && rl_hdl && bonus_hdl);
01554 
01555         *blksize = zv->zv_volblocksize;
01556         *max_xfer_len = (uint64_t)zvol_maxphys;
01557         *minor_hdl = zv;
01558         *objset_hdl = zv->zv_objset;
01559         *zil_hdl = zv->zv_zilog;
01560         *rl_hdl = &zv->zv_znode;
01561         *bonus_hdl = zv->zv_dbuf;
01562         return (0);
01563 }
01564 
01570 uint64_t
01571 zvol_get_volume_size(void *minor_hdl)
01572 {
01573         zvol_state_t *zv = minor_hdl;
01574 
01575         return (zv->zv_volsize);
01576 }
01577 
01582 int
01583 zvol_get_volume_wce(void *minor_hdl)
01584 {
01585         zvol_state_t *zv = minor_hdl;
01586 
01587         return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0);
01588 }
01589 
01593 void
01594 zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
01595     boolean_t sync)
01596 {
01597         zvol_state_t *zv = minor_hdl;
01598 
01599         zvol_log_write(zv, tx, off, resid, sync);
01600 }
/*
 * dkio(7I)-style ioctl handler for a zvol character device.
 *
 * spa_namespace_lock is taken to stabilize the zvol state, then
 * released before any operation that can fault (copyin/copyout) or
 * block on I/O; cases that do so return directly instead of falling
 * through to the common unlock at the bottom.
 */
/*ARGSUSED*/
int
zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
{
	zvol_state_t *zv;
	struct dk_cinfo dki;
	struct dk_minfo dkm;
	struct dk_callback *dkc;
	int error = 0;
	rl_t *rl;

	mutex_enter(&spa_namespace_lock);

	zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL);

	if (zv == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENXIO);
	}
	ASSERT(zv->zv_total_opens > 0);

	switch (cmd) {

	case DKIOCINFO:
		/* Controller info: snapshot under the lock, copy out after. */
		bzero(&dki, sizeof (dki));
		(void) strcpy(dki.dki_cname, "zvol");
		(void) strcpy(dki.dki_dname, "zvol");
		dki.dki_ctype = DKC_UNKNOWN;
		dki.dki_unit = getminor(dev);
		dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs);
		mutex_exit(&spa_namespace_lock);
		if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag))
			error = EFAULT;
		return (error);

	case DKIOCGMEDIAINFO:
		/* Media geometry: capacity is in zv_min_bs-sized sectors. */
		bzero(&dkm, sizeof (dkm));
		dkm.dki_lbsize = 1U << zv->zv_min_bs;
		dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs;
		dkm.dki_media_type = DK_UNKNOWN;
		mutex_exit(&spa_namespace_lock);
		if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag))
			error = EFAULT;
		return (error);

	case DKIOCGETEFI:
		{
			/* Copy the values out of zv before dropping the lock. */
			uint64_t vs = zv->zv_volsize;
			uint8_t bs = zv->zv_min_bs;

			mutex_exit(&spa_namespace_lock);
			error = zvol_getefi((void *)arg, flag, vs, bs);
			return (error);
		}

	case DKIOCFLUSHWRITECACHE:
		dkc = (struct dk_callback *)arg;
		mutex_exit(&spa_namespace_lock);
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
		/* In-kernel callers may pass a completion callback. */
		if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
			(*dkc->dkc_callback)(dkc->dkc_cookie, error);
			error = 0;
		}
		return (error);

	case DKIOCGETWCE:
		{
			/*
			 * NOTE(review): this copyout runs while still
			 * holding spa_namespace_lock ('break' falls
			 * through to the common unlock below).
			 */
			int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
			if (ddi_copyout(&wce, (void *)arg, sizeof (int),
			    flag))
				error = EFAULT;
			break;
		}
	case DKIOCSETWCE:
		{
			int wce;
			if (ddi_copyin((void *)arg, &wce, sizeof (int),
			    flag)) {
				error = EFAULT;
				break;
			}
			if (wce) {
				zv->zv_flags |= ZVOL_WCE;
				mutex_exit(&spa_namespace_lock);
			} else {
				/*
				 * Disabling the cache flushes anything
				 * already buffered in the ZIL.
				 */
				zv->zv_flags &= ~ZVOL_WCE;
				mutex_exit(&spa_namespace_lock);
				zil_commit(zv->zv_zilog, ZVOL_OBJ);
			}
			return (0);
		}

	case DKIOCGGEOM:
	case DKIOCGVTOC:
		/*
		 * commands using these (like prtvtoc) expect ENOTSUP
		 * since we're emulating an EFI label
		 */
		error = ENOTSUP;
		break;

	case DKIOCDUMPINIT:
		/* Quiesce all I/O on the volume while dumpifying. */
		rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
		    RL_WRITER);
		error = zvol_dumpify(zv);
		zfs_range_unlock(rl);
		break;

	case DKIOCDUMPFINI:
		if (!(zv->zv_flags & ZVOL_DUMPIFIED))
			break;
		rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
		    RL_WRITER);
		error = zvol_dump_fini(zv);
		zfs_range_unlock(rl);
		break;

	default:
		error = ENOTTY;
		break;

	}
	mutex_exit(&spa_namespace_lock);
	return (error);
}
01734 #endif  /* sun */
01735 
01736 int
01737 zvol_busy(void)
01738 {
01739         return (zvol_minors != 0);
01740 }
01741 
01742 void
01743 zvol_init(void)
01744 {
01745         VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t),
01746             1) == 0);
01747         ZFS_LOG(1, "ZVOL Initialized.");
01748 }
01749 
01750 void
01751 zvol_fini(void)
01752 {
01753         ddi_soft_state_fini(&zfsdev_state);
01754         ZFS_LOG(1, "ZVOL Deinitialized.");
01755 }
01756 
01757 #ifdef sun
01758 static int
01759 zvol_dump_init(zvol_state_t *zv, boolean_t resize)
01760 {
01761         dmu_tx_t *tx;
01762         int error = 0;
01763         objset_t *os = zv->zv_objset;
01764         nvlist_t *nv = NULL;
01765         uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));
01766 
01767         ASSERT(MUTEX_HELD(&spa_namespace_lock));
01768         error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
01769             DMU_OBJECT_END);
01770         /* wait for dmu_free_long_range to actually free the blocks */
01771         txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
01772 
01773         tx = dmu_tx_create(os);
01774         dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
01775         dmu_tx_hold_bonus(tx, ZVOL_OBJ);
01776         error = dmu_tx_assign(tx, TXG_WAIT);
01777         if (error) {
01778                 dmu_tx_abort(tx);
01779                 return (error);
01780         }
01781 
01782         /*
01783          * If we are resizing the dump device then we only need to
01784          * update the refreservation to match the newly updated
01785          * zvolsize. Otherwise, we save off the original state of the
01786          * zvol so that we can restore them if the zvol is ever undumpified.
01787          */
01788         if (resize) {
01789                 error = zap_update(os, ZVOL_ZAP_OBJ,
01790                     zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
01791                     &zv->zv_volsize, tx);
01792         } else {
01793                 uint64_t checksum, compress, refresrv, vbs, dedup;
01794 
01795                 error = dsl_prop_get_integer(zv->zv_name,
01796                     zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL);
01797                 error = error ? error : dsl_prop_get_integer(zv->zv_name,
01798                     zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL);
01799                 error = error ? error : dsl_prop_get_integer(zv->zv_name,
01800                     zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL);
01801                 error = error ? error : dsl_prop_get_integer(zv->zv_name,
01802                     zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL);
01803                 if (version >= SPA_VERSION_DEDUP) {
01804                         error = error ? error :
01805                             dsl_prop_get_integer(zv->zv_name,
01806                             zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL);
01807                 }
01808 
01809                 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
01810                     zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1,
01811                     &compress, tx);
01812                 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
01813                     zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx);
01814                 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
01815                     zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
01816                     &refresrv, tx);
01817                 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
01818                     zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1,
01819                     &vbs, tx);
01820                 error = error ? error : dmu_object_set_blocksize(
01821                     os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx);
01822                 if (version >= SPA_VERSION_DEDUP) {
01823                         error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
01824                             zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1,
01825                             &dedup, tx);
01826                 }
01827                 if (error == 0)
01828                         zv->zv_volblocksize = SPA_MAXBLOCKSIZE;
01829         }
01830         dmu_tx_commit(tx);
01831 
01832         /*
01833          * We only need update the zvol's property if we are initializing
01834          * the dump area for the first time.
01835          */
01836         if (!resize) {
01837                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
01838                 VERIFY(nvlist_add_uint64(nv,
01839                     zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0);
01840                 VERIFY(nvlist_add_uint64(nv,
01841                     zfs_prop_to_name(ZFS_PROP_COMPRESSION),
01842                     ZIO_COMPRESS_OFF) == 0);
01843                 VERIFY(nvlist_add_uint64(nv,
01844                     zfs_prop_to_name(ZFS_PROP_CHECKSUM),
01845                     ZIO_CHECKSUM_OFF) == 0);
01846                 if (version >= SPA_VERSION_DEDUP) {
01847                         VERIFY(nvlist_add_uint64(nv,
01848                             zfs_prop_to_name(ZFS_PROP_DEDUP),
01849                             ZIO_CHECKSUM_OFF) == 0);
01850                 }
01851 
01852                 error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
01853                     nv, NULL);
01854                 nvlist_free(nv);
01855 
01856                 if (error)
01857                         return (error);
01858         }
01859 
01860         /* Allocate the space for the dump */
01861         error = zvol_prealloc(zv);
01862         return (error);
01863 }
01864 
/*
 * Prepare the zvol for use as a kernel crash-dump device: ensure an
 * on-disk ZVOL_DUMPSIZE entry matching the current volume size exists
 * (running zvol_dump_init() if missing or stale), build the LBA extent
 * map, and persist the dump size in the zvol's ZAP object.
 *
 * Returns 0 on success or an errno value; on any failure the partially
 * dumpified state is rolled back via zvol_dump_fini().
 */
static int
zvol_dumpify(zvol_state_t *zv)
{
        int error = 0;
        uint64_t dumpsize = 0;
        dmu_tx_t *tx;
        objset_t *os = zv->zv_objset;

        /* A dump device must be writable. */
        if (zv->zv_flags & ZVOL_RDONLY)
                return (EROFS);

        /*
         * (Re)initialize the dump area when no dump size has been
         * recorded yet, or when the recorded size no longer matches the
         * volume size (i.e. the volume was resized since dumpification).
         */
        if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,
            8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) {
                boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE;

                if ((error = zvol_dump_init(zv, resize)) != 0) {
                        (void) zvol_dump_fini(zv);
                        return (error);
                }
        }

        /*
         * Build up our lba mapping.
         */
        error = zvol_get_lbas(zv);
        if (error) {
                (void) zvol_dump_fini(zv);
                return (error);
        }

        tx = dmu_tx_create(os);
        dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
        error = dmu_tx_assign(tx, TXG_WAIT);
        if (error) {
                dmu_tx_abort(tx);
                (void) zvol_dump_fini(zv);
                return (error);
        }

        /* Record the dump size so future calls can detect a resize. */
        zv->zv_flags |= ZVOL_DUMPIFIED;
        error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1,
            &zv->zv_volsize, tx);
        dmu_tx_commit(tx);

        if (error) {
                (void) zvol_dump_fini(zv);
                return (error);
        }

        /* Wait for the dumpified state to reach stable storage. */
        txg_wait_synced(dmu_objset_pool(os), 0);
        return (0);
}
01917 
/*
 * Undo zvol_dumpify(): remove the ZVOL_DUMPSIZE ZAP entry, restore the
 * saved checksum/compression/refreservation (and dedup, if supported)
 * properties, free the preallocated extents, and restore the original
 * volume block size.
 *
 * Returns 0, or an errno value if a transaction assignment fails.
 */
static int
zvol_dump_fini(zvol_state_t *zv)
{
        dmu_tx_t *tx;
        objset_t *os = zv->zv_objset;
        nvlist_t *nv;
        int error = 0;
        /*
         * NOTE(review): these are filled in only by the zap_lookup()
         * calls below, whose return values are ignored.  If a lookup
         * fails (e.g. zvol_dump_init() errored before storing the
         * property), the corresponding value -- including vbs, which is
         * later passed to dmu_object_set_blocksize() -- is used
         * uninitialized.  Confirm whether sane defaults should be set.
         */
        uint64_t checksum, compress, refresrv, vbs, dedup;
        uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));

        /*
         * Attempt to restore the zvol back to its pre-dumpified state.
         * This is a best-effort attempt as it's possible that not all
         * of these properties were initialized during the dumpify process
         * (i.e. error during zvol_dump_init).
         */

        tx = dmu_tx_create(os);
        dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
        error = dmu_tx_assign(tx, TXG_WAIT);
        if (error) {
                dmu_tx_abort(tx);
                return (error);
        }
        /* Drop the dump-size marker; its absence means "not dumpified". */
        (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx);
        dmu_tx_commit(tx);

        /* Fetch the property values saved away by zvol_dump_init(). */
        (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
            zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum);
        (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
            zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress);
        (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
            zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv);
        (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
            zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs);

        /* Push the saved values back as local dataset properties. */
        VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
        (void) nvlist_add_uint64(nv,
            zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum);
        (void) nvlist_add_uint64(nv,
            zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress);
        (void) nvlist_add_uint64(nv,
            zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv);
        /* Dedup only exists on pools at SPA_VERSION_DEDUP or later. */
        if (version >= SPA_VERSION_DEDUP &&
            zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
            zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) {
                (void) nvlist_add_uint64(nv,
                    zfs_prop_to_name(ZFS_PROP_DEDUP), dedup);
        }
        (void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
            nv, NULL);
        nvlist_free(nv);

        /* Release the dump extents and the preallocated blocks. */
        zvol_free_extents(zv);
        zv->zv_flags &= ~ZVOL_DUMPIFIED;
        (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END);
        /* wait for dmu_free_long_range to actually free the blocks */
        txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
        tx = dmu_tx_create(os);
        dmu_tx_hold_bonus(tx, ZVOL_OBJ);
        error = dmu_tx_assign(tx, TXG_WAIT);
        if (error) {
                dmu_tx_abort(tx);
                return (error);
        }
        /* Restore the pre-dumpify volume block size if we can. */
        if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0)
                zv->zv_volblocksize = vbs;
        dmu_tx_commit(tx);

        return (0);
}
01989 #endif  /* sun */
01990 
01991 static zvol_state_t *
01992 zvol_geom_create(const char *name)
01993 {
01994         struct g_provider *pp;
01995         struct g_geom *gp;
01996         zvol_state_t *zv;
01997 
01998         gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
01999         gp->start = zvol_geom_start;
02000         gp->access = zvol_geom_access;
02001         pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
02002         pp->sectorsize = DEV_BSIZE;
02003 
02004         zv = kmem_zalloc(sizeof(*zv), KM_SLEEP);
02005         zv->zv_provider = pp;
02006         zv->zv_state = 0;
02007         bioq_init(&zv->zv_queue);
02008         mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF);
02009 
02010         pp->private = zv;
02011 
02012         return (zv);
02013 }
02014 
02015 static void
02016 zvol_geom_run(zvol_state_t *zv)
02017 {
02018         struct g_provider *pp;
02019 
02020         pp = zv->zv_provider;
02021         g_error_provider(pp, 0);
02022 
02023         kproc_kthread_add(zvol_geom_worker, zv, &zfsproc, NULL, 0, 0,
02024             "zfskern", "zvol %s", pp->name + sizeof(ZVOL_DRIVER));
02025 }
02026 
02027 static void
02028 zvol_geom_destroy(zvol_state_t *zv)
02029 {
02030         struct g_provider *pp;
02031 
02032         g_topology_assert();
02033 
02034         mtx_lock(&zv->zv_queue_mtx);
02035         zv->zv_state = 1;
02036         wakeup_one(&zv->zv_queue);
02037         while (zv->zv_state != 2)
02038                 msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0);
02039         mtx_destroy(&zv->zv_queue_mtx);
02040 
02041         pp = zv->zv_provider;
02042         zv->zv_provider = NULL;
02043         pp->private = NULL;
02044         g_wither_geom(pp->geom, ENXIO);
02045 
02046         kmem_free(zv, sizeof(*zv));
02047 }
02048 
02049 static int
02050 zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
02051 {
02052         int count, error, flags;
02053 
02054         g_topology_assert();
02055 
02056         /*
02057          * To make it easier we expect either open or close, but not both
02058          * at the same time.
02059          */
02060         KASSERT((acr >= 0 && acw >= 0 && ace >= 0) ||
02061             (acr <= 0 && acw <= 0 && ace <= 0),
02062             ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).",
02063             pp->name, acr, acw, ace));
02064 
02065         if (pp->private == NULL) {
02066                 if (acr <= 0 && acw <= 0 && ace <= 0)
02067                         return (0);
02068                 return (pp->error);
02069         }
02070 
02071         /*
02072          * We don't pass FEXCL flag to zvol_open()/zvol_close() if ace != 0,
02073          * because GEOM already handles that and handles it a bit differently.
02074          * GEOM allows for multiple read/exclusive consumers and ZFS allows
02075          * only one exclusive consumer, no matter if it is reader or writer.
02076          * I like better the way GEOM works so I'll leave it for GEOM to
02077          * decide what to do.
02078          */
02079 
02080         count = acr + acw + ace;
02081         if (count == 0)
02082                 return (0);
02083 
02084         flags = 0;
02085         if (acr != 0 || ace != 0)
02086                 flags |= FREAD;
02087         if (acw != 0)
02088                 flags |= FWRITE;
02089 
02090         g_topology_unlock();
02091         if (count > 0)
02092                 error = zvol_open(pp, flags, count);
02093         else
02094                 error = zvol_close(pp, flags, -count);
02095         g_topology_lock();
02096         return (error);
02097 }
02098 
02099 static void
02100 zvol_geom_start(struct bio *bp)
02101 {
02102         zvol_state_t *zv;
02103         boolean_t first;
02104 
02105         switch (bp->bio_cmd) {
02106         case BIO_READ:
02107         case BIO_WRITE:
02108         case BIO_FLUSH:
02109                 zv = bp->bio_to->private;
02110                 ASSERT(zv != NULL);
02111                 mtx_lock(&zv->zv_queue_mtx);
02112                 first = (bioq_first(&zv->zv_queue) == NULL);
02113                 bioq_insert_tail(&zv->zv_queue, bp);
02114                 mtx_unlock(&zv->zv_queue_mtx);
02115                 if (first)
02116                         wakeup_one(&zv->zv_queue);
02117                 break;
02118         case BIO_GETATTR:
02119         case BIO_DELETE:
02120         default:
02121                 g_io_deliver(bp, EOPNOTSUPP);
02122                 break;
02123         }
02124 }
02125 
/*
 * Per-volume worker thread: services bios queued by zvol_geom_start()
 * and exits when zvol_geom_destroy() requests shutdown (zv_state == 1).
 */
static void
zvol_geom_worker(void *arg)
{
        zvol_state_t *zv;
        struct bio *bp;

        /* Run at block-I/O priority like other disk worker threads. */
        thread_lock(curthread);
        sched_prio(curthread, PRIBIO);
        thread_unlock(curthread);

        zv = arg;
        for (;;) {
                mtx_lock(&zv->zv_queue_mtx);
                bp = bioq_takefirst(&zv->zv_queue);
                if (bp == NULL) {
                        if (zv->zv_state == 1) {
                                /* Shutdown requested: ack and terminate. */
                                zv->zv_state = 2;
                                wakeup(&zv->zv_state);
                                mtx_unlock(&zv->zv_queue_mtx);
                                kthread_exit();
                        }
                        /* PDROP releases zv_queue_mtx while we sleep. */
                        msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP,
                            "zvol:io", 0);
                        continue;
                }
                mtx_unlock(&zv->zv_queue_mtx);
                switch (bp->bio_cmd) {
                case BIO_FLUSH:
                        /* A flush carries no data; commit the ZIL. */
                        zil_commit(zv->zv_zilog, ZVOL_OBJ);
                        g_io_deliver(bp, 0);
                        break;
                case BIO_READ:
                case BIO_WRITE:
                        zvol_strategy(bp);
                        break;
                }
        }
}
02164 
02165 extern boolean_t dataset_name_hidden(const char *name);
02166 
02167 static int
02168 zvol_create_snapshots(objset_t *os, const char *name)
02169 {
02170         uint64_t cookie, obj;
02171         char *sname;
02172         int error, len;
02173 
02174         cookie = obj = 0;
02175         sname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
02176 
02177         (void) dmu_objset_find(name, dmu_objset_prefetch, NULL,
02178             DS_FIND_SNAPSHOTS);
02179 
02180         for (;;) {
02181                 len = snprintf(sname, MAXPATHLEN, "%s@", name);
02182                 if (len >= MAXPATHLEN) {
02183                         dmu_objset_rele(os, FTAG);
02184                         error = ENAMETOOLONG;
02185                         break;
02186                 }
02187 
02188                 error = dmu_snapshot_list_next(os, MAXPATHLEN - len,
02189                     sname + len, &obj, &cookie, NULL);
02190                 if (error != 0) {
02191                         if (error == ENOENT)
02192                                 error = 0;
02193                         break;
02194                 }
02195 
02196                 if ((error = zvol_create_minor(sname)) != 0) {
02197                         printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n",
02198                             sname, error);
02199                         break;
02200                 }
02201         }
02202 
02203         kmem_free(sname, MAXPATHLEN);
02204         return (error);
02205 }
02206 
02207 int
02208 zvol_create_minors(const char *name)
02209 {
02210         uint64_t cookie;
02211         objset_t *os;
02212         char *osname, *p;
02213         int error, len;
02214 
02215         if (dataset_name_hidden(name))
02216                 return (0);
02217 
02218         if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
02219                 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n",
02220                     name, error);
02221                 return (error);
02222         }
02223         if (dmu_objset_type(os) == DMU_OST_ZVOL) {
02224                 if ((error = zvol_create_minor(name)) == 0)
02225                         error = zvol_create_snapshots(os, name);
02226                 else {
02227                         printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n",
02228                             name, error);
02229                 }
02230                 dmu_objset_rele(os, FTAG);
02231                 return (error);
02232         }
02233         if (dmu_objset_type(os) != DMU_OST_ZFS) {
02234                 dmu_objset_rele(os, FTAG);
02235                 return (0);
02236         }
02237 
02238         osname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
02239         if (snprintf(osname, MAXPATHLEN, "%s/", name) >= MAXPATHLEN) {
02240                 dmu_objset_rele(os, FTAG);
02241                 kmem_free(osname, MAXPATHLEN);
02242                 return (ENOENT);
02243         }
02244         p = osname + strlen(osname);
02245         len = MAXPATHLEN - (p - osname);
02246 
02247         /* Prefetch the datasets. */
02248         cookie = 0;
02249         while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
02250                 if (!dataset_name_hidden(osname))
02251                         (void) dmu_objset_prefetch(osname, NULL);
02252         }
02253 
02254         cookie = 0;
02255         while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL,
02256             &cookie) == 0) {
02257                 dmu_objset_rele(os, FTAG);
02258                 (void)zvol_create_minors(osname);
02259                 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
02260                         printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n",
02261                             name, error);
02262                         return (error);
02263                 }
02264         }
02265 
02266         dmu_objset_rele(os, FTAG);
02267         kmem_free(osname, MAXPATHLEN);
02268         return (0);
02269 }
02270 
02271 static void
02272 zvol_rename_minor(struct g_geom *gp, const char *newname)
02273 {
02274         struct g_provider *pp;
02275         zvol_state_t *zv;
02276 
02277         ASSERT(MUTEX_HELD(&spa_namespace_lock));
02278         g_topology_assert();
02279 
02280         pp = LIST_FIRST(&gp->provider);
02281         ASSERT(pp != NULL);
02282         zv = pp->private;
02283         ASSERT(zv != NULL);
02284 
02285         zv->zv_provider = NULL;
02286         g_wither_provider(pp, ENXIO);
02287 
02288         pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
02289         pp->sectorsize = DEV_BSIZE;
02290         pp->mediasize = zv->zv_volsize;
02291         pp->private = zv;
02292         zv->zv_provider = pp;
02293         strlcpy(zv->zv_name, newname, sizeof(zv->zv_name));
02294         g_error_provider(pp, 0);
02295 }
02296 
02297 void
02298 zvol_rename_minors(const char *oldname, const char *newname)
02299 {
02300         char name[MAXPATHLEN];
02301         struct g_provider *pp;
02302         struct g_geom *gp;
02303         size_t oldnamelen, newnamelen;
02304         zvol_state_t *zv;
02305         char *namebuf;
02306 
02307         oldnamelen = strlen(oldname);
02308         newnamelen = strlen(newname);
02309 
02310         DROP_GIANT();
02311         mutex_enter(&spa_namespace_lock);
02312         g_topology_lock();
02313 
02314         LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) {
02315                 pp = LIST_FIRST(&gp->provider);
02316                 if (pp == NULL)
02317                         continue;
02318                 zv = pp->private;
02319                 if (zv == NULL)
02320                         continue;
02321                 if (strcmp(zv->zv_name, oldname) == 0) {
02322                         zvol_rename_minor(gp, newname);
02323                 } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
02324                     (zv->zv_name[oldnamelen] == '/' ||
02325                      zv->zv_name[oldnamelen] == '@')) {
02326                         snprintf(name, sizeof(name), "%s%c%s", newname,
02327                             zv->zv_name[oldnamelen],
02328                             zv->zv_name + oldnamelen + 1);
02329                         zvol_rename_minor(gp, name);
02330                 }
02331         }
02332 
02333         g_topology_unlock();
02334         mutex_exit(&spa_namespace_lock);
02335         PICKUP_GIANT();
02336 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines