FreeBSD ZFS
The Zettabyte File System
|
00001 /* 00002 * CDDL HEADER START 00003 * 00004 * The contents of this file are subject to the terms of the 00005 * Common Development and Distribution License (the "License"). 00006 * You may not use this file except in compliance with the License. 00007 * 00008 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 00009 * or http://www.opensolaris.org/os/licensing. 00010 * See the License for the specific language governing permissions 00011 * and limitations under the License. 00012 * 00013 * When distributing Covered Code, include this CDDL HEADER in each 00014 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 00015 * If applicable, add the following below this CDDL HEADER, with the 00016 * fields enclosed by brackets "[]" replaced with your own identifying 00017 * information: Portions Copyright [yyyy] [name of copyright owner] 00018 * 00019 * CDDL HEADER END 00020 */ 00021 /* 00022 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 00023 * 00024 * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org> 00025 * All rights reserved. 00026 */ 00027 00028 /* Portions Copyright 2010 Robert Milkowski */ 00029 /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 00030 00050 #include <sys/types.h> 00051 #include <sys/param.h> 00052 #include <sys/kernel.h> 00053 #include <sys/errno.h> 00054 #include <sys/uio.h> 00055 #include <sys/bio.h> 00056 #include <sys/buf.h> 00057 #include <sys/kmem.h> 00058 #include <sys/conf.h> 00059 #include <sys/cmn_err.h> 00060 #include <sys/stat.h> 00061 #include <sys/zap.h> 00062 #include <sys/spa.h> 00063 #include <sys/zio.h> 00064 #include <sys/dmu_traverse.h> 00065 #include <sys/dnode.h> 00066 #include <sys/dsl_dataset.h> 00067 #include <sys/dsl_prop.h> 00068 #include <sys/dkio.h> 00069 #include <sys/byteorder.h> 00070 #include <sys/sunddi.h> 00071 #include <sys/dirent.h> 00072 #include <sys/policy.h> 00073 #include <sys/fs/zfs.h> 00074 #include <sys/zfs_ioctl.h> 00075 #include <sys/zil.h> 00076 #include <sys/refcount.h> 00077 #include <sys/zfs_znode.h> 00078 #include <sys/zfs_rlock.h> 00079 #include <sys/vdev_impl.h> 00080 #include <sys/zvol.h> 00081 #include <sys/zil_impl.h> 00082 #include <geom/geom.h> 00083 00084 #include "zfs_namecheck.h" 00085 00086 struct g_class zfs_zvol_class = { 00087 .name = "ZFS::ZVOL", 00088 .version = G_VERSION, 00089 }; 00090 00091 DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol); 00092 00099 void *zfsdev_state; 00100 static char *zvol_tag = "zvol_tag"; 00101 00102 #define ZVOL_DUMPSIZE "dumpsize" 00103 00104 static uint32_t zvol_minors; 00105 00106 typedef struct zvol_extent { 00107 list_node_t ze_node; 00108 dva_t ze_dva; 00109 uint64_t ze_nblks; 00110 } zvol_extent_t; 00111 00115 typedef struct zvol_state { 00116 char zv_name[MAXPATHLEN]; 00117 uint64_t zv_volsize; 00118 uint64_t zv_volblocksize; 00119 struct g_provider *zv_provider; 00120 uint8_t zv_min_bs; 00121 uint8_t zv_flags; 00122 objset_t *zv_objset; 00123 uint32_t zv_total_opens; 00124 zilog_t *zv_zilog; 00125 list_t zv_extents; 00126 znode_t zv_znode; 00127 dmu_buf_t *zv_dbuf; 00128 int zv_state; 00129 struct bio_queue_head zv_queue; 00130 struct mtx zv_queue_mtx; 00131 } zvol_state_t; 00132 00133 /* 00134 * zvol specific flags 00135 */ 00136 #define ZVOL_RDONLY 0x1 00137 #define ZVOL_DUMPIFIED 0x2 00138 #define ZVOL_EXCL 0x4 00139 #define ZVOL_WCE 0x8 00140 00144 int zvol_maxphys = DMU_MAX_ACCESS/2; 00145 00146 extern int zfs_set_prop_nvlist(const char *, zprop_source_t, 00147 nvlist_t *, nvlist_t **); 00148 static int zvol_remove_zv(zvol_state_t *); 00149 static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); 00150 static int zvol_dumpify(zvol_state_t *zv); 00151 static int zvol_dump_fini(zvol_state_t *zv); 00152 static int zvol_dump_init(zvol_state_t *zv, boolean_t resize); 00153 00154 static zvol_state_t *zvol_geom_create(const char *name); 00155 static void zvol_geom_run(zvol_state_t *zv); 00156 static void zvol_geom_destroy(zvol_state_t *zv); 00157 static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace); 00158 static void zvol_geom_start(struct bio *bp); 00159 static void zvol_geom_worker(void *arg); 00160 00161 static void 00162 zvol_size_changed(zvol_state_t *zv) 00163 { 00164 #ifdef sun 00165 dev_t dev = makedevice(maj, min); 00166 00167 VERIFY(ddi_prop_update_int64(dev, zfs_dip, 00168 "Size", volsize) == DDI_SUCCESS); 00169 VERIFY(ddi_prop_update_int64(dev, zfs_dip, 00170 "Nblocks", lbtodb(volsize)) == DDI_SUCCESS); 00171 00172 /* Notify specfs to invalidate the cached size */ 00173 spec_size_invalidate(dev, VBLK); 00174 spec_size_invalidate(dev, VCHR); 00175 #else /* !sun */ 00176 struct g_provider *pp; 00177 00178 pp = zv->zv_provider; 00179 if (pp == NULL) 00180 return; 00181 g_topology_lock(); 00182 g_resize_provider(pp, zv->zv_volsize); 00183 g_topology_unlock(); 00184 #endif /* !sun */ 00185 } 00186 00187 int 00188 zvol_check_volsize(uint64_t volsize, uint64_t blocksize) 00189 { 00190 if (volsize == 0) 00191 return (EINVAL); 00192 00193 if (volsize % blocksize != 0) 00194 return (EINVAL); 00195 00196 #ifdef _ILP32 00197 if (volsize - 1 > SPEC_MAXOFFSET_T) 00198 return (EOVERFLOW); 00199 #endif 00200 return (0); 00201 } 00202 00203 int 00204 zvol_check_volblocksize(uint64_t volblocksize) 00205 { 00206 if (volblocksize < SPA_MINBLOCKSIZE || 00207 volblocksize > SPA_MAXBLOCKSIZE || 00208 !ISP2(volblocksize)) 00209 return (EDOM); 00210 00211 return (0); 00212 } 00213 00214 int 00215 zvol_get_stats(objset_t *os, nvlist_t *nv) 00216 { 00217 int error; 00218 dmu_object_info_t doi; 00219 uint64_t val; 00220 00221 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); 00222 if (error) 00223 return (error); 00224 00225 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); 00226 00227 error = dmu_object_info(os, ZVOL_OBJ, &doi); 00228 00229 if (error == 0) { 00230 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, 00231 doi.doi_data_block_size); 00232 } 00233 00234 return (error); 00235 } 00236 00237 static zvol_state_t * 00238 zvol_minor_lookup(const char *name) 00239 { 00240 struct g_provider *pp; 00241 struct g_geom *gp; 00242 zvol_state_t *zv = NULL; 00243 00244 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 00245 00246 g_topology_lock(); 00247 LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) { 00248 pp = LIST_FIRST(&gp->provider); 00249 if (pp == NULL) 00250 continue; 00251 zv = pp->private; 00252 if (zv == NULL) 00253 continue; 00254 if (strcmp(zv->zv_name, name) == 0) 00255 break; 00256 } 00257 g_topology_unlock(); 00258 00259 return (gp != NULL ? zv : NULL); 00260 } 00261 00263 struct maparg { 00264 zvol_state_t *ma_zv; 00265 uint64_t ma_blks; 00266 }; 00267 00268 /*ARGSUSED*/ 00269 static int 00270 zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, 00271 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 00272 { 00273 struct maparg *ma = arg; 00274 zvol_extent_t *ze; 00275 int bs = ma->ma_zv->zv_volblocksize; 00276 00277 if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) 00278 return (0); 00279 00280 VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); 00281 ma->ma_blks++; 00282 00283 /* Abort immediately if we have encountered gang blocks */ 00284 if (BP_IS_GANG(bp)) 00285 return (EFRAGS); 00286 00287 /* 00288 * See if the block is at the end of the previous extent. 00289 */ 00290 ze = list_tail(&ma->ma_zv->zv_extents); 00291 if (ze && 00292 DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && 00293 DVA_GET_OFFSET(BP_IDENTITY(bp)) == 00294 DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { 00295 ze->ze_nblks++; 00296 return (0); 00297 } 00298 00299 dprintf_bp(bp, "%s", "next blkptr:"); 00300 00301 /* start a new extent */ 00302 ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); 00303 ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ 00304 ze->ze_nblks = 1; 00305 list_insert_tail(&ma->ma_zv->zv_extents, ze); 00306 return (0); 00307 } 00308 00309 static void 00310 zvol_free_extents(zvol_state_t *zv) 00311 { 00312 zvol_extent_t *ze; 00313 00314 while (ze = list_head(&zv->zv_extents)) { 00315 list_remove(&zv->zv_extents, ze); 00316 kmem_free(ze, sizeof (zvol_extent_t)); 00317 } 00318 } 00319 00320 static int 00321 zvol_get_lbas(zvol_state_t *zv) 00322 { 00323 objset_t *os = zv->zv_objset; 00324 struct maparg ma; 00325 int err; 00326 00327 ma.ma_zv = zv; 00328 ma.ma_blks = 0; 00329 zvol_free_extents(zv); 00330 00331 /* commit any in-flight changes before traversing the dataset */ 00332 txg_wait_synced(dmu_objset_pool(os), 0); 00333 err = traverse_dataset(dmu_objset_ds(os), 0, 00334 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); 00335 if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { 00336 zvol_free_extents(zv); 00337 return (err ? err : EIO); 00338 } 00339 00340 return (0); 00341 } 00342 00343 /* ARGSUSED */ 00344 void 00345 zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 00346 { 00347 zfs_creat_t *zct = arg; 00348 nvlist_t *nvprops = zct->zct_props; 00349 int error; 00350 uint64_t volblocksize, volsize; 00351 00352 VERIFY(nvlist_lookup_uint64(nvprops, 00353 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); 00354 if (nvlist_lookup_uint64(nvprops, 00355 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) 00356 volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); 00357 00358 /* 00359 * These properties must be removed from the list so the generic 00360 * property setting step won't apply to them. 00361 */ 00362 VERIFY(nvlist_remove_all(nvprops, 00363 zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); 00364 (void) nvlist_remove_all(nvprops, 00365 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); 00366 00367 error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, 00368 DMU_OT_NONE, 0, tx); 00369 ASSERT(error == 0); 00370 00371 error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, 00372 DMU_OT_NONE, 0, tx); 00373 ASSERT(error == 0); 00374 00375 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 00376 ASSERT(error == 0); 00377 } 00378 00383 static int 00384 zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) 00385 { 00386 objset_t *os = zv->zv_objset; 00387 char *data = (char *)(lr + 1); /* data follows lr_write_t */ 00388 uint64_t offset, length; 00389 dmu_tx_t *tx; 00390 int error; 00391 00392 if (byteswap) 00393 byteswap_uint64_array(lr, sizeof (*lr)); 00394 00395 offset = lr->lr_offset; 00396 length = lr->lr_length; 00397 00398 /* If it's a dmu_sync() block, write the whole block */ 00399 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 00400 uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 00401 if (length < blocksize) { 00402 offset -= offset % blocksize; 00403 length = blocksize; 00404 } 00405 } 00406 00407 tx = dmu_tx_create(os); 00408 dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length); 00409 error = dmu_tx_assign(tx, TXG_WAIT); 00410 if (error) { 00411 dmu_tx_abort(tx); 00412 } else { 00413 dmu_write(os, ZVOL_OBJ, offset, length, data, tx); 00414 dmu_tx_commit(tx); 00415 } 00416 00417 return (error); 00418 } 00419 00420 /* ARGSUSED */ 00421 static int 00422 zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) 00423 { 00424 return (ENOTSUP); 00425 } 00426 00431 zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { 00432 zvol_replay_err, 00433 zvol_replay_err, 00434 zvol_replay_err, 00435 zvol_replay_err, 00436 zvol_replay_err, 00437 zvol_replay_err, 00438 zvol_replay_err, 00439 zvol_replay_err, 00440 zvol_replay_err, 00441 zvol_replay_write, 00442 zvol_replay_err, 00443 zvol_replay_err, 00444 zvol_replay_err, 00445 zvol_replay_err, 00446 zvol_replay_err, 00447 zvol_replay_err, 00448 zvol_replay_err, 00449 zvol_replay_err, 00450 zvol_replay_err, 00451 zvol_replay_err, 00452 }; 00453 00454 #ifdef sun 00455 int 00456 zvol_name2minor(const char *name, minor_t *minor) 00457 { 00458 zvol_state_t *zv; 00459 00460 mutex_enter(&spa_namespace_lock); 00461 zv = zvol_minor_lookup(name); 00462 if (minor && zv) 00463 *minor = zv->zv_minor; 00464 mutex_exit(&spa_namespace_lock); 00465 return (zv ? 0 : -1); 00466 } 00467 #endif /* sun */ 00468 00472 int 00473 zvol_create_minor(const char *name) 00474 { 00475 zfs_soft_state_t *zs; 00476 zvol_state_t *zv; 00477 objset_t *os; 00478 dmu_object_info_t doi; 00479 uint64_t volsize; 00480 int error; 00481 00482 ZFS_LOG(1, "Creating ZVOL %s...", name); 00483 00484 mutex_enter(&spa_namespace_lock); 00485 00486 if (zvol_minor_lookup(name) != NULL) { 00487 mutex_exit(&spa_namespace_lock); 00488 return (EEXIST); 00489 } 00490 00491 /* lie and say we're read-only */ 00492 error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); 00493 00494 if (error) { 00495 mutex_exit(&spa_namespace_lock); 00496 return (error); 00497 } 00498 00499 #ifdef sun 00500 if ((minor = zfsdev_minor_alloc()) == 0) { 00501 dmu_objset_disown(os, FTAG); 00502 mutex_exit(&spa_namespace_lock); 00503 return (ENXIO); 00504 } 00505 00506 if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { 00507 dmu_objset_disown(os, FTAG); 00508 mutex_exit(&spa_namespace_lock); 00509 return (EAGAIN); 00510 } 00511 (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, 00512 (char *)name); 00513 00514 (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor); 00515 00516 if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, 00517 minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 00518 ddi_soft_state_free(zfsdev_state, minor); 00519 dmu_objset_disown(os, FTAG); 00520 mutex_exit(&spa_namespace_lock); 00521 return (EAGAIN); 00522 } 00523 00524 (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); 00525 00526 if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, 00527 minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 00528 ddi_remove_minor_node(zfs_dip, chrbuf); 00529 ddi_soft_state_free(zfsdev_state, minor); 00530 dmu_objset_disown(os, FTAG); 00531 mutex_exit(&spa_namespace_lock); 00532 return (EAGAIN); 00533 } 00534 00535 zs = ddi_get_soft_state(zfsdev_state, minor); 00536 zs->zss_type = ZSST_ZVOL; 00537 zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); 00538 #else /* !sun */ 00539 00540 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 00541 if (error) { 00542 ASSERT(error == 0); 00543 dmu_objset_disown(os, zvol_tag); 00544 mutex_exit(&spa_namespace_lock); 00545 return (error); 00546 } 00547 00548 DROP_GIANT(); 00549 g_topology_lock(); 00550 zv = zvol_geom_create(name); 00551 zv->zv_volsize = volsize; 00552 zv->zv_provider->mediasize = zv->zv_volsize; 00553 00554 #endif /* !sun */ 00555 00556 (void) strlcpy(zv->zv_name, name, MAXPATHLEN); 00557 zv->zv_min_bs = DEV_BSHIFT; 00558 zv->zv_objset = os; 00559 if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) 00560 zv->zv_flags |= ZVOL_RDONLY; 00561 mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); 00562 avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, 00563 sizeof (rl_t), offsetof(rl_t, r_node)); 00564 list_create(&zv->zv_extents, sizeof (zvol_extent_t), 00565 offsetof(zvol_extent_t, ze_node)); 00566 /* get and cache the blocksize */ 00567 error = dmu_object_info(os, ZVOL_OBJ, &doi); 00568 ASSERT(error == 0); 00569 zv->zv_volblocksize = doi.doi_data_block_size; 00570 00571 if (spa_writeable(dmu_objset_spa(os))) { 00572 if (zil_replay_disable) 00573 zil_destroy(dmu_objset_zil(os), B_FALSE); 00574 else 00575 zil_replay(os, zv, zvol_replay_vector); 00576 } 00577 dmu_objset_disown(os, FTAG); 00578 zv->zv_objset = NULL; 00579 00580 zvol_minors++; 00581 00582 mutex_exit(&spa_namespace_lock); 00583 00584 zvol_geom_run(zv); 00585 00586 g_topology_unlock(); 00587 PICKUP_GIANT(); 00588 00589 ZFS_LOG(1, "ZVOL %s created.", name); 00590 00591 return (0); 00592 } 00593 00597 static int 00598 zvol_remove_zv(zvol_state_t *zv) 00599 { 00600 #ifdef sun 00601 minor_t minor = zv->zv_minor; 00602 #endif 00603 00604 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 00605 if (zv->zv_total_opens != 0) 00606 return (EBUSY); 00607 00608 ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); 00609 00610 #ifdef sun 00611 (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor); 00612 ddi_remove_minor_node(zfs_dip, nmbuf); 00613 #endif /* sun */ 00614 00615 avl_destroy(&zv->zv_znode.z_range_avl); 00616 mutex_destroy(&zv->zv_znode.z_range_lock); 00617 00618 zvol_geom_destroy(zv); 00619 00620 zvol_minors--; 00621 return (0); 00622 } 00623 00624 int 00625 zvol_remove_minor(const char *name) 00626 { 00627 zvol_state_t *zv; 00628 int rc; 00629 00630 mutex_enter(&spa_namespace_lock); 00631 if ((zv = zvol_minor_lookup(name)) == NULL) { 00632 mutex_exit(&spa_namespace_lock); 00633 return (ENXIO); 00634 } 00635 g_topology_lock(); 00636 rc = zvol_remove_zv(zv); 00637 g_topology_unlock(); 00638 mutex_exit(&spa_namespace_lock); 00639 return (rc); 00640 } 00641 00642 int 00643 zvol_first_open(zvol_state_t *zv) 00644 { 00645 objset_t *os; 00646 uint64_t volsize; 00647 int error; 00648 uint64_t readonly; 00649 00650 /* lie and say we're read-only */ 00651 error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, 00652 zvol_tag, &os); 00653 if (error) 00654 return (error); 00655 00656 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 00657 if (error) { 00658 ASSERT(error == 0); 00659 dmu_objset_disown(os, zvol_tag); 00660 return (error); 00661 } 00662 zv->zv_objset = os; 00663 error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); 00664 if (error) { 00665 dmu_objset_disown(os, zvol_tag); 00666 return (error); 00667 } 00668 zv->zv_volsize = volsize; 00669 zv->zv_zilog = zil_open(os, zvol_get_data); 00670 zvol_size_changed(zv); 00671 00672 VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly, 00673 NULL) == 0); 00674 if (readonly || dmu_objset_is_snapshot(os) || 00675 !spa_writeable(dmu_objset_spa(os))) 00676 zv->zv_flags |= ZVOL_RDONLY; 00677 else 00678 zv->zv_flags &= ~ZVOL_RDONLY; 00679 return (error); 00680 } 00681 00682 void 00683 zvol_last_close(zvol_state_t *zv) 00684 { 00685 zil_close(zv->zv_zilog); 00686 zv->zv_zilog = NULL; 00687 00688 dmu_buf_rele(zv->zv_dbuf, zvol_tag); 00689 zv->zv_dbuf = NULL; 00690 00691 /* 00692 * Evict cached data 00693 */ 00694 if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && 00695 !(zv->zv_flags & ZVOL_RDONLY)) 00696 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 00697 (void) dmu_objset_evict_dbufs(zv->zv_objset); 00698 00699 dmu_objset_disown(zv->zv_objset, zvol_tag); 00700 zv->zv_objset = NULL; 00701 } 00702 00703 #ifdef sun 00704 int 00705 zvol_prealloc(zvol_state_t *zv) 00706 { 00707 objset_t *os = zv->zv_objset; 00708 dmu_tx_t *tx; 00709 uint64_t refd, avail, usedobjs, availobjs; 00710 uint64_t resid = zv->zv_volsize; 00711 uint64_t off = 0; 00712 00713 /* Check the space usage before attempting to allocate the space */ 00714 dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); 00715 if (avail < zv->zv_volsize) 00716 return (ENOSPC); 00717 00718 /* Free old extents if they exist */ 00719 zvol_free_extents(zv); 00720 00721 while (resid != 0) { 00722 int error; 00723 uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); 00724 00725 tx = dmu_tx_create(os); 00726 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 00727 error = dmu_tx_assign(tx, TXG_WAIT); 00728 if (error) { 00729 dmu_tx_abort(tx); 00730 (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); 00731 return (error); 00732 } 00733 dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx); 00734 dmu_tx_commit(tx); 00735 off += bytes; 00736 resid -= bytes; 00737 } 00738 txg_wait_synced(dmu_objset_pool(os), 0); 00739 00740 return (0); 00741 } 00742 #endif /* sun */ 00743 00744 int 00745 zvol_update_volsize(objset_t *os, uint64_t volsize) 00746 { 00747 dmu_tx_t *tx; 00748 int error; 00749 00750 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 00751 00752 tx = dmu_tx_create(os); 00753 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 00754 error = dmu_tx_assign(tx, TXG_WAIT); 00755 if (error) { 00756 dmu_tx_abort(tx); 00757 return (error); 00758 } 00759 00760 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, 00761 &volsize, tx); 00762 dmu_tx_commit(tx); 00763 00764 if (error == 0) 00765 error = dmu_free_long_range(os, 00766 ZVOL_OBJ, volsize, DMU_OBJECT_END); 00767 return (error); 00768 } 00769 00770 void 00771 zvol_remove_minors(const char *name) 00772 { 00773 struct g_geom *gp, *gptmp; 00774 struct g_provider *pp; 00775 zvol_state_t *zv; 00776 size_t namelen; 00777 00778 namelen = strlen(name); 00779 00780 DROP_GIANT(); 00781 mutex_enter(&spa_namespace_lock); 00782 g_topology_lock(); 00783 00784 LIST_FOREACH_SAFE(gp, &zfs_zvol_class.geom, geom, gptmp) { 00785 pp = LIST_FIRST(&gp->provider); 00786 if (pp == NULL) 00787 continue; 00788 zv = pp->private; 00789 if (zv == NULL) 00790 continue; 00791 if (strcmp(zv->zv_name, name) == 0 || 00792 (strncmp(zv->zv_name, name, namelen) == 0 && 00793 zv->zv_name[namelen] == '/')) { 00794 (void) zvol_remove_zv(zv); 00795 } 00796 } 00797 00798 g_topology_unlock(); 00799 mutex_exit(&spa_namespace_lock); 00800 PICKUP_GIANT(); 00801 } 00802 00803 int 00804 zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) 00805 { 00806 zvol_state_t *zv = NULL; 00807 objset_t *os; 00808 int error; 00809 dmu_object_info_t doi; 00810 uint64_t old_volsize = 0ULL; 00811 uint64_t readonly; 00812 00813 mutex_enter(&spa_namespace_lock); 00814 zv = zvol_minor_lookup(name); 00815 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 00816 mutex_exit(&spa_namespace_lock); 00817 return (error); 00818 } 00819 00820 if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || 00821 (error = zvol_check_volsize(volsize, 00822 doi.doi_data_block_size)) != 0) 00823 goto out; 00824 00825 VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, 00826 NULL) == 0); 00827 if (readonly) { 00828 error = EROFS; 00829 goto out; 00830 } 00831 00832 error = zvol_update_volsize(os, volsize); 00833 /* 00834 * Reinitialize the dump area to the new size. If we 00835 * failed to resize the dump area then restore it back to 00836 * its original size. 00837 */ 00838 if (zv && error == 0) { 00839 #ifdef ZVOL_DUMP 00840 if (zv->zv_flags & ZVOL_DUMPIFIED) { 00841 old_volsize = zv->zv_volsize; 00842 zv->zv_volsize = volsize; 00843 if ((error = zvol_dumpify(zv)) != 0 || 00844 (error = dumpvp_resize()) != 0) { 00845 (void) zvol_update_volsize(os, old_volsize); 00846 zv->zv_volsize = old_volsize; 00847 error = zvol_dumpify(zv); 00848 } 00849 } 00850 #endif /* ZVOL_DUMP */ 00851 if (error == 0) { 00852 zv->zv_volsize = volsize; 00853 zvol_size_changed(zv); 00854 } 00855 } 00856 00857 #ifdef sun 00858 /* 00859 * Generate a LUN expansion event. 00860 */ 00861 if (zv && error == 0) { 00862 sysevent_id_t eid; 00863 nvlist_t *attr; 00864 char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 00865 00866 (void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV, 00867 zv->zv_minor); 00868 00869 VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 00870 VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 00871 00872 (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 00873 ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 00874 00875 nvlist_free(attr); 00876 kmem_free(physpath, MAXPATHLEN); 00877 } 00878 #endif /* sun */ 00879 00880 out: 00881 dmu_objset_rele(os, FTAG); 00882 00883 mutex_exit(&spa_namespace_lock); 00884 00885 return (error); 00886 } 00887 00888 /*ARGSUSED*/ 00889 static int 00890 zvol_open(struct g_provider *pp, int flag, int count) 00891 { 00892 zvol_state_t *zv; 00893 int err = 0; 00894 boolean_t locked = B_FALSE; 00895 00896 /* 00897 * Protect against recursively entering spa_namespace_lock 00898 * when spa_open() is used for a pool on a (local) ZVOL(s). 00899 * This is needed since we replaced upstream zfsdev_state_lock 00900 * with spa_namespace_lock in the ZVOL code. 00901 * We are using the same trick as spa_open(). 00902 * Note that calls in zvol_first_open which need to resolve 00903 * pool name to a spa object will enter spa_open() 00904 * recursively, but that function already has all the 00905 * necessary protection. 00906 */ 00907 if (!MUTEX_HELD(&spa_namespace_lock)) { 00908 mutex_enter(&spa_namespace_lock); 00909 locked = B_TRUE; 00910 } 00911 00912 zv = pp->private; 00913 if (zv == NULL) { 00914 if (locked) 00915 mutex_exit(&spa_namespace_lock); 00916 return (ENXIO); 00917 } 00918 00919 if (zv->zv_total_opens == 0) 00920 err = zvol_first_open(zv); 00921 if (err) { 00922 if (locked) 00923 mutex_exit(&spa_namespace_lock); 00924 return (err); 00925 } 00926 if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 00927 err = EROFS; 00928 goto out; 00929 } 00930 if (zv->zv_flags & ZVOL_EXCL) { 00931 err = EBUSY; 00932 goto out; 00933 } 00934 #ifdef FEXCL 00935 if (flag & FEXCL) { 00936 if (zv->zv_total_opens != 0) { 00937 err = EBUSY; 00938 goto out; 00939 } 00940 zv->zv_flags |= ZVOL_EXCL; 00941 } 00942 #endif 00943 00944 zv->zv_total_opens += count; 00945 if (locked) 00946 mutex_exit(&spa_namespace_lock); 00947 00948 return (err); 00949 out: 00950 if (zv->zv_total_opens == 0) 00951 zvol_last_close(zv); 00952 if (locked) 00953 mutex_exit(&spa_namespace_lock); 00954 return (err); 00955 } 00956 00957 /*ARGSUSED*/ 00958 static int 00959 zvol_close(struct g_provider *pp, int flag, int count) 00960 { 00961 zvol_state_t *zv; 00962 int error = 0; 00963 boolean_t locked = B_FALSE; 00964 00965 /* See comment in zvol_open(). */ 00966 if (!MUTEX_HELD(&spa_namespace_lock)) { 00967 mutex_enter(&spa_namespace_lock); 00968 locked = B_TRUE; 00969 } 00970 00971 zv = pp->private; 00972 if (zv == NULL) { 00973 if (locked) 00974 mutex_exit(&spa_namespace_lock); 00975 return (ENXIO); 00976 } 00977 00978 if (zv->zv_flags & ZVOL_EXCL) { 00979 ASSERT(zv->zv_total_opens == 1); 00980 zv->zv_flags &= ~ZVOL_EXCL; 00981 } 00982 00983 /* 00984 * If the open count is zero, this is a spurious close. 00985 * That indicates a bug in the kernel / DDI framework. 00986 */ 00987 ASSERT(zv->zv_total_opens != 0); 00988 00989 /* 00990 * You may get multiple opens, but only one close. 00991 */ 00992 zv->zv_total_opens -= count; 00993 00994 if (zv->zv_total_opens == 0) 00995 zvol_last_close(zv); 00996 00997 if (locked) 00998 mutex_exit(&spa_namespace_lock); 00999 return (error); 01000 } 01001 01002 static void 01003 zvol_get_done(zgd_t *zgd, int error) 01004 { 01005 if (zgd->zgd_db) 01006 dmu_buf_rele(zgd->zgd_db, zgd); 01007 01008 zfs_range_unlock(zgd->zgd_rl); 01009 01010 if (error == 0 && zgd->zgd_bp) 01011 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 01012 01013 kmem_free(zgd, sizeof (zgd_t)); 01014 } 01015 01019 static int 01020 zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 01021 { 01022 zvol_state_t *zv = arg; 01023 objset_t *os = zv->zv_objset; 01024 uint64_t object = ZVOL_OBJ; 01025 uint64_t offset = lr->lr_offset; 01026 uint64_t size = lr->lr_length; /* length of user data */ 01027 blkptr_t *bp = &lr->lr_blkptr; 01028 dmu_buf_t *db; 01029 zgd_t *zgd; 01030 int error; 01031 01032 ASSERT(zio != NULL); 01033 ASSERT(size != 0); 01034 01035 zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 01036 zgd->zgd_zilog = zv->zv_zilog; 01037 zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); 01038 01039 /* 01040 * Write records come in two flavors: immediate and indirect. 01041 * For small writes it's cheaper to store the data with the 01042 * log record (immediate); for large writes it's cheaper to 01043 * sync the data and get a pointer to it (indirect) so that 01044 * we don't have to write the data twice. 01045 */ 01046 if (buf != NULL) { /* immediate write */ 01047 error = dmu_read(os, object, offset, size, buf, 01048 DMU_READ_NO_PREFETCH); 01049 } else { 01050 size = zv->zv_volblocksize; 01051 offset = P2ALIGN(offset, size); 01052 error = dmu_buf_hold(os, object, offset, zgd, &db, 01053 DMU_READ_NO_PREFETCH); 01054 if (error == 0) { 01055 zgd->zgd_db = db; 01056 zgd->zgd_bp = bp; 01057 01058 ASSERT(db->db_offset == offset); 01059 ASSERT(db->db_size == size); 01060 01061 error = dmu_sync(zio, lr->lr_common.lrc_txg, 01062 zvol_get_done, zgd); 01063 01064 if (error == 0) 01065 return (0); 01066 } 01067 } 01068 01069 zvol_get_done(zgd, error); 01070 01071 return (error); 01072 } 01073 01074 ssize_t zvol_immediate_write_sz = 32768; 01081 static void 01082 zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, 01083 boolean_t sync) 01084 { 01085 uint32_t blocksize = zv->zv_volblocksize; 01086 zilog_t *zilog = zv->zv_zilog; 01087 boolean_t slogging; 01088 ssize_t immediate_write_sz; 01089 01090 if (zil_replaying(zilog, tx)) 01091 return; 01092 01093 immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) 01094 ? 0 : zvol_immediate_write_sz; 01095 01096 slogging = spa_has_slogs(zilog->zl_spa) && 01097 (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); 01098 01099 while (resid) { 01100 itx_t *itx; 01101 lr_write_t *lr; 01102 ssize_t len; 01103 itx_wr_state_t write_state; 01104 01105 /* 01106 * Unlike zfs_log_write() we can be called with 01107 * upto DMU_MAX_ACCESS/2 (5MB) writes. 01108 */ 01109 if (blocksize > immediate_write_sz && !slogging && 01110 resid >= blocksize && off % blocksize == 0) { 01111 write_state = WR_INDIRECT; /* uses dmu_sync */ 01112 len = blocksize; 01113 } else if (sync) { 01114 write_state = WR_COPIED; 01115 len = MIN(ZIL_MAX_LOG_DATA, resid); 01116 } else { 01117 write_state = WR_NEED_COPY; 01118 len = MIN(ZIL_MAX_LOG_DATA, resid); 01119 } 01120 01121 itx = zil_itx_create(TX_WRITE, sizeof (*lr) + 01122 (write_state == WR_COPIED ? len : 0)); 01123 lr = (lr_write_t *)&itx->itx_lr; 01124 if (write_state == WR_COPIED && dmu_read(zv->zv_objset, 01125 ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { 01126 zil_itx_destroy(itx); 01127 itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 01128 lr = (lr_write_t *)&itx->itx_lr; 01129 write_state = WR_NEED_COPY; 01130 } 01131 01132 itx->itx_wr_state = write_state; 01133 if (write_state == WR_NEED_COPY) 01134 itx->itx_sod += len; 01135 lr->lr_foid = ZVOL_OBJ; 01136 lr->lr_offset = off; 01137 lr->lr_length = len; 01138 lr->lr_blkoff = 0; 01139 BP_ZERO(&lr->lr_blkptr); 01140 01141 itx->itx_private = zv; 01142 itx->itx_sync = sync; 01143 01144 zil_itx_assign(zilog, itx, tx); 01145 01146 off += len; 01147 resid -= len; 01148 } 01149 } 01150 01151 #ifdef sun 01152 static int 01153 zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size, 01154 boolean_t doread, boolean_t isdump) 01155 { 01156 vdev_disk_t *dvd; 01157 int c; 01158 int numerrors = 0; 01159 01160 for (c = 0; c < vd->vdev_children; c++) { 01161 ASSERT(vd->vdev_ops == &vdev_mirror_ops || 01162 vd->vdev_ops == &vdev_replacing_ops || 01163 vd->vdev_ops == &vdev_spare_ops); 01164 int err = zvol_dumpio_vdev(vd->vdev_child[c], 01165 addr, offset, size, doread, isdump); 01166 if (err != 0) { 01167 numerrors++; 01168 } else if (doread) { 01169 break; 01170 } 01171 } 01172 01173 if (!vd->vdev_ops->vdev_op_leaf) 01174 return (numerrors < vd->vdev_children ? 0 : EIO); 01175 01176 if (doread && !vdev_readable(vd)) 01177 return (EIO); 01178 else if (!doread && !vdev_writeable(vd)) 01179 return (EIO); 01180 01181 dvd = vd->vdev_tsd; 01182 ASSERT3P(dvd, !=, NULL); 01183 offset += VDEV_LABEL_START_SIZE; 01184 01185 if (ddi_in_panic() || isdump) { 01186 ASSERT(!doread); 01187 if (doread) 01188 return (EIO); 01189 return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), 01190 lbtodb(size))); 01191 } else { 01192 return (vdev_disk_physio(dvd->vd_lh, addr, size, offset, 01193 doread ? B_READ : B_WRITE)); 01194 } 01195 } 01196 01197 static int 01198 zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, 01199 boolean_t doread, boolean_t isdump) 01200 { 01201 vdev_t *vd; 01202 int error; 01203 zvol_extent_t *ze; 01204 spa_t *spa = dmu_objset_spa(zv->zv_objset); 01205 01206 /* Must be sector aligned, and not stradle a block boundary. */ 01207 if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || 01208 P2BOUNDARY(offset, size, zv->zv_volblocksize)) { 01209 return (EINVAL); 01210 } 01211 ASSERT(size <= zv->zv_volblocksize); 01212 01213 /* Locate the extent this belongs to */ 01214 ze = list_head(&zv->zv_extents); 01215 while (offset >= ze->ze_nblks * zv->zv_volblocksize) { 01216 offset -= ze->ze_nblks * zv->zv_volblocksize; 01217 ze = list_next(&zv->zv_extents, ze); 01218 } 01219 01220 if (!ddi_in_panic()) 01221 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 01222 01223 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); 01224 offset += DVA_GET_OFFSET(&ze->ze_dva); 01225 error = zvol_dumpio_vdev(vd, addr, offset, size, doread, isdump); 01226 01227 if (!ddi_in_panic()) 01228 spa_config_exit(spa, SCL_STATE, FTAG); 01229 01230 return (error); 01231 } 01232 #endif /* sun */ 01233 01234 int 01235 zvol_strategy(struct bio *bp) 01236 { 01237 zvol_state_t *zv = bp->bio_to->private; 01238 uint64_t off, volsize; 01239 size_t resid; 01240 char *addr; 01241 objset_t *os; 01242 rl_t *rl; 01243 int error = 0; 01244 boolean_t doread = (bp->bio_cmd == BIO_READ); 01245 boolean_t sync; 01246 01247 if (zv == NULL) { 01248 g_io_deliver(bp, ENXIO); 01249 return (0); 01250 } 01251 01252 if (bp->bio_cmd != BIO_READ && (zv->zv_flags & ZVOL_RDONLY)) { 01253 g_io_deliver(bp, EROFS); 01254 return (0); 01255 } 01256 01257 off = bp->bio_offset; 01258 volsize = zv->zv_volsize; 01259 01260 os = zv->zv_objset; 01261 ASSERT(os != NULL); 01262 01263 addr = bp->bio_data; 01264 resid = bp->bio_length; 01265 01266 if (resid > 0 && (off < 0 || off >= volsize)) { 01267 g_io_deliver(bp, EIO); 01268 return (0); 01269 } 01270 01271 sync = !doread && zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; 01272 01273 /* 01274 * There must be no buffer changes when doing a dmu_sync() because 01275 * we can't change the data whilst calculating the checksum. 01276 */ 01277 rl = zfs_range_lock(&zv->zv_znode, off, resid, 01278 doread ? RL_READER : RL_WRITER); 01279 01280 while (resid != 0 && off < volsize) { 01281 size_t size = MIN(resid, zvol_maxphys); 01282 if (doread) { 01283 error = dmu_read(os, ZVOL_OBJ, off, size, addr, 01284 DMU_READ_PREFETCH); 01285 } else { 01286 dmu_tx_t *tx = dmu_tx_create(os); 01287 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 01288 error = dmu_tx_assign(tx, TXG_WAIT); 01289 if (error) { 01290 dmu_tx_abort(tx); 01291 } else { 01292 dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 01293 zvol_log_write(zv, tx, off, size, sync); 01294 dmu_tx_commit(tx); 01295 } 01296 } 01297 if (error) { 01298 /* convert checksum errors into IO errors */ 01299 if (error == ECKSUM) 01300 error = EIO; 01301 break; 01302 } 01303 off += size; 01304 addr += size; 01305 resid -= size; 01306 } 01307 zfs_range_unlock(rl); 01308 01309 bp->bio_completed = bp->bio_length - resid; 01310 if (bp->bio_completed < bp->bio_length) 01311 bp->bio_error = (off > volsize ? EINVAL : error); 01312 01313 if (sync) 01314 zil_commit(zv->zv_zilog, ZVOL_OBJ); 01315 g_io_deliver(bp, 0); 01316 01317 return (0); 01318 } 01319 01320 #ifdef sun 01321 01329 void 01330 zvol_minphys(struct buf *bp) 01331 { 01332 if (bp->b_bcount > zvol_maxphys) 01333 bp->b_bcount = zvol_maxphys; 01334 } 01335 01336 int 01337 zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) 01338 { 01339 minor_t minor = getminor(dev); 01340 zvol_state_t *zv; 01341 int error = 0; 01342 uint64_t size; 01343 uint64_t boff; 01344 uint64_t resid; 01345 01346 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 01347 if (zv == NULL) 01348 return (ENXIO); 01349 01350 boff = ldbtob(blkno); 01351 resid = ldbtob(nblocks); 01352 01353 VERIFY3U(boff + resid, <=, zv->zv_volsize); 01354 01355 while (resid) { 01356 size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); 01357 error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE); 01358 if (error) 01359 break; 01360 boff += size; 01361 addr += size; 01362 resid -= size; 01363 } 01364 01365 return (error); 01366 } 01367 01368 /*ARGSUSED*/ 01369 int 01370 zvol_read(dev_t dev, uio_t *uio, cred_t *cr) 01371 { 01372 minor_t minor = getminor(dev); 01373 zvol_state_t *zv; 01374 uint64_t volsize; 01375 rl_t *rl; 01376 int error = 0; 01377 01378 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 01379 if (zv == NULL) 01380 return (ENXIO); 01381 01382 volsize = zv->zv_volsize; 01383 if (uio->uio_resid > 0 && 01384 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) 01385 return (EIO); 01386 01387 if (zv->zv_flags & ZVOL_DUMPIFIED) { 01388 error = physio(zvol_strategy, NULL, dev, B_READ, 01389 zvol_minphys, uio); 01390 return (error); 01391 } 01392 01393 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 01394 RL_READER); 01395 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 01396 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 01397 01398 /* don't read past the end */ 01399 if (bytes > volsize - uio->uio_loffset) 01400 bytes = volsize - uio->uio_loffset; 01401 01402 error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes); 01403 if (error) { 01404 /* convert checksum errors into IO errors */ 01405 if (error == ECKSUM) 01406 error = EIO; 01407 break; 01408 } 01409 } 01410 zfs_range_unlock(rl); 01411 return (error); 01412 } 01413 01414 /*ARGSUSED*/ 01415 int 01416 zvol_write(dev_t dev, uio_t *uio, cred_t *cr) 01417 { 01418 minor_t minor = getminor(dev); 01419 zvol_state_t *zv; 01420 uint64_t volsize; 01421 rl_t *rl; 01422 int error = 0; 01423 boolean_t sync; 01424 01425 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 01426 if (zv == NULL) 01427 return (ENXIO); 01428 01429 volsize = zv->zv_volsize; 01430 if (uio->uio_resid > 0 && 01431 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) 01432 return (EIO); 01433 01434 if (zv->zv_flags & ZVOL_DUMPIFIED) { 01435 error = physio(zvol_strategy, NULL, dev, B_WRITE, 01436 zvol_minphys, uio); 01437 return (error); 01438 } 01439 01440 sync = !(zv->zv_flags & ZVOL_WCE) || 01441 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); 01442 01443 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 01444 RL_WRITER); 01445 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 01446 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 01447 uint64_t off = uio->uio_loffset; 01448 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 01449 01450 if (bytes > volsize - off) /* don't write past the end */ 01451 bytes = volsize - off; 01452 01453 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 01454 error = dmu_tx_assign(tx, TXG_WAIT); 01455 if (error) { 01456 dmu_tx_abort(tx); 01457 break; 01458 } 01459 error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx); 01460 if (error == 0) 01461 zvol_log_write(zv, tx, off, bytes, sync); 01462 dmu_tx_commit(tx); 01463 01464 if (error) 01465 break; 01466 } 01467 zfs_range_unlock(rl); 01468 if (sync) 01469 zil_commit(zv->zv_zilog, ZVOL_OBJ); 01470 return (error); 01471 } 01472 01473 int 01474 zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) 01475 { 01476 struct uuid uuid = EFI_RESERVED; 01477 efi_gpe_t gpe = { 0 }; 01478 uint32_t crc; 01479 dk_efi_t efi; 01480 int length; 01481 char *ptr; 01482 01483 if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag)) 01484 return (EFAULT); 01485 ptr = (char *)(uintptr_t)efi.dki_data_64; 01486 length = efi.dki_length; 01487 /* 01488 * Some clients may attempt to request a PMBR for the 01489 * zvol. Currently this interface will return EINVAL to 01490 * such requests. These requests could be supported by 01491 * adding a check for lba == 0 and consing up an appropriate 01492 * PMBR. 01493 */ 01494 if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0) 01495 return (EINVAL); 01496 01497 gpe.efi_gpe_StartingLBA = LE_64(34ULL); 01498 gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1); 01499 UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); 01500 01501 if (efi.dki_lba == 1) { 01502 efi_gpt_t gpt = { 0 }; 01503 01504 gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); 01505 gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 01506 gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); 01507 gpt.efi_gpt_MyLBA = LE_64(1ULL); 01508 gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL); 01509 gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1); 01510 gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL); 01511 gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); 01512 gpt.efi_gpt_SizeOfPartitionEntry = 01513 LE_32(sizeof (efi_gpe_t)); 01514 CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); 01515 gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 01516 CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); 01517 gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); 01518 if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length), 01519 flag)) 01520 return (EFAULT); 01521 ptr += sizeof (gpt); 01522 length -= sizeof (gpt); 01523 } 01524 if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe), 01525 length), flag)) 01526 return (EFAULT); 01527 return (0); 01528 } 01529 01539 int 01540 zvol_get_volume_params(minor_t minor, uint64_t *blksize, 01541 uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl, 01542 void **rl_hdl, void **bonus_hdl) 01543 { 01544 zvol_state_t *zv; 01545 01546 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 01547 if (zv == NULL) 01548 return (ENXIO); 01549 if (zv->zv_flags & ZVOL_DUMPIFIED) 01550 return (ENXIO); 01551 01552 ASSERT(blksize && max_xfer_len && minor_hdl && 01553 objset_hdl && zil_hdl && rl_hdl && bonus_hdl); 01554 01555 *blksize = zv->zv_volblocksize; 01556 *max_xfer_len = (uint64_t)zvol_maxphys; 01557 *minor_hdl = zv; 01558 *objset_hdl = zv->zv_objset; 01559 *zil_hdl = zv->zv_zilog; 01560 *rl_hdl = &zv->zv_znode; 01561 *bonus_hdl = zv->zv_dbuf; 01562 return (0); 01563 } 01564 01570 uint64_t 01571 zvol_get_volume_size(void *minor_hdl) 01572 { 01573 zvol_state_t *zv = minor_hdl; 01574 01575 return (zv->zv_volsize); 01576 } 01577 01582 int 01583 zvol_get_volume_wce(void *minor_hdl) 01584 { 01585 zvol_state_t *zv = minor_hdl; 01586 01587 return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0); 01588 } 01589 01593 void 01594 zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid, 01595 boolean_t sync) 01596 { 01597 zvol_state_t *zv = minor_hdl; 01598 01599 zvol_log_write(zv, tx, off, resid, sync); 01600 } 01609 /*ARGSUSED*/ 01610 int 01611 zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) 01612 { 01613 zvol_state_t *zv; 01614 struct dk_cinfo dki; 01615 struct dk_minfo dkm; 01616 struct dk_callback *dkc; 01617 int error = 0; 01618 rl_t *rl; 01619 01620 mutex_enter(&spa_namespace_lock); 01621 01622 zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL); 01623 01624 if (zv == NULL) { 01625 mutex_exit(&spa_namespace_lock); 01626 return (ENXIO); 01627 } 01628 ASSERT(zv->zv_total_opens > 0); 01629 01630 switch (cmd) { 01631 01632 case DKIOCINFO: 01633 bzero(&dki, sizeof (dki)); 01634 (void) strcpy(dki.dki_cname, "zvol"); 01635 (void) strcpy(dki.dki_dname, "zvol"); 01636 dki.dki_ctype = DKC_UNKNOWN; 01637 dki.dki_unit = getminor(dev); 01638 dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); 01639 mutex_exit(&spa_namespace_lock); 01640 if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) 01641 error = EFAULT; 01642 return (error); 01643 01644 case DKIOCGMEDIAINFO: 01645 bzero(&dkm, sizeof (dkm)); 01646 dkm.dki_lbsize = 1U << zv->zv_min_bs; 01647 dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; 01648 dkm.dki_media_type = DK_UNKNOWN; 01649 mutex_exit(&spa_namespace_lock); 01650 if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) 01651 error = EFAULT; 01652 return (error); 01653 01654 case DKIOCGETEFI: 01655 { 01656 uint64_t vs = zv->zv_volsize; 01657 uint8_t bs = zv->zv_min_bs; 01658 01659 mutex_exit(&spa_namespace_lock); 01660 error = zvol_getefi((void *)arg, flag, vs, bs); 01661 return (error); 01662 } 01663 01664 case DKIOCFLUSHWRITECACHE: 01665 dkc = (struct dk_callback *)arg; 01666 mutex_exit(&spa_namespace_lock); 01667 zil_commit(zv->zv_zilog, ZVOL_OBJ); 01668 if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { 01669 (*dkc->dkc_callback)(dkc->dkc_cookie, error); 01670 error = 0; 01671 } 01672 return (error); 01673 01674 case DKIOCGETWCE: 01675 { 01676 int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0; 01677 if (ddi_copyout(&wce, (void *)arg, sizeof (int), 01678 flag)) 01679 error = EFAULT; 01680 break; 01681 } 01682 case DKIOCSETWCE: 01683 { 01684 int wce; 01685 if (ddi_copyin((void *)arg, &wce, sizeof (int), 01686 flag)) { 01687 error = EFAULT; 01688 break; 01689 } 01690 if (wce) { 01691 zv->zv_flags |= ZVOL_WCE; 01692 mutex_exit(&spa_namespace_lock); 01693 } else { 01694 zv->zv_flags &= ~ZVOL_WCE; 01695 mutex_exit(&spa_namespace_lock); 01696 zil_commit(zv->zv_zilog, ZVOL_OBJ); 01697 } 01698 return (0); 01699 } 01700 01701 case DKIOCGGEOM: 01702 case DKIOCGVTOC: 01703 /* 01704 * commands using these (like prtvtoc) expect ENOTSUP 01705 * since we're emulating an EFI label 01706 */ 01707 error = ENOTSUP; 01708 break; 01709 01710 case DKIOCDUMPINIT: 01711 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 01712 RL_WRITER); 01713 error = zvol_dumpify(zv); 01714 zfs_range_unlock(rl); 01715 break; 01716 01717 case DKIOCDUMPFINI: 01718 if (!(zv->zv_flags & ZVOL_DUMPIFIED)) 01719 break; 01720 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 01721 RL_WRITER); 01722 error = zvol_dump_fini(zv); 01723 zfs_range_unlock(rl); 01724 break; 01725 01726 default: 01727 error = ENOTTY; 01728 break; 01729 01730 } 01731 mutex_exit(&spa_namespace_lock); 01732 return (error); 01733 } 01734 #endif /* sun */ 01735 01736 int 01737 zvol_busy(void) 01738 { 01739 return (zvol_minors != 0); 01740 } 01741 01742 void 01743 zvol_init(void) 01744 { 01745 VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t), 01746 1) == 0); 01747 ZFS_LOG(1, "ZVOL Initialized."); 01748 } 01749 01750 void 01751 zvol_fini(void) 01752 { 01753 ddi_soft_state_fini(&zfsdev_state); 01754 ZFS_LOG(1, "ZVOL Deinitialized."); 01755 } 01756 01757 #ifdef sun 01758 static int 01759 zvol_dump_init(zvol_state_t *zv, boolean_t resize) 01760 { 01761 dmu_tx_t *tx; 01762 int error = 0; 01763 objset_t *os = zv->zv_objset; 01764 nvlist_t *nv = NULL; 01765 uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); 01766 01767 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 01768 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, 01769 DMU_OBJECT_END); 01770 /* wait for dmu_free_long_range to actually free the blocks */ 01771 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 01772 01773 tx = dmu_tx_create(os); 01774 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 01775 dmu_tx_hold_bonus(tx, ZVOL_OBJ); 01776 error = dmu_tx_assign(tx, TXG_WAIT); 01777 if (error) { 01778 dmu_tx_abort(tx); 01779 return (error); 01780 } 01781 01782 /* 01783 * If we are resizing the dump device then we only need to 01784 * update the refreservation to match the newly updated 01785 * zvolsize. Otherwise, we save off the original state of the 01786 * zvol so that we can restore them if the zvol is ever undumpified. 01787 */ 01788 if (resize) { 01789 error = zap_update(os, ZVOL_ZAP_OBJ, 01790 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 01791 &zv->zv_volsize, tx); 01792 } else { 01793 uint64_t checksum, compress, refresrv, vbs, dedup; 01794 01795 error = dsl_prop_get_integer(zv->zv_name, 01796 zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); 01797 error = error ? error : dsl_prop_get_integer(zv->zv_name, 01798 zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); 01799 error = error ? error : dsl_prop_get_integer(zv->zv_name, 01800 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); 01801 error = error ? error : dsl_prop_get_integer(zv->zv_name, 01802 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL); 01803 if (version >= SPA_VERSION_DEDUP) { 01804 error = error ? error : 01805 dsl_prop_get_integer(zv->zv_name, 01806 zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL); 01807 } 01808 01809 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 01810 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, 01811 &compress, tx); 01812 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 01813 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); 01814 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 01815 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 01816 &refresrv, tx); 01817 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 01818 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, 01819 &vbs, tx); 01820 error = error ? error : dmu_object_set_blocksize( 01821 os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx); 01822 if (version >= SPA_VERSION_DEDUP) { 01823 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 01824 zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, 01825 &dedup, tx); 01826 } 01827 if (error == 0) 01828 zv->zv_volblocksize = SPA_MAXBLOCKSIZE; 01829 } 01830 dmu_tx_commit(tx); 01831 01832 /* 01833 * We only need update the zvol's property if we are initializing 01834 * the dump area for the first time. 01835 */ 01836 if (!resize) { 01837 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 01838 VERIFY(nvlist_add_uint64(nv, 01839 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); 01840 VERIFY(nvlist_add_uint64(nv, 01841 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 01842 ZIO_COMPRESS_OFF) == 0); 01843 VERIFY(nvlist_add_uint64(nv, 01844 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 01845 ZIO_CHECKSUM_OFF) == 0); 01846 if (version >= SPA_VERSION_DEDUP) { 01847 VERIFY(nvlist_add_uint64(nv, 01848 zfs_prop_to_name(ZFS_PROP_DEDUP), 01849 ZIO_CHECKSUM_OFF) == 0); 01850 } 01851 01852 error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 01853 nv, NULL); 01854 nvlist_free(nv); 01855 01856 if (error) 01857 return (error); 01858 } 01859 01860 /* Allocate the space for the dump */ 01861 error = zvol_prealloc(zv); 01862 return (error); 01863 } 01864 01865 static int 01866 zvol_dumpify(zvol_state_t *zv) 01867 { 01868 int error = 0; 01869 uint64_t dumpsize = 0; 01870 dmu_tx_t *tx; 01871 objset_t *os = zv->zv_objset; 01872 01873 if (zv->zv_flags & ZVOL_RDONLY) 01874 return (EROFS); 01875 01876 if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 01877 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { 01878 boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE; 01879 01880 if ((error = zvol_dump_init(zv, resize)) != 0) { 01881 (void) zvol_dump_fini(zv); 01882 return (error); 01883 } 01884 } 01885 01886 /* 01887 * Build up our lba mapping. 01888 */ 01889 error = zvol_get_lbas(zv); 01890 if (error) { 01891 (void) zvol_dump_fini(zv); 01892 return (error); 01893 } 01894 01895 tx = dmu_tx_create(os); 01896 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 01897 error = dmu_tx_assign(tx, TXG_WAIT); 01898 if (error) { 01899 dmu_tx_abort(tx); 01900 (void) zvol_dump_fini(zv); 01901 return (error); 01902 } 01903 01904 zv->zv_flags |= ZVOL_DUMPIFIED; 01905 error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, 01906 &zv->zv_volsize, tx); 01907 dmu_tx_commit(tx); 01908 01909 if (error) { 01910 (void) zvol_dump_fini(zv); 01911 return (error); 01912 } 01913 01914 txg_wait_synced(dmu_objset_pool(os), 0); 01915 return (0); 01916 } 01917 01918 static int 01919 zvol_dump_fini(zvol_state_t *zv) 01920 { 01921 dmu_tx_t *tx; 01922 objset_t *os = zv->zv_objset; 01923 nvlist_t *nv; 01924 int error = 0; 01925 uint64_t checksum, compress, refresrv, vbs, dedup; 01926 uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); 01927 01928 /* 01929 * Attempt to restore the zvol back to its pre-dumpified state. 01930 * This is a best-effort attempt as it's possible that not all 01931 * of these properties were initialized during the dumpify process 01932 * (i.e. error during zvol_dump_init). 01933 */ 01934 01935 tx = dmu_tx_create(os); 01936 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 01937 error = dmu_tx_assign(tx, TXG_WAIT); 01938 if (error) { 01939 dmu_tx_abort(tx); 01940 return (error); 01941 } 01942 (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); 01943 dmu_tx_commit(tx); 01944 01945 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 01946 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); 01947 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 01948 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); 01949 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 01950 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); 01951 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 01952 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs); 01953 01954 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 01955 (void) nvlist_add_uint64(nv, 01956 zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); 01957 (void) nvlist_add_uint64(nv, 01958 zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); 01959 (void) nvlist_add_uint64(nv, 01960 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); 01961 if (version >= SPA_VERSION_DEDUP && 01962 zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 01963 zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) { 01964 (void) nvlist_add_uint64(nv, 01965 zfs_prop_to_name(ZFS_PROP_DEDUP), dedup); 01966 } 01967 (void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 01968 nv, NULL); 01969 nvlist_free(nv); 01970 01971 zvol_free_extents(zv); 01972 zv->zv_flags &= ~ZVOL_DUMPIFIED; 01973 (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END); 01974 /* wait for dmu_free_long_range to actually free the blocks */ 01975 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 01976 tx = dmu_tx_create(os); 01977 dmu_tx_hold_bonus(tx, ZVOL_OBJ); 01978 error = dmu_tx_assign(tx, TXG_WAIT); 01979 if (error) { 01980 dmu_tx_abort(tx); 01981 return (error); 01982 } 01983 if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0) 01984 zv->zv_volblocksize = vbs; 01985 dmu_tx_commit(tx); 01986 01987 return (0); 01988 } 01989 #endif /* sun */ 01990 01991 static zvol_state_t * 01992 zvol_geom_create(const char *name) 01993 { 01994 struct g_provider *pp; 01995 struct g_geom *gp; 01996 zvol_state_t *zv; 01997 01998 gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name); 01999 gp->start = zvol_geom_start; 02000 gp->access = zvol_geom_access; 02001 pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name); 02002 pp->sectorsize = DEV_BSIZE; 02003 02004 zv = kmem_zalloc(sizeof(*zv), KM_SLEEP); 02005 zv->zv_provider = pp; 02006 zv->zv_state = 0; 02007 bioq_init(&zv->zv_queue); 02008 mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF); 02009 02010 pp->private = zv; 02011 02012 return (zv); 02013 } 02014 02015 static void 02016 zvol_geom_run(zvol_state_t *zv) 02017 { 02018 struct g_provider *pp; 02019 02020 pp = zv->zv_provider; 02021 g_error_provider(pp, 0); 02022 02023 kproc_kthread_add(zvol_geom_worker, zv, &zfsproc, NULL, 0, 0, 02024 "zfskern", "zvol %s", pp->name + sizeof(ZVOL_DRIVER)); 02025 } 02026 02027 static void 02028 zvol_geom_destroy(zvol_state_t *zv) 02029 { 02030 struct g_provider *pp; 02031 02032 g_topology_assert(); 02033 02034 mtx_lock(&zv->zv_queue_mtx); 02035 zv->zv_state = 1; 02036 wakeup_one(&zv->zv_queue); 02037 while (zv->zv_state != 2) 02038 msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0); 02039 mtx_destroy(&zv->zv_queue_mtx); 02040 02041 pp = zv->zv_provider; 02042 zv->zv_provider = NULL; 02043 pp->private = NULL; 02044 g_wither_geom(pp->geom, ENXIO); 02045 02046 kmem_free(zv, sizeof(*zv)); 02047 } 02048 02049 static int 02050 zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace) 02051 { 02052 int count, error, flags; 02053 02054 g_topology_assert(); 02055 02056 /* 02057 * To make it easier we expect either open or close, but not both 02058 * at the same time. 02059 */ 02060 KASSERT((acr >= 0 && acw >= 0 && ace >= 0) || 02061 (acr <= 0 && acw <= 0 && ace <= 0), 02062 ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).", 02063 pp->name, acr, acw, ace)); 02064 02065 if (pp->private == NULL) { 02066 if (acr <= 0 && acw <= 0 && ace <= 0) 02067 return (0); 02068 return (pp->error); 02069 } 02070 02071 /* 02072 * We don't pass FEXCL flag to zvol_open()/zvol_close() if ace != 0, 02073 * because GEOM already handles that and handles it a bit differently. 02074 * GEOM allows for multiple read/exclusive consumers and ZFS allows 02075 * only one exclusive consumer, no matter if it is reader or writer. 02076 * I like better the way GEOM works so I'll leave it for GEOM to 02077 * decide what to do. 02078 */ 02079 02080 count = acr + acw + ace; 02081 if (count == 0) 02082 return (0); 02083 02084 flags = 0; 02085 if (acr != 0 || ace != 0) 02086 flags |= FREAD; 02087 if (acw != 0) 02088 flags |= FWRITE; 02089 02090 g_topology_unlock(); 02091 if (count > 0) 02092 error = zvol_open(pp, flags, count); 02093 else 02094 error = zvol_close(pp, flags, -count); 02095 g_topology_lock(); 02096 return (error); 02097 } 02098 02099 static void 02100 zvol_geom_start(struct bio *bp) 02101 { 02102 zvol_state_t *zv; 02103 boolean_t first; 02104 02105 switch (bp->bio_cmd) { 02106 case BIO_READ: 02107 case BIO_WRITE: 02108 case BIO_FLUSH: 02109 zv = bp->bio_to->private; 02110 ASSERT(zv != NULL); 02111 mtx_lock(&zv->zv_queue_mtx); 02112 first = (bioq_first(&zv->zv_queue) == NULL); 02113 bioq_insert_tail(&zv->zv_queue, bp); 02114 mtx_unlock(&zv->zv_queue_mtx); 02115 if (first) 02116 wakeup_one(&zv->zv_queue); 02117 break; 02118 case BIO_GETATTR: 02119 case BIO_DELETE: 02120 default: 02121 g_io_deliver(bp, EOPNOTSUPP); 02122 break; 02123 } 02124 } 02125 02126 static void 02127 zvol_geom_worker(void *arg) 02128 { 02129 zvol_state_t *zv; 02130 struct bio *bp; 02131 02132 thread_lock(curthread); 02133 sched_prio(curthread, PRIBIO); 02134 thread_unlock(curthread); 02135 02136 zv = arg; 02137 for (;;) { 02138 mtx_lock(&zv->zv_queue_mtx); 02139 bp = bioq_takefirst(&zv->zv_queue); 02140 if (bp == NULL) { 02141 if (zv->zv_state == 1) { 02142 zv->zv_state = 2; 02143 wakeup(&zv->zv_state); 02144 mtx_unlock(&zv->zv_queue_mtx); 02145 kthread_exit(); 02146 } 02147 msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP, 02148 "zvol:io", 0); 02149 continue; 02150 } 02151 mtx_unlock(&zv->zv_queue_mtx); 02152 switch (bp->bio_cmd) { 02153 case BIO_FLUSH: 02154 zil_commit(zv->zv_zilog, ZVOL_OBJ); 02155 g_io_deliver(bp, 0); 02156 break; 02157 case BIO_READ: 02158 case BIO_WRITE: 02159 zvol_strategy(bp); 02160 break; 02161 } 02162 } 02163 } 02164 02165 extern boolean_t dataset_name_hidden(const char *name); 02166 02167 static int 02168 zvol_create_snapshots(objset_t *os, const char *name) 02169 { 02170 uint64_t cookie, obj; 02171 char *sname; 02172 int error, len; 02173 02174 cookie = obj = 0; 02175 sname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 02176 02177 (void) dmu_objset_find(name, dmu_objset_prefetch, NULL, 02178 DS_FIND_SNAPSHOTS); 02179 02180 for (;;) { 02181 len = snprintf(sname, MAXPATHLEN, "%s@", name); 02182 if (len >= MAXPATHLEN) { 02183 dmu_objset_rele(os, FTAG); 02184 error = ENAMETOOLONG; 02185 break; 02186 } 02187 02188 error = dmu_snapshot_list_next(os, MAXPATHLEN - len, 02189 sname + len, &obj, &cookie, NULL); 02190 if (error != 0) { 02191 if (error == ENOENT) 02192 error = 0; 02193 break; 02194 } 02195 02196 if ((error = zvol_create_minor(sname)) != 0) { 02197 printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", 02198 sname, error); 02199 break; 02200 } 02201 } 02202 02203 kmem_free(sname, MAXPATHLEN); 02204 return (error); 02205 } 02206 02207 int 02208 zvol_create_minors(const char *name) 02209 { 02210 uint64_t cookie; 02211 objset_t *os; 02212 char *osname, *p; 02213 int error, len; 02214 02215 if (dataset_name_hidden(name)) 02216 return (0); 02217 02218 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 02219 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", 02220 name, error); 02221 return (error); 02222 } 02223 if (dmu_objset_type(os) == DMU_OST_ZVOL) { 02224 if ((error = zvol_create_minor(name)) == 0) 02225 error = zvol_create_snapshots(os, name); 02226 else { 02227 printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", 02228 name, error); 02229 } 02230 dmu_objset_rele(os, FTAG); 02231 return (error); 02232 } 02233 if (dmu_objset_type(os) != DMU_OST_ZFS) { 02234 dmu_objset_rele(os, FTAG); 02235 return (0); 02236 } 02237 02238 osname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 02239 if (snprintf(osname, MAXPATHLEN, "%s/", name) >= MAXPATHLEN) { 02240 dmu_objset_rele(os, FTAG); 02241 kmem_free(osname, MAXPATHLEN); 02242 return (ENOENT); 02243 } 02244 p = osname + strlen(osname); 02245 len = MAXPATHLEN - (p - osname); 02246 02247 /* Prefetch the datasets. */ 02248 cookie = 0; 02249 while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { 02250 if (!dataset_name_hidden(osname)) 02251 (void) dmu_objset_prefetch(osname, NULL); 02252 } 02253 02254 cookie = 0; 02255 while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL, 02256 &cookie) == 0) { 02257 dmu_objset_rele(os, FTAG); 02258 (void)zvol_create_minors(osname); 02259 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 02260 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", 02261 name, error); 02262 return (error); 02263 } 02264 } 02265 02266 dmu_objset_rele(os, FTAG); 02267 kmem_free(osname, MAXPATHLEN); 02268 return (0); 02269 } 02270 02271 static void 02272 zvol_rename_minor(struct g_geom *gp, const char *newname) 02273 { 02274 struct g_provider *pp; 02275 zvol_state_t *zv; 02276 02277 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 02278 g_topology_assert(); 02279 02280 pp = LIST_FIRST(&gp->provider); 02281 ASSERT(pp != NULL); 02282 zv = pp->private; 02283 ASSERT(zv != NULL); 02284 02285 zv->zv_provider = NULL; 02286 g_wither_provider(pp, ENXIO); 02287 02288 pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname); 02289 pp->sectorsize = DEV_BSIZE; 02290 pp->mediasize = zv->zv_volsize; 02291 pp->private = zv; 02292 zv->zv_provider = pp; 02293 strlcpy(zv->zv_name, newname, sizeof(zv->zv_name)); 02294 g_error_provider(pp, 0); 02295 } 02296 02297 void 02298 zvol_rename_minors(const char *oldname, const char *newname) 02299 { 02300 char name[MAXPATHLEN]; 02301 struct g_provider *pp; 02302 struct g_geom *gp; 02303 size_t oldnamelen, newnamelen; 02304 zvol_state_t *zv; 02305 char *namebuf; 02306 02307 oldnamelen = strlen(oldname); 02308 newnamelen = strlen(newname); 02309 02310 DROP_GIANT(); 02311 mutex_enter(&spa_namespace_lock); 02312 g_topology_lock(); 02313 02314 LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) { 02315 pp = LIST_FIRST(&gp->provider); 02316 if (pp == NULL) 02317 continue; 02318 zv = pp->private; 02319 if (zv == NULL) 02320 continue; 02321 if (strcmp(zv->zv_name, oldname) == 0) { 02322 zvol_rename_minor(gp, newname); 02323 } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 && 02324 (zv->zv_name[oldnamelen] == '/' || 02325 zv->zv_name[oldnamelen] == '@')) { 02326 snprintf(name, sizeof(name), "%s%c%s", newname, 02327 zv->zv_name[oldnamelen], 02328 zv->zv_name + oldnamelen + 1); 02329 zvol_rename_minor(gp, name); 02330 } 02331 } 02332 02333 g_topology_unlock(); 02334 mutex_exit(&spa_namespace_lock); 02335 PICKUP_GIANT(); 02336 }