FreeBSD ZFS
The Zettabyte File System
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
 * All rights reserved.
 * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_deadlist.h>

static char *dsl_reaper = "the grim reaper";

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_syncfunc_t dsl_dataset_set_reservation_sync;

#define	SWITCH64(x, y) \
{ \
	uint64_t __tmp = (x); \
	(x) = (y); \
	(y) = __tmp; \
}

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

#define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)

static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}
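
/*
 * Worked example for parent_delta() above: with ds_reserved = 10M and
 * ds_unique_bytes = 8M, a +1M write leaves MAX(unique, reserved) at
 * 10M, so 0 bytes are charged to the parent dsl_dir -- the new unique
 * data is absorbed by the refreservation.  Once unique_bytes exceeds
 * ds_reserved, further deltas pass through to the parent in full.
 */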
void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "ds=%p", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
	if (ds == NULL) {
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    used, compressed, uncompressed);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_referenced_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}

int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    boolean_t async)
{
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(bp->blk_birth <= tx->tx_txg);

	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(used > 0);
	if (ds == NULL) {
		dsl_free(tx->tx_pool, tx->tx_txg, bp);
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    -used, -compressed, -uncompressed);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int64_t delta;

		dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
		dsl_free(tx->tx_pool, tx->tx_txg, bp);

		mutex_enter(&ds->ds_dir->dd_lock);
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
		mutex_exit(&ds->ds_dir->dd_lock);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		if (async) {
			/*
			 * We are here as part of zio's write done callback,
			 * which means we're a zio interrupt thread.  We can't
			 * call dsl_deadlist_insert() now because it may block
			 * waiting for I/O.  Instead, put bp on the deferred
			 * queue and let dsl_pool_sync() finish the job.
			 */
			bplist_append(&ds->ds_pending_deadlist, bp);
		} else {
			dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
		}
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
	ds->ds_phys->ds_referenced_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}
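
/*
 * Note on dsl_dataset_block_kill() above: a block born after the most
 * recent snapshot (blk_birth > ds_prev_snap_txg) is still unique to
 * this dataset, so it can be freed immediately and unique/parent space
 * adjusted via parent_delta().  A block shared with a snapshot cannot
 * be freed yet; it goes on the deadlist (directly, or via the pending
 * list when called from zio interrupt context) to be reclaimed when
 * the snapshots that reference it are destroyed.
 */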
uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
    uint64_t blk_birth)
{
	if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
		return (B_FALSE);

	ddt_prefetch(dsl_dataset_get_spa(ds), bp);

	return (B_TRUE);
}
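
/*
 * dsl_dataset_evict() below is registered as the dbuf user-eviction
 * callback for the dataset's bonus buffer (see dsl_dataset_get_ref()),
 * so it runs when the last hold on ds_dbuf is released and tears down
 * the in-core dsl_dataset_t.
 */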
/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;

	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));

	unique_remove(ds->ds_fsid_guid);

	if (ds->ds_objset != NULL)
		dmu_objset_evict(ds->ds_objset);

	if (ds->ds_prev) {
		dsl_dataset_drop_ref(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	bplist_destroy(&ds->ds_pending_deadlist);
	if (db != NULL) {
		dsl_deadlist_close(&ds->ds_deadlist);
	} else {
		ASSERT(ds->ds_deadlist.dl_dbuf == NULL);
		ASSERT(!ds->ds_deadlist.dl_oldfmt);
	}
	if (ds->ds_dir)
		dsl_dir_close(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	if (mutex_owned(&ds->ds_lock))
		mutex_exit(&ds->ds_lock);
	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_recvlock);
	if (mutex_owned(&ds->ds_opening_lock))
		mutex_exit(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_opening_lock);
	rw_destroy(&ds->ds_rwlock);
	cv_destroy(&ds->ds_exclusive_cv);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

static int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}

static int
dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	dsl_dir_snap_cmtime_update(ds->ds_dir);

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_remove(mos, snapobj, name, tx);
	return (err);
}
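
/*
 * dsl_dataset_get_ref() below materializes the in-core dsl_dataset_t
 * for a MOS object: it holds the dataset's bonus buffer and caches the
 * structure on it with dmu_buf_set_user_ie().  Two threads can race to
 * construct the structure; the loser frees its copy and adopts the
 * "winner" returned by dmu_buf_set_user_ie().
 */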
static int
dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;
	dmu_object_info_t doi;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);

	/* Make sure dsobj has the correct object type. */
	dmu_object_info_from_db(dbuf, &doi);
	if (doi.doi_type != DMU_OT_DSL_DATASET)
		return (EINVAL);

	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);

		rw_init(&ds->ds_rwlock, 0, 0, 0);
		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);

		bplist_create(&ds->ds_pending_deadlist);
		dsl_deadlist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);

		list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
		    offsetof(dmu_sendarg_t, dsa_link));

		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_recvlock);
			mutex_destroy(&ds->ds_opening_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			bplist_destroy(&ds->ds_pending_deadlist);
			dsl_deadlist_close(&ds->ds_deadlist);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}
		} else {
			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
				err = dsl_dataset_get_snapname(ds);
			if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
				err = zap_count(
				    ds->ds_dir->dd_pool->dp_meta_objset,
				    ds->ds_phys->ds_userrefs_obj,
				    &ds->ds_userrefs);
			}
		}

		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
			/*
			 * In sync context, we're called with either no lock
			 * or with the write lock.  If we're not syncing,
			 * we're always called with the read lock held.
			 */
			boolean_t need_lock =
			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
			    dsl_pool_sync_context(dp);

			if (need_lock)
				rw_enter(&dp->dp_config_rwlock, RW_READER);

			err = dsl_prop_get_ds(ds,
			    "refreservation", sizeof (uint64_t), 1,
			    &ds->ds_reserved, NULL);
			if (err == 0) {
				err = dsl_prop_get_ds(ds,
				    "refquota", sizeof (uint64_t), 1,
				    &ds->ds_quota, NULL);
			}

			if (need_lock)
				rw_exit(&dp->dp_config_rwlock);
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_destroy(&ds->ds_pending_deadlist);
			dsl_deadlist_close(&ds->ds_deadlist);
			if (ds->ds_prev)
				dsl_dataset_drop_ref(ds->ds_prev, ds);
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_recvlock);
			mutex_destroy(&ds->ds_opening_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	mutex_enter(&ds->ds_lock);
	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
		mutex_exit(&ds->ds_lock);
		dmu_buf_rele(ds->ds_dbuf, tag);
		return (ENOENT);
	}
	mutex_exit(&ds->ds_lock);
	*dsp = ds;
	return (0);
}

static int
dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/*
	 * In syncing context we don't want the rwlock: there
	 * may be an existing writer waiting for sync phase to
	 * finish.  We don't need to worry about such writers, since
	 * sync phase is single-threaded, so the writer can't be
	 * doing anything while we are active.
	 */
	if (dsl_pool_sync_context(dp)) {
		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
		return (0);
	}

	/*
	 * Normal users will hold the ds_rwlock as a READER until they
	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
	 * drop their READER lock after they set the ds_owner field.
	 *
	 * If the dataset is being destroyed, the destroy thread will
	 * obtain a WRITER lock for exclusive access after it's done its
	 * open-context work and then change the ds_owner to
	 * dsl_reaper once destruction is assured.  So threads
	 * may block here temporarily, until the "destructibility" of
	 * the dataset is determined.
	 */
	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
	mutex_enter(&ds->ds_lock);
	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
		rw_exit(&dp->dp_config_rwlock);
		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
		if (DSL_DATASET_IS_DESTROYED(ds)) {
			mutex_exit(&ds->ds_lock);
			dsl_dataset_drop_ref(ds, tag);
			rw_enter(&dp->dp_config_rwlock, RW_READER);
			return (ENOENT);
		}
		/*
		 * The dp_config_rwlock lives above the ds_lock.  And
		 * we need to check DSL_DATASET_IS_DESTROYED() while
		 * holding the ds_lock, so we have to drop and reacquire
		 * the ds_lock here.
		 */
		mutex_exit(&ds->ds_lock);
		rw_enter(&dp->dp_config_rwlock, RW_READER);
		mutex_enter(&ds->ds_lock);
	}
	mutex_exit(&ds->ds_lock);
	return (0);
}
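
/*
 * The wrappers below distinguish two strengths of access: "hold" takes
 * a reference (and, outside syncing context, the ds_rwlock as READER),
 * while "own" additionally claims ds_owner via dsl_dataset_tryown()
 * and fails with EBUSY if someone else already owns the dataset.
 */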
int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);

	if (err)
		return (err);
	return (dsl_dataset_hold_ref(*dsp, tag));
}

int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
    void *tag, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
	if (err)
		return (err);
	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
		dsl_dataset_rele(*dsp, tag);
		*dsp = NULL;
		return (EBUSY);
	}
	return (0);
}

int
dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *snapname;
	uint64_t obj;
	int err = 0;

	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj)
		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
	else
		err = ENOENT;
	if (err)
		goto out;

	err = dsl_dataset_hold_ref(*dsp, tag);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *ds = NULL;

		if (*snapname++ != '@') {
			dsl_dataset_rele(*dsp, tag);
			err = ENOENT;
			goto out;
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
		if (err == 0)
			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
		dsl_dataset_rele(*dsp, tag);

		ASSERT3U((err == 0), ==, (ds != NULL));

		if (ds) {
			mutex_enter(&ds->ds_lock);
			if (ds->ds_snapname[0] == 0)
				(void) strlcpy(ds->ds_snapname, snapname,
				    sizeof (ds->ds_snapname));
			mutex_exit(&ds->ds_lock);
			err = dsl_dataset_hold_ref(ds, tag);
			*dsp = err ? NULL : ds;
		}
	}
out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);
	return (err);
}

int
dsl_dataset_own(const char *name, boolean_t inconsistentok,
    void *tag, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold(name, tag, dsp);
	if (err)
		return (err);
	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
		dsl_dataset_rele(*dsp, tag);
		return (EBUSY);
	}
	return (0);
}

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}
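
/*
 * dsl_dataset_namelen() below mirrors dsl_dataset_name() so callers
 * can size a buffer before building the name; for example,
 * dsl_dataset_origin_rm_prep() allocates dsl_dataset_namelen() + 1
 * bytes (the +1 is the terminating NUL) and then fills it in with
 * dsl_dataset_name().
 */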
static int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	int result;

	if (ds == NULL) {
		result = 3;	/* "mos" */
	} else {
		result = dsl_dir_namelen(ds->ds_dir);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			++result;	/* adding one for the @-sign */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				result += strlen(ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				result += strlen(ds->ds_snapname);
			}
		}
	}

	return (result);
}

void
dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
		rw_exit(&ds->ds_rwlock);
	}
	dsl_dataset_drop_ref(ds, tag);
}

void
dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
{
	ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
		rw_exit(&ds->ds_rwlock);
		cv_broadcast(&ds->ds_exclusive_cv);
	}
	mutex_exit(&ds->ds_lock);
	if (ds->ds_dbuf)
		dsl_dataset_drop_ref(ds, tag);
	else
		dsl_dataset_evict(NULL, ds);
}

boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
{
	boolean_t gotit = FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL &&
	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
		ds->ds_owner = tag;
		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
			rw_exit(&ds->ds_rwlock);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}

void
dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
{
	ASSERT3P(owner, ==, ds->ds_owner);
	if (!RW_WRITE_HELD(&ds->ds_rwlock))
		rw_enter(&ds->ds_rwlock, RW_WRITER);
}

uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	do {
		(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
		    sizeof (dsphys->ds_guid));
	} while (dsphys->ds_guid == 0);
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;

	if (origin == NULL) {
		dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
	} else {
		dsl_dataset_t *ohds;

		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_referenced_bytes =
		    origin->ds_phys->ds_referenced_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
		    origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
		dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
		    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
		dsl_dataset_rele(ohds, FTAG);

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY(0 == zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj,
			    dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			if (origin->ds_dir->dd_phys->dd_clones == 0) {
				dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
				origin->ds_dir->dd_phys->dd_clones =
				    zap_create(mos,
				    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY3U(0, ==, zap_add_int(mos,
			    origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
		}
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}
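
/*
 * Note on dsl_dataset_create_sync_dd() above: a clone is born cheap.
 * It starts with the origin snapshot's block pointer (ds_bp) and
 * referenced/compressed/uncompressed byte counts, bumps the origin's
 * ds_num_children, and clones the head's deadlist; no user data is
 * copied.  Its ds_unique_bytes is implicitly zero (from the bzero())
 * until new writes diverge from the origin.
 */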
uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(lastname[0] != '@');

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	dsl_dir_close(dd, FTAG);

	/*
	 * If we are creating a clone, make sure we zero out any stale
	 * data from the origin snapshot's zil header.
	 */
	if (origin != NULL) {
		dsl_dataset_t *ds;
		objset_t *os;

		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
		VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
		bzero(&os->os_zil_header, sizeof (os->os_zil_header));
		dsl_dataset_dirty(ds, tx);
		dsl_dataset_rele(ds, FTAG);
	}

	return (dsobj);
}

#ifdef __FreeBSD__
/* FreeBSD ioctl compat begin */
struct destroyarg {
	nvlist_t *nvl;
	const char *snapname;
};

static int
dsl_check_snap_cb(const char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	char *dsname;

	dsname = kmem_asprintf("%s@%s", name, da->snapname);
	VERIFY(nvlist_add_boolean(da->nvl, dsname) == 0);

	return (0);
}

int
dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname,
    nvlist_t *snaps)
{
	struct destroyarg *da;
	int err;

	da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP);
	da->nvl = snaps;
	da->snapname = snapname;
	err = dmu_objset_find(fsname, dsl_check_snap_cb, da,
	    DS_FIND_CHILDREN);
	kmem_free(da, sizeof (struct destroyarg));

	return (err);
}
/* FreeBSD ioctl compat end */
#endif /* __FreeBSD__ */

int
dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
{
	int err;
	dsl_sync_task_t *dst;
	spa_t *spa;
	nvpair_t *pair;
	dsl_sync_task_group_t *dstg;

	pair = nvlist_next_nvpair(snaps, NULL);
	if (pair == NULL)
		return (0);

	err = spa_open(nvpair_name(pair), &spa, FTAG);
	if (err)
		return (err);
	dstg = dsl_sync_task_group_create(spa_get_dsl(spa));

	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
	    pair = nvlist_next_nvpair(snaps, pair)) {
		dsl_dataset_t *ds;

		err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
		if (err == 0) {
			struct dsl_ds_destroyarg *dsda;

			dsl_dataset_make_exclusive(ds, dstg);
			dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
			    KM_SLEEP);
			dsda->ds = ds;
			dsda->defer = defer;
			dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
			    dsl_dataset_destroy_sync, dsda, dstg, 0);
		} else if (err == ENOENT) {
			err = 0;
		} else {
			(void) strcpy(failed, nvpair_name(pair));
			break;
		}
	}

	if (err == 0)
		err = dsl_sync_task_group_wait(dstg);

	for (dst = list_head(&dstg->dstg_tasks); dst;
	    dst = list_next(&dstg->dstg_tasks, dst)) {
		struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
		dsl_dataset_t *ds = dsda->ds;

		/*
		 * Return the file system name that triggered the error
		 */
		if (dst->dst_err) {
			dsl_dataset_name(ds, failed);
		}
		ASSERT3P(dsda->rm_origin, ==, NULL);
		dsl_dataset_disown(ds, dstg);
		kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
	}

	dsl_sync_task_group_destroy(dstg);
	spa_close(spa, FTAG);
	return (err);
}

static boolean_t
dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
{
	boolean_t might_destroy = B_FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
	    DS_IS_DEFER_DESTROY(ds))
		might_destroy = B_TRUE;
	mutex_exit(&ds->ds_lock);

	return (might_destroy);
}
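
/*
 * Destroys run as DSL sync tasks: a _check() function validates the
 * operation and the paired _sync() function applies it in syncing
 * context.  The helpers below handle the awkward case where
 * destroying the last clone of a deferred-destroy origin snapshot
 * (ds_num_children == 2, no user refs) must also destroy that origin.
 */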
static int
dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
{
	dsl_dataset_t *ds = dsda->ds;
	dsl_dataset_t *origin = ds->ds_prev;

	if (dsl_dataset_might_destroy_origin(origin)) {
		char *name;
		int namelen;
		int error;

		namelen = dsl_dataset_namelen(origin) + 1;
		name = kmem_alloc(namelen, KM_SLEEP);
		dsl_dataset_name(origin, name);
#ifdef _KERNEL
		error = zfs_unmount_snap(name, NULL);
		if (error) {
			kmem_free(name, namelen);
			return (error);
		}
#endif
		error = dsl_dataset_own(name, B_TRUE, tag, &origin);
		kmem_free(name, namelen);
		if (error)
			return (error);
		dsda->rm_origin = origin;
		dsl_dataset_make_exclusive(origin, tag);
	}

	return (0);
}

int
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dir_t *dd;
	uint64_t obj;
	struct dsl_ds_destroyarg dsda = { 0 };
	dsl_dataset_t dummy_ds = { 0 };

	dsda.ds = ds;

	if (dsl_dataset_is_snapshot(ds)) {
		/* Destroying a snapshot is simpler */
		dsl_dataset_make_exclusive(ds, tag);

		dsda.defer = defer;
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    &dsda, tag, 0);
		ASSERT3P(dsda.rm_origin, ==, NULL);
		goto out;
	} else if (defer) {
		err = EINVAL;
		goto out;
	}

	dd = ds->ds_dir;
	dummy_ds.ds_dir = dd;
	dummy_ds.ds_object = ds->ds_object;

	if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
		/*
		 * Check for errors and mark this ds as inconsistent, in
		 * case we crash while freeing the objects.
		 */
		err = dsl_sync_task_do(dd->dd_pool,
		    dsl_dataset_destroy_begin_check,
		    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
		if (err)
			goto out;

		err = dmu_objset_from_ds(ds, &os);
		if (err)
			goto out;

		/*
		 * Remove all objects while in the open context so that
		 * there is less work to do in the syncing context.
		 */
		for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
		    ds->ds_phys->ds_prev_snap_txg)) {
			/*
			 * Ignore errors, if there is not enough disk space
			 * we will deal with it in dsl_dataset_destroy_sync().
			 */
			(void) dmu_free_object(os, obj);
		}
		if (err != ESRCH)
			goto out;

		/*
		 * Sync out all in-flight IO.
		 */
		txg_wait_synced(dd->dd_pool, 0);

		/*
		 * If we managed to free all the objects in open
		 * context, the user space accounting should be zero.
		 */
		if (ds->ds_phys->ds_bp.blk_fill == 0 &&
		    dmu_objset_userused_enabled(os)) {
			uint64_t count;

			ASSERT(zap_count(os, DMU_USERUSED_OBJECT,
			    &count) != 0 || count == 0);
			ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT,
			    &count) != 0 || count == 0);
		}
	}

	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
	rw_exit(&dd->dd_pool->dp_config_rwlock);

	if (err)
		goto out;

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dsl_dataset_make_exclusive(ds, tag);
	/*
	 * If we're removing a clone, we might also need to remove its
	 * origin.
	 */
	do {
		dsda.need_prep = B_FALSE;
		if (dsl_dir_is_clone(dd)) {
			err = dsl_dataset_origin_rm_prep(&dsda, tag);
			if (err) {
				dsl_dir_close(dd, FTAG);
				goto out;
			}
		}

		dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
		dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, &dsda, tag, 0);
		dsl_sync_task_create(dstg, dsl_dir_destroy_check,
		    dsl_dir_destroy_sync, &dummy_ds, FTAG, 0);
		err = dsl_sync_task_group_wait(dstg);
		dsl_sync_task_group_destroy(dstg);

		/*
		 * We could be racing against 'zfs release' or 'zfs destroy -d'
		 * on the origin snap, in which case we can get EBUSY if we
		 * needed to destroy the origin snap but were not ready to
		 * do so.
		 */
		if (dsda.need_prep) {
			ASSERT(err == EBUSY);
			ASSERT(dsl_dir_is_clone(dd));
			ASSERT(dsda.rm_origin == NULL);
		}
	} while (dsda.need_prep);

	if (dsda.rm_origin != NULL)
		dsl_dataset_disown(dsda.rm_origin, tag);

	/* if it is successful, dsl_dir_destroy_sync will close the dd */
	if (err)
		dsl_dir_close(dd, FTAG);
out:
	dsl_dataset_disown(ds, tag);
	return (err);
}

blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_objset != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

boolean_t
dsl_dataset_is_dirty(dsl_dataset_t *ds)
{
	for (int t = 0; t < TXG_SIZE; t++) {
		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
		    ds, t))
			return (B_TRUE);
	}
	return (B_FALSE);
}

static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(!dsl_dataset_is_snapshot(ds));

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
	else
		mrs_used = 0;

	dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}
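
/*
 * Worked example for dsl_dataset_recalc_head_uniq() above: if the head
 * references 100M, the most recent snapshot references 80M (mrs_used),
 * and 30M of deadlist space has accumulated since that snapshot
 * (dlused), then the head still shares 80M - 30M = 50M with the
 * snapshot, so ds_unique_bytes = 100M - 50M = 50M.
 */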
struct killarg {
	dsl_dataset_t *ds;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;
	dmu_tx_t *tx = ka->tx;

	if (bp == NULL)
		return (0);

	if (zb->zb_level == ZB_ZIL_LEVEL) {
		ASSERT(zilog != NULL);
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
	} else {
		ASSERT(zilog == NULL);
		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
	}

	return (0);
}

/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EBUSY);

	/*
	 * This is really a dsl_dir thing, but check it here so that
	 * we'll be less likely to leave this dataset inconsistent &
	 * nearly destroyed.
	 */
	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
	    "dataset = %llu", ds->ds_object);
}

static int
dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
    dmu_tx_t *tx)
{
	dsl_dataset_t *ds = dsda->ds;
	dsl_dataset_t *ds_prev = ds->ds_prev;

	if (dsl_dataset_might_destroy_origin(ds_prev)) {
		struct dsl_ds_destroyarg ndsda = {0};

		/*
		 * If we're not prepared to remove the origin, don't remove
		 * the clone either.
		 */
		if (dsda->rm_origin == NULL) {
			dsda->need_prep = B_TRUE;
			return (EBUSY);
		}

		ndsda.ds = ds_prev;
		ndsda.is_origin_rm = B_TRUE;
		return (dsl_dataset_destroy_check(&ndsda, tag, tx));
	}

	/*
	 * If we're not going to remove the origin after all,
	 * undo the open context setup.
	 */
	if (dsda->rm_origin != NULL) {
		dsl_dataset_disown(dsda->rm_origin, tag);
		dsda->rm_origin = NULL;
	}

	return (0);
}

/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	struct dsl_ds_destroyarg *dsda = arg1;
	dsl_dataset_t *ds = dsda->ds;

	/* we have an owner hold, so no one else can destroy us */
	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (dsda->defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (ENOTSUP);
		ASSERT(dsl_dataset_is_snapshot(ds));
		return (0);
	}

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EBUSY);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	if (dsl_dataset_is_snapshot(ds)) {
		/*
		 * If this snapshot has an elevated user reference count,
		 * we can't destroy it yet.
		 */
		if (ds->ds_userrefs > 0 && !dsda->releasing)
			return (EBUSY);

		mutex_enter(&ds->ds_lock);
		/*
		 * Can't delete a branch point.  However, if we're destroying
		 * a clone and removing its origin due to it having a user
		 * hold count of 0 and having been marked for deferred destroy,
		 * it's OK for the origin to have a single clone.
		 */
		if (ds->ds_phys->ds_num_children >
		    (dsda->is_origin_rm ? 2 : 1)) {
			mutex_exit(&ds->ds_lock);
			return (EEXIST);
		}
		mutex_exit(&ds->ds_lock);
	} else if (dsl_dir_is_clone(ds->ds_dir)) {
		return (dsl_dataset_origin_check(dsda, arg2, tx));
	}

	/* XXX we should do some i/o error checking... */
	return (0);
}
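
/*
 * The machinery below lets the destroy path wait for every outstanding
 * reference to drain: dsl_dataset_drain_refs() swaps the dbuf user
 * callback to dsl_dataset_refs_gone() and cv_waits until the callback
 * fires, i.e. until the last hold on ds_dbuf has been released.
 */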
struct refsarg {
	kmutex_t lock;
	boolean_t gone;
	kcondvar_t cv;
};

/* ARGSUSED */
static void
dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
{
	struct refsarg *arg = argv;

	mutex_enter(&arg->lock);
	arg->gone = TRUE;
	cv_signal(&arg->cv);
	mutex_exit(&arg->lock);
}

static void
dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
{
	struct refsarg arg;

	bzero(&arg, sizeof(arg));
	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
	arg.gone = FALSE;
	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
	    dsl_dataset_refs_gone);
	dmu_buf_rele(ds->ds_dbuf, tag);
	mutex_enter(&arg.lock);
	while (!arg.gone)
		cv_wait(&arg.cv, &arg.lock);
	ASSERT(arg.gone);
	mutex_exit(&arg.lock);
	ds->ds_dbuf = NULL;
	ds->ds_phys = NULL;
	mutex_destroy(&arg.lock);
	cv_destroy(&arg.cv);
}

static void
remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	ASSERT(ds->ds_phys->ds_num_children >= 2);
	err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
	/*
	 * The err should not be ENOENT, but a bug in a previous version
	 * of the code could cause upgrade_clones_cb() to not set
	 * ds_next_snap_obj when it should, leading to a missing entry.
	 * If we knew that the pool was created after
	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
	 * ENOENT.  However, at least we can check that we don't have
	 * too many entries in the next_clones_obj even after failing to
	 * remove this one.
	 */
	if (err != ENOENT) {
		VERIFY0(err);
	}
	ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
	    &count));
	ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
}

static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;

	/*
	 * If it is the old version, dd_clones doesn't exist so we can't
	 * find the clones, but deadlist_remove_key() is a no-op so it
	 * doesn't matter.
	 */
	if (ds->ds_dir->dd_phys->dd_clones == 0)
		return;

	for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;

		VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za.za_first_integer, FTAG, &clone));
		if (clone->ds_dir->dd_origin_txg > mintxg) {
			dsl_deadlist_remove_key(&clone->ds_deadlist,
			    mintxg, tx);
			dsl_dataset_remove_clones_key(clone, mintxg, tx);
		}
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
}

struct process_old_arg {
	dsl_dataset_t *ds;
	dsl_dataset_t *ds_prev;
	boolean_t after_branch_point;
	zio_t *pio;
	uint64_t used, comp, uncomp;
};

static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	struct process_old_arg *poa = arg;
	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;

	if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
		if (poa->ds_prev && !poa->after_branch_point &&
		    bp->blk_birth >
		    poa->ds_prev->ds_phys->ds_prev_snap_txg) {
			poa->ds_prev->ds_phys->ds_unique_bytes +=
			    bp_get_dsize_sync(dp->dp_spa, bp);
		}
	} else {
		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
		poa->comp += BP_GET_PSIZE(bp);
		poa->uncomp += BP_GET_UCSIZE(bp);
		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
	}
	return (0);
}

static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
	struct process_old_arg poa = { 0 };
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	ASSERT(ds->ds_deadlist.dl_oldfmt);
	ASSERT(ds_next->ds_deadlist.dl_oldfmt);

	poa.ds = ds;
	poa.ds_prev = ds_prev;
	poa.after_branch_point = after_branch_point;
	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
	    process_old_cb, &poa, tx));
	VERIFY0(zio_wait(poa.pio));
	ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);

	/* change snapused */
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
	    -poa.used, -poa.comp, -poa.uncomp, tx);

	/* swap next's deadlist to our deadlist */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_close(&ds_next->ds_deadlist);
	SWITCH64(ds_next->ds_phys->ds_deadlist_obj,
	    ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
	    ds_next->ds_phys->ds_deadlist_obj);
}

static int
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	int err;
	struct killarg ka;

	/*
	 * Free everything that we point to (that's born after
	 * the previous snapshot, if we are a clone)
	 *
	 * NB: this should be very quick, because we already
	 * freed all the objects in open context.
	 */
	ka.ds = ds;
	ka.tx = tx;
	err = traverse_dataset(ds,
	    ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
	    kill_blkptr, &ka);
	ASSERT0(err);
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);

	return (err);
}
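
/*
 * dsl_dataset_destroy_sync() below is the syncing-context heart of
 * destroy.  For a snapshot it splices the snapshot out of the
 * prev/next chain, credits blocks that only this snapshot held back
 * to the neighboring datasets' accounting, and merges its deadlist
 * into the next dataset's.  For a head it frees the objset's blocks
 * either synchronously (old pools) or by queueing them on the pool's
 * bptree for async destroy.
 */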
void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
	struct dsl_ds_destroyarg *dsda = arg1;
	dsl_dataset_t *ds = dsda->ds;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	boolean_t wont_destroy;
	uint64_t obj;

	wont_destroy = (dsda->defer &&
	    (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));

	ASSERT(ds->ds_owner || wont_destroy);
	ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	if (wont_destroy) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
		return;
	}

	/* signal any waiters that this dataset is going away */
	mutex_enter(&ds->ds_lock);
	ds->ds_owner = dsl_reaper;
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		dsl_prop_setarg_t psa;
		uint64_t value = 0;

		dsl_prop_setarg_init_uint64(&psa, "refreservation",
		    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
		    &value);
		psa.psa_effective_value = 0;	/* predict default value */

		dsl_dataset_set_reservation_sync(ds, &psa, tx);
		ASSERT0(ds->ds_reserved);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			remove_from_next_clones(ds_prev, obj, tx);
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY(0 == zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;

			/*
			 * If the clone's origin has no other clones, no
			 * user holds, and has been marked for deferred
			 * deletion, then we should have done the necessary
			 * destroy setup for it.
			 */
			if (ds_prev->ds_phys->ds_num_children == 1 &&
			    ds_prev->ds_userrefs == 0 &&
			    DS_IS_DEFER_DESTROY(ds_prev)) {
				ASSERT3P(dsda->rm_origin, !=, NULL);
			} else {
				ASSERT3P(dsda->rm_origin, ==, NULL);
			}
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	if (dsl_dataset_is_snapshot(ds)) {
		dsl_dataset_t *ds_next;
		uint64_t old_unique;
		uint64_t used = 0, comp = 0, uncomp = 0;

		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		old_unique = ds_next->ds_phys->ds_unique_bytes;

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		if (ds_next->ds_deadlist.dl_oldfmt) {
			process_old_deadlist(ds, ds_prev, ds_next,
			    after_branch_point, tx);
		} else {
			/* Adjust prev's unique space. */
			if (ds_prev && !after_branch_point) {
				dsl_deadlist_space_range(&ds_next->ds_deadlist,
				    ds_prev->ds_phys->ds_prev_snap_txg,
				    ds->ds_phys->ds_prev_snap_txg,
				    &used, &comp, &uncomp);
				ds_prev->ds_phys->ds_unique_bytes += used;
			}

			/* Adjust snapused. */
			dsl_deadlist_space_range(&ds_next->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
			    &used, &comp, &uncomp);
			dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
			    -used, -comp, -uncomp, tx);

			/* Move blocks to be freed to pool's free list. */
			dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
			    &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
			    tx);
			dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
			    DD_USED_HEAD, used, comp, uncomp, tx);

			/* Merge our deadlist into next's and free it. */
			dsl_deadlist_merge(&ds_next->ds_deadlist,
			    ds->ds_phys->ds_deadlist_obj, tx);
		}
		dsl_deadlist_close(&ds->ds_deadlist);
		dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);

		/* Collapse range in clone heads */
		dsl_dataset_remove_clones_key(ds,
		    ds->ds_phys->ds_creation_txg, tx);

		if (dsl_dataset_is_snapshot(ds_next)) {
			dsl_dataset_t *ds_nextnext;

			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (i.e. be on the snap after next's
			 * deadlist).
			 */
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj,
			    FTAG, &ds_nextnext));
			dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_creation_txg,
			    &used, &comp, &uncomp);
			ds_next->ds_phys->ds_unique_bytes += used;
			dsl_dataset_rele(ds_nextnext, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);

			/* Collapse range in this head. */
			dsl_dataset_t *hds;
			VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
			    ds->ds_dir->dd_phys->dd_head_dataset_obj,
			    FTAG, &hds));
			dsl_deadlist_remove_key(&hds->ds_deadlist,
			    ds->ds_phys->ds_creation_txg, tx);
			dsl_dataset_rele(hds, FTAG);

		} else {
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
			ds_next->ds_prev = NULL;
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds_next, &ds_next->ds_prev));
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir,
				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
			}
		}
		dsl_dataset_rele(ds_next, FTAG);
	} else {
		zfeature_info_t *async_destroy =
		    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
		objset_t *os;

		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		dsl_deadlist_close(&ds->ds_deadlist);
		dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));

		if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
			err = old_synchronous_dataset_destroy(ds, tx);
		} else {
			/*
			 * Move the bptree into the pool's list of trees to
			 * clean up and update space accounting information.
			 */
			uint64_t used, comp, uncomp;

			zil_destroy_sync(dmu_objset_zil(os), tx);

			if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
				spa_feature_incr(dp->dp_spa, async_destroy, tx);
				dp->dp_bptree_obj = bptree_alloc(mos, tx);
				VERIFY(zap_add(mos,
				    DMU_POOL_DIRECTORY_OBJECT,
				    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
				    &dp->dp_bptree_obj, tx) == 0);
			}

			used = ds->ds_dir->dd_phys->dd_used_bytes;
			comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
			uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;

			ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
			    ds->ds_phys->ds_unique_bytes == used);

			bptree_add(mos, dp->dp_bptree_obj,
			    &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
			    used, comp, uncomp, tx);
			dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
			    -used, -comp, -uncomp, tx);
			dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
			    used, comp, uncomp, tx);
		}

		if (ds->ds_prev != NULL) {
			if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
				VERIFY3U(0, ==, zap_remove_int(mos,
				    ds->ds_prev->ds_dir->dd_phys->dd_clones,
				    ds->ds_object, tx));
			}
			dsl_dataset_rele(ds->ds_prev, ds);
			ds->ds_prev = ds_prev = NULL;
		}
	}

	/*
	 * This must be done after the dsl_traverse(), because it will
	 * re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dir */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;

			err = dsl_dataset_snap_lookup(ds_head,
			    ds->ds_snapname, &val);
			ASSERT0(err);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_rele(ds_head, FTAG);
	}

	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx,
	    "dataset = %llu", ds->ds_object);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT(0 == zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
		VERIFY(0 == dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	if (ds->ds_phys->ds_userrefs_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
	dsl_dir_close(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dsl_dataset_drain_refs(ds, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));

	if (dsda->rm_origin) {
		/*
		 * Remove the origin of the clone we just destroyed.
		 */
		struct dsl_ds_destroyarg ndsda = {0};

		ndsda.ds = dsda->rm_origin;
		dsl_dataset_destroy_sync(&ndsda, tag, tx);
	}
}
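
/*
 * Snapshot creation follows the same check/sync pattern.  The space
 * check below exists because taking a snapshot converts up to
 * MIN(ds_unique_bytes, ds_reserved) bytes from "covered by the
 * refreservation" to "owned by the snapshot", and that much new space
 * must be available outside the reservation or the snapshot must fail
 * with ENOSPC.
 */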
01970 */ 01971 if (ds->ds_objset) { 01972 dmu_objset_evict(ds->ds_objset); 01973 ds->ds_objset = NULL; 01974 } 01975 01976 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 01977 /* Erase the link in the dir */ 01978 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 01979 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 01980 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); 01981 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 01982 ASSERT(err == 0); 01983 } else { 01984 /* remove from snapshot namespace */ 01985 dsl_dataset_t *ds_head; 01986 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); 01987 VERIFY(0 == dsl_dataset_hold_obj(dp, 01988 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); 01989 VERIFY(0 == dsl_dataset_get_snapname(ds)); 01990 #ifdef ZFS_DEBUG 01991 { 01992 uint64_t val; 01993 01994 err = dsl_dataset_snap_lookup(ds_head, 01995 ds->ds_snapname, &val); 01996 ASSERT0(err); 01997 ASSERT3U(val, ==, obj); 01998 } 01999 #endif 02000 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); 02001 ASSERT(err == 0); 02002 dsl_dataset_rele(ds_head, FTAG); 02003 } 02004 02005 if (ds_prev && ds->ds_prev != ds_prev) 02006 dsl_dataset_rele(ds_prev, FTAG); 02007 02008 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 02009 spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, 02010 "dataset = %llu", ds->ds_object); 02011 02012 if (ds->ds_phys->ds_next_clones_obj != 0) { 02013 uint64_t count; 02014 ASSERT(0 == zap_count(mos, 02015 ds->ds_phys->ds_next_clones_obj, &count) && count == 0); 02016 VERIFY(0 == dmu_object_free(mos, 02017 ds->ds_phys->ds_next_clones_obj, tx)); 02018 } 02019 if (ds->ds_phys->ds_props_obj != 0) 02020 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); 02021 if (ds->ds_phys->ds_userrefs_obj != 0) 02022 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); 02023 dsl_dir_close(ds->ds_dir, ds); 02024 ds->ds_dir = NULL; 02025 dsl_dataset_drain_refs(ds, tag); 02026 VERIFY(0 == dmu_object_free(mos, obj, tx)); 02027 02028 if (dsda->rm_origin) { 02029 /* 02030 * Remove the origin of the clone we just destroyed. 02031 */ 02032 struct dsl_ds_destroyarg ndsda = {0}; 02033 02034 ndsda.ds = dsda->rm_origin; 02035 dsl_dataset_destroy_sync(&ndsda, tag, tx); 02036 } 02037 } 02038 02039 static int 02040 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 02041 { 02042 uint64_t asize; 02043 02044 if (!dmu_tx_is_syncing(tx)) 02045 return (0); 02046 02047 /* 02048 * If there's an fs-only reservation, any blocks that might become 02049 * owned by the snapshot dataset must be accommodated by space 02050 * outside of the reservation. 02051 */ 02052 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 02053 asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 02054 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 02055 return (ENOSPC); 02056 02057 /* 02058 * Propagate any reserved space for this snapshot to other 02059 * snapshot checks in this sync group. 02060 */ 02061 if (asize > 0) 02062 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 02063 02064 return (0); 02065 } 02066 02067 int 02068 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 02069 { 02070 dsl_dataset_t *ds = arg1; 02071 const char *snapname = arg2; 02072 int err; 02073 uint64_t value; 02074 02075 /* 02076 * We don't allow multiple snapshots of the same txg. If there 02077 * is already one, try again.
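* A hedged sketch of the retry, patterned on the dsl_sync_task_do() calls elsewhere in this file rather than quoting an exact call site: err = dsl_sync_task_do(dp, dsl_dataset_snapshot_check, dsl_dataset_snapshot_sync, ds, (void *)snapname, 0); if (err == EAGAIN) the caller lets the open txg sync and retries.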
02078 */ 02079 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 02080 return (EAGAIN); 02081 02082 /* 02083 * Check for a conflicting snapshot name. 02084 */ 02085 err = dsl_dataset_snap_lookup(ds, snapname, &value); 02086 if (err == 0) 02087 return (EEXIST); 02088 if (err != ENOENT) 02089 return (err); 02090 02091 /* 02092 * Check that the snapshot's full name is not too long. It consists 02093 * of the dataset name's length + 1 for the @-sign + the snapshot name's length. 02094 */ 02095 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 02096 return (ENAMETOOLONG); 02097 02098 err = dsl_dataset_snapshot_reserve_space(ds, tx); 02099 if (err) 02100 return (err); 02101 02102 ds->ds_trysnap_txg = tx->tx_txg; 02103 return (0); 02104 } 02105 02106 void 02107 dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) 02108 { 02109 dsl_dataset_t *ds = arg1; 02110 const char *snapname = arg2; 02111 dsl_pool_t *dp = ds->ds_dir->dd_pool; 02112 dmu_buf_t *dbuf; 02113 dsl_dataset_phys_t *dsphys; 02114 uint64_t dsobj, crtxg; 02115 objset_t *mos = dp->dp_meta_objset; 02116 int err; 02117 02118 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 02119 02120 /* 02121 * The origin's ds_creation_txg has to be < TXG_INITIAL 02122 */ 02123 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 02124 crtxg = 1; 02125 else 02126 crtxg = tx->tx_txg; 02127 02128 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 02129 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 02130 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 02131 dmu_buf_will_dirty(dbuf, tx); 02132 dsphys = dbuf->db_data; 02133 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 02134 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 02135 dsphys->ds_fsid_guid = unique_create(); 02136 do { 02137 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 02138 sizeof (dsphys->ds_guid)); 02139 } while (dsphys->ds_guid == 0); 02140 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 02141 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 02142 dsphys->ds_next_snap_obj = ds->ds_object; 02143 dsphys->ds_num_children = 1; 02144 dsphys->ds_creation_time = gethrestime_sec(); 02145 dsphys->ds_creation_txg = crtxg; 02146 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 02147 dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes; 02148 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 02149 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 02150 dsphys->ds_flags = ds->ds_phys->ds_flags; 02151 dsphys->ds_bp = ds->ds_phys->ds_bp; 02152 dmu_buf_rele(dbuf, FTAG); 02153 02154 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 02155 if (ds->ds_prev) { 02156 uint64_t next_clones_obj = 02157 ds->ds_prev->ds_phys->ds_next_clones_obj; 02158 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 02159 ds->ds_object || 02160 ds->ds_prev->ds_phys->ds_num_children > 1); 02161 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 02162 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 02163 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 02164 ds->ds_prev->ds_phys->ds_creation_txg); 02165 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 02166 } else if (next_clones_obj != 0) { 02167 remove_from_next_clones(ds->ds_prev, 02168 dsphys->ds_next_snap_obj, tx); 02169 VERIFY3U(0, ==, zap_add_int(mos, 02170 next_clones_obj, dsobj, tx)); 02171 } 02172 } 02173 02174 /* 02175 * If we have a reference-reservation on this dataset, we will 02176 * need to increase the amount of refreservation being charged 02177 * since our
unique space is going to zero. 02178 */ 02179 if (ds->ds_reserved) { 02180 int64_t delta; 02181 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 02182 delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 02183 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 02184 delta, 0, 0, tx); 02185 } 02186 02187 dmu_buf_will_dirty(ds->ds_dbuf, tx); 02188 zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", 02189 ds->ds_dir->dd_myname, snapname, dsobj, 02190 ds->ds_phys->ds_prev_snap_txg); 02191 ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, 02192 UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); 02193 dsl_deadlist_close(&ds->ds_deadlist); 02194 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 02195 dsl_deadlist_add_key(&ds->ds_deadlist, 02196 ds->ds_phys->ds_prev_snap_txg, tx); 02197 02198 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 02199 ds->ds_phys->ds_prev_snap_obj = dsobj; 02200 ds->ds_phys->ds_prev_snap_txg = crtxg; 02201 ds->ds_phys->ds_unique_bytes = 0; 02202 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 02203 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 02204 02205 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 02206 snapname, 8, 1, &dsobj, tx); 02207 ASSERT(err == 0); 02208 02209 if (ds->ds_prev) 02210 dsl_dataset_drop_ref(ds->ds_prev, ds); 02211 VERIFY(0 == dsl_dataset_get_ref(dp, 02212 ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 02213 02214 dsl_scan_ds_snapshotted(ds, tx); 02215 02216 dsl_dir_snap_cmtime_update(ds->ds_dir); 02217 02218 spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, 02219 "dataset = %llu", dsobj); 02220 } 02221 02222 void 02223 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 02224 { 02225 ASSERT(dmu_tx_is_syncing(tx)); 02226 ASSERT(ds->ds_objset != NULL); 02227 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 02228 02229 /* 02230 * in case we had to change ds_fsid_guid when we opened it, 02231 * sync it out now. 02232 */ 02233 dmu_buf_will_dirty(ds->ds_dbuf, tx); 02234 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 02235 02236 dmu_objset_sync(ds->ds_objset, zio, tx); 02237 } 02238 02239 static void 02240 get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 02241 { 02242 uint64_t count = 0; 02243 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 02244 zap_cursor_t zc; 02245 zap_attribute_t za; 02246 nvlist_t *propval; 02247 nvlist_t *val; 02248 02249 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 02250 VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 02251 VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); 02252 02253 /* 02254 * There may be missing entries in ds_next_clones_obj 02255 * due to a bug in a previous version of the code. 02256 * Only trust it if it has the right number of entries. 02257 */ 02258 if (ds->ds_phys->ds_next_clones_obj != 0) { 02259 ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, 02260 &count)); 02261 } 02262 if (count != ds->ds_phys->ds_num_children - 1) { 02263 goto fail; 02264 } 02265 for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); 02266 zap_cursor_retrieve(&zc, &za) == 0; 02267 zap_cursor_advance(&zc)) { 02268 dsl_dataset_t *clone; 02269 char buf[ZFS_MAXNAMELEN]; 02270 /* 02271 * Even though we hold the dp_config_rwlock, the dataset 02272 * may fail to open, returning ENOENT. If there is a 02273 * thread concurrently attempting to destroy this 02274 * dataset, it will have the ds_rwlock held for 02275 * RW_WRITER.
Our call to dsl_dataset_hold_obj() -> 02276 * dsl_dataset_hold_ref() will fail its 02277 * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the 02278 * dp_config_rwlock, and wait for the destroy to progress 02279 * and signal ds_exclusive_cv. If the destroy was 02280 * successful, we will see that 02281 * DSL_DATASET_IS_DESTROYED(), and return ENOENT. 02282 */ 02283 if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 02284 za.za_first_integer, FTAG, &clone) != 0) 02285 continue; 02286 dsl_dir_name(clone->ds_dir, buf); 02287 VERIFY(nvlist_add_boolean(val, buf) == 0); 02288 dsl_dataset_rele(clone, FTAG); 02289 } 02290 zap_cursor_fini(&zc); 02291 VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); 02292 VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), 02293 propval) == 0); 02294 fail: 02295 nvlist_free(val); 02296 nvlist_free(propval); 02297 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 02298 } 02299 02300 void 02301 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 02302 { 02303 uint64_t refd, avail, uobjs, aobjs, ratio; 02304 02305 dsl_dir_stats(ds->ds_dir, nv); 02306 02307 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 02308 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 02309 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 02310 02311 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 02312 ds->ds_phys->ds_creation_time); 02313 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 02314 ds->ds_phys->ds_creation_txg); 02315 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 02316 ds->ds_quota); 02317 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 02318 ds->ds_reserved); 02319 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 02320 ds->ds_phys->ds_guid); 02321 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 02322 ds->ds_phys->ds_unique_bytes); 02323 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 02324 ds->ds_object); 02325 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 02326 ds->ds_userrefs); 02327 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 02328 DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 02329 02330 if (ds->ds_phys->ds_prev_snap_obj != 0) { 02331 uint64_t written, comp, uncomp; 02332 dsl_pool_t *dp = ds->ds_dir->dd_pool; 02333 dsl_dataset_t *prev; 02334 02335 rw_enter(&dp->dp_config_rwlock, RW_READER); 02336 int err = dsl_dataset_hold_obj(dp, 02337 ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); 02338 rw_exit(&dp->dp_config_rwlock); 02339 if (err == 0) { 02340 err = dsl_dataset_space_written(prev, ds, &written, 02341 &comp, &uncomp); 02342 dsl_dataset_rele(prev, FTAG); 02343 if (err == 0) { 02344 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 02345 written); 02346 } 02347 } 02348 } 02349 ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 02350 (ds->ds_phys->ds_uncompressed_bytes * 100 / 02351 ds->ds_phys->ds_compressed_bytes); 02352 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 02353 02354 if (ds->ds_phys->ds_next_snap_obj) { 02355 /* 02356 * This is a snapshot; override the dd's space used with 02357 * our unique space and compression ratio.
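* (Hence "zfs list -o used pool/fs@snap" reports the snapshot's ds_unique_bytes rather than the dsl_dir accounting a head would show; dataset name illustrative.)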
02358 */ 02359 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 02360 ds->ds_phys->ds_unique_bytes); 02361 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 02362 02363 get_clones_stat(ds, nv); 02364 } 02365 } 02366 02367 void 02368 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 02369 { 02370 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 02371 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 02372 stat->dds_guid = ds->ds_phys->ds_guid; 02373 if (ds->ds_phys->ds_next_snap_obj) { 02374 stat->dds_is_snapshot = B_TRUE; 02375 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 02376 } else { 02377 stat->dds_is_snapshot = B_FALSE; 02378 stat->dds_num_clones = 0; 02379 } 02380 02381 /* clone origin is really a dsl_dir thing... */ 02382 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 02383 if (dsl_dir_is_clone(ds->ds_dir)) { 02384 dsl_dataset_t *ods; 02385 02386 VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, 02387 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 02388 dsl_dataset_name(ods, stat->dds_origin); 02389 dsl_dataset_drop_ref(ods, FTAG); 02390 } else { 02391 stat->dds_origin[0] = '\0'; 02392 } 02393 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 02394 } 02395 02396 uint64_t 02397 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 02398 { 02399 return (ds->ds_fsid_guid); 02400 } 02401 02402 void 02403 dsl_dataset_space(dsl_dataset_t *ds, 02404 uint64_t *refdbytesp, uint64_t *availbytesp, 02405 uint64_t *usedobjsp, uint64_t *availobjsp) 02406 { 02407 *refdbytesp = ds->ds_phys->ds_referenced_bytes; 02408 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 02409 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 02410 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 02411 if (ds->ds_quota != 0) { 02412 /* 02413 * Adjust available bytes according to refquota 02414 */ 02415 if (*refdbytesp < ds->ds_quota) 02416 *availbytesp = MIN(*availbytesp, 02417 ds->ds_quota - *refdbytesp); 02418 else 02419 *availbytesp = 0; 02420 } 02421 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 02422 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 02423 } 02424 02425 boolean_t 02426 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 02427 { 02428 dsl_pool_t *dp = ds->ds_dir->dd_pool; 02429 02430 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 02431 dsl_pool_sync_context(dp)); 02432 if (ds->ds_prev == NULL) 02433 return (B_FALSE); 02434 if (ds->ds_phys->ds_bp.blk_birth > 02435 ds->ds_prev->ds_phys->ds_creation_txg) { 02436 objset_t *os, *os_prev; 02437 /* 02438 * It may be that only the ZIL differs, because it was 02439 * reset in the head. Don't count that as being 02440 * modified. 
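* (Only os_meta_dnode is compared below, so a difference confined to the objset's ZIL header is ignored.)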
02441 */ 02442 if (dmu_objset_from_ds(ds, &os) != 0) 02443 return (B_TRUE); 02444 if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0) 02445 return (B_TRUE); 02446 return (bcmp(&os->os_phys->os_meta_dnode, 02447 &os_prev->os_phys->os_meta_dnode, 02448 sizeof (os->os_phys->os_meta_dnode)) != 0); 02449 } 02450 return (B_FALSE); 02451 } 02452 02453 /* ARGSUSED */ 02454 static int 02455 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 02456 { 02457 dsl_dataset_t *ds = arg1; 02458 char *newsnapname = arg2; 02459 dsl_dir_t *dd = ds->ds_dir; 02460 dsl_dataset_t *hds; 02461 uint64_t val; 02462 int err; 02463 02464 err = dsl_dataset_hold_obj(dd->dd_pool, 02465 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 02466 if (err) 02467 return (err); 02468 02469 /* new name better not be in use */ 02470 err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 02471 dsl_dataset_rele(hds, FTAG); 02472 02473 if (err == 0) 02474 err = EEXIST; 02475 else if (err == ENOENT) 02476 err = 0; 02477 02478 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 02479 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 02480 err = ENAMETOOLONG; 02481 02482 return (err); 02483 } 02484 02485 static void 02486 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) 02487 { 02488 char oldname[MAXPATHLEN], newname[MAXPATHLEN]; 02489 dsl_dataset_t *ds = arg1; 02490 const char *newsnapname = arg2; 02491 dsl_dir_t *dd = ds->ds_dir; 02492 objset_t *mos = dd->dd_pool->dp_meta_objset; 02493 dsl_dataset_t *hds; 02494 int err; 02495 02496 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 02497 02498 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, 02499 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 02500 02501 VERIFY(0 == dsl_dataset_get_snapname(ds)); 02502 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); 02503 ASSERT0(err); 02504 dsl_dataset_name(ds, oldname); 02505 mutex_enter(&ds->ds_lock); 02506 (void) strcpy(ds->ds_snapname, newsnapname); 02507 mutex_exit(&ds->ds_lock); 02508 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 02509 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 02510 ASSERT0(err); 02511 dsl_dataset_name(ds, newname); 02512 #ifdef _KERNEL 02513 zvol_rename_minors(oldname, newname); 02514 #endif 02515 02516 spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 02517 "dataset = %llu", ds->ds_object); 02518 dsl_dataset_rele(hds, FTAG); 02519 } 02520 02521 struct renamesnaparg { 02522 dsl_sync_task_group_t *dstg; 02523 char failed[MAXPATHLEN]; 02524 char *oldsnap; 02525 char *newsnap; 02526 int error; 02527 }; 02528 02529 static int 02530 dsl_snapshot_rename_one(const char *name, void *arg) 02531 { 02532 struct renamesnaparg *ra = arg; 02533 dsl_dataset_t *ds = NULL; 02534 char *snapname; 02535 int err; 02536 02537 snapname = kmem_asprintf("%s@%s", name, ra->oldsnap); 02538 (void) strlcpy(ra->failed, snapname, sizeof (ra->failed)); 02539 02540 /* 02541 * For recursive snapshot renames the parent won't be changing 02542 * so we just pass name for both the to/from arguments. 02543 */ 02544 err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); 02545 if (err != 0) { 02546 strfree(snapname); 02547 return (err == ENOENT ? 0 : err); 02548 } 02549 02550 #ifdef _KERNEL 02551 /* 02552 * For all filesystems undergoing rename, we'll need to unmount them.
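* (A snapshot may be auto-mounted under .zfs/snapshot; zfs_unmount_snap() below tears such a mount down before the rename commits.)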
02553 */ 02554 (void) zfs_unmount_snap(snapname, NULL); 02555 #endif 02556 err = dsl_dataset_hold(snapname, ra->dstg, &ds); 02557 strfree(snapname); 02558 if (err != 0) 02559 return (err == ENOENT ? 0 : err); 02560 02561 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 02562 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 02563 02564 /* First successful rename clears the error. */ 02565 ra->error = 0; 02566 02567 return (0); 02568 } 02569 02570 static int 02571 dsl_recursive_rename(char *oldname, const char *newname) 02572 { 02573 int err; 02574 struct renamesnaparg *ra; 02575 dsl_sync_task_t *dst; 02576 spa_t *spa; 02577 char *cp, *fsname = spa_strdup(oldname); 02578 int len = strlen(oldname) + 1; 02579 02580 /* truncate the snapshot name to get the fsname */ 02581 cp = strchr(fsname, '@'); 02582 *cp = '\0'; 02583 02584 err = spa_open(fsname, &spa, FTAG); 02585 if (err) { 02586 kmem_free(fsname, len); 02587 return (err); 02588 } 02589 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 02590 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 02591 02592 ra->oldsnap = strchr(oldname, '@') + 1; 02593 ra->newsnap = strchr(newname, '@') + 1; 02594 *ra->failed = '\0'; 02595 ra->error = ENOENT; 02596 02597 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 02598 DS_FIND_CHILDREN); 02599 kmem_free(fsname, len); 02600 if (err == 0) 02601 err = ra->error; 02602 02603 if (err == 0) 02604 err = dsl_sync_task_group_wait(ra->dstg); 02605 02606 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 02607 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 02608 dsl_dataset_t *ds = dst->dst_arg1; 02609 if (dst->dst_err) { 02610 dsl_dir_name(ds->ds_dir, ra->failed); 02611 (void) strlcat(ra->failed, "@", sizeof (ra->failed)); 02612 (void) strlcat(ra->failed, ra->newsnap, 02613 sizeof (ra->failed)); 02614 } 02615 dsl_dataset_rele(ds, ra->dstg); 02616 } 02617 02618 if (err) 02619 (void) strlcpy(oldname, ra->failed, sizeof (ra->failed)); 02620 02621 dsl_sync_task_group_destroy(ra->dstg); 02622 kmem_free(ra, sizeof (struct renamesnaparg)); 02623 spa_close(spa, FTAG); 02624 return (err); 02625 } 02626 02627 static int 02628 dsl_valid_rename(const char *oldname, void *arg) 02629 { 02630 int delta = *(int *)arg; 02631 02632 if (strlen(oldname) + delta >= MAXNAMELEN) 02633 return (ENAMETOOLONG); 02634 02635 return (0); 02636 } 02637 02638 #pragma weak dmu_objset_rename = dsl_dataset_rename 02639 int 02640 dsl_dataset_rename(char *oldname, const char *newname, int flags) 02641 { 02642 dsl_dir_t *dd; 02643 dsl_dataset_t *ds; 02644 const char *tail; 02645 int err; 02646 02647 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 02648 if (err) 02649 return (err); 02650 02651 if (tail == NULL) { 02652 int delta = strlen(newname) - strlen(oldname); 02653 02654 /* if we're growing, validate child name lengths */ 02655 if (delta > 0) 02656 err = dmu_objset_find(oldname, dsl_valid_rename, 02657 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 02658 02659 if (err == 0) 02660 err = dsl_dir_rename(dd, newname, flags); 02661 dsl_dir_close(dd, FTAG); 02662 return (err); 02663 } 02664 02665 if (tail[0] != '@') { 02666 /* the name ended in a nonexistent component */ 02667 dsl_dir_close(dd, FTAG); 02668 return (ENOENT); 02669 } 02670 02671 dsl_dir_close(dd, FTAG); 02672 02673 /* new name must be snapshot in same filesystem */ 02674 tail = strchr(newname, '@'); 02675 if (tail == NULL) 02676 return (EINVAL); 02677 tail++; 02678 if (strncmp(oldname, newname, tail - newname) != 0) 02679 return (EXDEV); 02680 
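/* For example (illustrative names), renaming "tank/fs@a" to "tank/fs@b" passes the checks above, while "tank/fs@a" to "tank/other@b" returns EXDEV because a snapshot cannot move to another filesystem. */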
02681 if (flags & ZFS_RENAME_RECURSIVE) { 02682 err = dsl_recursive_rename(oldname, newname); 02683 } else { 02684 err = dsl_dataset_hold(oldname, FTAG, &ds); 02685 if (err) 02686 return (err); 02687 02688 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 02689 dsl_dataset_snapshot_rename_check, 02690 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 02691 02692 dsl_dataset_rele(ds, FTAG); 02693 } 02694 02695 return (err); 02696 } 02697 02698 struct promotenode { 02699 list_node_t link; 02700 dsl_dataset_t *ds; 02701 }; 02702 02703 struct promotearg { 02704 list_t shared_snaps, origin_snaps, clone_snaps; 02705 dsl_dataset_t *origin_origin; 02706 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 02707 char *err_ds; 02708 }; 02709 02710 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 02711 static boolean_t snaplist_unstable(list_t *l); 02712 02713 static int 02714 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 02715 { 02716 dsl_dataset_t *hds = arg1; 02717 struct promotearg *pa = arg2; 02718 struct promotenode *snap = list_head(&pa->shared_snaps); 02719 dsl_dataset_t *origin_ds = snap->ds; 02720 int err; 02721 uint64_t unused; 02722 02723 /* Check that it is a real clone */ 02724 if (!dsl_dir_is_clone(hds->ds_dir)) 02725 return (EINVAL); 02726 02727 /* Since this is so expensive, don't do the preliminary check */ 02728 if (!dmu_tx_is_syncing(tx)) 02729 return (0); 02730 02731 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 02732 return (EXDEV); 02733 02734 /* compute origin's new unique space */ 02735 snap = list_tail(&pa->clone_snaps); 02736 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 02737 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 02738 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 02739 &pa->unique, &unused, &unused); 02740 02741 /* 02742 * Walk the snapshots that we are moving 02743 * 02744 * Compute space to transfer. Consider the incremental changes 02745 * to used for each snapshot: 02746 * (my used) = (prev's used) + (blocks born) - (blocks killed) 02747 * So each snapshot gave birth to: 02748 * (blocks born) = (my used) - (prev's used) + (blocks killed) 02749 * So a sequence would look like: 02750 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 02751 * Which simplifies to: 02752 * uN + kN + kN-1 + ... + k1 + k0 02753 * Note however, if we stop before we reach the ORIGIN we get: 02754 * uN + kN + kN-1 + ... 
+ kM - uM-1 02755 */ 02756 pa->used = origin_ds->ds_phys->ds_referenced_bytes; 02757 pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 02758 pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 02759 for (snap = list_head(&pa->shared_snaps); snap; 02760 snap = list_next(&pa->shared_snaps, snap)) { 02761 uint64_t val, dlused, dlcomp, dluncomp; 02762 dsl_dataset_t *ds = snap->ds; 02763 02764 /* Check that the snapshot name does not conflict */ 02765 VERIFY(0 == dsl_dataset_get_snapname(ds)); 02766 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 02767 if (err == 0) { 02768 err = EEXIST; 02769 goto out; 02770 } 02771 if (err != ENOENT) 02772 goto out; 02773 02774 /* The very first snapshot does not have a deadlist */ 02775 if (ds->ds_phys->ds_prev_snap_obj == 0) 02776 continue; 02777 02778 dsl_deadlist_space(&ds->ds_deadlist, 02779 &dlused, &dlcomp, &dluncomp); 02780 pa->used += dlused; 02781 pa->comp += dlcomp; 02782 pa->uncomp += dluncomp; 02783 } 02784 02785 /* 02786 * If we are a clone of a clone then we never reached ORIGIN, 02787 * so we need to subtract out the clone origin's used space. 02788 */ 02789 if (pa->origin_origin) { 02790 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes; 02791 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 02792 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 02793 } 02794 02795 /* Check that there is enough space here */ 02796 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 02797 pa->used); 02798 if (err) 02799 return (err); 02800 02801 /* 02802 * Compute the amounts of space that will be used by snapshots 02803 * after the promotion (for both origin and clone). For each, 02804 * it is the amount of space that will be on all of their 02805 * deadlists (that was not born before their new origin). 02806 */ 02807 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 02808 uint64_t space; 02809 02810 /* 02811 * Note, typically this will not be a clone of a clone, 02812 * so dd_origin_txg will be < TXG_INITIAL, and 02813 * these snaplist_space() -> dsl_deadlist_space_range() 02814 * calls will be fast because they do not have to 02815 * iterate over all bps.
02816 */ 02817 snap = list_head(&pa->origin_snaps); 02818 err = snaplist_space(&pa->shared_snaps, 02819 snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap); 02820 if (err) 02821 return (err); 02822 02823 err = snaplist_space(&pa->clone_snaps, 02824 snap->ds->ds_dir->dd_origin_txg, &space); 02825 if (err) 02826 return (err); 02827 pa->cloneusedsnap += space; 02828 } 02829 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 02830 err = snaplist_space(&pa->origin_snaps, 02831 origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); 02832 if (err) 02833 return (err); 02834 } 02835 02836 return (0); 02837 out: 02838 pa->err_ds = snap->ds->ds_snapname; 02839 return (err); 02840 } 02841 02842 static void 02843 dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) 02844 { 02845 dsl_dataset_t *hds = arg1; 02846 struct promotearg *pa = arg2; 02847 struct promotenode *snap = list_head(&pa->shared_snaps); 02848 dsl_dataset_t *origin_ds = snap->ds; 02849 dsl_dataset_t *origin_head; 02850 dsl_dir_t *dd = hds->ds_dir; 02851 dsl_pool_t *dp = hds->ds_dir->dd_pool; 02852 dsl_dir_t *odd = NULL; 02853 uint64_t oldnext_obj; 02854 int64_t delta; 02855 02856 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 02857 02858 snap = list_head(&pa->origin_snaps); 02859 origin_head = snap->ds; 02860 02861 /* 02862 * We need to explicitly open odd, since origin_ds's dd will be 02863 * changing. 02864 */ 02865 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 02866 NULL, FTAG, &odd)); 02867 02868 /* change origin's next snap */ 02869 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 02870 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 02871 snap = list_tail(&pa->clone_snaps); 02872 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 02873 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 02874 02875 /* change the origin's next clone */ 02876 if (origin_ds->ds_phys->ds_next_clones_obj) { 02877 remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); 02878 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 02879 origin_ds->ds_phys->ds_next_clones_obj, 02880 oldnext_obj, tx)); 02881 } 02882 02883 /* change origin */ 02884 dmu_buf_will_dirty(dd->dd_dbuf, tx); 02885 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 02886 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 02887 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 02888 dmu_buf_will_dirty(odd->dd_dbuf, tx); 02889 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 02890 origin_head->ds_dir->dd_origin_txg = 02891 origin_ds->ds_phys->ds_creation_txg; 02892 02893 /* change dd_clone entries */ 02894 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 02895 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 02896 odd->dd_phys->dd_clones, hds->ds_object, tx)); 02897 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 02898 pa->origin_origin->ds_dir->dd_phys->dd_clones, 02899 hds->ds_object, tx)); 02900 02901 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 02902 pa->origin_origin->ds_dir->dd_phys->dd_clones, 02903 origin_head->ds_object, tx)); 02904 if (dd->dd_phys->dd_clones == 0) { 02905 dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 02906 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 02907 } 02908 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 02909 dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 02910 02911 } 02912 02913 /* move snapshots to this dir */ 02914 for (snap = list_head(&pa->shared_snaps); snap; 02915 snap = list_next(&pa->shared_snaps, 
snap)) { 02916 dsl_dataset_t *ds = snap->ds; 02917 02918 /* unregister props as dsl_dir is changing */ 02919 if (ds->ds_objset) { 02920 dmu_objset_evict(ds->ds_objset); 02921 ds->ds_objset = NULL; 02922 } 02923 /* move snap name entry */ 02924 VERIFY(0 == dsl_dataset_get_snapname(ds)); 02925 VERIFY(0 == dsl_dataset_snap_remove(origin_head, 02926 ds->ds_snapname, tx)); 02927 VERIFY(0 == zap_add(dp->dp_meta_objset, 02928 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 02929 8, 1, &ds->ds_object, tx)); 02930 02931 /* change containing dsl_dir */ 02932 dmu_buf_will_dirty(ds->ds_dbuf, tx); 02933 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 02934 ds->ds_phys->ds_dir_obj = dd->dd_object; 02935 ASSERT3P(ds->ds_dir, ==, odd); 02936 dsl_dir_close(ds->ds_dir, ds); 02937 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 02938 NULL, ds, &ds->ds_dir)); 02939 02940 /* move any clone references */ 02941 if (ds->ds_phys->ds_next_clones_obj && 02942 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 02943 zap_cursor_t zc; 02944 zap_attribute_t za; 02945 02946 for (zap_cursor_init(&zc, dp->dp_meta_objset, 02947 ds->ds_phys->ds_next_clones_obj); 02948 zap_cursor_retrieve(&zc, &za) == 0; 02949 zap_cursor_advance(&zc)) { 02950 dsl_dataset_t *cnds; 02951 uint64_t o; 02952 02953 if (za.za_first_integer == oldnext_obj) { 02954 /* 02955 * We've already moved the 02956 * origin's reference. 02957 */ 02958 continue; 02959 } 02960 02961 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, 02962 za.za_first_integer, FTAG, &cnds)); 02963 o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; 02964 02965 VERIFY3U(zap_remove_int(dp->dp_meta_objset, 02966 odd->dd_phys->dd_clones, o, tx), ==, 0); 02967 VERIFY3U(zap_add_int(dp->dp_meta_objset, 02968 dd->dd_phys->dd_clones, o, tx), ==, 0); 02969 dsl_dataset_rele(cnds, FTAG); 02970 } 02971 zap_cursor_fini(&zc); 02972 } 02973 02974 ASSERT0(dsl_prop_numcb(ds)); 02975 } 02976 02977 /* 02978 * Change space accounting. 02979 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 02980 * both be valid, or both be 0 (resulting in delta == 0). This 02981 * is true for each of {clone,origin} independently. 
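* An illustration with assumed numbers: if pa->cloneusedsnap is 5G and the clone dir's existing dd_used_breakdown[DD_USED_SNAP] is 2G, delta below is +3G, so 3G of the transferred space is charged to DD_USED_SNAP and the remaining pa->used - 3G to DD_USED_HEAD.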
02982 */ 02983 02984 delta = pa->cloneusedsnap - 02985 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 02986 ASSERT3S(delta, >=, 0); 02987 ASSERT3U(pa->used, >=, delta); 02988 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 02989 dsl_dir_diduse_space(dd, DD_USED_HEAD, 02990 pa->used - delta, pa->comp, pa->uncomp, tx); 02991 02992 delta = pa->originusedsnap - 02993 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 02994 ASSERT3S(delta, <=, 0); 02995 ASSERT3U(pa->used, >=, -delta); 02996 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 02997 dsl_dir_diduse_space(odd, DD_USED_HEAD, 02998 -pa->used - delta, -pa->comp, -pa->uncomp, tx); 02999 03000 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 03001 03002 /* log history record */ 03003 spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 03004 "dataset = %llu", hds->ds_object); 03005 03006 dsl_dir_close(odd, FTAG); 03007 } 03008 03009 static char *snaplist_tag = "snaplist"; 03016 static int 03017 snaplist_make(dsl_pool_t *dp, boolean_t own, 03018 uint64_t first_obj, uint64_t last_obj, list_t *l) 03019 { 03020 uint64_t obj = last_obj; 03021 03022 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 03023 03024 list_create(l, sizeof (struct promotenode), 03025 offsetof(struct promotenode, link)); 03026 03027 while (obj != first_obj) { 03028 dsl_dataset_t *ds; 03029 struct promotenode *snap; 03030 int err; 03031 03032 if (own) { 03033 err = dsl_dataset_own_obj(dp, obj, 03034 0, snaplist_tag, &ds); 03035 if (err == 0) 03036 dsl_dataset_make_exclusive(ds, snaplist_tag); 03037 } else { 03038 err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 03039 } 03040 if (err == ENOENT) { 03041 /* lost race with snapshot destroy */ 03042 struct promotenode *last = list_tail(l); 03043 ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 03044 obj = last->ds->ds_phys->ds_prev_snap_obj; 03045 continue; 03046 } else if (err) { 03047 return (err); 03048 } 03049 03050 if (first_obj == 0) 03051 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 03052 03053 snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 03054 snap->ds = ds; 03055 list_insert_tail(l, snap); 03056 obj = ds->ds_phys->ds_prev_snap_obj; 03057 } 03058 03059 return (0); 03060 } 03061 03062 static int 03063 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 03064 { 03065 struct promotenode *snap; 03066 03067 *spacep = 0; 03068 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 03069 uint64_t used, comp, uncomp; 03070 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 03071 mintxg, UINT64_MAX, &used, &comp, &uncomp); 03072 *spacep += used; 03073 } 03074 return (0); 03075 } 03076 03077 static void 03078 snaplist_destroy(list_t *l, boolean_t own) 03079 { 03080 struct promotenode *snap; 03081 03082 if (!l || !list_link_active(&l->list_head)) 03083 return; 03084 03085 while ((snap = list_tail(l)) != NULL) { 03086 list_remove(l, snap); 03087 if (own) 03088 dsl_dataset_disown(snap->ds, snaplist_tag); 03089 else 03090 dsl_dataset_rele(snap->ds, snaplist_tag); 03091 kmem_free(snap, sizeof (struct promotenode)); 03092 } 03093 list_destroy(l); 03094 } 03095 03107 int 03108 dsl_dataset_promote(const char *name, char *conflsnap) 03109 { 03110 dsl_dataset_t *ds; 03111 dsl_dir_t *dd; 03112 dsl_pool_t *dp; 03113 dmu_object_info_t doi; 03114 struct promotearg pa = { 0 }; 03115 struct promotenode *snap; 03116 int err; 03117 03118 err = dsl_dataset_hold(name, FTAG, &ds); 03119 if (err) 03120 return (err); 03121 dd = ds->ds_dir; 03122 dp = dd->dd_pool; 03123 03124 err = 
dmu_object_info(dp->dp_meta_objset, 03125 ds->ds_phys->ds_snapnames_zapobj, &doi); 03126 if (err) { 03127 dsl_dataset_rele(ds, FTAG); 03128 return (err); 03129 } 03130 03131 if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 03132 dsl_dataset_rele(ds, FTAG); 03133 return (EINVAL); 03134 } 03135 03136 /* 03137 * We are going to inherit all the snapshots taken before our 03138 * origin (i.e., our new origin will be our parent's origin). 03139 * Take ownership of them so that we can rename them into our 03140 * namespace. 03141 */ 03142 rw_enter(&dp->dp_config_rwlock, RW_READER); 03143 03144 err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 03145 &pa.shared_snaps); 03146 if (err != 0) 03147 goto out; 03148 03149 err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 03150 if (err != 0) 03151 goto out; 03152 03153 snap = list_head(&pa.shared_snaps); 03154 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 03155 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 03156 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 03157 if (err != 0) 03158 goto out; 03159 03160 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 03161 err = dsl_dataset_hold_obj(dp, 03162 snap->ds->ds_dir->dd_phys->dd_origin_obj, 03163 FTAG, &pa.origin_origin); 03164 if (err != 0) 03165 goto out; 03166 } 03167 03168 out: 03169 rw_exit(&dp->dp_config_rwlock); 03170 03171 /* 03172 * Add in 128x the snapnames zapobj size, since we will be moving 03173 * a bunch of snapnames to the promoted ds, and dirtying their 03174 * bonus buffers. 03175 */ 03176 if (err == 0) { 03177 err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 03178 dsl_dataset_promote_sync, ds, &pa, 03179 2 + 2 * doi.doi_physical_blocks_512); 03180 if (err && pa.err_ds && conflsnap) 03181 (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN); 03182 } 03183 03184 snaplist_destroy(&pa.shared_snaps, B_TRUE); 03185 snaplist_destroy(&pa.clone_snaps, B_FALSE); 03186 snaplist_destroy(&pa.origin_snaps, B_FALSE); 03187 if (pa.origin_origin) 03188 dsl_dataset_rele(pa.origin_origin, FTAG); 03189 dsl_dataset_rele(ds, FTAG); 03190 return (err); 03191 } 03192 03193 struct cloneswaparg { 03194 dsl_dataset_t *cds; /* clone dataset */ 03195 dsl_dataset_t *ohds; /* origin's head dataset */ 03196 boolean_t force; 03197 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 03198 }; 03199 03200 /* ARGSUSED */ 03201 static int 03202 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 03203 { 03204 struct cloneswaparg *csa = arg1; 03205 03206 /* they should both be heads */ 03207 if (dsl_dataset_is_snapshot(csa->cds) || 03208 dsl_dataset_is_snapshot(csa->ohds)) 03209 return (EINVAL); 03210 03211 /* the branch point should be just before them */ 03212 if (csa->cds->ds_prev != csa->ohds->ds_prev) 03213 return (EINVAL); 03214 03215 /* cds should be the clone (unless they are unrelated) */ 03216 if (csa->cds->ds_prev != NULL && 03217 csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && 03218 csa->ohds->ds_object != 03219 csa->cds->ds_prev->ds_phys->ds_next_snap_obj) 03220 return (EINVAL); 03221 03222 /* the clone should be a child of the origin */ 03223 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 03224 return (EINVAL); 03225 03226 /* ohds shouldn't be modified unless 'force' */ 03227 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 03228 return (ETXTBSY); 03229 03230 /* adjust amount of any unconsumed refreservation */ 03231 
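/* Worked example with assumed numbers: with refreservation = 10G, head unique = 4G and clone unique = 1G, the delta below is MIN(10G, 4G) - MIN(10G, 1G) = +3G; after the swap 3G more of the refreservation is unconsumed, and the ENOSPC check verifies that this fits in the available space. */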
csa->unused_refres_delta = 03232 (int64_t)MIN(csa->ohds->ds_reserved, 03233 csa->ohds->ds_phys->ds_unique_bytes) - 03234 (int64_t)MIN(csa->ohds->ds_reserved, 03235 csa->cds->ds_phys->ds_unique_bytes); 03236 03237 if (csa->unused_refres_delta > 0 && 03238 csa->unused_refres_delta > 03239 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 03240 return (ENOSPC); 03241 03242 if (csa->ohds->ds_quota != 0 && 03243 csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota) 03244 return (EDQUOT); 03245 03246 return (0); 03247 } 03248 03249 /* ARGSUSED */ 03250 static void 03251 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) 03252 { 03253 struct cloneswaparg *csa = arg1; 03254 dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 03255 03256 ASSERT(csa->cds->ds_reserved == 0); 03257 ASSERT(csa->ohds->ds_quota == 0 || 03258 csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota); 03259 03260 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 03261 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 03262 03263 if (csa->cds->ds_objset != NULL) { 03264 dmu_objset_evict(csa->cds->ds_objset); 03265 csa->cds->ds_objset = NULL; 03266 } 03267 03268 if (csa->ohds->ds_objset != NULL) { 03269 dmu_objset_evict(csa->ohds->ds_objset); 03270 csa->ohds->ds_objset = NULL; 03271 } 03272 03273 /* 03274 * Reset origin's unique bytes, if it exists. 03275 */ 03276 if (csa->cds->ds_prev) { 03277 dsl_dataset_t *origin = csa->cds->ds_prev; 03278 uint64_t comp, uncomp; 03279 03280 dmu_buf_will_dirty(origin->ds_dbuf, tx); 03281 dsl_deadlist_space_range(&csa->cds->ds_deadlist, 03282 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 03283 &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); 03284 } 03285 03286 /* swap blkptrs */ 03287 { 03288 blkptr_t tmp; 03289 tmp = csa->ohds->ds_phys->ds_bp; 03290 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 03291 csa->cds->ds_phys->ds_bp = tmp; 03292 } 03293 03294 /* set dd_*_bytes */ 03295 { 03296 int64_t dused, dcomp, duncomp; 03297 uint64_t cdl_used, cdl_comp, cdl_uncomp; 03298 uint64_t odl_used, odl_comp, odl_uncomp; 03299 03300 ASSERT3U(csa->cds->ds_dir->dd_phys-> 03301 dd_used_breakdown[DD_USED_SNAP], ==, 0); 03302 03303 dsl_deadlist_space(&csa->cds->ds_deadlist, 03304 &cdl_used, &cdl_comp, &cdl_uncomp); 03305 dsl_deadlist_space(&csa->ohds->ds_deadlist, 03306 &odl_used, &odl_comp, &odl_uncomp); 03307 03308 dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used - 03309 (csa->ohds->ds_phys->ds_referenced_bytes + odl_used); 03310 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 03311 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 03312 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 03313 cdl_uncomp - 03314 (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 03315 03316 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, 03317 dused, dcomp, duncomp, tx); 03318 dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, 03319 -dused, -dcomp, -duncomp, tx); 03320 03321 /* 03322 * The difference in the space used by snapshots is the 03323 * difference in snapshot space due to the head's 03324 * deadlist (since that's the only thing that's 03325 * changing that affects the snapused). 
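* For instance (assumed numbers), if the clone's deadlist holds 2G past dd_origin_txg and the head's holds 0.5G, the dsl_dir_transfer_space() call below moves 1.5G from DD_USED_HEAD to DD_USED_SNAP.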
03326 */ 03327 dsl_deadlist_space_range(&csa->cds->ds_deadlist, 03328 csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, 03329 &cdl_used, &cdl_comp, &cdl_uncomp); 03330 dsl_deadlist_space_range(&csa->ohds->ds_deadlist, 03331 csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, 03332 &odl_used, &odl_comp, &odl_uncomp); 03333 dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, 03334 DD_USED_HEAD, DD_USED_SNAP, tx); 03335 } 03336 03337 /* swap ds_*_bytes */ 03338 SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes, 03339 csa->cds->ds_phys->ds_referenced_bytes); 03340 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 03341 csa->cds->ds_phys->ds_compressed_bytes); 03342 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 03343 csa->cds->ds_phys->ds_uncompressed_bytes); 03344 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 03345 csa->cds->ds_phys->ds_unique_bytes); 03346 03347 /* apply any parent delta for change in unconsumed refreservation */ 03348 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, 03349 csa->unused_refres_delta, 0, 0, tx); 03350 03351 /* 03352 * Swap deadlists. 03353 */ 03354 dsl_deadlist_close(&csa->cds->ds_deadlist); 03355 dsl_deadlist_close(&csa->ohds->ds_deadlist); 03356 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 03357 csa->cds->ds_phys->ds_deadlist_obj); 03358 dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 03359 csa->cds->ds_phys->ds_deadlist_obj); 03360 dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 03361 csa->ohds->ds_phys->ds_deadlist_obj); 03362 03363 dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); 03364 } 03365 03372 int 03373 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 03374 boolean_t force) 03375 { 03376 struct cloneswaparg csa; 03377 int error; 03378 03379 ASSERT(clone->ds_owner); 03380 ASSERT(origin_head->ds_owner); 03381 retry: 03382 /* 03383 * Need exclusive access for the swap. If we're swapping these 03384 * datasets back after an error, we already hold the locks. 
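* The rw_tryenter()/back-off loop below also avoids deadlocking against a thread taking the same two ds_rwlocks in the opposite order: whenever the second lock cannot be acquired, both are dropped and the sequence restarts.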
03385 */ 03386 if (!RW_WRITE_HELD(&clone->ds_rwlock)) 03387 rw_enter(&clone->ds_rwlock, RW_WRITER); 03388 if (!RW_WRITE_HELD(&origin_head->ds_rwlock) && 03389 !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { 03390 rw_exit(&clone->ds_rwlock); 03391 rw_enter(&origin_head->ds_rwlock, RW_WRITER); 03392 if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { 03393 rw_exit(&origin_head->ds_rwlock); 03394 goto retry; 03395 } 03396 } 03397 csa.cds = clone; 03398 csa.ohds = origin_head; 03399 csa.force = force; 03400 error = dsl_sync_task_do(clone->ds_dir->dd_pool, 03401 dsl_dataset_clone_swap_check, 03402 dsl_dataset_clone_swap_sync, &csa, NULL, 9); 03403 return (error); 03404 } 03405 03412 int 03413 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 03414 { 03415 spa_t *spa; 03416 dsl_pool_t *dp; 03417 dsl_dataset_t *ds; 03418 int error; 03419 03420 if ((error = spa_open(pname, &spa, FTAG)) != 0) 03421 return (error); 03422 dp = spa_get_dsl(spa); 03423 rw_enter(&dp->dp_config_rwlock, RW_READER); 03424 if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 03425 dsl_dataset_name(ds, buf); 03426 dsl_dataset_rele(ds, FTAG); 03427 } 03428 rw_exit(&dp->dp_config_rwlock); 03429 spa_close(spa, FTAG); 03430 03431 return (error); 03432 } 03433 03434 int 03435 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 03436 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 03437 { 03438 int error = 0; 03439 03440 ASSERT3S(asize, >, 0); 03441 03442 /* 03443 * *ref_rsrv is the portion of asize that will come from any 03444 * unconsumed refreservation space. 03445 */ 03446 *ref_rsrv = 0; 03447 03448 mutex_enter(&ds->ds_lock); 03449 /* 03450 * Make a space adjustment for reserved bytes. 03451 */ 03452 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 03453 ASSERT3U(*used, >=, 03454 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 03455 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 03456 *ref_rsrv = 03457 asize - MIN(asize, parent_delta(ds, asize + inflight)); 03458 } 03459 03460 if (!check_quota || ds->ds_quota == 0) { 03461 mutex_exit(&ds->ds_lock); 03462 return (0); 03463 } 03464 /* 03465 * If they are requesting more space, and our current estimate 03466 * is over quota, they get to try again unless the actual 03467 * on-disk is over quota and there are no pending changes (which 03468 * may free up space for us). 
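* Concretely, ERESTART below tells the caller to wait for the pending txg to sync and retry, while EDQUOT is returned only once the on-disk referenced bytes alone reach the quota with nothing in flight.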
03469 */ 03470 if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { 03471 if (inflight > 0 || 03472 ds->ds_phys->ds_referenced_bytes < ds->ds_quota) 03473 error = ERESTART; 03474 else 03475 error = EDQUOT; 03476 } 03477 mutex_exit(&ds->ds_lock); 03478 03479 return (error); 03480 } 03481 03482 /* ARGSUSED */ 03483 static int 03484 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 03485 { 03486 dsl_dataset_t *ds = arg1; 03487 dsl_prop_setarg_t *psa = arg2; 03488 int err; 03489 03490 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 03491 return (ENOTSUP); 03492 03493 if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) 03494 return (err); 03495 03496 if (psa->psa_effective_value == 0) 03497 return (0); 03498 03499 if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes || 03500 psa->psa_effective_value < ds->ds_reserved) 03501 return (ENOSPC); 03502 03503 return (0); 03504 } 03505 03506 extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); 03507 03508 void 03509 dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) 03510 { 03511 dsl_dataset_t *ds = arg1; 03512 dsl_prop_setarg_t *psa = arg2; 03513 uint64_t effective_value = psa->psa_effective_value; 03514 03515 dsl_prop_set_sync(ds, psa, tx); 03516 DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); 03517 03518 if (ds->ds_quota != effective_value) { 03519 dmu_buf_will_dirty(ds->ds_dbuf, tx); 03520 ds->ds_quota = effective_value; 03521 } 03522 } 03523 03524 int 03525 dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota) 03526 { 03527 dsl_dataset_t *ds; 03528 dsl_prop_setarg_t psa; 03529 int err; 03530 03531 dsl_prop_setarg_init_uint64(&psa, "refquota", source, &quota); 03532 03533 err = dsl_dataset_hold(dsname, FTAG, &ds); 03534 if (err) 03535 return (err); 03536 03537 /* 03538 * If someone removes a file, then tries to set the quota, we 03539 * want to make sure the file freeing takes effect. 03540 */ 03541 txg_wait_open(ds->ds_dir->dd_pool, 0); 03542 03543 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 03544 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 03545 ds, &psa, 0); 03546 03547 dsl_dataset_rele(ds, FTAG); 03548 return (err); 03549 } 03550 03551 static int 03552 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 03553 { 03554 dsl_dataset_t *ds = arg1; 03555 dsl_prop_setarg_t *psa = arg2; 03556 uint64_t effective_value; 03557 uint64_t unique; 03558 int err; 03559 03560 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 03561 SPA_VERSION_REFRESERVATION) 03562 return (ENOTSUP); 03563 03564 if (dsl_dataset_is_snapshot(ds)) 03565 return (EINVAL); 03566 03567 if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) 03568 return (err); 03569 03570 effective_value = psa->psa_effective_value; 03571 03572 /* 03573 * If we are doing the preliminary check in open context, the 03574 * space estimates may be inaccurate.
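* (The open-context pass therefore returns success below and defers real enforcement to the syncing-context invocation, where the numbers are stable.)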
03575 */ 03576 if (!dmu_tx_is_syncing(tx)) 03577 return (0); 03578 03579 mutex_enter(&ds->ds_lock); 03580 if (!DS_UNIQUE_IS_ACCURATE(ds)) 03581 dsl_dataset_recalc_head_uniq(ds); 03582 unique = ds->ds_phys->ds_unique_bytes; 03583 mutex_exit(&ds->ds_lock); 03584 03585 if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) { 03586 uint64_t delta = MAX(unique, effective_value) - 03587 MAX(unique, ds->ds_reserved); 03588 03589 if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 03590 return (ENOSPC); 03591 if (ds->ds_quota > 0 && 03592 effective_value > ds->ds_quota) 03593 return (ENOSPC); 03594 } 03595 03596 return (0); 03597 } 03598 03599 static void 03600 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) 03601 { 03602 dsl_dataset_t *ds = arg1; 03603 dsl_prop_setarg_t *psa = arg2; 03604 uint64_t effective_value = psa->psa_effective_value; 03605 uint64_t unique; 03606 int64_t delta; 03607 03608 dsl_prop_set_sync(ds, psa, tx); 03609 DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); 03610 03611 dmu_buf_will_dirty(ds->ds_dbuf, tx); 03612 03613 mutex_enter(&ds->ds_dir->dd_lock); 03614 mutex_enter(&ds->ds_lock); 03615 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 03616 unique = ds->ds_phys->ds_unique_bytes; 03617 delta = MAX(0, (int64_t)(effective_value - unique)) - 03618 MAX(0, (int64_t)(ds->ds_reserved - unique)); 03619 ds->ds_reserved = effective_value; 03620 mutex_exit(&ds->ds_lock); 03621 03622 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 03623 mutex_exit(&ds->ds_dir->dd_lock); 03624 } 03625 03626 int 03627 dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, 03628 uint64_t reservation) 03629 { 03630 dsl_dataset_t *ds; 03631 dsl_prop_setarg_t psa; 03632 int err; 03633 03634 dsl_prop_setarg_init_uint64(&psa, "refreservation", source, 03635 &reservation); 03636 03637 err = dsl_dataset_hold(dsname, FTAG, &ds); 03638 if (err) 03639 return (err); 03640 03641 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 03642 dsl_dataset_set_reservation_check, 03643 dsl_dataset_set_reservation_sync, ds, &psa, 0); 03644 03645 dsl_dataset_rele(ds, FTAG); 03646 return (err); 03647 } 03648 03649 typedef struct zfs_hold_cleanup_arg { 03650 dsl_pool_t *dp; 03651 uint64_t dsobj; 03652 char htag[MAXNAMELEN]; 03653 } zfs_hold_cleanup_arg_t; 03654 03655 static void 03656 dsl_dataset_user_release_onexit(void *arg) 03657 { 03658 zfs_hold_cleanup_arg_t *ca = arg; 03659 03660 (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, 03661 B_TRUE); 03662 kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); 03663 } 03664 03665 void 03666 dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, 03667 minor_t minor) 03668 { 03669 zfs_hold_cleanup_arg_t *ca; 03670 03671 ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); 03672 ca->dp = ds->ds_dir->dd_pool; 03673 ca->dsobj = ds->ds_object; 03674 (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); 03675 VERIFY3U(0, ==, zfs_onexit_add_cb(minor, 03676 dsl_dataset_user_release_onexit, ca, NULL)); 03677 } 03678 03684 static int 03685 dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) 03686 { 03687 dsl_dataset_t *ds = arg1; 03688 struct dsl_ds_holdarg *ha = arg2; 03689 char *htag = ha->htag; 03690 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 03691 int error = 0; 03692 03693 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 03694 return (ENOTSUP); 03695 03696 if (!dsl_dataset_is_snapshot(ds)) 03697 return (EINVAL); 03698 03699 /* tags must be unique */ 03700 
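/* For example (illustrative tag and dataset names), issuing "zfs hold mytag pool/fs@snap" twice makes the zap_lookup() below succeed on the second attempt, which is reported back as EEXIST. */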
mutex_enter(&ds->ds_lock); 03701 if (ds->ds_phys->ds_userrefs_obj) { 03702 error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, 03703 8, 1, tx); 03704 if (error == 0) 03705 error = EEXIST; 03706 else if (error == ENOENT) 03707 error = 0; 03708 } 03709 mutex_exit(&ds->ds_lock); 03710 03711 if (error == 0 && ha->temphold && 03712 strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) 03713 error = E2BIG; 03714 03715 return (error); 03716 } 03717 03718 void 03719 dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) 03720 { 03721 dsl_dataset_t *ds = arg1; 03722 struct dsl_ds_holdarg *ha = arg2; 03723 char *htag = ha->htag; 03724 dsl_pool_t *dp = ds->ds_dir->dd_pool; 03725 objset_t *mos = dp->dp_meta_objset; 03726 uint64_t now = gethrestime_sec(); 03727 uint64_t zapobj; 03728 03729 mutex_enter(&ds->ds_lock); 03730 if (ds->ds_phys->ds_userrefs_obj == 0) { 03731 /* 03732 * This is the first user hold for this dataset. Create 03733 * the userrefs zap object. 03734 */ 03735 dmu_buf_will_dirty(ds->ds_dbuf, tx); 03736 zapobj = ds->ds_phys->ds_userrefs_obj = 03737 zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); 03738 } else { 03739 zapobj = ds->ds_phys->ds_userrefs_obj; 03740 } 03741 ds->ds_userrefs++; 03742 mutex_exit(&ds->ds_lock); 03743 03744 VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); 03745 03746 if (ha->temphold) { 03747 VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, 03748 htag, &now, tx)); 03749 } 03750 03751 spa_history_log_internal(LOG_DS_USER_HOLD, 03752 dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, 03753 (int)ha->temphold, ds->ds_object); 03754 } 03755 03756 static int 03757 dsl_dataset_user_hold_one(const char *dsname, void *arg) 03758 { 03759 struct dsl_ds_holdarg *ha = arg; 03760 dsl_dataset_t *ds; 03761 int error; 03762 char *name; 03763 03764 /* alloc a buffer to hold dsname@snapname plus terminating NULL */ 03765 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 03766 error = dsl_dataset_hold(name, ha->dstg, &ds); 03767 strfree(name); 03768 if (error == 0) { 03769 ha->gotone = B_TRUE; 03770 dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, 03771 dsl_dataset_user_hold_sync, ds, ha, 0); 03772 } else if (error == ENOENT && ha->recursive) { 03773 error = 0; 03774 } else { 03775 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 03776 } 03777 return (error); 03778 } 03779 03780 int 03781 dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, 03782 boolean_t temphold) 03783 { 03784 struct dsl_ds_holdarg *ha; 03785 int error; 03786 03787 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 03788 ha->htag = htag; 03789 ha->temphold = temphold; 03790 error = dsl_sync_task_do(ds->ds_dir->dd_pool, 03791 dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, 03792 ds, ha, 0); 03793 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 03794 03795 return (error); 03796 } 03797 03798 int 03799 dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, 03800 boolean_t recursive, boolean_t temphold, int cleanup_fd) 03801 { 03802 struct dsl_ds_holdarg *ha; 03803 dsl_sync_task_t *dst; 03804 spa_t *spa; 03805 int error; 03806 minor_t minor = 0; 03807 03808 if (cleanup_fd != -1) { 03809 /* Currently we only support cleanup-on-exit of tempholds. 
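* (cleanup_fd is an open ZFS control-device descriptor; zfs_onexit_fd_hold() pins it, and when it is finally closed the onexit callback registered for the hold releases the tag.)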
*/ 03810 if (!temphold) 03811 return (EINVAL); 03812 error = zfs_onexit_fd_hold(cleanup_fd, &minor); 03813 if (error) 03814 return (error); 03815 } 03816 03817 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 03818 03819 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 03820 03821 error = spa_open(dsname, &spa, FTAG); 03822 if (error) { 03823 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 03824 if (cleanup_fd != -1) 03825 zfs_onexit_fd_rele(cleanup_fd); 03826 return (error); 03827 } 03828 03829 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 03830 ha->htag = htag; 03831 ha->snapname = snapname; 03832 ha->recursive = recursive; 03833 ha->temphold = temphold; 03834 03835 if (recursive) { 03836 error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, 03837 ha, DS_FIND_CHILDREN); 03838 } else { 03839 error = dsl_dataset_user_hold_one(dsname, ha); 03840 } 03841 if (error == 0) 03842 error = dsl_sync_task_group_wait(ha->dstg); 03843 03844 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 03845 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 03846 dsl_dataset_t *ds = dst->dst_arg1; 03847 03848 if (dst->dst_err) { 03849 dsl_dataset_name(ds, ha->failed); 03850 *strchr(ha->failed, '@') = '\0'; 03851 } else if (error == 0 && minor != 0 && temphold) { 03852 /* 03853 * If this hold is to be released upon process exit, 03854 * register that action now. 03855 */ 03856 dsl_register_onexit_hold_cleanup(ds, htag, minor); 03857 } 03858 dsl_dataset_rele(ds, ha->dstg); 03859 } 03860 03861 if (error == 0 && recursive && !ha->gotone) 03862 error = ENOENT; 03863 03864 if (error) 03865 (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); 03866 03867 dsl_sync_task_group_destroy(ha->dstg); 03868 03869 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 03870 spa_close(spa, FTAG); 03871 if (cleanup_fd != -1) 03872 zfs_onexit_fd_rele(cleanup_fd); 03873 return (error); 03874 } 03875 03876 struct dsl_ds_releasearg { 03877 dsl_dataset_t *ds; 03878 const char *htag; 03879 boolean_t own; /* do we own or just hold ds? 
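				    (we must own ds, not merely hold it,
				    when the release may destroy it)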
*/ 03880 }; 03881 03882 static int 03883 dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, 03884 boolean_t *might_destroy) 03885 { 03886 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 03887 uint64_t zapobj; 03888 uint64_t tmp; 03889 int error; 03890 03891 *might_destroy = B_FALSE; 03892 03893 mutex_enter(&ds->ds_lock); 03894 zapobj = ds->ds_phys->ds_userrefs_obj; 03895 if (zapobj == 0) { 03896 /* The tag can't possibly exist */ 03897 mutex_exit(&ds->ds_lock); 03898 return (ESRCH); 03899 } 03900 03901 /* Make sure the tag exists */ 03902 error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); 03903 if (error) { 03904 mutex_exit(&ds->ds_lock); 03905 if (error == ENOENT) 03906 error = ESRCH; 03907 return (error); 03908 } 03909 03910 if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && 03911 DS_IS_DEFER_DESTROY(ds)) 03912 *might_destroy = B_TRUE; 03913 03914 mutex_exit(&ds->ds_lock); 03915 return (0); 03916 } 03917 03918 static int 03919 dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) 03920 { 03921 struct dsl_ds_releasearg *ra = arg1; 03922 dsl_dataset_t *ds = ra->ds; 03923 boolean_t might_destroy; 03924 int error; 03925 03926 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 03927 return (ENOTSUP); 03928 03929 error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); 03930 if (error) 03931 return (error); 03932 03933 if (might_destroy) { 03934 struct dsl_ds_destroyarg dsda = {0}; 03935 03936 if (dmu_tx_is_syncing(tx)) { 03937 /* 03938 * If we're not prepared to remove the snapshot, 03939 * we can't allow the release to happen right now. 03940 */ 03941 if (!ra->own) 03942 return (EBUSY); 03943 } 03944 dsda.ds = ds; 03945 dsda.releasing = B_TRUE; 03946 return (dsl_dataset_destroy_check(&dsda, tag, tx)); 03947 } 03948 03949 return (0); 03950 } 03951 03952 static void 03953 dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) 03954 { 03955 struct dsl_ds_releasearg *ra = arg1; 03956 dsl_dataset_t *ds = ra->ds; 03957 dsl_pool_t *dp = ds->ds_dir->dd_pool; 03958 objset_t *mos = dp->dp_meta_objset; 03959 uint64_t zapobj; 03960 uint64_t dsobj = ds->ds_object; 03961 uint64_t refs; 03962 int error; 03963 03964 mutex_enter(&ds->ds_lock); 03965 ds->ds_userrefs--; 03966 refs = ds->ds_userrefs; 03967 mutex_exit(&ds->ds_lock); 03968 error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); 03969 VERIFY(error == 0 || error == ENOENT); 03970 zapobj = ds->ds_phys->ds_userrefs_obj; 03971 VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); 03972 03973 spa_history_log_internal(LOG_DS_USER_RELEASE, 03974 dp->dp_spa, tx, "<%s> %lld dataset = %llu", 03975 ra->htag, (longlong_t)refs, dsobj); 03976 03977 if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && 03978 DS_IS_DEFER_DESTROY(ds)) { 03979 struct dsl_ds_destroyarg dsda = {0}; 03980 03981 ASSERT(ra->own); 03982 dsda.ds = ds; 03983 dsda.releasing = B_TRUE; 03984 /* We already did the destroy_check */ 03985 dsl_dataset_destroy_sync(&dsda, tag, tx); 03986 } 03987 } 03988 03989 static int 03990 dsl_dataset_user_release_one(const char *dsname, void *arg) 03991 { 03992 struct dsl_ds_holdarg *ha = arg; 03993 struct dsl_ds_releasearg *ra; 03994 dsl_dataset_t *ds; 03995 int error; 03996 void *dtag = ha->dstg; 03997 char *name; 03998 boolean_t own = B_FALSE; 03999 boolean_t might_destroy; 04000 04001 /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ 04002 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 04003 error = 
dsl_dataset_hold(name, dtag, &ds); 04004 strfree(name); 04005 if (error == ENOENT && ha->recursive) 04006 return (0); 04007 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 04008 if (error) 04009 return (error); 04010 04011 ha->gotone = B_TRUE; 04012 04013 ASSERT(dsl_dataset_is_snapshot(ds)); 04014 04015 error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); 04016 if (error) { 04017 dsl_dataset_rele(ds, dtag); 04018 return (error); 04019 } 04020 04021 if (might_destroy) { 04022 #ifdef _KERNEL 04023 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 04024 error = zfs_unmount_snap(name, NULL); 04025 strfree(name); 04026 if (error) { 04027 dsl_dataset_rele(ds, dtag); 04028 return (error); 04029 } 04030 #endif 04031 if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { 04032 dsl_dataset_rele(ds, dtag); 04033 return (EBUSY); 04034 } else { 04035 own = B_TRUE; 04036 dsl_dataset_make_exclusive(ds, dtag); 04037 } 04038 } 04039 04040 ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); 04041 ra->ds = ds; 04042 ra->htag = ha->htag; 04043 ra->own = own; 04044 dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, 04045 dsl_dataset_user_release_sync, ra, dtag, 0); 04046 04047 return (0); 04048 } 04049 04050 int 04051 dsl_dataset_user_release(char *dsname, char *snapname, char *htag, 04052 boolean_t recursive) 04053 { 04054 struct dsl_ds_holdarg *ha; 04055 dsl_sync_task_t *dst; 04056 spa_t *spa; 04057 int error; 04058 04059 top: 04060 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 04061 04062 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 04063 04064 error = spa_open(dsname, &spa, FTAG); 04065 if (error) { 04066 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 04067 return (error); 04068 } 04069 04070 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 04071 ha->htag = htag; 04072 ha->snapname = snapname; 04073 ha->recursive = recursive; 04074 if (recursive) { 04075 error = dmu_objset_find(dsname, dsl_dataset_user_release_one, 04076 ha, DS_FIND_CHILDREN); 04077 } else { 04078 error = dsl_dataset_user_release_one(dsname, ha); 04079 } 04080 if (error == 0) 04081 error = dsl_sync_task_group_wait(ha->dstg); 04082 04083 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 04084 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 04085 struct dsl_ds_releasearg *ra = dst->dst_arg1; 04086 dsl_dataset_t *ds = ra->ds; 04087 04088 if (dst->dst_err) 04089 dsl_dataset_name(ds, ha->failed); 04090 04091 if (ra->own) 04092 dsl_dataset_disown(ds, ha->dstg); 04093 else 04094 dsl_dataset_rele(ds, ha->dstg); 04095 04096 kmem_free(ra, sizeof (struct dsl_ds_releasearg)); 04097 } 04098 04099 if (error == 0 && recursive && !ha->gotone) 04100 error = ENOENT; 04101 04102 if (error && error != EBUSY) 04103 (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); 04104 04105 dsl_sync_task_group_destroy(ha->dstg); 04106 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 04107 spa_close(spa, FTAG); 04108 04109 /* 04110 * We can get EBUSY if we were racing with deferred destroy and 04111 * dsl_dataset_user_release_check() hadn't done the necessary 04112 * open context setup. We can also get EBUSY if we're racing 04113 * with destroy and that thread is the ds_owner. Either way 04114 * the busy condition should be transient, and we should retry 04115 * the release operation. 
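	 * Each retry rebuilds the holdarg and the sync task group from
	 * scratch, since both are torn down before we get here.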
04116 */ 04117 if (error == EBUSY) 04118 goto top; 04119 04120 return (error); 04121 } 04122 04127 int 04128 dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, 04129 boolean_t retry) 04130 { 04131 dsl_dataset_t *ds; 04132 char *snap; 04133 char *name; 04134 int namelen; 04135 int error; 04136 04137 do { 04138 rw_enter(&dp->dp_config_rwlock, RW_READER); 04139 error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); 04140 rw_exit(&dp->dp_config_rwlock); 04141 if (error) 04142 return (error); 04143 namelen = dsl_dataset_namelen(ds)+1; 04144 name = kmem_alloc(namelen, KM_SLEEP); 04145 dsl_dataset_name(ds, name); 04146 dsl_dataset_rele(ds, FTAG); 04147 04148 snap = strchr(name, '@'); 04149 *snap = '\0'; 04150 ++snap; 04151 error = dsl_dataset_user_release(name, snap, htag, B_FALSE); 04152 kmem_free(name, namelen); 04153 04154 /* 04155 * The object can't have been destroyed because we have a hold, 04156 * but it might have been renamed, resulting in ENOENT. Retry 04157 * if we've been requested to do so. 04158 * 04159 * It would be nice if we could use the dsobj all the way 04160 * through and avoid ENOENT entirely. But we might need to 04161 * unmount the snapshot, and there's currently no way to lookup 04162 * a vfsp using a ZFS object id. 04163 */ 04164 } while ((error == ENOENT) && retry); 04165 04166 return (error); 04167 } 04168 04169 int 04170 dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) 04171 { 04172 dsl_dataset_t *ds; 04173 int err; 04174 04175 err = dsl_dataset_hold(dsname, FTAG, &ds); 04176 if (err) 04177 return (err); 04178 04179 VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); 04180 if (ds->ds_phys->ds_userrefs_obj != 0) { 04181 zap_attribute_t *za; 04182 zap_cursor_t zc; 04183 04184 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 04185 for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, 04186 ds->ds_phys->ds_userrefs_obj); 04187 zap_cursor_retrieve(&zc, za) == 0; 04188 zap_cursor_advance(&zc)) { 04189 VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, 04190 za->za_first_integer)); 04191 } 04192 zap_cursor_fini(&zc); 04193 kmem_free(za, sizeof (zap_attribute_t)); 04194 } 04195 dsl_dataset_rele(ds, FTAG); 04196 return (0); 04197 } 04198 04205 /* ARGSUSED */ 04206 int 04207 dsl_destroy_inconsistent(const char *dsname, void *arg) 04208 { 04209 dsl_dataset_t *ds; 04210 04211 if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { 04212 if (DS_IS_INCONSISTENT(ds)) 04213 (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); 04214 else 04215 dsl_dataset_disown(ds, FTAG); 04216 } 04217 return (0); 04218 } 04219 04220 /* 04221 * Return (in *usedp) the amount of space written in new that is not 04222 * present in oldsnap. New may be a snapshot or the head. Old must be 04223 * a snapshot before new, in new's filesystem (or its origin). If not then 04224 * fail and return EINVAL. 04225 * 04226 * The written space is calculated by considering two components: First, we 04227 * ignore any freed space, and calculate the written as new's used space 04228 * minus old's used space. Next, we add in the amount of space that was freed 04229 * between the two snapshots, thus reducing new's used space relative to old's. 04230 * Specifically, this is the space that was born before old->ds_creation_txg, 04231 * and freed before new (ie. on new's deadlist or a previous deadlist). 
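 * For example, if oldsnap referenced 8G, new references 10G, and 1G of
 * the blocks born before oldsnap was freed between the two snapshots,
 * then only 7G of new is shared with oldsnap, and the space written is
 * (10G - 8G) + 1G = 3G.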
04232 * 04233 * space freed [---------------------] 04234 * snapshots ---O-------O--------O-------O------ 04235 * oldsnap new 04236 */ 04237 int 04238 dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 04239 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 04240 { 04241 int err = 0; 04242 uint64_t snapobj; 04243 dsl_pool_t *dp = new->ds_dir->dd_pool; 04244 04245 *usedp = 0; 04246 *usedp += new->ds_phys->ds_referenced_bytes; 04247 *usedp -= oldsnap->ds_phys->ds_referenced_bytes; 04248 04249 *compp = 0; 04250 *compp += new->ds_phys->ds_compressed_bytes; 04251 *compp -= oldsnap->ds_phys->ds_compressed_bytes; 04252 04253 *uncompp = 0; 04254 *uncompp += new->ds_phys->ds_uncompressed_bytes; 04255 *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 04256 04257 rw_enter(&dp->dp_config_rwlock, RW_READER); 04258 snapobj = new->ds_object; 04259 while (snapobj != oldsnap->ds_object) { 04260 dsl_dataset_t *snap; 04261 uint64_t used, comp, uncomp; 04262 04263 if (snapobj == new->ds_object) { 04264 snap = new; 04265 } else { 04266 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 04267 if (err != 0) 04268 break; 04269 } 04270 04271 if (snap->ds_phys->ds_prev_snap_txg == 04272 oldsnap->ds_phys->ds_creation_txg) { 04273 /* 04274 * The blocks in the deadlist can not be born after 04275 * ds_prev_snap_txg, so get the whole deadlist space, 04276 * which is more efficient (especially for old-format 04277 * deadlists). Unfortunately the deadlist code 04278 * doesn't have enough information to make this 04279 * optimization itself. 04280 */ 04281 dsl_deadlist_space(&snap->ds_deadlist, 04282 &used, &comp, &uncomp); 04283 } else { 04284 dsl_deadlist_space_range(&snap->ds_deadlist, 04285 0, oldsnap->ds_phys->ds_creation_txg, 04286 &used, &comp, &uncomp); 04287 } 04288 *usedp += used; 04289 *compp += comp; 04290 *uncompp += uncomp; 04291 04292 /* 04293 * If we get to the beginning of the chain of snapshots 04294 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 04295 * was not a snapshot of/before new. 04296 */ 04297 snapobj = snap->ds_phys->ds_prev_snap_obj; 04298 if (snap != new) 04299 dsl_dataset_rele(snap, FTAG); 04300 if (snapobj == 0) { 04301 err = EINVAL; 04302 break; 04303 } 04304 04305 } 04306 rw_exit(&dp->dp_config_rwlock); 04307 return (err); 04308 } 04309 04310 /* 04311 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 04312 * lastsnap, and all snapshots in between are deleted. 04313 * 04314 * blocks that would be freed [---------------------------] 04315 * snapshots ---O-------O--------O-------O--------O 04316 * firstsnap lastsnap 04317 * 04318 * This is the set of blocks that were born after the snap before firstsnap, 04319 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 04320 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 04321 * We calculate this by iterating over the relevant deadlists (from the snap 04322 * after lastsnap, backward to the snap after firstsnap), summing up the 04323 * space on the deadlist that was born after the snap before firstsnap. 
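 * dsl_deadlist_space_range() gives us exactly the subset of each
 * deadlist born after that txg, so a single pass over those deadlists
 * is sufficient.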
04324 */ 04325 int 04326 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 04327 dsl_dataset_t *lastsnap, 04328 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 04329 { 04330 int err = 0; 04331 uint64_t snapobj; 04332 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 04333 04334 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 04335 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 04336 04337 /* 04338 * Check that the snapshots are in the same dsl_dir, and firstsnap 04339 * is before lastsnap. 04340 */ 04341 if (firstsnap->ds_dir != lastsnap->ds_dir || 04342 firstsnap->ds_phys->ds_creation_txg > 04343 lastsnap->ds_phys->ds_creation_txg) 04344 return (EINVAL); 04345 04346 *usedp = *compp = *uncompp = 0; 04347 04348 rw_enter(&dp->dp_config_rwlock, RW_READER); 04349 snapobj = lastsnap->ds_phys->ds_next_snap_obj; 04350 while (snapobj != firstsnap->ds_object) { 04351 dsl_dataset_t *ds; 04352 uint64_t used, comp, uncomp; 04353 04354 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 04355 if (err != 0) 04356 break; 04357 04358 dsl_deadlist_space_range(&ds->ds_deadlist, 04359 firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 04360 &used, &comp, &uncomp); 04361 *usedp += used; 04362 *compp += comp; 04363 *uncompp += uncomp; 04364 04365 snapobj = ds->ds_phys->ds_prev_snap_obj; 04366 ASSERT3U(snapobj, !=, 0); 04367 dsl_dataset_rele(ds, FTAG); 04368 } 04369 rw_exit(&dp->dp_config_rwlock); 04370 return (err); 04371 }
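
/*
 * Illustrative sketch only: one way a caller might drive
 * dsl_dataset_space_written() starting from dataset names.  The helper
 * name is hypothetical and error handling is minimal; the in-tree
 * consumers do essentially this from the ioctl layer.
 */
#if 0
static int
example_space_written(const char *oldname, const char *newname,
    uint64_t *writtenp)
{
	dsl_dataset_t *oldsnap, *new;
	uint64_t comp, uncomp;
	int err;

	err = dsl_dataset_hold(oldname, FTAG, &oldsnap);
	if (err)
		return (err);
	err = dsl_dataset_hold(newname, FTAG, &new);
	if (err) {
		dsl_dataset_rele(oldsnap, FTAG);
		return (err);
	}
	/* Fails with EINVAL if oldname is not a snapshot before newname. */
	err = dsl_dataset_space_written(oldsnap, new, writtenp,
	    &comp, &uncomp);
	dsl_dataset_rele(new, FTAG);
	dsl_dataset_rele(oldsnap, FTAG);
	return (err);
}
#endif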