FreeBSD ZFS
The Zettabyte File System (ZFS) — DSL directory implementation (dsl_dir.c)
00001 /* 00002 * CDDL HEADER START 00003 * 00004 * The contents of this file are subject to the terms of the 00005 * Common Development and Distribution License (the "License"). 00006 * You may not use this file except in compliance with the License. 00007 * 00008 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 00009 * or http://www.opensolaris.org/os/licensing. 00010 * See the License for the specific language governing permissions 00011 * and limitations under the License. 00012 * 00013 * When distributing Covered Code, include this CDDL HEADER in each 00014 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 00015 * If applicable, add the following below this CDDL HEADER, with the 00016 * fields enclosed by brackets "[]" replaced with your own identifying 00017 * information: Portions Copyright [yyyy] [name of copyright owner] 00018 * 00019 * CDDL HEADER END 00020 */ 00021 /* 00022 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 00023 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 00024 * All rights reserved. 
00025 */ 00026 00027 #include <sys/dmu.h> 00028 #include <sys/dmu_objset.h> 00029 #include <sys/dmu_tx.h> 00030 #include <sys/dsl_dataset.h> 00031 #include <sys/dsl_dir.h> 00032 #include <sys/dsl_prop.h> 00033 #include <sys/dsl_synctask.h> 00034 #include <sys/dsl_deleg.h> 00035 #include <sys/spa.h> 00036 #include <sys/metaslab.h> 00037 #include <sys/zap.h> 00038 #include <sys/zio.h> 00039 #include <sys/arc.h> 00040 #include <sys/sunddi.h> 00041 #include <sys/zvol.h> 00042 #ifdef _KERNEL 00043 #include <sys/zfs_vfsops.h> 00044 #endif 00045 #include "zfs_namecheck.h" 00046 00047 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); 00048 static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); 00049 00050 00051 /* ARGSUSED */ 00052 static void 00053 dsl_dir_evict(dmu_buf_t *db, void *arg) 00054 { 00055 dsl_dir_t *dd = arg; 00056 dsl_pool_t *dp = dd->dd_pool; 00057 int t; 00058 00059 for (t = 0; t < TXG_SIZE; t++) { 00060 ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 00061 ASSERT(dd->dd_tempreserved[t] == 0); 00062 ASSERT(dd->dd_space_towrite[t] == 0); 00063 } 00064 00065 if (dd->dd_parent) 00066 dsl_dir_close(dd->dd_parent, dd); 00067 00068 spa_close(dd->dd_pool->dp_spa, dd); 00069 00070 /* 00071 * The props callback list should have been cleaned up by 00072 * objset_evict(). 
00073 */ 00074 list_destroy(&dd->dd_prop_cbs); 00075 mutex_destroy(&dd->dd_lock); 00076 kmem_free(dd, sizeof (dsl_dir_t)); 00077 } 00078 00079 int 00080 dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, 00081 const char *tail, void *tag, dsl_dir_t **ddp) 00082 { 00083 dmu_buf_t *dbuf; 00084 dsl_dir_t *dd; 00085 int err; 00086 00087 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 00088 dsl_pool_sync_context(dp)); 00089 00090 err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); 00091 if (err) 00092 return (err); 00093 dd = dmu_buf_get_user(dbuf); 00094 #ifdef ZFS_DEBUG 00095 { 00096 dmu_object_info_t doi; 00097 dmu_object_info_from_db(dbuf, &doi); 00098 ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); 00099 ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); 00100 } 00101 #endif 00102 if (dd == NULL) { 00103 dsl_dir_t *winner; 00104 00105 dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); 00106 dd->dd_object = ddobj; 00107 dd->dd_dbuf = dbuf; 00108 dd->dd_pool = dp; 00109 dd->dd_phys = dbuf->db_data; 00110 mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); 00111 00112 list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), 00113 offsetof(dsl_prop_cb_record_t, cbr_node)); 00114 00115 dsl_dir_snap_cmtime_update(dd); 00116 00117 if (dd->dd_phys->dd_parent_obj) { 00118 err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, 00119 NULL, dd, &dd->dd_parent); 00120 if (err) 00121 goto errout; 00122 if (tail) { 00123 #ifdef ZFS_DEBUG 00124 uint64_t foundobj; 00125 00126 err = zap_lookup(dp->dp_meta_objset, 00127 dd->dd_parent->dd_phys->dd_child_dir_zapobj, 00128 tail, sizeof (foundobj), 1, &foundobj); 00129 ASSERT(err || foundobj == ddobj); 00130 #endif 00131 (void) strcpy(dd->dd_myname, tail); 00132 } else { 00133 err = zap_value_search(dp->dp_meta_objset, 00134 dd->dd_parent->dd_phys->dd_child_dir_zapobj, 00135 ddobj, 0, dd->dd_myname); 00136 } 00137 if (err) 00138 goto errout; 00139 } else { 00140 (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); 00141 } 00142 
00143 if (dsl_dir_is_clone(dd)) { 00144 dmu_buf_t *origin_bonus; 00145 dsl_dataset_phys_t *origin_phys; 00146 00147 /* 00148 * We can't open the origin dataset, because 00149 * that would require opening this dsl_dir. 00150 * Just look at its phys directly instead. 00151 */ 00152 err = dmu_bonus_hold(dp->dp_meta_objset, 00153 dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus); 00154 if (err) 00155 goto errout; 00156 origin_phys = origin_bonus->db_data; 00157 dd->dd_origin_txg = 00158 origin_phys->ds_creation_txg; 00159 dmu_buf_rele(origin_bonus, FTAG); 00160 } 00161 00162 winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, 00163 dsl_dir_evict); 00164 if (winner) { 00165 if (dd->dd_parent) 00166 dsl_dir_close(dd->dd_parent, dd); 00167 mutex_destroy(&dd->dd_lock); 00168 kmem_free(dd, sizeof (dsl_dir_t)); 00169 dd = winner; 00170 } else { 00171 spa_open_ref(dp->dp_spa, dd); 00172 } 00173 } 00174 00175 /* 00176 * The dsl_dir_t has both open-to-close and instantiate-to-evict 00177 * holds on the spa. We need the open-to-close holds because 00178 * otherwise the spa_refcnt wouldn't change when we open a 00179 * dir which the spa also has open, so we could incorrectly 00180 * think it was OK to unload/export/destroy the pool. We need 00181 * the instantiate-to-evict hold because the dsl_dir_t has a 00182 * pointer to the dd_pool, which has a pointer to the spa_t. 
00183 */ 00184 spa_open_ref(dp->dp_spa, tag); 00185 ASSERT3P(dd->dd_pool, ==, dp); 00186 ASSERT3U(dd->dd_object, ==, ddobj); 00187 ASSERT3P(dd->dd_dbuf, ==, dbuf); 00188 *ddp = dd; 00189 return (0); 00190 00191 errout: 00192 if (dd->dd_parent) 00193 dsl_dir_close(dd->dd_parent, dd); 00194 mutex_destroy(&dd->dd_lock); 00195 kmem_free(dd, sizeof (dsl_dir_t)); 00196 dmu_buf_rele(dbuf, tag); 00197 return (err); 00198 } 00199 00200 void 00201 dsl_dir_close(dsl_dir_t *dd, void *tag) 00202 { 00203 dprintf_dd(dd, "%s\n", ""); 00204 spa_close(dd->dd_pool->dp_spa, tag); 00205 dmu_buf_rele(dd->dd_dbuf, tag); 00206 } 00207 00209 void 00210 dsl_dir_name(dsl_dir_t *dd, char *buf) 00211 { 00212 if (dd->dd_parent) { 00213 dsl_dir_name(dd->dd_parent, buf); 00214 (void) strcat(buf, "/"); 00215 } else { 00216 buf[0] = '\0'; 00217 } 00218 if (!MUTEX_HELD(&dd->dd_lock)) { 00219 /* 00220 * recursive mutex so that we can use 00221 * dprintf_dd() with dd_lock held 00222 */ 00223 mutex_enter(&dd->dd_lock); 00224 (void) strcat(buf, dd->dd_myname); 00225 mutex_exit(&dd->dd_lock); 00226 } else { 00227 (void) strcat(buf, dd->dd_myname); 00228 } 00229 } 00230 00232 int 00233 dsl_dir_namelen(dsl_dir_t *dd) 00234 { 00235 int result = 0; 00236 00237 if (dd->dd_parent) { 00238 /* parent's name + 1 for the "/" */ 00239 result = dsl_dir_namelen(dd->dd_parent) + 1; 00240 } 00241 00242 if (!MUTEX_HELD(&dd->dd_lock)) { 00243 /* see dsl_dir_name */ 00244 mutex_enter(&dd->dd_lock); 00245 result += strlen(dd->dd_myname); 00246 mutex_exit(&dd->dd_lock); 00247 } else { 00248 result += strlen(dd->dd_myname); 00249 } 00250 00251 return (result); 00252 } 00253 00254 static int 00255 getcomponent(const char *path, char *component, const char **nextp) 00256 { 00257 char *p; 00258 if ((path == NULL) || (path[0] == '\0')) 00259 return (ENOENT); 00260 /* This would be a good place to reserve some namespace... 
*/ 00261 p = strpbrk(path, "/@"); 00262 if (p && (p[1] == '/' || p[1] == '@')) { 00263 /* two separators in a row */ 00264 return (EINVAL); 00265 } 00266 if (p == NULL || p == path) { 00267 /* 00268 * if the first thing is an @ or /, it had better be an 00269 * @ and it had better not have any more ats or slashes, 00270 * and it had better have something after the @. 00271 */ 00272 if (p != NULL && 00273 (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) 00274 return (EINVAL); 00275 if (strlen(path) >= MAXNAMELEN) 00276 return (ENAMETOOLONG); 00277 (void) strcpy(component, path); 00278 p = NULL; 00279 } else if (p[0] == '/') { 00280 if (p-path >= MAXNAMELEN) 00281 return (ENAMETOOLONG); 00282 (void) strncpy(component, path, p - path); 00283 component[p-path] = '\0'; 00284 p++; 00285 } else if (p[0] == '@') { 00286 /* 00287 * if the next separator is an @, there better not be 00288 * any more slashes. 00289 */ 00290 if (strchr(path, '/')) 00291 return (EINVAL); 00292 if (p-path >= MAXNAMELEN) 00293 return (ENAMETOOLONG); 00294 (void) strncpy(component, path, p - path); 00295 component[p-path] = '\0'; 00296 } else { 00297 ASSERT(!"invalid p"); 00298 } 00299 *nextp = p; 00300 return (0); 00301 } 00302 00307 int 00308 dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, 00309 dsl_dir_t **ddp, const char **tailp) 00310 { 00311 char buf[MAXNAMELEN]; 00312 const char *next, *nextnext = NULL; 00313 int err; 00314 dsl_dir_t *dd; 00315 dsl_pool_t *dp; 00316 uint64_t ddobj; 00317 int openedspa = FALSE; 00318 00319 dprintf("%s\n", name); 00320 00321 err = getcomponent(name, buf, &next); 00322 if (err) 00323 return (err); 00324 if (spa == NULL) { 00325 err = spa_open(buf, &spa, FTAG); 00326 if (err) { 00327 dprintf("spa_open(%s) failed\n", buf); 00328 return (err); 00329 } 00330 openedspa = TRUE; 00331 00332 /* XXX this assertion belongs in spa_open */ 00333 ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); 00334 } 00335 00336 dp = spa_get_dsl(spa); 00337 00338 
rw_enter(&dp->dp_config_rwlock, RW_READER); 00339 err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); 00340 if (err) { 00341 rw_exit(&dp->dp_config_rwlock); 00342 if (openedspa) 00343 spa_close(spa, FTAG); 00344 return (err); 00345 } 00346 00347 while (next != NULL) { 00348 dsl_dir_t *child_ds; 00349 err = getcomponent(next, buf, &nextnext); 00350 if (err) 00351 break; 00352 ASSERT(next[0] != '\0'); 00353 if (next[0] == '@') 00354 break; 00355 dprintf("looking up %s in obj%lld\n", 00356 buf, dd->dd_phys->dd_child_dir_zapobj); 00357 00358 err = zap_lookup(dp->dp_meta_objset, 00359 dd->dd_phys->dd_child_dir_zapobj, 00360 buf, sizeof (ddobj), 1, &ddobj); 00361 if (err) { 00362 if (err == ENOENT) 00363 err = 0; 00364 break; 00365 } 00366 00367 err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); 00368 if (err) 00369 break; 00370 dsl_dir_close(dd, tag); 00371 dd = child_ds; 00372 next = nextnext; 00373 } 00374 rw_exit(&dp->dp_config_rwlock); 00375 00376 if (err) { 00377 dsl_dir_close(dd, tag); 00378 if (openedspa) 00379 spa_close(spa, FTAG); 00380 return (err); 00381 } 00382 00383 /* 00384 * It's an error if there's more than one component left, or 00385 * tailp==NULL and there's any component left. 
00386 */ 00387 if (next != NULL && 00388 (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { 00389 /* bad path name */ 00390 dsl_dir_close(dd, tag); 00391 dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); 00392 err = ENOENT; 00393 } 00394 if (tailp) 00395 *tailp = next; 00396 if (openedspa) 00397 spa_close(spa, FTAG); 00398 *ddp = dd; 00399 return (err); 00400 } 00401 00408 int 00409 dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) 00410 { 00411 return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); 00412 } 00413 00414 uint64_t 00415 dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, 00416 dmu_tx_t *tx) 00417 { 00418 objset_t *mos = dp->dp_meta_objset; 00419 uint64_t ddobj; 00420 dsl_dir_phys_t *ddphys; 00421 dmu_buf_t *dbuf; 00422 00423 ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, 00424 DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); 00425 if (pds) { 00426 VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, 00427 name, sizeof (uint64_t), 1, &ddobj, tx)); 00428 } else { 00429 /* it's the root dir */ 00430 VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, 00431 DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); 00432 } 00433 VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); 00434 dmu_buf_will_dirty(dbuf, tx); 00435 ddphys = dbuf->db_data; 00436 00437 ddphys->dd_creation_time = gethrestime_sec(); 00438 if (pds) 00439 ddphys->dd_parent_obj = pds->dd_object; 00440 ddphys->dd_props_zapobj = zap_create(mos, 00441 DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 00442 ddphys->dd_child_dir_zapobj = zap_create(mos, 00443 DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 00444 if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) 00445 ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; 00446 dmu_buf_rele(dbuf, FTAG); 00447 00448 return (ddobj); 00449 } 00450 00451 /* ARGSUSED */ 00452 int 00453 dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 00454 { 00455 dsl_dataset_t *ds = arg1; 00456 
dsl_dir_t *dd = ds->ds_dir; 00457 dsl_pool_t *dp = dd->dd_pool; 00458 objset_t *mos = dp->dp_meta_objset; 00459 int err; 00460 uint64_t count; 00461 00462 /* 00463 * There should be exactly two holds, both from 00464 * dsl_dataset_destroy: one on the dd directory, and one on its 00465 * head ds. If there are more holds, then a concurrent thread is 00466 * performing a lookup inside this dir while we're trying to destroy 00467 * it. To minimize this possibility, we perform this check only 00468 * in syncing context and fail the operation if we encounter 00469 * additional holds. The dp_config_rwlock ensures that nobody else 00470 * opens it after we check. 00471 */ 00472 if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2) 00473 return (EBUSY); 00474 00475 err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); 00476 if (err) 00477 return (err); 00478 if (count != 0) 00479 return (EEXIST); 00480 00481 return (0); 00482 } 00483 00484 void 00485 dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) 00486 { 00487 dsl_dataset_t *ds = arg1; 00488 dsl_dir_t *dd = ds->ds_dir; 00489 objset_t *mos = dd->dd_pool->dp_meta_objset; 00490 dsl_prop_setarg_t psa; 00491 uint64_t value = 0; 00492 uint64_t obj; 00493 dd_used_t t; 00494 00495 ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); 00496 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 00497 00498 /* Remove our reservation. 
*/ 00499 dsl_prop_setarg_init_uint64(&psa, "reservation", 00500 (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), 00501 &value); 00502 psa.psa_effective_value = 0; /* predict default value */ 00503 00504 dsl_dir_set_reservation_sync(ds, &psa, tx); 00505 00506 ASSERT0(dd->dd_phys->dd_used_bytes); 00507 ASSERT0(dd->dd_phys->dd_reserved); 00508 for (t = 0; t < DD_USED_NUM; t++) 00509 ASSERT0(dd->dd_phys->dd_used_breakdown[t]); 00510 00511 VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); 00512 VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); 00513 VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); 00514 VERIFY(0 == zap_remove(mos, 00515 dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); 00516 00517 obj = dd->dd_object; 00518 dsl_dir_close(dd, tag); 00519 VERIFY(0 == dmu_object_free(mos, obj, tx)); 00520 } 00521 00522 boolean_t 00523 dsl_dir_is_clone(dsl_dir_t *dd) 00524 { 00525 return (dd->dd_phys->dd_origin_obj && 00526 (dd->dd_pool->dp_origin_snap == NULL || 00527 dd->dd_phys->dd_origin_obj != 00528 dd->dd_pool->dp_origin_snap->ds_object)); 00529 } 00530 00531 void 00532 dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) 00533 { 00534 mutex_enter(&dd->dd_lock); 00535 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 00536 dd->dd_phys->dd_used_bytes); 00537 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota); 00538 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, 00539 dd->dd_phys->dd_reserved); 00540 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 00541 dd->dd_phys->dd_compressed_bytes == 0 ? 
100 : 00542 (dd->dd_phys->dd_uncompressed_bytes * 100 / 00543 dd->dd_phys->dd_compressed_bytes)); 00544 if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 00545 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, 00546 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]); 00547 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, 00548 dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]); 00549 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, 00550 dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]); 00551 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, 00552 dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] + 00553 dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]); 00554 } 00555 mutex_exit(&dd->dd_lock); 00556 00557 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 00558 if (dsl_dir_is_clone(dd)) { 00559 dsl_dataset_t *ds; 00560 char buf[MAXNAMELEN]; 00561 00562 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, 00563 dd->dd_phys->dd_origin_obj, FTAG, &ds)); 00564 dsl_dataset_name(ds, buf); 00565 dsl_dataset_rele(ds, FTAG); 00566 dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); 00567 } 00568 rw_exit(&dd->dd_pool->dp_config_rwlock); 00569 } 00570 00571 void 00572 dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) 00573 { 00574 dsl_pool_t *dp = dd->dd_pool; 00575 00576 ASSERT(dd->dd_phys); 00577 00578 if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { 00579 /* up the hold count until we can be written out */ 00580 dmu_buf_add_ref(dd->dd_dbuf, dd); 00581 } 00582 } 00583 00584 static int64_t 00585 parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) 00586 { 00587 uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); 00588 uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); 00589 return (new_accounted - old_accounted); 00590 } 00591 00592 void 00593 dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) 00594 { 00595 ASSERT(dmu_tx_is_syncing(tx)); 00596 00597 mutex_enter(&dd->dd_lock); 00598 ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]); 00599 
dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, 00600 dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); 00601 dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; 00602 mutex_exit(&dd->dd_lock); 00603 00604 /* release the hold from dsl_dir_dirty */ 00605 dmu_buf_rele(dd->dd_dbuf, dd); 00606 } 00607 00608 static uint64_t 00609 dsl_dir_space_towrite(dsl_dir_t *dd) 00610 { 00611 uint64_t space = 0; 00612 int i; 00613 00614 ASSERT(MUTEX_HELD(&dd->dd_lock)); 00615 00616 for (i = 0; i < TXG_SIZE; i++) { 00617 space += dd->dd_space_towrite[i&TXG_MASK]; 00618 ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); 00619 } 00620 return (space); 00621 } 00622 00628 uint64_t 00629 dsl_dir_space_available(dsl_dir_t *dd, 00630 dsl_dir_t *ancestor, int64_t delta, int ondiskonly) 00631 { 00632 uint64_t parentspace, myspace, quota, used; 00633 00634 /* 00635 * If there are no restrictions otherwise, assume we have 00636 * unlimited space available. 00637 */ 00638 quota = UINT64_MAX; 00639 parentspace = UINT64_MAX; 00640 00641 if (dd->dd_parent != NULL) { 00642 parentspace = dsl_dir_space_available(dd->dd_parent, 00643 ancestor, delta, ondiskonly); 00644 } 00645 00646 mutex_enter(&dd->dd_lock); 00647 if (dd->dd_phys->dd_quota != 0) 00648 quota = dd->dd_phys->dd_quota; 00649 used = dd->dd_phys->dd_used_bytes; 00650 if (!ondiskonly) 00651 used += dsl_dir_space_towrite(dd); 00652 00653 if (dd->dd_parent == NULL) { 00654 uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); 00655 quota = MIN(quota, poolsize); 00656 } 00657 00658 if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { 00659 /* 00660 * We have some space reserved, in addition to what our 00661 * parent gave us. 
00662 */ 00663 parentspace += dd->dd_phys->dd_reserved - used; 00664 } 00665 00666 if (dd == ancestor) { 00667 ASSERT(delta <= 0); 00668 ASSERT(used >= -delta); 00669 used += delta; 00670 if (parentspace != UINT64_MAX) 00671 parentspace -= delta; 00672 } 00673 00674 if (used > quota) { 00675 /* over quota */ 00676 myspace = 0; 00677 } else { 00678 /* 00679 * the lesser of the space provided by our parent and 00680 * the space left in our quota 00681 */ 00682 myspace = MIN(parentspace, quota - used); 00683 } 00684 00685 mutex_exit(&dd->dd_lock); 00686 00687 return (myspace); 00688 } 00689 00690 struct tempreserve { 00691 list_node_t tr_node; 00692 dsl_pool_t *tr_dp; 00693 dsl_dir_t *tr_ds; 00694 uint64_t tr_size; 00695 }; 00696 00697 static int 00698 dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, 00699 boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list, 00700 dmu_tx_t *tx, boolean_t first) 00701 { 00702 uint64_t txg = tx->tx_txg; 00703 uint64_t est_inflight, used_on_disk, quota, parent_rsrv; 00704 uint64_t deferred = 0; 00705 struct tempreserve *tr; 00706 int retval = EDQUOT; 00707 int txgidx = txg & TXG_MASK; 00708 int i; 00709 uint64_t ref_rsrv = 0; 00710 00711 ASSERT3U(txg, !=, 0); 00712 ASSERT3S(asize, >, 0); 00713 00714 mutex_enter(&dd->dd_lock); 00715 00716 /* 00717 * Check against the dsl_dir's quota. We don't add in the delta 00718 * when checking for over-quota because they get one free hit. 00719 */ 00720 est_inflight = dsl_dir_space_towrite(dd); 00721 for (i = 0; i < TXG_SIZE; i++) 00722 est_inflight += dd->dd_tempreserved[i]; 00723 used_on_disk = dd->dd_phys->dd_used_bytes; 00724 00725 /* 00726 * On the first iteration, fetch the dataset's used-on-disk and 00727 * refreservation values. Also, if checkrefquota is set, test if 00728 * allocating this space would exceed the dataset's refquota. 
00729 */ 00730 if (first && tx->tx_objset) { 00731 int error; 00732 dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; 00733 00734 error = dsl_dataset_check_quota(ds, checkrefquota, 00735 asize, est_inflight, &used_on_disk, &ref_rsrv); 00736 if (error) { 00737 mutex_exit(&dd->dd_lock); 00738 return (error); 00739 } 00740 } 00741 00742 /* 00743 * If this transaction will result in a net free of space, 00744 * we want to let it through. 00745 */ 00746 if (ignorequota || netfree || dd->dd_phys->dd_quota == 0) 00747 quota = UINT64_MAX; 00748 else 00749 quota = dd->dd_phys->dd_quota; 00750 00751 /* 00752 * Adjust the quota against the actual pool size at the root 00753 * minus any outstanding deferred frees. 00754 * To ensure that it's possible to remove files from a full 00755 * pool without inducing transient overcommits, we throttle 00756 * netfree transactions against a quota that is slightly larger, 00757 * but still within the pool's allocation slop. In cases where 00758 * we're very close to full, this will allow a steady trickle of 00759 * removes to get through. 00760 */ 00761 if (dd->dd_parent == NULL) { 00762 spa_t *spa = dd->dd_pool->dp_spa; 00763 uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); 00764 deferred = metaslab_class_get_deferred(spa_normal_class(spa)); 00765 if (poolsize - deferred < quota) { 00766 quota = poolsize - deferred; 00767 retval = ENOSPC; 00768 } 00769 } 00770 00771 /* 00772 * If they are requesting more space, and our current estimate 00773 * is over quota, they get to try again unless the actual 00774 * on-disk is over quota and there are no pending changes (which 00775 * may free up space for us). 
00776 */ 00777 if (used_on_disk + est_inflight >= quota) { 00778 if (est_inflight > 0 || used_on_disk < quota || 00779 (retval == ENOSPC && used_on_disk < quota + deferred)) 00780 retval = ERESTART; 00781 dprintf_dd(dd, "failing: used=%lluK inflight = %lluK " 00782 "quota=%lluK tr=%lluK err=%d\n", 00783 used_on_disk>>10, est_inflight>>10, 00784 quota>>10, asize>>10, retval); 00785 mutex_exit(&dd->dd_lock); 00786 return (retval); 00787 } 00788 00789 /* We need to up our estimated delta before dropping dd_lock */ 00790 dd->dd_tempreserved[txgidx] += asize; 00791 00792 parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, 00793 asize - ref_rsrv); 00794 mutex_exit(&dd->dd_lock); 00795 00796 tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); 00797 tr->tr_ds = dd; 00798 tr->tr_size = asize; 00799 list_insert_tail(tr_list, tr); 00800 00801 /* see if it's OK with our parent */ 00802 if (dd->dd_parent && parent_rsrv) { 00803 boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0); 00804 00805 return (dsl_dir_tempreserve_impl(dd->dd_parent, 00806 parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE)); 00807 } else { 00808 return (0); 00809 } 00810 } 00811 00818 int 00819 dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, 00820 uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx) 00821 { 00822 int err; 00823 list_t *tr_list; 00824 00825 if (asize == 0) { 00826 *tr_cookiep = NULL; 00827 return (0); 00828 } 00829 00830 tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 00831 list_create(tr_list, sizeof (struct tempreserve), 00832 offsetof(struct tempreserve, tr_node)); 00833 ASSERT3S(asize, >, 0); 00834 ASSERT3S(fsize, >=, 0); 00835 00836 err = arc_tempreserve_space(lsize, tx->tx_txg); 00837 if (err == 0) { 00838 struct tempreserve *tr; 00839 00840 tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); 00841 tr->tr_size = lsize; 00842 list_insert_tail(tr_list, tr); 00843 00844 err = dsl_pool_tempreserve_space(dd->dd_pool, asize, 
tx); 00845 } else { 00846 if (err == EAGAIN) { 00847 txg_delay(dd->dd_pool, tx->tx_txg, 1); 00848 err = ERESTART; 00849 } 00850 dsl_pool_memory_pressure(dd->dd_pool); 00851 } 00852 00853 if (err == 0) { 00854 struct tempreserve *tr; 00855 00856 tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); 00857 tr->tr_dp = dd->dd_pool; 00858 tr->tr_size = asize; 00859 list_insert_tail(tr_list, tr); 00860 00861 err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, 00862 FALSE, asize > usize, tr_list, tx, TRUE); 00863 } 00864 00865 if (err) 00866 dsl_dir_tempreserve_clear(tr_list, tx); 00867 else 00868 *tr_cookiep = tr_list; 00869 00870 return (err); 00871 } 00872 00877 void 00878 dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) 00879 { 00880 int txgidx = tx->tx_txg & TXG_MASK; 00881 list_t *tr_list = tr_cookie; 00882 struct tempreserve *tr; 00883 00884 ASSERT3U(tx->tx_txg, !=, 0); 00885 00886 if (tr_cookie == NULL) 00887 return; 00888 00889 while (tr = list_head(tr_list)) { 00890 if (tr->tr_dp) { 00891 dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx); 00892 } else if (tr->tr_ds) { 00893 mutex_enter(&tr->tr_ds->dd_lock); 00894 ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, 00895 tr->tr_size); 00896 tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; 00897 mutex_exit(&tr->tr_ds->dd_lock); 00898 } else { 00899 arc_tempreserve_clear(tr->tr_size); 00900 } 00901 list_remove(tr_list, tr); 00902 kmem_free(tr, sizeof (struct tempreserve)); 00903 } 00904 00905 kmem_free(tr_list, sizeof (list_t)); 00906 } 00907 00908 static void 00909 dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) 00910 { 00911 int64_t parent_space; 00912 uint64_t est_used; 00913 00914 mutex_enter(&dd->dd_lock); 00915 if (space > 0) 00916 dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; 00917 00918 est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes; 00919 parent_space = parent_delta(dd, est_used, space); 00920 mutex_exit(&dd->dd_lock); 00921 00922 /* 
Make sure that we clean up dd_space_to* */ 00923 dsl_dir_dirty(dd, tx); 00924 00925 /* XXX this is potentially expensive and unnecessary... */ 00926 if (parent_space && dd->dd_parent) 00927 dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx); 00928 } 00929 00935 void 00936 dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) 00937 { 00938 dsl_pool_willuse_space(dd->dd_pool, space, tx); 00939 dsl_dir_willuse_space_impl(dd, space, tx); 00940 } 00941 00943 void 00944 dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, 00945 int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) 00946 { 00947 int64_t accounted_delta; 00948 boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); 00949 00950 ASSERT(dmu_tx_is_syncing(tx)); 00951 ASSERT(type < DD_USED_NUM); 00952 00953 if (needlock) 00954 mutex_enter(&dd->dd_lock); 00955 accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); 00956 ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used); 00957 ASSERT(compressed >= 0 || 00958 dd->dd_phys->dd_compressed_bytes >= -compressed); 00959 ASSERT(uncompressed >= 0 || 00960 dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); 00961 dmu_buf_will_dirty(dd->dd_dbuf, tx); 00962 dd->dd_phys->dd_used_bytes += used; 00963 dd->dd_phys->dd_uncompressed_bytes += uncompressed; 00964 dd->dd_phys->dd_compressed_bytes += compressed; 00965 00966 if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 00967 ASSERT(used > 0 || 00968 dd->dd_phys->dd_used_breakdown[type] >= -used); 00969 dd->dd_phys->dd_used_breakdown[type] += used; 00970 #ifdef DEBUG 00971 dd_used_t t; 00972 uint64_t u = 0; 00973 for (t = 0; t < DD_USED_NUM; t++) 00974 u += dd->dd_phys->dd_used_breakdown[t]; 00975 ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes); 00976 #endif 00977 } 00978 if (needlock) 00979 mutex_exit(&dd->dd_lock); 00980 00981 if (dd->dd_parent != NULL) { 00982 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, 00983 accounted_delta, compressed, uncompressed, tx); 00984 
dsl_dir_transfer_space(dd->dd_parent, 00985 used - accounted_delta, 00986 DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); 00987 } 00988 } 00989 00990 void 00991 dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, 00992 dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) 00993 { 00994 boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); 00995 00996 ASSERT(dmu_tx_is_syncing(tx)); 00997 ASSERT(oldtype < DD_USED_NUM); 00998 ASSERT(newtype < DD_USED_NUM); 00999 01000 if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) 01001 return; 01002 01003 if (needlock) 01004 mutex_enter(&dd->dd_lock); 01005 ASSERT(delta > 0 ? 01006 dd->dd_phys->dd_used_breakdown[oldtype] >= delta : 01007 dd->dd_phys->dd_used_breakdown[newtype] >= -delta); 01008 ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta)); 01009 dmu_buf_will_dirty(dd->dd_dbuf, tx); 01010 dd->dd_phys->dd_used_breakdown[oldtype] -= delta; 01011 dd->dd_phys->dd_used_breakdown[newtype] += delta; 01012 if (needlock) 01013 mutex_exit(&dd->dd_lock); 01014 } 01015 01016 static int 01017 dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 01018 { 01019 dsl_dataset_t *ds = arg1; 01020 dsl_dir_t *dd = ds->ds_dir; 01021 dsl_prop_setarg_t *psa = arg2; 01022 int err; 01023 uint64_t towrite; 01024 01025 if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) 01026 return (err); 01027 01028 if (psa->psa_effective_value == 0) 01029 return (0); 01030 01031 mutex_enter(&dd->dd_lock); 01032 /* 01033 * If we are doing the preliminary check in open context, and 01034 * there are pending changes, then don't fail it, since the 01035 * pending changes could under-estimate the amount of space to be 01036 * freed up. 
01037 */ 01038 towrite = dsl_dir_space_towrite(dd); 01039 if ((dmu_tx_is_syncing(tx) || towrite == 0) && 01040 (psa->psa_effective_value < dd->dd_phys->dd_reserved || 01041 psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) { 01042 err = ENOSPC; 01043 } 01044 mutex_exit(&dd->dd_lock); 01045 return (err); 01046 } 01047 01048 extern dsl_syncfunc_t dsl_prop_set_sync; 01049 01050 static void 01051 dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) 01052 { 01053 dsl_dataset_t *ds = arg1; 01054 dsl_dir_t *dd = ds->ds_dir; 01055 dsl_prop_setarg_t *psa = arg2; 01056 uint64_t effective_value = psa->psa_effective_value; 01057 01058 dsl_prop_set_sync(ds, psa, tx); 01059 DSL_PROP_CHECK_PREDICTION(dd, psa); 01060 01061 dmu_buf_will_dirty(dd->dd_dbuf, tx); 01062 01063 mutex_enter(&dd->dd_lock); 01064 dd->dd_phys->dd_quota = effective_value; 01065 mutex_exit(&dd->dd_lock); 01066 } 01067 01068 int 01069 dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) 01070 { 01071 dsl_dir_t *dd; 01072 dsl_dataset_t *ds; 01073 dsl_prop_setarg_t psa; 01074 int err; 01075 01076 dsl_prop_setarg_init_uint64(&psa, "quota", source, "a); 01077 01078 err = dsl_dataset_hold(ddname, FTAG, &ds); 01079 if (err) 01080 return (err); 01081 01082 err = dsl_dir_open(ddname, FTAG, &dd, NULL); 01083 if (err) { 01084 dsl_dataset_rele(ds, FTAG); 01085 return (err); 01086 } 01087 01088 ASSERT(ds->ds_dir == dd); 01089 01090 /* 01091 * If someone removes a file, then tries to set the quota, we want to 01092 * make sure the file freeing takes effect. 
01093 */ 01094 txg_wait_open(dd->dd_pool, 0); 01095 01096 err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, 01097 dsl_dir_set_quota_sync, ds, &psa, 0); 01098 01099 dsl_dir_close(dd, FTAG); 01100 dsl_dataset_rele(ds, FTAG); 01101 return (err); 01102 } 01103 01104 int 01105 dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 01106 { 01107 dsl_dataset_t *ds = arg1; 01108 dsl_dir_t *dd = ds->ds_dir; 01109 dsl_prop_setarg_t *psa = arg2; 01110 uint64_t effective_value; 01111 uint64_t used, avail; 01112 int err; 01113 01114 if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) 01115 return (err); 01116 01117 effective_value = psa->psa_effective_value; 01118 01119 /* 01120 * If we are doing the preliminary check in open context, the 01121 * space estimates may be inaccurate. 01122 */ 01123 if (!dmu_tx_is_syncing(tx)) 01124 return (0); 01125 01126 mutex_enter(&dd->dd_lock); 01127 used = dd->dd_phys->dd_used_bytes; 01128 mutex_exit(&dd->dd_lock); 01129 01130 if (dd->dd_parent) { 01131 avail = dsl_dir_space_available(dd->dd_parent, 01132 NULL, 0, FALSE); 01133 } else { 01134 avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; 01135 } 01136 01137 if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) { 01138 uint64_t delta = MAX(used, effective_value) - 01139 MAX(used, dd->dd_phys->dd_reserved); 01140 01141 if (delta > avail) 01142 return (ENOSPC); 01143 if (dd->dd_phys->dd_quota > 0 && 01144 effective_value > dd->dd_phys->dd_quota) 01145 return (ENOSPC); 01146 } 01147 01148 return (0); 01149 } 01150 01151 static void 01152 dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) 01153 { 01154 dsl_dataset_t *ds = arg1; 01155 dsl_dir_t *dd = ds->ds_dir; 01156 dsl_prop_setarg_t *psa = arg2; 01157 uint64_t effective_value = psa->psa_effective_value; 01158 uint64_t used; 01159 int64_t delta; 01160 01161 dsl_prop_set_sync(ds, psa, tx); 01162 DSL_PROP_CHECK_PREDICTION(dd, psa); 01163 01164 dmu_buf_will_dirty(dd->dd_dbuf, 
tx); 01165 01166 mutex_enter(&dd->dd_lock); 01167 used = dd->dd_phys->dd_used_bytes; 01168 delta = MAX(used, effective_value) - 01169 MAX(used, dd->dd_phys->dd_reserved); 01170 dd->dd_phys->dd_reserved = effective_value; 01171 01172 if (dd->dd_parent != NULL) { 01173 /* Roll up this additional usage into our ancestors */ 01174 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, 01175 delta, 0, 0, tx); 01176 } 01177 mutex_exit(&dd->dd_lock); 01178 } 01179 01180 int 01181 dsl_dir_set_reservation(const char *ddname, zprop_source_t source, 01182 uint64_t reservation) 01183 { 01184 dsl_dir_t *dd; 01185 dsl_dataset_t *ds; 01186 dsl_prop_setarg_t psa; 01187 int err; 01188 01189 dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation); 01190 01191 err = dsl_dataset_hold(ddname, FTAG, &ds); 01192 if (err) 01193 return (err); 01194 01195 err = dsl_dir_open(ddname, FTAG, &dd, NULL); 01196 if (err) { 01197 dsl_dataset_rele(ds, FTAG); 01198 return (err); 01199 } 01200 01201 ASSERT(ds->ds_dir == dd); 01202 01203 err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, 01204 dsl_dir_set_reservation_sync, ds, &psa, 0); 01205 01206 dsl_dir_close(dd, FTAG); 01207 dsl_dataset_rele(ds, FTAG); 01208 return (err); 01209 } 01210 01211 static dsl_dir_t * 01212 closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) 01213 { 01214 for (; ds1; ds1 = ds1->dd_parent) { 01215 dsl_dir_t *dd; 01216 for (dd = ds2; dd; dd = dd->dd_parent) { 01217 if (ds1 == dd) 01218 return (dd); 01219 } 01220 } 01221 return (NULL); 01222 } 01223 01228 static int64_t 01229 would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) 01230 { 01231 if (dd == ancestor) 01232 return (delta); 01233 01234 mutex_enter(&dd->dd_lock); 01235 delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta); 01236 mutex_exit(&dd->dd_lock); 01237 return (would_change(dd->dd_parent, delta, ancestor)); 01238 } 01239 01240 struct renamearg { 01241 dsl_dir_t *newparent; 01242 const char *mynewname; 01243 
boolean_t allowmounted; 01244 }; 01245 01246 static int 01247 dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 01248 { 01249 dsl_dir_t *dd = arg1; 01250 struct renamearg *ra = arg2; 01251 dsl_pool_t *dp = dd->dd_pool; 01252 objset_t *mos = dp->dp_meta_objset; 01253 int err; 01254 uint64_t val; 01255 01256 /* 01257 * There should only be one reference, from dmu_objset_rename(). 01258 * Fleeting holds are also possible (eg, from "zfs list" getting 01259 * stats), but any that are present in open context will likely 01260 * be gone by syncing context, so only fail from syncing 01261 * context. 01262 * Don't check if we allow renaming of busy (mounted) dataset. 01263 */ 01264 if (!ra->allowmounted && dmu_tx_is_syncing(tx) && 01265 dmu_buf_refcount(dd->dd_dbuf) > 1) { 01266 return (EBUSY); 01267 } 01268 01269 /* check for existing name */ 01270 err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, 01271 ra->mynewname, 8, 1, &val); 01272 if (err == 0) 01273 return (EEXIST); 01274 if (err != ENOENT) 01275 return (err); 01276 01277 if (ra->newparent != dd->dd_parent) { 01278 /* is there enough space? 
*/ 01279 uint64_t myspace = 01280 MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); 01281 01282 /* no rename into our descendant */ 01283 if (closest_common_ancestor(dd, ra->newparent) == dd) 01284 return (EINVAL); 01285 01286 if (err = dsl_dir_transfer_possible(dd->dd_parent, 01287 ra->newparent, myspace)) 01288 return (err); 01289 } 01290 01291 return (0); 01292 } 01293 01294 static void 01295 dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) 01296 { 01297 char oldname[MAXPATHLEN], newname[MAXPATHLEN]; 01298 dsl_dir_t *dd = arg1; 01299 struct renamearg *ra = arg2; 01300 dsl_pool_t *dp = dd->dd_pool; 01301 objset_t *mos = dp->dp_meta_objset; 01302 int err; 01303 01304 ASSERT(ra->allowmounted || dmu_buf_refcount(dd->dd_dbuf) <= 2); 01305 01306 if (ra->newparent != dd->dd_parent) { 01307 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, 01308 -dd->dd_phys->dd_used_bytes, 01309 -dd->dd_phys->dd_compressed_bytes, 01310 -dd->dd_phys->dd_uncompressed_bytes, tx); 01311 dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD, 01312 dd->dd_phys->dd_used_bytes, 01313 dd->dd_phys->dd_compressed_bytes, 01314 dd->dd_phys->dd_uncompressed_bytes, tx); 01315 01316 if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) { 01317 uint64_t unused_rsrv = dd->dd_phys->dd_reserved - 01318 dd->dd_phys->dd_used_bytes; 01319 01320 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, 01321 -unused_rsrv, 0, 0, tx); 01322 dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV, 01323 unused_rsrv, 0, 0, tx); 01324 } 01325 } 01326 01327 dmu_buf_will_dirty(dd->dd_dbuf, tx); 01328 01329 /* remove from old parent zapobj */ 01330 dsl_dir_name(dd, oldname); 01331 err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, 01332 dd->dd_myname, tx); 01333 ASSERT0(err); 01334 01335 (void) strcpy(dd->dd_myname, ra->mynewname); 01336 dsl_dir_close(dd->dd_parent, dd); 01337 dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; 01338 VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, 01339 
ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); 01340 01341 /* add to new parent zapobj */ 01342 err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, 01343 dd->dd_myname, 8, 1, &dd->dd_object, tx); 01344 ASSERT0(err); 01345 dsl_dir_name(dd, newname); 01346 #ifdef _KERNEL 01347 zfsvfs_update_fromname(oldname, newname); 01348 zvol_rename_minors(oldname, newname); 01349 #endif 01350 01351 spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, 01352 tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); 01353 } 01354 01355 int 01356 dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags) 01357 { 01358 struct renamearg ra; 01359 int err; 01360 01361 /* new parent should exist */ 01362 err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); 01363 if (err) 01364 return (err); 01365 01366 /* can't rename to different pool */ 01367 if (dd->dd_pool != ra.newparent->dd_pool) { 01368 err = ENXIO; 01369 goto out; 01370 } 01371 01372 /* new name should not already exist */ 01373 if (ra.mynewname == NULL) { 01374 err = EEXIST; 01375 goto out; 01376 } 01377 01378 ra.allowmounted = !!(flags & ZFS_RENAME_ALLOW_MOUNTED); 01379 01380 err = dsl_sync_task_do(dd->dd_pool, 01381 dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); 01382 01383 out: 01384 dsl_dir_close(ra.newparent, FTAG); 01385 return (err); 01386 } 01387 01388 int 01389 dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) 01390 { 01391 dsl_dir_t *ancestor; 01392 int64_t adelta; 01393 uint64_t avail; 01394 01395 ancestor = closest_common_ancestor(sdd, tdd); 01396 adelta = would_change(sdd, -space, ancestor); 01397 avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); 01398 if (avail < space) 01399 return (ENOSPC); 01400 01401 return (0); 01402 } 01403 01404 timestruc_t 01405 dsl_dir_snap_cmtime(dsl_dir_t *dd) 01406 { 01407 timestruc_t t; 01408 01409 mutex_enter(&dd->dd_lock); 01410 t = dd->dd_snap_cmtime; 01411 mutex_exit(&dd->dd_lock); 01412 01413 
return (t); 01414 } 01415 01416 void 01417 dsl_dir_snap_cmtime_update(dsl_dir_t *dd) 01418 { 01419 timestruc_t t; 01420 01421 gethrestime(&t); 01422 mutex_enter(&dd->dd_lock); 01423 dd->dd_snap_cmtime = t; 01424 mutex_exit(&dd->dd_lock); 01425 }