FreeBSD ZFS
The Zettabyte File System

dsl_dir.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 /*
00022  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
00023  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
00024  * All rights reserved.
00025  */
00026 
00027 #include <sys/dmu.h>
00028 #include <sys/dmu_objset.h>
00029 #include <sys/dmu_tx.h>
00030 #include <sys/dsl_dataset.h>
00031 #include <sys/dsl_dir.h>
00032 #include <sys/dsl_prop.h>
00033 #include <sys/dsl_synctask.h>
00034 #include <sys/dsl_deleg.h>
00035 #include <sys/spa.h>
00036 #include <sys/metaslab.h>
00037 #include <sys/zap.h>
00038 #include <sys/zio.h>
00039 #include <sys/arc.h>
00040 #include <sys/sunddi.h>
00041 #include <sys/zvol.h>
00042 #ifdef _KERNEL
00043 #include <sys/zfs_vfsops.h>
00044 #endif
00045 #include "zfs_namecheck.h"
00046 
00047 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
00048 static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
00049 
00050 
00051 /* ARGSUSED */
00052 static void
00053 dsl_dir_evict(dmu_buf_t *db, void *arg)
00054 {
00055         dsl_dir_t *dd = arg;
00056         dsl_pool_t *dp = dd->dd_pool;
00057         int t;
00058 
00059         for (t = 0; t < TXG_SIZE; t++) {
00060                 ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
00061                 ASSERT(dd->dd_tempreserved[t] == 0);
00062                 ASSERT(dd->dd_space_towrite[t] == 0);
00063         }
00064 
00065         if (dd->dd_parent)
00066                 dsl_dir_close(dd->dd_parent, dd);
00067 
00068         spa_close(dd->dd_pool->dp_spa, dd);
00069 
00070         /*
00071          * The props callback list should have been cleaned up by
00072          * objset_evict().
00073          */
00074         list_destroy(&dd->dd_prop_cbs);
00075         mutex_destroy(&dd->dd_lock);
00076         kmem_free(dd, sizeof (dsl_dir_t));
00077 }
00078 
00079 int
00080 dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
00081     const char *tail, void *tag, dsl_dir_t **ddp)
00082 {
00083         dmu_buf_t *dbuf;
00084         dsl_dir_t *dd;
00085         int err;
00086 
00087         ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
00088             dsl_pool_sync_context(dp));
00089 
00090         err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
00091         if (err)
00092                 return (err);
00093         dd = dmu_buf_get_user(dbuf);
00094 #ifdef ZFS_DEBUG
00095         {
00096                 dmu_object_info_t doi;
00097                 dmu_object_info_from_db(dbuf, &doi);
00098                 ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
00099                 ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
00100         }
00101 #endif
00102         if (dd == NULL) {
00103                 dsl_dir_t *winner;
00104 
00105                 dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
00106                 dd->dd_object = ddobj;
00107                 dd->dd_dbuf = dbuf;
00108                 dd->dd_pool = dp;
00109                 dd->dd_phys = dbuf->db_data;
00110                 mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
00111 
00112                 list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
00113                     offsetof(dsl_prop_cb_record_t, cbr_node));
00114 
00115                 dsl_dir_snap_cmtime_update(dd);
00116 
00117                 if (dd->dd_phys->dd_parent_obj) {
00118                         err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
00119                             NULL, dd, &dd->dd_parent);
00120                         if (err)
00121                                 goto errout;
00122                         if (tail) {
00123 #ifdef ZFS_DEBUG
00124                                 uint64_t foundobj;
00125 
00126                                 err = zap_lookup(dp->dp_meta_objset,
00127                                     dd->dd_parent->dd_phys->dd_child_dir_zapobj,
00128                                     tail, sizeof (foundobj), 1, &foundobj);
00129                                 ASSERT(err || foundobj == ddobj);
00130 #endif
00131                                 (void) strcpy(dd->dd_myname, tail);
00132                         } else {
00133                                 err = zap_value_search(dp->dp_meta_objset,
00134                                     dd->dd_parent->dd_phys->dd_child_dir_zapobj,
00135                                     ddobj, 0, dd->dd_myname);
00136                         }
00137                         if (err)
00138                                 goto errout;
00139                 } else {
00140                         (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
00141                 }
00142 
00143                 if (dsl_dir_is_clone(dd)) {
00144                         dmu_buf_t *origin_bonus;
00145                         dsl_dataset_phys_t *origin_phys;
00146 
00147                         /*
00148                          * We can't open the origin dataset, because
00149                          * that would require opening this dsl_dir.
00150                          * Just look at its phys directly instead.
00151                          */
00152                         err = dmu_bonus_hold(dp->dp_meta_objset,
00153                             dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
00154                         if (err)
00155                                 goto errout;
00156                         origin_phys = origin_bonus->db_data;
00157                         dd->dd_origin_txg =
00158                             origin_phys->ds_creation_txg;
00159                         dmu_buf_rele(origin_bonus, FTAG);
00160                 }
00161 
00162                 winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
00163                     dsl_dir_evict);
00164                 if (winner) {
00165                         if (dd->dd_parent)
00166                                 dsl_dir_close(dd->dd_parent, dd);
00167                         mutex_destroy(&dd->dd_lock);
00168                         kmem_free(dd, sizeof (dsl_dir_t));
00169                         dd = winner;
00170                 } else {
00171                         spa_open_ref(dp->dp_spa, dd);
00172                 }
00173         }
00174 
00175         /*
00176          * The dsl_dir_t has both open-to-close and instantiate-to-evict
00177          * holds on the spa.  We need the open-to-close holds because
00178          * otherwise the spa_refcnt wouldn't change when we open a
00179          * dir which the spa also has open, so we could incorrectly
00180          * think it was OK to unload/export/destroy the pool.  We need
00181          * the instantiate-to-evict hold because the dsl_dir_t has a
00182          * pointer to the dd_pool, which has a pointer to the spa_t.
00183          */
00184         spa_open_ref(dp->dp_spa, tag);
00185         ASSERT3P(dd->dd_pool, ==, dp);
00186         ASSERT3U(dd->dd_object, ==, ddobj);
00187         ASSERT3P(dd->dd_dbuf, ==, dbuf);
00188         *ddp = dd;
00189         return (0);
00190 
00191 errout:
00192         if (dd->dd_parent)
00193                 dsl_dir_close(dd->dd_parent, dd);
00194         mutex_destroy(&dd->dd_lock);
00195         kmem_free(dd, sizeof (dsl_dir_t));
00196         dmu_buf_rele(dbuf, tag);
00197         return (err);
00198 }
00199 
00200 void
00201 dsl_dir_close(dsl_dir_t *dd, void *tag)
00202 {
00203         dprintf_dd(dd, "%s\n", "");
00204         spa_close(dd->dd_pool->dp_spa, tag);
00205         dmu_buf_rele(dd->dd_dbuf, tag);
00206 }
00207 
00209 void
00210 dsl_dir_name(dsl_dir_t *dd, char *buf)
00211 {
00212         if (dd->dd_parent) {
00213                 dsl_dir_name(dd->dd_parent, buf);
00214                 (void) strcat(buf, "/");
00215         } else {
00216                 buf[0] = '\0';
00217         }
00218         if (!MUTEX_HELD(&dd->dd_lock)) {
00219                 /*
00220                  * recursive mutex so that we can use
00221                  * dprintf_dd() with dd_lock held
00222                  */
00223                 mutex_enter(&dd->dd_lock);
00224                 (void) strcat(buf, dd->dd_myname);
00225                 mutex_exit(&dd->dd_lock);
00226         } else {
00227                 (void) strcat(buf, dd->dd_myname);
00228         }
00229 }
00230 
00232 int
00233 dsl_dir_namelen(dsl_dir_t *dd)
00234 {
00235         int result = 0;
00236 
00237         if (dd->dd_parent) {
00238                 /* parent's name + 1 for the "/" */
00239                 result = dsl_dir_namelen(dd->dd_parent) + 1;
00240         }
00241 
00242         if (!MUTEX_HELD(&dd->dd_lock)) {
00243                 /* see dsl_dir_name */
00244                 mutex_enter(&dd->dd_lock);
00245                 result += strlen(dd->dd_myname);
00246                 mutex_exit(&dd->dd_lock);
00247         } else {
00248                 result += strlen(dd->dd_myname);
00249         }
00250 
00251         return (result);
00252 }
00253 
00254 static int
00255 getcomponent(const char *path, char *component, const char **nextp)
00256 {
00257         char *p;
00258         if ((path == NULL) || (path[0] == '\0'))
00259                 return (ENOENT);
00260         /* This would be a good place to reserve some namespace... */
00261         p = strpbrk(path, "/@");
00262         if (p && (p[1] == '/' || p[1] == '@')) {
00263                 /* two separators in a row */
00264                 return (EINVAL);
00265         }
00266         if (p == NULL || p == path) {
00267                 /*
00268                  * if the first thing is an @ or /, it had better be an
00269                  * @ and it had better not have any more ats or slashes,
00270                  * and it had better have something after the @.
00271                  */
00272                 if (p != NULL &&
00273                     (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
00274                         return (EINVAL);
00275                 if (strlen(path) >= MAXNAMELEN)
00276                         return (ENAMETOOLONG);
00277                 (void) strcpy(component, path);
00278                 p = NULL;
00279         } else if (p[0] == '/') {
00280                 if (p-path >= MAXNAMELEN)
00281                         return (ENAMETOOLONG);
00282                 (void) strncpy(component, path, p - path);
00283                 component[p-path] = '\0';
00284                 p++;
00285         } else if (p[0] == '@') {
00286                 /*
00287                  * if the next separator is an @, there better not be
00288                  * any more slashes.
00289                  */
00290                 if (strchr(path, '/'))
00291                         return (EINVAL);
00292                 if (p-path >= MAXNAMELEN)
00293                         return (ENAMETOOLONG);
00294                 (void) strncpy(component, path, p - path);
00295                 component[p-path] = '\0';
00296         } else {
00297                 ASSERT(!"invalid p");
00298         }
00299         *nextp = p;
00300         return (0);
00301 }
00302 
00307 int
00308 dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
00309     dsl_dir_t **ddp, const char **tailp)
00310 {
00311         char buf[MAXNAMELEN];
00312         const char *next, *nextnext = NULL;
00313         int err;
00314         dsl_dir_t *dd;
00315         dsl_pool_t *dp;
00316         uint64_t ddobj;
00317         int openedspa = FALSE;
00318 
00319         dprintf("%s\n", name);
00320 
00321         err = getcomponent(name, buf, &next);
00322         if (err)
00323                 return (err);
00324         if (spa == NULL) {
00325                 err = spa_open(buf, &spa, FTAG);
00326                 if (err) {
00327                         dprintf("spa_open(%s) failed\n", buf);
00328                         return (err);
00329                 }
00330                 openedspa = TRUE;
00331 
00332                 /* XXX this assertion belongs in spa_open */
00333                 ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
00334         }
00335 
00336         dp = spa_get_dsl(spa);
00337 
00338         rw_enter(&dp->dp_config_rwlock, RW_READER);
00339         err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
00340         if (err) {
00341                 rw_exit(&dp->dp_config_rwlock);
00342                 if (openedspa)
00343                         spa_close(spa, FTAG);
00344                 return (err);
00345         }
00346 
00347         while (next != NULL) {
00348                 dsl_dir_t *child_ds;
00349                 err = getcomponent(next, buf, &nextnext);
00350                 if (err)
00351                         break;
00352                 ASSERT(next[0] != '\0');
00353                 if (next[0] == '@')
00354                         break;
00355                 dprintf("looking up %s in obj%lld\n",
00356                     buf, dd->dd_phys->dd_child_dir_zapobj);
00357 
00358                 err = zap_lookup(dp->dp_meta_objset,
00359                     dd->dd_phys->dd_child_dir_zapobj,
00360                     buf, sizeof (ddobj), 1, &ddobj);
00361                 if (err) {
00362                         if (err == ENOENT)
00363                                 err = 0;
00364                         break;
00365                 }
00366 
00367                 err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
00368                 if (err)
00369                         break;
00370                 dsl_dir_close(dd, tag);
00371                 dd = child_ds;
00372                 next = nextnext;
00373         }
00374         rw_exit(&dp->dp_config_rwlock);
00375 
00376         if (err) {
00377                 dsl_dir_close(dd, tag);
00378                 if (openedspa)
00379                         spa_close(spa, FTAG);
00380                 return (err);
00381         }
00382 
00383         /*
00384          * It's an error if there's more than one component left, or
00385          * tailp==NULL and there's any component left.
00386          */
00387         if (next != NULL &&
00388             (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
00389                 /* bad path name */
00390                 dsl_dir_close(dd, tag);
00391                 dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
00392                 err = ENOENT;
00393         }
00394         if (tailp)
00395                 *tailp = next;
00396         if (openedspa)
00397                 spa_close(spa, FTAG);
00398         *ddp = dd;
00399         return (err);
00400 }
00401 
00408 int
00409 dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
00410 {
00411         return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
00412 }
00413 
00414 uint64_t
00415 dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
00416     dmu_tx_t *tx)
00417 {
00418         objset_t *mos = dp->dp_meta_objset;
00419         uint64_t ddobj;
00420         dsl_dir_phys_t *ddphys;
00421         dmu_buf_t *dbuf;
00422 
00423         ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
00424             DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
00425         if (pds) {
00426                 VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
00427                     name, sizeof (uint64_t), 1, &ddobj, tx));
00428         } else {
00429                 /* it's the root dir */
00430                 VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
00431                     DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
00432         }
00433         VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
00434         dmu_buf_will_dirty(dbuf, tx);
00435         ddphys = dbuf->db_data;
00436 
00437         ddphys->dd_creation_time = gethrestime_sec();
00438         if (pds)
00439                 ddphys->dd_parent_obj = pds->dd_object;
00440         ddphys->dd_props_zapobj = zap_create(mos,
00441             DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
00442         ddphys->dd_child_dir_zapobj = zap_create(mos,
00443             DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
00444         if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
00445                 ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
00446         dmu_buf_rele(dbuf, FTAG);
00447 
00448         return (ddobj);
00449 }
00450 
00451 /* ARGSUSED */
00452 int
00453 dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
00454 {
00455         dsl_dataset_t *ds = arg1;
00456         dsl_dir_t *dd = ds->ds_dir;
00457         dsl_pool_t *dp = dd->dd_pool;
00458         objset_t *mos = dp->dp_meta_objset;
00459         int err;
00460         uint64_t count;
00461 
00462         /*
00463          * There should be exactly two holds, both from
00464          * dsl_dataset_destroy: one on the dd directory, and one on its
00465          * head ds.  If there are more holds, then a concurrent thread is
00466          * performing a lookup inside this dir while we're trying to destroy
00467          * it.  To minimize this possibility, we perform this check only
00468          * in syncing context and fail the operation if we encounter
00469          * additional holds.  The dp_config_rwlock ensures that nobody else
00470          * opens it after we check.
00471          */
00472         if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2)
00473                 return (EBUSY);
00474 
00475         err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
00476         if (err)
00477                 return (err);
00478         if (count != 0)
00479                 return (EEXIST);
00480 
00481         return (0);
00482 }
00483 
00484 void
00485 dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
00486 {
00487         dsl_dataset_t *ds = arg1;
00488         dsl_dir_t *dd = ds->ds_dir;
00489         objset_t *mos = dd->dd_pool->dp_meta_objset;
00490         dsl_prop_setarg_t psa;
00491         uint64_t value = 0;
00492         uint64_t obj;
00493         dd_used_t t;
00494 
00495         ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
00496         ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
00497 
00498         /* Remove our reservation. */
00499         dsl_prop_setarg_init_uint64(&psa, "reservation",
00500             (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
00501             &value);
00502         psa.psa_effective_value = 0;    /* predict default value */
00503 
00504         dsl_dir_set_reservation_sync(ds, &psa, tx);
00505 
00506         ASSERT0(dd->dd_phys->dd_used_bytes);
00507         ASSERT0(dd->dd_phys->dd_reserved);
00508         for (t = 0; t < DD_USED_NUM; t++)
00509                 ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
00510 
00511         VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
00512         VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
00513         VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
00514         VERIFY(0 == zap_remove(mos,
00515             dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
00516 
00517         obj = dd->dd_object;
00518         dsl_dir_close(dd, tag);
00519         VERIFY(0 == dmu_object_free(mos, obj, tx));
00520 }
00521 
00522 boolean_t
00523 dsl_dir_is_clone(dsl_dir_t *dd)
00524 {
00525         return (dd->dd_phys->dd_origin_obj &&
00526             (dd->dd_pool->dp_origin_snap == NULL ||
00527             dd->dd_phys->dd_origin_obj !=
00528             dd->dd_pool->dp_origin_snap->ds_object));
00529 }
00530 
00531 void
00532 dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
00533 {
00534         mutex_enter(&dd->dd_lock);
00535         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
00536             dd->dd_phys->dd_used_bytes);
00537         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
00538         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
00539             dd->dd_phys->dd_reserved);
00540         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
00541             dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
00542             (dd->dd_phys->dd_uncompressed_bytes * 100 /
00543             dd->dd_phys->dd_compressed_bytes));
00544         if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
00545                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
00546                     dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]);
00547                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
00548                     dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]);
00549                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
00550                     dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]);
00551                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
00552                     dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] +
00553                     dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]);
00554         }
00555         mutex_exit(&dd->dd_lock);
00556 
00557         rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
00558         if (dsl_dir_is_clone(dd)) {
00559                 dsl_dataset_t *ds;
00560                 char buf[MAXNAMELEN];
00561 
00562                 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
00563                     dd->dd_phys->dd_origin_obj, FTAG, &ds));
00564                 dsl_dataset_name(ds, buf);
00565                 dsl_dataset_rele(ds, FTAG);
00566                 dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
00567         }
00568         rw_exit(&dd->dd_pool->dp_config_rwlock);
00569 }
00570 
00571 void
00572 dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
00573 {
00574         dsl_pool_t *dp = dd->dd_pool;
00575 
00576         ASSERT(dd->dd_phys);
00577 
00578         if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
00579                 /* up the hold count until we can be written out */
00580                 dmu_buf_add_ref(dd->dd_dbuf, dd);
00581         }
00582 }
00583 
00584 static int64_t
00585 parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
00586 {
00587         uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
00588         uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
00589         return (new_accounted - old_accounted);
00590 }
00591 
00592 void
00593 dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
00594 {
00595         ASSERT(dmu_tx_is_syncing(tx));
00596 
00597         mutex_enter(&dd->dd_lock);
00598         ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
00599         dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
00600             dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
00601         dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
00602         mutex_exit(&dd->dd_lock);
00603 
00604         /* release the hold from dsl_dir_dirty */
00605         dmu_buf_rele(dd->dd_dbuf, dd);
00606 }
00607 
00608 static uint64_t
00609 dsl_dir_space_towrite(dsl_dir_t *dd)
00610 {
00611         uint64_t space = 0;
00612         int i;
00613 
00614         ASSERT(MUTEX_HELD(&dd->dd_lock));
00615 
00616         for (i = 0; i < TXG_SIZE; i++) {
00617                 space += dd->dd_space_towrite[i&TXG_MASK];
00618                 ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
00619         }
00620         return (space);
00621 }
00622 
00628 uint64_t
00629 dsl_dir_space_available(dsl_dir_t *dd,
00630     dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
00631 {
00632         uint64_t parentspace, myspace, quota, used;
00633 
00634         /*
00635          * If there are no restrictions otherwise, assume we have
00636          * unlimited space available.
00637          */
00638         quota = UINT64_MAX;
00639         parentspace = UINT64_MAX;
00640 
00641         if (dd->dd_parent != NULL) {
00642                 parentspace = dsl_dir_space_available(dd->dd_parent,
00643                     ancestor, delta, ondiskonly);
00644         }
00645 
00646         mutex_enter(&dd->dd_lock);
00647         if (dd->dd_phys->dd_quota != 0)
00648                 quota = dd->dd_phys->dd_quota;
00649         used = dd->dd_phys->dd_used_bytes;
00650         if (!ondiskonly)
00651                 used += dsl_dir_space_towrite(dd);
00652 
00653         if (dd->dd_parent == NULL) {
00654                 uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
00655                 quota = MIN(quota, poolsize);
00656         }
00657 
00658         if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
00659                 /*
00660                  * We have some space reserved, in addition to what our
00661                  * parent gave us.
00662                  */
00663                 parentspace += dd->dd_phys->dd_reserved - used;
00664         }
00665 
00666         if (dd == ancestor) {
00667                 ASSERT(delta <= 0);
00668                 ASSERT(used >= -delta);
00669                 used += delta;
00670                 if (parentspace != UINT64_MAX)
00671                         parentspace -= delta;
00672         }
00673 
00674         if (used > quota) {
00675                 /* over quota */
00676                 myspace = 0;
00677         } else {
00678                 /*
00679                  * the lesser of the space provided by our parent and
00680                  * the space left in our quota
00681                  */
00682                 myspace = MIN(parentspace, quota - used);
00683         }
00684 
00685         mutex_exit(&dd->dd_lock);
00686 
00687         return (myspace);
00688 }
00689 
/*
 * One entry on the list handed back as the cookie of
 * dsl_dir_tempreserve_space().  Each entry records a single temporary
 * reservation.  Which kind it is follows from which pointer is set:
 * tr_ds for a per-dsl_dir reservation, tr_dp for a pool-wide one, and
 * neither for the ARC reservation made through arc_tempreserve_space().
 */
struct tempreserve {
	list_node_t tr_node;	/* linkage on the tr_list */
	dsl_pool_t *tr_dp;	/* pool charged, for a pool reservation */
	dsl_dir_t *tr_ds;	/* dsl_dir charged, for a dir reservation */
	uint64_t tr_size;	/* number of bytes reserved */
};
00696 
00697 static int
00698 dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
00699     boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
00700     dmu_tx_t *tx, boolean_t first)
00701 {
00702         uint64_t txg = tx->tx_txg;
00703         uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
00704         uint64_t deferred = 0;
00705         struct tempreserve *tr;
00706         int retval = EDQUOT;
00707         int txgidx = txg & TXG_MASK;
00708         int i;
00709         uint64_t ref_rsrv = 0;
00710 
00711         ASSERT3U(txg, !=, 0);
00712         ASSERT3S(asize, >, 0);
00713 
00714         mutex_enter(&dd->dd_lock);
00715 
00716         /*
00717          * Check against the dsl_dir's quota.  We don't add in the delta
00718          * when checking for over-quota because they get one free hit.
00719          */
00720         est_inflight = dsl_dir_space_towrite(dd);
00721         for (i = 0; i < TXG_SIZE; i++)
00722                 est_inflight += dd->dd_tempreserved[i];
00723         used_on_disk = dd->dd_phys->dd_used_bytes;
00724 
00725         /*
00726          * On the first iteration, fetch the dataset's used-on-disk and
00727          * refreservation values. Also, if checkrefquota is set, test if
00728          * allocating this space would exceed the dataset's refquota.
00729          */
00730         if (first && tx->tx_objset) {
00731                 int error;
00732                 dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
00733 
00734                 error = dsl_dataset_check_quota(ds, checkrefquota,
00735                     asize, est_inflight, &used_on_disk, &ref_rsrv);
00736                 if (error) {
00737                         mutex_exit(&dd->dd_lock);
00738                         return (error);
00739                 }
00740         }
00741 
00742         /*
00743          * If this transaction will result in a net free of space,
00744          * we want to let it through.
00745          */
00746         if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
00747                 quota = UINT64_MAX;
00748         else
00749                 quota = dd->dd_phys->dd_quota;
00750 
00751         /*
00752          * Adjust the quota against the actual pool size at the root
00753          * minus any outstanding deferred frees.
00754          * To ensure that it's possible to remove files from a full
00755          * pool without inducing transient overcommits, we throttle
00756          * netfree transactions against a quota that is slightly larger,
00757          * but still within the pool's allocation slop.  In cases where
00758          * we're very close to full, this will allow a steady trickle of
00759          * removes to get through.
00760          */
00761         if (dd->dd_parent == NULL) {
00762                 spa_t *spa = dd->dd_pool->dp_spa;
00763                 uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
00764                 deferred = metaslab_class_get_deferred(spa_normal_class(spa));
00765                 if (poolsize - deferred < quota) {
00766                         quota = poolsize - deferred;
00767                         retval = ENOSPC;
00768                 }
00769         }
00770 
00771         /*
00772          * If they are requesting more space, and our current estimate
00773          * is over quota, they get to try again unless the actual
00774          * on-disk is over quota and there are no pending changes (which
00775          * may free up space for us).
00776          */
00777         if (used_on_disk + est_inflight >= quota) {
00778                 if (est_inflight > 0 || used_on_disk < quota ||
00779                     (retval == ENOSPC && used_on_disk < quota + deferred))
00780                         retval = ERESTART;
00781                 dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
00782                     "quota=%lluK tr=%lluK err=%d\n",
00783                     used_on_disk>>10, est_inflight>>10,
00784                     quota>>10, asize>>10, retval);
00785                 mutex_exit(&dd->dd_lock);
00786                 return (retval);
00787         }
00788 
00789         /* We need to up our estimated delta before dropping dd_lock */
00790         dd->dd_tempreserved[txgidx] += asize;
00791 
00792         parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
00793             asize - ref_rsrv);
00794         mutex_exit(&dd->dd_lock);
00795 
00796         tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
00797         tr->tr_ds = dd;
00798         tr->tr_size = asize;
00799         list_insert_tail(tr_list, tr);
00800 
00801         /* see if it's OK with our parent */
00802         if (dd->dd_parent && parent_rsrv) {
00803                 boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);
00804 
00805                 return (dsl_dir_tempreserve_impl(dd->dd_parent,
00806                     parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
00807         } else {
00808                 return (0);
00809         }
00810 }
00811 
00818 int
00819 dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
00820     uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
00821 {
00822         int err;
00823         list_t *tr_list;
00824 
00825         if (asize == 0) {
00826                 *tr_cookiep = NULL;
00827                 return (0);
00828         }
00829 
00830         tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
00831         list_create(tr_list, sizeof (struct tempreserve),
00832             offsetof(struct tempreserve, tr_node));
00833         ASSERT3S(asize, >, 0);
00834         ASSERT3S(fsize, >=, 0);
00835 
00836         err = arc_tempreserve_space(lsize, tx->tx_txg);
00837         if (err == 0) {
00838                 struct tempreserve *tr;
00839 
00840                 tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
00841                 tr->tr_size = lsize;
00842                 list_insert_tail(tr_list, tr);
00843 
00844                 err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
00845         } else {
00846                 if (err == EAGAIN) {
00847                         txg_delay(dd->dd_pool, tx->tx_txg, 1);
00848                         err = ERESTART;
00849                 }
00850                 dsl_pool_memory_pressure(dd->dd_pool);
00851         }
00852 
00853         if (err == 0) {
00854                 struct tempreserve *tr;
00855 
00856                 tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
00857                 tr->tr_dp = dd->dd_pool;
00858                 tr->tr_size = asize;
00859                 list_insert_tail(tr_list, tr);
00860 
00861                 err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
00862                     FALSE, asize > usize, tr_list, tx, TRUE);
00863         }
00864 
00865         if (err)
00866                 dsl_dir_tempreserve_clear(tr_list, tx);
00867         else
00868                 *tr_cookiep = tr_list;
00869 
00870         return (err);
00871 }
00872 
00877 void
00878 dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
00879 {
00880         int txgidx = tx->tx_txg & TXG_MASK;
00881         list_t *tr_list = tr_cookie;
00882         struct tempreserve *tr;
00883 
00884         ASSERT3U(tx->tx_txg, !=, 0);
00885 
00886         if (tr_cookie == NULL)
00887                 return;
00888 
00889         while (tr = list_head(tr_list)) {
00890                 if (tr->tr_dp) {
00891                         dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx);
00892                 } else if (tr->tr_ds) {
00893                         mutex_enter(&tr->tr_ds->dd_lock);
00894                         ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
00895                             tr->tr_size);
00896                         tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
00897                         mutex_exit(&tr->tr_ds->dd_lock);
00898                 } else {
00899                         arc_tempreserve_clear(tr->tr_size);
00900                 }
00901                 list_remove(tr_list, tr);
00902                 kmem_free(tr, sizeof (struct tempreserve));
00903         }
00904 
00905         kmem_free(tr_list, sizeof (list_t));
00906 }
00907 
00908 static void
00909 dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
00910 {
00911         int64_t parent_space;
00912         uint64_t est_used;
00913 
00914         mutex_enter(&dd->dd_lock);
00915         if (space > 0)
00916                 dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
00917 
00918         est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes;
00919         parent_space = parent_delta(dd, est_used, space);
00920         mutex_exit(&dd->dd_lock);
00921 
00922         /* Make sure that we clean up dd_space_to* */
00923         dsl_dir_dirty(dd, tx);
00924 
00925         /* XXX this is potentially expensive and unnecessary... */
00926         if (parent_space && dd->dd_parent)
00927                 dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx);
00928 }
00929 
00935 void
00936 dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
00937 {
00938         dsl_pool_willuse_space(dd->dd_pool, space, tx);
00939         dsl_dir_willuse_space_impl(dd, space, tx);
00940 }
00941 
00943 void
00944 dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
00945     int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
00946 {
00947         int64_t accounted_delta;
00948         boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
00949 
00950         ASSERT(dmu_tx_is_syncing(tx));
00951         ASSERT(type < DD_USED_NUM);
00952 
00953         if (needlock)
00954                 mutex_enter(&dd->dd_lock);
00955         accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
00956         ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used);
00957         ASSERT(compressed >= 0 ||
00958             dd->dd_phys->dd_compressed_bytes >= -compressed);
00959         ASSERT(uncompressed >= 0 ||
00960             dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
00961         dmu_buf_will_dirty(dd->dd_dbuf, tx);
00962         dd->dd_phys->dd_used_bytes += used;
00963         dd->dd_phys->dd_uncompressed_bytes += uncompressed;
00964         dd->dd_phys->dd_compressed_bytes += compressed;
00965 
00966         if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
00967                 ASSERT(used > 0 ||
00968                     dd->dd_phys->dd_used_breakdown[type] >= -used);
00969                 dd->dd_phys->dd_used_breakdown[type] += used;
00970 #ifdef DEBUG
00971                 dd_used_t t;
00972                 uint64_t u = 0;
00973                 for (t = 0; t < DD_USED_NUM; t++)
00974                         u += dd->dd_phys->dd_used_breakdown[t];
00975                 ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes);
00976 #endif
00977         }
00978         if (needlock)
00979                 mutex_exit(&dd->dd_lock);
00980 
00981         if (dd->dd_parent != NULL) {
00982                 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
00983                     accounted_delta, compressed, uncompressed, tx);
00984                 dsl_dir_transfer_space(dd->dd_parent,
00985                     used - accounted_delta,
00986                     DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
00987         }
00988 }
00989 
00990 void
00991 dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
00992     dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
00993 {
00994         boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
00995 
00996         ASSERT(dmu_tx_is_syncing(tx));
00997         ASSERT(oldtype < DD_USED_NUM);
00998         ASSERT(newtype < DD_USED_NUM);
00999 
01000         if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
01001                 return;
01002 
01003         if (needlock)
01004                 mutex_enter(&dd->dd_lock);
01005         ASSERT(delta > 0 ?
01006             dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
01007             dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
01008         ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
01009         dmu_buf_will_dirty(dd->dd_dbuf, tx);
01010         dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
01011         dd->dd_phys->dd_used_breakdown[newtype] += delta;
01012         if (needlock)
01013                 mutex_exit(&dd->dd_lock);
01014 }
01015 
01016 static int
01017 dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
01018 {
01019         dsl_dataset_t *ds = arg1;
01020         dsl_dir_t *dd = ds->ds_dir;
01021         dsl_prop_setarg_t *psa = arg2;
01022         int err;
01023         uint64_t towrite;
01024 
01025         if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
01026                 return (err);
01027 
01028         if (psa->psa_effective_value == 0)
01029                 return (0);
01030 
01031         mutex_enter(&dd->dd_lock);
01032         /*
01033          * If we are doing the preliminary check in open context, and
01034          * there are pending changes, then don't fail it, since the
01035          * pending changes could under-estimate the amount of space to be
01036          * freed up.
01037          */
01038         towrite = dsl_dir_space_towrite(dd);
01039         if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
01040             (psa->psa_effective_value < dd->dd_phys->dd_reserved ||
01041             psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
01042                 err = ENOSPC;
01043         }
01044         mutex_exit(&dd->dd_lock);
01045         return (err);
01046 }
01047 
01048 extern dsl_syncfunc_t dsl_prop_set_sync;
01049 
01050 static void
01051 dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
01052 {
01053         dsl_dataset_t *ds = arg1;
01054         dsl_dir_t *dd = ds->ds_dir;
01055         dsl_prop_setarg_t *psa = arg2;
01056         uint64_t effective_value = psa->psa_effective_value;
01057 
01058         dsl_prop_set_sync(ds, psa, tx);
01059         DSL_PROP_CHECK_PREDICTION(dd, psa);
01060 
01061         dmu_buf_will_dirty(dd->dd_dbuf, tx);
01062 
01063         mutex_enter(&dd->dd_lock);
01064         dd->dd_phys->dd_quota = effective_value;
01065         mutex_exit(&dd->dd_lock);
01066 }
01067 
01068 int
01069 dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
01070 {
01071         dsl_dir_t *dd;
01072         dsl_dataset_t *ds;
01073         dsl_prop_setarg_t psa;
01074         int err;
01075 
01076         dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
01077 
01078         err = dsl_dataset_hold(ddname, FTAG, &ds);
01079         if (err)
01080                 return (err);
01081 
01082         err = dsl_dir_open(ddname, FTAG, &dd, NULL);
01083         if (err) {
01084                 dsl_dataset_rele(ds, FTAG);
01085                 return (err);
01086         }
01087 
01088         ASSERT(ds->ds_dir == dd);
01089 
01090         /*
01091          * If someone removes a file, then tries to set the quota, we want to
01092          * make sure the file freeing takes effect.
01093          */
01094         txg_wait_open(dd->dd_pool, 0);
01095 
01096         err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
01097             dsl_dir_set_quota_sync, ds, &psa, 0);
01098 
01099         dsl_dir_close(dd, FTAG);
01100         dsl_dataset_rele(ds, FTAG);
01101         return (err);
01102 }
01103 
01104 int
01105 dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
01106 {
01107         dsl_dataset_t *ds = arg1;
01108         dsl_dir_t *dd = ds->ds_dir;
01109         dsl_prop_setarg_t *psa = arg2;
01110         uint64_t effective_value;
01111         uint64_t used, avail;
01112         int err;
01113 
01114         if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
01115                 return (err);
01116 
01117         effective_value = psa->psa_effective_value;
01118 
01119         /*
01120          * If we are doing the preliminary check in open context, the
01121          * space estimates may be inaccurate.
01122          */
01123         if (!dmu_tx_is_syncing(tx))
01124                 return (0);
01125 
01126         mutex_enter(&dd->dd_lock);
01127         used = dd->dd_phys->dd_used_bytes;
01128         mutex_exit(&dd->dd_lock);
01129 
01130         if (dd->dd_parent) {
01131                 avail = dsl_dir_space_available(dd->dd_parent,
01132                     NULL, 0, FALSE);
01133         } else {
01134                 avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
01135         }
01136 
01137         if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
01138                 uint64_t delta = MAX(used, effective_value) -
01139                     MAX(used, dd->dd_phys->dd_reserved);
01140 
01141                 if (delta > avail)
01142                         return (ENOSPC);
01143                 if (dd->dd_phys->dd_quota > 0 &&
01144                     effective_value > dd->dd_phys->dd_quota)
01145                         return (ENOSPC);
01146         }
01147 
01148         return (0);
01149 }
01150 
01151 static void
01152 dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
01153 {
01154         dsl_dataset_t *ds = arg1;
01155         dsl_dir_t *dd = ds->ds_dir;
01156         dsl_prop_setarg_t *psa = arg2;
01157         uint64_t effective_value = psa->psa_effective_value;
01158         uint64_t used;
01159         int64_t delta;
01160 
01161         dsl_prop_set_sync(ds, psa, tx);
01162         DSL_PROP_CHECK_PREDICTION(dd, psa);
01163 
01164         dmu_buf_will_dirty(dd->dd_dbuf, tx);
01165 
01166         mutex_enter(&dd->dd_lock);
01167         used = dd->dd_phys->dd_used_bytes;
01168         delta = MAX(used, effective_value) -
01169             MAX(used, dd->dd_phys->dd_reserved);
01170         dd->dd_phys->dd_reserved = effective_value;
01171 
01172         if (dd->dd_parent != NULL) {
01173                 /* Roll up this additional usage into our ancestors */
01174                 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
01175                     delta, 0, 0, tx);
01176         }
01177         mutex_exit(&dd->dd_lock);
01178 }
01179 
01180 int
01181 dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
01182     uint64_t reservation)
01183 {
01184         dsl_dir_t *dd;
01185         dsl_dataset_t *ds;
01186         dsl_prop_setarg_t psa;
01187         int err;
01188 
01189         dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
01190 
01191         err = dsl_dataset_hold(ddname, FTAG, &ds);
01192         if (err)
01193                 return (err);
01194 
01195         err = dsl_dir_open(ddname, FTAG, &dd, NULL);
01196         if (err) {
01197                 dsl_dataset_rele(ds, FTAG);
01198                 return (err);
01199         }
01200 
01201         ASSERT(ds->ds_dir == dd);
01202 
01203         err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
01204             dsl_dir_set_reservation_sync, ds, &psa, 0);
01205 
01206         dsl_dir_close(dd, FTAG);
01207         dsl_dataset_rele(ds, FTAG);
01208         return (err);
01209 }
01210 
01211 static dsl_dir_t *
01212 closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
01213 {
01214         for (; ds1; ds1 = ds1->dd_parent) {
01215                 dsl_dir_t *dd;
01216                 for (dd = ds2; dd; dd = dd->dd_parent) {
01217                         if (ds1 == dd)
01218                                 return (dd);
01219                 }
01220         }
01221         return (NULL);
01222 }
01223 
01228 static int64_t
01229 would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
01230 {
01231         if (dd == ancestor)
01232                 return (delta);
01233 
01234         mutex_enter(&dd->dd_lock);
01235         delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta);
01236         mutex_exit(&dd->dd_lock);
01237         return (would_change(dd->dd_parent, delta, ancestor));
01238 }
01239 
01240 struct renamearg {
01241         dsl_dir_t *newparent;
01242         const char *mynewname;
01243         boolean_t allowmounted;
01244 };
01245 
01246 static int
01247 dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
01248 {
01249         dsl_dir_t *dd = arg1;
01250         struct renamearg *ra = arg2;
01251         dsl_pool_t *dp = dd->dd_pool;
01252         objset_t *mos = dp->dp_meta_objset;
01253         int err;
01254         uint64_t val;
01255 
01256         /*
01257          * There should only be one reference, from dmu_objset_rename().
01258          * Fleeting holds are also possible (eg, from "zfs list" getting
01259          * stats), but any that are present in open context will likely
01260          * be gone by syncing context, so only fail from syncing
01261          * context.
01262          * Don't check if we allow renaming of busy (mounted) dataset.
01263          */
01264         if (!ra->allowmounted && dmu_tx_is_syncing(tx) &&
01265             dmu_buf_refcount(dd->dd_dbuf) > 1) {
01266                 return (EBUSY);
01267         }
01268 
01269         /* check for existing name */
01270         err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
01271             ra->mynewname, 8, 1, &val);
01272         if (err == 0)
01273                 return (EEXIST);
01274         if (err != ENOENT)
01275                 return (err);
01276 
01277         if (ra->newparent != dd->dd_parent) {
01278                 /* is there enough space? */
01279                 uint64_t myspace =
01280                     MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
01281 
01282                 /* no rename into our descendant */
01283                 if (closest_common_ancestor(dd, ra->newparent) == dd)
01284                         return (EINVAL);
01285 
01286                 if (err = dsl_dir_transfer_possible(dd->dd_parent,
01287                     ra->newparent, myspace))
01288                         return (err);
01289         }
01290 
01291         return (0);
01292 }
01293 
01294 static void
01295 dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
01296 {
01297         char oldname[MAXPATHLEN], newname[MAXPATHLEN];
01298         dsl_dir_t *dd = arg1;
01299         struct renamearg *ra = arg2;
01300         dsl_pool_t *dp = dd->dd_pool;
01301         objset_t *mos = dp->dp_meta_objset;
01302         int err;
01303 
01304         ASSERT(ra->allowmounted || dmu_buf_refcount(dd->dd_dbuf) <= 2);
01305 
01306         if (ra->newparent != dd->dd_parent) {
01307                 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
01308                     -dd->dd_phys->dd_used_bytes,
01309                     -dd->dd_phys->dd_compressed_bytes,
01310                     -dd->dd_phys->dd_uncompressed_bytes, tx);
01311                 dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
01312                     dd->dd_phys->dd_used_bytes,
01313                     dd->dd_phys->dd_compressed_bytes,
01314                     dd->dd_phys->dd_uncompressed_bytes, tx);
01315 
01316                 if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) {
01317                         uint64_t unused_rsrv = dd->dd_phys->dd_reserved -
01318                             dd->dd_phys->dd_used_bytes;
01319 
01320                         dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
01321                             -unused_rsrv, 0, 0, tx);
01322                         dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
01323                             unused_rsrv, 0, 0, tx);
01324                 }
01325         }
01326 
01327         dmu_buf_will_dirty(dd->dd_dbuf, tx);
01328 
01329         /* remove from old parent zapobj */
01330         dsl_dir_name(dd, oldname);
01331         err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
01332             dd->dd_myname, tx);
01333         ASSERT0(err);
01334 
01335         (void) strcpy(dd->dd_myname, ra->mynewname);
01336         dsl_dir_close(dd->dd_parent, dd);
01337         dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
01338         VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
01339             ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
01340 
01341         /* add to new parent zapobj */
01342         err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
01343             dd->dd_myname, 8, 1, &dd->dd_object, tx);
01344         ASSERT0(err);
01345         dsl_dir_name(dd, newname);
01346 #ifdef _KERNEL
01347         zfsvfs_update_fromname(oldname, newname);
01348         zvol_rename_minors(oldname, newname);
01349 #endif
01350 
01351         spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
01352             tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
01353 }
01354 
01355 int
01356 dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags)
01357 {
01358         struct renamearg ra;
01359         int err;
01360 
01361         /* new parent should exist */
01362         err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
01363         if (err)
01364                 return (err);
01365 
01366         /* can't rename to different pool */
01367         if (dd->dd_pool != ra.newparent->dd_pool) {
01368                 err = ENXIO;
01369                 goto out;
01370         }
01371 
01372         /* new name should not already exist */
01373         if (ra.mynewname == NULL) {
01374                 err = EEXIST;
01375                 goto out;
01376         }
01377 
01378         ra.allowmounted = !!(flags & ZFS_RENAME_ALLOW_MOUNTED);
01379 
01380         err = dsl_sync_task_do(dd->dd_pool,
01381             dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
01382 
01383 out:
01384         dsl_dir_close(ra.newparent, FTAG);
01385         return (err);
01386 }
01387 
01388 int
01389 dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
01390 {
01391         dsl_dir_t *ancestor;
01392         int64_t adelta;
01393         uint64_t avail;
01394 
01395         ancestor = closest_common_ancestor(sdd, tdd);
01396         adelta = would_change(sdd, -space, ancestor);
01397         avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
01398         if (avail < space)
01399                 return (ENOSPC);
01400 
01401         return (0);
01402 }
01403 
01404 timestruc_t
01405 dsl_dir_snap_cmtime(dsl_dir_t *dd)
01406 {
01407         timestruc_t t;
01408 
01409         mutex_enter(&dd->dd_lock);
01410         t = dd->dd_snap_cmtime;
01411         mutex_exit(&dd->dd_lock);
01412 
01413         return (t);
01414 }
01415 
01416 void
01417 dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
01418 {
01419         timestruc_t t;
01420 
01421         gethrestime(&t);
01422         mutex_enter(&dd->dd_lock);
01423         dd->dd_snap_cmtime = t;
01424         mutex_exit(&dd->dd_lock);
01425 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines