FreeBSD ZFS: zfs_vfsops.c Source File

FreeBSD ZFS
The Zettabyte File System
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 /*
00022  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
00023  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
00024  * All rights reserved.
00025  */
00026 
00027 /* Portions Copyright 2010 Robert Milkowski */
00028 
00029 #include <sys/types.h>
00030 #include <sys/param.h>
00031 #include <sys/systm.h>
00032 #include <sys/kernel.h>
00033 #include <sys/sysmacros.h>
00034 #include <sys/kmem.h>
00035 #include <sys/acl.h>
00036 #include <sys/vnode.h>
00037 #include <sys/vfs.h>
00038 #include <sys/mntent.h>
00039 #include <sys/mount.h>
00040 #include <sys/cmn_err.h>
00041 #include <sys/zfs_znode.h>
00042 #include <sys/zfs_dir.h>
00043 #include <sys/zil.h>
00044 #include <sys/fs/zfs.h>
00045 #include <sys/dmu.h>
00046 #include <sys/dsl_prop.h>
00047 #include <sys/dsl_dataset.h>
00048 #include <sys/dsl_deleg.h>
00049 #include <sys/spa.h>
00050 #include <sys/zap.h>
00051 #include <sys/sa.h>
00052 #include <sys/sa_impl.h>
00053 #include <sys/varargs.h>
00054 #include <sys/policy.h>
00055 #include <sys/atomic.h>
00056 #include <sys/zfs_ioctl.h>
00057 #include <sys/zfs_ctldir.h>
00058 #include <sys/zfs_fuid.h>
00059 #include <sys/sunddi.h>
00060 #include <sys/dnlc.h>
00061 #include <sys/dmu_objset.h>
00062 #include <sys/spa_boot.h>
00063 #include <sys/jail.h>
00064 #include "zfs_comutil.h"
00065 
00066 struct mtx zfs_debug_mtx;
00067 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
00068 
00069 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
00070 
00071 int zfs_super_owner;
00072 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
00073     "File system owner can perform privileged operation on his file systems");
00074 
00078 int zfs_debug_level;
00079 TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level);
00080 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0,
00081     "Debug level");
00082 
00083 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
00084 static int zfs_version_acl = ZFS_ACL_VERSION;
00085 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
00086     "ZFS_ACL_VERSION");
00087 static int zfs_version_spa = SPA_VERSION;
00088 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
00089     "SPA_VERSION");
00090 static int zfs_version_zpl = ZPL_VERSION;
00091 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
00092     "ZPL_VERSION");
00093 
00094 static int zfs_mount(vfs_t *vfsp);
00095 static int zfs_umount(vfs_t *vfsp, int fflag);
00096 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
00097 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
00098 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
00099 static int zfs_sync(vfs_t *vfsp, int waitfor);
00100 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
00101     struct ucred **credanonp, int *numsecflavors, int **secflavors);
00102 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
00103 static void zfs_objset_close(zfsvfs_t *zfsvfs);
00104 static void zfs_freevfs(vfs_t *vfsp);
00105 
00106 static struct vfsops zfs_vfsops = {
00107         .vfs_mount =            zfs_mount,
00108         .vfs_unmount =          zfs_umount,
00109         .vfs_root =             zfs_root,
00110         .vfs_statfs =           zfs_statfs,
00111         .vfs_vget =             zfs_vget,
00112         .vfs_sync =             zfs_sync,
00113         .vfs_checkexp =         zfs_checkexp,
00114         .vfs_fhtovp =           zfs_fhtovp,
00115 };
00116 
00117 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
00118 
00124 static uint32_t zfs_active_fs_count = 0;
00125 
00126 /*ARGSUSED*/
00127 static int
00128 zfs_sync(vfs_t *vfsp, int waitfor)
00129 {
00130 
00131         /*
00132          * Data integrity is job one.  We don't want a compromised kernel
00133          * writing to the storage pool, so we never sync during panic.
00134          */
00135         if (panicstr)
00136                 return (0);
00137 
00138         if (vfsp != NULL) {
00139                 /*
00140                  * Sync a specific filesystem.
00141                  */
00142                 zfsvfs_t *zfsvfs = vfsp->vfs_data;
00143                 dsl_pool_t *dp;
00144                 int error;
00145 
00146                 error = vfs_stdsync(vfsp, waitfor);
00147                 if (error != 0)
00148                         return (error);
00149 
00150                 ZFS_ENTER(zfsvfs);
00151                 dp = dmu_objset_pool(zfsvfs->z_os);
00152 
00153                 /*
00154                  * If the system is shutting down, then skip any
00155                  * filesystems which may exist on a suspended pool.
00156                  */
00157                 if (sys_shutdown && spa_suspended(dp->dp_spa)) {
00158                         ZFS_EXIT(zfsvfs);
00159                         return (0);
00160                 }
00161 
00162                 if (zfsvfs->z_log != NULL)
00163                         zil_commit(zfsvfs->z_log, 0);
00164 
00165                 ZFS_EXIT(zfsvfs);
00166         } else {
00167                 /*
00168                  * Sync all ZFS filesystems.  This is what happens when you
00169                  * run sync(1M).  Unlike other filesystems, ZFS honors the
00170                  * request by waiting for all pools to commit all dirty data.
00171                  */
00172                 spa_sync_allpools();
00173         }
00174 
00175         return (0);
00176 }
00177 
00178 #ifndef __FreeBSD__
00179 static int
00180 zfs_create_unique_device(dev_t *dev)
00181 {
00182         major_t new_major;
00183 
00184         do {
00185                 ASSERT3U(zfs_minor, <=, MAXMIN32);
00186                 minor_t start = zfs_minor;
00187                 do {
00188                         mutex_enter(&zfs_dev_mtx);
00189                         if (zfs_minor >= MAXMIN32) {
00190                                 /*
00191                                  * If we're still using the real major
00192                                  * keep out of /dev/zfs and /dev/zvol minor
00193                                  * number space.  If we're using a getudev()'ed
00194                                  * major number, we can use all of its minors.
00195                                  */
00196                                 if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
00197                                         zfs_minor = ZFS_MIN_MINOR;
00198                                 else
00199                                         zfs_minor = 0;
00200                         } else {
00201                                 zfs_minor++;
00202                         }
00203                         *dev = makedevice(zfs_major, zfs_minor);
00204                         mutex_exit(&zfs_dev_mtx);
00205                 } while (vfs_devismounted(*dev) && zfs_minor != start);
00206                 if (zfs_minor == start) {
00207                         /*
00208                          * We are using all ~262,000 minor numbers for the
00209                          * current major number.  Create a new major number.
00210                          */
00211                         if ((new_major = getudev()) == (major_t)-1) {
00212                                 cmn_err(CE_WARN,
00213                                     "zfs_mount: Can't get unique major "
00214                                     "device number.");
00215                                 return (-1);
00216                         }
00217                         mutex_enter(&zfs_dev_mtx);
00218                         zfs_major = new_major;
00219                         zfs_minor = 0;
00220 
00221                         mutex_exit(&zfs_dev_mtx);
00222                 } else {
00223                         break;
00224                 }
00225                 /* CONSTANTCONDITION */
00226         } while (1);
00227 
00228         return (0);
00229 }
00230 #endif  /* !__FreeBSD__ */
00231 
00232 static void
00233 atime_changed_cb(void *arg, uint64_t newval)
00234 {
00235         zfsvfs_t *zfsvfs = arg;
00236 
00237         if (newval == TRUE) {
00238                 zfsvfs->z_atime = TRUE;
00239                 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
00240                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
00241                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
00242         } else {
00243                 zfsvfs->z_atime = FALSE;
00244                 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
00245                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
00246                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
00247         }
00248 }
00249 
00250 static void
00251 xattr_changed_cb(void *arg, uint64_t newval)
00252 {
00253         zfsvfs_t *zfsvfs = arg;
00254 
00255         if (newval == TRUE) {
00256                 /* XXX locking on vfs_flag? */
00257 #ifdef TODO
00258                 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
00259 #endif
00260                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
00261                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
00262         } else {
00263                 /* XXX locking on vfs_flag? */
00264 #ifdef TODO
00265                 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
00266 #endif
00267                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
00268                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
00269         }
00270 }
00271 
00272 static void
00273 blksz_changed_cb(void *arg, uint64_t newval)
00274 {
00275         zfsvfs_t *zfsvfs = arg;
00276 
00277         if (newval < SPA_MINBLOCKSIZE ||
00278             newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
00279                 newval = SPA_MAXBLOCKSIZE;
00280 
00281         zfsvfs->z_max_blksz = newval;
00282         zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
00283 }
00284 
00285 static void
00286 readonly_changed_cb(void *arg, uint64_t newval)
00287 {
00288         zfsvfs_t *zfsvfs = arg;
00289 
00290         if (newval) {
00291                 /* XXX locking on vfs_flag? */
00292                 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
00293                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
00294                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
00295         } else {
00296                 /* XXX locking on vfs_flag? */
00297                 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
00298                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
00299                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
00300         }
00301 }
00302 
00303 static void
00304 setuid_changed_cb(void *arg, uint64_t newval)
00305 {
00306         zfsvfs_t *zfsvfs = arg;
00307 
00308         if (newval == FALSE) {
00309                 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
00310                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
00311                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
00312         } else {
00313                 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
00314                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
00315                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
00316         }
00317 }
00318 
00319 static void
00320 exec_changed_cb(void *arg, uint64_t newval)
00321 {
00322         zfsvfs_t *zfsvfs = arg;
00323 
00324         if (newval == FALSE) {
00325                 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
00326                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
00327                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
00328         } else {
00329                 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
00330                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
00331                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
00332         }
00333 }
00334 
00343 static void
00344 nbmand_changed_cb(void *arg, uint64_t newval)
00345 {
00346         zfsvfs_t *zfsvfs = arg;
00347         if (newval == FALSE) {
00348                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
00349                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
00350         } else {
00351                 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
00352                 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
00353         }
00354 }
00355 
00356 static void
00357 snapdir_changed_cb(void *arg, uint64_t newval)
00358 {
00359         zfsvfs_t *zfsvfs = arg;
00360 
00361         zfsvfs->z_show_ctldir = newval;
00362 }
00363 
00364 static void
00365 vscan_changed_cb(void *arg, uint64_t newval)
00366 {
00367         zfsvfs_t *zfsvfs = arg;
00368 
00369         zfsvfs->z_vscan = newval;
00370 }
00371 
00372 static void
00373 acl_mode_changed_cb(void *arg, uint64_t newval)
00374 {
00375         zfsvfs_t *zfsvfs = arg;
00376 
00377         zfsvfs->z_acl_mode = newval;
00378 }
00379 
00380 static void
00381 acl_inherit_changed_cb(void *arg, uint64_t newval)
00382 {
00383         zfsvfs_t *zfsvfs = arg;
00384 
00385         zfsvfs->z_acl_inherit = newval;
00386 }
00387 
00388 static int
00389 zfs_register_callbacks(vfs_t *vfsp)
00390 {
00391         struct dsl_dataset *ds = NULL;
00392         objset_t *os = NULL;
00393         zfsvfs_t *zfsvfs = NULL;
00394         uint64_t nbmand;
00395         int readonly, do_readonly = B_FALSE;
00396         int setuid, do_setuid = B_FALSE;
00397         int exec, do_exec = B_FALSE;
00398         int xattr, do_xattr = B_FALSE;
00399         int atime, do_atime = B_FALSE;
00400         int error = 0;
00401 
00402         ASSERT(vfsp);
00403         zfsvfs = vfsp->vfs_data;
00404         ASSERT(zfsvfs);
00405         os = zfsvfs->z_os;
00406 
00407         /*
00408          * This function can be called for a snapshot when we update snapshot's
00409          * mount point, which isn't really supported.
00410          */
00411         if (dmu_objset_is_snapshot(os))
00412                 return (EOPNOTSUPP);
00413 
00414         /*
00415          * The act of registering our callbacks will destroy any mount
00416          * options we may have.  In order to enable temporary overrides
00417          * of mount options, we stash away the current values and
00418          * restore them after we register the callbacks.
00419          */
00420         if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
00421             !spa_writeable(dmu_objset_spa(os))) {
00422                 readonly = B_TRUE;
00423                 do_readonly = B_TRUE;
00424         } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
00425                 readonly = B_FALSE;
00426                 do_readonly = B_TRUE;
00427         }
00428         if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
00429                 setuid = B_FALSE;
00430                 do_setuid = B_TRUE;
00431         } else {
00432                 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
00433                         setuid = B_FALSE;
00434                         do_setuid = B_TRUE;
00435                 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
00436                         setuid = B_TRUE;
00437                         do_setuid = B_TRUE;
00438                 }
00439         }
00440         if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
00441                 exec = B_FALSE;
00442                 do_exec = B_TRUE;
00443         } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
00444                 exec = B_TRUE;
00445                 do_exec = B_TRUE;
00446         }
00447         if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
00448                 xattr = B_FALSE;
00449                 do_xattr = B_TRUE;
00450         } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
00451                 xattr = B_TRUE;
00452                 do_xattr = B_TRUE;
00453         }
00454         if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
00455                 atime = B_FALSE;
00456                 do_atime = B_TRUE;
00457         } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
00458                 atime = B_TRUE;
00459                 do_atime = B_TRUE;
00460         }
00461 
00462         /*
00463          * nbmand is a special property.  It can only be changed at
00464          * mount time.
00465          *
00466          * This is weird, but it is documented to only be changeable
00467          * at mount time.
00468          */
00469         if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
00470                 nbmand = B_FALSE;
00471         } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
00472                 nbmand = B_TRUE;
00473         } else {
00474                 char osname[MAXNAMELEN];
00475 
00476                 dmu_objset_name(os, osname);
00477                 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
00478                     NULL)) {
00479                         return (error);
00480                 }
00481         }
00482 
00483         /*
00484          * Register property callbacks.
00485          *
00486          * It would probably be fine to just check for i/o error from
00487          * the first prop_register(), but I guess I like to go
00488          * overboard...
00489          */
00490         ds = dmu_objset_ds(os);
00491         error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
00492         error = error ? error : dsl_prop_register(ds,
00493             "xattr", xattr_changed_cb, zfsvfs);
00494         error = error ? error : dsl_prop_register(ds,
00495             "recordsize", blksz_changed_cb, zfsvfs);
00496         error = error ? error : dsl_prop_register(ds,
00497             "readonly", readonly_changed_cb, zfsvfs);
00498         error = error ? error : dsl_prop_register(ds,
00499             "setuid", setuid_changed_cb, zfsvfs);
00500         error = error ? error : dsl_prop_register(ds,
00501             "exec", exec_changed_cb, zfsvfs);
00502         error = error ? error : dsl_prop_register(ds,
00503             "snapdir", snapdir_changed_cb, zfsvfs);
00504         error = error ? error : dsl_prop_register(ds,
00505             "aclmode", acl_mode_changed_cb, zfsvfs);
00506         error = error ? error : dsl_prop_register(ds,
00507             "aclinherit", acl_inherit_changed_cb, zfsvfs);
00508         error = error ? error : dsl_prop_register(ds,
00509             "vscan", vscan_changed_cb, zfsvfs);
00510         if (error)
00511                 goto unregister;
00512 
00513         /*
00514          * Invoke our callbacks to restore temporary mount options.
00515          */
00516         if (do_readonly)
00517                 readonly_changed_cb(zfsvfs, readonly);
00518         if (do_setuid)
00519                 setuid_changed_cb(zfsvfs, setuid);
00520         if (do_exec)
00521                 exec_changed_cb(zfsvfs, exec);
00522         if (do_xattr)
00523                 xattr_changed_cb(zfsvfs, xattr);
00524         if (do_atime)
00525                 atime_changed_cb(zfsvfs, atime);
00526 
00527         nbmand_changed_cb(zfsvfs, nbmand);
00528 
00529         return (0);
00530 
00531 unregister:
00532         /*
00533          * We may attempt to unregister some callbacks that are not
00534          * registered, but this is OK; it will simply return ENOMSG,
00535          * which we will ignore.
00536          */
00537         (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
00538         (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
00539         (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
00540         (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
00541         (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
00542         (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
00543         (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
00544         (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
00545         (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
00546             zfsvfs);
00547         (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
00548         return (error);
00549 
00550 }
00551 
00552 static int
00553 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
00554     uint64_t *userp, uint64_t *groupp)
00555 {
00556         int error = 0;
00557 
00558         /*
00559          * Is it a valid type of object to track?
00560          */
00561         if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
00562                 return (ENOENT);
00563 
00564         /*
00565          * If we have a NULL data pointer
00566          * then assume the id's aren't changing and
00567          * return EEXIST to the dmu to let it know to
00568          * use the same ids
00569          */
00570         if (data == NULL)
00571                 return (EEXIST);
00572 
00573         if (bonustype == DMU_OT_ZNODE) {
00574                 znode_phys_t *znp = data;
00575                 *userp = znp->zp_uid;
00576                 *groupp = znp->zp_gid;
00577         } else {
00578                 int hdrsize;
00579                 sa_hdr_phys_t *sap = data;
00580                 sa_hdr_phys_t sa = *sap;
00581                 boolean_t swap = B_FALSE;
00582 
00583                 ASSERT(bonustype == DMU_OT_SA);
00584 
00585                 if (sa.sa_magic == 0) {
00586                         /*
00587                          * This should only happen for newly created
00588                          * files that haven't had the znode data filled
00589                          * in yet.
00590                          */
00591                         *userp = 0;
00592                         *groupp = 0;
00593                         return (0);
00594                 }
00595                 if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
00596                         sa.sa_magic = SA_MAGIC;
00597                         sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
00598                         swap = B_TRUE;
00599                 } else {
00600                         VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
00601                 }
00602 
00603                 hdrsize = sa_hdrsize(&sa);
00604                 VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
00605                 *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
00606                     SA_UID_OFFSET));
00607                 *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
00608                     SA_GID_OFFSET));
00609                 if (swap) {
00610                         *userp = BSWAP_64(*userp);
00611                         *groupp = BSWAP_64(*groupp);
00612                 }
00613         }
00614         return (error);
00615 }
00616 
00617 static void
00618 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
00619     char *domainbuf, int buflen, uid_t *ridp)
00620 {
00621         uint64_t fuid;
00622         const char *domain;
00623 
00624         fuid = strtonum(fuidstr, NULL);
00625 
00626         domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
00627         if (domain)
00628                 (void) strlcpy(domainbuf, domain, buflen);
00629         else
00630                 domainbuf[0] = '\0';
00631         *ridp = FUID_RID(fuid);
00632 }
00633 
00634 static uint64_t
00635 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
00636 {
00637         switch (type) {
00638         case ZFS_PROP_USERUSED:
00639                 return (DMU_USERUSED_OBJECT);
00640         case ZFS_PROP_GROUPUSED:
00641                 return (DMU_GROUPUSED_OBJECT);
00642         case ZFS_PROP_USERQUOTA:
00643                 return (zfsvfs->z_userquota_obj);
00644         case ZFS_PROP_GROUPQUOTA:
00645                 return (zfsvfs->z_groupquota_obj);
00646         }
00647         return (0);
00648 }
00649 
00650 int
00651 zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
00652     uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
00653 {
00654         int error;
00655         zap_cursor_t zc;
00656         zap_attribute_t za;
00657         zfs_useracct_t *buf = vbuf;
00658         uint64_t obj;
00659 
00660         if (!dmu_objset_userspace_present(zfsvfs->z_os))
00661                 return (ENOTSUP);
00662 
00663         obj = zfs_userquota_prop_to_obj(zfsvfs, type);
00664         if (obj == 0) {
00665                 *bufsizep = 0;
00666                 return (0);
00667         }
00668 
00669         for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
00670             (error = zap_cursor_retrieve(&zc, &za)) == 0;
00671             zap_cursor_advance(&zc)) {
00672                 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
00673                     *bufsizep)
00674                         break;
00675 
00676                 fuidstr_to_sid(zfsvfs, za.za_name,
00677                     buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
00678 
00679                 buf->zu_space = za.za_first_integer;
00680                 buf++;
00681         }
00682         if (error == ENOENT)
00683                 error = 0;
00684 
00685         ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
00686         *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
00687         *cookiep = zap_cursor_serialize(&zc);
00688         zap_cursor_fini(&zc);
00689         return (error);
00690 }
00691 
00695 static int
00696 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
00697     char *buf, boolean_t addok)
00698 {
00699         uint64_t fuid;
00700         int domainid = 0;
00701 
00702         if (domain && domain[0]) {
00703                 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
00704                 if (domainid == -1)
00705                         return (ENOENT);
00706         }
00707         fuid = FUID_ENCODE(domainid, rid);
00708         (void) sprintf(buf, "%llx", (longlong_t)fuid);
00709         return (0);
00710 }
00711 
00712 int
00713 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
00714     const char *domain, uint64_t rid, uint64_t *valp)
00715 {
00716         char buf[32];
00717         int err;
00718         uint64_t obj;
00719 
00720         *valp = 0;
00721 
00722         if (!dmu_objset_userspace_present(zfsvfs->z_os))
00723                 return (ENOTSUP);
00724 
00725         obj = zfs_userquota_prop_to_obj(zfsvfs, type);
00726         if (obj == 0)
00727                 return (0);
00728 
00729         err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
00730         if (err)
00731                 return (err);
00732 
00733         err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
00734         if (err == ENOENT)
00735                 err = 0;
00736         return (err);
00737 }
00738 
00739 int
00740 zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
00741     const char *domain, uint64_t rid, uint64_t quota)
00742 {
00743         char buf[32];
00744         int err;
00745         dmu_tx_t *tx;
00746         uint64_t *objp;
00747         boolean_t fuid_dirtied;
00748 
00749         if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
00750                 return (EINVAL);
00751 
00752         if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
00753                 return (ENOTSUP);
00754 
00755         objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
00756             &zfsvfs->z_groupquota_obj;
00757 
00758         err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
00759         if (err)
00760                 return (err);
00761         fuid_dirtied = zfsvfs->z_fuid_dirty;
00762 
00763         tx = dmu_tx_create(zfsvfs->z_os);
00764         dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
00765         if (*objp == 0) {
00766                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
00767                     zfs_userquota_prop_prefixes[type]);
00768         }
00769         if (fuid_dirtied)
00770                 zfs_fuid_txhold(zfsvfs, tx);
00771         err = dmu_tx_assign(tx, TXG_WAIT);
00772         if (err) {
00773                 dmu_tx_abort(tx);
00774                 return (err);
00775         }
00776 
00777         mutex_enter(&zfsvfs->z_lock);
00778         if (*objp == 0) {
00779                 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
00780                     DMU_OT_NONE, 0, tx);
00781                 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
00782                     zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
00783         }
00784         mutex_exit(&zfsvfs->z_lock);
00785 
00786         if (quota == 0) {
00787                 err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
00788                 if (err == ENOENT)
00789                         err = 0;
00790         } else {
00791                 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
00792         }
00793         ASSERT(err == 0);
00794         if (fuid_dirtied)
00795                 zfs_fuid_sync(zfsvfs, tx);
00796         dmu_tx_commit(tx);
00797         return (err);
00798 }
00799 
00800 boolean_t
00801 zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
00802 {
00803         char buf[32];
00804         uint64_t used, quota, usedobj, quotaobj;
00805         int err;
00806 
00807         usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
00808         quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
00809 
00810         if (quotaobj == 0 || zfsvfs->z_replay)
00811                 return (B_FALSE);
00812 
00813         (void) sprintf(buf, "%llx", (longlong_t)fuid);
00814         err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
00815         if (err != 0)
00816                 return (B_FALSE);
00817 
00818         err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
00819         if (err != 0)
00820                 return (B_FALSE);
00821         return (used >= quota);
00822 }
00823 
00824 boolean_t
00825 zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
00826 {
00827         uint64_t fuid;
00828         uint64_t quotaobj;
00829 
00830         quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
00831 
00832         fuid = isgroup ? zp->z_gid : zp->z_uid;
00833 
00834         if (quotaobj == 0 || zfsvfs->z_replay)
00835                 return (B_FALSE);
00836 
00837         return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
00838 }
00839 
00840 int
00841 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
00842 {
00843         objset_t *os;
00844         zfsvfs_t *zfsvfs;
00845         uint64_t zval;
00846         int i, error;
00847         uint64_t sa_obj;
00848 
00849         zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
00850 
00851         /*
00852          * We claim to always be readonly so we can open snapshots;
00853          * other ZPL code will prevent us from writing to snapshots.
00854          */
00855         error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
00856         if (error) {
00857                 kmem_free(zfsvfs, sizeof (zfsvfs_t));
00858                 return (error);
00859         }
00860 
00861         /*
00862          * Initialize the zfs-specific filesystem structure.
00863          * Should probably make this a kmem cache, shuffle fields,
00864          * and just bzero up to z_hold_mtx[].
00865          */
00866         zfsvfs->z_vfs = NULL;
00867         zfsvfs->z_parent = zfsvfs;
00868         zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
00869         zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
00870         zfsvfs->z_os = os;
00871 
00872         error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
00873         if (error) {
00874                 goto out;
00875         } else if (zfsvfs->z_version >
00876             zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
00877                 (void) printf("Can't mount a version %lld file system "
00878                     "on a version %lld pool\n. Pool must be upgraded to mount "
00879                     "this file system.", (u_longlong_t)zfsvfs->z_version,
00880                     (u_longlong_t)spa_version(dmu_objset_spa(os)));
00881                 error = ENOTSUP;
00882                 goto out;
00883         }
00884         if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
00885                 goto out;
00886         zfsvfs->z_norm = (int)zval;
00887 
00888         if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
00889                 goto out;
00890         zfsvfs->z_utf8 = (zval != 0);
00891 
00892         if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
00893                 goto out;
00894         zfsvfs->z_case = (uint_t)zval;
00895 
00896         /*
00897          * Fold case on file systems that are always or sometimes case
00898          * insensitive.
00899          */
00900         if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
00901             zfsvfs->z_case == ZFS_CASE_MIXED)
00902                 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
00903 
00904         zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
00905         zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
00906 
00907         if (zfsvfs->z_use_sa) {
00908                 /* should either have both of these objects or none */
00909                 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
00910                     &sa_obj);
00911                 if (error)
00912                         return (error);
00913         } else {
00914                 /*
00915                  * Pre SA versions file systems should never touch
00916                  * either the attribute registration or layout objects.
00917                  */
00918                 sa_obj = 0;
00919         }
00920 
00921         error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
00922             &zfsvfs->z_attr_table);
00923         if (error)
00924                 goto out;
00925 
00926         if (zfsvfs->z_version >= ZPL_VERSION_SA)
00927                 sa_register_update_callback(os, zfs_sa_upgrade);
00928 
00929         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
00930             &zfsvfs->z_root);
00931         if (error)
00932                 goto out;
00933         ASSERT(zfsvfs->z_root != 0);
00934 
00935         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
00936             &zfsvfs->z_unlinkedobj);
00937         if (error)
00938                 goto out;
00939 
00940         error = zap_lookup(os, MASTER_NODE_OBJ,
00941             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
00942             8, 1, &zfsvfs->z_userquota_obj);
00943         if (error && error != ENOENT)
00944                 goto out;
00945 
00946         error = zap_lookup(os, MASTER_NODE_OBJ,
00947             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
00948             8, 1, &zfsvfs->z_groupquota_obj);
00949         if (error && error != ENOENT)
00950                 goto out;
00951 
00952         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
00953             &zfsvfs->z_fuid_obj);
00954         if (error && error != ENOENT)
00955                 goto out;
00956 
00957         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
00958             &zfsvfs->z_shares_dir);
00959         if (error && error != ENOENT)
00960                 goto out;
00961 
00962         mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
00963         mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
00964         list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
00965             offsetof(znode_t, z_link_node));
00966         rrw_init(&zfsvfs->z_teardown_lock);
00967         rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
00968         rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
00969         for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
00970                 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
00971 
00972         *zfvp = zfsvfs;
00973         return (0);
00974 
00975 out:
00976         dmu_objset_disown(os, zfsvfs);
00977         *zfvp = NULL;
00978         kmem_free(zfsvfs, sizeof (zfsvfs_t));
00979         return (error);
00980 }
00981 
00982 static int
00983 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
00984 {
00985         int error;
00986 
00987         error = zfs_register_callbacks(zfsvfs->z_vfs);
00988         if (error)
00989                 return (error);
00990 
00991         /*
00992          * Set the objset user_ptr to track its zfsvfs.
00993          */
00994         mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
00995         dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
00996         mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
00997 
00998         zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
00999 
01000         /*
01001          * If we are not mounting (ie: online recv), then we don't
01002          * have to worry about replaying the log as we blocked all
01003          * operations out since we closed the ZIL.
01004          */
01005         if (mounting) {
01006                 boolean_t readonly;
01007 
01008                 /*
01009                  * During replay we remove the read only flag to
01010                  * allow replays to succeed.
01011                  */
01012                 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
01013                 if (readonly != 0)
01014                         zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
01015                 else
01016                         zfs_unlinked_drain(zfsvfs);
01017 
01018                 /*
01019                  * Parse and replay the intent log.
01020                  *
01021                  * Because of ziltest, this must be done after
01022                  * zfs_unlinked_drain().  (Further note: ziltest
01023                  * doesn't use readonly mounts, where
01024                  * zfs_unlinked_drain() isn't called.)  This is because
01025                  * ziltest causes spa_sync() to think it's committed,
01026                  * but actually it is not, so the intent log contains
01027                  * many txg's worth of changes.
01028                  *
01029                  * In particular, if object N is in the unlinked set in
01030                  * the last txg to actually sync, then it could be
01031                  * actually freed in a later txg and then reallocated
01032                  * in a yet later txg.  This would write a "create
01033                  * object N" record to the intent log.  Normally, this
01034                  * would be fine because the spa_sync() would have
01035                  * written out the fact that object N is free, before
01036                  * we could write the "create object N" intent log
01037                  * record.
01038                  *
01039                  * But when we are in ziltest mode, we advance the "open
01040                  * txg" without actually spa_sync()-ing the changes to
01041                  * disk.  So we would see that object N is still
01042                  * allocated and in the unlinked set, and there is an
01043                  * intent log record saying to allocate it.
01044                  */
01045                 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
01046                         if (zil_replay_disable) {
01047                                 zil_destroy(zfsvfs->z_log, B_FALSE);
01048                         } else {
01049                                 zfsvfs->z_replay = B_TRUE;
01050                                 zil_replay(zfsvfs->z_os, zfsvfs,
01051                                     zfs_replay_vector);
01052                                 zfsvfs->z_replay = B_FALSE;
01053                         }
01054                 }
01055                 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
01056         }
01057 
01058         return (0);
01059 }
01060 
01061 extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
01062 
01063 void
01064 zfsvfs_free(zfsvfs_t *zfsvfs)
01065 {
01066         int i;
01067 
01068         /*
01069          * This is a barrier to prevent the filesystem from going away in
01070          * zfs_znode_move() until we can safely ensure that the filesystem is
01071          * not unmounted. We consider the filesystem valid before the barrier
01072          * and invalid after the barrier.
01073          */
01074         rw_enter(&zfsvfs_lock, RW_READER);
01075         rw_exit(&zfsvfs_lock);
01076 
01077         zfs_fuid_destroy(zfsvfs);
01078 
01079         mutex_destroy(&zfsvfs->z_znodes_lock);
01080         mutex_destroy(&zfsvfs->z_lock);
01081         list_destroy(&zfsvfs->z_all_znodes);
01082         rrw_destroy(&zfsvfs->z_teardown_lock);
01083         rw_destroy(&zfsvfs->z_teardown_inactive_lock);
01084         rw_destroy(&zfsvfs->z_fuid_lock);
01085         for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
01086                 mutex_destroy(&zfsvfs->z_hold_mtx[i]);
01087         kmem_free(zfsvfs, sizeof (zfsvfs_t));
01088 }
01089 
01090 static void
01091 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
01092 {
01093         zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
01094         if (zfsvfs->z_vfs) {
01095                 if (zfsvfs->z_use_fuids) {
01096                         vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
01097                         vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
01098                         vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
01099                         vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
01100                         vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
01101                         vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
01102                 } else {
01103                         vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
01104                         vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
01105                         vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
01106                         vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
01107                         vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
01108                         vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
01109                 }
01110         }
01111         zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
01112 }
01113 
01114 static int
01115 zfs_domount(vfs_t *vfsp, char *osname)
01116 {
01117         uint64_t recordsize, fsid_guid;
01118         int error = 0;
01119         zfsvfs_t *zfsvfs;
01120         vnode_t *vp;
01121 
01122         ASSERT(vfsp);
01123         ASSERT(osname);
01124 
01125         error = zfsvfs_create(osname, &zfsvfs);
01126         if (error)
01127                 return (error);
01128         zfsvfs->z_vfs = vfsp;
01129 
01130         if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
01131             NULL))
01132                 goto out;
01133         zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
01134         zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
01135 
01136         vfsp->vfs_data = zfsvfs;
01137         vfsp->mnt_flag |= MNT_LOCAL;
01138         vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
01139         vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
01140         vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
01141 
01142         /*
01143          * The fsid is 64 bits, composed of an 8-bit fs type, which
01144          * separates our fsid from any other filesystem types, and a
01145          * 56-bit objset unique ID.  The objset unique ID is unique to
01146          * all objsets open on this system, provided by unique_create().
01147          * The 8-bit fs type must be put in the low bits of fsid[1]
01148          * because that's where other Solaris filesystems put it.
01149          */
01150         fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
01151         ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
01152         vfsp->vfs_fsid.val[0] = fsid_guid;
01153         vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
01154             vfsp->mnt_vfc->vfc_typenum & 0xFF;
01155 
01156         /*
01157          * Set features for file system.
01158          */
01159         zfs_set_fuid_feature(zfsvfs);
01160         if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
01161                 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
01162                 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
01163                 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
01164         } else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
01165                 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
01166                 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
01167         }
01168         vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
01169 
01170         if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
01171                 uint64_t pval;
01172 
01173                 atime_changed_cb(zfsvfs, B_FALSE);
01174                 readonly_changed_cb(zfsvfs, B_TRUE);
01175                 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
01176                         goto out;
01177                 xattr_changed_cb(zfsvfs, pval);
01178                 zfsvfs->z_issnap = B_TRUE;
01179                 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
01180 
01181                 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
01182                 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
01183                 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
01184         } else {
01185                 error = zfsvfs_setup(zfsvfs, B_TRUE);
01186         }
01187 
01188         vfs_mountedfrom(vfsp, osname);
01189         /* Grab extra reference. */
01190         VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0);
01191         VOP_UNLOCK(vp, 0);
01192 
01193         if (!zfsvfs->z_issnap)
01194                 zfsctl_create(zfsvfs);
01195 out:
01196         if (error) {
01197                 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
01198                 zfsvfs_free(zfsvfs);
01199         } else {
01200                 atomic_add_32(&zfs_active_fs_count, 1);
01201         }
01202 
01203         return (error);
01204 }
01205 
01206 void
01207 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
01208 {
01209         objset_t *os = zfsvfs->z_os;
01210         struct dsl_dataset *ds;
01211 
01212         /*
01213          * Unregister properties.
01214          */
01215         if (!dmu_objset_is_snapshot(os)) {
01216                 ds = dmu_objset_ds(os);
01217                 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
01218                     zfsvfs) == 0);
01219 
01220                 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
01221                     zfsvfs) == 0);
01222 
01223                 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
01224                     zfsvfs) == 0);
01225 
01226                 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
01227                     zfsvfs) == 0);
01228 
01229                 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
01230                     zfsvfs) == 0);
01231 
01232                 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
01233                     zfsvfs) == 0);
01234 
01235                 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
01236                     zfsvfs) == 0);
01237 
01238                 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
01239                     zfsvfs) == 0);
01240 
01241                 VERIFY(dsl_prop_unregister(ds, "aclinherit",
01242                     acl_inherit_changed_cb, zfsvfs) == 0);
01243 
01244                 VERIFY(dsl_prop_unregister(ds, "vscan",
01245                     vscan_changed_cb, zfsvfs) == 0);
01246         }
01247 }
01248 
01249 #ifdef SECLABEL
01250 
/*
 * Convert a string of decimal digits to a uint64_t.
 *
 * Returns 0 and stores the value in *objnum on success.  Returns EINVAL,
 * leaving *objnum untouched, if the string contains a non-digit character
 * or if the value does not fit in 64 bits.  An empty string converts to 0.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
        const uint64_t max = ~(uint64_t)0;
        uint64_t num = 0;

        for (; *str != '\0'; str++) {
                uint64_t digit;

                if (*str < '0' || *str > '9')
                        return (EINVAL);
                digit = (uint64_t)(*str - '0');

                /*
                 * num * 10 + digit must not exceed max; check before
                 * computing so that the value never silently wraps.
                 */
                if (num > (max - digit) / 10)
                        return (EINVAL);

                num = num * 10 + digit;
        }

        *objnum = num;
        return (0);
}
01268 
01274 static int
01275 zfs_parse_bootfs(char *bpath, char *outpath)
01276 {
01277         char *slashp;
01278         uint64_t objnum;
01279         int error;
01280 
01281         if (*bpath == 0 || *bpath == '/')
01282                 return (EINVAL);
01283 
01284         (void) strcpy(outpath, bpath);
01285 
01286         slashp = strchr(bpath, '/');
01287 
01288         /* if no '/', just return the pool name */
01289         if (slashp == NULL) {
01290                 return (0);
01291         }
01292 
01293         /* if not a number, just return the root dataset name */
01294         if (str_to_uint64(slashp+1, &objnum)) {
01295                 return (0);
01296         }
01297 
01298         *slashp = '\0';
01299         error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
01300         *slashp = '/';
01301 
01302         return (error);
01303 }
01304 
01313 int
01314 zfs_check_global_label(const char *dsname, const char *hexsl)
01315 {
01316         if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
01317                 return (0);
01318         if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
01319                 return (0);
01320         if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
01321                 /* must be readonly */
01322                 uint64_t rdonly;
01323 
01324                 if (dsl_prop_get_integer(dsname,
01325                     zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
01326                         return (EACCES);
01327                 return (rdonly ? 0 : EACCES);
01328         }
01329         return (EACCES);
01330 }
01331 
01340 static int
01341 zfs_mount_label_policy(vfs_t *vfsp, char *osname)
01342 {
01343         int             error, retv;
01344         zone_t          *mntzone = NULL;
01345         ts_label_t      *mnt_tsl;
01346         bslabel_t       *mnt_sl;
01347         bslabel_t       ds_sl;
01348         char            ds_hexsl[MAXNAMELEN];
01349 
01350         retv = EACCES;                          /* assume the worst */
01351 
01352         /*
01353          * Start by getting the dataset label if it exists.
01354          */
01355         error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
01356             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
01357         if (error)
01358                 return (EACCES);
01359 
01360         /*
01361          * If labeling is NOT enabled, then disallow the mount of datasets
01362          * which have a non-default label already.  No other label checks
01363          * are needed.
01364          */
01365         if (!is_system_labeled()) {
01366                 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
01367                         return (0);
01368                 return (EACCES);
01369         }
01370 
01371         /*
01372          * Get the label of the mountpoint.  If mounting into the global
01373          * zone (i.e. mountpoint is not within an active zone and the
01374          * zoned property is off), the label must be default or
01375          * admin_low/admin_high only; no other checks are needed.
01376          */
01377         mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
01378         if (mntzone->zone_id == GLOBAL_ZONEID) {
01379                 uint64_t zoned;
01380 
01381                 zone_rele(mntzone);
01382 
01383                 if (dsl_prop_get_integer(osname,
01384                     zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
01385                         return (EACCES);
01386                 if (!zoned)
01387                         return (zfs_check_global_label(osname, ds_hexsl));
01388                 else
01389                         /*
01390                          * This is the case of a zone dataset being mounted
01391                          * initially, before the zone has been fully created;
01392                          * allow this mount into global zone.
01393                          */
01394                         return (0);
01395         }
01396 
01397         mnt_tsl = mntzone->zone_slabel;
01398         ASSERT(mnt_tsl != NULL);
01399         label_hold(mnt_tsl);
01400         mnt_sl = label2bslabel(mnt_tsl);
01401 
01402         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
01403                 /*
01404                  * The dataset doesn't have a real label, so fabricate one.
01405                  */
01406                 char *str = NULL;
01407 
01408                 if (l_to_str_internal(mnt_sl, &str) == 0 &&
01409                     dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
01410                     ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
01411                         retv = 0;
01412                 if (str != NULL)
01413                         kmem_free(str, strlen(str) + 1);
01414         } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
01415                 /*
01416                  * Now compare labels to complete the MAC check.  If the
01417                  * labels are equal then allow access.  If the mountpoint
01418                  * label dominates the dataset label, allow readonly access.
01419                  * Otherwise, access is denied.
01420                  */
01421                 if (blequal(mnt_sl, &ds_sl))
01422                         retv = 0;
01423                 else if (bldominates(mnt_sl, &ds_sl)) {
01424                         vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
01425                         retv = 0;
01426                 }
01427         }
01428 
01429         label_rele(mnt_tsl);
01430         zone_rele(mntzone);
01431         return (retv);
01432 }
01433 #endif  /* SECLABEL */
01434 
01435 #ifdef OPENSOLARIS_MOUNTROOT
01436 static int
01437 zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
01438 {
01439         int error = 0;
01440         static int zfsrootdone = 0;
01441         zfsvfs_t *zfsvfs = NULL;
01442         znode_t *zp = NULL;
01443         vnode_t *vp = NULL;
01444         char *zfs_bootfs;
01445         char *zfs_devid;
01446 
01447         ASSERT(vfsp);
01448 
01449         /*
01450          * The filesystem that we mount as root is defined in the
01451          * boot property "zfs-bootfs" with a format of
01452          * "poolname/root-dataset-objnum".
01453          */
01454         if (why == ROOT_INIT) {
01455                 if (zfsrootdone++)
01456                         return (EBUSY);
01457                 /*
01458                  * the process of doing a spa_load will require the
01459                  * clock to be set before we could (for example) do
01460                  * something better by looking at the timestamp on
01461                  * an uberblock, so just set it to -1.
01462                  */
01463                 clkset(-1);
01464 
01465                 if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
01466                         cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
01467                             "bootfs name");
01468                         return (EINVAL);
01469                 }
01470                 zfs_devid = spa_get_bootprop("diskdevid");
01471                 error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
01472                 if (zfs_devid)
01473                         spa_free_bootprop(zfs_devid);
01474                 if (error) {
01475                         spa_free_bootprop(zfs_bootfs);
01476                         cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
01477                             error);
01478                         return (error);
01479                 }
01480                 if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
01481                         spa_free_bootprop(zfs_bootfs);
01482                         cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
01483                             error);
01484                         return (error);
01485                 }
01486 
01487                 spa_free_bootprop(zfs_bootfs);
01488 
01489                 if (error = vfs_lock(vfsp))
01490                         return (error);
01491 
01492                 if (error = zfs_domount(vfsp, rootfs.bo_name)) {
01493                         cmn_err(CE_NOTE, "zfs_domount: error %d", error);
01494                         goto out;
01495                 }
01496 
01497                 zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
01498                 ASSERT(zfsvfs);
01499                 if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
01500                         cmn_err(CE_NOTE, "zfs_zget: error %d", error);
01501                         goto out;
01502                 }
01503 
01504                 vp = ZTOV(zp);
01505                 mutex_enter(&vp->v_lock);
01506                 vp->v_flag |= VROOT;
01507                 mutex_exit(&vp->v_lock);
01508                 rootvp = vp;
01509 
01510                 /*
01511                  * Leave rootvp held.  The root file system is never unmounted.
01512                  */
01513 
01514                 vfs_add((struct vnode *)0, vfsp,
01515                     (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
01516 out:
01517                 vfs_unlock(vfsp);
01518                 return (error);
01519         } else if (why == ROOT_REMOUNT) {
01520                 readonly_changed_cb(vfsp->vfs_data, B_FALSE);
01521                 vfsp->vfs_flag |= VFS_REMOUNT;
01522 
01523                 /* refresh mount options */
01524                 zfs_unregister_callbacks(vfsp->vfs_data);
01525                 return (zfs_register_callbacks(vfsp));
01526 
01527         } else if (why == ROOT_UNMOUNT) {
01528                 zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
01529                 (void) zfs_sync(vfsp, 0, 0);
01530                 return (0);
01531         }
01532 
01533         /*
01534          * if "why" is equal to anything else other than ROOT_INIT,
01535          * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
01536          */
01537         return (ENOTSUP);
01538 }
01539 #endif  /* OPENSOLARIS_MOUNTROOT */
01540 
/*
 * Copy the pool component of a dataset name (everything before the first
 * '/', or the whole name if there is none) into poolname and terminate it.
 *
 * Returns ENAMETOOLONG, writing nothing, if that component is MAXNAMELEN
 * characters or longer; otherwise returns 0.  poolname must therefore have
 * room for at least MAXNAMELEN bytes.
 */
static int
getpoolname(const char *osname, char *poolname)
{
        const char *slash = strchr(osname, '/');
        size_t len;

        len = (slash != NULL) ? (size_t)(slash - osname) : strlen(osname);
        if (len >= MAXNAMELEN)
                return (ENAMETOOLONG);

        (void) memcpy(poolname, osname, len);
        poolname[len] = '\0';
        return (0);
}
01559 
01560 /*ARGSUSED*/
01561 static int
01562 zfs_mount(vfs_t *vfsp)
01563 {
01564         kthread_t       *td = curthread;
01565         vnode_t         *mvp = vfsp->mnt_vnodecovered;
01566         cred_t          *cr = td->td_ucred;
01567         char            *osname;
01568         int             error = 0;
01569         int             canwrite;
01570 
01571         if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS))
01572                 return (EPERM);
01573 
01574         if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
01575                 return (EINVAL);
01576 
01577         /*
01578          * If full-owner-access is enabled and delegated administration is
01579          * turned on, we must set nosuid.
01580          */
01581         if (zfs_super_owner &&
01582             dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
01583                 secpolicy_fs_mount_clearopts(cr, vfsp);
01584         }
01585 
01586         /*
01587          * Check for mount privilege?
01588          *
01589          * If we don't have privilege then see if
01590          * we have local permission to allow it
01591          */
01592         error = secpolicy_fs_mount(cr, mvp, vfsp);
01593         if (error) {
01594                 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
01595                         goto out;
01596 
01597                 if (!(vfsp->vfs_flag & MS_REMOUNT)) {
01598                         vattr_t         vattr;
01599 
01600                         /*
01601                          * Make sure user is the owner of the mount point
01602                          * or has sufficient privileges.
01603                          */
01604 
01605                         vattr.va_mask = AT_UID;
01606 
01607                         vn_lock(mvp, LK_SHARED | LK_RETRY);
01608                         if (VOP_GETATTR(mvp, &vattr, cr)) {
01609                                 VOP_UNLOCK(mvp, 0);
01610                                 goto out;
01611                         }
01612 
01613                         if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
01614                             VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
01615                                 VOP_UNLOCK(mvp, 0);
01616                                 goto out;
01617                         }
01618                         VOP_UNLOCK(mvp, 0);
01619                 }
01620 
01621                 secpolicy_fs_mount_clearopts(cr, vfsp);
01622         }
01623 
01624         /*
01625          * Refuse to mount a filesystem if we are in a local zone and the
01626          * dataset is not visible.
01627          */
01628         if (!INGLOBALZONE(curthread) &&
01629             (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
01630                 error = EPERM;
01631                 goto out;
01632         }
01633 
01634 #ifdef SECLABEL
01635         error = zfs_mount_label_policy(vfsp, osname);
01636         if (error)
01637                 goto out;
01638 #endif
01639 
01640         vfsp->vfs_flag |= MNT_NFS4ACLS;
01641 
01642         /*
01643          * When doing a remount, we simply refresh our temporary properties
01644          * according to those options set in the current VFS options.
01645          */
01646         if (vfsp->vfs_flag & MS_REMOUNT) {
01647                 /* refresh mount options */
01648                 zfs_unregister_callbacks(vfsp->vfs_data);
01649                 error = zfs_register_callbacks(vfsp);
01650                 goto out;
01651         }
01652 
01653         /* Initial root mount: try hard to import the requested root pool. */
01654         if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
01655             (vfsp->vfs_flag & MNT_UPDATE) == 0) {
01656                 char pname[MAXNAMELEN];
01657 
01658                 error = getpoolname(osname, pname);
01659                 if (error == 0)
01660                         error = spa_import_rootpool(pname);
01661                 if (error)
01662                         goto out;
01663         }
01664         DROP_GIANT();
01665         error = zfs_domount(vfsp, osname);
01666         PICKUP_GIANT();
01667 
01668 #ifdef sun
01669         /*
01670          * Add an extra VFS_HOLD on our parent vfs so that it can't
01671          * disappear due to a forced unmount.
01672          */
01673         if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
01674                 VFS_HOLD(mvp->v_vfsp);
01675 #endif  /* sun */
01676 
01677 out:
01678         return (error);
01679 }
01680 
01681 static int
01682 zfs_statfs(vfs_t *vfsp, struct statfs *statp)
01683 {
01684         zfsvfs_t *zfsvfs = vfsp->vfs_data;
01685         uint64_t refdbytes, availbytes, usedobjs, availobjs;
01686 
01687         statp->f_version = STATFS_VERSION;
01688 
01689         ZFS_ENTER(zfsvfs);
01690 
01691         dmu_objset_space(zfsvfs->z_os,
01692             &refdbytes, &availbytes, &usedobjs, &availobjs);
01693 
01694         /*
01695          * The underlying storage pool actually uses multiple block sizes.
01696          * We report the fragsize as the smallest block size we support,
01697          * and we report our blocksize as the filesystem's maximum blocksize.
01698          */
01699         statp->f_bsize = SPA_MINBLOCKSIZE;
01700         statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
01701 
01702         /*
01703          * The following report "total" blocks of various kinds in the
01704          * file system, but reported in terms of f_frsize - the
01705          * "fragment" size.
01706          */
01707 
01708         statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
01709         statp->f_bfree = availbytes / statp->f_bsize;
01710         statp->f_bavail = statp->f_bfree; /* no root reservation */
01711 
01712         /*
01713          * statvfs() should really be called statufs(), because it assumes
01714          * static metadata.  ZFS doesn't preallocate files, so the best
01715          * we can do is report the max that could possibly fit in f_files,
01716          * and that minus the number actually used in f_ffree.
01717          * For f_ffree, report the smaller of the number of object available
01718          * and the number of blocks (each object will take at least a block).
01719          */
01720         statp->f_ffree = MIN(availobjs, statp->f_bfree);
01721         statp->f_files = statp->f_ffree + usedobjs;
01722 
01723         /*
01724          * We're a zfs filesystem.
01725          */
01726         (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));
01727 
01728         strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
01729             sizeof(statp->f_mntfromname));
01730         strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
01731             sizeof(statp->f_mntonname));
01732 
01733         statp->f_namemax = ZFS_MAXNAMELEN;
01734 
01735         ZFS_EXIT(zfsvfs);
01736         return (0);
01737 }
01738 
01739 int
01740 zfs_vnode_lock(vnode_t *vp, int flags)
01741 {
01742         int error;
01743 
01744         ASSERT(vp != NULL);
01745 
01746         error = vn_lock(vp, flags);
01747         return (error);
01748 }
01749 
01750 static int
01751 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
01752 {
01753         zfsvfs_t *zfsvfs = vfsp->vfs_data;
01754         znode_t *rootzp;
01755         int error;
01756 
01757         ZFS_ENTER_NOERROR(zfsvfs);
01758 
01759         error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
01760         if (error == 0)
01761                 *vpp = ZTOV(rootzp);
01762 
01763         ZFS_EXIT(zfsvfs);
01764 
01765         if (error == 0) {
01766                 error = zfs_vnode_lock(*vpp, flags);
01767                 if (error == 0)
01768                         (*vpp)->v_vflag |= VV_ROOT;
01769         }
01770         if (error != 0)
01771                 *vpp = NULL;
01772 
01773         return (error);
01774 }
01775 
/*
 * Tear down the state that ties this zfsvfs to its objset (z_os).
 *
 * Takes z_teardown_lock and then z_teardown_inactive_lock as writer,
 * closes the ZIL and calls zfs_znode_dmu_fini() on every znode that
 * still has an SA handle.
 *
 * unmounting == B_TRUE:  marks the filesystem unmounted, drops both
 *      locks and (on FreeBSD) sleeps until z_all_znodes is empty.
 * unmounting == B_FALSE: on success returns with BOTH locks still held
 *      as writer (nothing on this path releases them); if the
 *      filesystem was already unmounted or z_os is NULL, both locks are
 *      released and EIO is returned.
 *
 * When z_os is still set, property callbacks are unregistered, a dirty
 * writable dataset is synced, and cached dbufs are evicted.
 *
 * Returns 0 on success or EIO as described above.
 */
01782 static int
01783 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
01784 {
01785         znode_t *zp;
01786
01787         rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
01788
01789         if (!unmounting) {
01790                 /*
01791                  * We purge the parent filesystem's vfsp as the parent
01792                  * filesystem and all of its snapshots have their vnode's
01793                  * v_vfsp set to the parent's filesystem's vfsp.  Note,
01794                  * 'z_parent' is self referential for non-snapshots.
01795                  */
01796                 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
01797 #ifdef FREEBSD_NAMECACHE
01798                 cache_purgevfs(zfsvfs->z_parent->z_vfs);
01799 #endif
01800         }
01801
01802         /*
01803          * Close the zil. NB: Can't close the zil while zfs_inactive
01804          * threads are blocked as zil_close can call zfs_inactive.
01805          */
01806         if (zfsvfs->z_log) {
01807                 zil_close(zfsvfs->z_log);
01808                 zfsvfs->z_log = NULL;
01809         }
01810
01811         rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
01812
01813         /*
01814          * If we are not unmounting (ie: online recv) and someone already
01815          * unmounted this file system while we were doing the switcheroo,
01816          * or a reopen of z_os failed then just bail out now.
01817          */
01818         if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
01819                 rw_exit(&zfsvfs->z_teardown_inactive_lock);
01820                 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
01821                 return (EIO);
01822         }
01823
01824         /*
01825          * At this point there are no vops active, and any new vops will
01826          * fail with EIO since we have z_teardown_lock for writer (only
01827          * relevant for forced unmount).
01828          *
01829          * Release all holds on dbufs.
01830          */
01831         mutex_enter(&zfsvfs->z_znodes_lock);
01832         for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
01833             zp = list_next(&zfsvfs->z_all_znodes, zp))
01834                 if (zp->z_sa_hdl) {
01835                         ASSERT(ZTOV(zp)->v_count >= 0);
01836                         zfs_znode_dmu_fini(zp);
01837                 }
01838         mutex_exit(&zfsvfs->z_znodes_lock);
01839
01840         /*
01841          * If we are unmounting, set the unmounted flag and let new vops
01842          * unblock.  zfs_inactive will have the unmounted behavior, and all
01843          * other vops will fail with EIO.
01844          */
01845         if (unmounting) {
01846                 zfsvfs->z_unmounted = B_TRUE;
01847                 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
01848                 rw_exit(&zfsvfs->z_teardown_inactive_lock);
01849
01850 #ifdef __FreeBSD__
01851                 /*
01852                  * Some znodes might not be fully reclaimed, wait for them.
01853                  * The sleep uses 'zfsvfs' as the wait channel with
01854                  * z_znodes_lock as the interlock; the matching wakeup is
01855                  * issued elsewhere (presumably on znode reclaim -- confirm).
01856                  */
01854                 mutex_enter(&zfsvfs->z_znodes_lock);
01855                 while (list_head(&zfsvfs->z_all_znodes) != NULL) {
01856                         msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
01857                             "zteardown", 0);
01858                 }
01859                 mutex_exit(&zfsvfs->z_znodes_lock);
01860 #endif
01861         }
01862
01863         /*
01864          * z_os will be NULL if there was an error in attempting to reopen
01865          * zfsvfs, so just return as the properties had already been
01866          * unregistered and cached data had been evicted before.
01867          */
01868         if (zfsvfs->z_os == NULL)
01869                 return (0);
01870
01871         /*
01872          * Unregister properties.
01873          */
01874         zfs_unregister_callbacks(zfsvfs);
01875
01876         /*
01877          * Evict cached data.  A dirty dataset on a writable mount is
01878          * synced out first so nothing pending is lost by the eviction.
01879          */
01879         if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
01880             !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
01881                 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
01882         (void) dmu_objset_evict_dbufs(zfsvfs->z_os);
01883
01884         return (0);
01885 }
01886 
/*
 * VFS unmount entry point.
 *
 * Sequence, as implemented below:
 *   1. Permission check: secpolicy_fs_unmount(), falling back to the
 *      delegated ZFS_DELEG_PERM_MOUNT permission on the dataset.
 *   2. Purge the name cache for the parent filesystem.
 *   3. If a '.zfs' control directory exists, unmount the snapshots
 *      mounted under it and destroy it (EBUSY if it is still referenced
 *      and the unmount is not forced).
 *   4. For a forced unmount, mark the filesystem unmounted up front.
 *   5. vflush() the remaining vnodes; on failure the control directory
 *      is re-created for non-snapshot filesystems and the error returned.
 *   6. Tear down the zfsvfs, detach and disown the objset, release the
 *      covered vnode for snapshots, and free the zfsvfs.
 *
 * 'fflag' carries the unmount flags; only MS_FORCE is examined here.
 * Returns 0 on success or an errno value.
 */
01887 /*ARGSUSED*/
01888 static int
01889 zfs_umount(vfs_t *vfsp, int fflag)
01890 {
01891         kthread_t *td = curthread;
01892         zfsvfs_t *zfsvfs = vfsp->vfs_data;
01893         objset_t *os;
01894         cred_t *cr = td->td_ucred;
01895         int ret;
01896
01897         ret = secpolicy_fs_unmount(cr, vfsp);
01898         if (ret) {
                      /*
                       * Not privileged: allow the unmount only if the
                       * delegated "mount" permission is granted; otherwise
                       * report the original secpolicy error.
                       */
01899                 if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
01900                     ZFS_DELEG_PERM_MOUNT, cr))
01901                         return (ret);
01902         }
01903
01904         /*
01905          * We purge the parent filesystem's vfsp as the parent filesystem
01906          * and all of its snapshots have their vnode's v_vfsp set to the
01907          * parent's filesystem's vfsp.  Note, 'z_parent' is self
01908          * referential for non-snapshots.
01909          */
01910         (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
01911
01912         /*
01913          * Unmount any snapshots mounted under .zfs before unmounting the
01914          * dataset itself.
01915          */
01916         if (zfsvfs->z_ctldir != NULL) {
01917                 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
01918                         return (ret);
                      /*
                       * This first flush is expected to report EBUSY (see
                       * the assertion), presumably because the control
                       * directory vnode is still held -- confirm.
                       */
01919                 ret = vflush(vfsp, 0, 0, td);
01920                 ASSERT(ret == EBUSY);
01921                 if (!(fflag & MS_FORCE)) {
01922                         if (zfsvfs->z_ctldir->v_count > 1)
01923                                 return (EBUSY);
01924                         ASSERT(zfsvfs->z_ctldir->v_count == 1);
01925                 }
01926                 zfsctl_destroy(zfsvfs);
01927                 ASSERT(zfsvfs->z_ctldir == NULL);
01928         }
01929
01930         if (fflag & MS_FORCE) {
01931                 /*
01932                  * Mark file system as unmounted before calling
01933                  * vflush(FORCECLOSE). This way we ensure no future vnops
01934                  * will be called and risk operating on DOOMED vnodes.
01935                  */
01936                 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
01937                 zfsvfs->z_unmounted = B_TRUE;
01938                 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
01939         }
01940
01941         /*
01942          * Flush all the files.
01943          */
01944         ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
01945         if (ret != 0) {
                      /*
                       * The unmount is being abandoned; restore the '.zfs'
                       * control directory that was destroyed above.
                       */
01946                 if (!zfsvfs->z_issnap) {
01947                         zfsctl_create(zfsvfs);
01948                         ASSERT(zfsvfs->z_ctldir != NULL);
01949                 }
01950                 return (ret);
01951         }
01952
01953         if (!(fflag & MS_FORCE)) {
01954                 /*
01955                  * Check the number of active vnodes in the file system.
01956                  * Our count is maintained in the vfs structure, but the
01957                  * number is off by 1 to indicate a hold on the vfs
01958                  * structure itself.
01959                  *
01960                  * The '.zfs' directory maintains a reference of its
01961                  * own, and any active references underneath are
01962                  * reflected in the vnode count.
01963                  *
01963                  * NOTE(review): z_ctldir was destroyed above whenever it
01963                  * existed, so the 'else' arm looks unreachable on this
01963                  * path -- confirm before relying on it.
01963                  */
01964                 if (zfsvfs->z_ctldir == NULL) {
01965                         if (vfsp->vfs_count > 1)
01966                                 return (EBUSY);
01967                 } else {
01968                         if (vfsp->vfs_count > 2 ||
01969                             zfsvfs->z_ctldir->v_count > 1)
01970                                 return (EBUSY);
01971                 }
01972         }
01973
01974         VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
01975         os = zfsvfs->z_os;
01976
01977         /*
01978          * z_os will be NULL if there was an error in
01979          * attempting to reopen zfsvfs.
01980          */
01981         if (os != NULL) {
01982                 /*
01983                  * Unset the objset user_ptr.
01984                  */
01985                 mutex_enter(&os->os_user_ptr_lock);
01986                 dmu_objset_set_user(os, NULL);
01987                 mutex_exit(&os->os_user_ptr_lock);
01988
01989                 /*
01990                  * Finally release the objset
01991                  */
01992                 dmu_objset_disown(os, zfsvfs);
01993         }
01994
01995         /*
01996          * We can now safely destroy the '.zfs' directory node.
01997          */
01998         if (zfsvfs->z_ctldir != NULL)
01999                 zfsctl_destroy(zfsvfs);
02000         if (zfsvfs->z_issnap) {
02001                 vnode_t *svp = vfsp->mnt_vnodecovered;
02002
                      /*
                       * Drop one reference on the covered vnode, but only
                       * when it has at least two.  NOTE(review): the origin
                       * of the extra reference is not visible here.
                       */
02003                 if (svp->v_count >= 2)
02004                         VN_RELE(svp);
02005         }
02006         zfs_freevfs(vfsp);
02007
02008         return (0);
02009 }
02010 
02011 static int
02012 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
02013 {
02014         zfsvfs_t        *zfsvfs = vfsp->vfs_data;
02015         znode_t         *zp;
02016         int             err;
02017 
02018         /*
02019          * zfs_zget() can't operate on virtual entries like .zfs/ or
02020          * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
02021          * This will make NFS to switch to LOOKUP instead of using VGET.
02022          */
02023         if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR)
02024                 return (EOPNOTSUPP);
02025 
02026         ZFS_ENTER(zfsvfs);
02027         err = zfs_zget(zfsvfs, ino, &zp);
02028         if (err == 0 && zp->z_unlinked) {
02029                 VN_RELE(ZTOV(zp));
02030                 err = EINVAL;
02031         }
02032         if (err == 0)
02033                 *vpp = ZTOV(zp);
02034         ZFS_EXIT(zfsvfs);
02035         if (err == 0)
02036                 err = zfs_vnode_lock(*vpp, flags);
02037         if (err != 0)
02038                 *vpp = NULL;
02039         return (err);
02040 }
02041 
02042 static int
02043 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
02044     struct ucred **credanonp, int *numsecflavors, int **secflavors)
02045 {
02046         zfsvfs_t *zfsvfs = vfsp->vfs_data;
02047 
02048         /*
02049          * If this is regular file system vfsp is the same as
02050          * zfsvfs->z_parent->z_vfs, but if it is snapshot,
02051          * zfsvfs->z_parent->z_vfs represents parent file system
02052          * which we have to use here, because only this file system
02053          * has mnt_export configured.
02054          */
02055         return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
02056             credanonp, numsecflavors, secflavors));
02057 }
02058 
02059 CTASSERT(SHORT_FID_LEN <= sizeof(struct fid));
02060 CTASSERT(LONG_FID_LEN <= sizeof(struct fid));
02061 
02062 static int
02063 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
02064 {
02065         zfsvfs_t        *zfsvfs = vfsp->vfs_data;
02066         znode_t         *zp;
02067         uint64_t        object = 0;
02068         uint64_t        fid_gen = 0;
02069         uint64_t        gen_mask;
02070         uint64_t        zp_gen;
02071         int             i, err;
02072 
02073         *vpp = NULL;
02074 
02075         ZFS_ENTER(zfsvfs);
02076 
02077         /*
02078          * On FreeBSD we can get snapshot's mount point or its parent file
02079          * system mount point depending if snapshot is already mounted or not.
02080          */
02081         if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
02082                 zfid_long_t     *zlfid = (zfid_long_t *)fidp;
02083                 uint64_t        objsetid = 0;
02084                 uint64_t        setgen = 0;
02085 
02086                 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
02087                         objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
02088 
02089                 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
02090                         setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
02091 
02092                 ZFS_EXIT(zfsvfs);
02093 
02094                 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
02095                 if (err)
02096                         return (EINVAL);
02097                 ZFS_ENTER(zfsvfs);
02098         }
02099 
02100         if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
02101                 zfid_short_t    *zfid = (zfid_short_t *)fidp;
02102 
02103                 for (i = 0; i < sizeof (zfid->zf_object); i++)
02104                         object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
02105 
02106                 for (i = 0; i < sizeof (zfid->zf_gen); i++)
02107                         fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
02108         } else {
02109                 ZFS_EXIT(zfsvfs);
02110                 return (EINVAL);
02111         }
02112 
02113         /* A zero fid_gen means we are in the .zfs control directories */
02114         if (fid_gen == 0 &&
02115             (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
02116                 *vpp = zfsvfs->z_ctldir;
02117                 ASSERT(*vpp != NULL);
02118                 if (object == ZFSCTL_INO_SNAPDIR) {
02119                         VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
02120                             0, NULL, NULL, NULL, NULL, NULL) == 0);
02121                 } else {
02122                         VN_HOLD(*vpp);
02123                 }
02124                 ZFS_EXIT(zfsvfs);
02125                 err = zfs_vnode_lock(*vpp, flags | LK_RETRY);
02126                 if (err != 0)
02127                         *vpp = NULL;
02128                 return (err);
02129         }
02130 
02131         gen_mask = -1ULL >> (64 - 8 * i);
02132 
02133         dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
02134         if (err = zfs_zget(zfsvfs, object, &zp)) {
02135                 ZFS_EXIT(zfsvfs);
02136                 return (err);
02137         }
02138         (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
02139             sizeof (uint64_t));
02140         zp_gen = zp_gen & gen_mask;
02141         if (zp_gen == 0)
02142                 zp_gen = 1;
02143         if (zp->z_unlinked || zp_gen != fid_gen) {
02144                 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
02145                 VN_RELE(ZTOV(zp));
02146                 ZFS_EXIT(zfsvfs);
02147                 return (EINVAL);
02148         }
02149 
02150         *vpp = ZTOV(zp);
02151         ZFS_EXIT(zfsvfs);
02152         err = zfs_vnode_lock(*vpp, flags | LK_RETRY);
02153         if (err == 0)
02154                 vnode_create_vobject(*vpp, zp->z_size, curthread);
02155         else
02156                 *vpp = NULL;
02157         return (err);
02158 }
02159 
02166 int
02167 zfs_suspend_fs(zfsvfs_t *zfsvfs)
02168 {
02169         int error;
02170 
02171         if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
02172                 return (error);
02173         dmu_objset_disown(zfsvfs->z_os, zfsvfs);
02174 
02175         return (0);
02176 }
02177 
/*
 * Reopen zfsvfs->z_os under the name 'osname' and release the VOPs.
 *
 * The caller must hold both z_teardown_lock and
 * z_teardown_inactive_lock as writer (asserted below); both are always
 * released before returning, whether or not the reopen succeeded.
 *
 * On success the ZPL version is re-read, the SA framework is set up
 * again, zfsvfs_setup() is re-run and every znode on z_all_znodes is
 * re-attached with zfs_rezget().
 *
 * On any failure a forced unmount of the filesystem is attempted and
 * the error is returned.
 */
02181 int
02182 zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
02183 {
02184         int err;
02185
02186         ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
02187         ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
02188
02189         err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
02190             &zfsvfs->z_os);
02191         if (err) {
                      /* A NULL z_os is how later code detects the failed reopen. */
02192                 zfsvfs->z_os = NULL;
02193         } else {
02194                 znode_t *zp;
02195                 uint64_t sa_obj = 0;
02196
02197                 /*
02198                  * Make sure version hasn't changed
02199                  */
02200
02201                 err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION,
02202                     &zfsvfs->z_version);
02203
02204                 if (err)
02205                         goto bail;
02206
02207                 err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
02208                     ZFS_SA_ATTRS, 8, 1, &sa_obj);
02209
                      /*
                       * A failed lookup is only fatal when the version
                       * requires SA; otherwise sa_obj stays 0.
                       */
02210                 if (err && zfsvfs->z_version >= ZPL_VERSION_SA)
02211                         goto bail;
02212
02213                 if ((err = sa_setup(zfsvfs->z_os, sa_obj,
02214                     zfs_attr_table,  ZPL_END, &zfsvfs->z_attr_table)) != 0)
02215                         goto bail;
02216
02217                 if (zfsvfs->z_version >= ZPL_VERSION_SA)
02218                         sa_register_update_callback(zfsvfs->z_os,
02219                             zfs_sa_upgrade);
02220
02221                 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
02222
02223                 zfs_set_fuid_feature(zfsvfs);
02224
02225                 /*
02226                  * Attempt to re-establish all the active znodes with
02227                  * their dbufs.  If a zfs_rezget() fails, then we'll let
02228                  * any potential callers discover that via ZFS_ENTER_VERIFY_VP
02229                  * when they try to use their znode.
02230                  */
02231                 mutex_enter(&zfsvfs->z_znodes_lock);
02232                 for (zp = list_head(&zfsvfs->z_all_znodes); zp;
02233                     zp = list_next(&zfsvfs->z_all_znodes, zp)) {
02234                         (void) zfs_rezget(zp);
02235                 }
02236                 mutex_exit(&zfsvfs->z_znodes_lock);
02237         }
02238
02239 bail:
02240         /* release the VOPs */
02241         rw_exit(&zfsvfs->z_teardown_inactive_lock);
02242         rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
02243
02244         if (err) {
02245                 /*
02246                  * Since we couldn't reopen zfsvfs::z_os, or
02247                  * setup the sa framework force unmount this file system.
02248                  *
02248                  * NOTE(review): on the 'goto bail' paths z_os is still
02248                  * set and owned; the forced unmount is presumably what
02248                  * disowns it -- confirm.
02248                  */
02249                 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
02250                         (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
02251         }
02252         return (err);
02253 }
02254 
02255 static void
02256 zfs_freevfs(vfs_t *vfsp)
02257 {
02258         zfsvfs_t *zfsvfs = vfsp->vfs_data;
02259 
02260 #ifdef sun
02261         /*
02262          * If this is a snapshot, we have an extra VFS_HOLD on our parent
02263          * from zfs_mount().  Release it here.  If we came through
02264          * zfs_mountroot() instead, we didn't grab an extra hold, so
02265          * skip the VFS_RELE for rootvfs.
02266          */
02267         if (zfsvfs->z_issnap && (vfsp != rootvfs))
02268                 VFS_RELE(zfsvfs->z_parent->z_vfs);
02269 #endif  /* sun */
02270 
02271         zfsvfs_free(zfsvfs);
02272 
02273         atomic_add_32(&zfs_active_fs_count, -1);
02274 }
02275 
02276 #ifdef __i386__
02277 static int desiredvnodes_backup;
02278 #endif
02279 
/*
 * On i386 only: remember the current desiredvnodes and, if it still has
 * its boot-time default, lower it to three quarters of that value.
 * A no-op on every other architecture.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
        int dflt;

        desiredvnodes_backup = desiredvnodes;

        /*
         * Recompute the default exactly as vntblinit() does.  If the
         * live value differs, the administrator has tuned it and it is
         * left alone.
         */
        dflt = min(maxproc + cnt.v_page_count / 4, 2 *
            vm_kmem_size / (5 * (sizeof(struct vm_object) +
            sizeof(struct vnode))));
        if (dflt == desiredvnodes)
                desiredvnodes = (3 * dflt) / 4;
#endif
}
02300 
/*
 * On i386 only: restore desiredvnodes to the value saved by
 * zfs_vnodes_adjust().  A no-op on every other architecture.
 */
static void
zfs_vnodes_adjust_back(void)
{
#ifdef __i386__
        desiredvnodes = desiredvnodes_backup;
#endif
}
02309 
02310 void
02311 zfs_init(void)
02312 {
02313 
02314         printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
02315 
02316         /*
02317          * Initialize .zfs directory structures
02318          */
02319         zfsctl_init();
02320 
02321         /*
02322          * Initialize znode cache, vnode ops, etc...
02323          */
02324         zfs_znode_init();
02325 
02326         /*
02327          * Reduce number of vnodes. Originally number of vnodes is calculated
02328          * with UFS inode in mind. We reduce it here, because it's too big for
02329          * ZFS/i386.
02330          */
02331         zfs_vnodes_adjust();
02332 
02333         dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
02334 }
02335 
/*
 * Undo zfs_init(): shut down the .zfs control-directory and znode
 * subsystems, then restore the saved vnode limit.
 */
void
zfs_fini(void)
{
        zfsctl_fini();

        zfs_znode_fini();

        zfs_vnodes_adjust_back();
}
02343 
02344 int
02345 zfs_busy(void)
02346 {
02347         return (zfs_active_fs_count != 0);
02348 }
02349 
/*
 * Upgrade the ZPL version of a mounted filesystem to 'newvers'.
 *
 * Returns:
 *   EINVAL  - 'newvers' is outside [ZPL_VERSION_INITIAL, ZPL_VERSION]
 *             or lower than the current version (no downgrades);
 *   ENOTSUP - the pool's SPA version is too old for 'newvers';
 *   other   - error from dmu_tx_assign() or zap_update();
 *   0       - the version was written, logged and cached in z_version.
 *
 * When the upgrade crosses ZPL_VERSION_SA and the filesystem does not
 * yet use SA, the SA master node is created and registered in the same
 * transaction.
 */
02350 int
02351 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
02352 {
02353         int error;
02354         objset_t *os = zfsvfs->z_os;
02355         dmu_tx_t *tx;
02356
02357         if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
02358                 return (EINVAL);
02359
02360         if (newvers < zfsvfs->z_version)
02361                 return (EINVAL);
02362
02363         if (zfs_spa_version_map(newvers) >
02364             spa_version(dmu_objset_spa(zfsvfs->z_os)))
02365                 return (ENOTSUP);
02366
02367         tx = dmu_tx_create(os);
02368         dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
02369         if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
                      /* Extra holds for the SA master node created below. */
02370                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
02371                     ZFS_SA_ATTRS);
02372                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
02373         }
02374         error = dmu_tx_assign(tx, TXG_WAIT);
02375         if (error) {
                      /* The tx was never assigned, so it is aborted. */
02376                 dmu_tx_abort(tx);
02377                 return (error);
02378         }
02379
02380         error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
02381             8, 1, &newvers, tx);
02382
02383         if (error) {
                      /* The tx is already assigned, so it is committed. */
02384                 dmu_tx_commit(tx);
02385                 return (error);
02386         }
02387
02388         if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
02389                 uint64_t sa_obj;
02390
02391                 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
02392                     SPA_VERSION_SA);
02393                 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
02394                     DMU_OT_NONE, 0, tx);
02395
                      /*
                       * NOTE(review): a zap_add() failure is only caught by
                       * the assertion below; it is not otherwise handled.
                       */
02396                 error = zap_add(os, MASTER_NODE_OBJ,
02397                     ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
02398                 ASSERT0(error);
02399
02400                 VERIFY(0 == sa_set_sa_object(os, sa_obj));
02401                 sa_register_update_callback(os, zfs_sa_upgrade);
02402         }
02403
02404         spa_history_log_internal(LOG_DS_UPGRADE,
02405             dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
02406             zfsvfs->z_version, newvers, dmu_objset_id(os));
02407
02408         dmu_tx_commit(tx);
02409
              /* Update the cached version only after the commit. */
02410         zfsvfs->z_version = newvers;
02411
02412         zfs_set_fuid_feature(zfsvfs);
02413
02414         return (0);
02415 }
02416 
02420 int
02421 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
02422 {
02423         const char *pname;
02424         int error = ENOENT;
02425 
02426         /*
02427          * Look up the file system's value for the property.  For the
02428          * version property, we look up a slightly different string.
02429          */
02430         if (prop == ZFS_PROP_VERSION)
02431                 pname = ZPL_VERSION_STR;
02432         else
02433                 pname = zfs_prop_to_name(prop);
02434 
02435         if (os != NULL)
02436                 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
02437 
02438         if (error == ENOENT) {
02439                 /* No value set, use the default value */
02440                 switch (prop) {
02441                 case ZFS_PROP_VERSION:
02442                         *value = ZPL_VERSION;
02443                         break;
02444                 case ZFS_PROP_NORMALIZE:
02445                 case ZFS_PROP_UTF8ONLY:
02446                         *value = 0;
02447                         break;
02448                 case ZFS_PROP_CASE:
02449                         *value = ZFS_CASE_SENSITIVE;
02450                         break;
02451                 default:
02452                         return (error);
02453                 }
02454                 error = 0;
02455         }
02456         return (error);
02457 }
02458 
02459 #ifdef _KERNEL
02460 void
02461 zfsvfs_update_fromname(const char *oldname, const char *newname)
02462 {
02463         char tmpbuf[MAXPATHLEN];
02464         struct mount *mp;
02465         char *fromname;
02466         size_t oldlen;
02467 
02468         oldlen = strlen(oldname);
02469 
02470         mtx_lock(&mountlist_mtx);
02471         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
02472                 fromname = mp->mnt_stat.f_mntfromname;
02473                 if (strcmp(fromname, oldname) == 0) {
02474                         (void)strlcpy(fromname, newname,
02475                             sizeof(mp->mnt_stat.f_mntfromname));
02476                         continue;
02477                 }
02478                 if (strncmp(fromname, oldname, oldlen) == 0 &&
02479                     (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
02480                         (void)snprintf(tmpbuf, sizeof(tmpbuf), "%s%s",
02481                             newname, fromname + oldlen);
02482                         (void)strlcpy(fromname, tmpbuf,
02483                             sizeof(mp->mnt_stat.f_mntfromname));
02484                         continue;
02485                 }
02486         }
02487         mtx_unlock(&mountlist_mtx);
02488 }
02489 #endif