FreeBSD ZFS
The Zettabyte File System

zfs_znode.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 /*
00022  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
00023  * Copyright (c) 2012 by Delphix. All rights reserved.
00024  */
00025 
00026 /* Portions Copyright 2007 Jeremy Teo */
00027 /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
00028 
00029 #ifdef _KERNEL
00030 #include <sys/types.h>
00031 #include <sys/param.h>
00032 #include <sys/time.h>
00033 #include <sys/systm.h>
00034 #include <sys/sysmacros.h>
00035 #include <sys/resource.h>
00036 #include <sys/mntent.h>
00037 #include <sys/u8_textprep.h>
00038 #include <sys/dsl_dataset.h>
00039 #include <sys/vfs.h>
00040 #include <sys/vnode.h>
00041 #include <sys/file.h>
00042 #include <sys/kmem.h>
00043 #include <sys/errno.h>
00044 #include <sys/unistd.h>
00045 #include <sys/atomic.h>
00046 #include <sys/zfs_dir.h>
00047 #include <sys/zfs_acl.h>
00048 #include <sys/zfs_ioctl.h>
00049 #include <sys/zfs_rlock.h>
00050 #include <sys/zfs_fuid.h>
00051 #include <sys/dnode.h>
00052 #include <sys/fs/zfs.h>
00053 #include <sys/kidmap.h>
00054 #endif /* _KERNEL */
00055 
00056 #include <sys/dmu.h>
00057 #include <sys/refcount.h>
00058 #include <sys/stat.h>
00059 #include <sys/zap.h>
00060 #include <sys/zfs_znode.h>
00061 #include <sys/sa.h>
00062 #include <sys/zfs_sa.h>
00063 #include <sys/zfs_stat.h>
00064 #include <sys/refcount.h>
00065 
00066 #include "zfs_prop.h"
00067 #include "zfs_comutil.h"
00068 
00069 /* Used by fstat(1): read-only sysctl reporting sizeof(znode_t). */
00070 SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, 0, sizeof(znode_t),
00071     "sizeof(znode_t)");
00072 
00077 #ifdef  DEBUG
00078 #define ZNODE_STATS
00079 #endif  /* DEBUG */
00080 
      /*
       * ZNODE_STAT_ADD() increments its argument when ZNODE_STATS is defined
       * (DEBUG builds define it above) and expands to nothing otherwise, so
       * the argument is not evaluated in builds without statistics.
       */
00081 #ifdef  ZNODE_STATS
00082 #define ZNODE_STAT_ADD(stat)                    ((stat)++)
00083 #else
00084 #define ZNODE_STAT_ADD(stat)                    /* nothing */
00085 #endif  /* ZNODE_STATS */
00086 
00087 /*
00088  * Functions needed for userland (ie: libzpool) are not put under
00089  * #ifdef _KERNEL; the rest of the functions have dependencies
00090  * (such as VFS logic) that will not compile easily in userland.
00091  */
00092 #ifdef _KERNEL
00093 
      /*
       * zfsvfs_lock is initialized in zfs_znode_init() and destroyed in
       * zfs_znode_fini().  zfs_znode_move() takes it as writer while it
       * re-validates a znode's z_zfsvfs pointer.
       */
00097 krwlock_t zfsvfs_lock;
00098 
      /*
       * Cache from which every znode_t in this file is allocated; created in
       * zfs_znode_init() and destroyed in zfs_znode_fini().
       */
00099 static kmem_cache_t *znode_cache = NULL;
00100 
00101 /*ARGSUSED*/
      /*
       * Handler that must never run: it panics, printing user_ptr.  The dbuf
       * argument is not used.
       */
00102 static void
00103 znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
00104 {
00105         /*
00106          * We should never drop all dbuf refs without first clearing
00107          * the eviction callback.
00108          */
00109         panic("evicting znode %p\n", user_ptr);
00110 }
00111 
00112 extern struct vop_vector zfs_vnodeops;
00113 extern struct vop_vector zfs_fifoops;
00114 extern struct vop_vector zfs_shareops;
00115 
00116 /*
       * Initialize the in-core parts of a znode: invalidate z_zfsvfs, set up
       * the list linkage, the locks and the range-lock AVL tree, and -- when
       * a vfs is supplied through 'arg' -- attach a newly obtained,
       * exclusively locked vnode.
       *
       *   buf      the znode_t to initialize
       *   arg      vfs_t to obtain the vnode for, or NULL for no vnode
       *   kmflags  only KM_NOSLEEP is examined
       *
       * Returns 0 on success, or -1 when getnewvnode() fails and KM_NOSLEEP
       * was requested.
       *
00117  * \note  We cannot use this function as a cache constructor, because
00118  *        there is one global cache for all file systems and we need
00119  *        to pass vfsp here, which is not possible, because argument
00120  *        'cdrarg' is defined at kmem_cache_create() time.
00121  */
00122 /*ARGSUSED*/
00123 static int
00124 zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
00125 {
00126         znode_t *zp = buf;
00127         vnode_t *vp;
00128         vfs_t *vfsp = arg;
00129         int error;
00130 
              /* Start with an invalid z_zfsvfs; it is set once the znode is ready. */
00131         POINTER_INVALIDATE(&zp->z_zfsvfs);
00132         ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
00133 
00134         if (vfsp != NULL) {
00135                 error = getnewvnode("zfs", vfsp, &zfs_vnodeops, &vp);
00136                 if (error != 0 && (kmflags & KM_NOSLEEP))
00137                         return (-1);
                      /*
                       * NOTE(review): without KM_NOSLEEP a getnewvnode()
                       * failure is only caught by this ASSERT; if ASSERT
                       * compiles away, vp is used uninitialized below --
                       * confirm getnewvnode() cannot fail here.
                       */
00138                 ASSERT(error == 0);
00139                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                      /* Link znode and vnode to each other. */
00140                 zp->z_vnode = vp;
00141                 vp->v_data = (caddr_t)zp;
                      /*
                       * Presumably these permit recursive and shared
                       * acquisition of the vnode lock -- TODO confirm.
                       */
00142                 VN_LOCK_AREC(vp);
00143                 VN_LOCK_ASHARE(vp);
00144         } else {
00145                 zp->z_vnode = NULL;
00146         }
00147 
00148         list_link_init(&zp->z_link_node);
00149 
00150         mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
00151         rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
00152         rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
00153         mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
00154 
              /* Range locks: a mutex plus an AVL tree of rl_t nodes. */
00155         mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
00156         avl_create(&zp->z_range_avl, zfs_range_compare,
00157             sizeof (rl_t), offsetof(rl_t, r_node));
00158 
00159         zp->z_dirlocks = NULL;
00160         zp->z_acl_cached = NULL;
00161         zp->z_moved = 0;
00162         return (0);
00163 }
00164 
00165 /*ARGSUSED*/
      /*
       * Destructor registered for znode_cache in zfs_znode_init(): destroys
       * the locks and the range-lock AVL tree set up by
       * zfs_znode_cache_constructor().  The znode must already be detached:
       * z_zfsvfs invalid, no vnode, not on a znode list, no dirlocks and no
       * cached ACL (all asserted).  'arg' is not used.
       */
00166 static void
00167 zfs_znode_cache_destructor(void *buf, void *arg)
00168 {
00169         znode_t *zp = buf;
00170 
00171         ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
00172         ASSERT(ZTOV(zp) == NULL);
              /*
               * NOTE(review): ZTOV(zp) was just asserted to be NULL, so this
               * passes NULL; presumably vn_free() tolerates that or is a
               * no-op on this platform -- confirm.
               */
00173         vn_free(ZTOV(zp));
00174         ASSERT(!list_link_active(&zp->z_link_node));
00175         mutex_destroy(&zp->z_lock);
00176         rw_destroy(&zp->z_parent_lock);
00177         rw_destroy(&zp->z_name_lock);
00178         mutex_destroy(&zp->z_acl_lock);
00179         avl_destroy(&zp->z_range_avl);
00180         mutex_destroy(&zp->z_range_lock);
00181 
00182         ASSERT(zp->z_dirlocks == NULL);
00183         ASSERT(zp->z_acl_cached == NULL);
00184 }
00185 
00186 #ifdef  ZNODE_STATS
      /*
       * Counters for the early-exit paths of zfs_znode_move(); each one is
       * bumped through ZNODE_STAT_ADD() just before the corresponding return.
       */
00187 static struct {
00188         uint64_t zms_zfsvfs_invalid;
00189         uint64_t zms_zfsvfs_recheck1;
00190         uint64_t zms_zfsvfs_unmounted;
00191         uint64_t zms_zfsvfs_recheck2;
00192         uint64_t zms_obj_held;
00193         uint64_t zms_vnode_locked;
00194         uint64_t zms_not_only_dnlc;
00195 } znode_move_stats;
00196 #endif  /* ZNODE_STATS */
00197 
00198 #ifdef sun
00199 static void
00200 zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
00201 {
              /*
               * Transfer the identity of znode 'ozp' to 'nzp': copy the
               * cached attributes, swap the two vnodes, re-point the SA
               * handle's user pointer at nzp, then invalidate ozp.  No locks
               * are taken here; zfs_znode_move() acquires them before
               * calling.
               */
00202         vnode_t *vp;
00203 
00204         /* Copy fields. */
00205         nzp->z_zfsvfs = ozp->z_zfsvfs;
00206 
00207         /* Swap vnodes. */
00208         vp = nzp->z_vnode;
00209         nzp->z_vnode = ozp->z_vnode;
00210         ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
              /* Each vnode must point back at the znode that now owns it. */
00211         ZTOV(ozp)->v_data = ozp;
00212         ZTOV(nzp)->v_data = nzp;
00213 
00214         nzp->z_id = ozp->z_id;
00215         ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
00216         ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
00217         nzp->z_unlinked = ozp->z_unlinked;
00218         nzp->z_atime_dirty = ozp->z_atime_dirty;
00219         nzp->z_zn_prefetch = ozp->z_zn_prefetch;
00220         nzp->z_blksz = ozp->z_blksz;
00221         nzp->z_seq = ozp->z_seq;
00222         nzp->z_mapcnt = ozp->z_mapcnt;
00223         nzp->z_gen = ozp->z_gen;
00224         nzp->z_sync_cnt = ozp->z_sync_cnt;
00225         nzp->z_is_sa = ozp->z_is_sa;
00226         nzp->z_sa_hdl = ozp->z_sa_hdl;
00227         bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
00228         nzp->z_links = ozp->z_links;
00229         nzp->z_size = ozp->z_size;
00230         nzp->z_pflags = ozp->z_pflags;
00231         nzp->z_uid = ozp->z_uid;
00232         nzp->z_gid = ozp->z_gid;
00233         nzp->z_mode = ozp->z_mode;
00234 
00235         /*
00236          * Since this is just an idle znode and kmem is already dealing with
00237          * memory pressure, release any cached ACL.
00238          */
00239         if (ozp->z_acl_cached) {
00240                 zfs_acl_free(ozp->z_acl_cached);
00241                 ozp->z_acl_cached = NULL;
00242         }
00243 
              /* The SA handle now belongs to nzp. */
00244         sa_set_userp(nzp->z_sa_hdl, nzp);
00245 
00246         /*
00247          * Invalidate the original znode by clearing fields that provide a
00248          * pointer back to the znode. Set the low bit of the vfs pointer to
00249          * ensure that zfs_znode_move() recognizes the znode as invalid in any
00250          * subsequent callback.
00251          */
00252         ozp->z_sa_hdl = NULL;
00253         POINTER_INVALIDATE(&ozp->z_zfsvfs);
00254 
00255         /*
00256          * Mark the znode.
00257          */
00258         nzp->z_moved = 1;
00259         ozp->z_moved = (uint8_t)-1;
00260 }
00261 
00262 /*ARGSUSED*/
      /*
       * Move callback registered for znode_cache in zfs_znode_init(): tries
       * to relocate the znode at 'buf' into 'newbuf'.  'size' and 'arg' are
       * not used.
       *
       * Returns:
       *   KMEM_CBRC_DONT_KNOW  z_zfsvfs is invalid, changed while the locks
       *                        were being taken, or the file system is
       *                        unmounted.
       *   KMEM_CBRC_LATER      the object hold or the vnode's v_lock could
       *                        not be taken with a try-lock, or the vnode is
       *                        referenced by something other than the DNLC.
       *   KMEM_CBRC_YES        the znode was moved into 'newbuf'.
       *
       * Every early return first releases the locks and holds taken so far.
       */
00263 static kmem_cbrc_t
00264 zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
00265 {
00266         znode_t *ozp = buf, *nzp = newbuf;
00267         zfsvfs_t *zfsvfs;
00268         vnode_t *vp;
00269 
00270         /*
00271          * The znode is on the file system's list of known znodes if the vfs
00272          * pointer is valid. We set the low bit of the vfs pointer when freeing
00273          * the znode to invalidate it, and the memory patterns written by kmem
00274          * (baddcafe and deadbeef) set at least one of the two low bits. A newly
00275          * created znode sets the vfs pointer last of all to indicate that the
00276          * znode is known and in a valid state to be moved by this function.
00277          */
00278         zfsvfs = ozp->z_zfsvfs;
00279         if (!POINTER_IS_VALID(zfsvfs)) {
00280                 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
00281                 return (KMEM_CBRC_DONT_KNOW);
00282         }
00283 
00284         /*
00285          * Close a small window in which it's possible that the filesystem could
00286          * be unmounted and freed, and zfsvfs, though valid in the previous
00287          * statement, could point to unrelated memory by the time we try to
00288          * prevent the filesystem from being unmounted.
00289          */
00290         rw_enter(&zfsvfs_lock, RW_WRITER);
00291         if (zfsvfs != ozp->z_zfsvfs) {
00292                 rw_exit(&zfsvfs_lock);
00293                 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
00294                 return (KMEM_CBRC_DONT_KNOW);
00295         }
00296 
00297         /*
00298          * If the znode is still valid, then so is the file system. We know that
00299          * no valid file system can be freed while we hold zfsvfs_lock, so we
00300          * can safely ensure that the filesystem is not and will not be
00301          * unmounted. The next statement is equivalent to ZFS_ENTER().
00302          */
00303         rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
00304         if (zfsvfs->z_unmounted) {
00305                 ZFS_EXIT(zfsvfs);
00306                 rw_exit(&zfsvfs_lock);
00307                 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
00308                 return (KMEM_CBRC_DONT_KNOW);
00309         }
              /* The teardown lock is held now, so zfsvfs_lock can be dropped. */
00310         rw_exit(&zfsvfs_lock);
00311 
00312         mutex_enter(&zfsvfs->z_znodes_lock);
00313         /*
00314          * Recheck the vfs pointer in case the znode was removed just before
00315          * acquiring the lock.
00316          */
00317         if (zfsvfs != ozp->z_zfsvfs) {
00318                 mutex_exit(&zfsvfs->z_znodes_lock);
00319                 ZFS_EXIT(zfsvfs);
00320                 ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
00321                 return (KMEM_CBRC_DONT_KNOW);
00322         }
00323 
00324         /*
00325          * At this point we know that as long as we hold z_znodes_lock, the
00326          * znode cannot be freed and fields within the znode can be safely
00327          * accessed. Now, prevent a race with zfs_zget().
00328          */
00329         if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
00330                 mutex_exit(&zfsvfs->z_znodes_lock);
00331                 ZFS_EXIT(zfsvfs);
00332                 ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
00333                 return (KMEM_CBRC_LATER);
00334         }
00335 
              /* Only a try-lock is attempted on the vnode; retry later if busy. */
00336         vp = ZTOV(ozp);
00337         if (mutex_tryenter(&vp->v_lock) == 0) {
00338                 ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
00339                 mutex_exit(&zfsvfs->z_znodes_lock);
00340                 ZFS_EXIT(zfsvfs);
00341                 ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
00342                 return (KMEM_CBRC_LATER);
00343         }
00344 
00345         /* Only move znodes that are referenced _only_ by the DNLC. */
00346         if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
00347                 mutex_exit(&vp->v_lock);
00348                 ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
00349                 mutex_exit(&zfsvfs->z_znodes_lock);
00350                 ZFS_EXIT(zfsvfs);
00351                 ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
00352                 return (KMEM_CBRC_LATER);
00353         }
00354 
00355         /*
00356          * The znode is known and in a valid state to move. We're holding the
00357          * locks needed to execute the critical section.
00358          */
00359         zfs_znode_move_impl(ozp, nzp);
00360         mutex_exit(&vp->v_lock);
00361         ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
00362 
              /* Still under z_znodes_lock: put nzp where ozp was on the list. */
00363         list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
00364         mutex_exit(&zfsvfs->z_znodes_lock);
00365         ZFS_EXIT(zfsvfs);
00366 
00367         return (KMEM_CBRC_YES);
00368 }
00369 #endif /* sun */
00370 
      /*
       * One-time setup for this file: initializes zfsvfs_lock and creates the
       * znode kmem cache.  zfs_znode_fini() undoes both.
       */
00371 void
00372 zfs_znode_init(void)
00373 {
00374         /*
00375          * Initialize zcache
00376          */
00377         rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
00378         ASSERT(znode_cache == NULL);
              /*
               * No constructor is registered (NULL below); the allocation
               * sites call zfs_znode_cache_constructor() by hand right after
               * kmem_cache_alloc().  The destructor is registered.
               */
00379         znode_cache = kmem_cache_create("zfs_znode_cache",
00380             sizeof (znode_t), 0, /* zfs_znode_cache_constructor */ NULL,
00381             zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
              /*
               * NOTE(review): zfs_znode_move() is only defined under
               * #ifdef sun; presumably kmem_cache_set_move() discards its
               * arguments on other platforms -- confirm.
               */
00382         kmem_cache_set_move(znode_cache, zfs_znode_move);
00383 }
00384 
      /*
       * Undo zfs_znode_init(): destroy the znode cache if it exists and
       * destroy zfsvfs_lock.  When 'sun' is defined the vfs and vnode op
       * tables are removed first.
       */
00385 void
00386 zfs_znode_fini(void)
00387 {
00388 #ifdef sun
00389         /*
00390          * Cleanup vfs & vnode ops
00391          */
00392         zfs_remove_op_tables();
00393 #endif  /* sun */
00394 
00395         /*
00396          * Cleanup zcache
00397          */
00398         if (znode_cache)
00399                 kmem_cache_destroy(znode_cache);
              /* Reset so that zfs_znode_init()'s ASSERT holds on a later init. */
00400         znode_cache = NULL;
00401         rw_destroy(&zfsvfs_lock);
00402 }
00403 
00404 #ifdef sun
00405 struct vnodeops *zfs_dvnodeops;
00406 struct vnodeops *zfs_fvnodeops;
00407 struct vnodeops *zfs_symvnodeops;
00408 struct vnodeops *zfs_xdvnodeops;
00409 struct vnodeops *zfs_evnodeops;
00410 struct vnodeops *zfs_sharevnodeops;
00411 
      /*
       * Release the vfs ops registered for zfsfstype and every vnode ops
       * vector above that is non-NULL, then reset zfsfstype to 0 and all six
       * pointers to NULL.  The NULL checks make it usable after a partially
       * failed zfs_create_op_tables().
       */
00412 void
00413 zfs_remove_op_tables()
00414 {
00415         /*
00416          * Remove vfs ops
00417          */
00418         ASSERT(zfsfstype);
00419         (void) vfs_freevfsops_by_type(zfsfstype);
00420         zfsfstype = 0;
00421 
00422         /*
00423          * Remove vnode ops
00424          */
00425         if (zfs_dvnodeops)
00426                 vn_freevnodeops(zfs_dvnodeops);
00427         if (zfs_fvnodeops)
00428                 vn_freevnodeops(zfs_fvnodeops);
00429         if (zfs_symvnodeops)
00430                 vn_freevnodeops(zfs_symvnodeops);
00431         if (zfs_xdvnodeops)
00432                 vn_freevnodeops(zfs_xdvnodeops);
00433         if (zfs_evnodeops)
00434                 vn_freevnodeops(zfs_evnodeops);
00435         if (zfs_sharevnodeops)
00436                 vn_freevnodeops(zfs_sharevnodeops);
00437 
00438         zfs_dvnodeops = NULL;
00439         zfs_fvnodeops = NULL;
00440         zfs_symvnodeops = NULL;
00441         zfs_xdvnodeops = NULL;
00442         zfs_evnodeops = NULL;
00443         zfs_sharevnodeops = NULL;
00444 }
00445 
00446 extern const fs_operation_def_t zfs_dvnodeops_template[];
00447 extern const fs_operation_def_t zfs_fvnodeops_template[];
00448 extern const fs_operation_def_t zfs_xdvnodeops_template[];
00449 extern const fs_operation_def_t zfs_symvnodeops_template[];
00450 extern const fs_operation_def_t zfs_evnodeops_template[];
00451 extern const fs_operation_def_t zfs_sharevnodeops_template[];
00452 
      /*
       * Build the six ZFS vnode ops vectors from their templates with
       * vn_make_ops().
       *
       * Returns 0 on success (or if zfs_dvnodeops is already set), otherwise
       * the first vn_make_ops() error.  On error the vectors created before
       * the failing call are left in place; nothing is freed here.
       */
00453 int
00454 zfs_create_op_tables()
00455 {
00456         int error;
00457 
00458         /*
00459          * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
00460          * due to a failure to remove the 2nd modlinkage (zfs_modldrv).
00461          * In this case we just return as the ops vectors are already set up.
00462          */
00463         if (zfs_dvnodeops)
00464                 return (0);
00465 
00466         error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
00467             &zfs_dvnodeops);
00468         if (error)
00469                 return (error);
00470 
00471         error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
00472             &zfs_fvnodeops);
00473         if (error)
00474                 return (error);
00475 
00476         error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
00477             &zfs_symvnodeops);
00478         if (error)
00479                 return (error);
00480 
00481         error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
00482             &zfs_xdvnodeops);
00483         if (error)
00484                 return (error);
00485 
00486         error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
00487             &zfs_evnodeops);
00488         if (error)
00489                 return (error);
00490 
00491         error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
00492             &zfs_sharevnodeops);
00493 
00494         return (error);
00495 }
00496 #endif  /* sun */
00497 
      /*
       * Create the shares directory object for 'zfsvfs' in transaction 'tx'
       * and record its object id in the master node under ZFS_SHARES_DIR.
       *
       * A throw-away znode backed by an on-stack vnode is handed to
       * zfs_mknode() with IS_ROOT_NODE; it is torn down again before
       * returning.  zfsvfs->z_shares_dir is set to the new object id
       * regardless of the zap_add() result.
       *
       * Returns the result of zap_add().
       */
00498 int
00499 zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
00500 {
00501         zfs_acl_ids_t acl_ids;
00502         vattr_t vattr;
00503         znode_t *sharezp;
00504         vnode_t *vp, vnode;
00505         znode_t *zp;
00506         int error;
00507 
              /* Attributes of the new directory: mode 0555, owned by kcred. */
00508         vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
00509         vattr.va_type = VDIR;
00510         vattr.va_mode = S_IFDIR|0555;
00511         vattr.va_uid = crgetuid(kcred);
00512         vattr.va_gid = crgetgid(kcred);
00513 
00514         sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
              /*
               * Pass a NULL vfs so that the constructor does not obtain (and
               * exclusively lock) a real vnode: z_vnode is pointed at the
               * on-stack vnode just below, so a vnode obtained here would be
               * overwritten without ever being released.
               */
00515         zfs_znode_cache_constructor(sharezp, NULL, 0);
00516         ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
00517         sharezp->z_moved = 0;
00518         sharezp->z_unlinked = 0;
00519         sharezp->z_atime_dirty = 0;
00520         sharezp->z_zfsvfs = zfsvfs;
00521         sharezp->z_is_sa = zfsvfs->z_use_sa;
00522 
              /* Back the temporary znode with the on-stack vnode. */
00523         sharezp->z_vnode = &vnode;
00524         vnode.v_data = sharezp;
00525 
00526         vp = ZTOV(sharezp);
00527         vp->v_type = VDIR;
00528 
00529         VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
00530             kcred, NULL, &acl_ids));
00531         zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
00532         ASSERT3P(zp, ==, sharezp);
              /* Invalidate z_zfsvfs again, as the destructor asserts. */
00533         POINTER_INVALIDATE(&sharezp->z_zfsvfs);
00534         error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
00535             ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
00536         zfsvfs->z_shares_dir = sharezp->z_id;
00537 
00538         zfs_acl_ids_free(&acl_ids);
              /* Detach the on-stack vnode before the znode is freed. */
00539         ZTOV(sharezp)->v_data = NULL;
00540         ZTOV(sharezp)->v_count = 0;
00541         ZTOV(sharezp)->v_holdcnt = 0;
00542         zp->z_vnode = NULL;
00543         sa_handle_destroy(sharezp->z_sa_hdl);
00544         sharezp->z_vnode = NULL;
00545         kmem_cache_free(znode_cache, sharezp);
00546 
00547         return (error);
00548 }
00549 
00550 /*
00551  * define a couple of values we need available
00552  * for both 64 and 32 bit environments.
       *
       * NBITSMINOR64 is the shift that separates major from minor in the
       * 64-bit device encoding used by zfs_expldev() and zfs_cmpldev();
       * MAXMIN64 is the mask zfs_cmpldev() applies to extract the minor.
       * Each is defined only if not already provided.
00553  */
00554 #ifndef NBITSMINOR64
00555 #define NBITSMINOR64    32
00556 #endif
00557 #ifndef MAXMAJ64
00558 #define MAXMAJ64        0xffffffffUL
00559 #endif
00560 #ifndef MAXMIN64
00561 #define MAXMIN64        0xffffffffUL
00562 #endif
00563 
      /*
       * Encode a dev_t as a 64-bit value: major(dev) shifted left by
       * NBITSMINOR64, OR-ed with minor(dev).  zfs_cmpldev() reverses this.
       */
00572 static uint64_t
00573 zfs_expldev(dev_t dev)
00574 {
00575         return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev));
00576 }
00585 dev_t
00586 zfs_cmpldev(uint64_t dev)
00587 {
              /*
               * Inverse of zfs_expldev(): the bits above NBITSMINOR64 form
               * the major number, the bits under MAXMIN64 the minor number.
               */
00588         return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64)));
00589 }
00590 
      /*
       * Attach a system-attribute handle to znode 'zp'.
       *
       * If 'sa_hdl' is NULL a shared handle is obtained from 'db' with zp as
       * its user pointer; otherwise the supplied handle is adopted and its
       * user pointer is set to zp.  z_is_sa is set from 'obj_type', and the
       * vnode is flagged VROOT when zp is the file system's root object.
       *
       * The caller must hold the object mutex for zp->z_id (asserted).
       */
00591 static void
00592 zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
00593     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
00594 {
00595         ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
00596         ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
00597 
00598         mutex_enter(&zp->z_lock);
00599 
00600         ASSERT(zp->z_sa_hdl == NULL);
00601         ASSERT(zp->z_acl_cached == NULL);
00602         if (sa_hdl == NULL) {
00603                 VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
00604                     SA_HDL_SHARED, &zp->z_sa_hdl));
00605         } else {
00606                 zp->z_sa_hdl = sa_hdl;
00607                 sa_set_userp(sa_hdl, zp);
00608         }
00609 
00610         zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
00611 
00612         /*
00613          * Slap on VROOT if we are the root znode
00614          */
00615         if (zp->z_id == zfsvfs->z_root)
00616                 ZTOV(zp)->v_flag |= VROOT;
00617 
00618         mutex_exit(&zp->z_lock);
              /* Called only after z_lock has been dropped. */
00619         vn_exists(ZTOV(zp));
00620 }
00621 
      /*
       * Destroy the znode's SA handle and clear z_sa_hdl.
       *
       * Asserts that at least one of the following holds: the object mutex
       * for zp->z_id is held, the znode is unlinked, or the file system's
       * z_teardown_inactive_lock is write-held.
       */
00622 void
00623 zfs_znode_dmu_fini(znode_t *zp)
00624 {
00625         ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
00626             zp->z_unlinked ||
00627             RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
00628 
00629         sa_handle_destroy(zp->z_sa_hdl);
00630         zp->z_sa_hdl = NULL;
00631 }
00632 
      /*
       * Dispose of a vnode that will not be used after all: clear its v_data,
       * switch it to dead_vnodeops, then vgone() and vput() it.  Used by the
       * failure path of zfs_znode_alloc().
       */
00633 static void
00634 zfs_vnode_forget(vnode_t *vp)
00635 {
00636 
00637         /* copied from insmntque_stddtr */
00638         vp->v_data = NULL;
00639         vp->v_op = &dead_vnodeops;
00640         vgone(vp);
00641         vput(vp);
00642 }
00643 
      /*
       * Allocate and set up an in-core znode (with its vnode) for the object
       * behind 'db'.
       *
       *   zfsvfs    file system the znode belongs to
       *   db        buffer whose db_object becomes z_id
       *   blksz     stored in z_blksz
       *   obj_type  DMU_OT_SA selects z_is_sa (via zfs_znode_sa_init())
       *   hdl       existing SA handle to adopt, or NULL to obtain one
       *
       * Returns the znode, already on zfsvfs->z_all_znodes and with a
       * VFS_HOLD taken, or NULL if the attribute lookup fails or the stored
       * generation is 0.
       */
00651 static znode_t *
00652 zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
00653     dmu_object_type_t obj_type, sa_handle_t *hdl)
00654 {
00655         znode_t *zp;
00656         vnode_t *vp;
00657         uint64_t mode;
00658         uint64_t parent;
00659         sa_bulk_attr_t bulk[9]; /* one slot per SA_ADD_BULK_ATTR() below */
00660         int count = 0;
00661 
              /*
               * The cache has no registered constructor (see
               * zfs_znode_init()), so the constructor is run by hand.
               */
00662         zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
00663         zfs_znode_cache_constructor(zp, zfsvfs->z_parent->z_vfs, 0);
00664 
00665         ASSERT(zp->z_dirlocks == NULL);
00666         ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
00667         zp->z_moved = 0;
00668 
00669         /*
00670          * Defer setting z_zfsvfs until the znode is ready to be a candidate for
00671          * the zfs_znode_move() callback.
00672          */
00673         zp->z_sa_hdl = NULL;
00674         zp->z_unlinked = 0;
00675         zp->z_atime_dirty = 0;
00676         zp->z_mapcnt = 0;
00677         zp->z_id = db->db_object;
00678         zp->z_blksz = blksz;
00679         zp->z_seq = 0x7A4653; /* the bytes spell "zFS" in ASCII */
00680         zp->z_sync_cnt = 0;
00681 
00682         vp = ZTOV(zp);
00683 
00684         zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
00685 
              /* Read the attributes cached in the znode in one bulk lookup. */
00686         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
00687         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
00688         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
00689             &zp->z_size, 8);
00690         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
00691             &zp->z_links, 8);
00692         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
00693             &zp->z_pflags, 8);
00694         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
00695         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
00696             &zp->z_atime, 16);
00697         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
00698             &zp->z_uid, 8);
00699         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
00700             &zp->z_gid, 8);
00701 
              /*
               * Lookup failed or the generation is 0: undo everything.  The
               * SA handle is destroyed only if it was obtained here
               * (hdl == NULL); a caller-supplied handle is left alone.
               */
00702         if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
00703                 if (hdl == NULL)
00704                         sa_handle_destroy(zp->z_sa_hdl);
00705                 zfs_vnode_forget(vp);
00706                 zp->z_vnode = NULL;
00707                 kmem_cache_free(znode_cache, zp);
00708                 return (NULL);
00709         }
00710 
00711         zp->z_mode = mode;
00712 
00713         vp->v_type = IFTOVT((mode_t)mode);
00714 
              /* Per-type setup; most cases pick a different vnode ops vector. */
00715         switch (vp->v_type) {
00716         case VDIR:
00717                 zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
00718                 break;
00719 #ifdef sun
00720         case VBLK:
00721         case VCHR:
00722                 {
00723                         uint64_t rdev;
00724                         VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
00725                             &rdev, sizeof (rdev)) == 0);
00726 
00727                         vp->v_rdev = zfs_cmpldev(rdev);
00728                 }
00729                 break;
00730 #endif  /* sun */
00731         case VFIFO:
00732 #ifdef sun
00733         case VSOCK:
00734         case VDOOR:
00735 #endif  /* sun */
00736                 vp->v_op = &zfs_fifoops;
00737                 break;
00738         case VREG:
                      /* Regular files directly under the shares directory. */
00739                 if (parent == zfsvfs->z_shares_dir) {
00740                         ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
00741                         vp->v_op = &zfs_shareops;
00742                 }
00743                 break;
00744 #ifdef sun
00745         case VLNK:
00746                 vn_setops(vp, zfs_symvnodeops);
00747                 break;
00748         default:
00749                 vn_setops(vp, zfs_evnodeops);
00750                 break;
00751 #endif  /* sun */
00752         }
00753         if (vp->v_type != VFIFO)
00754                 VN_LOCK_ASHARE(vp);
00755 
00756         mutex_enter(&zfsvfs->z_znodes_lock);
00757         list_insert_tail(&zfsvfs->z_all_znodes, zp);
00758         membar_producer();
00759         /*
00760          * Everything else must be valid before assigning z_zfsvfs makes the
00761          * znode eligible for zfs_znode_move().
00762          */
00763         zp->z_zfsvfs = zfsvfs;
00764         mutex_exit(&zfsvfs->z_znodes_lock);
00765 
00766         VFS_HOLD(zfsvfs->z_vfs);
00767         return (zp);
00768 }
00769 
00770 static uint64_t empty_xattr;
00771 static uint64_t pad[4];
00772 static zfs_acl_phys_t acl_phys;
00785 void
00786 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
00787     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
00788 {
00789         uint64_t        crtime[2], atime[2], mtime[2], ctime[2];
00790         uint64_t        mode, size, links, parent, pflags;
00791         uint64_t        dzp_pflags = 0;
00792         uint64_t        rdev = 0;
00793         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
00794         dmu_buf_t       *db;
00795         timestruc_t     now;
00796         uint64_t        gen, obj;
00797         int             err;
00798         int             bonuslen;       /* Length of bonus buffer */
00799         sa_handle_t     *sa_hdl;
00800         dmu_object_type_t obj_type;
00801         sa_bulk_attr_t  sa_attrs[ZPL_END];
00802         int             cnt = 0;
00803         zfs_acl_locator_cb_t locate = { 0 };
00804 
00805         ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
00806 
00807         if (zfsvfs->z_replay) {
00808                 obj = vap->va_nodeid;
00809                 now = vap->va_ctime;            /* see zfs_replay_create() */
00810                 gen = vap->va_nblocks;          /* ditto */
00811         } else {
00812                 obj = 0;
00813                 gethrestime(&now);
00814                 gen = dmu_tx_get_txg(tx);
00815         }
00816 
00817         obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
00818         bonuslen = (obj_type == DMU_OT_SA) ?
00819             DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;
00820 
00821         /*
00822          * Create a new DMU object.
00823          */
00824         /*
00825          * There's currently no mechanism for pre-reading the blocks that will
00826          * be needed to allocate a new object, so we accept the small chance
00827          * that there will be an i/o error and we will fail one of the
00828          * assertions below.
00829          */
00830         if (vap->va_type == VDIR) {
00831                 if (zfsvfs->z_replay) {
00832                         err = zap_create_claim_norm(zfsvfs->z_os, obj,
00833                             zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
00834                             obj_type, bonuslen, tx);
00835                         ASSERT0(err);
00836                 } else {
00837                         obj = zap_create_norm(zfsvfs->z_os,
00838                             zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
00839                             obj_type, bonuslen, tx);
00840                 }
00841         } else {
00842                 if (zfsvfs->z_replay) {
00843                         err = dmu_object_claim(zfsvfs->z_os, obj,
00844                             DMU_OT_PLAIN_FILE_CONTENTS, 0,
00845                             obj_type, bonuslen, tx);
00846                         ASSERT0(err);
00847                 } else {
00848                         obj = dmu_object_alloc(zfsvfs->z_os,
00849                             DMU_OT_PLAIN_FILE_CONTENTS, 0,
00850                             obj_type, bonuslen, tx);
00851                 }
00852         }
00853 
00854         getnewvnode_reserve(1);
00855         ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
00856         VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
00857 
00858         /*
00859          * If this is the root, fix up the half-initialized parent pointer
00860          * to reference the just-allocated physical data area.
00861          */
00862         if (flag & IS_ROOT_NODE) {
00863                 dzp->z_id = obj;
00864         } else {
00865                 dzp_pflags = dzp->z_pflags;
00866         }
00867 
00868         /*
00869          * If parent is an xattr, so am I.
00870          */
00871         if (dzp_pflags & ZFS_XATTR) {
00872                 flag |= IS_XATTR;
00873         }
00874 
00875         if (zfsvfs->z_use_fuids)
00876                 pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
00877         else
00878                 pflags = 0;
00879 
00880         if (vap->va_type == VDIR) {
00881                 size = 2;               /* contents ("." and "..") */
00882                 links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
00883         } else {
00884                 size = links = 0;
00885         }
00886 
00887         if (vap->va_type == VBLK || vap->va_type == VCHR) {
00888                 rdev = zfs_expldev(vap->va_rdev);
00889         }
00890 
00891         parent = dzp->z_id;
00892         mode = acl_ids->z_mode;
00893         if (flag & IS_XATTR)
00894                 pflags |= ZFS_XATTR;
00895 
00896         /*
00897          * No execs denied will be determined when zfs_mode_compute() is called.
00898          */
00899         pflags |= acl_ids->z_aclp->z_hints &
00900             (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
00901             ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
00902 
00903         ZFS_TIME_ENCODE(&now, crtime);
00904         ZFS_TIME_ENCODE(&now, ctime);
00905 
00906         if (vap->va_mask & AT_ATIME) {
00907                 ZFS_TIME_ENCODE(&vap->va_atime, atime);
00908         } else {
00909                 ZFS_TIME_ENCODE(&now, atime);
00910         }
00911 
00912         if (vap->va_mask & AT_MTIME) {
00913                 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
00914         } else {
00915                 ZFS_TIME_ENCODE(&now, mtime);
00916         }
00917 
00918         /* Now add in all of the "SA" attributes */
00919         VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
00920             &sa_hdl));
00921 
00922         /*
00923          * Setup the array of attributes to be replaced/set on the new file
00924          *
00925          * order for  DMU_OT_ZNODE is critical since it needs to be constructed
00926          * in the old znode_phys_t format.  Don't change this ordering
00927          */
00928 
00929         if (obj_type == DMU_OT_ZNODE) {
00930                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
00931                     NULL, &atime, 16);
00932                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
00933                     NULL, &mtime, 16);
00934                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
00935                     NULL, &ctime, 16);
00936                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
00937                     NULL, &crtime, 16);
00938                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
00939                     NULL, &gen, 8);
00940                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
00941                     NULL, &mode, 8);
00942                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
00943                     NULL, &size, 8);
00944                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
00945                     NULL, &parent, 8);
00946         } else {
00947                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
00948                     NULL, &mode, 8);
00949                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
00950                     NULL, &size, 8);
00951                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
00952                     NULL, &gen, 8);
00953                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
00954                     &acl_ids->z_fuid, 8);
00955                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
00956                     &acl_ids->z_fgid, 8);
00957                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
00958                     NULL, &parent, 8);
00959                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
00960                     NULL, &pflags, 8);
00961                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
00962                     NULL, &atime, 16);
00963                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
00964                     NULL, &mtime, 16);
00965                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
00966                     NULL, &ctime, 16);
00967                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
00968                     NULL, &crtime, 16);
00969         }
00970 
00971         SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
00972 
00973         if (obj_type == DMU_OT_ZNODE) {
00974                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
00975                     &empty_xattr, 8);
00976         }
00977         if (obj_type == DMU_OT_ZNODE ||
00978             (vap->va_type == VBLK || vap->va_type == VCHR)) {
00979                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
00980                     NULL, &rdev, 8);
00981 
00982         }
00983         if (obj_type == DMU_OT_ZNODE) {
00984                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
00985                     NULL, &pflags, 8);
00986                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
00987                     &acl_ids->z_fuid, 8);
00988                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
00989                     &acl_ids->z_fgid, 8);
00990                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
00991                     sizeof (uint64_t) * 4);
00992                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
00993                     &acl_phys, sizeof (zfs_acl_phys_t));
00994         } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
00995                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
00996                     &acl_ids->z_aclp->z_acl_count, 8);
00997                 locate.cb_aclp = acl_ids->z_aclp;
00998                 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
00999                     zfs_acl_data_locator, &locate,
01000                     acl_ids->z_aclp->z_acl_bytes);
01001                 mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
01002                     acl_ids->z_fuid, acl_ids->z_fgid);
01003         }
01004 
01005         VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
01006 
01007         if (!(flag & IS_ROOT_NODE)) {
01008                 *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
01009                 ASSERT(*zpp != NULL);
01010         } else {
01011                 /*
01012                  * If we are creating the root node, the "parent" we
01013                  * passed in is the znode for the root.
01014                  */
01015                 *zpp = dzp;
01016 
01017                 (*zpp)->z_sa_hdl = sa_hdl;
01018         }
01019 
01020         (*zpp)->z_pflags = pflags;
01021         (*zpp)->z_mode = mode;
01022 
01023         if (vap->va_mask & AT_XVATTR)
01024                 zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
01025 
01026         if (obj_type == DMU_OT_ZNODE ||
01027             acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
01028                 err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
01029                 ASSERT0(err);
01030         }
01031         if (!(flag & IS_ROOT_NODE)) {
01032                 vnode_t *vp;
01033 
01034                 vp = ZTOV(*zpp);
01035                 vp->v_vflag |= VV_FORCEINSMQ;
01036                 err = insmntque(vp, zfsvfs->z_vfs);
01037                 vp->v_vflag &= ~VV_FORCEINSMQ;
01038                 KASSERT(err == 0, ("insmntque() failed: error %d", err));
01039         }
01040         ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
01041         getnewvnode_drop_reserve();
01042 }
01043 
01050 void
01051 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
01052 {
01053         xoptattr_t *xoap;
01054 
01055         xoap = xva_getxoptattr(xvap);
01056         ASSERT(xoap);
01057 
01058         if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
01059                 uint64_t times[2];
01060                 ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
01061                 (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
01062                     &times, sizeof (times), tx);
01063                 XVA_SET_RTN(xvap, XAT_CREATETIME);
01064         }
01065         if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
01066                 ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
01067                     zp->z_pflags, tx);
01068                 XVA_SET_RTN(xvap, XAT_READONLY);
01069         }
01070         if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
01071                 ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
01072                     zp->z_pflags, tx);
01073                 XVA_SET_RTN(xvap, XAT_HIDDEN);
01074         }
01075         if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
01076                 ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
01077                     zp->z_pflags, tx);
01078                 XVA_SET_RTN(xvap, XAT_SYSTEM);
01079         }
01080         if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
01081                 ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
01082                     zp->z_pflags, tx);
01083                 XVA_SET_RTN(xvap, XAT_ARCHIVE);
01084         }
01085         if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
01086                 ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
01087                     zp->z_pflags, tx);
01088                 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
01089         }
01090         if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
01091                 ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
01092                     zp->z_pflags, tx);
01093                 XVA_SET_RTN(xvap, XAT_NOUNLINK);
01094         }
01095         if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
01096                 ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
01097                     zp->z_pflags, tx);
01098                 XVA_SET_RTN(xvap, XAT_APPENDONLY);
01099         }
01100         if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
01101                 ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
01102                     zp->z_pflags, tx);
01103                 XVA_SET_RTN(xvap, XAT_NODUMP);
01104         }
01105         if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
01106                 ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
01107                     zp->z_pflags, tx);
01108                 XVA_SET_RTN(xvap, XAT_OPAQUE);
01109         }
01110         if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
01111                 ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
01112                     xoap->xoa_av_quarantined, zp->z_pflags, tx);
01113                 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
01114         }
01115         if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
01116                 ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
01117                     zp->z_pflags, tx);
01118                 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
01119         }
01120         if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
01121                 zfs_sa_set_scanstamp(zp, xvap, tx);
01122                 XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
01123         }
01124         if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
01125                 ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
01126                     zp->z_pflags, tx);
01127                 XVA_SET_RTN(xvap, XAT_REPARSE);
01128         }
01129         if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
01130                 ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
01131                     zp->z_pflags, tx);
01132                 XVA_SET_RTN(xvap, XAT_OFFLINE);
01133         }
01134         if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
01135                 ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
01136                     zp->z_pflags, tx);
01137                 XVA_SET_RTN(xvap, XAT_SPARSE);
01138         }
01139 }
01140 
01141 int
01142 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
01143 {
01144         dmu_object_info_t doi;
01145         dmu_buf_t       *db;
01146         znode_t         *zp;
01147         int err;
01148         sa_handle_t     *hdl;
01149         int first = 1;
01150 
01151         *zpp = NULL;
01152 
01153         getnewvnode_reserve(1);
01154 again:
01155         ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
01156 
01157         err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
01158         if (err) {
01159                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01160                 getnewvnode_drop_reserve();
01161                 return (err);
01162         }
01163 
01164         dmu_object_info_from_db(db, &doi);
01165         if (doi.doi_bonus_type != DMU_OT_SA &&
01166             (doi.doi_bonus_type != DMU_OT_ZNODE ||
01167             (doi.doi_bonus_type == DMU_OT_ZNODE &&
01168             doi.doi_bonus_size < sizeof (znode_phys_t)))) {
01169                 sa_buf_rele(db, NULL);
01170                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01171                 getnewvnode_drop_reserve();
01172                 return (EINVAL);
01173         }
01174 
01175         hdl = dmu_buf_get_user(db);
01176         if (hdl != NULL) {
01177                 zp  = sa_get_userdata(hdl);
01178 
01179 
01180                 /*
01181                  * Since "SA" does immediate eviction we
01182                  * should never find a sa handle that doesn't
01183                  * know about the znode.
01184                  */
01185 
01186                 ASSERT3P(zp, !=, NULL);
01187 
01188                 mutex_enter(&zp->z_lock);
01189                 ASSERT3U(zp->z_id, ==, obj_num);
01190                 if (zp->z_unlinked) {
01191                         err = ENOENT;
01192                 } else {
01193                         vnode_t *vp;
01194                         int dying = 0;
01195 
01196                         vp = ZTOV(zp);
01197                         if (vp == NULL)
01198                                 dying = 1;
01199                         else {
01200                                 VN_HOLD(vp);
01201                                 if ((vp->v_iflag & VI_DOOMED) != 0) {
01202                                         dying = 1;
01203                                         /*
01204                                          * Don't VN_RELE() vnode here, because
01205                                          * it can call vn_lock() which creates
01206                                          * LOR between vnode lock and znode
01207                                          * lock. We will VN_RELE() the vnode
01208                                          * after droping znode lock.
01209                                          */
01210                                 }
01211                         }
01212                         if (dying) {
01213                                 if (first) {
01214                                         ZFS_LOG(1, "dying znode detected (zp=%p)", zp);
01215                                         first = 0;
01216                                 }
01217                                 /*
01218                                  * znode is dying so we can't reuse it, we must
01219                                  * wait until destruction is completed.
01220                                  */
01221                                 sa_buf_rele(db, NULL);
01222                                 mutex_exit(&zp->z_lock);
01223                                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01224                                 if (vp != NULL)
01225                                         VN_RELE(vp);
01226                                 tsleep(zp, 0, "zcollide", 1);
01227                                 goto again;
01228                         }
01229                         *zpp = zp;
01230                         err = 0;
01231                 }
01232                 sa_buf_rele(db, NULL);
01233                 mutex_exit(&zp->z_lock);
01234                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01235                 getnewvnode_drop_reserve();
01236                 return (err);
01237         }
01238 
01239         /*
01240          * Not found create new znode/vnode
01241          * but only if file exists.
01242          *
01243          * There is a small window where zfs_vget() could
01244          * find this object while a file create is still in
01245          * progress.  This is checked for in zfs_znode_alloc()
01246          *
01247          * if zfs_znode_alloc() fails it will drop the hold on the
01248          * bonus buffer.
01249          */
01250         zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
01251             doi.doi_bonus_type, NULL);
01252         if (zp == NULL) {
01253                 err = ENOENT;
01254         } else {
01255                 *zpp = zp;
01256         }
01257         if (err == 0) {
01258                 vnode_t *vp = ZTOV(zp);
01259 
01260                 err = insmntque(vp, zfsvfs->z_vfs);
01261                 if (err == 0)
01262                         VOP_UNLOCK(vp, 0);
01263                 else {
01264                         zp->z_vnode = NULL;
01265                         zfs_znode_dmu_fini(zp);
01266                         zfs_znode_free(zp);
01267                         *zpp = NULL;
01268                 }
01269         }
01270         ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01271         getnewvnode_drop_reserve();
01272         return (err);
01273 }
01274 
01275 int
01276 zfs_rezget(znode_t *zp)
01277 {
01278         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
01279         dmu_object_info_t doi;
01280         dmu_buf_t *db;
01281         vnode_t *vp;
01282         uint64_t obj_num = zp->z_id;
01283         uint64_t mode, size;
01284         sa_bulk_attr_t bulk[8];
01285         int err;
01286         int count = 0;
01287         uint64_t gen;
01288 
01289         ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
01290 
01291         mutex_enter(&zp->z_acl_lock);
01292         if (zp->z_acl_cached) {
01293                 zfs_acl_free(zp->z_acl_cached);
01294                 zp->z_acl_cached = NULL;
01295         }
01296 
01297         mutex_exit(&zp->z_acl_lock);
01298         ASSERT(zp->z_sa_hdl == NULL);
01299         err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
01300         if (err) {
01301                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01302                 return (err);
01303         }
01304 
01305         dmu_object_info_from_db(db, &doi);
01306         if (doi.doi_bonus_type != DMU_OT_SA &&
01307             (doi.doi_bonus_type != DMU_OT_ZNODE ||
01308             (doi.doi_bonus_type == DMU_OT_ZNODE &&
01309             doi.doi_bonus_size < sizeof (znode_phys_t)))) {
01310                 sa_buf_rele(db, NULL);
01311                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01312                 return (EINVAL);
01313         }
01314 
01315         zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
01316         size = zp->z_size;
01317 
01318         /* reload cached values */
01319         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
01320             &gen, sizeof (gen));
01321         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
01322             &zp->z_size, sizeof (zp->z_size));
01323         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
01324             &zp->z_links, sizeof (zp->z_links));
01325         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
01326             &zp->z_pflags, sizeof (zp->z_pflags));
01327         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
01328             &zp->z_atime, sizeof (zp->z_atime));
01329         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
01330             &zp->z_uid, sizeof (zp->z_uid));
01331         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
01332             &zp->z_gid, sizeof (zp->z_gid));
01333         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
01334             &mode, sizeof (mode));
01335 
01336         if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
01337                 zfs_znode_dmu_fini(zp);
01338                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01339                 return (EIO);
01340         }
01341 
01342         zp->z_mode = mode;
01343 
01344         if (gen != zp->z_gen) {
01345                 zfs_znode_dmu_fini(zp);
01346                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01347                 return (EIO);
01348         }
01349 
01350         /*
01351          * XXXPJD: Not sure how is that possible, but under heavy
01352          * zfs recv -F load it happens that z_gen is the same, but
01353          * vnode type is different than znode type. This would mean
01354          * that for example regular file was replaced with directory
01355          * which has the same object number.
01356          */
01357         vp = ZTOV(zp);
01358         if (vp != NULL &&
01359             vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
01360                 zfs_znode_dmu_fini(zp);
01361                 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01362                 return (EIO);
01363         }
01364 
01365         zp->z_unlinked = (zp->z_links == 0);
01366         zp->z_blksz = doi.doi_data_block_size;
01367         if (vp != NULL) {
01368                 vn_pages_remove(vp, 0, 0);
01369                 if (zp->z_size != size)
01370                         vnode_pager_setsize(vp, zp->z_size);
01371         }
01372 
01373         ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
01374 
01375         return (0);
01376 }
01377 
01378 void
01379 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
01380 {
01381         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
01382         objset_t *os = zfsvfs->z_os;
01383         uint64_t obj = zp->z_id;
01384         uint64_t acl_obj = zfs_external_acl(zp);
01385 
01386         ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
01387         if (acl_obj) {
01388                 VERIFY(!zp->z_is_sa);
01389                 VERIFY(0 == dmu_object_free(os, acl_obj, tx));
01390         }
01391         VERIFY(0 == dmu_object_free(os, obj, tx));
01392         zfs_znode_dmu_fini(zp);
01393         ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
01394         zfs_znode_free(zp);
01395 }
01396 
01397 void
01398 zfs_zinactive(znode_t *zp)
01399 {
01400         vnode_t *vp = ZTOV(zp);
01401         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
01402         uint64_t z_id = zp->z_id;
01403 
01404         ASSERT(zp->z_sa_hdl);
01405 
01406         /*
01407          * Don't allow a zfs_zget() while were trying to release this znode
01408          */
01409         ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
01410 
01411         mutex_enter(&zp->z_lock);
01412         VI_LOCK(vp);
01413         if (vp->v_count > 0) {
01414                 /*
01415                  * If the hold count is greater than zero, somebody has
01416                  * obtained a new reference on this znode while we were
01417                  * processing it here, so we are done.
01418                  */
01419                 VI_UNLOCK(vp);
01420                 mutex_exit(&zp->z_lock);
01421                 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
01422                 return;
01423         }
01424         VI_UNLOCK(vp);
01425 
01426         /*
01427          * If this was the last reference to a file with no links,
01428          * remove the file from the file system.
01429          */
01430         if (zp->z_unlinked) {
01431                 mutex_exit(&zp->z_lock);
01432                 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
01433                 ASSERT(vp->v_count == 0);
01434                 vrecycle(vp);
01435                 zfs_rmnode(zp);
01436                 return;
01437         }
01438 
01439         mutex_exit(&zp->z_lock);
01440         ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
01441 }
01442 
01443 void
01444 zfs_znode_free(znode_t *zp)
01445 {
01446         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
01447 
01448         ASSERT(ZTOV(zp) == NULL);
01449         ASSERT(zp->z_sa_hdl == NULL);
01450         mutex_enter(&zfsvfs->z_znodes_lock);
01451         POINTER_INVALIDATE(&zp->z_zfsvfs);
01452         list_remove(&zfsvfs->z_all_znodes, zp);
01453         mutex_exit(&zfsvfs->z_znodes_lock);
01454 
01455         if (zp->z_acl_cached) {
01456                 zfs_acl_free(zp->z_acl_cached);
01457                 zp->z_acl_cached = NULL;
01458         }
01459 
01460         kmem_cache_free(znode_cache, zp);
01461 
01462         VFS_RELE(zfsvfs->z_vfs);
01463 }
01464 
01465 void
01466 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
01467     uint64_t ctime[2], boolean_t have_tx)
01468 {
01469         timestruc_t     now;
01470 
01471         gethrestime(&now);
01472 
01473         if (have_tx) {  /* will sa_bulk_update happen really soon? */
01474                 zp->z_atime_dirty = 0;
01475                 zp->z_seq++;
01476         } else {
01477                 zp->z_atime_dirty = 1;
01478         }
01479 
01480         if (flag & AT_ATIME) {
01481                 ZFS_TIME_ENCODE(&now, zp->z_atime);
01482         }
01483 
01484         if (flag & AT_MTIME) {
01485                 ZFS_TIME_ENCODE(&now, mtime);
01486                 if (zp->z_zfsvfs->z_use_fuids) {
01487                         zp->z_pflags |= (ZFS_ARCHIVE |
01488                             ZFS_AV_MODIFIED);
01489                 }
01490         }
01491 
01492         if (flag & AT_CTIME) {
01493                 ZFS_TIME_ENCODE(&now, ctime);
01494                 if (zp->z_zfsvfs->z_use_fuids)
01495                         zp->z_pflags |= ZFS_ARCHIVE;
01496         }
01497 }
01498 
01508 void
01509 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
01510 {
01511         int             error;
01512         u_longlong_t    dummy;
01513 
01514         if (size <= zp->z_blksz)
01515                 return;
01516         /*
01517          * If the file size is already greater than the current blocksize,
01518          * we will not grow.  If there is more than one block in a file,
01519          * the blocksize cannot change.
01520          */
01521         if (zp->z_blksz && zp->z_size > zp->z_blksz)
01522                 return;
01523 
01524         error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
01525             size, 0, tx);
01526 
01527         if (error == ENOTSUP)
01528                 return;
01529         ASSERT0(error);
01530 
01531         /* What blocksize did we actually get? */
01532         dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
01533 }
01534 
#ifdef sun

/*
 * Stub that is never expected to be called (judging by its name, a
 * putpage callback): it trips ASSERT(0) and otherwise reports success.
 * None of the arguments are used.
 */
/* ARGSUSED */
static int
zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
    int flags, cred_t *cr)
{
	ASSERT(0);

	return (0);
}
#endif	/* sun */
01550 
01559 static int
01560 zfs_extend(znode_t *zp, uint64_t end)
01561 {
01562         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
01563         dmu_tx_t *tx;
01564         rl_t *rl;
01565         uint64_t newblksz;
01566         int error;
01567 
01568         /*
01569          * We will change zp_size, lock the whole file.
01570          */
01571         rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
01572 
01573         /*
01574          * Nothing to do if file already at desired length.
01575          */
01576         if (end <= zp->z_size) {
01577                 zfs_range_unlock(rl);
01578                 return (0);
01579         }
01580 top:
01581         tx = dmu_tx_create(zfsvfs->z_os);
01582         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
01583         zfs_sa_upgrade_txholds(tx, zp);
01584         if (end > zp->z_blksz &&
01585             (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
01586                 /*
01587                  * We are growing the file past the current block size.
01588                  */
01589                 if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
01590                         ASSERT(!ISP2(zp->z_blksz));
01591                         newblksz = MIN(end, SPA_MAXBLOCKSIZE);
01592                 } else {
01593                         newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
01594                 }
01595                 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
01596         } else {
01597                 newblksz = 0;
01598         }
01599 
01600         error = dmu_tx_assign(tx, TXG_NOWAIT);
01601         if (error) {
01602                 if (error == ERESTART) {
01603                         dmu_tx_wait(tx);
01604                         dmu_tx_abort(tx);
01605                         goto top;
01606                 }
01607                 dmu_tx_abort(tx);
01608                 zfs_range_unlock(rl);
01609                 return (error);
01610         }
01611 
01612         if (newblksz)
01613                 zfs_grow_blocksize(zp, newblksz, tx);
01614 
01615         zp->z_size = end;
01616 
01617         VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
01618             &zp->z_size, sizeof (zp->z_size), tx));
01619 
01620         vnode_pager_setsize(ZTOV(zp), end);
01621 
01622         zfs_range_unlock(rl);
01623 
01624         dmu_tx_commit(tx);
01625 
01626         return (0);
01627 }
01628 
01638 static int
01639 zfs_free_range( znode_t *zp, uint64_t off, uint64_t len)
01640 {
01641         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
01642         rl_t *rl;
01643         int error;
01644 
01645         /*
01646          * Lock the range being freed.
01647          */
01648         rl = zfs_range_lock(zp, off, len, RL_WRITER);
01649 
01650         /*
01651          * Nothing to do if file already at desired length.
01652          */
01653         if (off >= zp->z_size) {
01654                 zfs_range_unlock(rl);
01655                 return (0);
01656         }
01657 
01658         if (off + len > zp->z_size)
01659                 len = zp->z_size - off;
01660 
01661         error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
01662 
01663         if (error == 0) {
01664                 /*
01665                  * In FreeBSD we cannot free block in the middle of a file,
01666                  * but only at the end of a file, so this code path should
01667                  * never happen.
01668                  */
01669                 vnode_pager_setsize(ZTOV(zp), off);
01670         }
01671 
01672         zfs_range_unlock(rl);
01673 
01674         return (error);
01675 }
01676 
01685 static int
01686 zfs_trunc(znode_t *zp, uint64_t end)
01687 {
01688         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
01689         vnode_t *vp = ZTOV(zp);
01690         dmu_tx_t *tx;
01691         rl_t *rl;
01692         int error;
01693         sa_bulk_attr_t bulk[2];
01694         int count = 0;
01695 
01696         /*
01697          * We will change zp_size, lock the whole file.
01698          */
01699         rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
01700 
01701         /*
01702          * Nothing to do if file already at desired length.
01703          */
01704         if (end >= zp->z_size) {
01705                 zfs_range_unlock(rl);
01706                 return (0);
01707         }
01708 
01709         error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,  -1);
01710         if (error) {
01711                 zfs_range_unlock(rl);
01712                 return (error);
01713         }
01714 top:
01715         tx = dmu_tx_create(zfsvfs->z_os);
01716         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
01717         zfs_sa_upgrade_txholds(tx, zp);
01718         error = dmu_tx_assign(tx, TXG_NOWAIT);
01719         if (error) {
01720                 if (error == ERESTART) {
01721                         dmu_tx_wait(tx);
01722                         dmu_tx_abort(tx);
01723                         goto top;
01724                 }
01725                 dmu_tx_abort(tx);
01726                 zfs_range_unlock(rl);
01727                 return (error);
01728         }
01729 
01730         zp->z_size = end;
01731         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
01732             NULL, &zp->z_size, sizeof (zp->z_size));
01733 
01734         if (end == 0) {
01735                 zp->z_pflags &= ~ZFS_SPARSE;
01736                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
01737                     NULL, &zp->z_pflags, 8);
01738         }
01739         VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
01740 
01741         dmu_tx_commit(tx);
01742 
01743         /*
01744          * Clear any mapped pages in the truncated region.  This has to
01745          * happen outside of the transaction to avoid the possibility of
01746          * a deadlock with someone trying to push a page that we are
01747          * about to invalidate.
01748          */
01749         vnode_pager_setsize(vp, end);
01750 
01751         zfs_range_unlock(rl);
01752 
01753         return (0);
01754 }
01755 
01767 int
01768 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
01769 {
01770         vnode_t *vp = ZTOV(zp);
01771         dmu_tx_t *tx;
01772         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
01773         zilog_t *zilog = zfsvfs->z_log;
01774         uint64_t mode;
01775         uint64_t mtime[2], ctime[2];
01776         sa_bulk_attr_t bulk[3];
01777         int count = 0;
01778         int error;
01779 
01780         if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
01781             sizeof (mode))) != 0)
01782                 return (error);
01783 
01784         if (off > zp->z_size) {
01785                 error =  zfs_extend(zp, off+len);
01786                 if (error == 0 && log)
01787                         goto log;
01788                 else
01789                         return (error);
01790         }
01791 
01792         /*
01793          * Check for any locks in the region to be freed.
01794          */
01795 
01796         if (MANDLOCK(vp, (mode_t)mode)) {
01797                 uint64_t length = (len ? len : zp->z_size - off);
01798                 if (error = chklock(vp, FWRITE, off, length, flag, NULL))
01799                         return (error);
01800         }
01801 
01802         if (len == 0) {
01803                 error = zfs_trunc(zp, off);
01804         } else {
01805                 if ((error = zfs_free_range(zp, off, len)) == 0 &&
01806                     off + len > zp->z_size)
01807                         error = zfs_extend(zp, off+len);
01808         }
01809         if (error || !log)
01810                 return (error);
01811 log:
01812         tx = dmu_tx_create(zfsvfs->z_os);
01813         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
01814         zfs_sa_upgrade_txholds(tx, zp);
01815         error = dmu_tx_assign(tx, TXG_NOWAIT);
01816         if (error) {
01817                 if (error == ERESTART) {
01818                         dmu_tx_wait(tx);
01819                         dmu_tx_abort(tx);
01820                         goto log;
01821                 }
01822                 dmu_tx_abort(tx);
01823                 return (error);
01824         }
01825 
01826         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
01827         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
01828         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
01829             NULL, &zp->z_pflags, 8);
01830         zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
01831         error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
01832         ASSERT(error == 0);
01833 
01834         zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
01835 
01836         dmu_tx_commit(tx);
01837         return (0);
01838 }
01839 
01840 void
01841 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
01842 {
01843         zfsvfs_t        zfsvfs;
01844         uint64_t        moid, obj, sa_obj, version;
01845         uint64_t        sense = ZFS_CASE_SENSITIVE;
01846         uint64_t        norm = 0;
01847         nvpair_t        *elem;
01848         int             error;
01849         int             i;
01850         znode_t         *rootzp = NULL;
01851         vnode_t         vnode;
01852         vattr_t         vattr;
01853         znode_t         *zp;
01854         zfs_acl_ids_t   acl_ids;
01855 
01856         /*
01857          * First attempt to create master node.
01858          */
01859         /*
01860          * In an empty objset, there are no blocks to read and thus
01861          * there can be no i/o errors (which we assert below).
01862          */
01863         moid = MASTER_NODE_OBJ;
01864         error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
01865             DMU_OT_NONE, 0, tx);
01866         ASSERT(error == 0);
01867 
01868         /*
01869          * Set starting attributes.
01870          */
01871         version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
01872         elem = NULL;
01873         while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
01874                 /* For the moment we expect all zpl props to be uint64_ts */
01875                 uint64_t val;
01876                 char *name;
01877 
01878                 ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
01879                 VERIFY(nvpair_value_uint64(elem, &val) == 0);
01880                 name = nvpair_name(elem);
01881                 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
01882                         if (val < version)
01883                                 version = val;
01884                 } else {
01885                         error = zap_update(os, moid, name, 8, 1, &val, tx);
01886                 }
01887                 ASSERT(error == 0);
01888                 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
01889                         norm = val;
01890                 else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
01891                         sense = val;
01892         }
01893         ASSERT(version != 0);
01894         error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
01895 
01896         /*
01897          * Create zap object used for SA attribute registration
01898          */
01899 
01900         if (version >= ZPL_VERSION_SA) {
01901                 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
01902                     DMU_OT_NONE, 0, tx);
01903                 error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
01904                 ASSERT(error == 0);
01905         } else {
01906                 sa_obj = 0;
01907         }
01908         /*
01909          * Create a delete queue.
01910          */
01911         obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
01912 
01913         error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
01914         ASSERT(error == 0);
01915 
01916         /*
01917          * Create root znode.  Create minimal znode/vnode/zfsvfs
01918          * to allow zfs_mknode to work.
01919          */
01920         VATTR_NULL(&vattr);
01921         vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
01922         vattr.va_type = VDIR;
01923         vattr.va_mode = S_IFDIR|0755;
01924         vattr.va_uid = crgetuid(cr);
01925         vattr.va_gid = crgetgid(cr);
01926 
01927         bzero(&zfsvfs, sizeof (zfsvfs_t));
01928 
01929         rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
01930         zfs_znode_cache_constructor(rootzp, NULL, 0);
01931         ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
01932         rootzp->z_moved = 0;
01933         rootzp->z_unlinked = 0;
01934         rootzp->z_atime_dirty = 0;
01935         rootzp->z_is_sa = USE_SA(version, os);
01936 
01937         vnode.v_type = VDIR;
01938         vnode.v_data = rootzp;
01939         rootzp->z_vnode = &vnode;
01940 
01941         zfsvfs.z_os = os;
01942         zfsvfs.z_parent = &zfsvfs;
01943         zfsvfs.z_version = version;
01944         zfsvfs.z_use_fuids = USE_FUIDS(version, os);
01945         zfsvfs.z_use_sa = USE_SA(version, os);
01946         zfsvfs.z_norm = norm;
01947 
01948         error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
01949             &zfsvfs.z_attr_table);
01950 
01951         ASSERT(error == 0);
01952 
01953         /*
01954          * Fold case on file systems that are always or sometimes case
01955          * insensitive.
01956          */
01957         if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
01958                 zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER;
01959 
01960         mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
01961         list_create(&zfsvfs.z_all_znodes, sizeof (znode_t),
01962             offsetof(znode_t, z_link_node));
01963 
01964         for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
01965                 mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
01966 
01967         rootzp->z_zfsvfs = &zfsvfs;
01968         VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
01969             cr, NULL, &acl_ids));
01970         zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
01971         ASSERT3P(zp, ==, rootzp);
01972         error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
01973         ASSERT(error == 0);
01974         zfs_acl_ids_free(&acl_ids);
01975         POINTER_INVALIDATE(&rootzp->z_zfsvfs);
01976 
01977         sa_handle_destroy(rootzp->z_sa_hdl);
01978         rootzp->z_vnode = NULL;
01979         kmem_cache_free(znode_cache, rootzp);
01980 
01981         /*
01982          * Create shares directory
01983          */
01984 
01985         error = zfs_create_share_dir(&zfsvfs, tx);
01986 
01987         ASSERT(error == 0);
01988 
01989         for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
01990                 mutex_destroy(&zfsvfs.z_hold_mtx[i]);
01991 }
01992 
01993 #endif /* _KERNEL */
01994 
01995 static int
01996 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
01997 {
01998         uint64_t sa_obj = 0;
01999         int error;
02000 
02001         error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
02002         if (error != 0 && error != ENOENT)
02003                 return (error);
02004 
02005         error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
02006         return (error);
02007 }
02008 
02009 static int
02010 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
02011     dmu_buf_t **db, void *tag)
02012 {
02013         dmu_object_info_t doi;
02014         int error;
02015 
02016         if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
02017                 return (error);
02018 
02019         dmu_object_info_from_db(*db, &doi);
02020         if ((doi.doi_bonus_type != DMU_OT_SA &&
02021             doi.doi_bonus_type != DMU_OT_ZNODE) ||
02022             doi.doi_bonus_type == DMU_OT_ZNODE &&
02023             doi.doi_bonus_size < sizeof (znode_phys_t)) {
02024                 sa_buf_rele(*db, tag);
02025                 return (ENOTSUP);
02026         }
02027 
02028         error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
02029         if (error != 0) {
02030                 sa_buf_rele(*db, tag);
02031                 return (error);
02032         }
02033 
02034         return (0);
02035 }
02036 
02037 void
02038 zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
02039 {
02040         sa_handle_destroy(hdl);
02041         sa_buf_rele(db, tag);
02042 }
02043 
02048 static int
02049 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
02050     uint64_t *pobjp, int *is_xattrdir)
02051 {
02052         uint64_t parent;
02053         uint64_t pflags;
02054         uint64_t mode;
02055         uint64_t parent_mode;
02056         sa_bulk_attr_t bulk[3];
02057         sa_handle_t *sa_hdl;
02058         dmu_buf_t *sa_db;
02059         int count = 0;
02060         int error;
02061 
02062         SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
02063             &parent, sizeof (parent));
02064         SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
02065             &pflags, sizeof (pflags));
02066         SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
02067             &mode, sizeof (mode));
02068 
02069         if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
02070                 return (error);
02071 
02072         /*
02073          * When a link is removed its parent pointer is not changed and will
02074          * be invalid.  There are two cases where a link is removed but the
02075          * file stays around, when it goes to the delete queue and when there
02076          * are additional links.
02077          */
02078         error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
02079         if (error != 0)
02080                 return (error);
02081 
02082         error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
02083         zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
02084         if (error != 0)
02085                 return (error);
02086 
02087         *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
02088 
02089         /*
02090          * Extended attributes can be applied to files, directories, etc.
02091          * Otherwise the parent must be a directory.
02092          */
02093         if (!*is_xattrdir && !S_ISDIR(parent_mode))
02094                 return (EINVAL);
02095 
02096         *pobjp = parent;
02097 
02098         return (0);
02099 }
02100 
02104 static int
02105 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
02106     zfs_stat_t *sb)
02107 {
02108         sa_bulk_attr_t bulk[4];
02109         int count = 0;
02110 
02111         SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
02112             &sb->zs_mode, sizeof (sb->zs_mode));
02113         SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
02114             &sb->zs_gen, sizeof (sb->zs_gen));
02115         SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
02116             &sb->zs_links, sizeof (sb->zs_links));
02117         SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
02118             &sb->zs_ctime, sizeof (sb->zs_ctime));
02119 
02120         return (sa_bulk_lookup(hdl, bulk, count));
02121 }
02122 
02123 static int
02124 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
02125     sa_attr_type_t *sa_table, char *buf, int len)
02126 {
02127         sa_handle_t *sa_hdl;
02128         sa_handle_t *prevhdl = NULL;
02129         dmu_buf_t *prevdb = NULL;
02130         dmu_buf_t *sa_db = NULL;
02131         char *path = buf + len - 1;
02132         int error;
02133 
02134         *path = '\0';
02135         sa_hdl = hdl;
02136 
02137         for (;;) {
02138                 uint64_t pobj;
02139                 char component[MAXNAMELEN + 2];
02140                 size_t complen;
02141                 int is_xattrdir;
02142 
02143                 if (prevdb)
02144                         zfs_release_sa_handle(prevhdl, prevdb, FTAG);
02145 
02146                 if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
02147                     &is_xattrdir)) != 0)
02148                         break;
02149 
02150                 if (pobj == obj) {
02151                         if (path[0] != '/')
02152                                 *--path = '/';
02153                         break;
02154                 }
02155 
02156                 component[0] = '/';
02157                 if (is_xattrdir) {
02158                         (void) sprintf(component + 1, "<xattrdir>");
02159                 } else {
02160                         error = zap_value_search(osp, pobj, obj,
02161                             ZFS_DIRENT_OBJ(-1ULL), component + 1);
02162                         if (error != 0)
02163                                 break;
02164                 }
02165 
02166                 complen = strlen(component);
02167                 path -= complen;
02168                 ASSERT(path >= buf);
02169                 bcopy(component, path, complen);
02170                 obj = pobj;
02171 
02172                 if (sa_hdl != hdl) {
02173                         prevhdl = sa_hdl;
02174                         prevdb = sa_db;
02175                 }
02176                 error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
02177                 if (error != 0) {
02178                         sa_hdl = prevhdl;
02179                         sa_db = prevdb;
02180                         break;
02181                 }
02182         }
02183 
02184         if (sa_hdl != NULL && sa_hdl != hdl) {
02185                 ASSERT(sa_db != NULL);
02186                 zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
02187         }
02188 
02189         if (error == 0)
02190                 (void) memmove(buf, path, buf + len - path);
02191 
02192         return (error);
02193 }
02194 
02195 int
02196 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
02197 {
02198         sa_attr_type_t *sa_table;
02199         sa_handle_t *hdl;
02200         dmu_buf_t *db;
02201         int error;
02202 
02203         error = zfs_sa_setup(osp, &sa_table);
02204         if (error != 0)
02205                 return (error);
02206 
02207         error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
02208         if (error != 0)
02209                 return (error);
02210 
02211         error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
02212 
02213         zfs_release_sa_handle(hdl, db, FTAG);
02214         return (error);
02215 }
02216 
02217 int
02218 zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
02219     char *buf, int len)
02220 {
02221         char *path = buf + len - 1;
02222         sa_attr_type_t *sa_table;
02223         sa_handle_t *hdl;
02224         dmu_buf_t *db;
02225         int error;
02226 
02227         *path = '\0';
02228 
02229         error = zfs_sa_setup(osp, &sa_table);
02230         if (error != 0)
02231                 return (error);
02232 
02233         error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
02234         if (error != 0)
02235                 return (error);
02236 
02237         error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
02238         if (error != 0) {
02239                 zfs_release_sa_handle(hdl, db, FTAG);
02240                 return (error);
02241         }
02242 
02243         error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
02244 
02245         zfs_release_sa_handle(hdl, db, FTAG);
02246         return (error);
02247 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines