FreeBSD ZFS
The Zettabyte File System
00001 /* 00002 * CDDL HEADER START 00003 * 00004 * The contents of this file are subject to the terms of the 00005 * Common Development and Distribution License (the "License"). 00006 * You may not use this file except in compliance with the License. 00007 * 00008 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 00009 * or http://www.opensolaris.org/os/licensing. 00010 * See the License for the specific language governing permissions 00011 * and limitations under the License. 00012 * 00013 * When distributing Covered Code, include this CDDL HEADER in each 00014 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 00015 * If applicable, add the following below this CDDL HEADER, with the 00016 * fields enclosed by brackets "[]" replaced with your own identifying 00017 * information: Portions Copyright [yyyy] [name of copyright owner] 00018 * 00019 * CDDL HEADER END 00020 */ 00021 /* 00022 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 00023 * Copyright (c) 2012 by Delphix. All rights reserved. 
00024 */ 00025 00026 /* Portions Copyright 2007 Jeremy Teo */ 00027 /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 00028 00029 #ifdef _KERNEL 00030 #include <sys/types.h> 00031 #include <sys/param.h> 00032 #include <sys/time.h> 00033 #include <sys/systm.h> 00034 #include <sys/sysmacros.h> 00035 #include <sys/resource.h> 00036 #include <sys/mntent.h> 00037 #include <sys/u8_textprep.h> 00038 #include <sys/dsl_dataset.h> 00039 #include <sys/vfs.h> 00040 #include <sys/vnode.h> 00041 #include <sys/file.h> 00042 #include <sys/kmem.h> 00043 #include <sys/errno.h> 00044 #include <sys/unistd.h> 00045 #include <sys/atomic.h> 00046 #include <sys/zfs_dir.h> 00047 #include <sys/zfs_acl.h> 00048 #include <sys/zfs_ioctl.h> 00049 #include <sys/zfs_rlock.h> 00050 #include <sys/zfs_fuid.h> 00051 #include <sys/dnode.h> 00052 #include <sys/fs/zfs.h> 00053 #include <sys/kidmap.h> 00054 #endif /* _KERNEL */ 00055 00056 #include <sys/dmu.h> 00057 #include <sys/refcount.h> 00058 #include <sys/stat.h> 00059 #include <sys/zap.h> 00060 #include <sys/zfs_znode.h> 00061 #include <sys/sa.h> 00062 #include <sys/zfs_sa.h> 00063 #include <sys/zfs_stat.h> 00064 #include <sys/refcount.h> 00065 00066 #include "zfs_prop.h" 00067 #include "zfs_comutil.h" 00068 00069 /* Used by fstat(1). */ 00070 SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, 0, sizeof(znode_t), 00071 "sizeof(znode_t)"); 00072 00077 #ifdef DEBUG 00078 #define ZNODE_STATS 00079 #endif /* DEBUG */ 00080 00081 #ifdef ZNODE_STATS 00082 #define ZNODE_STAT_ADD(stat) ((stat)++) 00083 #else 00084 #define ZNODE_STAT_ADD(stat) /* nothing */ 00085 #endif /* ZNODE_STATS */ 00086 00087 /* 00088 * Functions needed for userland (ie: libzpool) are not put under 00089 * #ifdef_KERNEL; the rest of the functions have dependencies 00090 * (such as VFS logic) that will not compile easily in userland. 
 */
#ifdef _KERNEL

/*
 * Serializes znode kmem-move callbacks against filesystem teardown: taken
 * as writer in zfs_znode_move() below, where holding it guarantees the
 * zfsvfs_t it revalidates cannot be freed underneath the callback.
 */
krwlock_t zfsvfs_lock;

/* Global cache of znode_t, shared by every mounted ZFS filesystem. */
static kmem_cache_t *znode_cache = NULL;

/*
 * dbuf eviction callback installed on znode-backing dbufs; reaching it
 * means all dbuf references were dropped without first clearing the
 * callback, which is a fatal programming error.
 */
/*ARGSUSED*/
static void
znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
{
	/*
	 * We should never drop all dbuf refs without first clearing
	 * the eviction callback.
	 */
	panic("evicting znode %p\n", user_ptr);
}

extern struct vop_vector zfs_vnodeops;
extern struct vop_vector zfs_fifoops;
extern struct vop_vector zfs_shareops;

/*
 * Initialize a freshly allocated znode: optionally attach a new vnode
 * (when vfsp != NULL) and set up the locks and AVL range tree.
 *
 * \note We cannot use this function as a cache constructor, because
 *	 there is one global cache for all file systems and we need
 *	 to pass vfsp here, which is not possible, because argument
 *	 'cdrarg' is defined at kmem_cache_create() time.
 */
/*ARGSUSED*/
static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
{
	znode_t *zp = buf;
	vnode_t *vp;
	vfs_t *vfsp = arg;
	int error;

	/* Poison z_zfsvfs so a half-constructed znode is never "valid". */
	POINTER_INVALIDATE(&zp->z_zfsvfs);
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));

	if (vfsp != NULL) {
		error = getnewvnode("zfs", vfsp, &zfs_vnodeops, &vp);
		if (error != 0 && (kmflags & KM_NOSLEEP))
			return (-1);
		ASSERT(error == 0);
		/* Hand the vnode to the znode while exclusively locked. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		zp->z_vnode = vp;
		vp->v_data = (caddr_t)zp;
		VN_LOCK_AREC(vp);
		VN_LOCK_ASHARE(vp);
	} else {
		zp->z_vnode = NULL;
	}

	list_link_init(&zp->z_link_node);

	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);

	/* Range-lock bookkeeping for byte-range locking of file data. */
	mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&zp->z_range_avl, zfs_range_compare,
	    sizeof (rl_t), offsetof(rl_t, r_node));

	zp->z_dirlocks = NULL;
	zp->z_acl_cached = NULL;
	zp->z_moved = 0;
	return (0);
}

/*
 * Tear down a znode before it is returned to the cache: the vnode, any
 * cached ACL, and all directory locks must already be gone.
 */
/*ARGSUSED*/
static void
zfs_znode_cache_destructor(void *buf, void *arg)
{
	znode_t *zp = buf;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	ASSERT(ZTOV(zp) == NULL);
	vn_free(ZTOV(zp));
	ASSERT(!list_link_active(&zp->z_link_node));
	mutex_destroy(&zp->z_lock);
	rw_destroy(&zp->z_parent_lock);
	rw_destroy(&zp->z_name_lock);
	mutex_destroy(&zp->z_acl_lock);
	avl_destroy(&zp->z_range_avl);
	mutex_destroy(&zp->z_range_lock);

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(zp->z_acl_cached == NULL);
}

#ifdef ZNODE_STATS
/* Counters for each way zfs_znode_move() can decline or defer a move. */
static struct {
	uint64_t zms_zfsvfs_invalid;
	uint64_t zms_zfsvfs_recheck1;
	uint64_t zms_zfsvfs_unmounted;
	uint64_t zms_zfsvfs_recheck2;
	uint64_t zms_obj_held;
	uint64_t zms_vnode_locked;
	uint64_t zms_not_only_dnlc;
} znode_move_stats;
#endif	/* ZNODE_STATS */

#ifdef sun
/*
 * Copy an idle znode from 'ozp' into the relocated buffer 'nzp' and
 * invalidate the original.  Caller (zfs_znode_move) holds all the locks
 * required to make this safe.
 */
static void
zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
{
	vnode_t *vp;

	/* Copy fields. */
	nzp->z_zfsvfs = ozp->z_zfsvfs;

	/* Swap vnodes.
 */
	vp = nzp->z_vnode;
	nzp->z_vnode = ozp->z_vnode;
	ozp->z_vnode = vp;	/* let destructor free the overwritten vnode */
	ZTOV(ozp)->v_data = ozp;
	ZTOV(nzp)->v_data = nzp;

	nzp->z_id = ozp->z_id;
	ASSERT(ozp->z_dirlocks == NULL);	/* znode not in use */
	ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
	nzp->z_unlinked = ozp->z_unlinked;
	nzp->z_atime_dirty = ozp->z_atime_dirty;
	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
	nzp->z_blksz = ozp->z_blksz;
	nzp->z_seq = ozp->z_seq;
	nzp->z_mapcnt = ozp->z_mapcnt;
	nzp->z_gen = ozp->z_gen;
	nzp->z_sync_cnt = ozp->z_sync_cnt;
	nzp->z_is_sa = ozp->z_is_sa;
	nzp->z_sa_hdl = ozp->z_sa_hdl;
	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
	nzp->z_links = ozp->z_links;
	nzp->z_size = ozp->z_size;
	nzp->z_pflags = ozp->z_pflags;
	nzp->z_uid = ozp->z_uid;
	nzp->z_gid = ozp->z_gid;
	nzp->z_mode = ozp->z_mode;

	/*
	 * Since this is just an idle znode and kmem is already dealing with
	 * memory pressure, release any cached ACL.
	 */
	if (ozp->z_acl_cached) {
		zfs_acl_free(ozp->z_acl_cached);
		ozp->z_acl_cached = NULL;
	}

	/* Point the shared SA handle's userdata at the new znode. */
	sa_set_userp(nzp->z_sa_hdl, nzp);

	/*
	 * Invalidate the original znode by clearing fields that provide a
	 * pointer back to the znode. Set the low bit of the vfs pointer to
	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
	 * subsequent callback.
	 */
	ozp->z_sa_hdl = NULL;
	POINTER_INVALIDATE(&ozp->z_zfsvfs);

	/*
	 * Mark the znode.
	 */
	nzp->z_moved = 1;
	ozp->z_moved = (uint8_t)-1;
}

/*
 * kmem cache-move callback: attempt to relocate the znode in 'buf' to
 * 'newbuf'.  Returns KMEM_CBRC_YES on success, KMEM_CBRC_LATER when the
 * znode is busy, and KMEM_CBRC_DONT_KNOW when the buffer cannot be
 * identified as a live znode.  Note the lock order established here:
 * zfsvfs_lock -> z_teardown_lock -> z_znodes_lock -> obj hold -> v_lock.
 */
/*ARGSUSED*/
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	znode_t *ozp = buf, *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;

	/*
	 * The znode is on the file system's list of known znodes if the vfs
	 * pointer is valid. We set the low bit of the vfs pointer when freeing
	 * the znode to invalidate it, and the memory patterns written by kmem
	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
	 * created znode sets the vfs pointer last of all to indicate that the
	 * znode is known and in a valid state to be moved by this function.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Close a small window in which it's possible that the filesystem could
	 * be unmounted and freed, and zfsvfs, though valid in the previous
	 * statement, could point to unrelated memory by the time we try to
	 * prevent the filesystem from being unmounted.
	 */
	rw_enter(&zfsvfs_lock, RW_WRITER);
	if (zfsvfs != ozp->z_zfsvfs) {
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * If the znode is still valid, then so is the file system. We know that
	 * no valid file system can be freed while we hold zfsvfs_lock, so we
	 * can safely ensure that the filesystem is not and will not be
	 * unmounted. The next statement is equivalent to ZFS_ENTER().
	 */
	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
	if (zfsvfs->z_unmounted) {
		ZFS_EXIT(zfsvfs);
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}
	rw_exit(&zfsvfs_lock);

	mutex_enter(&zfsvfs->z_znodes_lock);
	/*
	 * Recheck the vfs pointer in case the znode was removed just before
	 * acquiring the lock.
	 */
	if (zfsvfs != ozp->z_zfsvfs) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * At this point we know that as long as we hold z_znodes_lock, the
	 * znode cannot be freed and fields within the znode can be safely
	 * accessed. Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		return (KMEM_CBRC_LATER);
	}

	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		return (KMEM_CBRC_LATER);
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
		mutex_exit(&vp->v_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		return (KMEM_CBRC_LATER);
	}

	/*
	 * The znode is known and in a valid state to move. We're holding the
	 * locks needed to execute the critical section.
	 */
	zfs_znode_move_impl(ozp, nzp);
	mutex_exit(&vp->v_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

	/* Replace the old znode with the new one on the per-fs list. */
	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	return (KMEM_CBRC_YES);
}
#endif /* sun */

/*
 * One-time module initialization: create the global znode cache.  The
 * constructor is deliberately NULL (see the note on
 * zfs_znode_cache_constructor); it is invoked by hand with the proper
 * vfsp argument instead.
 */
void
zfs_znode_init(void)
{
	/*
	 * Initialize zcache
	 */
	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
	ASSERT(znode_cache == NULL);
	znode_cache = kmem_cache_create("zfs_znode_cache",
	    sizeof (znode_t), 0, /* zfs_znode_cache_constructor */ NULL,
	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
	/*
	 * NOTE(review): zfs_znode_move is only compiled under #ifdef sun;
	 * presumably kmem_cache_set_move() is a no-op in the FreeBSD compat
	 * layer — confirm against the opensolaris kmem shim.
	 */
	kmem_cache_set_move(znode_cache, zfs_znode_move);
}

/*
 * Module teardown: destroy the znode cache and its lock.
 */
void
zfs_znode_fini(void)
{
#ifdef sun
	/*
	 * Cleanup vfs & vnode ops
	 */
	zfs_remove_op_tables();
#endif	/* sun */

	/*
	 * Cleanup zcache
	 */
	if (znode_cache)
		kmem_cache_destroy(znode_cache);
	znode_cache = NULL;
	rw_destroy(&zfsvfs_lock);
}

#ifdef sun
struct vnodeops *zfs_dvnodeops;
struct vnodeops *zfs_fvnodeops;
struct vnodeops *zfs_symvnodeops;
struct vnodeops *zfs_xdvnodeops;
struct vnodeops *zfs_evnodeops;
struct vnodeops *zfs_sharevnodeops;

/*
 * Free the Solaris vfs and vnode operation tables registered by
 * zfs_create_op_tables() and reset all the table pointers.
 */
void
zfs_remove_op_tables()
{
	/*
	 * Remove vfs ops
	 */
	ASSERT(zfsfstype);
	(void) vfs_freevfsops_by_type(zfsfstype);
	zfsfstype = 0;

	/*
	 * Remove vnode ops
	 */
	if (zfs_dvnodeops)
		vn_freevnodeops(zfs_dvnodeops);
	if (zfs_fvnodeops)
		vn_freevnodeops(zfs_fvnodeops);
	if (zfs_symvnodeops)
		vn_freevnodeops(zfs_symvnodeops);
	if (zfs_xdvnodeops)
		vn_freevnodeops(zfs_xdvnodeops);
	if (zfs_evnodeops)
		vn_freevnodeops(zfs_evnodeops);
	if (zfs_sharevnodeops)
		vn_freevnodeops(zfs_sharevnodeops);

	zfs_dvnodeops = NULL;
	zfs_fvnodeops = NULL;
	zfs_symvnodeops = NULL;
	zfs_xdvnodeops = NULL;
	zfs_evnodeops = NULL;
	zfs_sharevnodeops = NULL;
}

extern const fs_operation_def_t zfs_dvnodeops_template[];
extern const fs_operation_def_t zfs_fvnodeops_template[];
extern const fs_operation_def_t zfs_xdvnodeops_template[];
extern const fs_operation_def_t zfs_symvnodeops_template[];
extern const fs_operation_def_t zfs_evnodeops_template[];
extern const fs_operation_def_t zfs_sharevnodeops_template[];

/*
 * Build the per-vnode-type operation tables from their templates.
 * Returns 0 on success or the first vn_make_ops() error.
 */
int
zfs_create_op_tables()
{
	int error;

	/*
	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
	 * due to a failure to remove the 2nd modlinkage (zfs_modldrv).
	 * In this case we just return as the ops vectors are already set up.
	 */
	if (zfs_dvnodeops)
		return (0);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
	    &zfs_dvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
	    &zfs_fvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
	    &zfs_symvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
	    &zfs_xdvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
	    &zfs_evnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
	    &zfs_sharevnodeops);

	return (error);
}
#endif	/* sun */

/*
 * Create the hidden ZFS_SHARES_DIR directory object inside transaction
 * 'tx', record its object id in the master node and in
 * zfsvfs->z_shares_dir, then dismantle the temporary znode again.  A
 * stack-allocated vnode is wired in temporarily because zfs_mknode()
 * expects the znode to have one.  Returns the zap_add() error code.
 */
int
zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	zfs_acl_ids_t acl_ids;
	vattr_t vattr;
	znode_t *sharezp;
	vnode_t *vp, vnode;
	znode_t *zp;
	int error;

	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0555;
	vattr.va_uid = crgetuid(kcred);
	vattr.va_gid = crgetgid(kcred);

	/* Manual construction; see the note on zfs_znode_cache_constructor. */
	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	zfs_znode_cache_constructor(sharezp, zfsvfs->z_parent->z_vfs, 0);
	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
	sharezp->z_moved = 0;
	sharezp->z_unlinked = 0;
	sharezp->z_atime_dirty = 0;
	sharezp->z_zfsvfs = zfsvfs;
	sharezp->z_is_sa = zfsvfs->z_use_sa;

	/* Temporary on-stack vnode; detached again before freeing below. */
	sharezp->z_vnode = &vnode;
	vnode.v_data = sharezp;

	vp = ZTOV(sharezp);
	vp->v_type = VDIR;

	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
	    kcred, NULL, &acl_ids));
	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, sharezp);
	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
	zfsvfs->z_shares_dir = sharezp->z_id;

	zfs_acl_ids_free(&acl_ids);
	ZTOV(sharezp)->v_data = NULL;
	ZTOV(sharezp)->v_count = 0;
	ZTOV(sharezp)->v_holdcnt = 0;
	zp->z_vnode = NULL;
	sa_handle_destroy(sharezp->z_sa_hdl);
	sharezp->z_vnode = NULL;
	kmem_cache_free(znode_cache, sharezp);

	return (error);
}

/*
 * define a couple of values we need available
 * for both 64 and 32 bit environments.
 */
#ifndef NBITSMINOR64
#define	NBITSMINOR64	32
#endif
#ifndef MAXMAJ64
#define	MAXMAJ64	0xffffffffUL
#endif
#ifndef MAXMIN64
#define	MAXMIN64	0xffffffffUL
#endif

/*
 * Expand an in-core dev_t into the fixed 64-bit on-disk encoding:
 * major in the high 32 bits, minor in the low 32 bits.
 */
static uint64_t
zfs_expldev(dev_t dev)
{
	return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev));
}

/*
 * Inverse of zfs_expldev(): rebuild an in-core dev_t from the 64-bit
 * on-disk encoding.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
	return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64)));
}

/*
 * Attach a system-attribute handle to 'zp', either sharing the caller's
 * 'sa_hdl' or creating one from the bonus dbuf 'db'.  Caller must hold
 * the object's ZFS_OBJ_MUTEX.
 */
static void
zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
{
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));

	mutex_enter(&zp->z_lock);

	ASSERT(zp->z_sa_hdl == NULL);
	ASSERT(zp->z_acl_cached == NULL);
	if (sa_hdl == NULL) {
		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
		    SA_HDL_SHARED, &zp->z_sa_hdl));
	} else {
		zp->z_sa_hdl = sa_hdl;
		sa_set_userp(sa_hdl, zp);
	}

	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;

	/*
	 * Slap on VROOT if we are the root znode
	 */
	if (zp->z_id == zfsvfs->z_root)
		ZTOV(zp)->v_flag |= VROOT;

	mutex_exit(&zp->z_lock);
	vn_exists(ZTOV(zp));
}

/*
 * Detach and destroy the znode's SA handle.  Legal only while holding
 * the object mutex, or when the znode is unlinked, or during forced
 * teardown (z_teardown_inactive_lock held as writer).
 */
void
zfs_znode_dmu_fini(znode_t *zp)
{
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
	    zp->z_unlinked ||
	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));

	sa_handle_destroy(zp->z_sa_hdl);
	zp->z_sa_hdl = NULL;
}

/*
 * Dispose of a vnode that was attached to a znode we failed to fully
 * construct: sever v_data, mark it dead, and drop it.
 */
static void
zfs_vnode_forget(vnode_t *vp)
{

	/* copied from insmntque_stddtr */
	vp->v_data = NULL;
	vp->v_op = &dead_vnodeops;
	vgone(vp);
	vput(vp);
}

/*
 * Construct a new in-core znode (with vnode) for the object whose bonus
 * dbuf is 'db'.  Reads the bulk SA attributes to populate the znode and
 * links it onto the filesystem's z_all_znodes list.  Returns NULL if the
 * attribute lookup fails or the generation is 0 (object not yet fully
 * created).
 */
static znode_t *
zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
    dmu_object_type_t obj_type, sa_handle_t *hdl)
{
	znode_t *zp;
	vnode_t *vp;
	uint64_t mode;
	uint64_t parent;
	sa_bulk_attr_t bulk[9];
	int count = 0;

	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	zfs_znode_cache_constructor(zp, zfsvfs->z_parent->z_vfs, 0);

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	zp->z_moved = 0;

	/*
	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
	 * the zfs_znode_move() callback.
 */
	zp->z_sa_hdl = NULL;
	zp->z_unlinked = 0;
	zp->z_atime_dirty = 0;
	zp->z_mapcnt = 0;
	zp->z_id = db->db_object;
	zp->z_blksz = blksz;
	zp->z_seq = 0x7A4653;	/* arbitrary initial sequence ("zFS") */
	zp->z_sync_cnt = 0;

	vp = ZTOV(zp);

	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);

	/* Fetch the core attributes from the SA layer in one bulk lookup. */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, 8);

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
		/* Lookup failed or object half-created: unwind everything. */
		if (hdl == NULL)
			sa_handle_destroy(zp->z_sa_hdl);
		zfs_vnode_forget(vp);
		zp->z_vnode = NULL;
		kmem_cache_free(znode_cache, zp);
		return (NULL);
	}

	zp->z_mode = mode;

	vp->v_type = IFTOVT((mode_t)mode);

	/* Per-type vnode setup; special files get their own vop vectors. */
	switch (vp->v_type) {
	case VDIR:
		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
		break;
#ifdef sun
	case VBLK:
	case VCHR:
		{
			uint64_t rdev;
			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
			    &rdev, sizeof (rdev)) == 0);

			vp->v_rdev = zfs_cmpldev(rdev);
		}
		break;
#endif	/* sun */
	case VFIFO:
#ifdef sun
	case VSOCK:
	case VDOOR:
#endif	/* sun */
		vp->v_op = &zfs_fifoops;
		break;
	case VREG:
		/* Files directly under the shares dir use the share vops. */
		if (parent == zfsvfs->z_shares_dir) {
			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
			vp->v_op = &zfs_shareops;
		}
		break;
#ifdef sun
	case VLNK:
		vn_setops(vp, zfs_symvnodeops);
		break;
	default:
		vn_setops(vp, zfs_evnodeops);
		break;
#endif	/* sun */
	}
	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

	mutex_enter(&zfsvfs->z_znodes_lock);
	list_insert_tail(&zfsvfs->z_all_znodes, zp);
	membar_producer();
	/*
	 * Everything else must be valid before assigning z_zfsvfs makes the
	 * znode eligible for zfs_znode_move().
	 */
	zp->z_zfsvfs = zfsvfs;
	mutex_exit(&zfsvfs->z_znodes_lock);

	VFS_HOLD(zfsvfs->z_vfs);
	return (zp);
}

/* Zero-filled scratch sources for the legacy znode_phys_t SA layout. */
static uint64_t empty_xattr;
static uint64_t pad[4];
static zfs_acl_phys_t acl_phys;

/*
 * Create a new DMU object for a new file/directory and construct its
 * in-core znode.  'dzp' is the parent directory znode (or, for
 * IS_ROOT_NODE, the half-initialized root znode itself); 'vap' supplies
 * type/mode/times; all on-disk changes happen inside transaction 'tx'.
 * On return *zpp points at the new znode (== dzp for the root).
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t mode, size, links, parent, pflags;
	uint64_t dzp_pflags = 0;
	uint64_t rdev = 0;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	dmu_buf_t *db;
	timestruc_t now;
	uint64_t gen, obj;
	int err;
	int bonuslen;	/* Length of bonus buffer */
	sa_handle_t *sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t sa_attrs[ZPL_END];
	int cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	if (zfsvfs->z_replay) {
		/* Log replay reuses the recorded object id, time and gen. */
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		if (zfsvfs->z_replay) {
			err = zap_create_claim_norm(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
			ASSERT0(err);
		} else {
			obj = zap_create_norm(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
		}
	} else {
		if (zfsvfs->z_replay) {
			err = dmu_object_claim(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
			ASSERT0(err);
		} else {
			obj = dmu_object_alloc(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
		}
	}

	getnewvnode_reserve(1);
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zfsvfs->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (vap->va_type == VDIR) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		rdev = zfs_expldev(vap->va_rdev);
	}

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file
	 *
	 * order for DMU_OT_ZNODE is critical since it needs to be constructed
	 * in the old znode_phys_t format.  Don't change this ordering
	 */

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
		    &empty_xattr, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
		    NULL, &rdev, 8);

	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
		ASSERT(*zpp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;

	if (vap->va_mask & AT_XVATTR)
		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);

	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
		ASSERT0(err);
	}
	if (!(flag & IS_ROOT_NODE)) {
		vnode_t *vp;

		/* Insert the new vnode onto the mount's vnode list. */
		vp = ZTOV(*zpp);
		vp->v_vflag |= VV_FORCEINSMQ;
		err = insmntque(vp, zfsvfs->z_vfs);
		vp->v_vflag &= ~VV_FORCEINSMQ;
		KASSERT(err == 0, ("insmntque() failed: error %d", err));
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
	getnewvnode_drop_reserve();
}

/*
 * Apply each extended attribute requested in 'xvap' to the znode's
 * on-disk pflags (or SA entries) inside transaction 'tx', marking every
 * attribute it handled via XVA_SET_RTN().
 */
void
zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
{
	xoptattr_t *xoap;

	xoap = xva_getxoptattr(xvap);
	ASSERT(xoap);

	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
		uint64_t times[2];
		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
		    &times, sizeof (times), tx);
		XVA_SET_RTN(xvap, XAT_CREATETIME);
	}
	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_READONLY);
	}
	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_HIDDEN);
	}
	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_SYSTEM);
	}
	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_ARCHIVE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_NOUNLINK);
	}
	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_APPENDONLY);
	}
	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_NODUMP);
	}
	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_OPAQUE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
		zfs_sa_set_scanstamp(zp, xvap, tx);
		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
	}
	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_REPARSE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_OFFLINE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_SPARSE);
	}
}

/*
 * Look up (or construct) the in-core znode for object 'obj_num' and
 * return it in *zpp.  Fast path: the bonus dbuf already has an SA handle
 * whose userdata is the live znode.  (Continues beyond this chunk.)
 */
int
zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t *db;
	znode_t *zp;
	int err;
	sa_handle_t *hdl;
	int first = 1;

	*zpp = NULL;

	getnewvnode_reserve(1);
again:
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		getnewvnode_drop_reserve();
		return (err);
	}

	/* Reject objects whose bonus buffer cannot hold znode data. */
	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		getnewvnode_drop_reserve();
		return (EINVAL);
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp = sa_get_userdata(hdl);

		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */
		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = ENOENT;
		} else {
			vnode_t *vp;
			int dying = 0;

			vp = ZTOV(zp);
			if (vp == NULL)
				dying = 1;
			else {
				VN_HOLD(vp);
				if ((vp->v_iflag & VI_DOOMED) != 0) {
					dying = 1;
					/*
					 * Don't VN_RELE() vnode here, because
					 * it can call vn_lock() which creates
					 * LOR between vnode lock and znode
					 * lock. We will VN_RELE() the vnode
					 * after dropping znode lock.
					 */
				}
			}
			if (dying) {
				if (first) {
					ZFS_LOG(1, "dying znode detected (zp=%p)", zp);
					first = 0;
				}
				/*
				 * znode is dying so we can't reuse it, we must
				 * wait until destruction is completed.
01220 */ 01221 sa_buf_rele(db, NULL); 01222 mutex_exit(&zp->z_lock); 01223 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01224 if (vp != NULL) 01225 VN_RELE(vp); 01226 tsleep(zp, 0, "zcollide", 1); 01227 goto again; 01228 } 01229 *zpp = zp; 01230 err = 0; 01231 } 01232 sa_buf_rele(db, NULL); 01233 mutex_exit(&zp->z_lock); 01234 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01235 getnewvnode_drop_reserve(); 01236 return (err); 01237 } 01238 01239 /* 01240 * Not found create new znode/vnode 01241 * but only if file exists. 01242 * 01243 * There is a small window where zfs_vget() could 01244 * find this object while a file create is still in 01245 * progress. This is checked for in zfs_znode_alloc() 01246 * 01247 * if zfs_znode_alloc() fails it will drop the hold on the 01248 * bonus buffer. 01249 */ 01250 zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, 01251 doi.doi_bonus_type, NULL); 01252 if (zp == NULL) { 01253 err = ENOENT; 01254 } else { 01255 *zpp = zp; 01256 } 01257 if (err == 0) { 01258 vnode_t *vp = ZTOV(zp); 01259 01260 err = insmntque(vp, zfsvfs->z_vfs); 01261 if (err == 0) 01262 VOP_UNLOCK(vp, 0); 01263 else { 01264 zp->z_vnode = NULL; 01265 zfs_znode_dmu_fini(zp); 01266 zfs_znode_free(zp); 01267 *zpp = NULL; 01268 } 01269 } 01270 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01271 getnewvnode_drop_reserve(); 01272 return (err); 01273 } 01274 01275 int 01276 zfs_rezget(znode_t *zp) 01277 { 01278 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01279 dmu_object_info_t doi; 01280 dmu_buf_t *db; 01281 vnode_t *vp; 01282 uint64_t obj_num = zp->z_id; 01283 uint64_t mode, size; 01284 sa_bulk_attr_t bulk[8]; 01285 int err; 01286 int count = 0; 01287 uint64_t gen; 01288 01289 ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 01290 01291 mutex_enter(&zp->z_acl_lock); 01292 if (zp->z_acl_cached) { 01293 zfs_acl_free(zp->z_acl_cached); 01294 zp->z_acl_cached = NULL; 01295 } 01296 01297 mutex_exit(&zp->z_acl_lock); 01298 ASSERT(zp->z_sa_hdl == NULL); 01299 err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); 01300 
if (err) { 01301 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01302 return (err); 01303 } 01304 01305 dmu_object_info_from_db(db, &doi); 01306 if (doi.doi_bonus_type != DMU_OT_SA && 01307 (doi.doi_bonus_type != DMU_OT_ZNODE || 01308 (doi.doi_bonus_type == DMU_OT_ZNODE && 01309 doi.doi_bonus_size < sizeof (znode_phys_t)))) { 01310 sa_buf_rele(db, NULL); 01311 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01312 return (EINVAL); 01313 } 01314 01315 zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); 01316 size = zp->z_size; 01317 01318 /* reload cached values */ 01319 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, 01320 &gen, sizeof (gen)); 01321 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 01322 &zp->z_size, sizeof (zp->z_size)); 01323 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 01324 &zp->z_links, sizeof (zp->z_links)); 01325 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 01326 &zp->z_pflags, sizeof (zp->z_pflags)); 01327 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 01328 &zp->z_atime, sizeof (zp->z_atime)); 01329 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 01330 &zp->z_uid, sizeof (zp->z_uid)); 01331 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, 01332 &zp->z_gid, sizeof (zp->z_gid)); 01333 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 01334 &mode, sizeof (mode)); 01335 01336 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { 01337 zfs_znode_dmu_fini(zp); 01338 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01339 return (EIO); 01340 } 01341 01342 zp->z_mode = mode; 01343 01344 if (gen != zp->z_gen) { 01345 zfs_znode_dmu_fini(zp); 01346 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01347 return (EIO); 01348 } 01349 01350 /* 01351 * XXXPJD: Not sure how is that possible, but under heavy 01352 * zfs recv -F load it happens that z_gen is the same, but 01353 * vnode type is different than znode type. 
This would mean 01354 * that for example regular file was replaced with directory 01355 * which has the same object number. 01356 */ 01357 vp = ZTOV(zp); 01358 if (vp != NULL && 01359 vp->v_type != IFTOVT((mode_t)zp->z_mode)) { 01360 zfs_znode_dmu_fini(zp); 01361 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01362 return (EIO); 01363 } 01364 01365 zp->z_unlinked = (zp->z_links == 0); 01366 zp->z_blksz = doi.doi_data_block_size; 01367 if (vp != NULL) { 01368 vn_pages_remove(vp, 0, 0); 01369 if (zp->z_size != size) 01370 vnode_pager_setsize(vp, zp->z_size); 01371 } 01372 01373 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 01374 01375 return (0); 01376 } 01377 01378 void 01379 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 01380 { 01381 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01382 objset_t *os = zfsvfs->z_os; 01383 uint64_t obj = zp->z_id; 01384 uint64_t acl_obj = zfs_external_acl(zp); 01385 01386 ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); 01387 if (acl_obj) { 01388 VERIFY(!zp->z_is_sa); 01389 VERIFY(0 == dmu_object_free(os, acl_obj, tx)); 01390 } 01391 VERIFY(0 == dmu_object_free(os, obj, tx)); 01392 zfs_znode_dmu_fini(zp); 01393 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 01394 zfs_znode_free(zp); 01395 } 01396 01397 void 01398 zfs_zinactive(znode_t *zp) 01399 { 01400 vnode_t *vp = ZTOV(zp); 01401 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01402 uint64_t z_id = zp->z_id; 01403 01404 ASSERT(zp->z_sa_hdl); 01405 01406 /* 01407 * Don't allow a zfs_zget() while were trying to release this znode 01408 */ 01409 ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 01410 01411 mutex_enter(&zp->z_lock); 01412 VI_LOCK(vp); 01413 if (vp->v_count > 0) { 01414 /* 01415 * If the hold count is greater than zero, somebody has 01416 * obtained a new reference on this znode while we were 01417 * processing it here, so we are done. 
01418 */ 01419 VI_UNLOCK(vp); 01420 mutex_exit(&zp->z_lock); 01421 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 01422 return; 01423 } 01424 VI_UNLOCK(vp); 01425 01426 /* 01427 * If this was the last reference to a file with no links, 01428 * remove the file from the file system. 01429 */ 01430 if (zp->z_unlinked) { 01431 mutex_exit(&zp->z_lock); 01432 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 01433 ASSERT(vp->v_count == 0); 01434 vrecycle(vp); 01435 zfs_rmnode(zp); 01436 return; 01437 } 01438 01439 mutex_exit(&zp->z_lock); 01440 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 01441 } 01442 01443 void 01444 zfs_znode_free(znode_t *zp) 01445 { 01446 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01447 01448 ASSERT(ZTOV(zp) == NULL); 01449 ASSERT(zp->z_sa_hdl == NULL); 01450 mutex_enter(&zfsvfs->z_znodes_lock); 01451 POINTER_INVALIDATE(&zp->z_zfsvfs); 01452 list_remove(&zfsvfs->z_all_znodes, zp); 01453 mutex_exit(&zfsvfs->z_znodes_lock); 01454 01455 if (zp->z_acl_cached) { 01456 zfs_acl_free(zp->z_acl_cached); 01457 zp->z_acl_cached = NULL; 01458 } 01459 01460 kmem_cache_free(znode_cache, zp); 01461 01462 VFS_RELE(zfsvfs->z_vfs); 01463 } 01464 01465 void 01466 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], 01467 uint64_t ctime[2], boolean_t have_tx) 01468 { 01469 timestruc_t now; 01470 01471 gethrestime(&now); 01472 01473 if (have_tx) { /* will sa_bulk_update happen really soon? 
*/ 01474 zp->z_atime_dirty = 0; 01475 zp->z_seq++; 01476 } else { 01477 zp->z_atime_dirty = 1; 01478 } 01479 01480 if (flag & AT_ATIME) { 01481 ZFS_TIME_ENCODE(&now, zp->z_atime); 01482 } 01483 01484 if (flag & AT_MTIME) { 01485 ZFS_TIME_ENCODE(&now, mtime); 01486 if (zp->z_zfsvfs->z_use_fuids) { 01487 zp->z_pflags |= (ZFS_ARCHIVE | 01488 ZFS_AV_MODIFIED); 01489 } 01490 } 01491 01492 if (flag & AT_CTIME) { 01493 ZFS_TIME_ENCODE(&now, ctime); 01494 if (zp->z_zfsvfs->z_use_fuids) 01495 zp->z_pflags |= ZFS_ARCHIVE; 01496 } 01497 } 01498 01508 void 01509 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 01510 { 01511 int error; 01512 u_longlong_t dummy; 01513 01514 if (size <= zp->z_blksz) 01515 return; 01516 /* 01517 * If the file size is already greater than the current blocksize, 01518 * we will not grow. If there is more than one block in a file, 01519 * the blocksize cannot change. 01520 */ 01521 if (zp->z_blksz && zp->z_size > zp->z_blksz) 01522 return; 01523 01524 error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 01525 size, 0, tx); 01526 01527 if (error == ENOTSUP) 01528 return; 01529 ASSERT0(error); 01530 01531 /* What blocksize did we actually get? */ 01532 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); 01533 } 01534 01535 #ifdef sun 01536 01541 /* ARGSUSED */ 01542 static int 01543 zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, 01544 int flags, cred_t *cr) 01545 { 01546 ASSERT(0); 01547 return (0); 01548 } 01549 #endif /* sun */ 01550 01559 static int 01560 zfs_extend(znode_t *zp, uint64_t end) 01561 { 01562 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01563 dmu_tx_t *tx; 01564 rl_t *rl; 01565 uint64_t newblksz; 01566 int error; 01567 01568 /* 01569 * We will change zp_size, lock the whole file. 01570 */ 01571 rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 01572 01573 /* 01574 * Nothing to do if file already at desired length. 
01575 */ 01576 if (end <= zp->z_size) { 01577 zfs_range_unlock(rl); 01578 return (0); 01579 } 01580 top: 01581 tx = dmu_tx_create(zfsvfs->z_os); 01582 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 01583 zfs_sa_upgrade_txholds(tx, zp); 01584 if (end > zp->z_blksz && 01585 (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 01586 /* 01587 * We are growing the file past the current block size. 01588 */ 01589 if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { 01590 ASSERT(!ISP2(zp->z_blksz)); 01591 newblksz = MIN(end, SPA_MAXBLOCKSIZE); 01592 } else { 01593 newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); 01594 } 01595 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); 01596 } else { 01597 newblksz = 0; 01598 } 01599 01600 error = dmu_tx_assign(tx, TXG_NOWAIT); 01601 if (error) { 01602 if (error == ERESTART) { 01603 dmu_tx_wait(tx); 01604 dmu_tx_abort(tx); 01605 goto top; 01606 } 01607 dmu_tx_abort(tx); 01608 zfs_range_unlock(rl); 01609 return (error); 01610 } 01611 01612 if (newblksz) 01613 zfs_grow_blocksize(zp, newblksz, tx); 01614 01615 zp->z_size = end; 01616 01617 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), 01618 &zp->z_size, sizeof (zp->z_size), tx)); 01619 01620 vnode_pager_setsize(ZTOV(zp), end); 01621 01622 zfs_range_unlock(rl); 01623 01624 dmu_tx_commit(tx); 01625 01626 return (0); 01627 } 01628 01638 static int 01639 zfs_free_range( znode_t *zp, uint64_t off, uint64_t len) 01640 { 01641 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01642 rl_t *rl; 01643 int error; 01644 01645 /* 01646 * Lock the range being freed. 01647 */ 01648 rl = zfs_range_lock(zp, off, len, RL_WRITER); 01649 01650 /* 01651 * Nothing to do if file already at desired length. 
01652 */ 01653 if (off >= zp->z_size) { 01654 zfs_range_unlock(rl); 01655 return (0); 01656 } 01657 01658 if (off + len > zp->z_size) 01659 len = zp->z_size - off; 01660 01661 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); 01662 01663 if (error == 0) { 01664 /* 01665 * In FreeBSD we cannot free block in the middle of a file, 01666 * but only at the end of a file, so this code path should 01667 * never happen. 01668 */ 01669 vnode_pager_setsize(ZTOV(zp), off); 01670 } 01671 01672 zfs_range_unlock(rl); 01673 01674 return (error); 01675 } 01676 01685 static int 01686 zfs_trunc(znode_t *zp, uint64_t end) 01687 { 01688 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01689 vnode_t *vp = ZTOV(zp); 01690 dmu_tx_t *tx; 01691 rl_t *rl; 01692 int error; 01693 sa_bulk_attr_t bulk[2]; 01694 int count = 0; 01695 01696 /* 01697 * We will change zp_size, lock the whole file. 01698 */ 01699 rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 01700 01701 /* 01702 * Nothing to do if file already at desired length. 
01703 */ 01704 if (end >= zp->z_size) { 01705 zfs_range_unlock(rl); 01706 return (0); 01707 } 01708 01709 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); 01710 if (error) { 01711 zfs_range_unlock(rl); 01712 return (error); 01713 } 01714 top: 01715 tx = dmu_tx_create(zfsvfs->z_os); 01716 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 01717 zfs_sa_upgrade_txholds(tx, zp); 01718 error = dmu_tx_assign(tx, TXG_NOWAIT); 01719 if (error) { 01720 if (error == ERESTART) { 01721 dmu_tx_wait(tx); 01722 dmu_tx_abort(tx); 01723 goto top; 01724 } 01725 dmu_tx_abort(tx); 01726 zfs_range_unlock(rl); 01727 return (error); 01728 } 01729 01730 zp->z_size = end; 01731 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 01732 NULL, &zp->z_size, sizeof (zp->z_size)); 01733 01734 if (end == 0) { 01735 zp->z_pflags &= ~ZFS_SPARSE; 01736 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 01737 NULL, &zp->z_pflags, 8); 01738 } 01739 VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); 01740 01741 dmu_tx_commit(tx); 01742 01743 /* 01744 * Clear any mapped pages in the truncated region. This has to 01745 * happen outside of the transaction to avoid the possibility of 01746 * a deadlock with someone trying to push a page that we are 01747 * about to invalidate. 
01748 */ 01749 vnode_pager_setsize(vp, end); 01750 01751 zfs_range_unlock(rl); 01752 01753 return (0); 01754 } 01755 01767 int 01768 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 01769 { 01770 vnode_t *vp = ZTOV(zp); 01771 dmu_tx_t *tx; 01772 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01773 zilog_t *zilog = zfsvfs->z_log; 01774 uint64_t mode; 01775 uint64_t mtime[2], ctime[2]; 01776 sa_bulk_attr_t bulk[3]; 01777 int count = 0; 01778 int error; 01779 01780 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, 01781 sizeof (mode))) != 0) 01782 return (error); 01783 01784 if (off > zp->z_size) { 01785 error = zfs_extend(zp, off+len); 01786 if (error == 0 && log) 01787 goto log; 01788 else 01789 return (error); 01790 } 01791 01792 /* 01793 * Check for any locks in the region to be freed. 01794 */ 01795 01796 if (MANDLOCK(vp, (mode_t)mode)) { 01797 uint64_t length = (len ? len : zp->z_size - off); 01798 if (error = chklock(vp, FWRITE, off, length, flag, NULL)) 01799 return (error); 01800 } 01801 01802 if (len == 0) { 01803 error = zfs_trunc(zp, off); 01804 } else { 01805 if ((error = zfs_free_range(zp, off, len)) == 0 && 01806 off + len > zp->z_size) 01807 error = zfs_extend(zp, off+len); 01808 } 01809 if (error || !log) 01810 return (error); 01811 log: 01812 tx = dmu_tx_create(zfsvfs->z_os); 01813 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 01814 zfs_sa_upgrade_txholds(tx, zp); 01815 error = dmu_tx_assign(tx, TXG_NOWAIT); 01816 if (error) { 01817 if (error == ERESTART) { 01818 dmu_tx_wait(tx); 01819 dmu_tx_abort(tx); 01820 goto log; 01821 } 01822 dmu_tx_abort(tx); 01823 return (error); 01824 } 01825 01826 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); 01827 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); 01828 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 01829 NULL, &zp->z_pflags, 8); 01830 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 01831 error = 
sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 01832 ASSERT(error == 0); 01833 01834 zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 01835 01836 dmu_tx_commit(tx); 01837 return (0); 01838 } 01839 01840 void 01841 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) 01842 { 01843 zfsvfs_t zfsvfs; 01844 uint64_t moid, obj, sa_obj, version; 01845 uint64_t sense = ZFS_CASE_SENSITIVE; 01846 uint64_t norm = 0; 01847 nvpair_t *elem; 01848 int error; 01849 int i; 01850 znode_t *rootzp = NULL; 01851 vnode_t vnode; 01852 vattr_t vattr; 01853 znode_t *zp; 01854 zfs_acl_ids_t acl_ids; 01855 01856 /* 01857 * First attempt to create master node. 01858 */ 01859 /* 01860 * In an empty objset, there are no blocks to read and thus 01861 * there can be no i/o errors (which we assert below). 01862 */ 01863 moid = MASTER_NODE_OBJ; 01864 error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 01865 DMU_OT_NONE, 0, tx); 01866 ASSERT(error == 0); 01867 01868 /* 01869 * Set starting attributes. 
01870 */ 01871 version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); 01872 elem = NULL; 01873 while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { 01874 /* For the moment we expect all zpl props to be uint64_ts */ 01875 uint64_t val; 01876 char *name; 01877 01878 ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); 01879 VERIFY(nvpair_value_uint64(elem, &val) == 0); 01880 name = nvpair_name(elem); 01881 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { 01882 if (val < version) 01883 version = val; 01884 } else { 01885 error = zap_update(os, moid, name, 8, 1, &val, tx); 01886 } 01887 ASSERT(error == 0); 01888 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) 01889 norm = val; 01890 else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) 01891 sense = val; 01892 } 01893 ASSERT(version != 0); 01894 error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); 01895 01896 /* 01897 * Create zap object used for SA attribute registration 01898 */ 01899 01900 if (version >= ZPL_VERSION_SA) { 01901 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 01902 DMU_OT_NONE, 0, tx); 01903 error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 01904 ASSERT(error == 0); 01905 } else { 01906 sa_obj = 0; 01907 } 01908 /* 01909 * Create a delete queue. 01910 */ 01911 obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 01912 01913 error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); 01914 ASSERT(error == 0); 01915 01916 /* 01917 * Create root znode. Create minimal znode/vnode/zfsvfs 01918 * to allow zfs_mknode to work. 
01919 */ 01920 VATTR_NULL(&vattr); 01921 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 01922 vattr.va_type = VDIR; 01923 vattr.va_mode = S_IFDIR|0755; 01924 vattr.va_uid = crgetuid(cr); 01925 vattr.va_gid = crgetgid(cr); 01926 01927 bzero(&zfsvfs, sizeof (zfsvfs_t)); 01928 01929 rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 01930 zfs_znode_cache_constructor(rootzp, NULL, 0); 01931 ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); 01932 rootzp->z_moved = 0; 01933 rootzp->z_unlinked = 0; 01934 rootzp->z_atime_dirty = 0; 01935 rootzp->z_is_sa = USE_SA(version, os); 01936 01937 vnode.v_type = VDIR; 01938 vnode.v_data = rootzp; 01939 rootzp->z_vnode = &vnode; 01940 01941 zfsvfs.z_os = os; 01942 zfsvfs.z_parent = &zfsvfs; 01943 zfsvfs.z_version = version; 01944 zfsvfs.z_use_fuids = USE_FUIDS(version, os); 01945 zfsvfs.z_use_sa = USE_SA(version, os); 01946 zfsvfs.z_norm = norm; 01947 01948 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 01949 &zfsvfs.z_attr_table); 01950 01951 ASSERT(error == 0); 01952 01953 /* 01954 * Fold case on file systems that are always or sometimes case 01955 * insensitive. 
01956 */ 01957 if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) 01958 zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER; 01959 01960 mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 01961 list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), 01962 offsetof(znode_t, z_link_node)); 01963 01964 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 01965 mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 01966 01967 rootzp->z_zfsvfs = &zfsvfs; 01968 VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, 01969 cr, NULL, &acl_ids)); 01970 zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); 01971 ASSERT3P(zp, ==, rootzp); 01972 error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); 01973 ASSERT(error == 0); 01974 zfs_acl_ids_free(&acl_ids); 01975 POINTER_INVALIDATE(&rootzp->z_zfsvfs); 01976 01977 sa_handle_destroy(rootzp->z_sa_hdl); 01978 rootzp->z_vnode = NULL; 01979 kmem_cache_free(znode_cache, rootzp); 01980 01981 /* 01982 * Create shares directory 01983 */ 01984 01985 error = zfs_create_share_dir(&zfsvfs, tx); 01986 01987 ASSERT(error == 0); 01988 01989 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 01990 mutex_destroy(&zfsvfs.z_hold_mtx[i]); 01991 } 01992 01993 #endif /* _KERNEL */ 01994 01995 static int 01996 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) 01997 { 01998 uint64_t sa_obj = 0; 01999 int error; 02000 02001 error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); 02002 if (error != 0 && error != ENOENT) 02003 return (error); 02004 02005 error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); 02006 return (error); 02007 } 02008 02009 static int 02010 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, 02011 dmu_buf_t **db, void *tag) 02012 { 02013 dmu_object_info_t doi; 02014 int error; 02015 02016 if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) 02017 return (error); 02018 02019 dmu_object_info_from_db(*db, &doi); 02020 if ((doi.doi_bonus_type != DMU_OT_SA && 02021 
doi.doi_bonus_type != DMU_OT_ZNODE) || 02022 doi.doi_bonus_type == DMU_OT_ZNODE && 02023 doi.doi_bonus_size < sizeof (znode_phys_t)) { 02024 sa_buf_rele(*db, tag); 02025 return (ENOTSUP); 02026 } 02027 02028 error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); 02029 if (error != 0) { 02030 sa_buf_rele(*db, tag); 02031 return (error); 02032 } 02033 02034 return (0); 02035 } 02036 02037 void 02038 zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) 02039 { 02040 sa_handle_destroy(hdl); 02041 sa_buf_rele(db, tag); 02042 } 02043 02048 static int 02049 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, 02050 uint64_t *pobjp, int *is_xattrdir) 02051 { 02052 uint64_t parent; 02053 uint64_t pflags; 02054 uint64_t mode; 02055 uint64_t parent_mode; 02056 sa_bulk_attr_t bulk[3]; 02057 sa_handle_t *sa_hdl; 02058 dmu_buf_t *sa_db; 02059 int count = 0; 02060 int error; 02061 02062 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, 02063 &parent, sizeof (parent)); 02064 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, 02065 &pflags, sizeof (pflags)); 02066 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 02067 &mode, sizeof (mode)); 02068 02069 if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) 02070 return (error); 02071 02072 /* 02073 * When a link is removed its parent pointer is not changed and will 02074 * be invalid. There are two cases where a link is removed but the 02075 * file stays around, when it goes to the delete queue and when there 02076 * are additional links. 
02077 */ 02078 error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); 02079 if (error != 0) 02080 return (error); 02081 02082 error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); 02083 zfs_release_sa_handle(sa_hdl, sa_db, FTAG); 02084 if (error != 0) 02085 return (error); 02086 02087 *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); 02088 02089 /* 02090 * Extended attributes can be applied to files, directories, etc. 02091 * Otherwise the parent must be a directory. 02092 */ 02093 if (!*is_xattrdir && !S_ISDIR(parent_mode)) 02094 return (EINVAL); 02095 02096 *pobjp = parent; 02097 02098 return (0); 02099 } 02100 02104 static int 02105 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, 02106 zfs_stat_t *sb) 02107 { 02108 sa_bulk_attr_t bulk[4]; 02109 int count = 0; 02110 02111 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 02112 &sb->zs_mode, sizeof (sb->zs_mode)); 02113 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, 02114 &sb->zs_gen, sizeof (sb->zs_gen)); 02115 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, 02116 &sb->zs_links, sizeof (sb->zs_links)); 02117 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, 02118 &sb->zs_ctime, sizeof (sb->zs_ctime)); 02119 02120 return (sa_bulk_lookup(hdl, bulk, count)); 02121 } 02122 02123 static int 02124 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, 02125 sa_attr_type_t *sa_table, char *buf, int len) 02126 { 02127 sa_handle_t *sa_hdl; 02128 sa_handle_t *prevhdl = NULL; 02129 dmu_buf_t *prevdb = NULL; 02130 dmu_buf_t *sa_db = NULL; 02131 char *path = buf + len - 1; 02132 int error; 02133 02134 *path = '\0'; 02135 sa_hdl = hdl; 02136 02137 for (;;) { 02138 uint64_t pobj; 02139 char component[MAXNAMELEN + 2]; 02140 size_t complen; 02141 int is_xattrdir; 02142 02143 if (prevdb) 02144 zfs_release_sa_handle(prevhdl, prevdb, FTAG); 02145 02146 if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, 02147 &is_xattrdir)) != 
0) 02148 break; 02149 02150 if (pobj == obj) { 02151 if (path[0] != '/') 02152 *--path = '/'; 02153 break; 02154 } 02155 02156 component[0] = '/'; 02157 if (is_xattrdir) { 02158 (void) sprintf(component + 1, "<xattrdir>"); 02159 } else { 02160 error = zap_value_search(osp, pobj, obj, 02161 ZFS_DIRENT_OBJ(-1ULL), component + 1); 02162 if (error != 0) 02163 break; 02164 } 02165 02166 complen = strlen(component); 02167 path -= complen; 02168 ASSERT(path >= buf); 02169 bcopy(component, path, complen); 02170 obj = pobj; 02171 02172 if (sa_hdl != hdl) { 02173 prevhdl = sa_hdl; 02174 prevdb = sa_db; 02175 } 02176 error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); 02177 if (error != 0) { 02178 sa_hdl = prevhdl; 02179 sa_db = prevdb; 02180 break; 02181 } 02182 } 02183 02184 if (sa_hdl != NULL && sa_hdl != hdl) { 02185 ASSERT(sa_db != NULL); 02186 zfs_release_sa_handle(sa_hdl, sa_db, FTAG); 02187 } 02188 02189 if (error == 0) 02190 (void) memmove(buf, path, buf + len - path); 02191 02192 return (error); 02193 } 02194 02195 int 02196 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) 02197 { 02198 sa_attr_type_t *sa_table; 02199 sa_handle_t *hdl; 02200 dmu_buf_t *db; 02201 int error; 02202 02203 error = zfs_sa_setup(osp, &sa_table); 02204 if (error != 0) 02205 return (error); 02206 02207 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); 02208 if (error != 0) 02209 return (error); 02210 02211 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); 02212 02213 zfs_release_sa_handle(hdl, db, FTAG); 02214 return (error); 02215 } 02216 02217 int 02218 zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, 02219 char *buf, int len) 02220 { 02221 char *path = buf + len - 1; 02222 sa_attr_type_t *sa_table; 02223 sa_handle_t *hdl; 02224 dmu_buf_t *db; 02225 int error; 02226 02227 *path = '\0'; 02228 02229 error = zfs_sa_setup(osp, &sa_table); 02230 if (error != 0) 02231 return (error); 02232 02233 error = zfs_grab_sa_handle(osp, obj, &hdl, 
&db, FTAG); 02234 if (error != 0) 02235 return (error); 02236 02237 error = zfs_obj_to_stats_impl(hdl, sa_table, sb); 02238 if (error != 0) { 02239 zfs_release_sa_handle(hdl, db, FTAG); 02240 return (error); 02241 } 02242 02243 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); 02244 02245 zfs_release_sa_handle(hdl, db, FTAG); 02246 return (error); 02247 }