FreeBSD ZFS
The Zettabyte File System
|
00001 /* 00002 * CDDL HEADER START 00003 * 00004 * The contents of this file are subject to the terms of the 00005 * Common Development and Distribution License (the "License"). 00006 * You may not use this file except in compliance with the License. 00007 * 00008 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 00009 * or http://www.opensolaris.org/os/licensing. 00010 * See the License for the specific language governing permissions 00011 * and limitations under the License. 00012 * 00013 * When distributing Covered Code, include this CDDL HEADER in each 00014 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 00015 * If applicable, add the following below this CDDL HEADER, with the 00016 * fields enclosed by brackets "[]" replaced with your own identifying 00017 * information: Portions Copyright [yyyy] [name of copyright owner] 00018 * 00019 * CDDL HEADER END 00020 */ 00021 /* 00022 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 00023 */ 00024 00025 #include <sys/types.h> 00026 #include <sys/param.h> 00027 #include <sys/time.h> 00028 #include <sys/systm.h> 00029 #include <sys/sysmacros.h> 00030 #include <sys/resource.h> 00031 #include <sys/vfs.h> 00032 #include <sys/vnode.h> 00033 #include <sys/file.h> 00034 #include <sys/kmem.h> 00035 #include <sys/uio.h> 00036 #include <sys/cmn_err.h> 00037 #include <sys/errno.h> 00038 #include <sys/stat.h> 00039 #include <sys/unistd.h> 00040 #include <sys/sunddi.h> 00041 #include <sys/random.h> 00042 #include <sys/policy.h> 00043 #include <sys/kcondvar.h> 00044 #include <sys/callb.h> 00045 #include <sys/smp.h> 00046 #include <sys/zfs_dir.h> 00047 #include <sys/zfs_acl.h> 00048 #include <sys/fs/zfs.h> 00049 #include <sys/zap.h> 00050 #include <sys/dmu.h> 00051 #include <sys/atomic.h> 00052 #include <sys/zfs_ctldir.h> 00053 #include <sys/zfs_fuid.h> 00054 #include <sys/sa.h> 00055 #include <sys/zfs_sa.h> 00056 #include <sys/dnlc.h> 00057 #include <sys/extdirent.h> 00058 00063 static int 00064 zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact, 00065 boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid) 00066 { 00067 int error; 00068 00069 if (zfsvfs->z_norm) { 00070 matchtype_t mt = MT_FIRST; 00071 boolean_t conflict = B_FALSE; 00072 size_t bufsz = 0; 00073 char *buf = NULL; 00074 00075 if (rpnp) { 00076 buf = rpnp->pn_buf; 00077 bufsz = rpnp->pn_bufsize; 00078 } 00079 if (exact) 00080 mt = MT_EXACT; 00081 /* 00082 * In the non-mixed case we only expect there would ever 00083 * be one match, but we need to use the normalizing lookup. 00084 */ 00085 error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, 00086 zoid, mt, buf, bufsz, &conflict); 00087 if (!error && deflags) 00088 *deflags = conflict ? ED_CASE_CONFLICT : 0; 00089 } else { 00090 error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); 00091 } 00092 *zoid = ZFS_DIRENT_OBJ(*zoid); 00093 00094 if (error == ENOENT && update) 00095 dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE); 00096 00097 return (error); 00098 } 00099 00135 int 00136 zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, 00137 int flag, int *direntflags, pathname_t *realpnp) 00138 { 00139 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 00140 zfs_dirlock_t *dl; 00141 boolean_t update; 00142 boolean_t exact; 00143 uint64_t zoid; 00144 vnode_t *vp = NULL; 00145 int error = 0; 00146 int cmpflags; 00147 00148 *zpp = NULL; 00149 *dlpp = NULL; 00150 00151 /* 00152 * Verify that we are not trying to lock '.', '..', or '.zfs' 00153 */ 00154 if (name[0] == '.' && 00155 (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) || 00156 zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) 00157 return (EEXIST); 00158 00159 /* 00160 * Case sensitivity and normalization preferences are set when 00161 * the file system is created. These are stored in the 00162 * zfsvfs->z_case and zfsvfs->z_norm fields. These choices 00163 * affect what vnodes can be cached in the DNLC, how we 00164 * perform zap lookups, and the "width" of our dirlocks. 00165 * 00166 * A normal dirlock locks a single name. Note that with 00167 * normalization a name can be composed multiple ways, but 00168 * when normalized, these names all compare equal. A wide 00169 * dirlock locks multiple names. We need these when the file 00170 * system is supporting mixed-mode access. It is sometimes 00171 * necessary to lock all case permutations of file name at 00172 * once so that simultaneous case-insensitive/case-sensitive 00173 * behaves as rationally as possible. 00174 */ 00175 00176 /* 00177 * Decide if exact matches should be requested when performing 00178 * a zap lookup on file systems supporting case-insensitive 00179 * access. 00180 */ 00181 exact = 00182 ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || 00183 ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK)); 00184 00185 /* 00186 * Only look in or update the DNLC if we are looking for the 00187 * name on a file system that does not require normalization 00188 * or case folding. We can also look there if we happen to be 00189 * on a non-normalizing, mixed sensitivity file system IF we 00190 * are looking for the exact name. 00191 * 00192 * Maybe can add TO-UPPERed version of name to dnlc in ci-only 00193 * case for performance improvement? 00194 */ 00195 update = !zfsvfs->z_norm || 00196 ((zfsvfs->z_case == ZFS_CASE_MIXED) && 00197 !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); 00198 00199 /* 00200 * ZRENAMING indicates we are in a situation where we should 00201 * take narrow locks regardless of the file system's 00202 * preferences for normalizing and case folding. This will 00203 * prevent us deadlocking trying to grab the same wide lock 00204 * twice if the two names happen to be case-insensitive 00205 * matches. 00206 */ 00207 if (flag & ZRENAMING) 00208 cmpflags = 0; 00209 else 00210 cmpflags = zfsvfs->z_norm; 00211 00212 /* 00213 * Wait until there are no locks on this name. 00214 * 00215 * Don't grab the the lock if it is already held. However, cannot 00216 * have both ZSHARED and ZHAVELOCK together. 00217 */ 00218 ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); 00219 if (!(flag & ZHAVELOCK)) 00220 rw_enter(&dzp->z_name_lock, RW_READER); 00221 00222 mutex_enter(&dzp->z_lock); 00223 for (;;) { 00224 if (dzp->z_unlinked) { 00225 mutex_exit(&dzp->z_lock); 00226 if (!(flag & ZHAVELOCK)) 00227 rw_exit(&dzp->z_name_lock); 00228 return (ENOENT); 00229 } 00230 for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { 00231 if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, 00232 U8_UNICODE_LATEST, &error) == 0) || error != 0) 00233 break; 00234 } 00235 if (error != 0) { 00236 mutex_exit(&dzp->z_lock); 00237 if (!(flag & ZHAVELOCK)) 00238 rw_exit(&dzp->z_name_lock); 00239 return (ENOENT); 00240 } 00241 if (dl == NULL) { 00242 size_t namesize; 00243 00244 /* 00245 * Allocate a new dirlock and add it to the list. 00246 */ 00247 namesize = strlen(name) + 1; 00248 dl = kmem_alloc(sizeof (zfs_dirlock_t) + namesize, 00249 KM_SLEEP); 00250 cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); 00251 dl->dl_name = (char *)(dl + 1); 00252 bcopy(name, dl->dl_name, namesize); 00253 dl->dl_sharecnt = 0; 00254 dl->dl_namelock = 0; 00255 dl->dl_namesize = namesize; 00256 dl->dl_dzp = dzp; 00257 dl->dl_next = dzp->z_dirlocks; 00258 dzp->z_dirlocks = dl; 00259 break; 00260 } 00261 if ((flag & ZSHARED) && dl->dl_sharecnt != 0) 00262 break; 00263 cv_wait(&dl->dl_cv, &dzp->z_lock); 00264 } 00265 00266 /* 00267 * If the z_name_lock was NOT held for this dirlock record it. 00268 */ 00269 if (flag & ZHAVELOCK) 00270 dl->dl_namelock = 1; 00271 00272 if (flag & ZSHARED) 00273 dl->dl_sharecnt++; 00274 00275 mutex_exit(&dzp->z_lock); 00276 00277 /* 00278 * We have a dirlock on the name. (Note that it is the dirlock, 00279 * not the dzp's z_lock, that protects the name in the zap object.) 00280 * See if there's an object by this name; if so, put a hold on it. 00281 */ 00282 if (flag & ZXATTR) { 00283 error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, 00284 sizeof (zoid)); 00285 if (error == 0) 00286 error = (zoid == 0 ? ENOENT : 0); 00287 } else { 00288 if (update) 00289 vp = dnlc_lookup(ZTOV(dzp), name); 00290 if (vp == DNLC_NO_VNODE) { 00291 VN_RELE(vp); 00292 error = ENOENT; 00293 } else if (vp) { 00294 if (flag & ZNEW) { 00295 zfs_dirent_unlock(dl); 00296 VN_RELE(vp); 00297 return (EEXIST); 00298 } 00299 *dlpp = dl; 00300 *zpp = VTOZ(vp); 00301 return (0); 00302 } else { 00303 error = zfs_match_find(zfsvfs, dzp, name, exact, 00304 update, direntflags, realpnp, &zoid); 00305 } 00306 } 00307 if (error) { 00308 if (error != ENOENT || (flag & ZEXISTS)) { 00309 zfs_dirent_unlock(dl); 00310 return (error); 00311 } 00312 } else { 00313 if (flag & ZNEW) { 00314 zfs_dirent_unlock(dl); 00315 return (EEXIST); 00316 } 00317 error = zfs_zget(zfsvfs, zoid, zpp); 00318 if (error) { 00319 zfs_dirent_unlock(dl); 00320 return (error); 00321 } 00322 if (!(flag & ZXATTR) && update) 00323 dnlc_update(ZTOV(dzp), name, ZTOV(*zpp)); 00324 } 00325 00326 *dlpp = dl; 00327 00328 return (0); 00329 } 00330 00334 void 00335 zfs_dirent_unlock(zfs_dirlock_t *dl) 00336 { 00337 znode_t *dzp = dl->dl_dzp; 00338 zfs_dirlock_t **prev_dl, *cur_dl; 00339 00340 mutex_enter(&dzp->z_lock); 00341 00342 if (!dl->dl_namelock) 00343 rw_exit(&dzp->z_name_lock); 00344 00345 if (dl->dl_sharecnt > 1) { 00346 dl->dl_sharecnt--; 00347 mutex_exit(&dzp->z_lock); 00348 return; 00349 } 00350 prev_dl = &dzp->z_dirlocks; 00351 while ((cur_dl = *prev_dl) != dl) 00352 prev_dl = &cur_dl->dl_next; 00353 *prev_dl = dl->dl_next; 00354 cv_broadcast(&dl->dl_cv); 00355 mutex_exit(&dzp->z_lock); 00356 00357 cv_destroy(&dl->dl_cv); 00358 kmem_free(dl, sizeof (*dl) + dl->dl_namesize); 00359 } 00360 00369 int 00370 zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, 00371 int *deflg, pathname_t *rpnp) 00372 { 00373 zfs_dirlock_t *dl; 00374 znode_t *zp; 00375 int error = 0; 00376 uint64_t parent; 00377 int unlinked; 00378 00379 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 00380 mutex_enter(&dzp->z_lock); 00381 unlinked = dzp->z_unlinked; 00382 mutex_exit(&dzp->z_lock); 00383 if (unlinked) 00384 return (ENOENT); 00385 00386 *vpp = ZTOV(dzp); 00387 VN_HOLD(*vpp); 00388 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 00389 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 00390 00391 /* 00392 * If we are a snapshot mounted under .zfs, return 00393 * the vp for the snapshot directory. 00394 */ 00395 if ((error = sa_lookup(dzp->z_sa_hdl, 00396 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 00397 return (error); 00398 if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { 00399 error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, 00400 "snapshot", vpp, NULL, 0, NULL, kcred, 00401 NULL, NULL, NULL); 00402 return (error); 00403 } 00404 00405 mutex_enter(&dzp->z_lock); 00406 unlinked = dzp->z_unlinked; 00407 mutex_exit(&dzp->z_lock); 00408 if (unlinked) 00409 return (ENOENT); 00410 00411 rw_enter(&dzp->z_parent_lock, RW_READER); 00412 error = zfs_zget(zfsvfs, parent, &zp); 00413 if (error == 0) 00414 *vpp = ZTOV(zp); 00415 rw_exit(&dzp->z_parent_lock); 00416 } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { 00417 *vpp = zfsctl_root(dzp); 00418 } else { 00419 int zf; 00420 00421 zf = ZEXISTS | ZSHARED; 00422 if (flags & FIGNORECASE) 00423 zf |= ZCILOOK; 00424 00425 error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); 00426 if (error == 0) { 00427 *vpp = ZTOV(zp); 00428 zfs_dirent_unlock(dl); 00429 dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ 00430 } 00431 rpnp = NULL; 00432 } 00433 00434 if ((flags & FIGNORECASE) && rpnp && !error) 00435 (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize); 00436 00437 return (error); 00438 } 00439 00454 void 00455 zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) 00456 { 00457 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 00458 00459 ASSERT(zp->z_unlinked); 00460 ASSERT(zp->z_links == 0); 00461 00462 VERIFY3U(0, ==, 00463 zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 00464 } 00465 00466 /* 00467 * Clean up any znodes that had no links when we either crashed or 00468 * (force) umounted the file system. 00469 */ 00470 void 00471 zfs_unlinked_drain(zfsvfs_t *zfsvfs) 00472 { 00473 zap_cursor_t zc; 00474 zap_attribute_t zap; 00475 dmu_object_info_t doi; 00476 znode_t *zp; 00477 int error; 00478 00479 /* 00480 * Interate over the contents of the unlinked set. 00481 */ 00482 for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); 00483 zap_cursor_retrieve(&zc, &zap) == 0; 00484 zap_cursor_advance(&zc)) { 00485 00486 /* 00487 * See what kind of object we have in list 00488 */ 00489 00490 error = dmu_object_info(zfsvfs->z_os, 00491 zap.za_first_integer, &doi); 00492 if (error != 0) 00493 continue; 00494 00495 ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || 00496 (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); 00497 /* 00498 * We need to re-mark these list entries for deletion, 00499 * so we pull them back into core and set zp->z_unlinked. 00500 */ 00501 error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); 00502 00503 /* 00504 * We may pick up znodes that are already marked for deletion. 00505 * This could happen during the purge of an extended attribute 00506 * directory. All we need to do is skip over them, since they 00507 * are already in the system marked z_unlinked. 00508 */ 00509 if (error != 0) 00510 continue; 00511 00512 zp->z_unlinked = B_TRUE; 00513 VN_RELE(ZTOV(zp)); 00514 } 00515 zap_cursor_fini(&zc); 00516 } 00517 00529 static int 00530 zfs_purgedir(znode_t *dzp) 00531 { 00532 zap_cursor_t zc; 00533 zap_attribute_t zap; 00534 znode_t *xzp; 00535 dmu_tx_t *tx; 00536 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 00537 zfs_dirlock_t dl; 00538 int skipped = 0; 00539 int error; 00540 00541 for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); 00542 (error = zap_cursor_retrieve(&zc, &zap)) == 0; 00543 zap_cursor_advance(&zc)) { 00544 error = zfs_zget(zfsvfs, 00545 ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); 00546 if (error) { 00547 skipped += 1; 00548 continue; 00549 } 00550 00551 ASSERT((ZTOV(xzp)->v_type == VREG) || 00552 (ZTOV(xzp)->v_type == VLNK)); 00553 00554 tx = dmu_tx_create(zfsvfs->z_os); 00555 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 00556 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); 00557 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 00558 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 00559 /* Is this really needed ? */ 00560 zfs_sa_upgrade_txholds(tx, xzp); 00561 error = dmu_tx_assign(tx, TXG_WAIT); 00562 if (error) { 00563 dmu_tx_abort(tx); 00564 VN_RELE(ZTOV(xzp)); 00565 skipped += 1; 00566 continue; 00567 } 00568 bzero(&dl, sizeof (dl)); 00569 dl.dl_dzp = dzp; 00570 dl.dl_name = zap.za_name; 00571 00572 error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); 00573 if (error) 00574 skipped += 1; 00575 dmu_tx_commit(tx); 00576 00577 VN_RELE(ZTOV(xzp)); 00578 } 00579 zap_cursor_fini(&zc); 00580 if (error != ENOENT) 00581 skipped += 1; 00582 return (skipped); 00583 } 00584 00585 void 00586 zfs_rmnode(znode_t *zp) 00587 { 00588 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 00589 objset_t *os = zfsvfs->z_os; 00590 znode_t *xzp = NULL; 00591 dmu_tx_t *tx; 00592 uint64_t acl_obj; 00593 uint64_t xattr_obj; 00594 int error; 00595 00596 ASSERT(zp->z_links == 0); 00597 00598 /* 00599 * If this is an attribute directory, purge its contents. 00600 */ 00601 if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR && 00602 (zp->z_pflags & ZFS_XATTR)) { 00603 if (zfs_purgedir(zp) != 0) { 00604 /* 00605 * Not enough space to delete some xattrs. 00606 * Leave it in the unlinked set. 00607 */ 00608 zfs_znode_dmu_fini(zp); 00609 zfs_znode_free(zp); 00610 return; 00611 } 00612 } 00613 00614 /* 00615 * Free up all the data in the file. 00616 */ 00617 error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); 00618 if (error) { 00619 /* 00620 * Not enough space. Leave the file in the unlinked set. 00621 */ 00622 zfs_znode_dmu_fini(zp); 00623 zfs_znode_free(zp); 00624 return; 00625 } 00626 00627 /* 00628 * If the file has extended attributes, we're going to unlink 00629 * the xattr dir. 00630 */ 00631 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 00632 &xattr_obj, sizeof (xattr_obj)); 00633 if (error == 0 && xattr_obj) { 00634 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 00635 ASSERT(error == 0); 00636 } 00637 00638 acl_obj = zfs_external_acl(zp); 00639 00640 /* 00641 * Set up the final transaction. 00642 */ 00643 tx = dmu_tx_create(os); 00644 dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); 00645 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 00646 if (xzp) { 00647 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); 00648 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 00649 } 00650 if (acl_obj) 00651 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 00652 00653 zfs_sa_upgrade_txholds(tx, zp); 00654 error = dmu_tx_assign(tx, TXG_WAIT); 00655 if (error) { 00656 /* 00657 * Not enough space to delete the file. Leave it in the 00658 * unlinked set, leaking it until the fs is remounted (at 00659 * which point we'll call zfs_unlinked_drain() to process it). 00660 */ 00661 dmu_tx_abort(tx); 00662 zfs_znode_dmu_fini(zp); 00663 zfs_znode_free(zp); 00664 goto out; 00665 } 00666 00667 if (xzp) { 00668 ASSERT(error == 0); 00669 mutex_enter(&xzp->z_lock); 00670 xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ 00671 xzp->z_links = 0; /* no more links to it */ 00672 VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 00673 &xzp->z_links, sizeof (xzp->z_links), tx)); 00674 mutex_exit(&xzp->z_lock); 00675 zfs_unlinked_add(xzp, tx); 00676 } 00677 00678 /* Remove this znode from the unlinked set */ 00679 VERIFY3U(0, ==, 00680 zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 00681 00682 zfs_znode_delete(zp, tx); 00683 00684 dmu_tx_commit(tx); 00685 out: 00686 if (xzp) 00687 VN_RELE(ZTOV(xzp)); 00688 } 00689 00690 static uint64_t 00691 zfs_dirent(znode_t *zp, uint64_t mode) 00692 { 00693 uint64_t de = zp->z_id; 00694 00695 if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE) 00696 de |= IFTODT(mode) << 60; 00697 return (de); 00698 } 00699 00703 int 00704 zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) 00705 { 00706 znode_t *dzp = dl->dl_dzp; 00707 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 00708 vnode_t *vp = ZTOV(zp); 00709 uint64_t value; 00710 int zp_is_dir = (vp->v_type == VDIR); 00711 sa_bulk_attr_t bulk[5]; 00712 uint64_t mtime[2], ctime[2]; 00713 int count = 0; 00714 int error; 00715 00716 mutex_enter(&zp->z_lock); 00717 00718 if (!(flag & ZRENAMING)) { 00719 if (zp->z_unlinked) { /* no new links to unlinked zp */ 00720 ASSERT(!(flag & (ZNEW | ZEXISTS))); 00721 mutex_exit(&zp->z_lock); 00722 return (ENOENT); 00723 } 00724 zp->z_links++; 00725 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 00726 &zp->z_links, sizeof (zp->z_links)); 00727 00728 } 00729 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, 00730 &dzp->z_id, sizeof (dzp->z_id)); 00731 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 00732 &zp->z_pflags, sizeof (zp->z_pflags)); 00733 00734 if (!(flag & ZNEW)) { 00735 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 00736 ctime, sizeof (ctime)); 00737 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, 00738 ctime, B_TRUE); 00739 } 00740 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 00741 ASSERT(error == 0); 00742 00743 mutex_exit(&zp->z_lock); 00744 00745 mutex_enter(&dzp->z_lock); 00746 dzp->z_size++; 00747 dzp->z_links += zp_is_dir; 00748 count = 0; 00749 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 00750 &dzp->z_size, sizeof (dzp->z_size)); 00751 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 00752 &dzp->z_links, sizeof (dzp->z_links)); 00753 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 00754 mtime, sizeof (mtime)); 00755 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 00756 ctime, sizeof (ctime)); 00757 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 00758 &dzp->z_pflags, sizeof (dzp->z_pflags)); 00759 zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 00760 error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 00761 ASSERT(error == 0); 00762 mutex_exit(&dzp->z_lock); 00763 00764 value = zfs_dirent(zp, zp->z_mode); 00765 error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, 00766 8, 1, &value, tx); 00767 ASSERT(error == 0); 00768 00769 dnlc_update(ZTOV(dzp), dl->dl_name, vp); 00770 00771 return (0); 00772 } 00773 00774 static int 00775 zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, 00776 int flag) 00777 { 00778 int error; 00779 00780 if (zp->z_zfsvfs->z_norm) { 00781 if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && 00782 (flag & ZCIEXACT)) || 00783 ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) && 00784 !(flag & ZCILOOK))) 00785 error = zap_remove_norm(zp->z_zfsvfs->z_os, 00786 dzp->z_id, dl->dl_name, MT_EXACT, tx); 00787 else 00788 error = zap_remove_norm(zp->z_zfsvfs->z_os, 00789 dzp->z_id, dl->dl_name, MT_FIRST, tx); 00790 } else { 00791 error = zap_remove(zp->z_zfsvfs->z_os, 00792 dzp->z_id, dl->dl_name, tx); 00793 } 00794 00795 return (error); 00796 } 00797 00805 int 00806 zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, 00807 boolean_t *unlinkedp) 00808 { 00809 znode_t *dzp = dl->dl_dzp; 00810 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 00811 vnode_t *vp = ZTOV(zp); 00812 int zp_is_dir = (vp->v_type == VDIR); 00813 boolean_t unlinked = B_FALSE; 00814 sa_bulk_attr_t bulk[5]; 00815 uint64_t mtime[2], ctime[2]; 00816 int count = 0; 00817 int error; 00818 00819 dnlc_remove(ZTOV(dzp), dl->dl_name); 00820 00821 if (!(flag & ZRENAMING)) { 00822 if (vn_vfswlock(vp)) /* prevent new mounts on zp */ 00823 return (EBUSY); 00824 00825 if (vn_ismntpt(vp)) { /* don't remove mount point */ 00826 vn_vfsunlock(vp); 00827 return (EBUSY); 00828 } 00829 00830 mutex_enter(&zp->z_lock); 00831 00832 if (zp_is_dir && !zfs_dirempty(zp)) { 00833 mutex_exit(&zp->z_lock); 00834 vn_vfsunlock(vp); 00835 return (ENOTEMPTY); 00836 } 00837 00838 /* 00839 * If we get here, we are going to try to remove the object. 00840 * First try removing the name from the directory; if that 00841 * fails, return the error. 00842 */ 00843 error = zfs_dropname(dl, zp, dzp, tx, flag); 00844 if (error != 0) { 00845 mutex_exit(&zp->z_lock); 00846 vn_vfsunlock(vp); 00847 return (error); 00848 } 00849 00850 if (zp->z_links <= zp_is_dir) { 00851 zfs_panic_recover("zfs: link count on vnode %p is %u, " 00852 "should be at least %u", zp->z_vnode, 00853 (int)zp->z_links, 00854 zp_is_dir + 1); 00855 zp->z_links = zp_is_dir + 1; 00856 } 00857 if (--zp->z_links == zp_is_dir) { 00858 zp->z_unlinked = B_TRUE; 00859 zp->z_links = 0; 00860 unlinked = B_TRUE; 00861 } else { 00862 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 00863 NULL, &ctime, sizeof (ctime)); 00864 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 00865 NULL, &zp->z_pflags, sizeof (zp->z_pflags)); 00866 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 00867 B_TRUE); 00868 } 00869 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 00870 NULL, &zp->z_links, sizeof (zp->z_links)); 00871 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 00872 count = 0; 00873 ASSERT(error == 0); 00874 mutex_exit(&zp->z_lock); 00875 vn_vfsunlock(vp); 00876 } else { 00877 error = zfs_dropname(dl, zp, dzp, tx, flag); 00878 if (error != 0) 00879 return (error); 00880 } 00881 00882 mutex_enter(&dzp->z_lock); 00883 dzp->z_size--; /* one dirent removed */ 00884 dzp->z_links -= zp_is_dir; /* ".." link from zp */ 00885 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 00886 NULL, &dzp->z_links, sizeof (dzp->z_links)); 00887 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 00888 NULL, &dzp->z_size, sizeof (dzp->z_size)); 00889 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 00890 NULL, ctime, sizeof (ctime)); 00891 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 00892 NULL, mtime, sizeof (mtime)); 00893 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 00894 NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); 00895 zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 00896 error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 00897 ASSERT(error == 0); 00898 mutex_exit(&dzp->z_lock); 00899 00900 if (unlinkedp != NULL) 00901 *unlinkedp = unlinked; 00902 else if (unlinked) 00903 zfs_unlinked_add(zp, tx); 00904 00905 return (0); 00906 } 00907 00913 boolean_t 00914 zfs_dirempty(znode_t *dzp) 00915 { 00916 return (dzp->z_size == 2 && dzp->z_dirlocks == 0); 00917 } 00918 00919 int 00920 zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) 00921 { 00922 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 00923 znode_t *xzp; 00924 dmu_tx_t *tx; 00925 int error; 00926 zfs_acl_ids_t acl_ids; 00927 boolean_t fuid_dirtied; 00928 uint64_t parent; 00929 00930 *xvpp = NULL; 00931 00932 /* 00933 * In FreeBSD, access checking for creating an EA is being done 00934 * in zfs_setextattr(), 00935 */ 00936 #ifndef __FreeBSD__ 00937 if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)) 00938 return (error); 00939 #endif 00940 00941 if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, 00942 &acl_ids)) != 0) 00943 return (error); 00944 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 00945 zfs_acl_ids_free(&acl_ids); 00946 return (EDQUOT); 00947 } 00948 00949 top: 00950 tx = dmu_tx_create(zfsvfs->z_os); 00951 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 00952 ZFS_SA_BASE_ATTR_SIZE); 00953 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 00954 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 00955 fuid_dirtied = zfsvfs->z_fuid_dirty; 00956 if (fuid_dirtied) 00957 zfs_fuid_txhold(zfsvfs, tx); 00958 error = dmu_tx_assign(tx, TXG_NOWAIT); 00959 if (error) { 00960 if (error == ERESTART) { 00961 dmu_tx_wait(tx); 00962 dmu_tx_abort(tx); 00963 goto top; 00964 } 00965 zfs_acl_ids_free(&acl_ids); 00966 dmu_tx_abort(tx); 00967 return (error); 00968 } 00969 zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); 00970 00971 if (fuid_dirtied) 00972 zfs_fuid_sync(zfsvfs, tx); 00973 00974 #ifdef DEBUG 00975 error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 00976 &parent, sizeof (parent)); 00977 ASSERT(error == 0 && parent == zp->z_id); 00978 #endif 00979 00980 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, 00981 sizeof (xzp->z_id), tx)); 00982 00983 (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, 00984 xzp, "", NULL, acl_ids.z_fuidp, vap); 00985 00986 zfs_acl_ids_free(&acl_ids); 00987 dmu_tx_commit(tx); 00988 00989 *xvpp = ZTOV(xzp); 00990 00991 return (0); 00992 } 00993 01007 int 01008 zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags) 01009 { 01010 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 01011 znode_t *xzp; 01012 zfs_dirlock_t *dl; 01013 vattr_t va; 01014 int error; 01015 top: 01016 error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL); 01017 if (error) 01018 return (error); 01019 01020 if (xzp != NULL) { 01021 *xvpp = ZTOV(xzp); 01022 zfs_dirent_unlock(dl); 01023 return (0); 01024 } 01025 01026 01027 if (!(flags & CREATE_XATTR_DIR)) { 01028 zfs_dirent_unlock(dl); 01029 #ifdef __FreeBSD__ 01030 return (ENOATTR); 01031 #else 01032 return (ENOENT); 01033 #endif 01034 } 01035 01036 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 01037 zfs_dirent_unlock(dl); 01038 return (EROFS); 01039 } 01040 01041 /* 01042 * The ability to 'create' files in an attribute 01043 * directory comes from the write_xattr permission on the base file. 01044 * 01045 * The ability to 'search' an attribute directory requires 01046 * read_xattr permission on the base file. 01047 * 01048 * Once in a directory the ability to read/write attributes 01049 * is controlled by the permissions on the attribute file. 01050 */ 01051 va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID; 01052 va.va_type = VDIR; 01053 va.va_mode = S_IFDIR | S_ISVTX | 0777; 01054 zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); 01055 01056 error = zfs_make_xattrdir(zp, &va, xvpp, cr); 01057 zfs_dirent_unlock(dl); 01058 01059 if (error == ERESTART) { 01060 /* NB: we already did dmu_tx_wait() if necessary */ 01061 goto top; 01062 } 01063 if (error == 0) 01064 VOP_UNLOCK(*xvpp, 0); 01065 01066 return (error); 01067 } 01068 01082 int 01083 zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) 01084 { 01085 uid_t uid; 01086 uid_t downer; 01087 uid_t fowner; 01088 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 01089 01090 if (zdp->z_zfsvfs->z_replay) 01091 return (0); 01092 01093 if ((zdp->z_mode & S_ISVTX) == 0) 01094 return (0); 01095 01096 downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER); 01097 fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER); 01098 01099 if ((uid = crgetuid(cr)) == downer || uid == fowner || 01100 (ZTOV(zp)->v_type == VREG && 01101 zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)) 01102 return (0); 01103 else 01104 return (secpolicy_vnode_remove(ZTOV(zp), cr)); 01105 }