FreeBSD ZFS — The Zettabyte File System (vnode operations)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2010 Robert Milkowski */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
#include <sys/uio.h>
#include <sys/atomic.h>
#include <sys/namei.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/sunddi.h>
#include <sys/filio.h>
#include <sys/sid.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_sa.h>
#include <sys/dnlc.h>
#include <sys/zfs_rlock.h>
#include <sys/extdirent.h>
#include <sys/kidmap.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sf_buf.h>
#include <sys/sched.h>
#include <sys/acl.h>
#include <vm/vm_param.h>
#include <vm/vm_pageout.h>
#include <vm/vm_page.h>

/*
 * NOTE(review): this is a scraped excerpt of the ZFS vnode-operations
 * implementation; the original file's "programming rules" block comment
 * was stripped by the extractor.  Common pattern throughout: every op
 * brackets itself with ZFS_ENTER()/ZFS_EXIT() (filesystem teardown
 * protection) and ZFS_VERIFY_ZP() (znode validity).
 */

/*
 * Open a file.  Denies a write-mode open of an append-only file unless
 * FAPPEND is also set, and (where virus scanning is enabled) refuses
 * access when the on-access scan fails.
 */
/* ARGSUSED */
static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(*vpp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Honor ZFS_APPENDONLY: writes must be append-mode writes. */
	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
	    ((flag & FAPPEND) == 0)) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/*
	 * Virus scan on open of a non-empty regular file that is not
	 * already quarantined (and is not in the .zfs control dir).
	 */
	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
	    ZTOV(zp)->v_type == VREG &&
	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
		if (fs_vscan(*vpp, cr, 0) != 0) {
			ZFS_EXIT(zfsvfs);
			return (EACCES);
		}
	}

	/* Keep a count of the synchronous opens in the znode */
	if (flag & (FSYNC | FDSYNC))
		atomic_inc_32(&zp->z_sync_cnt);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Close a file.  Releases POSIX locks/shares held by the process,
 * drops the synchronous-open count taken in zfs_open(), and runs the
 * on-close virus scan when configured.
 */
/* ARGSUSED */
static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	/*
	 * Clean up any locks held by this process on the vp.
	 */
	cleanlocks(vp, ddi_get_pid(), 0);
	cleanshares(vp, ddi_get_pid());

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Decrement the synchronous opens in the znode */
	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
		atomic_dec_32(&zp->z_sync_cnt);

	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
	    ZTOV(zp)->v_type == VREG &&
	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
		VERIFY(fs_vscan(vp, cr, 1) == 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Implement SEEK_HOLE/SEEK_DATA semantics for the _FIO_SEEK_HOLE and
 * _FIO_SEEK_DATA ioctls.  *off is in/out: on success it is advanced to
 * the next hole (cmd == _FIO_SEEK_HOLE) or next data region.  Returns
 * ENXIO when the offset is at/past EOF and no further region exists.
 */
static int
zfs_holey(vnode_t *vp, u_long cmd, offset_t *off)
{
	znode_t	*zp = VTOZ(vp);
	uint64_t noff = (uint64_t)*off; /* new offset */
	uint64_t file_sz;
	int error;
	boolean_t hole;

	file_sz = zp->z_size;
	if (noff >= file_sz) {
		return (ENXIO);
	}

	if (cmd == _FIO_SEEK_HOLE)
		hole = B_TRUE;
	else
		hole = B_FALSE;

	error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);

	/* end of file? */
	if ((error == ESRCH) || (noff > file_sz)) {
		/*
		 * Handle the virtual hole at the end of file.
		 */
		if (hole) {
			*off = file_sz;
			return (0);
		}
		return (ENXIO);
	}

	if (noff < *off)
		return (error);
	*off = noff;
	return (error);
}

/*
 * ioctl entry point.  Handles the bfu compatibility ioctls as no-ops
 * and dispatches _FIO_SEEK_DATA/_FIO_SEEK_HOLE to zfs_holey().  The
 * offset argument is copied in/out (ddi_copyin/out on Solaris, direct
 * dereference on FreeBSD).  Returns ENOTTY for unknown commands.
 */
/* ARGSUSED */
static int
zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
    int *rvalp, caller_context_t *ct)
{
	offset_t off;
	int error;
	zfsvfs_t *zfsvfs;
	znode_t *zp;

	switch (com) {
	case _FIOFFS:
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	case _FIOGDIO:
	case _FIOSDIO:
		return (0);

	case _FIO_SEEK_DATA:
	case _FIO_SEEK_HOLE:
#ifdef sun
		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
			return (EFAULT);
#else
		off = *(offset_t *)data;
#endif
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);

		/* offset parameter is in/out */
		error = zfs_holey(vp, com, &off);
		ZFS_EXIT(zfsvfs);
		if (error)
			return (error);
#ifdef sun
		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
			return (EFAULT);
#else
		*(offset_t *)data = off;
#endif
		return (0);
	}
	return (ENOTTY);
}

/*
 * Look up (and busy) the resident page backing [start, start+nbytes)
 * of the vnode's VM object, retrying if the page is busied by someone
 * else.  Returns NULL when no fully-valid page is resident (a cached
 * page at that index is freed so a fresh read can replace it).
 * Caller must hold the VM object lock.
 */
static vm_page_t
page_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
	vm_object_t obj;
	vm_page_t pp;

	obj = vp->v_object;
	VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED);

	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) {
			if ((pp->oflags & VPO_BUSY) != 0) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_sleep(pp, "zfsmwb");
				continue;
			}
			vm_page_busy(pp);
			vm_page_undirty(pp);
		} else {
			if (vm_page_is_cached(obj, OFF_TO_IDX(start)))
				vm_page_cache_free(obj, OFF_TO_IDX(start),
				    OFF_TO_IDX(start) + 1);
			pp = NULL;
		}
		break;
	}
	return (pp);
}

/* Release a page busied by page_lookup(). */
static void
page_unlock(vm_page_t pp)
{

	vm_page_wakeup(pp);
}

/* Map a VM page into KVA via an sf_buf; *sfp receives the buffer. */
static caddr_t
zfs_map_page(vm_page_t pp, struct sf_buf **sfp)
{

	*sfp = sf_buf_alloc(pp, 0);
	return ((caddr_t)sf_buf_kva(*sfp));
}

/* Undo zfs_map_page(). */
static void
zfs_unmap_page(struct sf_buf *sf)
{

	sf_buf_free(sf);
}

/*
 * Keep resident pages coherent with a write that has gone to the DMU.
 * For each resident page in [start, start+len): with UIO_NOCOPY the
 * page content is pushed into the DMU (dmu_write), otherwise the
 * freshly written DMU data is pulled back into the page (dmu_read).
 */
static void
update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
    int segflg, dmu_tx_t *tx)
{
	vm_object_t obj;
	struct sf_buf *sf;
	int off;

	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);

	off = start & PAGEOFFSET;
	VM_OBJECT_LOCK(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		int nbytes = MIN(PAGESIZE - off, len);

		if ((pp = page_lookup(vp, start, off, nbytes)) != NULL) {
			caddr_t va;

			/* Drop the object lock around the (sleepable) copy. */
			VM_OBJECT_UNLOCK(obj);
			va = zfs_map_page(pp, &sf);
			if (segflg == UIO_NOCOPY) {
				(void) dmu_write(os, oid, start+off, nbytes,
				    va+off, tx);
			} else {
				(void) dmu_read(os, oid, start+off, nbytes,
				    va+off, DMU_READ_PREFETCH);
			}
			zfs_unmap_page(sf);
			VM_OBJECT_LOCK(obj);
			page_unlock(pp);
		}
		len -= nbytes;
		off = 0;	/* only the first page can start mid-page */
	}
	VM_OBJECT_UNLOCK(obj);
}

/*
 * Read nbytes directly into the vnode's page cache for a UIO_NOCOPY
 * (sendfile-style) request: grab each page, and if it is not yet valid
 * fill it from the DMU, zeroing any tail past EOF.  Expects a
 * page-aligned starting offset.
 */
static int
mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t *zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int off;	/* NOTE(review): appears unused in this function */
	int error = 0;

	ASSERT(uio->uio_segflg == UIO_NOCOPY);
	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);
	ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);

	VM_OBJECT_LOCK(obj);
	for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY |
		    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY);
		if (pp->valid == 0) {
			/* Page not yet populated: fill it from the DMU. */
			vm_page_io_start(pp);
			VM_OBJECT_UNLOCK(obj);
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			if (bytes != PAGESIZE && error == 0)
				bzero(va + bytes, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			VM_OBJECT_LOCK(obj);
			vm_page_io_finish(pp);
			vm_page_lock(pp);
			if (error) {
				vm_page_free(pp);
			} else {
				pp->valid = VM_PAGE_BITS_ALL;
				vm_page_activate(pp);
			}
			vm_page_unlock(pp);
		}
		if (error)
			break;
		uio->uio_resid -= bytes;
		uio->uio_offset += bytes;
		len -= bytes;
	}
	VM_OBJECT_UNLOCK(obj);
	return (error);
}

/*
 * Read nbytes for a normal (copying) uio, preferring resident pages:
 * data found in the page cache is uiomove()d out; holes in residency
 * fall back to dmu_read_uio().  This keeps mmap'ed and read() views of
 * the file coherent.
 */
static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t *zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	vm_object_t obj;
	int64_t start;
	caddr_t va;	/* NOTE(review): shadowed by the inner va below */
	int len = nbytes;
	int off;
	int error = 0;

	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);

	start = uio->uio_loffset;
	off = start & PAGEOFFSET;
	VM_OBJECT_LOCK(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if (pp = page_lookup(vp, start, off, bytes)) {
			struct sf_buf *sf;
			caddr_t va;

			VM_OBJECT_UNLOCK(obj);
			va = zfs_map_page(pp, &sf);
			error = uiomove(va + off, bytes, UIO_READ, uio);
			zfs_unmap_page(sf);
			VM_OBJECT_LOCK(obj);
			page_unlock(pp);
		} else {
			VM_OBJECT_UNLOCK(obj);
			error = dmu_read_uio(os, zp->z_id, uio, bytes);
			VM_OBJECT_LOCK(obj);
		}
		len -= bytes;
		off = 0;
		if (error)
			break;
	}
	VM_OBJECT_UNLOCK(obj);
	return (error);
}

offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */

/*
 * Read bytes from the file into the uio.
 *
 * Refuses quarantined files, validates the offset, fast-tracks empty
 * reads, honors mandatory locking, and commits the ZIL first under
 * FRSYNC (or sync=always).  The range is read-locked, then copied out
 * in zfs_read_chunk_size chunks via the page cache (mappedread /
 * mappedread_sf) when pages are resident, else straight from the DMU.
 * Checksum errors are reported to the caller as EIO.
 */
/* ARGSUSED */
static int
zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	ssize_t		n, nbytes;
	int		error;
	rl_t		*rl;
	xuio_t		*xuio = NULL;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	os = zfsvfs->z_os;

	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
		ZFS_EXIT(zfsvfs);
		return (EACCES);
	}

	/*
	 * Validate file offset
	 */
	if (uio->uio_loffset < (offset_t)0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Fasttrack empty reads
	 */
	if (uio->uio_resid == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/*
	 * Check for mandatory locks
	 */
	if (MANDMODE(zp->z_mode)) {
		if (error = chklock(vp, FREAD,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * If we're in FRSYNC mode, sync out this znode before reading it.
	 */
	if (zfsvfs->z_log &&
	    (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
		zil_commit(zfsvfs->z_log, zp->z_id);

	/*
	 * Lock the range against changes.
	 */
	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);

	/*
	 * If we are reading past end-of-file we can skip
	 * to the end; but we might still need to set atime.
	 */
	if (uio->uio_loffset >= zp->z_size) {
		error = 0;
		goto out;
	}

	ASSERT(uio->uio_loffset < zp->z_size);
	n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);

#ifdef sun
	/* Zero-copy (xuio) read path: pre-register loaned ARC buffers. */
	if ((uio->uio_extflg == UIO_XUIO) &&
	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
		int nblk;
		int blksz = zp->z_blksz;
		uint64_t offset = uio->uio_loffset;

		xuio = (xuio_t *)uio;
		if ((ISP2(blksz))) {
			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
			    blksz)) / blksz;
		} else {
			ASSERT(offset + n <= blksz);
			nblk = 1;
		}
		(void) dmu_xuio_init(xuio, nblk);

		if (vn_has_cached_data(vp)) {
			/*
			 * For simplicity, we always allocate a full buffer
			 * even if we only expect to read a portion of a block.
			 */
			while (--nblk >= 0) {
				(void) dmu_xuio_add(xuio,
				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
				    blksz), 0, blksz);
			}
		}
	}
#endif	/* sun */

	while (n > 0) {
		/* Never cross a chunk boundary in a single copy. */
		nbytes = MIN(n, zfs_read_chunk_size -
		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));

#ifdef __FreeBSD__
		if (uio->uio_segflg == UIO_NOCOPY)
			error = mappedread_sf(vp, nbytes, uio);
		else
#endif /* __FreeBSD__ */
		if (vn_has_cached_data(vp))
			error = mappedread(vp, nbytes, uio);
		else
			error = dmu_read_uio(os, zp->z_id, uio, nbytes);
		if (error) {
			/* convert checksum errors into IO errors */
			if (error == ECKSUM)
				error = EIO;
			break;
		}

		n -= nbytes;
	}
out:
	zfs_range_unlock(rl);

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Write bytes from the uio to the file.
 *
 * Enforces immutable/append-only flags and mandatory locks, takes an
 * RL_APPEND or RL_WRITER range lock, then writes in block-size chunks,
 * each in its own DMU transaction (retried on ERESTART).  Full-block
 * aligned writes borrow an ARC buffer (or consume xuio zero-copy
 * buffers) so the copy happens outside the transaction; partial writes
 * go through dmu_write_uio_dbuf().  Resident pages are kept coherent
 * via update_pages(), SUID/SGID bits are cleared when required,
 * timestamps and size are updated, and each chunk is logged to the
 * ZIL; zil_commit() runs at the end for FSYNC/FDSYNC/sync=always.
 */
/* ARGSUSED */
static int
zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	rlim64_t	limit = MAXOFFSET_T;
	ssize_t		start_resid = uio->uio_resid;
	ssize_t		tx_bytes;
	uint64_t	end_size;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	offset_t	woff;
	ssize_t		n, nbytes;
	rl_t		*rl;
	int		max_blksz = zfsvfs->z_max_blksz;
	int		error;
	arc_buf_t	*abuf;
	iovec_t		*aiov;
	xuio_t		*xuio = NULL;
	int		i_iov = 0;
	int		iovcnt = uio->uio_iovcnt;
	iovec_t		*iovp = uio->uio_iov;
	int		write_eof;
	int		count = 0;
	sa_bulk_attr_t	bulk[4];
	uint64_t	mtime[2], ctime[2];

	/*
	 * Fasttrack empty write
	 */
	n = start_resid;
	if (n == 0)
		return (0);

	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Attributes updated in bulk by the final sa_bulk_update(). */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);

	/*
	 * If immutable or not appending then return EPERM
	 */
	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
	    (uio->uio_loffset < zp->z_size))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	zilog = zfsvfs->z_log;

	/*
	 * Validate file offset
	 */
	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
	if (woff < 0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Check for mandatory locks before calling zfs_range_lock()
	 * in order to prevent a deadlock with locks set via fcntl().
	 */
	if (MANDMODE((mode_t)zp->z_mode) &&
	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

#ifdef sun
	/*
	 * Pre-fault the pages to ensure slow (eg NFS) pages
	 * don't hold up txg.
	 * Skip this if uio contains loaned arc_buf.
	 */
	if ((uio->uio_extflg == UIO_XUIO) &&
	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
		xuio = (xuio_t *)uio;
	else
		uio_prefaultpages(MIN(n, max_blksz), uio);
#endif	/* sun */

	/*
	 * If in append mode, set the io offset pointer to eof.
	 */
	if (ioflag & FAPPEND) {
		/*
		 * Obtain an appending range lock to guarantee file append
		 * semantics.  We reset the write offset once we have the lock.
		 */
		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
		woff = rl->r_off;
		if (rl->r_len == UINT64_MAX) {
			/*
			 * We overlocked the file because this write will cause
			 * the file block size to increase.
			 * Note that zp_size cannot change with this lock held.
			 */
			woff = zp->z_size;
		}
		uio->uio_loffset = woff;
	} else {
		/*
		 * Note that if the file block size will change as a result of
		 * this write, then this range lock will lock the entire file
		 * so that we can re-write the block safely.
		 */
		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
	}

	if (vn_rlimit_fsize(vp, uio, uio->uio_td)) {
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (EFBIG);
	}

	if (woff >= limit) {
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (EFBIG);
	}

	/* Clamp the write so it does not extend past the offset limit. */
	if ((woff + n) > limit || woff > (limit - n))
		n = limit - woff;

	/* Will this write extend the file length? */
	write_eof = (woff + n > zp->z_size);

	end_size = MAX(zp->z_size, woff + n);

	/*
	 * Write the file in reasonable size chunks.  Each chunk is written
	 * in a separate transaction; this keeps the intent log records small
	 * and allows us to do more fine-grained space accounting.
	 */
	while (n > 0) {
		abuf = NULL;
		woff = uio->uio_loffset;
again:
		if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
		    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
			if (abuf != NULL)
				dmu_return_arcbuf(abuf);
			error = EDQUOT;
			break;
		}

		if (xuio && abuf == NULL) {
			/* Zero-copy: take the caller-loaned ARC buffer. */
			ASSERT(i_iov < iovcnt);
			aiov = &iovp[i_iov];
			abuf = dmu_xuio_arcbuf(xuio, i_iov);
			dmu_xuio_clear(xuio, i_iov);
			DTRACE_PROBE3(zfs_cp_write, int, i_iov,
			    iovec_t *, aiov, arc_buf_t *, abuf);
			ASSERT((aiov->iov_base == abuf->b_data) ||
			    ((char *)aiov->iov_base - (char *)abuf->b_data +
			    aiov->iov_len == arc_buf_size(abuf)));
			i_iov++;
		} else if (abuf == NULL && n >= max_blksz &&
		    woff >= zp->z_size &&
		    P2PHASE(woff, max_blksz) == 0 &&
		    zp->z_blksz == max_blksz) {
			/*
			 * This write covers a full block.  "Borrow" a buffer
			 * from the dmu so that we can fill it before we enter
			 * a transaction.  This avoids the possibility of
			 * holding up the transaction if the data copy hangs
			 * up on a pagefault (e.g., from an NFS server mapping).
			 */
			size_t cbytes;

			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
			    max_blksz);
			ASSERT(abuf != NULL);
			ASSERT(arc_buf_size(abuf) == max_blksz);
			if (error = uiocopy(abuf->b_data, max_blksz,
			    UIO_WRITE, uio, &cbytes)) {
				dmu_return_arcbuf(abuf);
				break;
			}
			ASSERT(cbytes == max_blksz);
		}

		/*
		 * Start a transaction.
		 */
		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_NOWAIT);
		if (error) {
			if (error == ERESTART) {
				/* txg full: wait and retry this chunk. */
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto again;
			}
			dmu_tx_abort(tx);
			if (abuf != NULL)
				dmu_return_arcbuf(abuf);
			break;
		}

		/*
		 * If zfs_range_lock() over-locked we grow the blocksize
		 * and then reduce the lock range.  This will only happen
		 * on the first iteration since zfs_range_reduce() will
		 * shrink down r_len to the appropriate size.
		 */
		if (rl->r_len == UINT64_MAX) {
			uint64_t new_blksz;

			if (zp->z_blksz > max_blksz) {
				ASSERT(!ISP2(zp->z_blksz));
				new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE);
			} else {
				new_blksz = MIN(end_size, max_blksz);
			}
			zfs_grow_blocksize(zp, new_blksz, tx);
			zfs_range_reduce(rl, woff, n);
		}

		/*
		 * XXX - should we really limit each write to z_max_blksz?
		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
		 */
		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));

		if (woff + nbytes > zp->z_size)
			vnode_pager_setsize(vp, woff + nbytes);

		if (abuf == NULL) {
			tx_bytes = uio->uio_resid;
			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, nbytes, tx);
			tx_bytes -= uio->uio_resid;
		} else {
			tx_bytes = nbytes;
			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
			/*
			 * If this is not a full block write, but we are
			 * extending the file past EOF and this data starts
			 * block-aligned, use assign_arcbuf().  Otherwise,
			 * write via dmu_write().
			 */
			if (tx_bytes < max_blksz && (!write_eof ||
			    aiov->iov_base != abuf->b_data)) {
				ASSERT(xuio);
				dmu_write(zfsvfs->z_os, zp->z_id, woff,
				    aiov->iov_len, aiov->iov_base, tx);
				dmu_return_arcbuf(abuf);
				xuio_stat_wbuf_copied();
			} else {
				ASSERT(xuio || tx_bytes == max_blksz);
				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
				    woff, abuf, tx);
			}
			ASSERT(tx_bytes <= uio->uio_resid);
			uioskip(uio, tx_bytes);
		}
		if (tx_bytes && vn_has_cached_data(vp)) {
			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
			    zp->z_id, uio->uio_segflg, tx);
		}

		/*
		 * If we made no progress, we're done.  If we made even
		 * partial progress, update the znode and ZIL accordingly.
		 */
		if (tx_bytes == 0) {
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
			    (void *)&zp->z_size, sizeof (uint64_t), tx);
			dmu_tx_commit(tx);
			ASSERT(error != 0);
			break;
		}

		/*
		 * Clear Set-UID/Set-GID bits on successful write if not
		 * privileged and at least one of the excute bits is set.
		 *
		 * It would be nice to to this after all writes have
		 * been done, but that would still expose the ISUID/ISGID
		 * to another app after the partial write is committed.
		 *
		 * Note: we don't call zfs_fuid_map_id() here because
		 * user 0 is not an ephemeral uid.
		 */
		mutex_enter(&zp->z_acl_lock);
		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
		    (S_IXUSR >> 6))) != 0 &&
		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
		    secpolicy_vnode_setid_retain(vp, cr,
		    (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
			uint64_t newmode;
			zp->z_mode &= ~(S_ISUID | S_ISGID);
			newmode = zp->z_mode;
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
			    (void *)&newmode, sizeof (uint64_t), tx);
		}
		mutex_exit(&zp->z_acl_lock);

		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
		    B_TRUE);

		/*
		 * Update the file size (zp_size) if it has changed;
		 * account for possible concurrent updates.
		 */
		while ((end_size = zp->z_size) < uio->uio_loffset) {
			(void) atomic_cas_64(&zp->z_size, end_size,
			    uio->uio_loffset);
			ASSERT(error == 0);
		}
		/*
		 * If we are replaying and eof is non zero then force
		 * the file size to the specified eof.  Note, there's no
		 * concurrency during replay.
		 */
		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
			zp->z_size = zfsvfs->z_replay_eof;

		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
		dmu_tx_commit(tx);

		if (error != 0)
			break;
		ASSERT(tx_bytes == nbytes);
		n -= nbytes;

#ifdef sun
		if (!xuio && n > 0)
			uio_prefaultpages(MIN(n, max_blksz), uio);
#endif	/* sun */
	}

	zfs_range_unlock(rl);

	/*
	 * If we're in replay mode, or we made no progress, return error.
	 * Otherwise, it's at least a partial write, so it's successful.
	 */
	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (ioflag & (FSYNC | FDSYNC) ||
	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, zp->z_id);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Completion callback for zfs_get_data(): releases the dbuf and range
 * lock, drops the vnode (asynchronously, since the txg is stopped from
 * syncing), records the block in the ZIL on success, and frees the zgd.
 */
void
zfs_get_done(zgd_t *zgd, int error)
{
	znode_t *zp = zgd->zgd_private;
	objset_t *os = zp->z_zfsvfs->z_os;

	if (zgd->zgd_db)
		dmu_buf_rele(zgd->zgd_db, zgd);

	zfs_range_unlock(zgd->zgd_rl);

	/*
	 * Release the vnode asynchronously as we currently have the
	 * txg stopped from syncing.
	 */
	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));

	if (error == 0 && zgd->zgd_bp)
		zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);

	kmem_free(zgd, sizeof (zgd_t));
}

#ifdef DEBUG
/* Fault-injection knob: force the next indirect get_data to fail. */
static int zil_fault_io = 0;
#endif

/*
 * ZIL get-data callback: retrieve the data for a logged write so the
 * ZIL can commit it.  Immediate writes copy the data into buf under a
 * read range lock; indirect writes lock the whole block (re-checking
 * the blocksize after locking) and hand the dbuf to dmu_sync(), whose
 * completion runs zfs_get_done().  Returns ENOENT when the file has
 * been removed or truncated past the logged offset.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t object = lr->lr_foid;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	blkptr_t *bp = &lr->lr_blkptr;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error = 0;

	ASSERT(zio != NULL);
	ASSERT(size != 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, object, &zp) != 0)
		return (ENOENT);
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (ENOENT);
	}

	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_zilog = zfsvfs->z_log;
	zgd->zgd_private = zp;

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (offset >= zp->z_size) {
			error = ENOENT;
		} else {
			error = dmu_read(os, object, offset, size, buf,
			    DMU_READ_NO_PREFETCH);
		}
		ASSERT(error == 0 || error == ENOENT);
	} else { /* indirect write */
		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and it's checksum is being calculated
		 * that no one can change the data.  We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			uint64_t blkoff;
			size = zp->z_blksz;
			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
			offset -= blkoff;
			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
			    RL_READER);
			if (zp->z_blksz == size)
				break;
			/* blocksize changed under us: retry with new size */
			offset += blkoff;
			zfs_range_unlock(zgd->zgd_rl);
		}
		/* test for truncation needs to be done while range locked */
		if (lr->lr_offset >= zp->z_size)
			error = ENOENT;
#ifdef DEBUG
		if (zil_fault_io) {
			error = EIO;
			zil_fault_io = 0;
		}
#endif
		if (error == 0)
			error = dmu_buf_hold(os, object, offset, zgd, &db,
			    DMU_READ_NO_PREFETCH);

		if (error == 0) {
			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zfs_get_done, zgd);
			ASSERT(error || lr->lr_length <= zp->z_blksz);

			/*
			 * On success, we need to wait for the write I/O
			 * initiated by dmu_sync() to complete before we can
			 * release this dbuf.  We will finish everything up
			 * in the zfs_get_done() callback.
			 */
			if (error == 0)
				return (0);

			if (error == EALREADY) {
				/* block already committed: log as TX_WRITE2 */
				lr->lr_common.lrc_txtype = TX_WRITE2;
				error = 0;
			}
		}
	}

	zfs_get_done(zgd, error);

	return (error);
}

/*
 * Check access permissions.  Dispatches to the ACE-mask or rwx-mode
 * flavor of the ZFS access check depending on the flag.
 */
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * If *vpp is a device vnode, replace it with a specfs shadow vnode
 * (releasing the original).  Returns ENOSYS when specvp() fails.
 */
static int
specvp_check(vnode_t **vpp, cred_t *cr)
{
	int error = 0;

	if (IS_DEVVP(*vpp)) {
		struct vnode *svp;

		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (svp == NULL)
			error = ENOSYS;
		*vpp = svp;
	}
	return (error);
}

/*
 * Lookup nm in directory dvp.  Tries a fast path first (".", empty
 * name, DNLC hit) before entering the teardown lock; handles extended
 * attribute (LOOKUP_XATTR) lookups, checks directory execute access
 * and UTF-8 validity, then does the real zfs_dirlook().  The tail
 * translates errors for the FreeBSD namei contract (EJUSTRETURN +
 * SAVENAME for CREATE/RENAME of a missing last component), locks the
 * resulting vnode (with the ".." unlock/relock dance), and feeds the
 * name cache when MAKEENTRY is set.
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
    int nameiop, cred_t *cr, kthread_t *td, int flags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
	int	error = 0;
	int *direntflags = NULL;	/* directory lookup flags */
	void *realpnp = NULL;		/* returned pathname */

	/* fast path */
	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {

		if (dvp->v_type != VDIR) {
			return (ENOTDIR);
		} else if (zdp->z_sa_hdl == NULL) {
			return (EIO);
		}

		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
			/* "" or "." resolve to the directory itself. */
			error = zfs_fastaccesschk_execute(zdp, cr);
			if (!error) {
				*vpp = dvp;
				VN_HOLD(*vpp);
				return (0);
			}
			return (error);
		} else {
			vnode_t *tvp = dnlc_lookup(dvp, nm);

			if (tvp) {
				error = zfs_fastaccesschk_execute(zdp, cr);
				if (error) {
					VN_RELE(tvp);
					return (error);
				}
				if (tvp == DNLC_NO_VNODE) {
					/* negative cache entry */
					VN_RELE(tvp);
					return (ENOENT);
				} else {
					*vpp = tvp;
					return (specvp_check(vpp, cr));
				}
			}
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
#ifdef TODO
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}
#endif

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

		/*
		 * Do we have permission to get into attribute directory?
		 */

		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
		    B_FALSE, cr)) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (dvp->v_type != VDIR) {
		ZFS_EXIT(zfsvfs);
		return (ENOTDIR);
	}

	/*
	 * Check accessibility of directory.
	 */

	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
	if (error == 0)
		error = specvp_check(vpp, cr);

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			/* FALLTHROUGH */
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}
	if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) {
		int ltype = 0;

		if (cnp->cn_flags & ISDOTDOT) {
			/* avoid parent/child lock-order deadlock on ".." */
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK(dvp, 0);
		}
		ZFS_EXIT(zfsvfs);
		error = zfs_vnode_lock(*vpp, cnp->cn_lkflags);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, ltype | LK_RETRY);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
			return (error);
		}
	} else {
		ZFS_EXIT(zfsvfs);
	}

#ifdef FREEBSD_NAMECACHE
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 */
	if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
		cache_enter(dvp, *vpp, cnp);
	/*
	 * Insert name into cache if appropriate.
	 */
	if (error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}
#endif

	return (error);
}

/*
 * Create a new file in directory dvp.
 * NOTE(review): this function is truncated in the scraped source —
 * the text below reproduces only the visible portion, up to the point
 * where the extraction cuts off mid-comment.
 */
/* ARGSUSED */
static int
zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
    vnode_t **vpp, cred_t *cr, kthread_t *td)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	objset_t	*os;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t   acl_ids;
	boolean_t	fuid_dirtied;
	boolean_t	have_acl = B_FALSE;
	void		*vsecp = NULL;	/* ACL to be set */
	int		flag = 0;	/* Large file flag */

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
top:
	*vpp = NULL;

	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to
the directory itself. 01554 */ 01555 VN_HOLD(dvp); 01556 zp = dzp; 01557 dl = NULL; 01558 error = 0; 01559 } else { 01560 /* possible VN_HOLD(zp) */ 01561 int zflg = 0; 01562 01563 if (flag & FIGNORECASE) 01564 zflg |= ZCILOOK; 01565 01566 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 01567 NULL, NULL); 01568 if (error) { 01569 if (have_acl) 01570 zfs_acl_ids_free(&acl_ids); 01571 if (strcmp(name, "..") == 0) 01572 error = EISDIR; 01573 ZFS_EXIT(zfsvfs); 01574 return (error); 01575 } 01576 } 01577 01578 if (zp == NULL) { 01579 uint64_t txtype; 01580 01581 /* 01582 * Create a new file object and update the directory 01583 * to reference it. 01584 */ 01585 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 01586 if (have_acl) 01587 zfs_acl_ids_free(&acl_ids); 01588 goto out; 01589 } 01590 01591 /* 01592 * We only support the creation of regular files in 01593 * extended attribute directories. 01594 */ 01595 01596 if ((dzp->z_pflags & ZFS_XATTR) && 01597 (vap->va_type != VREG)) { 01598 if (have_acl) 01599 zfs_acl_ids_free(&acl_ids); 01600 error = EINVAL; 01601 goto out; 01602 } 01603 01604 if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 01605 cr, vsecp, &acl_ids)) != 0) 01606 goto out; 01607 have_acl = B_TRUE; 01608 01609 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 01610 zfs_acl_ids_free(&acl_ids); 01611 error = EDQUOT; 01612 goto out; 01613 } 01614 01615 tx = dmu_tx_create(os); 01616 01617 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 01618 ZFS_SA_BASE_ATTR_SIZE); 01619 01620 fuid_dirtied = zfsvfs->z_fuid_dirty; 01621 if (fuid_dirtied) 01622 zfs_fuid_txhold(zfsvfs, tx); 01623 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 01624 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 01625 if (!zfsvfs->z_use_sa && 01626 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 01627 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 01628 0, acl_ids.z_aclp->z_acl_bytes); 01629 } 01630 error = dmu_tx_assign(tx, TXG_NOWAIT); 01631 if (error) { 01632 
zfs_dirent_unlock(dl); 01633 if (error == ERESTART) { 01634 dmu_tx_wait(tx); 01635 dmu_tx_abort(tx); 01636 goto top; 01637 } 01638 zfs_acl_ids_free(&acl_ids); 01639 dmu_tx_abort(tx); 01640 ZFS_EXIT(zfsvfs); 01641 return (error); 01642 } 01643 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 01644 01645 if (fuid_dirtied) 01646 zfs_fuid_sync(zfsvfs, tx); 01647 01648 (void) zfs_link_create(dl, zp, tx, ZNEW); 01649 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 01650 if (flag & FIGNORECASE) 01651 txtype |= TX_CI; 01652 zfs_log_create(zilog, tx, txtype, dzp, zp, name, 01653 vsecp, acl_ids.z_fuidp, vap); 01654 zfs_acl_ids_free(&acl_ids); 01655 dmu_tx_commit(tx); 01656 } else { 01657 int aflags = (flag & FAPPEND) ? V_APPEND : 0; 01658 01659 if (have_acl) 01660 zfs_acl_ids_free(&acl_ids); 01661 have_acl = B_FALSE; 01662 01663 /* 01664 * A directory entry already exists for this name. 01665 */ 01666 /* 01667 * Can't truncate an existing file if in exclusive mode. 01668 */ 01669 if (excl == EXCL) { 01670 error = EEXIST; 01671 goto out; 01672 } 01673 /* 01674 * Can't open a directory for writing. 01675 */ 01676 if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 01677 error = EISDIR; 01678 goto out; 01679 } 01680 /* 01681 * Verify requested access to file. 01682 */ 01683 if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 01684 goto out; 01685 } 01686 01687 mutex_enter(&dzp->z_lock); 01688 dzp->z_seq++; 01689 mutex_exit(&dzp->z_lock); 01690 01691 /* 01692 * Truncate regular files if requested. 
01693 */ 01694 if ((ZTOV(zp)->v_type == VREG) && 01695 (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 01696 /* we can't hold any locks when calling zfs_freesp() */ 01697 zfs_dirent_unlock(dl); 01698 dl = NULL; 01699 error = zfs_freesp(zp, 0, 0, mode, TRUE); 01700 if (error == 0) { 01701 vnevent_create(ZTOV(zp), ct); 01702 } 01703 } 01704 } 01705 out: 01706 if (dl) 01707 zfs_dirent_unlock(dl); 01708 01709 if (error) { 01710 if (zp) 01711 VN_RELE(ZTOV(zp)); 01712 } else { 01713 *vpp = ZTOV(zp); 01714 error = specvp_check(vpp, cr); 01715 } 01716 01717 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 01718 zil_commit(zilog, 0); 01719 01720 ZFS_EXIT(zfsvfs); 01721 return (error); 01722 } 01723 01724 uint64_t null_xattr = 0; 01725 01742 /*ARGSUSED*/ 01743 static int 01744 zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 01745 int flags) 01746 { 01747 znode_t *zp, *dzp = VTOZ(dvp); 01748 znode_t *xzp; 01749 vnode_t *vp; 01750 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 01751 zilog_t *zilog; 01752 uint64_t acl_obj, xattr_obj; 01753 uint64_t xattr_obj_unlinked = 0; 01754 uint64_t obj = 0; 01755 zfs_dirlock_t *dl; 01756 dmu_tx_t *tx; 01757 boolean_t may_delete_now, delete_now = FALSE; 01758 boolean_t unlinked, toobig = FALSE; 01759 uint64_t txtype; 01760 pathname_t *realnmp = NULL; 01761 pathname_t realnm; 01762 int error; 01763 int zflg = ZEXISTS; 01764 01765 ZFS_ENTER(zfsvfs); 01766 ZFS_VERIFY_ZP(dzp); 01767 zilog = zfsvfs->z_log; 01768 01769 if (flags & FIGNORECASE) { 01770 zflg |= ZCILOOK; 01771 pn_alloc(&realnm); 01772 realnmp = &realnm; 01773 } 01774 01775 top: 01776 xattr_obj = 0; 01777 xzp = NULL; 01778 /* 01779 * Attempt to lock directory; fail if entry doesn't exist. 
01780 */ 01781 if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 01782 NULL, realnmp)) { 01783 if (realnmp) 01784 pn_free(realnmp); 01785 ZFS_EXIT(zfsvfs); 01786 return (error); 01787 } 01788 01789 vp = ZTOV(zp); 01790 01791 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 01792 goto out; 01793 } 01794 01795 /* 01796 * Need to use rmdir for removing directories. 01797 */ 01798 if (vp->v_type == VDIR) { 01799 error = EPERM; 01800 goto out; 01801 } 01802 01803 vnevent_remove(vp, dvp, name, ct); 01804 01805 if (realnmp) 01806 dnlc_remove(dvp, realnmp->pn_buf); 01807 else 01808 dnlc_remove(dvp, name); 01809 01810 VI_LOCK(vp); 01811 may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 01812 VI_UNLOCK(vp); 01813 01814 /* 01815 * We may delete the znode now, or we may put it in the unlinked set; 01816 * it depends on whether we're the last link, and on whether there are 01817 * other holds on the vnode. So we dmu_tx_hold() the right things to 01818 * allow for either case. 01819 */ 01820 obj = zp->z_id; 01821 tx = dmu_tx_create(zfsvfs->z_os); 01822 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 01823 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 01824 zfs_sa_upgrade_txholds(tx, zp); 01825 zfs_sa_upgrade_txholds(tx, dzp); 01826 if (may_delete_now) { 01827 toobig = 01828 zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 01829 /* if the file is too big, only hold_free a token amount */ 01830 dmu_tx_hold_free(tx, zp->z_id, 0, 01831 (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 01832 } 01833 01834 /* are there any extended attributes? 
*/ 01835 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 01836 &xattr_obj, sizeof (xattr_obj)); 01837 if (error == 0 && xattr_obj) { 01838 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 01839 ASSERT0(error); 01840 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 01841 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 01842 } 01843 01844 mutex_enter(&zp->z_lock); 01845 if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 01846 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 01847 mutex_exit(&zp->z_lock); 01848 01849 /* charge as an update -- would be nice not to charge at all */ 01850 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 01851 01852 error = dmu_tx_assign(tx, TXG_NOWAIT); 01853 if (error) { 01854 zfs_dirent_unlock(dl); 01855 VN_RELE(vp); 01856 if (xzp) 01857 VN_RELE(ZTOV(xzp)); 01858 if (error == ERESTART) { 01859 dmu_tx_wait(tx); 01860 dmu_tx_abort(tx); 01861 goto top; 01862 } 01863 if (realnmp) 01864 pn_free(realnmp); 01865 dmu_tx_abort(tx); 01866 ZFS_EXIT(zfsvfs); 01867 return (error); 01868 } 01869 01870 /* 01871 * Remove the directory entry. 01872 */ 01873 error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 01874 01875 if (error) { 01876 dmu_tx_commit(tx); 01877 goto out; 01878 } 01879 01880 if (unlinked) { 01881 01882 /* 01883 * Hold z_lock so that we can make sure that the ACL obj 01884 * hasn't changed. Could have been deleted due to 01885 * zfs_sa_upgrade(). 
01886 */ 01887 mutex_enter(&zp->z_lock); 01888 VI_LOCK(vp); 01889 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 01890 &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 01891 delete_now = may_delete_now && !toobig && 01892 vp->v_count == 1 && !vn_has_cached_data(vp) && 01893 xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 01894 acl_obj; 01895 VI_UNLOCK(vp); 01896 } 01897 01898 if (delete_now) { 01899 if (xattr_obj_unlinked) { 01900 ASSERT3U(xzp->z_links, ==, 2); 01901 mutex_enter(&xzp->z_lock); 01902 xzp->z_unlinked = 1; 01903 xzp->z_links = 0; 01904 error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 01905 &xzp->z_links, sizeof (xzp->z_links), tx); 01906 ASSERT3U(error, ==, 0); 01907 mutex_exit(&xzp->z_lock); 01908 zfs_unlinked_add(xzp, tx); 01909 01910 if (zp->z_is_sa) 01911 error = sa_remove(zp->z_sa_hdl, 01912 SA_ZPL_XATTR(zfsvfs), tx); 01913 else 01914 error = sa_update(zp->z_sa_hdl, 01915 SA_ZPL_XATTR(zfsvfs), &null_xattr, 01916 sizeof (uint64_t), tx); 01917 ASSERT0(error); 01918 } 01919 VI_LOCK(vp); 01920 vp->v_count--; 01921 ASSERT0(vp->v_count); 01922 VI_UNLOCK(vp); 01923 mutex_exit(&zp->z_lock); 01924 zfs_znode_delete(zp, tx); 01925 } else if (unlinked) { 01926 mutex_exit(&zp->z_lock); 01927 zfs_unlinked_add(zp, tx); 01928 } 01929 01930 txtype = TX_REMOVE; 01931 if (flags & FIGNORECASE) 01932 txtype |= TX_CI; 01933 zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 01934 01935 dmu_tx_commit(tx); 01936 out: 01937 if (realnmp) 01938 pn_free(realnmp); 01939 01940 zfs_dirent_unlock(dl); 01941 01942 if (!delete_now) 01943 VN_RELE(vp); 01944 if (xzp) 01945 VN_RELE(ZTOV(xzp)); 01946 01947 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 01948 zil_commit(zilog, 0); 01949 01950 ZFS_EXIT(zfsvfs); 01951 return (error); 01952 } 01953 01974 /*ARGSUSED*/ 01975 static int 01976 zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 01977 caller_context_t *ct, int flags, vsecattr_t *vsecp) 01978 { 01979 znode_t *zp, *dzp = 
VTOZ(dvp); 01980 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 01981 zilog_t *zilog; 01982 zfs_dirlock_t *dl; 01983 uint64_t txtype; 01984 dmu_tx_t *tx; 01985 int error; 01986 int zf = ZNEW; 01987 ksid_t *ksid; 01988 uid_t uid; 01989 gid_t gid = crgetgid(cr); 01990 zfs_acl_ids_t acl_ids; 01991 boolean_t fuid_dirtied; 01992 01993 ASSERT(vap->va_type == VDIR); 01994 01995 /* 01996 * If we have an ephemeral id, ACL, or XVATTR then 01997 * make sure file system is at proper version 01998 */ 01999 02000 ksid = crgetsid(cr, KSID_OWNER); 02001 if (ksid) 02002 uid = ksid_getid(ksid); 02003 else 02004 uid = crgetuid(cr); 02005 if (zfsvfs->z_use_fuids == B_FALSE && 02006 (vsecp || (vap->va_mask & AT_XVATTR) || 02007 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 02008 return (EINVAL); 02009 02010 ZFS_ENTER(zfsvfs); 02011 ZFS_VERIFY_ZP(dzp); 02012 zilog = zfsvfs->z_log; 02013 02014 if (dzp->z_pflags & ZFS_XATTR) { 02015 ZFS_EXIT(zfsvfs); 02016 return (EINVAL); 02017 } 02018 02019 if (zfsvfs->z_utf8 && u8_validate(dirname, 02020 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 02021 ZFS_EXIT(zfsvfs); 02022 return (EILSEQ); 02023 } 02024 if (flags & FIGNORECASE) 02025 zf |= ZCILOOK; 02026 02027 if (vap->va_mask & AT_XVATTR) { 02028 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 02029 crgetuid(cr), cr, vap->va_type)) != 0) { 02030 ZFS_EXIT(zfsvfs); 02031 return (error); 02032 } 02033 } 02034 02035 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 02036 vsecp, &acl_ids)) != 0) { 02037 ZFS_EXIT(zfsvfs); 02038 return (error); 02039 } 02040 /* 02041 * First make sure the new directory doesn't exist. 02042 * 02043 * Existence is checked first to make sure we don't return 02044 * EACCES instead of EEXIST which can cause some applications 02045 * to fail. 
02046 */ 02047 top: 02048 *vpp = NULL; 02049 02050 if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 02051 NULL, NULL)) { 02052 zfs_acl_ids_free(&acl_ids); 02053 ZFS_EXIT(zfsvfs); 02054 return (error); 02055 } 02056 02057 if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 02058 zfs_acl_ids_free(&acl_ids); 02059 zfs_dirent_unlock(dl); 02060 ZFS_EXIT(zfsvfs); 02061 return (error); 02062 } 02063 02064 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 02065 zfs_acl_ids_free(&acl_ids); 02066 zfs_dirent_unlock(dl); 02067 ZFS_EXIT(zfsvfs); 02068 return (EDQUOT); 02069 } 02070 02071 /* 02072 * Add a new entry to the directory. 02073 */ 02074 tx = dmu_tx_create(zfsvfs->z_os); 02075 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 02076 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 02077 fuid_dirtied = zfsvfs->z_fuid_dirty; 02078 if (fuid_dirtied) 02079 zfs_fuid_txhold(zfsvfs, tx); 02080 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 02081 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 02082 acl_ids.z_aclp->z_acl_bytes); 02083 } 02084 02085 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 02086 ZFS_SA_BASE_ATTR_SIZE); 02087 02088 error = dmu_tx_assign(tx, TXG_NOWAIT); 02089 if (error) { 02090 zfs_dirent_unlock(dl); 02091 if (error == ERESTART) { 02092 dmu_tx_wait(tx); 02093 dmu_tx_abort(tx); 02094 goto top; 02095 } 02096 zfs_acl_ids_free(&acl_ids); 02097 dmu_tx_abort(tx); 02098 ZFS_EXIT(zfsvfs); 02099 return (error); 02100 } 02101 02102 /* 02103 * Create new node. 02104 */ 02105 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 02106 02107 if (fuid_dirtied) 02108 zfs_fuid_sync(zfsvfs, tx); 02109 02110 /* 02111 * Now put new name in parent dir. 
02112 */ 02113 (void) zfs_link_create(dl, zp, tx, ZNEW); 02114 02115 *vpp = ZTOV(zp); 02116 02117 txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 02118 if (flags & FIGNORECASE) 02119 txtype |= TX_CI; 02120 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 02121 acl_ids.z_fuidp, vap); 02122 02123 zfs_acl_ids_free(&acl_ids); 02124 02125 dmu_tx_commit(tx); 02126 02127 zfs_dirent_unlock(dl); 02128 02129 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 02130 zil_commit(zilog, 0); 02131 02132 ZFS_EXIT(zfsvfs); 02133 return (0); 02134 } 02135 02154 /*ARGSUSED*/ 02155 static int 02156 zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 02157 caller_context_t *ct, int flags) 02158 { 02159 znode_t *dzp = VTOZ(dvp); 02160 znode_t *zp; 02161 vnode_t *vp; 02162 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 02163 zilog_t *zilog; 02164 zfs_dirlock_t *dl; 02165 dmu_tx_t *tx; 02166 int error; 02167 int zflg = ZEXISTS; 02168 02169 ZFS_ENTER(zfsvfs); 02170 ZFS_VERIFY_ZP(dzp); 02171 zilog = zfsvfs->z_log; 02172 02173 if (flags & FIGNORECASE) 02174 zflg |= ZCILOOK; 02175 top: 02176 zp = NULL; 02177 02178 /* 02179 * Attempt to lock directory; fail if entry doesn't exist. 02180 */ 02181 if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 02182 NULL, NULL)) { 02183 ZFS_EXIT(zfsvfs); 02184 return (error); 02185 } 02186 02187 vp = ZTOV(zp); 02188 02189 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 02190 goto out; 02191 } 02192 02193 if (vp->v_type != VDIR) { 02194 error = ENOTDIR; 02195 goto out; 02196 } 02197 02198 if (vp == cwd) { 02199 error = EINVAL; 02200 goto out; 02201 } 02202 02203 vnevent_rmdir(vp, dvp, name, ct); 02204 02205 /* 02206 * Grab a lock on the directory to make sure that noone is 02207 * trying to add (or lookup) entries while we are removing it. 02208 */ 02209 rw_enter(&zp->z_name_lock, RW_WRITER); 02210 02211 /* 02212 * Grab a lock on the parent pointer to make sure we play well 02213 * with the treewalk and directory rename code. 
02214 */ 02215 rw_enter(&zp->z_parent_lock, RW_WRITER); 02216 02217 tx = dmu_tx_create(zfsvfs->z_os); 02218 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 02219 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 02220 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 02221 zfs_sa_upgrade_txholds(tx, zp); 02222 zfs_sa_upgrade_txholds(tx, dzp); 02223 error = dmu_tx_assign(tx, TXG_NOWAIT); 02224 if (error) { 02225 rw_exit(&zp->z_parent_lock); 02226 rw_exit(&zp->z_name_lock); 02227 zfs_dirent_unlock(dl); 02228 VN_RELE(vp); 02229 if (error == ERESTART) { 02230 dmu_tx_wait(tx); 02231 dmu_tx_abort(tx); 02232 goto top; 02233 } 02234 dmu_tx_abort(tx); 02235 ZFS_EXIT(zfsvfs); 02236 return (error); 02237 } 02238 02239 #ifdef FREEBSD_NAMECACHE 02240 cache_purge(dvp); 02241 #endif 02242 02243 error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 02244 02245 if (error == 0) { 02246 uint64_t txtype = TX_RMDIR; 02247 if (flags & FIGNORECASE) 02248 txtype |= TX_CI; 02249 zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 02250 } 02251 02252 dmu_tx_commit(tx); 02253 02254 rw_exit(&zp->z_parent_lock); 02255 rw_exit(&zp->z_name_lock); 02256 #ifdef FREEBSD_NAMECACHE 02257 cache_purge(vp); 02258 #endif 02259 out: 02260 zfs_dirent_unlock(dl); 02261 02262 VN_RELE(vp); 02263 02264 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 02265 zil_commit(zilog, 0); 02266 02267 ZFS_EXIT(zfsvfs); 02268 return (error); 02269 } 02270 02298 /* ARGSUSED */ 02299 static int 02300 zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, 02301 u_long **cookies) 02302 { 02303 znode_t *zp = VTOZ(vp); 02304 iovec_t *iovp; 02305 edirent_t *eodp; 02306 dirent64_t *odp; 02307 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 02308 objset_t *os; 02309 caddr_t outbuf; 02310 size_t bufsize; 02311 zap_cursor_t zc; 02312 zap_attribute_t zap; 02313 uint_t bytes_wanted; 02314 uint64_t offset; /* must be unsigned; checks for < 1 */ 02315 uint64_t parent; 02316 int local_eof; 02317 int outcount; 02318 int error; 02319 
uint8_t prefetch; 02320 boolean_t check_sysattrs; 02321 uint8_t type; 02322 int ncooks; 02323 u_long *cooks = NULL; 02324 int flags = 0; /* case flags */ 02325 02326 ZFS_ENTER(zfsvfs); 02327 ZFS_VERIFY_ZP(zp); 02328 02329 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 02330 &parent, sizeof (parent))) != 0) { 02331 ZFS_EXIT(zfsvfs); 02332 return (error); 02333 } 02334 02335 /* 02336 * If we are not given an eof variable, 02337 * use a local one. 02338 */ 02339 if (eofp == NULL) 02340 eofp = &local_eof; 02341 02342 /* 02343 * Check for valid iov_len. 02344 */ 02345 if (uio->uio_iov->iov_len <= 0) { 02346 ZFS_EXIT(zfsvfs); 02347 return (EINVAL); 02348 } 02349 02350 /* 02351 * Quit if directory has been removed (posix) 02352 */ 02353 if ((*eofp = zp->z_unlinked) != 0) { 02354 ZFS_EXIT(zfsvfs); 02355 return (0); 02356 } 02357 02358 error = 0; 02359 os = zfsvfs->z_os; 02360 offset = uio->uio_loffset; 02361 prefetch = zp->z_zn_prefetch; 02362 02363 /* 02364 * Initialize the iterator cursor. 02365 */ 02366 if (offset <= 3) { 02367 /* 02368 * Start iteration from the beginning of the directory. 02369 */ 02370 zap_cursor_init(&zc, os, zp->z_id); 02371 } else { 02372 /* 02373 * The offset is a serialized cursor. 02374 */ 02375 zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 02376 } 02377 02378 /* 02379 * Get space to change directory entries into fs independent format. 02380 */ 02381 iovp = uio->uio_iov; 02382 bytes_wanted = iovp->iov_len; 02383 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 02384 bufsize = bytes_wanted; 02385 outbuf = kmem_alloc(bufsize, KM_SLEEP); 02386 odp = (struct dirent64 *)outbuf; 02387 } else { 02388 bufsize = bytes_wanted; 02389 odp = (struct dirent64 *)iovp->iov_base; 02390 } 02391 eodp = (struct edirent *)odp; 02392 02393 if (ncookies != NULL) { 02394 /* 02395 * Minimum entry size is dirent size and 1 byte for a file name. 
02396 */ 02397 ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 02398 cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 02399 *cookies = cooks; 02400 *ncookies = ncooks; 02401 } 02402 /* 02403 * If this VFS supports the system attribute view interface; and 02404 * we're looking at an extended attribute directory; and we care 02405 * about normalization conflicts on this vfs; then we must check 02406 * for normalization conflicts with the sysattr name space. 02407 */ 02408 #ifdef TODO 02409 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 02410 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 02411 (flags & V_RDDIR_ENTFLAGS); 02412 #else 02413 check_sysattrs = 0; 02414 #endif 02415 02416 /* 02417 * Transform to file-system independent format 02418 */ 02419 outcount = 0; 02420 while (outcount < bytes_wanted) { 02421 ino64_t objnum; 02422 ushort_t reclen; 02423 off64_t *next = NULL; 02424 02425 /* 02426 * Special case `.', `..', and `.zfs'. 02427 */ 02428 if (offset == 0) { 02429 (void) strcpy(zap.za_name, "."); 02430 zap.za_normalization_conflict = 0; 02431 objnum = zp->z_id; 02432 type = DT_DIR; 02433 } else if (offset == 1) { 02434 (void) strcpy(zap.za_name, ".."); 02435 zap.za_normalization_conflict = 0; 02436 objnum = parent; 02437 type = DT_DIR; 02438 } else if (offset == 2 && zfs_show_ctldir(zp)) { 02439 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 02440 zap.za_normalization_conflict = 0; 02441 objnum = ZFSCTL_INO_ROOT; 02442 type = DT_DIR; 02443 } else { 02444 /* 02445 * Grab next entry. 
02446 */ 02447 if (error = zap_cursor_retrieve(&zc, &zap)) { 02448 if ((*eofp = (error == ENOENT)) != 0) 02449 break; 02450 else 02451 goto update; 02452 } 02453 02454 if (zap.za_integer_length != 8 || 02455 zap.za_num_integers != 1) { 02456 cmn_err(CE_WARN, "zap_readdir: bad directory " 02457 "entry, obj = %lld, offset = %lld\n", 02458 (u_longlong_t)zp->z_id, 02459 (u_longlong_t)offset); 02460 error = ENXIO; 02461 goto update; 02462 } 02463 02464 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 02465 /* 02466 * MacOS X can extract the object type here such as: 02467 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 02468 */ 02469 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 02470 02471 if (check_sysattrs && !zap.za_normalization_conflict) { 02472 #ifdef TODO 02473 zap.za_normalization_conflict = 02474 xattr_sysattr_casechk(zap.za_name); 02475 #else 02476 panic("%s:%u: TODO", __func__, __LINE__); 02477 #endif 02478 } 02479 } 02480 02481 if (flags & V_RDDIR_ACCFILTER) { 02482 /* 02483 * If we have no access at all, don't include 02484 * this entry in the returned information 02485 */ 02486 znode_t *ezp; 02487 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 02488 goto skip_entry; 02489 if (!zfs_has_access(ezp, cr)) { 02490 VN_RELE(ZTOV(ezp)); 02491 goto skip_entry; 02492 } 02493 VN_RELE(ZTOV(ezp)); 02494 } 02495 02496 if (flags & V_RDDIR_ENTFLAGS) 02497 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 02498 else 02499 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 02500 02501 /* 02502 * Will this entry fit in the buffer? 02503 */ 02504 if (outcount + reclen > bufsize) { 02505 /* 02506 * Did we manage to fit anything in the buffer? 
02507 */ 02508 if (!outcount) { 02509 error = EINVAL; 02510 goto update; 02511 } 02512 break; 02513 } 02514 if (flags & V_RDDIR_ENTFLAGS) { 02515 /* 02516 * Add extended flag entry: 02517 */ 02518 eodp->ed_ino = objnum; 02519 eodp->ed_reclen = reclen; 02520 /* NOTE: ed_off is the offset for the *next* entry */ 02521 next = &(eodp->ed_off); 02522 eodp->ed_eflags = zap.za_normalization_conflict ? 02523 ED_CASE_CONFLICT : 0; 02524 (void) strncpy(eodp->ed_name, zap.za_name, 02525 EDIRENT_NAMELEN(reclen)); 02526 eodp = (edirent_t *)((intptr_t)eodp + reclen); 02527 } else { 02528 /* 02529 * Add normal entry: 02530 */ 02531 odp->d_ino = objnum; 02532 odp->d_reclen = reclen; 02533 odp->d_namlen = strlen(zap.za_name); 02534 (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 02535 odp->d_type = type; 02536 odp = (dirent64_t *)((intptr_t)odp + reclen); 02537 } 02538 outcount += reclen; 02539 02540 ASSERT(outcount <= bufsize); 02541 02542 /* Prefetch znode */ 02543 if (prefetch) 02544 dmu_prefetch(os, objnum, 0, 0); 02545 02546 skip_entry: 02547 /* 02548 * Move to the next entry, fill in the previous offset. 02549 */ 02550 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 02551 zap_cursor_advance(&zc); 02552 offset = zap_cursor_serialize(&zc); 02553 } else { 02554 offset += 1; 02555 } 02556 02557 if (cooks != NULL) { 02558 *cooks++ = offset; 02559 ncooks--; 02560 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 02561 } 02562 } 02563 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 02564 02565 /* Subtract unused cookies */ 02566 if (ncookies != NULL) 02567 *ncookies -= ncooks; 02568 02569 if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 02570 iovp->iov_base += outcount; 02571 iovp->iov_len -= outcount; 02572 uio->uio_resid -= outcount; 02573 } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 02574 /* 02575 * Reset the pointer. 
 */
		offset = uio->uio_loffset;
	}

update:
	zap_cursor_fini(&zc);
	/* The bounce buffer is only allocated for non-UIO_SYSSPACE callers. */
	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
		kmem_free(outbuf, bufsize);

	/* ENOENT from the cursor simply means end-of-directory. */
	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	uio->uio_loffset = offset;
	ZFS_EXIT(zfsvfs);
	/* On error, hand back no cookies and free the ones we allocated. */
	if (error != 0 && cookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}

ulong_t zfs_fsync_sync_cnt = 4;

/*
 * Flush all dirty log records for the file to stable storage by
 * committing the intent log up to this object.  A no-op when sync is
 * disabled on the dataset.
 *
 *	RETURN:	always 0.
 */
static int
zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	/*
	 * Record per-thread fsync activity.  NOTE(review): the consumer of
	 * zfs_fsyncer_key is not visible in this chunk -- presumably the
	 * write/putpage path uses it as a sync hint; verify before relying
	 * on this comment.
	 */
	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);

	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		zil_commit(zfsvfs->z_log, zp->z_id);
		ZFS_EXIT(zfsvfs);
	}
	return (0);
}


/*
 * Get the basic (and optionally extended xvattr) attributes of a file.
 * Attribute data is gathered in a single SA bulk lookup, then copied
 * into *vap under z_lock.
 */
/* ARGSUSED */
static int
zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int	error = 0;
	uint32_t blksize;
	u_longlong_t nblocks;
	uint64_t links;
	uint64_t mtime[2], ctime[2], crtime[2], rdev;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ?
B_TRUE : B_FALSE; 02649 sa_bulk_attr_t bulk[4]; 02650 int count = 0; 02651 02652 ZFS_ENTER(zfsvfs); 02653 ZFS_VERIFY_ZP(zp); 02654 02655 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 02656 02657 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 02658 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 02659 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &crtime, 16); 02660 if (vp->v_type == VBLK || vp->v_type == VCHR) 02661 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 02662 &rdev, 8); 02663 02664 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 02665 ZFS_EXIT(zfsvfs); 02666 return (error); 02667 } 02668 02669 /* 02670 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 02671 * Also, if we are the owner don't bother, since owner should 02672 * always be allowed to read basic attributes of file. 02673 */ 02674 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 02675 (vap->va_uid != crgetuid(cr))) { 02676 if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 02677 skipaclchk, cr)) { 02678 ZFS_EXIT(zfsvfs); 02679 return (error); 02680 } 02681 } 02682 02683 /* 02684 * Return all attributes. It's cheaper to provide the answer 02685 * than to determine whether we were asked the question. 02686 */ 02687 02688 mutex_enter(&zp->z_lock); 02689 vap->va_type = IFTOVT(zp->z_mode); 02690 vap->va_mode = zp->z_mode & ~S_IFMT; 02691 #ifdef sun 02692 vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 02693 #else 02694 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 02695 #endif 02696 vap->va_nodeid = zp->z_id; 02697 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 02698 links = zp->z_links + 1; 02699 else 02700 links = zp->z_links; 02701 vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! 
*/ 02702 vap->va_size = zp->z_size; 02703 #ifdef sun 02704 vap->va_rdev = vp->v_rdev; 02705 #else 02706 if (vp->v_type == VBLK || vp->v_type == VCHR) 02707 vap->va_rdev = zfs_cmpldev(rdev); 02708 #endif 02709 vap->va_seq = zp->z_seq; 02710 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 02711 02712 /* 02713 * Add in any requested optional attributes and the create time. 02714 * Also set the corresponding bits in the returned attribute bitmap. 02715 */ 02716 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 02717 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 02718 xoap->xoa_archive = 02719 ((zp->z_pflags & ZFS_ARCHIVE) != 0); 02720 XVA_SET_RTN(xvap, XAT_ARCHIVE); 02721 } 02722 02723 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 02724 xoap->xoa_readonly = 02725 ((zp->z_pflags & ZFS_READONLY) != 0); 02726 XVA_SET_RTN(xvap, XAT_READONLY); 02727 } 02728 02729 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 02730 xoap->xoa_system = 02731 ((zp->z_pflags & ZFS_SYSTEM) != 0); 02732 XVA_SET_RTN(xvap, XAT_SYSTEM); 02733 } 02734 02735 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 02736 xoap->xoa_hidden = 02737 ((zp->z_pflags & ZFS_HIDDEN) != 0); 02738 XVA_SET_RTN(xvap, XAT_HIDDEN); 02739 } 02740 02741 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 02742 xoap->xoa_nounlink = 02743 ((zp->z_pflags & ZFS_NOUNLINK) != 0); 02744 XVA_SET_RTN(xvap, XAT_NOUNLINK); 02745 } 02746 02747 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 02748 xoap->xoa_immutable = 02749 ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 02750 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 02751 } 02752 02753 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 02754 xoap->xoa_appendonly = 02755 ((zp->z_pflags & ZFS_APPENDONLY) != 0); 02756 XVA_SET_RTN(xvap, XAT_APPENDONLY); 02757 } 02758 02759 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 02760 xoap->xoa_nodump = 02761 ((zp->z_pflags & ZFS_NODUMP) != 0); 02762 XVA_SET_RTN(xvap, XAT_NODUMP); 02763 } 02764 02765 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 02766 xoap->xoa_opaque = 02767 ((zp->z_pflags & ZFS_OPAQUE) 
!= 0); 02768 XVA_SET_RTN(xvap, XAT_OPAQUE); 02769 } 02770 02771 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 02772 xoap->xoa_av_quarantined = 02773 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 02774 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 02775 } 02776 02777 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 02778 xoap->xoa_av_modified = 02779 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 02780 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 02781 } 02782 02783 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 02784 vp->v_type == VREG) { 02785 zfs_sa_get_scanstamp(zp, xvap); 02786 } 02787 02788 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 02789 uint64_t times[2]; 02790 02791 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 02792 times, sizeof (times)); 02793 ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 02794 XVA_SET_RTN(xvap, XAT_CREATETIME); 02795 } 02796 02797 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 02798 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 02799 XVA_SET_RTN(xvap, XAT_REPARSE); 02800 } 02801 if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 02802 xoap->xoa_generation = zp->z_gen; 02803 XVA_SET_RTN(xvap, XAT_GEN); 02804 } 02805 02806 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 02807 xoap->xoa_offline = 02808 ((zp->z_pflags & ZFS_OFFLINE) != 0); 02809 XVA_SET_RTN(xvap, XAT_OFFLINE); 02810 } 02811 02812 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 02813 xoap->xoa_sparse = 02814 ((zp->z_pflags & ZFS_SPARSE) != 0); 02815 XVA_SET_RTN(xvap, XAT_SPARSE); 02816 } 02817 } 02818 02819 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 02820 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 02821 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 02822 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 02823 02824 mutex_exit(&zp->z_lock); 02825 02826 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 02827 vap->va_blksize = blksize; 02828 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 02829 02830 if (zp->z_blksz == 0) { 02831 /* 02832 * Block size hasn't been set; suggest maximal I/O transfers. 
		 */
		vap->va_blksize = zfsvfs->z_max_blksz;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Set the file attributes to the values contained in the vattr structure.
 *
 *	IN:	vp	- vnode of file to be modified.
 *		vap	- new attribute values.  If AT_XVATTR is set in
 *			  vap->va_mask, vap actually points to an xvattr_t
 *			  carrying optional (system) attributes as well.
 *		flags	- ATTR_NOACLCHECK set to bypass ACL checks
 *			  (CIFS context only).
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - ctime updated, mtime updated if size changed.
 */
/* ARGSUSED */
static int
zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	vattr_t		oldva;
	xvattr_t	tmpxvattr;	/* records optional attrs trimmed below */
	uint_t		mask = vap->va_mask;
	uint_t		saved_mask;
	uint64_t	saved_mode;
	int		trim_mask = 0;
	uint64_t	new_mode;
	uint64_t	new_uid, new_gid;
	uint64_t	xattr_obj;
	uint64_t	mtime[2], ctime[2];
	znode_t		*attrzp;	/* xattr dir znode, when owner changes */
	int		need_policy = FALSE;
	int		err, err2;
	zfs_fuid_info_t	*fuidp = NULL;
	xvattr_t	*xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t	*xoap;
	zfs_acl_t	*aclp;
	boolean_t	skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	boolean_t	fuid_dirtied = B_FALSE;
	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
	int		count = 0, xattr_count = 0;

	if (mask == 0)
		return (0);

	if (mask & AT_NOSET)
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zilog = zfsvfs->z_log;

	/*
	 * Make sure that if we have ephemeral uid/gid or xvattr specified
	 * that file system is at proper version level
	 */

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
	    (mask & AT_XVATTR))) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	if (mask & AT_SIZE && vp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (EISDIR);
	}

	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * If this is an xvattr_t, then get a pointer to the structure of
	 * optional attributes.  If this is NULL, then we have a vattr_t.
	 */
	xoap = xva_getxoptattr(xvap);

	xva_init(&tmpxvattr);

	/*
	 * Immutable files can only alter immutable bit and atime
	 */
	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/*
	 * Verify timestamps doesn't overflow 32 bits.
	 * ZFS can handle large timestamps, but 32bit syscalls can't
	 * handle times greater than 2039.  This check should be removed
	 * once large timestamps are fully supported.
	 */
	if (mask & (AT_ATIME | AT_MTIME)) {
		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
			ZFS_EXIT(zfsvfs);
			return (EOVERFLOW);
		}
	}

top:
	/* Restart point for dmu_tx_assign() ERESTART retries. */
	attrzp = NULL;
	aclp = NULL;

	/* Can this be moved to before the top label? */
	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
		ZFS_EXIT(zfsvfs);
		return (EROFS);
	}

	/*
	 * First validate permissions
	 */

	if (mask & AT_SIZE) {
		/*
		 * XXX - Note, we are not providing any open
		 * mode flags here (like FNDELAY), so we may
		 * block if there are locks present... this
		 * should be addressed in openat().
		 */
		/* XXX - would it be OK to generate a log record here? */
		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	if (mask & (AT_ATIME|AT_MTIME) ||
	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
		    skipaclchk, cr);
	}

	if (mask & (AT_UID|AT_GID)) {
		int	idmask = (mask & (AT_UID|AT_GID));
		int	take_owner;
		int	take_group;

		/*
		 * NOTE: even if a new mode is being set,
		 * we may clear S_ISUID/S_ISGID bits.
		 */

		if (!(mask & AT_MODE))
			vap->va_mode = zp->z_mode;

		/*
		 * Take ownership or chgrp to group we are a member of
		 */

		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
		take_group = (mask & AT_GID) &&
		    zfs_groupmember(zfsvfs, vap->va_gid, cr);

		/*
		 * If both AT_UID and AT_GID are set then take_owner and
		 * take_group must both be set in order to allow taking
		 * ownership.
		 *
		 * Otherwise, send the check through secpolicy_vnode_setattr()
		 *
		 */

		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
		    ((idmask == AT_UID) && take_owner) ||
		    ((idmask == AT_GID) && take_group)) {
			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
			    skipaclchk, cr) == 0) {
				/*
				 * Remove setuid/setgid for non-privileged users
				 */
				secpolicy_setid_clear(vap, vp, cr);
				trim_mask = (mask & (AT_UID|AT_GID));
			} else {
				need_policy = TRUE;
			}
		} else {
			need_policy = TRUE;
		}
	}

	mutex_enter(&zp->z_lock);
	oldva.va_mode = zp->z_mode;
	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
	if (mask & AT_XVATTR) {
		/*
		 * Update xvattr mask to include only those attributes
		 * that are actually changing.
		 *
		 * the bits will be restored prior to actually setting
		 * the attributes so the caller thinks they were set.
		 */
		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			if (xoap->xoa_appendonly !=
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			if (xoap->xoa_nounlink !=
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			if (xoap->xoa_immutable !=
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			if (xoap->xoa_nodump !=
			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NODUMP);
				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			if (xoap->xoa_av_modified !=
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			if ((vp->v_type != VREG &&
			    xoap->xoa_av_quarantined) ||
			    xoap->xoa_av_quarantined !=
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
			}
		}

		/* The reparse flag can never be changed via setattr. */
		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			mutex_exit(&zp->z_lock);
			ZFS_EXIT(zfsvfs);
			return (EPERM);
		}

		if (need_policy == FALSE &&
		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
			need_policy = TRUE;
		}
	}

	mutex_exit(&zp->z_lock);

	if (mask & AT_MODE) {
		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
			err = secpolicy_setid_setsticky_clear(vp, vap,
			    &oldva, cr);
			if (err) {
				ZFS_EXIT(zfsvfs);
				return (err);
			}
			trim_mask |= AT_MODE;
		} else {
			need_policy = TRUE;
		}
	}

	if (need_policy) {
		/*
		 * If trim_mask is set then take ownership
		 * has been granted or write_acl is present and user
		 * has the ability to modify mode.  In that case remove
		 * UID|GID and or MODE from mask so that
		 * secpolicy_vnode_setattr() doesn't revoke it.
		 */

		if (trim_mask) {
			saved_mask = vap->va_mask;
			vap->va_mask &= ~trim_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Save the mode, as secpolicy_vnode_setattr()
				 * will overwrite it with ova.va_mode.
				 */
				saved_mode = vap->va_mode;
			}
		}
		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}

		if (trim_mask) {
			vap->va_mask |= saved_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Recover the mode after
				 * secpolicy_vnode_setattr().
				 */
				vap->va_mode = saved_mode;
			}
		}
	}

	/*
	 * secpolicy_vnode_setattr, or take ownership may have
	 * changed va_mask
	 */
	mask = vap->va_mask;

	if ((mask & (AT_UID | AT_GID))) {
		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
		    &xattr_obj, sizeof (xattr_obj));

		if (err == 0 && xattr_obj) {
			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
			if (err)
				goto out2;
		}
		/*
		 * NOTE: out2 (not out) is used for failures below because no
		 * DMU transaction has been created yet at these points.
		 */
		if (mask & AT_UID) {
			new_uid = zfs_fuid_create(zfsvfs,
			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
			if (new_uid != zp->z_uid &&
			    zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
				if (attrzp)
					VN_RELE(ZTOV(attrzp));
				err = EDQUOT;
				goto out2;
			}
		}

		if (mask & AT_GID) {
			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
			    cr, ZFS_GROUP, &fuidp);
			if (new_gid != zp->z_gid &&
			    zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
				if (attrzp)
					VN_RELE(ZTOV(attrzp));
				err = EDQUOT;
				goto out2;
			}
		}
	}
	tx = dmu_tx_create(zfsvfs->z_os);

	if (mask & AT_MODE) {
		uint64_t pmode = zp->z_mode;
		uint64_t acl_obj;
		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);

		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
			goto out;

		mutex_enter(&zp->z_lock);
		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
			/*
			 * Are we upgrading ACL from old V0 format
			 * to V1 format?
			 */
			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
			    zfs_znode_acl_version(zp) ==
			    ZFS_ACL_VERSION_INITIAL) {
				dmu_tx_hold_free(tx, acl_obj, 0,
				    DMU_OBJECT_END);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
				    0, aclp->z_acl_bytes);
			} else {
				dmu_tx_hold_write(tx, acl_obj, 0,
				    aclp->z_acl_bytes);
			}
		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, aclp->z_acl_bytes);
		}
		mutex_exit(&zp->z_lock);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
	} else {
		if ((mask & AT_XVATTR) &&
		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		else
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	}

	if (attrzp) {
		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
	}

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);

	zfs_sa_upgrade_txholds(tx, zp);

	err = dmu_tx_assign(tx, TXG_NOWAIT);
	if (err) {
		/* On ERESTART, wait for the txg then retry from "top". */
		if (err == ERESTART)
			dmu_tx_wait(tx);
		goto out;
	}

	count = 0;
	/*
	 * Set each attribute requested.
	 * We group settings according to the locks they need to acquire.
	 *
	 * Note: you cannot set ctime directly, although it will be
	 * updated as a side-effect of calling this function.
	 */


	if (mask & (AT_UID|AT_GID|AT_MODE))
		mutex_enter(&zp->z_acl_lock);
	mutex_enter(&zp->z_lock);

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));

	if (attrzp) {
		if (mask & (AT_UID|AT_GID|AT_MODE))
			mutex_enter(&attrzp->z_acl_lock);
		mutex_enter(&attrzp->z_lock);
		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
		    sizeof (attrzp->z_pflags));
	}

	if (mask & (AT_UID|AT_GID)) {

		if (mask & AT_UID) {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
			    &new_uid, sizeof (new_uid));
			zp->z_uid = new_uid;
			if (attrzp) {
				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
				    sizeof (new_uid));
				attrzp->z_uid = new_uid;
			}
		}

		if (mask & AT_GID) {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
			    NULL, &new_gid, sizeof (new_gid));
			zp->z_gid = new_gid;
			if (attrzp) {
				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
				    sizeof (new_gid));
				attrzp->z_gid = new_gid;
			}
		}
		if (!(mask & AT_MODE)) {
			/*
			 * The bulk entry stores &new_mode; the value assigned
			 * below is what sa_bulk_update() will actually write.
			 */
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
			    NULL, &new_mode, sizeof (new_mode));
			new_mode = zp->z_mode;
		}
		err = zfs_acl_chown_setattr(zp);
		ASSERT(err == 0);
		if (attrzp) {
			err = zfs_acl_chown_setattr(attrzp);
			ASSERT(err == 0);
		}
	}

	if (mask & AT_MODE) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
		    &new_mode, sizeof (new_mode));
		zp->z_mode = new_mode;
		ASSERT3U((uintptr_t)aclp, !=, 0);
		err = zfs_aclset_common(zp, aclp, cr, tx);
		ASSERT0(err);
		if (zp->z_acl_cached)
			zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = aclp;
		aclp = NULL;	/* ownership transferred to z_acl_cached */
	}


	if (mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
		    &zp->z_atime, sizeof (zp->z_atime));
	}

	if (mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
		    mtime, sizeof (mtime));
	}

	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
		    NULL, mtime, sizeof (mtime));
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
		    B_TRUE);
	} else if (mask != 0) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
		    B_TRUE);
		if (attrzp) {
			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
			    SA_ZPL_CTIME(zfsvfs), NULL,
			    &ctime, sizeof (ctime));
			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
			    mtime, ctime, B_TRUE);
		}
	}
	/*
	 * Do this after setting timestamps to prevent timestamp
	 * update from toggling bit
	 */

	if (xoap && (mask & AT_XVATTR)) {

		/*
		 * restore trimmed off masks
		 * so that return masks can be set for caller.
		 */

		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
			XVA_SET_REQ(xvap, XAT_APPENDONLY);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
			XVA_SET_REQ(xvap, XAT_NOUNLINK);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
			XVA_SET_REQ(xvap, XAT_NODUMP);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
			ASSERT(vp->v_type == VREG);

		zfs_xvattr_set(zp, xvap, tx);
	}

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (mask != 0)
		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);

	mutex_exit(&zp->z_lock);
	if (mask & (AT_UID|AT_GID|AT_MODE))
		mutex_exit(&zp->z_acl_lock);

	if (attrzp) {
		if (mask & (AT_UID|AT_GID|AT_MODE))
			mutex_exit(&attrzp->z_acl_lock);
		mutex_exit(&attrzp->z_lock);
	}
out:
	if (err == 0 && attrzp) {
		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
		    xattr_count, tx);
		ASSERT(err2 == 0);
	}

	if (attrzp)
		VN_RELE(ZTOV(attrzp));
	if (aclp)
		zfs_acl_free(aclp);

	if (fuidp) {
		zfs_fuid_info_free(fuidp);
		fuidp = NULL;
	}

	if (err) {
		dmu_tx_abort(tx);
		if (err == ERESTART)
			goto top;
	} else {
		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		dmu_tx_commit(tx);
	}

out2:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (err);
}

/*
 * List node used by zfs_rename_lock()/zfs_rename_unlock() to remember
 * the chain of parent locks (and znode holds) taken while walking up
 * the directory tree.
 */
typedef struct zfs_zlock {
	krwlock_t	*zl_rwlock;	/* lock we acquired */
	znode_t		*zl_znode;	/* znode we held */
	struct
zfs_zlock	*zl_next;	/* next in list */
} zfs_zlock_t;

/*
 * Drop locks and release vnodes that were held by zfs_rename_lock().
 */
static void
zfs_rename_unlock(zfs_zlock_t **zlpp)
{
	zfs_zlock_t *zl;

	while ((zl = *zlpp) != NULL) {
		if (zl->zl_znode != NULL)
			VN_RELE(ZTOV(zl->zl_znode));
		rw_exit(zl->zl_rwlock);
		*zlpp = zl->zl_next;
		kmem_free(zl, sizeof (*zl));
	}
}

/*
 * Search back through the directory tree, using the ".." entries.
 * Lock each directory in the chain to prevent concurrent renames.
 * Fail any attempt to move a directory into one of its own descendants.
 * On success *zlpp holds the chain of locks taken; on any return the
 * caller is responsible for releasing it with zfs_rename_unlock().
 */
static int
zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
{
	zfs_zlock_t	*zl;
	znode_t		*zp = tdzp;
	uint64_t	rootid = zp->z_zfsvfs->z_root;
	uint64_t	oidp = zp->z_id;
	krwlock_t	*rwlp = &szp->z_parent_lock;
	krw_t		rw = RW_WRITER;

	/*
	 * First pass write-locks szp and compares to zp->z_id.
	 * Later passes read-lock zp and compare to zp->z_parent.
	 */
	do {
		if (!rw_tryenter(rwlp, rw)) {
			/*
			 * Another thread is renaming in this path.
			 * Note that if we are a WRITER, we don't have any
			 * parent_locks held yet.
			 */
			if (rw == RW_READER && zp->z_id > szp->z_id) {
				/*
				 * Drop our locks and restart
				 */
				zfs_rename_unlock(&zl);
				*zlpp = NULL;
				zp = tdzp;
				oidp = zp->z_id;
				rwlp = &szp->z_parent_lock;
				rw = RW_WRITER;
				continue;
			} else {
				/*
				 * Wait for other thread to drop its locks
				 */
				rw_enter(rwlp, rw);
			}
		}

		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
		zl->zl_rwlock = rwlp;
		zl->zl_znode = NULL;
		zl->zl_next = *zlpp;
		*zlpp = zl;

		if (oidp == szp->z_id)		/* We're a descendant of szp */
			return (EINVAL);

		if (oidp == rootid)		/* We've hit the top */
			return (0);

		if (rw == RW_READER) {		/* i.e. not the first pass */
			int error = zfs_zget(zp->z_zfsvfs, oidp, &zp);
			if (error)
				return (error);
			zl->zl_znode = zp;
		}
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs),
		    &oidp, sizeof (oidp));
		rwlp = &zp->z_parent_lock;
		rw = RW_READER;

	} while (zp->z_id != sdzp->z_id);

	return (0);
}

/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 *	IN:	sdvp	- Source directory containing the "old entry".
 *		snm	- Old entry name.
 *		tdvp	- Target directory to contain the "new entry".
 *		tnm	- New entry name.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *		flags	- case flags (FIGNORECASE).
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = VTOZ(sdvp);
	zfsvfs_t	*zfsvfs = sdzp->z_zfsvfs;
	zilog_t		*zilog;
	vnode_t		*realvp;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	/*
	 * Make sure we have the real vp for the target directory.
	 */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0)
		tdvp = realvp;

	/* Cross-filesystem renames (including into .zfs) are not allowed. */
	if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) {
		ZFS_EXIT(zfsvfs);
		return (EXDEV);
	}

	tdzp = VTOZ(tdvp);
	ZFS_VERIFY_ZP(tdzp);
	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

top:
	/* Restart point for dmu_tx_assign() ERESTART retries. */
	szp = NULL;
	tzp = NULL;
	zl = NULL;

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Lock source and target directory entries.
 * To prevent deadlock,
	 * a lock ordering must be defined.  We lock the directory with
	 * the smallest object id first, or if it's a tie, the one with
	 * the lexically first name.
	 */
	if (sdzp->z_id < tdzp->z_id) {
		cmp = -1;
	} else if (sdzp->z_id > tdzp->z_id) {
		cmp = 1;
	} else {
		/*
		 * First compare the two name arguments without
		 * considering any case folding.
		 */
		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);

		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
		ASSERT(error == 0 || !zfsvfs->z_utf8);
		if (cmp == 0) {
			/*
			 * POSIX: "If the old argument and the new argument
			 * both refer to links to the same existing file,
			 * the rename() function shall return successfully
			 * and perform no other action."
			 */
			ZFS_EXIT(zfsvfs);
			return (0);
		}
		/*
		 * If the file system is case-folding, then we may
		 * have some more checking to do.  A case-folding file
		 * system is either supporting mixed case sensitivity
		 * access or is completely case-insensitive.  Note
		 * that the file system is always case preserving.
		 *
		 * In mixed sensitivity mode case sensitive behavior
		 * is the default.  FIGNORECASE must be used to
		 * explicitly request case insensitive behavior.
		 *
		 * If the source and target names provided differ only
		 * by case (e.g., a request to rename 'tim' to 'Tim'),
		 * we will treat this as a special case in the
		 * case-insensitive mode: as long as the source name
		 * is an exact match, we will allow this to proceed as
		 * a name-change request.
		 */
		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
		    flags & FIGNORECASE)) &&
		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
		    &error) == 0) {
			/*
			 * case preserving rename request, require exact
			 * name matches
			 */
			zflg |= ZCIEXACT;
			zflg &= ~ZCILOOK;
		}
	}

	/*
	 * If the source and destination directories are the same, we should
	 * grab the z_name_lock of that directory only once.
	 */
	if (sdzp == tdzp) {
		zflg |= ZHAVELOCK;
		rw_enter(&sdzp->z_name_lock, RW_READER);
	}

	/* Acquire the two dirent locks in the order decided above. */
	if (cmp < 0) {
		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
		    ZEXISTS | zflg, NULL, NULL);
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
	} else {
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, zflg, NULL, NULL);
		serr = zfs_dirent_lock(&sdl,
		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
		    NULL, NULL);
	}

	if (serr) {
		/*
		 * Source entry invalid or not there.
		 */
		if (!terr) {
			zfs_dirent_unlock(tdl);
			if (tzp)
				VN_RELE(ZTOV(tzp));
		}

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		/*
		 * FreeBSD: In OpenSolaris they only check if rename source is
		 * ".." here, because "." is handled in their lookup.  This is
		 * not the case for FreeBSD, so we check for "." explicitly.
		 */
		if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0)
			serr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (serr);
	}
	if (terr) {
		zfs_dirent_unlock(sdl);
		VN_RELE(ZTOV(szp));

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		if (strcmp(tnm, "..") == 0)
			terr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (terr);
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */

	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
		goto out;

	if (ZTOV(szp)->v_type == VDIR) {
		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if (ZTOV(szp)->v_type == VDIR) {
			if (ZTOV(tzp)->v_type != VDIR) {
				error = ENOTDIR;
				goto out;
			}
		} else {
			if (ZTOV(tzp)->v_type == VDIR) {
				error = EISDIR;
				goto out;
			}
		}
		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (szp->z_id == tzp->z_id) {
			error = 0;
			goto out;
		}
	}

	vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
	if (tzp)
		vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/* Drop everything; on ERESTART wait and retry from "top". */
		if (zl != NULL)
			zfs_rename_unlock(&zl);
		zfs_dirent_unlock(sdl);
		zfs_dirent_unlock(tdl);

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		VN_RELE(ZTOV(szp));
		if (tzp)
			VN_RELE(ZTOV(tzp));
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);

	if (error == 0) {
		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME |
				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
				    sdl->dl_name, tdzp, tdl->dl_name, szp);

				/*
				 * Update path information for the target vnode
				 */
				vn_renamepath(tdvp, ZTOV(szp), tnm,
				    strlen(tnm));
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
				    ZRENAMING, NULL), ==, 0);
			}
		}
#ifdef FREEBSD_NAMECACHE
		if (error == 0) {
			cache_purge(sdvp);
			cache_purge(tdvp);
			cache_purge(ZTOV(szp));
			if (tzp)
				cache_purge(ZTOV(tzp));
		}
#endif
	}

	dmu_tx_commit(tx);
out:
	if (zl != NULL)
		zfs_rename_unlock(&zl);

	zfs_dirent_unlock(sdl);
	zfs_dirent_unlock(tdl);

	if (sdzp == tdzp)
		rw_exit(&sdzp->z_name_lock);


	VN_RELE(ZTOV(szp));
	if (tzp)
		VN_RELE(ZTOV(tzp));

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 *	IN:	dvp	- Directory to contain new symbolic link.
 *		name	- Name of directory entry in dvp.
 *		vap	- Attributes of new entry.
 *		link	- Name for new symlink entry (the target path).
 *		cr	- credentials of caller.
 *		td	- calling thread.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link,
    cred_t *cr, kthread_t *td)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	len = strlen(link);
	int		error;
	int		zflg = ZNEW;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype = TX_SYMLINK;
	int		flags = 0;	/* Case flags */
03986 03987 ASSERT(vap->va_type == VLNK); 03988 03989 ZFS_ENTER(zfsvfs); 03990 ZFS_VERIFY_ZP(dzp); 03991 zilog = zfsvfs->z_log; 03992 03993 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 03994 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 03995 ZFS_EXIT(zfsvfs); 03996 return (EILSEQ); 03997 } 03998 if (flags & FIGNORECASE) 03999 zflg |= ZCILOOK; 04000 04001 if (len > MAXPATHLEN) { 04002 ZFS_EXIT(zfsvfs); 04003 return (ENAMETOOLONG); 04004 } 04005 04006 if ((error = zfs_acl_ids_create(dzp, 0, 04007 vap, cr, NULL, &acl_ids)) != 0) { 04008 ZFS_EXIT(zfsvfs); 04009 return (error); 04010 } 04011 top: 04012 /* 04013 * Attempt to lock directory; fail if entry already exists. 04014 */ 04015 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 04016 if (error) { 04017 zfs_acl_ids_free(&acl_ids); 04018 ZFS_EXIT(zfsvfs); 04019 return (error); 04020 } 04021 04022 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 04023 zfs_acl_ids_free(&acl_ids); 04024 zfs_dirent_unlock(dl); 04025 ZFS_EXIT(zfsvfs); 04026 return (error); 04027 } 04028 04029 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 04030 zfs_acl_ids_free(&acl_ids); 04031 zfs_dirent_unlock(dl); 04032 ZFS_EXIT(zfsvfs); 04033 return (EDQUOT); 04034 } 04035 tx = dmu_tx_create(zfsvfs->z_os); 04036 fuid_dirtied = zfsvfs->z_fuid_dirty; 04037 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 04038 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 04039 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 04040 ZFS_SA_BASE_ATTR_SIZE + len); 04041 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 04042 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 04043 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 04044 acl_ids.z_aclp->z_acl_bytes); 04045 } 04046 if (fuid_dirtied) 04047 zfs_fuid_txhold(zfsvfs, tx); 04048 error = dmu_tx_assign(tx, TXG_NOWAIT); 04049 if (error) { 04050 zfs_dirent_unlock(dl); 04051 if (error == ERESTART) { 04052 dmu_tx_wait(tx); 04053 dmu_tx_abort(tx); 04054 goto top; 04055 } 
04056 zfs_acl_ids_free(&acl_ids); 04057 dmu_tx_abort(tx); 04058 ZFS_EXIT(zfsvfs); 04059 return (error); 04060 } 04061 04062 /* 04063 * Create a new object for the symlink. 04064 * for version 4 ZPL datsets the symlink will be an SA attribute 04065 */ 04066 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 04067 04068 if (fuid_dirtied) 04069 zfs_fuid_sync(zfsvfs, tx); 04070 04071 mutex_enter(&zp->z_lock); 04072 if (zp->z_is_sa) 04073 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 04074 link, len, tx); 04075 else 04076 zfs_sa_symlink(zp, link, len, tx); 04077 mutex_exit(&zp->z_lock); 04078 04079 zp->z_size = len; 04080 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 04081 &zp->z_size, sizeof (zp->z_size), tx); 04082 /* 04083 * Insert the new object into the directory. 04084 */ 04085 (void) zfs_link_create(dl, zp, tx, ZNEW); 04086 04087 if (flags & FIGNORECASE) 04088 txtype |= TX_CI; 04089 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 04090 *vpp = ZTOV(zp); 04091 04092 zfs_acl_ids_free(&acl_ids); 04093 04094 dmu_tx_commit(tx); 04095 04096 zfs_dirent_unlock(dl); 04097 04098 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 04099 zil_commit(zilog, 0); 04100 04101 ZFS_EXIT(zfsvfs); 04102 return (error); 04103 } 04104 04120 /* ARGSUSED */ 04121 static int 04122 zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 04123 { 04124 znode_t *zp = VTOZ(vp); 04125 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04126 int error; 04127 04128 ZFS_ENTER(zfsvfs); 04129 ZFS_VERIFY_ZP(zp); 04130 04131 mutex_enter(&zp->z_lock); 04132 if (zp->z_is_sa) 04133 error = sa_lookup_uio(zp->z_sa_hdl, 04134 SA_ZPL_SYMLINK(zfsvfs), uio); 04135 else 04136 error = zfs_sa_readlink(zp, uio); 04137 mutex_exit(&zp->z_lock); 04138 04139 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 04140 04141 ZFS_EXIT(zfsvfs); 04142 return (error); 04143 } 04144 04161 /* ARGSUSED */ 04162 static int 04163 zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 04164 caller_context_t *ct, int flags) 04165 
{ 04166 znode_t *dzp = VTOZ(tdvp); 04167 znode_t *tzp, *szp; 04168 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 04169 zilog_t *zilog; 04170 zfs_dirlock_t *dl; 04171 dmu_tx_t *tx; 04172 vnode_t *realvp; 04173 int error; 04174 int zf = ZNEW; 04175 uint64_t parent; 04176 uid_t owner; 04177 04178 ASSERT(tdvp->v_type == VDIR); 04179 04180 ZFS_ENTER(zfsvfs); 04181 ZFS_VERIFY_ZP(dzp); 04182 zilog = zfsvfs->z_log; 04183 04184 if (VOP_REALVP(svp, &realvp, ct) == 0) 04185 svp = realvp; 04186 04187 /* 04188 * POSIX dictates that we return EPERM here. 04189 * Better choices include ENOTSUP or EISDIR. 04190 */ 04191 if (svp->v_type == VDIR) { 04192 ZFS_EXIT(zfsvfs); 04193 return (EPERM); 04194 } 04195 04196 if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 04197 ZFS_EXIT(zfsvfs); 04198 return (EXDEV); 04199 } 04200 04201 szp = VTOZ(svp); 04202 ZFS_VERIFY_ZP(szp); 04203 04204 /* Prevent links to .zfs/shares files */ 04205 04206 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 04207 &parent, sizeof (uint64_t))) != 0) { 04208 ZFS_EXIT(zfsvfs); 04209 return (error); 04210 } 04211 if (parent == zfsvfs->z_shares_dir) { 04212 ZFS_EXIT(zfsvfs); 04213 return (EPERM); 04214 } 04215 04216 if (zfsvfs->z_utf8 && u8_validate(name, 04217 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 04218 ZFS_EXIT(zfsvfs); 04219 return (EILSEQ); 04220 } 04221 if (flags & FIGNORECASE) 04222 zf |= ZCILOOK; 04223 04224 /* 04225 * We do not support links between attributes and non-attributes 04226 * because of the potential security risk of creating links 04227 * into "normal" file space in order to circumvent restrictions 04228 * imposed in attribute space. 
04229 */ 04230 if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 04231 ZFS_EXIT(zfsvfs); 04232 return (EINVAL); 04233 } 04234 04235 04236 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 04237 if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 04238 ZFS_EXIT(zfsvfs); 04239 return (EPERM); 04240 } 04241 04242 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 04243 ZFS_EXIT(zfsvfs); 04244 return (error); 04245 } 04246 04247 top: 04248 /* 04249 * Attempt to lock directory; fail if entry already exists. 04250 */ 04251 error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 04252 if (error) { 04253 ZFS_EXIT(zfsvfs); 04254 return (error); 04255 } 04256 04257 tx = dmu_tx_create(zfsvfs->z_os); 04258 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 04259 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 04260 zfs_sa_upgrade_txholds(tx, szp); 04261 zfs_sa_upgrade_txholds(tx, dzp); 04262 error = dmu_tx_assign(tx, TXG_NOWAIT); 04263 if (error) { 04264 zfs_dirent_unlock(dl); 04265 if (error == ERESTART) { 04266 dmu_tx_wait(tx); 04267 dmu_tx_abort(tx); 04268 goto top; 04269 } 04270 dmu_tx_abort(tx); 04271 ZFS_EXIT(zfsvfs); 04272 return (error); 04273 } 04274 04275 error = zfs_link_create(dl, szp, tx, 0); 04276 04277 if (error == 0) { 04278 uint64_t txtype = TX_LINK; 04279 if (flags & FIGNORECASE) 04280 txtype |= TX_CI; 04281 zfs_log_link(zilog, tx, txtype, dzp, szp, name); 04282 } 04283 04284 dmu_tx_commit(tx); 04285 04286 zfs_dirent_unlock(dl); 04287 04288 if (error == 0) { 04289 vnevent_link(svp, ct); 04290 } 04291 04292 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 04293 zil_commit(zilog, 0); 04294 04295 ZFS_EXIT(zfsvfs); 04296 return (error); 04297 } 04298 04299 #ifdef sun 04300 04304 /* ARGSUSED */ 04305 static int 04306 zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 04307 size_t *lenp, int flags, cred_t *cr) 04308 { 04309 pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 04310 return (0); 04311 } 04312 04329 /* 
ARGSUSED */ 04330 static int 04331 zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 04332 size_t *lenp, int flags, cred_t *cr) 04333 { 04334 znode_t *zp = VTOZ(vp); 04335 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04336 dmu_tx_t *tx; 04337 u_offset_t off, koff; 04338 size_t len, klen; 04339 int err; 04340 04341 off = pp->p_offset; 04342 len = PAGESIZE; 04343 /* 04344 * If our blocksize is bigger than the page size, try to kluster 04345 * multiple pages so that we write a full block (thus avoiding 04346 * a read-modify-write). 04347 */ 04348 if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 04349 klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 04350 koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 04351 ASSERT(koff <= zp->z_size); 04352 if (koff + klen > zp->z_size) 04353 klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 04354 pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 04355 } 04356 ASSERT3U(btop(len), ==, btopr(len)); 04357 04358 /* 04359 * Can't push pages past end-of-file. 
04360 */ 04361 if (off >= zp->z_size) { 04362 /* ignore all pages */ 04363 err = 0; 04364 goto out; 04365 } else if (off + len > zp->z_size) { 04366 int npages = btopr(zp->z_size - off); 04367 page_t *trunc; 04368 04369 page_list_break(&pp, &trunc, npages); 04370 /* ignore pages past end of file */ 04371 if (trunc) 04372 pvn_write_done(trunc, flags); 04373 len = zp->z_size - off; 04374 } 04375 04376 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 04377 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 04378 err = EDQUOT; 04379 goto out; 04380 } 04381 top: 04382 tx = dmu_tx_create(zfsvfs->z_os); 04383 dmu_tx_hold_write(tx, zp->z_id, off, len); 04384 04385 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 04386 zfs_sa_upgrade_txholds(tx, zp); 04387 err = dmu_tx_assign(tx, TXG_NOWAIT); 04388 if (err != 0) { 04389 if (err == ERESTART) { 04390 dmu_tx_wait(tx); 04391 dmu_tx_abort(tx); 04392 goto top; 04393 } 04394 dmu_tx_abort(tx); 04395 goto out; 04396 } 04397 04398 if (zp->z_blksz <= PAGESIZE) { 04399 caddr_t va = zfs_map_page(pp, S_READ); 04400 ASSERT3U(len, <=, PAGESIZE); 04401 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 04402 zfs_unmap_page(pp, va); 04403 } else { 04404 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 04405 } 04406 04407 if (err == 0) { 04408 uint64_t mtime[2], ctime[2]; 04409 sa_bulk_attr_t bulk[3]; 04410 int count = 0; 04411 04412 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 04413 &mtime, 16); 04414 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 04415 &ctime, 16); 04416 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 04417 &zp->z_pflags, 8); 04418 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 04419 B_TRUE); 04420 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 04421 } 04422 dmu_tx_commit(tx); 04423 04424 out: 04425 pvn_write_done(pp, (err ? 
B_ERROR : 0) | flags); 04426 if (offp) 04427 *offp = off; 04428 if (lenp) 04429 *lenp = len; 04430 04431 return (err); 04432 } 04433 04451 /*ARGSUSED*/ 04452 static int 04453 zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 04454 caller_context_t *ct) 04455 { 04456 znode_t *zp = VTOZ(vp); 04457 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04458 page_t *pp; 04459 size_t io_len; 04460 u_offset_t io_off; 04461 uint_t blksz; 04462 rl_t *rl; 04463 int error = 0; 04464 04465 ZFS_ENTER(zfsvfs); 04466 ZFS_VERIFY_ZP(zp); 04467 04468 /* 04469 * Align this request to the file block size in case we kluster. 04470 * XXX - this can result in pretty aggresive locking, which can 04471 * impact simultanious read/write access. One option might be 04472 * to break up long requests (len == 0) into block-by-block 04473 * operations to get narrower locking. 04474 */ 04475 blksz = zp->z_blksz; 04476 if (ISP2(blksz)) 04477 io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 04478 else 04479 io_off = 0; 04480 if (len > 0 && ISP2(blksz)) 04481 io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 04482 else 04483 io_len = 0; 04484 04485 if (io_len == 0) { 04486 /* 04487 * Search the entire vp list for pages >= io_off. 04488 */ 04489 rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 04490 error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 04491 goto out; 04492 } 04493 rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 04494 04495 if (off > zp->z_size) { 04496 /* past end of file */ 04497 zfs_range_unlock(rl); 04498 ZFS_EXIT(zfsvfs); 04499 return (0); 04500 } 04501 04502 len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 04503 04504 for (off = io_off; io_off < off + len; io_off += io_len) { 04505 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 04506 pp = page_lookup(vp, io_off, 04507 (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 04508 } else { 04509 pp = page_lookup_nowait(vp, io_off, 04510 (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 04511 } 04512 04513 if (pp != NULL && pvn_getdirty(pp, flags)) { 04514 int err; 04515 04516 /* 04517 * Found a dirty page to push 04518 */ 04519 err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 04520 if (err) 04521 error = err; 04522 } else { 04523 io_len = PAGESIZE; 04524 } 04525 } 04526 out: 04527 zfs_range_unlock(rl); 04528 if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 04529 zil_commit(zfsvfs->z_log, zp->z_id); 04530 ZFS_EXIT(zfsvfs); 04531 return (error); 04532 } 04533 #endif /* sun */ 04534 04535 /*ARGSUSED*/ 04536 void 04537 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 04538 { 04539 znode_t *zp = VTOZ(vp); 04540 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04541 int error; 04542 04543 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 04544 if (zp->z_sa_hdl == NULL) { 04545 /* 04546 * The fs has been unmounted, or we did a 04547 * suspend/resume and this file no longer exists. 04548 */ 04549 VI_LOCK(vp); 04550 ASSERT(vp->v_count <= 1); 04551 vp->v_count = 0; 04552 VI_UNLOCK(vp); 04553 vrecycle(vp); 04554 rw_exit(&zfsvfs->z_teardown_inactive_lock); 04555 return; 04556 } 04557 04558 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 04559 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 04560 04561 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 04562 zfs_sa_upgrade_txholds(tx, zp); 04563 error = dmu_tx_assign(tx, TXG_WAIT); 04564 if (error) { 04565 dmu_tx_abort(tx); 04566 } else { 04567 mutex_enter(&zp->z_lock); 04568 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 04569 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 04570 zp->z_atime_dirty = 0; 04571 mutex_exit(&zp->z_lock); 04572 dmu_tx_commit(tx); 04573 } 04574 } 04575 04576 zfs_zinactive(zp); 04577 rw_exit(&zfsvfs->z_teardown_inactive_lock); 04578 } 04579 04580 #ifdef sun 04581 04592 /* ARGSUSED */ 04593 static int 04594 zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 04595 caller_context_t *ct) 04596 { 04597 if (vp->v_type == VDIR) 04598 
return (0); 04599 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 04600 } 04601 04606 static int 04607 zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 04608 flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 04609 { 04610 znode_t *zp = VTOZ(vp); 04611 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04612 04613 ZFS_ENTER(zfsvfs); 04614 ZFS_VERIFY_ZP(zp); 04615 04616 /* 04617 * We are following the UFS semantics with respect to mapcnt 04618 * here: If we see that the file is mapped already, then we will 04619 * return an error, but we don't worry about races between this 04620 * function and zfs_map(). 04621 */ 04622 if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 04623 ZFS_EXIT(zfsvfs); 04624 return (EAGAIN); 04625 } 04626 ZFS_EXIT(zfsvfs); 04627 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 04628 } 04629 04637 static int 04638 zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 04639 caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 04640 { 04641 znode_t *zp = VTOZ(vp); 04642 page_t *pp, *cur_pp; 04643 objset_t *os = zp->z_zfsvfs->z_os; 04644 u_offset_t io_off, total; 04645 size_t io_len; 04646 int err; 04647 04648 if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 04649 /* 04650 * We only have a single page, don't bother klustering 04651 */ 04652 io_off = off; 04653 io_len = PAGESIZE; 04654 pp = page_create_va(vp, io_off, io_len, 04655 PG_EXCL | PG_WAIT, seg, addr); 04656 } else { 04657 /* 04658 * Try to find enough pages to fill the page list 04659 */ 04660 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 04661 &io_len, off, plsz, 0); 04662 } 04663 if (pp == NULL) { 04664 /* 04665 * The page already exists, nothing to do here. 04666 */ 04667 *pl = NULL; 04668 return (0); 04669 } 04670 04671 /* 04672 * Fill the pages in the kluster. 
04673 */ 04674 cur_pp = pp; 04675 for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 04676 caddr_t va; 04677 04678 ASSERT3U(io_off, ==, cur_pp->p_offset); 04679 va = zfs_map_page(cur_pp, S_WRITE); 04680 err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 04681 DMU_READ_PREFETCH); 04682 zfs_unmap_page(cur_pp, va); 04683 if (err) { 04684 /* On error, toss the entire kluster */ 04685 pvn_read_done(pp, B_ERROR); 04686 /* convert checksum errors into IO errors */ 04687 if (err == ECKSUM) 04688 err = EIO; 04689 return (err); 04690 } 04691 cur_pp = cur_pp->p_next; 04692 } 04693 04694 /* 04695 * Fill in the page list array from the kluster starting 04696 * from the desired offset `off'. 04697 * NOTE: the page list will always be null terminated. 04698 */ 04699 pvn_plist_init(pp, pl, plsz, off, io_len, rw); 04700 ASSERT(pl == NULL || (*pl)->p_offset == off); 04701 04702 return (0); 04703 } 04704 04731 /* ARGSUSED */ 04732 static int 04733 zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 04734 page_t *pl, size_t plsz, struct seg *seg, caddr_t addr, 04735 enum seg_rw rw, cred_t *cr, caller_context_t *ct) 04736 { 04737 znode_t *zp = VTOZ(vp); 04738 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04739 page_t **pl0 = pl; 04740 int err = 0; 04741 04742 /* we do our own caching, faultahead is unnecessary */ 04743 if (pl == NULL) 04744 return (0); 04745 else if (len > plsz) 04746 len = plsz; 04747 else 04748 len = P2ROUNDUP(len, PAGESIZE); 04749 ASSERT(plsz >= len); 04750 04751 ZFS_ENTER(zfsvfs); 04752 ZFS_VERIFY_ZP(zp); 04753 04754 if (protp) 04755 *protp = PROT_ALL; 04756 04757 /* 04758 * Loop through the requested range [off, off + len) looking 04759 * for pages. If we don't find a page, we will need to create 04760 * a new page and fill it with data from the file. 
04761 */ 04762 while (len > 0) { 04763 if (*pl = page_lookup(vp, off, SE_SHARED)) 04764 *(pl+1) = NULL; 04765 else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 04766 goto out; 04767 while (*pl) { 04768 ASSERT3U((*pl)->p_offset, ==, off); 04769 off += PAGESIZE; 04770 addr += PAGESIZE; 04771 if (len > 0) { 04772 ASSERT3U(len, >=, PAGESIZE); 04773 len -= PAGESIZE; 04774 } 04775 ASSERT3U(plsz, >=, PAGESIZE); 04776 plsz -= PAGESIZE; 04777 pl++; 04778 } 04779 } 04780 04781 /* 04782 * Fill out the page array with any pages already in the cache. 04783 */ 04784 while (plsz > 0 && 04785 (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 04786 off += PAGESIZE; 04787 plsz -= PAGESIZE; 04788 } 04789 out: 04790 if (err) { 04791 /* 04792 * Release any pages we have previously locked. 04793 */ 04794 while (pl > pl0) 04795 page_unlock(*--pl); 04796 } else { 04797 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 04798 } 04799 04800 *pl = NULL; 04801 04802 ZFS_EXIT(zfsvfs); 04803 return (err); 04804 } 04805 04815 /*ARGSUSED*/ 04816 static int 04817 zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 04818 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 04819 caller_context_t *ct) 04820 { 04821 znode_t *zp = VTOZ(vp); 04822 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04823 segvn_crargs_t vn_a; 04824 int error; 04825 04826 ZFS_ENTER(zfsvfs); 04827 ZFS_VERIFY_ZP(zp); 04828 04829 if ((prot & PROT_WRITE) && (zp->z_pflags & 04830 (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 04831 ZFS_EXIT(zfsvfs); 04832 return (EPERM); 04833 } 04834 04835 if ((prot & (PROT_READ | PROT_EXEC)) && 04836 (zp->z_pflags & ZFS_AV_QUARANTINED)) { 04837 ZFS_EXIT(zfsvfs); 04838 return (EACCES); 04839 } 04840 04841 if (vp->v_flag & VNOMAP) { 04842 ZFS_EXIT(zfsvfs); 04843 return (ENOSYS); 04844 } 04845 04846 if (off < 0 || len > MAXOFFSET_T - off) { 04847 ZFS_EXIT(zfsvfs); 04848 return (ENXIO); 04849 } 04850 04851 if (vp->v_type != VREG) { 04852 ZFS_EXIT(zfsvfs); 04853 return (ENODEV); 
04854 } 04855 04856 /* 04857 * If file is locked, disallow mapping. 04858 */ 04859 if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 04860 ZFS_EXIT(zfsvfs); 04861 return (EAGAIN); 04862 } 04863 04864 as_rangelock(as); 04865 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 04866 if (error != 0) { 04867 as_rangeunlock(as); 04868 ZFS_EXIT(zfsvfs); 04869 return (error); 04870 } 04871 04872 vn_a.vp = vp; 04873 vn_a.offset = (u_offset_t)off; 04874 vn_a.type = flags & MAP_TYPE; 04875 vn_a.prot = prot; 04876 vn_a.maxprot = maxprot; 04877 vn_a.cred = cr; 04878 vn_a.amp = NULL; 04879 vn_a.flags = flags & ~MAP_TYPE; 04880 vn_a.szc = 0; 04881 vn_a.lgrp_mem_policy_flags = 0; 04882 04883 error = as_map(as, *addrp, len, segvn_create, &vn_a); 04884 04885 as_rangeunlock(as); 04886 ZFS_EXIT(zfsvfs); 04887 return (error); 04888 } 04889 04890 /* ARGSUSED */ 04891 static int 04892 zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 04893 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 04894 caller_context_t *ct) 04895 { 04896 uint64_t pages = btopr(len); 04897 04898 atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 04899 return (0); 04900 } 04901 04922 /* ARGSUSED */ 04923 static int 04924 zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 04925 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 04926 caller_context_t *ct) 04927 { 04928 uint64_t pages = btopr(len); 04929 04930 ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 04931 atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 04932 04933 if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 04934 vn_has_cached_data(vp)) 04935 (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 04936 04937 return (0); 04938 } 04939 04960 /* ARGSUSED */ 04961 static int 04962 zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 04963 offset_t offset, cred_t *cr, caller_context_t *ct) 04964 { 04965 znode_t *zp = VTOZ(vp); 04966 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04967 uint64_t off, len; 04968 
int error; 04969 04970 ZFS_ENTER(zfsvfs); 04971 ZFS_VERIFY_ZP(zp); 04972 04973 if (cmd != F_FREESP) { 04974 ZFS_EXIT(zfsvfs); 04975 return (EINVAL); 04976 } 04977 04978 if (error = convoff(vp, bfp, 0, offset)) { 04979 ZFS_EXIT(zfsvfs); 04980 return (error); 04981 } 04982 04983 if (bfp->l_len < 0) { 04984 ZFS_EXIT(zfsvfs); 04985 return (EINVAL); 04986 } 04987 04988 off = bfp->l_start; 04989 len = bfp->l_len; /* 0 means from off to end of file */ 04990 04991 error = zfs_freesp(zp, off, len, flag, TRUE); 04992 04993 ZFS_EXIT(zfsvfs); 04994 return (error); 04995 } 04996 #endif /* sun */ 04997 04998 CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 04999 CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 05000 05001 /*ARGSUSED*/ 05002 static int 05003 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 05004 { 05005 znode_t *zp = VTOZ(vp); 05006 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05007 uint32_t gen; 05008 uint64_t gen64; 05009 uint64_t object = zp->z_id; 05010 zfid_short_t *zfid; 05011 int size, i, error; 05012 05013 ZFS_ENTER(zfsvfs); 05014 ZFS_VERIFY_ZP(zp); 05015 05016 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 05017 &gen64, sizeof (uint64_t))) != 0) { 05018 ZFS_EXIT(zfsvfs); 05019 return (error); 05020 } 05021 05022 gen = (uint32_t)gen64; 05023 05024 size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 05025 fidp->fid_len = size; 05026 05027 zfid = (zfid_short_t *)fidp; 05028 05029 zfid->zf_len = size; 05030 05031 for (i = 0; i < sizeof (zfid->zf_object); i++) 05032 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 05033 05034 /* Must have a non-zero generation number to distinguish from .zfs */ 05035 if (gen == 0) 05036 gen = 1; 05037 for (i = 0; i < sizeof (zfid->zf_gen); i++) 05038 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 05039 05040 if (size == LONG_FID_LEN) { 05041 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 05042 zfid_long_t *zlfid; 05043 05044 zlfid = (zfid_long_t *)fidp; 05045 05046 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 05047 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 05048 05049 /* XXX - this should be the generation number for the objset */ 05050 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 05051 zlfid->zf_setgen[i] = 0; 05052 } 05053 05054 ZFS_EXIT(zfsvfs); 05055 return (0); 05056 } 05057 05058 static int 05059 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 05060 caller_context_t *ct) 05061 { 05062 znode_t *zp, *xzp; 05063 zfsvfs_t *zfsvfs; 05064 zfs_dirlock_t *dl; 05065 int error; 05066 05067 switch (cmd) { 05068 case _PC_LINK_MAX: 05069 *valp = INT_MAX; 05070 return (0); 05071 05072 case _PC_FILESIZEBITS: 05073 *valp = 64; 05074 return (0); 05075 #ifdef sun 05076 case _PC_XATTR_EXISTS: 05077 zp = VTOZ(vp); 05078 zfsvfs = zp->z_zfsvfs; 05079 ZFS_ENTER(zfsvfs); 05080 ZFS_VERIFY_ZP(zp); 05081 *valp = 0; 05082 error = zfs_dirent_lock(&dl, zp, "", &xzp, 05083 ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 05084 if (error == 0) { 05085 zfs_dirent_unlock(dl); 05086 if (!zfs_dirempty(xzp)) 05087 *valp = 1; 05088 VN_RELE(ZTOV(xzp)); 05089 } else if (error == ENOENT) { 05090 /* 05091 * If there aren't extended attributes, it's the 05092 * same as having zero of them. 
05093 */ 05094 error = 0; 05095 } 05096 ZFS_EXIT(zfsvfs); 05097 return (error); 05098 05099 case _PC_SATTR_ENABLED: 05100 case _PC_SATTR_EXISTS: 05101 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 05102 (vp->v_type == VREG || vp->v_type == VDIR); 05103 return (0); 05104 05105 case _PC_ACCESS_FILTERING: 05106 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 05107 vp->v_type == VDIR; 05108 return (0); 05109 05110 case _PC_ACL_ENABLED: 05111 *valp = _ACL_ACE_ENABLED; 05112 return (0); 05113 #endif /* sun */ 05114 case _PC_MIN_HOLE_SIZE: 05115 *valp = (int)SPA_MINBLOCKSIZE; 05116 return (0); 05117 #ifdef sun 05118 case _PC_TIMESTAMP_RESOLUTION: 05119 /* nanosecond timestamp resolution */ 05120 *valp = 1L; 05121 return (0); 05122 #endif /* sun */ 05123 case _PC_ACL_EXTENDED: 05124 *valp = 0; 05125 return (0); 05126 05127 case _PC_ACL_NFS4: 05128 *valp = 1; 05129 return (0); 05130 05131 case _PC_ACL_PATH_MAX: 05132 *valp = ACL_MAX_ENTRIES; 05133 return (0); 05134 05135 default: 05136 return (EOPNOTSUPP); 05137 } 05138 } 05139 05140 /*ARGSUSED*/ 05141 static int 05142 zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 05143 caller_context_t *ct) 05144 { 05145 znode_t *zp = VTOZ(vp); 05146 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05147 int error; 05148 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 05149 05150 ZFS_ENTER(zfsvfs); 05151 ZFS_VERIFY_ZP(zp); 05152 error = zfs_getacl(zp, vsecp, skipaclchk, cr); 05153 ZFS_EXIT(zfsvfs); 05154 05155 return (error); 05156 } 05157 05158 /*ARGSUSED*/ 05159 int 05160 zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 05161 caller_context_t *ct) 05162 { 05163 znode_t *zp = VTOZ(vp); 05164 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05165 int error; 05166 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 05167 zilog_t *zilog = zfsvfs->z_log; 05168 05169 ZFS_ENTER(zfsvfs); 05170 ZFS_VERIFY_ZP(zp); 05171 05172 error = zfs_setacl(zp, vsecp, skipaclchk, cr); 05173 05174 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 05175 zil_commit(zilog, 0); 05176 05177 ZFS_EXIT(zfsvfs); 05178 return (error); 05179 } 05180 05181 #ifdef sun 05182 05188 int zcr_blksz_min = (1 << 10); /* 1K */ 05196 int zcr_blksz_max = (1 << 17); /* 128K */ 05197 05198 /*ARGSUSED*/ 05199 static int 05200 zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 05201 caller_context_t *ct) 05202 { 05203 znode_t *zp = VTOZ(vp); 05204 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05205 int max_blksz = zfsvfs->z_max_blksz; 05206 uio_t *uio = &xuio->xu_uio; 05207 ssize_t size = uio->uio_resid; 05208 offset_t offset = uio->uio_loffset; 05209 int blksz; 05210 int fullblk, i; 05211 arc_buf_t *abuf; 05212 ssize_t maxsize; 05213 int preamble, postamble; 05214 05215 if (xuio->xu_type != UIOTYPE_ZEROCOPY) 05216 return (EINVAL); 05217 05218 ZFS_ENTER(zfsvfs); 05219 ZFS_VERIFY_ZP(zp); 05220 switch (ioflag) { 05221 case UIO_WRITE: 05222 /* 05223 * Loan out an arc_buf for write if write size is bigger than 05224 * max_blksz, and the file's block size is also max_blksz. 05225 */ 05226 blksz = max_blksz; 05227 if (size < blksz || zp->z_blksz != blksz) { 05228 ZFS_EXIT(zfsvfs); 05229 return (EINVAL); 05230 } 05231 /* 05232 * Caller requests buffers for write before knowing where the 05233 * write offset might be (e.g. NFS TCP write). 
05234 */ 05235 if (offset == -1) { 05236 preamble = 0; 05237 } else { 05238 preamble = P2PHASE(offset, blksz); 05239 if (preamble) { 05240 preamble = blksz - preamble; 05241 size -= preamble; 05242 } 05243 } 05244 05245 postamble = P2PHASE(size, blksz); 05246 size -= postamble; 05247 05248 fullblk = size / blksz; 05249 (void) dmu_xuio_init(xuio, 05250 (preamble != 0) + fullblk + (postamble != 0)); 05251 DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 05252 int, postamble, int, 05253 (preamble != 0) + fullblk + (postamble != 0)); 05254 05255 /* 05256 * Have to fix iov base/len for partial buffers. They 05257 * currently represent full arc_buf's. 05258 */ 05259 if (preamble) { 05260 /* data begins in the middle of the arc_buf */ 05261 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 05262 blksz); 05263 ASSERT(abuf); 05264 (void) dmu_xuio_add(xuio, abuf, 05265 blksz - preamble, preamble); 05266 } 05267 05268 for (i = 0; i < fullblk; i++) { 05269 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 05270 blksz); 05271 ASSERT(abuf); 05272 (void) dmu_xuio_add(xuio, abuf, 0, blksz); 05273 } 05274 05275 if (postamble) { 05276 /* data ends in the middle of the arc_buf */ 05277 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 05278 blksz); 05279 ASSERT(abuf); 05280 (void) dmu_xuio_add(xuio, abuf, 0, postamble); 05281 } 05282 break; 05283 case UIO_READ: 05284 /* 05285 * Loan out an arc_buf for read if the read size is larger than 05286 * the current file block size. Block alignment is not 05287 * considered. Partial arc_buf will be loaned out for read. 
05288 */ 05289 blksz = zp->z_blksz; 05290 if (blksz < zcr_blksz_min) 05291 blksz = zcr_blksz_min; 05292 if (blksz > zcr_blksz_max) 05293 blksz = zcr_blksz_max; 05294 /* avoid potential complexity of dealing with it */ 05295 if (blksz > max_blksz) { 05296 ZFS_EXIT(zfsvfs); 05297 return (EINVAL); 05298 } 05299 05300 maxsize = zp->z_size - uio->uio_loffset; 05301 if (size > maxsize) 05302 size = maxsize; 05303 05304 if (size < blksz || vn_has_cached_data(vp)) { 05305 ZFS_EXIT(zfsvfs); 05306 return (EINVAL); 05307 } 05308 break; 05309 default: 05310 ZFS_EXIT(zfsvfs); 05311 return (EINVAL); 05312 } 05313 05314 uio->uio_extflg = UIO_XUIO; 05315 XUIO_XUZC_RW(xuio) = ioflag; 05316 ZFS_EXIT(zfsvfs); 05317 return (0); 05318 } 05319 05320 /*ARGSUSED*/ 05321 static int 05322 zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 05323 { 05324 int i; 05325 arc_buf_t *abuf; 05326 int ioflag = XUIO_XUZC_RW(xuio); 05327 05328 ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 05329 05330 i = dmu_xuio_cnt(xuio); 05331 while (i-- > 0) { 05332 abuf = dmu_xuio_arcbuf(xuio, i); 05333 /* 05334 * if abuf == NULL, it must be a write buffer 05335 * that has been returned in zfs_write(). 05336 */ 05337 if (abuf) 05338 dmu_return_arcbuf(abuf); 05339 ASSERT(abuf || ioflag == UIO_WRITE); 05340 } 05341 05342 dmu_xuio_fini(xuio); 05343 return (0); 05344 } 05345 05346 /* 05347 * Predeclare these here so that the compiler assumes that 05348 * this is an "old style" function declaration that does 05349 * not include arguments => we won't get type mismatch errors 05350 * in the initializations that follow. 
05351 */ 05352 static int zfs_inval(); 05353 static int zfs_isdir(); 05354 05355 static int 05356 zfs_inval() 05357 { 05358 return (EINVAL); 05359 } 05360 05361 static int 05362 zfs_isdir() 05363 { 05364 return (EISDIR); 05365 } 05369 vnodeops_t *zfs_dvnodeops; 05370 const fs_operation_def_t zfs_dvnodeops_template[] = { 05371 VOPNAME_OPEN, { .vop_open = zfs_open }, 05372 VOPNAME_CLOSE, { .vop_close = zfs_close }, 05373 VOPNAME_READ, { .error = zfs_isdir }, 05374 VOPNAME_WRITE, { .error = zfs_isdir }, 05375 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 05376 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05377 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 05378 VOPNAME_ACCESS, { .vop_access = zfs_access }, 05379 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 05380 VOPNAME_CREATE, { .vop_create = zfs_create }, 05381 VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 05382 VOPNAME_LINK, { .vop_link = zfs_link }, 05383 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 05384 VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 05385 VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 05386 VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 05387 VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 05388 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 05389 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05390 VOPNAME_FID, { .vop_fid = zfs_fid }, 05391 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 05392 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05393 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 05394 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 05395 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05396 NULL, NULL 05397 }; 05398 05402 vnodeops_t *zfs_fvnodeops; 05403 const fs_operation_def_t zfs_fvnodeops_template[] = { 05404 VOPNAME_OPEN, { .vop_open = zfs_open }, 05405 VOPNAME_CLOSE, { .vop_close = zfs_close }, 05406 VOPNAME_READ, { .vop_read = zfs_read }, 05407 VOPNAME_WRITE, { .vop_write = zfs_write }, 05408 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 05409 
VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05410 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 05411 VOPNAME_ACCESS, { .vop_access = zfs_access }, 05412 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 05413 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 05414 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 05415 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05416 VOPNAME_FID, { .vop_fid = zfs_fid }, 05417 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 05418 VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 05419 VOPNAME_SPACE, { .vop_space = zfs_space }, 05420 VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 05421 VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 05422 VOPNAME_MAP, { .vop_map = zfs_map }, 05423 VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 05424 VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 05425 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05426 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 05427 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 05428 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05429 VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 05430 VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 05431 NULL, NULL 05432 }; 05433 05437 vnodeops_t *zfs_symvnodeops; 05438 const fs_operation_def_t zfs_symvnodeops_template[] = { 05439 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05440 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 05441 VOPNAME_ACCESS, { .vop_access = zfs_access }, 05442 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 05443 VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 05444 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05445 VOPNAME_FID, { .vop_fid = zfs_fid }, 05446 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05447 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05448 NULL, NULL 05449 }; 05450 05454 vnodeops_t *zfs_sharevnodeops; 05455 const fs_operation_def_t zfs_sharevnodeops_template[] = { 05456 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05457 
VOPNAME_ACCESS, { .vop_access = zfs_access }, 05458 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05459 VOPNAME_FID, { .vop_fid = zfs_fid }, 05460 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05461 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 05462 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 05463 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05464 NULL, NULL 05465 }; 05466 05479 vnodeops_t *zfs_xdvnodeops; 05480 const fs_operation_def_t zfs_xdvnodeops_template[] = { 05481 VOPNAME_OPEN, { .vop_open = zfs_open }, 05482 VOPNAME_CLOSE, { .vop_close = zfs_close }, 05483 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 05484 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05485 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 05486 VOPNAME_ACCESS, { .vop_access = zfs_access }, 05487 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 05488 VOPNAME_CREATE, { .vop_create = zfs_create }, 05489 VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 05490 VOPNAME_LINK, { .vop_link = zfs_link }, 05491 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 05492 VOPNAME_MKDIR, { .error = zfs_inval }, 05493 VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 05494 VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 05495 VOPNAME_SYMLINK, { .error = zfs_inval }, 05496 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 05497 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05498 VOPNAME_FID, { .vop_fid = zfs_fid }, 05499 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 05500 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05501 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 05502 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 05503 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05504 NULL, NULL 05505 }; 05506 05510 vnodeops_t *zfs_evnodeops; 05511 const fs_operation_def_t zfs_evnodeops_template[] = { 05512 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05513 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05514 NULL, NULL 05515 }; 05516 
#endif /* sun */ 05517 05518 static int 05519 ioflags(int ioflags) 05520 { 05521 int flags = 0; 05522 05523 if (ioflags & IO_APPEND) 05524 flags |= FAPPEND; 05525 if (ioflags & IO_NDELAY) 05526 flags |= FNONBLOCK; 05527 if (ioflags & IO_SYNC) 05528 flags |= (FSYNC | FDSYNC | FRSYNC); 05529 05530 return (flags); 05531 } 05532 05533 static int 05534 zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 05535 { 05536 znode_t *zp = VTOZ(vp); 05537 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05538 objset_t *os = zp->z_zfsvfs->z_os; 05539 vm_page_t mreq; 05540 vm_object_t object; 05541 caddr_t va; 05542 struct sf_buf *sf; 05543 int i, error; 05544 int pcount, size; 05545 05546 ZFS_ENTER(zfsvfs); 05547 ZFS_VERIFY_ZP(zp); 05548 05549 pcount = round_page(count) / PAGE_SIZE; 05550 mreq = m[reqpage]; 05551 object = mreq->object; 05552 error = 0; 05553 05554 KASSERT(vp->v_object == object, ("mismatching object")); 05555 05556 VM_OBJECT_LOCK(object); 05557 05558 for (i = 0; i < pcount; i++) { 05559 if (i != reqpage) { 05560 vm_page_lock(m[i]); 05561 vm_page_free(m[i]); 05562 vm_page_unlock(m[i]); 05563 } 05564 } 05565 05566 if (mreq->valid) { 05567 if (mreq->valid != VM_PAGE_BITS_ALL) 05568 vm_page_zero_invalid(mreq, TRUE); 05569 VM_OBJECT_UNLOCK(object); 05570 ZFS_EXIT(zfsvfs); 05571 return (VM_PAGER_OK); 05572 } 05573 05574 PCPU_INC(cnt.v_vnodein); 05575 PCPU_INC(cnt.v_vnodepgsin); 05576 05577 if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 05578 VM_OBJECT_UNLOCK(object); 05579 ZFS_EXIT(zfsvfs); 05580 return (VM_PAGER_BAD); 05581 } 05582 05583 size = PAGE_SIZE; 05584 if (IDX_TO_OFF(mreq->pindex) + size > object->un_pager.vnp.vnp_size) 05585 size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mreq->pindex); 05586 05587 VM_OBJECT_UNLOCK(object); 05588 va = zfs_map_page(mreq, &sf); 05589 error = dmu_read(os, zp->z_id, IDX_TO_OFF(mreq->pindex), 05590 size, va, DMU_READ_PREFETCH); 05591 if (size != PAGE_SIZE) 05592 bzero(va + size, PAGE_SIZE - size); 05593 
zfs_unmap_page(sf); 05594 VM_OBJECT_LOCK(object); 05595 05596 if (!error) 05597 mreq->valid = VM_PAGE_BITS_ALL; 05598 KASSERT(mreq->dirty == 0, ("zfs_getpages: page %p is dirty", mreq)); 05599 05600 VM_OBJECT_UNLOCK(object); 05601 05602 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 05603 ZFS_EXIT(zfsvfs); 05604 return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 05605 } 05606 05607 static int 05608 zfs_freebsd_getpages(ap) 05609 struct vop_getpages_args /* { 05610 struct vnode *a_vp; 05611 vm_page_t *a_m; 05612 int a_count; 05613 int a_reqpage; 05614 vm_ooffset_t a_offset; 05615 } */ *ap; 05616 { 05617 05618 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 05619 } 05620 05621 static int 05622 zfs_freebsd_open(ap) 05623 struct vop_open_args /* { 05624 struct vnode *a_vp; 05625 int a_mode; 05626 struct ucred *a_cred; 05627 struct thread *a_td; 05628 } */ *ap; 05629 { 05630 vnode_t *vp = ap->a_vp; 05631 znode_t *zp = VTOZ(vp); 05632 int error; 05633 05634 error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 05635 if (error == 0) 05636 vnode_create_vobject(vp, zp->z_size, ap->a_td); 05637 return (error); 05638 } 05639 05640 static int 05641 zfs_freebsd_close(ap) 05642 struct vop_close_args /* { 05643 struct vnode *a_vp; 05644 int a_fflag; 05645 struct ucred *a_cred; 05646 struct thread *a_td; 05647 } */ *ap; 05648 { 05649 05650 return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 05651 } 05652 05653 static int 05654 zfs_freebsd_ioctl(ap) 05655 struct vop_ioctl_args /* { 05656 struct vnode *a_vp; 05657 u_long a_command; 05658 caddr_t a_data; 05659 int a_fflag; 05660 struct ucred *cred; 05661 struct thread *td; 05662 } */ *ap; 05663 { 05664 05665 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 05666 ap->a_fflag, ap->a_cred, NULL, NULL)); 05667 } 05668 05669 static int 05670 zfs_freebsd_read(ap) 05671 struct vop_read_args /* { 05672 struct vnode *a_vp; 05673 struct uio *a_uio; 05674 int a_ioflag; 05675 struct ucred *a_cred; 05676 } */ 
*ap; 05677 { 05678 05679 return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 05680 ap->a_cred, NULL)); 05681 } 05682 05683 static int 05684 zfs_freebsd_write(ap) 05685 struct vop_write_args /* { 05686 struct vnode *a_vp; 05687 struct uio *a_uio; 05688 int a_ioflag; 05689 struct ucred *a_cred; 05690 } */ *ap; 05691 { 05692 05693 return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 05694 ap->a_cred, NULL)); 05695 } 05696 05697 static int 05698 zfs_freebsd_access(ap) 05699 struct vop_access_args /* { 05700 struct vnode *a_vp; 05701 accmode_t a_accmode; 05702 struct ucred *a_cred; 05703 struct thread *a_td; 05704 } */ *ap; 05705 { 05706 vnode_t *vp = ap->a_vp; 05707 znode_t *zp = VTOZ(vp); 05708 accmode_t accmode; 05709 int error = 0; 05710 05711 /* 05712 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 05713 */ 05714 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 05715 if (accmode != 0) 05716 error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 05717 05718 /* 05719 * VADMIN has to be handled by vaccess(). 05720 */ 05721 if (error == 0) { 05722 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 05723 if (accmode != 0) { 05724 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 05725 zp->z_gid, accmode, ap->a_cred, NULL); 05726 } 05727 } 05728 05729 /* 05730 * For VEXEC, ensure that at least one execute bit is set for 05731 * non-directories. 
05732 */ 05733 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 05734 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 05735 error = EACCES; 05736 } 05737 05738 return (error); 05739 } 05740 05741 static int 05742 zfs_freebsd_lookup(ap) 05743 struct vop_lookup_args /* { 05744 struct vnode *a_dvp; 05745 struct vnode **a_vpp; 05746 struct componentname *a_cnp; 05747 } */ *ap; 05748 { 05749 struct componentname *cnp = ap->a_cnp; 05750 char nm[NAME_MAX + 1]; 05751 05752 ASSERT(cnp->cn_namelen < sizeof(nm)); 05753 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 05754 05755 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 05756 cnp->cn_cred, cnp->cn_thread, 0)); 05757 } 05758 05759 static int 05760 zfs_freebsd_create(ap) 05761 struct vop_create_args /* { 05762 struct vnode *a_dvp; 05763 struct vnode **a_vpp; 05764 struct componentname *a_cnp; 05765 struct vattr *a_vap; 05766 } */ *ap; 05767 { 05768 struct componentname *cnp = ap->a_cnp; 05769 vattr_t *vap = ap->a_vap; 05770 int mode; 05771 05772 ASSERT(cnp->cn_flags & SAVENAME); 05773 05774 vattr_init_mask(vap); 05775 mode = vap->va_mode & ALLPERMS; 05776 05777 return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 05778 ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 05779 } 05780 05781 static int 05782 zfs_freebsd_remove(ap) 05783 struct vop_remove_args /* { 05784 struct vnode *a_dvp; 05785 struct vnode *a_vp; 05786 struct componentname *a_cnp; 05787 } */ *ap; 05788 { 05789 05790 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 05791 05792 return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 05793 ap->a_cnp->cn_cred, NULL, 0)); 05794 } 05795 05796 static int 05797 zfs_freebsd_mkdir(ap) 05798 struct vop_mkdir_args /* { 05799 struct vnode *a_dvp; 05800 struct vnode **a_vpp; 05801 struct componentname *a_cnp; 05802 struct vattr *a_vap; 05803 } */ *ap; 05804 { 05805 vattr_t *vap = ap->a_vap; 05806 05807 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 05808 05809 
vattr_init_mask(vap); 05810 05811 return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 05812 ap->a_cnp->cn_cred, NULL, 0, NULL)); 05813 } 05814 05815 static int 05816 zfs_freebsd_rmdir(ap) 05817 struct vop_rmdir_args /* { 05818 struct vnode *a_dvp; 05819 struct vnode *a_vp; 05820 struct componentname *a_cnp; 05821 } */ *ap; 05822 { 05823 struct componentname *cnp = ap->a_cnp; 05824 05825 ASSERT(cnp->cn_flags & SAVENAME); 05826 05827 return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 05828 } 05829 05830 static int 05831 zfs_freebsd_readdir(ap) 05832 struct vop_readdir_args /* { 05833 struct vnode *a_vp; 05834 struct uio *a_uio; 05835 struct ucred *a_cred; 05836 int *a_eofflag; 05837 int *a_ncookies; 05838 u_long **a_cookies; 05839 } */ *ap; 05840 { 05841 05842 return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 05843 ap->a_ncookies, ap->a_cookies)); 05844 } 05845 05846 static int 05847 zfs_freebsd_fsync(ap) 05848 struct vop_fsync_args /* { 05849 struct vnode *a_vp; 05850 int a_waitfor; 05851 struct thread *a_td; 05852 } */ *ap; 05853 { 05854 05855 vop_stdfsync(ap); 05856 return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 05857 } 05858 05859 static int 05860 zfs_freebsd_getattr(ap) 05861 struct vop_getattr_args /* { 05862 struct vnode *a_vp; 05863 struct vattr *a_vap; 05864 struct ucred *a_cred; 05865 } */ *ap; 05866 { 05867 vattr_t *vap = ap->a_vap; 05868 xvattr_t xvap; 05869 u_long fflags = 0; 05870 int error; 05871 05872 xva_init(&xvap); 05873 xvap.xva_vattr = *vap; 05874 xvap.xva_vattr.va_mask |= AT_XVATTR; 05875 05876 /* Convert chflags into ZFS-type flags. */ 05877 /* XXX: what about SF_SETTABLE?. 
*/ 05878 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 05879 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 05880 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 05881 XVA_SET_REQ(&xvap, XAT_NODUMP); 05882 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 05883 if (error != 0) 05884 return (error); 05885 05886 /* Convert ZFS xattr into chflags. */ 05887 #define FLAG_CHECK(fflag, xflag, xfield) do { \ 05888 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 05889 fflags |= (fflag); \ 05890 } while (0) 05891 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 05892 xvap.xva_xoptattrs.xoa_immutable); 05893 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 05894 xvap.xva_xoptattrs.xoa_appendonly); 05895 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 05896 xvap.xva_xoptattrs.xoa_nounlink); 05897 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 05898 xvap.xva_xoptattrs.xoa_nodump); 05899 #undef FLAG_CHECK 05900 *vap = xvap.xva_vattr; 05901 vap->va_flags = fflags; 05902 return (0); 05903 } 05904 05905 static int 05906 zfs_freebsd_setattr(ap) 05907 struct vop_setattr_args /* { 05908 struct vnode *a_vp; 05909 struct vattr *a_vap; 05910 struct ucred *a_cred; 05911 } */ *ap; 05912 { 05913 vnode_t *vp = ap->a_vp; 05914 vattr_t *vap = ap->a_vap; 05915 cred_t *cred = ap->a_cred; 05916 xvattr_t xvap; 05917 u_long fflags; 05918 uint64_t zflags; 05919 05920 vattr_init_mask(vap); 05921 vap->va_mask &= ~AT_NOSET; 05922 05923 xva_init(&xvap); 05924 xvap.xva_vattr = *vap; 05925 05926 zflags = VTOZ(vp)->z_pflags; 05927 05928 if (vap->va_flags != VNOVAL) { 05929 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 05930 int error; 05931 05932 if (zfsvfs->z_use_fuids == B_FALSE) 05933 return (EOPNOTSUPP); 05934 05935 fflags = vap->va_flags; 05936 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 05937 return (EOPNOTSUPP); 05938 /* 05939 * Unprivileged processes are not permitted to unset system 05940 * flags, or modify flags if any system flags are set. 
05941 * Privileged non-jail processes may not modify system flags 05942 * if securelevel > 0 and any existing system flags are set. 05943 * Privileged jail processes behave like privileged non-jail 05944 * processes if the security.jail.chflags_allowed sysctl is 05945 * is non-zero; otherwise, they behave like unprivileged 05946 * processes. 05947 */ 05948 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 05949 priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 05950 if (zflags & 05951 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 05952 error = securelevel_gt(cred, 0); 05953 if (error != 0) 05954 return (error); 05955 } 05956 } else { 05957 /* 05958 * Callers may only modify the file flags on objects they 05959 * have VADMIN rights for. 05960 */ 05961 if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 05962 return (error); 05963 if (zflags & 05964 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 05965 return (EPERM); 05966 } 05967 if (fflags & 05968 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 05969 return (EPERM); 05970 } 05971 } 05972 05973 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 05974 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 05975 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 05976 XVA_SET_REQ(&xvap, (xflag)); \ 05977 (xfield) = ((fflags & (fflag)) != 0); \ 05978 } \ 05979 } while (0) 05980 /* Convert chflags into ZFS-type flags. */ 05981 /* XXX: what about SF_SETTABLE?. 
*/ 05982 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 05983 xvap.xva_xoptattrs.xoa_immutable); 05984 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 05985 xvap.xva_xoptattrs.xoa_appendonly); 05986 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 05987 xvap.xva_xoptattrs.xoa_nounlink); 05988 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 05989 xvap.xva_xoptattrs.xoa_nodump); 05990 #undef FLAG_CHANGE 05991 } 05992 return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 05993 } 05994 05995 static int 05996 zfs_freebsd_rename(ap) 05997 struct vop_rename_args /* { 05998 struct vnode *a_fdvp; 05999 struct vnode *a_fvp; 06000 struct componentname *a_fcnp; 06001 struct vnode *a_tdvp; 06002 struct vnode *a_tvp; 06003 struct componentname *a_tcnp; 06004 } */ *ap; 06005 { 06006 vnode_t *fdvp = ap->a_fdvp; 06007 vnode_t *fvp = ap->a_fvp; 06008 vnode_t *tdvp = ap->a_tdvp; 06009 vnode_t *tvp = ap->a_tvp; 06010 int error; 06011 06012 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 06013 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 06014 06015 error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 06016 ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 06017 06018 if (tdvp == tvp) 06019 VN_RELE(tdvp); 06020 else 06021 VN_URELE(tdvp); 06022 if (tvp) 06023 VN_URELE(tvp); 06024 VN_RELE(fdvp); 06025 VN_RELE(fvp); 06026 06027 return (error); 06028 } 06029 06030 static int 06031 zfs_freebsd_symlink(ap) 06032 struct vop_symlink_args /* { 06033 struct vnode *a_dvp; 06034 struct vnode **a_vpp; 06035 struct componentname *a_cnp; 06036 struct vattr *a_vap; 06037 char *a_target; 06038 } */ *ap; 06039 { 06040 struct componentname *cnp = ap->a_cnp; 06041 vattr_t *vap = ap->a_vap; 06042 06043 ASSERT(cnp->cn_flags & SAVENAME); 06044 06045 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 06046 vattr_init_mask(vap); 06047 06048 return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 06049 ap->a_target, cnp->cn_cred, cnp->cn_thread)); 06050 } 06051 06052 static int 06053 zfs_freebsd_readlink(ap) 06054 struct vop_readlink_args /* { 06055 struct vnode *a_vp; 06056 struct uio *a_uio; 06057 struct ucred *a_cred; 06058 } */ *ap; 06059 { 06060 06061 return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 06062 } 06063 06064 static int 06065 zfs_freebsd_link(ap) 06066 struct vop_link_args /* { 06067 struct vnode *a_tdvp; 06068 struct vnode *a_vp; 06069 struct componentname *a_cnp; 06070 } */ *ap; 06071 { 06072 struct componentname *cnp = ap->a_cnp; 06073 06074 ASSERT(cnp->cn_flags & SAVENAME); 06075 06076 return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 06077 } 06078 06079 static int 06080 zfs_freebsd_inactive(ap) 06081 struct vop_inactive_args /* { 06082 struct vnode *a_vp; 06083 struct thread *a_td; 06084 } */ *ap; 06085 { 06086 vnode_t *vp = ap->a_vp; 06087 06088 zfs_inactive(vp, ap->a_td->td_ucred, NULL); 06089 return (0); 06090 } 06091 06092 static void 06093 zfs_reclaim_complete(void *arg, int pending) 06094 { 06095 znode_t *zp = arg; 06096 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 06097 06098 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 06099 if (zp->z_sa_hdl != NULL) { 06100 ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 06101 zfs_znode_dmu_fini(zp); 06102 ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 06103 } 06104 zfs_znode_free(zp); 06105 rw_exit(&zfsvfs->z_teardown_inactive_lock); 06106 /* 06107 * If the file system is being unmounted, there is a process waiting 06108 * for us, wake it up. 
06109 */ 06110 if (zfsvfs->z_unmounted) 06111 wakeup_one(zfsvfs); 06112 } 06113 06114 static int 06115 zfs_freebsd_reclaim(ap) 06116 struct vop_reclaim_args /* { 06117 struct vnode *a_vp; 06118 struct thread *a_td; 06119 } */ *ap; 06120 { 06121 vnode_t *vp = ap->a_vp; 06122 znode_t *zp = VTOZ(vp); 06123 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 06124 boolean_t rlocked; 06125 06126 rlocked = rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 06127 06128 ASSERT(zp != NULL); 06129 06130 /* 06131 * Destroy the vm object and flush associated pages. 06132 */ 06133 vnode_destroy_vobject(vp); 06134 06135 mutex_enter(&zp->z_lock); 06136 zp->z_vnode = NULL; 06137 mutex_exit(&zp->z_lock); 06138 06139 if (zp->z_unlinked) { 06140 ; /* Do nothing. */ 06141 } else if (!rlocked) { 06142 TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 06143 taskqueue_enqueue(taskqueue_thread, &zp->z_task); 06144 } else if (zp->z_sa_hdl == NULL) { 06145 zfs_znode_free(zp); 06146 } else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ { 06147 int locked; 06148 06149 locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 : 06150 ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id); 06151 if (locked == 0) { 06152 /* 06153 * Lock can't be obtained due to deadlock possibility, 06154 * so defer znode destruction. 
06155 */ 06156 TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 06157 taskqueue_enqueue(taskqueue_thread, &zp->z_task); 06158 } else { 06159 zfs_znode_dmu_fini(zp); 06160 if (locked == 1) 06161 ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 06162 zfs_znode_free(zp); 06163 } 06164 } 06165 VI_LOCK(vp); 06166 vp->v_data = NULL; 06167 ASSERT(vp->v_holdcnt >= 1); 06168 VI_UNLOCK(vp); 06169 if (rlocked) 06170 rw_exit(&zfsvfs->z_teardown_inactive_lock); 06171 return (0); 06172 } 06173 06174 static int 06175 zfs_freebsd_fid(ap) 06176 struct vop_fid_args /* { 06177 struct vnode *a_vp; 06178 struct fid *a_fid; 06179 } */ *ap; 06180 { 06181 06182 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 06183 } 06184 06185 static int 06186 zfs_freebsd_pathconf(ap) 06187 struct vop_pathconf_args /* { 06188 struct vnode *a_vp; 06189 int a_name; 06190 register_t *a_retval; 06191 } */ *ap; 06192 { 06193 ulong_t val; 06194 int error; 06195 06196 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 06197 if (error == 0) 06198 *ap->a_retval = val; 06199 else if (error == EOPNOTSUPP) 06200 error = vop_stdpathconf(ap); 06201 return (error); 06202 } 06203 06204 static int 06205 zfs_freebsd_fifo_pathconf(ap) 06206 struct vop_pathconf_args /* { 06207 struct vnode *a_vp; 06208 int a_name; 06209 register_t *a_retval; 06210 } */ *ap; 06211 { 06212 06213 switch (ap->a_name) { 06214 case _PC_ACL_EXTENDED: 06215 case _PC_ACL_NFS4: 06216 case _PC_ACL_PATH_MAX: 06217 case _PC_MAC_PRESENT: 06218 return (zfs_freebsd_pathconf(ap)); 06219 default: 06220 return (fifo_specops.vop_pathconf(ap)); 06221 } 06222 } 06223 06224 /* 06225 * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 06226 * extended attribute name: 06227 * 06228 * NAMESPACE PREFIX 06229 * system freebsd:system: 06230 * user (none, can be used to access ZFS fsattr(5) attributes 06231 * created on Solaris) 06232 */ 06233 static int 06234 zfs_create_attrname(int attrnamespace, const char *name, char 
*attrname, 06235 size_t size) 06236 { 06237 const char *namespace, *prefix, *suffix; 06238 06239 /* We don't allow '/' character in attribute name. */ 06240 if (strchr(name, '/') != NULL) 06241 return (EINVAL); 06242 /* We don't allow attribute names that start with "freebsd:" string. */ 06243 if (strncmp(name, "freebsd:", 8) == 0) 06244 return (EINVAL); 06245 06246 bzero(attrname, size); 06247 06248 switch (attrnamespace) { 06249 case EXTATTR_NAMESPACE_USER: 06250 #if 0 06251 prefix = "freebsd:"; 06252 namespace = EXTATTR_NAMESPACE_USER_STRING; 06253 suffix = ":"; 06254 #else 06255 /* 06256 * This is the default namespace by which we can access all 06257 * attributes created on Solaris. 06258 */ 06259 prefix = namespace = suffix = ""; 06260 #endif 06261 break; 06262 case EXTATTR_NAMESPACE_SYSTEM: 06263 prefix = "freebsd:"; 06264 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 06265 suffix = ":"; 06266 break; 06267 case EXTATTR_NAMESPACE_EMPTY: 06268 default: 06269 return (EINVAL); 06270 } 06271 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 06272 name) >= size) { 06273 return (ENAMETOOLONG); 06274 } 06275 return (0); 06276 } 06277 06281 static int 06282 zfs_getextattr(struct vop_getextattr_args *ap) 06283 /* 06284 vop_getextattr { 06285 IN struct vnode *a_vp; 06286 IN int a_attrnamespace; 06287 IN const char *a_name; 06288 INOUT struct uio *a_uio; 06289 OUT size_t *a_size; 06290 IN struct ucred *a_cred; 06291 IN struct thread *a_td; 06292 }; 06293 */ 06294 { 06295 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 06296 struct thread *td = ap->a_td; 06297 struct nameidata nd; 06298 char attrname[255]; 06299 struct vattr va; 06300 vnode_t *xvp = NULL, *vp; 06301 int error, flags; 06302 06303 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 06304 ap->a_cred, ap->a_td, VREAD); 06305 if (error != 0) 06306 return (error); 06307 06308 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 06309 sizeof(attrname)); 06310 if (error != 
0) 06311 return (error); 06312 06313 ZFS_ENTER(zfsvfs); 06314 06315 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 06316 LOOKUP_XATTR); 06317 if (error != 0) { 06318 ZFS_EXIT(zfsvfs); 06319 return (error); 06320 } 06321 06322 flags = FREAD; 06323 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 06324 xvp, td); 06325 error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 06326 vp = nd.ni_vp; 06327 NDFREE(&nd, NDF_ONLY_PNBUF); 06328 if (error != 0) { 06329 ZFS_EXIT(zfsvfs); 06330 if (error == ENOENT) 06331 error = ENOATTR; 06332 return (error); 06333 } 06334 06335 if (ap->a_size != NULL) { 06336 error = VOP_GETATTR(vp, &va, ap->a_cred); 06337 if (error == 0) 06338 *ap->a_size = (size_t)va.va_size; 06339 } else if (ap->a_uio != NULL) 06340 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 06341 06342 VOP_UNLOCK(vp, 0); 06343 vn_close(vp, flags, ap->a_cred, td); 06344 ZFS_EXIT(zfsvfs); 06345 06346 return (error); 06347 } 06348 06349 /* 06350 * Vnode operation to remove a named attribute. 
06351 */ 06352 int 06353 zfs_deleteextattr(struct vop_deleteextattr_args *ap) 06354 /* 06355 vop_deleteextattr { 06356 IN struct vnode *a_vp; 06357 IN int a_attrnamespace; 06358 IN const char *a_name; 06359 IN struct ucred *a_cred; 06360 IN struct thread *a_td; 06361 }; 06362 */ 06363 { 06364 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 06365 struct thread *td = ap->a_td; 06366 struct nameidata nd; 06367 char attrname[255]; 06368 struct vattr va; 06369 vnode_t *xvp = NULL, *vp; 06370 int error, flags; 06371 06372 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 06373 ap->a_cred, ap->a_td, VWRITE); 06374 if (error != 0) 06375 return (error); 06376 06377 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 06378 sizeof(attrname)); 06379 if (error != 0) 06380 return (error); 06381 06382 ZFS_ENTER(zfsvfs); 06383 06384 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 06385 LOOKUP_XATTR); 06386 if (error != 0) { 06387 ZFS_EXIT(zfsvfs); 06388 return (error); 06389 } 06390 06391 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 06392 UIO_SYSSPACE, attrname, xvp, td); 06393 error = namei(&nd); 06394 vp = nd.ni_vp; 06395 NDFREE(&nd, NDF_ONLY_PNBUF); 06396 if (error != 0) { 06397 ZFS_EXIT(zfsvfs); 06398 if (error == ENOENT) 06399 error = ENOATTR; 06400 return (error); 06401 } 06402 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 06403 06404 vput(nd.ni_dvp); 06405 if (vp == nd.ni_dvp) 06406 vrele(vp); 06407 else 06408 vput(vp); 06409 ZFS_EXIT(zfsvfs); 06410 06411 return (error); 06412 } 06413 06417 static int 06418 zfs_setextattr(struct vop_setextattr_args *ap) 06419 /* 06420 vop_setextattr { 06421 IN struct vnode *a_vp; 06422 IN int a_attrnamespace; 06423 IN const char *a_name; 06424 INOUT struct uio *a_uio; 06425 IN struct ucred *a_cred; 06426 IN struct thread *a_td; 06427 }; 06428 */ 06429 { 06430 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 06431 struct thread *td = ap->a_td; 06432 struct nameidata nd; 06433 char 
attrname[255]; 06434 struct vattr va; 06435 vnode_t *xvp = NULL, *vp; 06436 int error, flags; 06437 06438 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 06439 ap->a_cred, ap->a_td, VWRITE); 06440 if (error != 0) 06441 return (error); 06442 06443 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 06444 sizeof(attrname)); 06445 if (error != 0) 06446 return (error); 06447 06448 ZFS_ENTER(zfsvfs); 06449 06450 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 06451 LOOKUP_XATTR | CREATE_XATTR_DIR); 06452 if (error != 0) { 06453 ZFS_EXIT(zfsvfs); 06454 return (error); 06455 } 06456 06457 flags = FFLAGS(O_WRONLY | O_CREAT); 06458 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 06459 xvp, td); 06460 error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 06461 vp = nd.ni_vp; 06462 NDFREE(&nd, NDF_ONLY_PNBUF); 06463 if (error != 0) { 06464 ZFS_EXIT(zfsvfs); 06465 return (error); 06466 } 06467 06468 VATTR_NULL(&va); 06469 va.va_size = 0; 06470 error = VOP_SETATTR(vp, &va, ap->a_cred); 06471 if (error == 0) 06472 VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 06473 06474 VOP_UNLOCK(vp, 0); 06475 vn_close(vp, flags, ap->a_cred, td); 06476 ZFS_EXIT(zfsvfs); 06477 06478 return (error); 06479 } 06480 06484 static int 06485 zfs_listextattr(struct vop_listextattr_args *ap) 06486 /* 06487 vop_listextattr { 06488 IN struct vnode *a_vp; 06489 IN int a_attrnamespace; 06490 INOUT struct uio *a_uio; 06491 OUT size_t *a_size; 06492 IN struct ucred *a_cred; 06493 IN struct thread *a_td; 06494 }; 06495 */ 06496 { 06497 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 06498 struct thread *td = ap->a_td; 06499 struct nameidata nd; 06500 char attrprefix[16]; 06501 u_char dirbuf[sizeof(struct dirent)]; 06502 struct dirent *dp; 06503 struct iovec aiov; 06504 struct uio auio, *uio = ap->a_uio; 06505 size_t *sizep = ap->a_size; 06506 size_t plen; 06507 vnode_t *xvp = NULL, *vp; 06508 int done, error, eof, pos; 06509 06510 error 
= extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 06511 ap->a_cred, ap->a_td, VREAD); 06512 if (error != 0) 06513 return (error); 06514 06515 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 06516 sizeof(attrprefix)); 06517 if (error != 0) 06518 return (error); 06519 plen = strlen(attrprefix); 06520 06521 ZFS_ENTER(zfsvfs); 06522 06523 if (sizep != NULL) 06524 *sizep = 0; 06525 06526 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 06527 LOOKUP_XATTR); 06528 if (error != 0) { 06529 ZFS_EXIT(zfsvfs); 06530 /* 06531 * ENOATTR means that the EA directory does not yet exist, 06532 * i.e. there are no extended attributes there. 06533 */ 06534 if (error == ENOATTR) 06535 error = 0; 06536 return (error); 06537 } 06538 06539 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 06540 UIO_SYSSPACE, ".", xvp, td); 06541 error = namei(&nd); 06542 vp = nd.ni_vp; 06543 NDFREE(&nd, NDF_ONLY_PNBUF); 06544 if (error != 0) { 06545 ZFS_EXIT(zfsvfs); 06546 return (error); 06547 } 06548 06549 auio.uio_iov = &aiov; 06550 auio.uio_iovcnt = 1; 06551 auio.uio_segflg = UIO_SYSSPACE; 06552 auio.uio_td = td; 06553 auio.uio_rw = UIO_READ; 06554 auio.uio_offset = 0; 06555 06556 do { 06557 u_char nlen; 06558 06559 aiov.iov_base = (void *)dirbuf; 06560 aiov.iov_len = sizeof(dirbuf); 06561 auio.uio_resid = sizeof(dirbuf); 06562 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 06563 done = sizeof(dirbuf) - auio.uio_resid; 06564 if (error != 0) 06565 break; 06566 for (pos = 0; pos < done;) { 06567 dp = (struct dirent *)(dirbuf + pos); 06568 pos += dp->d_reclen; 06569 /* 06570 * XXX: Temporarily we also accept DT_UNKNOWN, as this 06571 * is what we get when attribute was created on Solaris. 
06572 */ 06573 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 06574 continue; 06575 if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 06576 continue; 06577 else if (strncmp(dp->d_name, attrprefix, plen) != 0) 06578 continue; 06579 nlen = dp->d_namlen - plen; 06580 if (sizep != NULL) 06581 *sizep += 1 + nlen; 06582 else if (uio != NULL) { 06583 /* 06584 * Format of extattr name entry is one byte for 06585 * length and the rest for name. 06586 */ 06587 error = uiomove(&nlen, 1, uio->uio_rw, uio); 06588 if (error == 0) { 06589 error = uiomove(dp->d_name + plen, nlen, 06590 uio->uio_rw, uio); 06591 } 06592 if (error != 0) 06593 break; 06594 } 06595 } 06596 } while (!eof && error == 0); 06597 06598 vput(vp); 06599 ZFS_EXIT(zfsvfs); 06600 06601 return (error); 06602 } 06603 06604 int 06605 zfs_freebsd_getacl(ap) 06606 struct vop_getacl_args /* { 06607 struct vnode *vp; 06608 acl_type_t type; 06609 struct acl *aclp; 06610 struct ucred *cred; 06611 struct thread *td; 06612 } */ *ap; 06613 { 06614 int error; 06615 vsecattr_t vsecattr; 06616 06617 if (ap->a_type != ACL_TYPE_NFS4) 06618 return (EINVAL); 06619 06620 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 06621 if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 06622 return (error); 06623 06624 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 06625 if (vsecattr.vsa_aclentp != NULL) 06626 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 06627 06628 return (error); 06629 } 06630 06631 int 06632 zfs_freebsd_setacl(ap) 06633 struct vop_setacl_args /* { 06634 struct vnode *vp; 06635 acl_type_t type; 06636 struct acl *aclp; 06637 struct ucred *cred; 06638 struct thread *td; 06639 } */ *ap; 06640 { 06641 int error; 06642 vsecattr_t vsecattr; 06643 int aclbsize; /* size of acl list in bytes */ 06644 aclent_t *aaclp; 06645 06646 if (ap->a_type != ACL_TYPE_NFS4) 06647 return (EINVAL); 06648 06649 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 
06650 return (EINVAL); 06651 06652 /* 06653 * With NFSv4 ACLs, chmod(2) may need to add additional entries, 06654 * splitting every entry into two and appending "canonical six" 06655 * entries at the end. Don't allow for setting an ACL that would 06656 * cause chmod(2) to run out of ACL entries. 06657 */ 06658 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 06659 return (ENOSPC); 06660 06661 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 06662 if (error != 0) 06663 return (error); 06664 06665 vsecattr.vsa_mask = VSA_ACE; 06666 aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 06667 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 06668 aaclp = vsecattr.vsa_aclentp; 06669 vsecattr.vsa_aclentsz = aclbsize; 06670 06671 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 06672 error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 06673 kmem_free(aaclp, aclbsize); 06674 06675 return (error); 06676 } 06677 06678 int 06679 zfs_freebsd_aclcheck(ap) 06680 struct vop_aclcheck_args /* { 06681 struct vnode *vp; 06682 acl_type_t type; 06683 struct acl *aclp; 06684 struct ucred *cred; 06685 struct thread *td; 06686 } */ *ap; 06687 { 06688 06689 return (EOPNOTSUPP); 06690 } 06691 06692 struct vop_vector zfs_vnodeops; 06693 struct vop_vector zfs_fifoops; 06694 struct vop_vector zfs_shareops; 06695 06696 struct vop_vector zfs_vnodeops = { 06697 .vop_default = &default_vnodeops, 06698 .vop_inactive = zfs_freebsd_inactive, 06699 .vop_reclaim = zfs_freebsd_reclaim, 06700 .vop_access = zfs_freebsd_access, 06701 #ifdef FREEBSD_NAMECACHE 06702 .vop_lookup = vfs_cache_lookup, 06703 .vop_cachedlookup = zfs_freebsd_lookup, 06704 #else 06705 .vop_lookup = zfs_freebsd_lookup, 06706 #endif 06707 .vop_getattr = zfs_freebsd_getattr, 06708 .vop_setattr = zfs_freebsd_setattr, 06709 .vop_create = zfs_freebsd_create, 06710 .vop_mknod = zfs_freebsd_create, 06711 .vop_mkdir = zfs_freebsd_mkdir, 06712 .vop_readdir = zfs_freebsd_readdir, 06713 
.vop_fsync = zfs_freebsd_fsync, 06714 .vop_open = zfs_freebsd_open, 06715 .vop_close = zfs_freebsd_close, 06716 .vop_rmdir = zfs_freebsd_rmdir, 06717 .vop_ioctl = zfs_freebsd_ioctl, 06718 .vop_link = zfs_freebsd_link, 06719 .vop_symlink = zfs_freebsd_symlink, 06720 .vop_readlink = zfs_freebsd_readlink, 06721 .vop_read = zfs_freebsd_read, 06722 .vop_write = zfs_freebsd_write, 06723 .vop_remove = zfs_freebsd_remove, 06724 .vop_rename = zfs_freebsd_rename, 06725 .vop_pathconf = zfs_freebsd_pathconf, 06726 .vop_bmap = VOP_EOPNOTSUPP, 06727 .vop_fid = zfs_freebsd_fid, 06728 .vop_getextattr = zfs_getextattr, 06729 .vop_deleteextattr = zfs_deleteextattr, 06730 .vop_setextattr = zfs_setextattr, 06731 .vop_listextattr = zfs_listextattr, 06732 .vop_getacl = zfs_freebsd_getacl, 06733 .vop_setacl = zfs_freebsd_setacl, 06734 .vop_aclcheck = zfs_freebsd_aclcheck, 06735 .vop_getpages = zfs_freebsd_getpages, 06736 }; 06737 06738 struct vop_vector zfs_fifoops = { 06739 .vop_default = &fifo_specops, 06740 .vop_fsync = zfs_freebsd_fsync, 06741 .vop_access = zfs_freebsd_access, 06742 .vop_getattr = zfs_freebsd_getattr, 06743 .vop_inactive = zfs_freebsd_inactive, 06744 .vop_read = VOP_PANIC, 06745 .vop_reclaim = zfs_freebsd_reclaim, 06746 .vop_setattr = zfs_freebsd_setattr, 06747 .vop_write = VOP_PANIC, 06748 .vop_pathconf = zfs_freebsd_fifo_pathconf, 06749 .vop_fid = zfs_freebsd_fid, 06750 .vop_getacl = zfs_freebsd_getacl, 06751 .vop_setacl = zfs_freebsd_setacl, 06752 .vop_aclcheck = zfs_freebsd_aclcheck, 06753 }; 06754 06755 /* 06756 * special share hidden files vnode operations template 06757 */ 06758 struct vop_vector zfs_shareops = { 06759 .vop_default = &default_vnodeops, 06760 .vop_access = zfs_freebsd_access, 06761 .vop_inactive = zfs_freebsd_inactive, 06762 .vop_reclaim = zfs_freebsd_reclaim, 06763 .vop_fid = zfs_freebsd_fid, 06764 .vop_pathconf = zfs_freebsd_pathconf, 06765 };