FreeBSD ZFS — The Zettabyte File System (vnode operations)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2010 Robert Milkowski */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
#include <sys/uio.h>
#include <sys/atomic.h>
#include <sys/namei.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/sunddi.h>
#include <sys/filio.h>
#include <sys/sid.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_sa.h>
#include <sys/dnlc.h>
#include <sys/zfs_rlock.h>
#include <sys/extdirent.h>
#include <sys/kidmap.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sf_buf.h>
#include <sys/sched.h>
#include <sys/acl.h>
#include <vm/vm_param.h>
#include <vm/vm_pageout.h>
#include <vm/vm_page.h>

/*
 * NOTE(review): this is a scraped excerpt of the ZFS vnode-operations
 * implementation; the original file's "programming rules" block comment
 * was stripped by the extractor.  Common pattern throughout: every op
 * brackets itself with ZFS_ENTER()/ZFS_EXIT() (filesystem teardown
 * protection) and ZFS_VERIFY_ZP() (znode validity).
 */

/*
 * Open a file.  Denies a write-mode open of an append-only file unless
 * FAPPEND is also set, and (where virus scanning is enabled) refuses
 * access when the on-access scan fails.
 */
/* ARGSUSED */
static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(*vpp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Honor ZFS_APPENDONLY: writes must be append-mode writes. */
	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
	    ((flag & FAPPEND) == 0)) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/*
	 * Virus scan on open of a non-empty regular file that is not
	 * already quarantined (and is not in the .zfs control dir).
	 */
	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
	    ZTOV(zp)->v_type == VREG &&
	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
		if (fs_vscan(*vpp, cr, 0) != 0) {
			ZFS_EXIT(zfsvfs);
			return (EACCES);
		}
	}

	/* Keep a count of the synchronous opens in the znode */
	if (flag & (FSYNC | FDSYNC))
		atomic_inc_32(&zp->z_sync_cnt);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Close a file.  Releases POSIX locks/shares held by the process,
 * drops the synchronous-open count taken in zfs_open(), and runs the
 * on-close virus scan when configured.
 */
/* ARGSUSED */
static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	/*
	 * Clean up any locks held by this process on the vp.
	 */
	cleanlocks(vp, ddi_get_pid(), 0);
	cleanshares(vp, ddi_get_pid());

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Decrement the synchronous opens in the znode */
	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
		atomic_dec_32(&zp->z_sync_cnt);

	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
	    ZTOV(zp)->v_type == VREG &&
	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
		VERIFY(fs_vscan(vp, cr, 1) == 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Implement SEEK_HOLE/SEEK_DATA semantics for the _FIO_SEEK_HOLE and
 * _FIO_SEEK_DATA ioctls.  *off is in/out: on success it is advanced to
 * the next hole (cmd == _FIO_SEEK_HOLE) or next data region.  Returns
 * ENXIO when the offset is at/past EOF and no further region exists.
 */
static int
zfs_holey(vnode_t *vp, u_long cmd, offset_t *off)
{
	znode_t	*zp = VTOZ(vp);
	uint64_t noff = (uint64_t)*off; /* new offset */
	uint64_t file_sz;
	int error;
	boolean_t hole;

	file_sz = zp->z_size;
	if (noff >= file_sz) {
		return (ENXIO);
	}

	if (cmd == _FIO_SEEK_HOLE)
		hole = B_TRUE;
	else
		hole = B_FALSE;

	error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);

	/* end of file? */
	if ((error == ESRCH) || (noff > file_sz)) {
		/*
		 * Handle the virtual hole at the end of file.
		 */
		if (hole) {
			*off = file_sz;
			return (0);
		}
		return (ENXIO);
	}

	if (noff < *off)
		return (error);
	*off = noff;
	return (error);
}

/*
 * ioctl entry point.  Handles the bfu compatibility ioctls as no-ops
 * and dispatches _FIO_SEEK_DATA/_FIO_SEEK_HOLE to zfs_holey().  The
 * offset argument is copied in/out (ddi_copyin/out on Solaris, direct
 * dereference on FreeBSD).  Returns ENOTTY for unknown commands.
 */
/* ARGSUSED */
static int
zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
    int *rvalp, caller_context_t *ct)
{
	offset_t off;
	int error;
	zfsvfs_t *zfsvfs;
	znode_t *zp;

	switch (com) {
	case _FIOFFS:
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	case _FIOGDIO:
	case _FIOSDIO:
		return (0);

	case _FIO_SEEK_DATA:
	case _FIO_SEEK_HOLE:
#ifdef sun
		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
			return (EFAULT);
#else
		off = *(offset_t *)data;
#endif
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);

		/* offset parameter is in/out */
		error = zfs_holey(vp, com, &off);
		ZFS_EXIT(zfsvfs);
		if (error)
			return (error);
#ifdef sun
		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
			return (EFAULT);
#else
		*(offset_t *)data = off;
#endif
		return (0);
	}
	return (ENOTTY);
}

/*
 * Look up (and busy) the resident page backing [start, start+nbytes)
 * of the vnode's VM object, retrying if the page is busied by someone
 * else.  Returns NULL when no fully-valid page is resident (a cached
 * page at that index is freed so a fresh read can replace it).
 * Caller must hold the VM object lock.
 */
static vm_page_t
page_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
	vm_object_t obj;
	vm_page_t pp;

	obj = vp->v_object;
	VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED);

	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) {
			if ((pp->oflags & VPO_BUSY) != 0) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_sleep(pp, "zfsmwb");
				continue;
			}
			vm_page_busy(pp);
			vm_page_undirty(pp);
		} else {
			if (vm_page_is_cached(obj, OFF_TO_IDX(start)))
				vm_page_cache_free(obj, OFF_TO_IDX(start),
				    OFF_TO_IDX(start) + 1);
			pp = NULL;
		}
		break;
	}
	return (pp);
}

/* Release a page busied by page_lookup(). */
static void
page_unlock(vm_page_t pp)
{

	vm_page_wakeup(pp);
}

/* Map a VM page into KVA via an sf_buf; *sfp receives the buffer. */
static caddr_t
zfs_map_page(vm_page_t pp, struct sf_buf **sfp)
{

	*sfp = sf_buf_alloc(pp, 0);
	return ((caddr_t)sf_buf_kva(*sfp));
}

/* Undo zfs_map_page(). */
static void
zfs_unmap_page(struct sf_buf *sf)
{

	sf_buf_free(sf);
}

/*
 * Keep resident pages coherent with a write that has gone to the DMU.
 * For each resident page in [start, start+len): with UIO_NOCOPY the
 * page content is pushed into the DMU (dmu_write), otherwise the
 * freshly written DMU data is pulled back into the page (dmu_read).
 */
static void
update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
    int segflg, dmu_tx_t *tx)
{
	vm_object_t obj;
	struct sf_buf *sf;
	int off;

	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);

	off = start & PAGEOFFSET;
	VM_OBJECT_LOCK(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		int nbytes = MIN(PAGESIZE - off, len);

		if ((pp = page_lookup(vp, start, off, nbytes)) != NULL) {
			caddr_t va;

			/* Drop the object lock around the (sleepable) copy. */
			VM_OBJECT_UNLOCK(obj);
			va = zfs_map_page(pp, &sf);
			if (segflg == UIO_NOCOPY) {
				(void) dmu_write(os, oid, start+off, nbytes,
				    va+off, tx);
			} else {
				(void) dmu_read(os, oid, start+off, nbytes,
				    va+off, DMU_READ_PREFETCH);
			}
			zfs_unmap_page(sf);
			VM_OBJECT_LOCK(obj);
			page_unlock(pp);
		}
		len -= nbytes;
		off = 0;	/* only the first page can start mid-page */
	}
	VM_OBJECT_UNLOCK(obj);
}

/*
 * Read nbytes directly into the vnode's page cache for a UIO_NOCOPY
 * (sendfile-style) request: grab each page, and if it is not yet valid
 * fill it from the DMU, zeroing any tail past EOF.  Expects a
 * page-aligned starting offset.
 */
static int
mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t *zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int off;	/* NOTE(review): appears unused in this function */
	int error = 0;

	ASSERT(uio->uio_segflg == UIO_NOCOPY);
	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);
	ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);

	VM_OBJECT_LOCK(obj);
	for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY |
		    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY);
		if (pp->valid == 0) {
			/* Page not yet populated: fill it from the DMU. */
			vm_page_io_start(pp);
			VM_OBJECT_UNLOCK(obj);
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			if (bytes != PAGESIZE && error == 0)
				bzero(va + bytes, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			VM_OBJECT_LOCK(obj);
			vm_page_io_finish(pp);
			vm_page_lock(pp);
			if (error) {
				vm_page_free(pp);
			} else {
				pp->valid = VM_PAGE_BITS_ALL;
				vm_page_activate(pp);
			}
			vm_page_unlock(pp);
		}
		if (error)
			break;
		uio->uio_resid -= bytes;
		uio->uio_offset += bytes;
		len -= bytes;
	}
	VM_OBJECT_UNLOCK(obj);
	return (error);
}

/*
 * Read nbytes for a normal (copying) uio, preferring resident pages:
 * data found in the page cache is uiomove()d out; holes in residency
 * fall back to dmu_read_uio().  This keeps mmap'ed and read() views of
 * the file coherent.
 */
static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t *zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	vm_object_t obj;
	int64_t start;
	caddr_t va;	/* NOTE(review): shadowed by the inner va below */
	int len = nbytes;
	int off;
	int error = 0;

	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);

	start = uio->uio_loffset;
	off = start & PAGEOFFSET;
	VM_OBJECT_LOCK(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if (pp = page_lookup(vp, start, off, bytes)) {
			struct sf_buf *sf;
			caddr_t va;

			VM_OBJECT_UNLOCK(obj);
			va = zfs_map_page(pp, &sf);
			error = uiomove(va + off, bytes, UIO_READ, uio);
			zfs_unmap_page(sf);
			VM_OBJECT_LOCK(obj);
			page_unlock(pp);
		} else {
			VM_OBJECT_UNLOCK(obj);
			error = dmu_read_uio(os, zp->z_id, uio, bytes);
			VM_OBJECT_LOCK(obj);
		}
		len -= bytes;
		off = 0;
		if (error)
			break;
	}
	VM_OBJECT_UNLOCK(obj);
	return (error);
}

offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */

/*
 * Read bytes from the file into the uio.
 *
 * Refuses quarantined files, validates the offset, fast-tracks empty
 * reads, honors mandatory locking, and commits the ZIL first under
 * FRSYNC (or sync=always).  The range is read-locked, then copied out
 * in zfs_read_chunk_size chunks via the page cache (mappedread /
 * mappedread_sf) when pages are resident, else straight from the DMU.
 * Checksum errors are reported to the caller as EIO.
 */
/* ARGSUSED */
static int
zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	ssize_t		n, nbytes;
	int		error;
	rl_t		*rl;
	xuio_t		*xuio = NULL;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	os = zfsvfs->z_os;

	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
		ZFS_EXIT(zfsvfs);
		return (EACCES);
	}

	/*
	 * Validate file offset
	 */
	if (uio->uio_loffset < (offset_t)0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Fasttrack empty reads
	 */
	if (uio->uio_resid == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/*
	 * Check for mandatory locks
	 */
	if (MANDMODE(zp->z_mode)) {
		if (error = chklock(vp, FREAD,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * If we're in FRSYNC mode, sync out this znode before reading it.
	 */
	if (zfsvfs->z_log &&
	    (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
		zil_commit(zfsvfs->z_log, zp->z_id);

	/*
	 * Lock the range against changes.
	 */
	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);

	/*
	 * If we are reading past end-of-file we can skip
	 * to the end; but we might still need to set atime.
	 */
	if (uio->uio_loffset >= zp->z_size) {
		error = 0;
		goto out;
	}

	ASSERT(uio->uio_loffset < zp->z_size);
	n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);

#ifdef sun
	/* Zero-copy (xuio) read path: pre-register loaned ARC buffers. */
	if ((uio->uio_extflg == UIO_XUIO) &&
	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
		int nblk;
		int blksz = zp->z_blksz;
		uint64_t offset = uio->uio_loffset;

		xuio = (xuio_t *)uio;
		if ((ISP2(blksz))) {
			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
			    blksz)) / blksz;
		} else {
			ASSERT(offset + n <= blksz);
			nblk = 1;
		}
		(void) dmu_xuio_init(xuio, nblk);

		if (vn_has_cached_data(vp)) {
			/*
			 * For simplicity, we always allocate a full buffer
			 * even if we only expect to read a portion of a block.
			 */
			while (--nblk >= 0) {
				(void) dmu_xuio_add(xuio,
				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
				    blksz), 0, blksz);
			}
		}
	}
#endif	/* sun */

	while (n > 0) {
		/* Never cross a chunk boundary in a single copy. */
		nbytes = MIN(n, zfs_read_chunk_size -
		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));

#ifdef __FreeBSD__
		if (uio->uio_segflg == UIO_NOCOPY)
			error = mappedread_sf(vp, nbytes, uio);
		else
#endif /* __FreeBSD__ */
		if (vn_has_cached_data(vp))
			error = mappedread(vp, nbytes, uio);
		else
			error = dmu_read_uio(os, zp->z_id, uio, nbytes);
		if (error) {
			/* convert checksum errors into IO errors */
			if (error == ECKSUM)
				error = EIO;
			break;
		}

		n -= nbytes;
	}
out:
	zfs_range_unlock(rl);

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Write bytes from the uio to the file.
 *
 * Enforces immutable/append-only flags and mandatory locks, takes an
 * RL_APPEND or RL_WRITER range lock, then writes in block-size chunks,
 * each in its own DMU transaction (retried on ERESTART).  Full-block
 * aligned writes borrow an ARC buffer (or consume xuio zero-copy
 * buffers) so the copy happens outside the transaction; partial writes
 * go through dmu_write_uio_dbuf().  Resident pages are kept coherent
 * via update_pages(), SUID/SGID bits are cleared when required,
 * timestamps and size are updated, and each chunk is logged to the
 * ZIL; zil_commit() runs at the end for FSYNC/FDSYNC/sync=always.
 */
/* ARGSUSED */
static int
zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	rlim64_t	limit = MAXOFFSET_T;
	ssize_t		start_resid = uio->uio_resid;
	ssize_t		tx_bytes;
	uint64_t	end_size;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	offset_t	woff;
	ssize_t		n, nbytes;
	rl_t		*rl;
	int		max_blksz = zfsvfs->z_max_blksz;
	int		error;
	arc_buf_t	*abuf;
	iovec_t		*aiov;
	xuio_t		*xuio = NULL;
	int		i_iov = 0;
	int		iovcnt = uio->uio_iovcnt;
	iovec_t		*iovp = uio->uio_iov;
	int		write_eof;
	int		count = 0;
	sa_bulk_attr_t	bulk[4];
	uint64_t	mtime[2], ctime[2];

	/*
	 * Fasttrack empty write
	 */
	n = start_resid;
	if (n == 0)
		return (0);

	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Attributes updated in bulk by the final sa_bulk_update(). */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);

	/*
	 * If immutable or not appending then return EPERM
	 */
	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
	    (uio->uio_loffset < zp->z_size))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	zilog = zfsvfs->z_log;

	/*
	 * Validate file offset
	 */
	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
	if (woff < 0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Check for mandatory locks before calling zfs_range_lock()
	 * in order to prevent a deadlock with locks set via fcntl().
	 */
	if (MANDMODE((mode_t)zp->z_mode) &&
	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

#ifdef sun
	/*
	 * Pre-fault the pages to ensure slow (eg NFS) pages
	 * don't hold up txg.
	 * Skip this if uio contains loaned arc_buf.
	 */
	if ((uio->uio_extflg == UIO_XUIO) &&
	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
		xuio = (xuio_t *)uio;
	else
		uio_prefaultpages(MIN(n, max_blksz), uio);
#endif	/* sun */

	/*
	 * If in append mode, set the io offset pointer to eof.
	 */
	if (ioflag & FAPPEND) {
		/*
		 * Obtain an appending range lock to guarantee file append
		 * semantics.  We reset the write offset once we have the lock.
		 */
		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
		woff = rl->r_off;
		if (rl->r_len == UINT64_MAX) {
			/*
			 * We overlocked the file because this write will cause
			 * the file block size to increase.
			 * Note that zp_size cannot change with this lock held.
			 */
			woff = zp->z_size;
		}
		uio->uio_loffset = woff;
	} else {
		/*
		 * Note that if the file block size will change as a result of
		 * this write, then this range lock will lock the entire file
		 * so that we can re-write the block safely.
		 */
		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
	}

	if (vn_rlimit_fsize(vp, uio, uio->uio_td)) {
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (EFBIG);
	}

	if (woff >= limit) {
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (EFBIG);
	}

	/* Clamp the write so it does not extend past the offset limit. */
	if ((woff + n) > limit || woff > (limit - n))
		n = limit - woff;

	/* Will this write extend the file length? */
	write_eof = (woff + n > zp->z_size);

	end_size = MAX(zp->z_size, woff + n);

	/*
	 * Write the file in reasonable size chunks.  Each chunk is written
	 * in a separate transaction; this keeps the intent log records small
	 * and allows us to do more fine-grained space accounting.
	 */
	while (n > 0) {
		abuf = NULL;
		woff = uio->uio_loffset;
again:
		if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
		    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
			if (abuf != NULL)
				dmu_return_arcbuf(abuf);
			error = EDQUOT;
			break;
		}

		if (xuio && abuf == NULL) {
			/* Zero-copy: take the caller-loaned ARC buffer. */
			ASSERT(i_iov < iovcnt);
			aiov = &iovp[i_iov];
			abuf = dmu_xuio_arcbuf(xuio, i_iov);
			dmu_xuio_clear(xuio, i_iov);
			DTRACE_PROBE3(zfs_cp_write, int, i_iov,
			    iovec_t *, aiov, arc_buf_t *, abuf);
			ASSERT((aiov->iov_base == abuf->b_data) ||
			    ((char *)aiov->iov_base - (char *)abuf->b_data +
			    aiov->iov_len == arc_buf_size(abuf)));
			i_iov++;
		} else if (abuf == NULL && n >= max_blksz &&
		    woff >= zp->z_size &&
		    P2PHASE(woff, max_blksz) == 0 &&
		    zp->z_blksz == max_blksz) {
			/*
			 * This write covers a full block.  "Borrow" a buffer
			 * from the dmu so that we can fill it before we enter
			 * a transaction.  This avoids the possibility of
			 * holding up the transaction if the data copy hangs
			 * up on a pagefault (e.g., from an NFS server mapping).
			 */
			size_t cbytes;

			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
			    max_blksz);
			ASSERT(abuf != NULL);
			ASSERT(arc_buf_size(abuf) == max_blksz);
			if (error = uiocopy(abuf->b_data, max_blksz,
			    UIO_WRITE, uio, &cbytes)) {
				dmu_return_arcbuf(abuf);
				break;
			}
			ASSERT(cbytes == max_blksz);
		}

		/*
		 * Start a transaction.
		 */
		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_NOWAIT);
		if (error) {
			if (error == ERESTART) {
				/* txg full: wait and retry this chunk. */
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto again;
			}
			dmu_tx_abort(tx);
			if (abuf != NULL)
				dmu_return_arcbuf(abuf);
			break;
		}

		/*
		 * If zfs_range_lock() over-locked we grow the blocksize
		 * and then reduce the lock range.  This will only happen
		 * on the first iteration since zfs_range_reduce() will
		 * shrink down r_len to the appropriate size.
		 */
		if (rl->r_len == UINT64_MAX) {
			uint64_t new_blksz;

			if (zp->z_blksz > max_blksz) {
				ASSERT(!ISP2(zp->z_blksz));
				new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE);
			} else {
				new_blksz = MIN(end_size, max_blksz);
			}
			zfs_grow_blocksize(zp, new_blksz, tx);
			zfs_range_reduce(rl, woff, n);
		}

		/*
		 * XXX - should we really limit each write to z_max_blksz?
		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
		 */
		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));

		if (woff + nbytes > zp->z_size)
			vnode_pager_setsize(vp, woff + nbytes);

		if (abuf == NULL) {
			tx_bytes = uio->uio_resid;
			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, nbytes, tx);
			tx_bytes -= uio->uio_resid;
		} else {
			tx_bytes = nbytes;
			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
			/*
			 * If this is not a full block write, but we are
			 * extending the file past EOF and this data starts
			 * block-aligned, use assign_arcbuf().  Otherwise,
			 * write via dmu_write().
			 */
			if (tx_bytes < max_blksz && (!write_eof ||
			    aiov->iov_base != abuf->b_data)) {
				ASSERT(xuio);
				dmu_write(zfsvfs->z_os, zp->z_id, woff,
				    aiov->iov_len, aiov->iov_base, tx);
				dmu_return_arcbuf(abuf);
				xuio_stat_wbuf_copied();
			} else {
				ASSERT(xuio || tx_bytes == max_blksz);
				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
				    woff, abuf, tx);
			}
			ASSERT(tx_bytes <= uio->uio_resid);
			uioskip(uio, tx_bytes);
		}
		if (tx_bytes && vn_has_cached_data(vp)) {
			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
			    zp->z_id, uio->uio_segflg, tx);
		}

		/*
		 * If we made no progress, we're done.  If we made even
		 * partial progress, update the znode and ZIL accordingly.
		 */
		if (tx_bytes == 0) {
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
			    (void *)&zp->z_size, sizeof (uint64_t), tx);
			dmu_tx_commit(tx);
			ASSERT(error != 0);
			break;
		}

		/*
		 * Clear Set-UID/Set-GID bits on successful write if not
		 * privileged and at least one of the excute bits is set.
		 *
		 * It would be nice to to this after all writes have
		 * been done, but that would still expose the ISUID/ISGID
		 * to another app after the partial write is committed.
		 *
		 * Note: we don't call zfs_fuid_map_id() here because
		 * user 0 is not an ephemeral uid.
		 */
		mutex_enter(&zp->z_acl_lock);
		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
		    (S_IXUSR >> 6))) != 0 &&
		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
		    secpolicy_vnode_setid_retain(vp, cr,
		    (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
			uint64_t newmode;
			zp->z_mode &= ~(S_ISUID | S_ISGID);
			newmode = zp->z_mode;
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
			    (void *)&newmode, sizeof (uint64_t), tx);
		}
		mutex_exit(&zp->z_acl_lock);

		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
		    B_TRUE);

		/*
		 * Update the file size (zp_size) if it has changed;
		 * account for possible concurrent updates.
		 */
		while ((end_size = zp->z_size) < uio->uio_loffset) {
			(void) atomic_cas_64(&zp->z_size, end_size,
			    uio->uio_loffset);
			ASSERT(error == 0);
		}
		/*
		 * If we are replaying and eof is non zero then force
		 * the file size to the specified eof.  Note, there's no
		 * concurrency during replay.
		 */
		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
			zp->z_size = zfsvfs->z_replay_eof;

		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
		dmu_tx_commit(tx);

		if (error != 0)
			break;
		ASSERT(tx_bytes == nbytes);
		n -= nbytes;

#ifdef sun
		if (!xuio && n > 0)
			uio_prefaultpages(MIN(n, max_blksz), uio);
#endif	/* sun */
	}

	zfs_range_unlock(rl);

	/*
	 * If we're in replay mode, or we made no progress, return error.
	 * Otherwise, it's at least a partial write, so it's successful.
	 */
	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (ioflag & (FSYNC | FDSYNC) ||
	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, zp->z_id);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Completion callback for zfs_get_data(): releases the dbuf and range
 * lock, drops the vnode (asynchronously, since the txg is stopped from
 * syncing), records the block in the ZIL on success, and frees the zgd.
 */
void
zfs_get_done(zgd_t *zgd, int error)
{
	znode_t *zp = zgd->zgd_private;
	objset_t *os = zp->z_zfsvfs->z_os;

	if (zgd->zgd_db)
		dmu_buf_rele(zgd->zgd_db, zgd);

	zfs_range_unlock(zgd->zgd_rl);

	/*
	 * Release the vnode asynchronously as we currently have the
	 * txg stopped from syncing.
	 */
	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));

	if (error == 0 && zgd->zgd_bp)
		zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);

	kmem_free(zgd, sizeof (zgd_t));
}

#ifdef DEBUG
/* Fault-injection knob: force the next indirect get_data to fail. */
static int zil_fault_io = 0;
#endif

/*
 * ZIL get-data callback: retrieve the data for a logged write so the
 * ZIL can commit it.  Immediate writes copy the data into buf under a
 * read range lock; indirect writes lock the whole block (re-checking
 * the blocksize after locking) and hand the dbuf to dmu_sync(), whose
 * completion runs zfs_get_done().  Returns ENOENT when the file has
 * been removed or truncated past the logged offset.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t object = lr->lr_foid;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	blkptr_t *bp = &lr->lr_blkptr;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error = 0;

	ASSERT(zio != NULL);
	ASSERT(size != 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, object, &zp) != 0)
		return (ENOENT);
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (ENOENT);
	}

	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_zilog = zfsvfs->z_log;
	zgd->zgd_private = zp;

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (offset >= zp->z_size) {
			error = ENOENT;
		} else {
			error = dmu_read(os, object, offset, size, buf,
			    DMU_READ_NO_PREFETCH);
		}
		ASSERT(error == 0 || error == ENOENT);
	} else { /* indirect write */
		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and it's checksum is being calculated
		 * that no one can change the data.  We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			uint64_t blkoff;
			size = zp->z_blksz;
			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
			offset -= blkoff;
			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
			    RL_READER);
			if (zp->z_blksz == size)
				break;
			/* blocksize changed under us: retry with new size */
			offset += blkoff;
			zfs_range_unlock(zgd->zgd_rl);
		}
		/* test for truncation needs to be done while range locked */
		if (lr->lr_offset >= zp->z_size)
			error = ENOENT;
#ifdef DEBUG
		if (zil_fault_io) {
			error = EIO;
			zil_fault_io = 0;
		}
#endif
		if (error == 0)
			error = dmu_buf_hold(os, object, offset, zgd, &db,
			    DMU_READ_NO_PREFETCH);

		if (error == 0) {
			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zfs_get_done, zgd);
			ASSERT(error || lr->lr_length <= zp->z_blksz);

			/*
			 * On success, we need to wait for the write I/O
			 * initiated by dmu_sync() to complete before we can
			 * release this dbuf.  We will finish everything up
			 * in the zfs_get_done() callback.
			 */
			if (error == 0)
				return (0);

			if (error == EALREADY) {
				/* block already committed: log as TX_WRITE2 */
				lr->lr_common.lrc_txtype = TX_WRITE2;
				error = 0;
			}
		}
	}

	zfs_get_done(zgd, error);

	return (error);
}

/*
 * Check access permissions.  Dispatches to the ACE-mask or rwx-mode
 * flavor of the ZFS access check depending on the flag.
 */
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * If *vpp is a device vnode, replace it with a specfs shadow vnode
 * (releasing the original).  Returns ENOSYS when specvp() fails.
 */
static int
specvp_check(vnode_t **vpp, cred_t *cr)
{
	int error = 0;

	if (IS_DEVVP(*vpp)) {
		struct vnode *svp;

		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (svp == NULL)
			error = ENOSYS;
		*vpp = svp;
	}
	return (error);
}

/*
 * Lookup nm in directory dvp.  Tries a fast path first (".", empty
 * name, DNLC hit) before entering the teardown lock; handles extended
 * attribute (LOOKUP_XATTR) lookups, checks directory execute access
 * and UTF-8 validity, then does the real zfs_dirlook().  The tail
 * translates errors for the FreeBSD namei contract (EJUSTRETURN +
 * SAVENAME for CREATE/RENAME of a missing last component), locks the
 * resulting vnode (with the ".." unlock/relock dance), and feeds the
 * name cache when MAKEENTRY is set.
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
    int nameiop, cred_t *cr, kthread_t *td, int flags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
	int	error = 0;
	int *direntflags = NULL;	/* directory lookup flags */
	void *realpnp = NULL;		/* returned pathname */

	/* fast path */
	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {

		if (dvp->v_type != VDIR) {
			return (ENOTDIR);
		} else if (zdp->z_sa_hdl == NULL) {
			return (EIO);
		}

		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
			/* "" or "." resolve to the directory itself. */
			error = zfs_fastaccesschk_execute(zdp, cr);
			if (!error) {
				*vpp = dvp;
				VN_HOLD(*vpp);
				return (0);
			}
			return (error);
		} else {
			vnode_t *tvp = dnlc_lookup(dvp, nm);

			if (tvp) {
				error = zfs_fastaccesschk_execute(zdp, cr);
				if (error) {
					VN_RELE(tvp);
					return (error);
				}
				if (tvp == DNLC_NO_VNODE) {
					/* negative cache entry */
					VN_RELE(tvp);
					return (ENOENT);
				} else {
					*vpp = tvp;
					return (specvp_check(vpp, cr));
				}
			}
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
#ifdef TODO
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}
#endif

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

		/*
		 * Do we have permission to get into attribute directory?
		 */

		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
		    B_FALSE, cr)) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (dvp->v_type != VDIR) {
		ZFS_EXIT(zfsvfs);
		return (ENOTDIR);
	}

	/*
	 * Check accessibility of directory.
	 */

	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
	if (error == 0)
		error = specvp_check(vpp, cr);

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			/* FALLTHROUGH */
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}
	if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) {
		int ltype = 0;

		if (cnp->cn_flags & ISDOTDOT) {
			/* avoid parent/child lock-order deadlock on ".." */
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK(dvp, 0);
		}
		ZFS_EXIT(zfsvfs);
		error = zfs_vnode_lock(*vpp, cnp->cn_lkflags);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, ltype | LK_RETRY);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
			return (error);
		}
	} else {
		ZFS_EXIT(zfsvfs);
	}

#ifdef FREEBSD_NAMECACHE
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 */
	if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
		cache_enter(dvp, *vpp, cnp);
	/*
	 * Insert name into cache if appropriate.
	 */
	if (error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}
#endif

	return (error);
}

/*
 * Create a new file in directory dvp.
 * NOTE(review): this function is truncated in the scraped source —
 * the text below reproduces only the visible portion, up to the point
 * where the extraction cuts off mid-comment.
 */
/* ARGSUSED */
static int
zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
    vnode_t **vpp, cred_t *cr, kthread_t *td)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	objset_t	*os;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t   acl_ids;
	boolean_t	fuid_dirtied;
	boolean_t	have_acl = B_FALSE;
	void		*vsecp = NULL;	/* ACL to be set */
	int		flag = 0;	/* Large file flag */

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
top:
	*vpp = NULL;

	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to
the directory itself. 01554 */ 01555 VN_HOLD(dvp); 01556 zp = dzp; 01557 dl = NULL; 01558 error = 0; 01559 } else { 01560 /* possible VN_HOLD(zp) */ 01561 int zflg = 0; 01562 01563 if (flag & FIGNORECASE) 01564 zflg |= ZCILOOK; 01565 01566 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 01567 NULL, NULL); 01568 if (error) { 01569 if (have_acl) 01570 zfs_acl_ids_free(&acl_ids); 01571 if (strcmp(name, "..") == 0) 01572 error = EISDIR; 01573 ZFS_EXIT(zfsvfs); 01574 return (error); 01575 } 01576 } 01577 01578 if (zp == NULL) { 01579 uint64_t txtype; 01580 01581 /* 01582 * Create a new file object and update the directory 01583 * to reference it. 01584 */ 01585 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 01586 if (have_acl) 01587 zfs_acl_ids_free(&acl_ids); 01588 goto out; 01589 } 01590 01591 /* 01592 * We only support the creation of regular files in 01593 * extended attribute directories. 01594 */ 01595 01596 if ((dzp->z_pflags & ZFS_XATTR) && 01597 (vap->va_type != VREG)) { 01598 if (have_acl) 01599 zfs_acl_ids_free(&acl_ids); 01600 error = EINVAL; 01601 goto out; 01602 } 01603 01604 if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 01605 cr, vsecp, &acl_ids)) != 0) 01606 goto out; 01607 have_acl = B_TRUE; 01608 01609 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 01610 zfs_acl_ids_free(&acl_ids); 01611 error = EDQUOT; 01612 goto out; 01613 } 01614 01615 tx = dmu_tx_create(os); 01616 01617 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 01618 ZFS_SA_BASE_ATTR_SIZE); 01619 01620 fuid_dirtied = zfsvfs->z_fuid_dirty; 01621 if (fuid_dirtied) 01622 zfs_fuid_txhold(zfsvfs, tx); 01623 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 01624 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 01625 if (!zfsvfs->z_use_sa && 01626 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 01627 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 01628 0, acl_ids.z_aclp->z_acl_bytes); 01629 } 01630 error = dmu_tx_assign(tx, TXG_NOWAIT); 01631 if (error) { 01632 
zfs_dirent_unlock(dl); 01633 if (error == ERESTART) { 01634 dmu_tx_wait(tx); 01635 dmu_tx_abort(tx); 01636 goto top; 01637 } 01638 zfs_acl_ids_free(&acl_ids); 01639 dmu_tx_abort(tx); 01640 ZFS_EXIT(zfsvfs); 01641 return (error); 01642 } 01643 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 01644 01645 if (fuid_dirtied) 01646 zfs_fuid_sync(zfsvfs, tx); 01647 01648 (void) zfs_link_create(dl, zp, tx, ZNEW); 01649 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 01650 if (flag & FIGNORECASE) 01651 txtype |= TX_CI; 01652 zfs_log_create(zilog, tx, txtype, dzp, zp, name, 01653 vsecp, acl_ids.z_fuidp, vap); 01654 zfs_acl_ids_free(&acl_ids); 01655 dmu_tx_commit(tx); 01656 } else { 01657 int aflags = (flag & FAPPEND) ? V_APPEND : 0; 01658 01659 if (have_acl) 01660 zfs_acl_ids_free(&acl_ids); 01661 have_acl = B_FALSE; 01662 01663 /* 01664 * A directory entry already exists for this name. 01665 */ 01666 /* 01667 * Can't truncate an existing file if in exclusive mode. 01668 */ 01669 if (excl == EXCL) { 01670 error = EEXIST; 01671 goto out; 01672 } 01673 /* 01674 * Can't open a directory for writing. 01675 */ 01676 if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 01677 error = EISDIR; 01678 goto out; 01679 } 01680 /* 01681 * Verify requested access to file. 01682 */ 01683 if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 01684 goto out; 01685 } 01686 01687 mutex_enter(&dzp->z_lock); 01688 dzp->z_seq++; 01689 mutex_exit(&dzp->z_lock); 01690 01691 /* 01692 * Truncate regular files if requested. 
01693 */ 01694 if ((ZTOV(zp)->v_type == VREG) && 01695 (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 01696 /* we can't hold any locks when calling zfs_freesp() */ 01697 zfs_dirent_unlock(dl); 01698 dl = NULL; 01699 error = zfs_freesp(zp, 0, 0, mode, TRUE); 01700 if (error == 0) { 01701 vnevent_create(ZTOV(zp), ct); 01702 } 01703 } 01704 } 01705 out: 01706 if (dl) 01707 zfs_dirent_unlock(dl); 01708 01709 if (error) { 01710 if (zp) 01711 VN_RELE(ZTOV(zp)); 01712 } else { 01713 *vpp = ZTOV(zp); 01714 error = specvp_check(vpp, cr); 01715 } 01716 01717 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 01718 zil_commit(zilog, 0); 01719 01720 ZFS_EXIT(zfsvfs); 01721 return (error); 01722 } 01723 01724 uint64_t null_xattr = 0; 01725 01742 /*ARGSUSED*/ 01743 static int 01744 zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 01745 int flags) 01746 { 01747 znode_t *zp, *dzp = VTOZ(dvp); 01748 znode_t *xzp; 01749 vnode_t *vp; 01750 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 01751 zilog_t *zilog; 01752 uint64_t acl_obj, xattr_obj; 01753 uint64_t xattr_obj_unlinked = 0; 01754 uint64_t obj = 0; 01755 zfs_dirlock_t *dl; 01756 dmu_tx_t *tx; 01757 boolean_t may_delete_now, delete_now = FALSE; 01758 boolean_t unlinked, toobig = FALSE; 01759 uint64_t txtype; 01760 pathname_t *realnmp = NULL; 01761 pathname_t realnm; 01762 int error; 01763 int zflg = ZEXISTS; 01764 01765 ZFS_ENTER(zfsvfs); 01766 ZFS_VERIFY_ZP(dzp); 01767 zilog = zfsvfs->z_log; 01768 01769 if (flags & FIGNORECASE) { 01770 zflg |= ZCILOOK; 01771 pn_alloc(&realnm); 01772 realnmp = &realnm; 01773 } 01774 01775 top: 01776 xattr_obj = 0; 01777 xzp = NULL; 01778 /* 01779 * Attempt to lock directory; fail if entry doesn't exist. 
01780 */ 01781 if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 01782 NULL, realnmp)) { 01783 if (realnmp) 01784 pn_free(realnmp); 01785 ZFS_EXIT(zfsvfs); 01786 return (error); 01787 } 01788 01789 vp = ZTOV(zp); 01790 01791 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 01792 goto out; 01793 } 01794 01795 /* 01796 * Need to use rmdir for removing directories. 01797 */ 01798 if (vp->v_type == VDIR) { 01799 error = EPERM; 01800 goto out; 01801 } 01802 01803 vnevent_remove(vp, dvp, name, ct); 01804 01805 if (realnmp) 01806 dnlc_remove(dvp, realnmp->pn_buf); 01807 else 01808 dnlc_remove(dvp, name); 01809 01810 VI_LOCK(vp); 01811 may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 01812 VI_UNLOCK(vp); 01813 01814 /* 01815 * We may delete the znode now, or we may put it in the unlinked set; 01816 * it depends on whether we're the last link, and on whether there are 01817 * other holds on the vnode. So we dmu_tx_hold() the right things to 01818 * allow for either case. 01819 */ 01820 obj = zp->z_id; 01821 tx = dmu_tx_create(zfsvfs->z_os); 01822 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 01823 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 01824 zfs_sa_upgrade_txholds(tx, zp); 01825 zfs_sa_upgrade_txholds(tx, dzp); 01826 if (may_delete_now) { 01827 toobig = 01828 zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 01829 /* if the file is too big, only hold_free a token amount */ 01830 dmu_tx_hold_free(tx, zp->z_id, 0, 01831 (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 01832 } 01833 01834 /* are there any extended attributes? 
*/ 01835 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 01836 &xattr_obj, sizeof (xattr_obj)); 01837 if (error == 0 && xattr_obj) { 01838 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 01839 ASSERT0(error); 01840 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 01841 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 01842 } 01843 01844 mutex_enter(&zp->z_lock); 01845 if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 01846 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 01847 mutex_exit(&zp->z_lock); 01848 01849 /* charge as an update -- would be nice not to charge at all */ 01850 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 01851 01852 error = dmu_tx_assign(tx, TXG_NOWAIT); 01853 if (error) { 01854 zfs_dirent_unlock(dl); 01855 VN_RELE(vp); 01856 if (xzp) 01857 VN_RELE(ZTOV(xzp)); 01858 if (error == ERESTART) { 01859 dmu_tx_wait(tx); 01860 dmu_tx_abort(tx); 01861 goto top; 01862 } 01863 if (realnmp) 01864 pn_free(realnmp); 01865 dmu_tx_abort(tx); 01866 ZFS_EXIT(zfsvfs); 01867 return (error); 01868 } 01869 01870 /* 01871 * Remove the directory entry. 01872 */ 01873 error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 01874 01875 if (error) { 01876 dmu_tx_commit(tx); 01877 goto out; 01878 } 01879 01880 if (unlinked) { 01881 01882 /* 01883 * Hold z_lock so that we can make sure that the ACL obj 01884 * hasn't changed. Could have been deleted due to 01885 * zfs_sa_upgrade(). 
01886 */ 01887 mutex_enter(&zp->z_lock); 01888 VI_LOCK(vp); 01889 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 01890 &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 01891 delete_now = may_delete_now && !toobig && 01892 vp->v_count == 1 && !vn_has_cached_data(vp) && 01893 xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 01894 acl_obj; 01895 VI_UNLOCK(vp); 01896 } 01897 01898 if (delete_now) { 01899 if (xattr_obj_unlinked) { 01900 ASSERT3U(xzp->z_links, ==, 2); 01901 mutex_enter(&xzp->z_lock); 01902 xzp->z_unlinked = 1; 01903 xzp->z_links = 0; 01904 error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 01905 &xzp->z_links, sizeof (xzp->z_links), tx); 01906 ASSERT3U(error, ==, 0); 01907 mutex_exit(&xzp->z_lock); 01908 zfs_unlinked_add(xzp, tx); 01909 01910 if (zp->z_is_sa) 01911 error = sa_remove(zp->z_sa_hdl, 01912 SA_ZPL_XATTR(zfsvfs), tx); 01913 else 01914 error = sa_update(zp->z_sa_hdl, 01915 SA_ZPL_XATTR(zfsvfs), &null_xattr, 01916 sizeof (uint64_t), tx); 01917 ASSERT0(error); 01918 } 01919 VI_LOCK(vp); 01920 vp->v_count--; 01921 ASSERT0(vp->v_count); 01922 VI_UNLOCK(vp); 01923 mutex_exit(&zp->z_lock); 01924 zfs_znode_delete(zp, tx); 01925 } else if (unlinked) { 01926 mutex_exit(&zp->z_lock); 01927 zfs_unlinked_add(zp, tx); 01928 } 01929 01930 txtype = TX_REMOVE; 01931 if (flags & FIGNORECASE) 01932 txtype |= TX_CI; 01933 zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 01934 01935 dmu_tx_commit(tx); 01936 out: 01937 if (realnmp) 01938 pn_free(realnmp); 01939 01940 zfs_dirent_unlock(dl); 01941 01942 if (!delete_now) 01943 VN_RELE(vp); 01944 if (xzp) 01945 VN_RELE(ZTOV(xzp)); 01946 01947 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 01948 zil_commit(zilog, 0); 01949 01950 ZFS_EXIT(zfsvfs); 01951 return (error); 01952 } 01953 01974 /*ARGSUSED*/ 01975 static int 01976 zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 01977 caller_context_t *ct, int flags, vsecattr_t *vsecp) 01978 { 01979 znode_t *zp, *dzp = 
VTOZ(dvp); 01980 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 01981 zilog_t *zilog; 01982 zfs_dirlock_t *dl; 01983 uint64_t txtype; 01984 dmu_tx_t *tx; 01985 int error; 01986 int zf = ZNEW; 01987 ksid_t *ksid; 01988 uid_t uid; 01989 gid_t gid = crgetgid(cr); 01990 zfs_acl_ids_t acl_ids; 01991 boolean_t fuid_dirtied; 01992 01993 ASSERT(vap->va_type == VDIR); 01994 01995 /* 01996 * If we have an ephemeral id, ACL, or XVATTR then 01997 * make sure file system is at proper version 01998 */ 01999 02000 ksid = crgetsid(cr, KSID_OWNER); 02001 if (ksid) 02002 uid = ksid_getid(ksid); 02003 else 02004 uid = crgetuid(cr); 02005 if (zfsvfs->z_use_fuids == B_FALSE && 02006 (vsecp || (vap->va_mask & AT_XVATTR) || 02007 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 02008 return (EINVAL); 02009 02010 ZFS_ENTER(zfsvfs); 02011 ZFS_VERIFY_ZP(dzp); 02012 zilog = zfsvfs->z_log; 02013 02014 if (dzp->z_pflags & ZFS_XATTR) { 02015 ZFS_EXIT(zfsvfs); 02016 return (EINVAL); 02017 } 02018 02019 if (zfsvfs->z_utf8 && u8_validate(dirname, 02020 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 02021 ZFS_EXIT(zfsvfs); 02022 return (EILSEQ); 02023 } 02024 if (flags & FIGNORECASE) 02025 zf |= ZCILOOK; 02026 02027 if (vap->va_mask & AT_XVATTR) { 02028 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 02029 crgetuid(cr), cr, vap->va_type)) != 0) { 02030 ZFS_EXIT(zfsvfs); 02031 return (error); 02032 } 02033 } 02034 02035 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 02036 vsecp, &acl_ids)) != 0) { 02037 ZFS_EXIT(zfsvfs); 02038 return (error); 02039 } 02040 /* 02041 * First make sure the new directory doesn't exist. 02042 * 02043 * Existence is checked first to make sure we don't return 02044 * EACCES instead of EEXIST which can cause some applications 02045 * to fail. 
02046 */ 02047 top: 02048 *vpp = NULL; 02049 02050 if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 02051 NULL, NULL)) { 02052 zfs_acl_ids_free(&acl_ids); 02053 ZFS_EXIT(zfsvfs); 02054 return (error); 02055 } 02056 02057 if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 02058 zfs_acl_ids_free(&acl_ids); 02059 zfs_dirent_unlock(dl); 02060 ZFS_EXIT(zfsvfs); 02061 return (error); 02062 } 02063 02064 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 02065 zfs_acl_ids_free(&acl_ids); 02066 zfs_dirent_unlock(dl); 02067 ZFS_EXIT(zfsvfs); 02068 return (EDQUOT); 02069 } 02070 02071 /* 02072 * Add a new entry to the directory. 02073 */ 02074 tx = dmu_tx_create(zfsvfs->z_os); 02075 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 02076 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 02077 fuid_dirtied = zfsvfs->z_fuid_dirty; 02078 if (fuid_dirtied) 02079 zfs_fuid_txhold(zfsvfs, tx); 02080 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 02081 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 02082 acl_ids.z_aclp->z_acl_bytes); 02083 } 02084 02085 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 02086 ZFS_SA_BASE_ATTR_SIZE); 02087 02088 error = dmu_tx_assign(tx, TXG_NOWAIT); 02089 if (error) { 02090 zfs_dirent_unlock(dl); 02091 if (error == ERESTART) { 02092 dmu_tx_wait(tx); 02093 dmu_tx_abort(tx); 02094 goto top; 02095 } 02096 zfs_acl_ids_free(&acl_ids); 02097 dmu_tx_abort(tx); 02098 ZFS_EXIT(zfsvfs); 02099 return (error); 02100 } 02101 02102 /* 02103 * Create new node. 02104 */ 02105 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 02106 02107 if (fuid_dirtied) 02108 zfs_fuid_sync(zfsvfs, tx); 02109 02110 /* 02111 * Now put new name in parent dir. 
02112 */ 02113 (void) zfs_link_create(dl, zp, tx, ZNEW); 02114 02115 *vpp = ZTOV(zp); 02116 02117 txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 02118 if (flags & FIGNORECASE) 02119 txtype |= TX_CI; 02120 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 02121 acl_ids.z_fuidp, vap); 02122 02123 zfs_acl_ids_free(&acl_ids); 02124 02125 dmu_tx_commit(tx); 02126 02127 zfs_dirent_unlock(dl); 02128 02129 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 02130 zil_commit(zilog, 0); 02131 02132 ZFS_EXIT(zfsvfs); 02133 return (0); 02134 } 02135 02154 /*ARGSUSED*/ 02155 static int 02156 zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 02157 caller_context_t *ct, int flags) 02158 { 02159 znode_t *dzp = VTOZ(dvp); 02160 znode_t *zp; 02161 vnode_t *vp; 02162 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 02163 zilog_t *zilog; 02164 zfs_dirlock_t *dl; 02165 dmu_tx_t *tx; 02166 int error; 02167 int zflg = ZEXISTS; 02168 02169 ZFS_ENTER(zfsvfs); 02170 ZFS_VERIFY_ZP(dzp); 02171 zilog = zfsvfs->z_log; 02172 02173 if (flags & FIGNORECASE) 02174 zflg |= ZCILOOK; 02175 top: 02176 zp = NULL; 02177 02178 /* 02179 * Attempt to lock directory; fail if entry doesn't exist. 02180 */ 02181 if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 02182 NULL, NULL)) { 02183 ZFS_EXIT(zfsvfs); 02184 return (error); 02185 } 02186 02187 vp = ZTOV(zp); 02188 02189 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 02190 goto out; 02191 } 02192 02193 if (vp->v_type != VDIR) { 02194 error = ENOTDIR; 02195 goto out; 02196 } 02197 02198 if (vp == cwd) { 02199 error = EINVAL; 02200 goto out; 02201 } 02202 02203 vnevent_rmdir(vp, dvp, name, ct); 02204 02205 /* 02206 * Grab a lock on the directory to make sure that noone is 02207 * trying to add (or lookup) entries while we are removing it. 02208 */ 02209 rw_enter(&zp->z_name_lock, RW_WRITER); 02210 02211 /* 02212 * Grab a lock on the parent pointer to make sure we play well 02213 * with the treewalk and directory rename code. 
02214 */ 02215 rw_enter(&zp->z_parent_lock, RW_WRITER); 02216 02217 tx = dmu_tx_create(zfsvfs->z_os); 02218 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 02219 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 02220 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 02221 zfs_sa_upgrade_txholds(tx, zp); 02222 zfs_sa_upgrade_txholds(tx, dzp); 02223 error = dmu_tx_assign(tx, TXG_NOWAIT); 02224 if (error) { 02225 rw_exit(&zp->z_parent_lock); 02226 rw_exit(&zp->z_name_lock); 02227 zfs_dirent_unlock(dl); 02228 VN_RELE(vp); 02229 if (error == ERESTART) { 02230 dmu_tx_wait(tx); 02231 dmu_tx_abort(tx); 02232 goto top; 02233 } 02234 dmu_tx_abort(tx); 02235 ZFS_EXIT(zfsvfs); 02236 return (error); 02237 } 02238 02239 #ifdef FREEBSD_NAMECACHE 02240 cache_purge(dvp); 02241 #endif 02242 02243 error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 02244 02245 if (error == 0) { 02246 uint64_t txtype = TX_RMDIR; 02247 if (flags & FIGNORECASE) 02248 txtype |= TX_CI; 02249 zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 02250 } 02251 02252 dmu_tx_commit(tx); 02253 02254 rw_exit(&zp->z_parent_lock); 02255 rw_exit(&zp->z_name_lock); 02256 #ifdef FREEBSD_NAMECACHE 02257 cache_purge(vp); 02258 #endif 02259 out: 02260 zfs_dirent_unlock(dl); 02261 02262 VN_RELE(vp); 02263 02264 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 02265 zil_commit(zilog, 0); 02266 02267 ZFS_EXIT(zfsvfs); 02268 return (error); 02269 } 02270 02298 /* ARGSUSED */ 02299 static int 02300 zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, 02301 u_long **cookies) 02302 { 02303 znode_t *zp = VTOZ(vp); 02304 iovec_t *iovp; 02305 edirent_t *eodp; 02306 dirent64_t *odp; 02307 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 02308 objset_t *os; 02309 caddr_t outbuf; 02310 size_t bufsize; 02311 zap_cursor_t zc; 02312 zap_attribute_t zap; 02313 uint_t bytes_wanted; 02314 uint64_t offset; /* must be unsigned; checks for < 1 */ 02315 uint64_t parent; 02316 int local_eof; 02317 int outcount; 02318 int error; 02319 
uint8_t prefetch; 02320 boolean_t check_sysattrs; 02321 uint8_t type; 02322 int ncooks; 02323 u_long *cooks = NULL; 02324 int flags = 0; /* case flags */ 02325 02326 ZFS_ENTER(zfsvfs); 02327 ZFS_VERIFY_ZP(zp); 02328 02329 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 02330 &parent, sizeof (parent))) != 0) { 02331 ZFS_EXIT(zfsvfs); 02332 return (error); 02333 } 02334 02335 /* 02336 * If we are not given an eof variable, 02337 * use a local one. 02338 */ 02339 if (eofp == NULL) 02340 eofp = &local_eof; 02341 02342 /* 02343 * Check for valid iov_len. 02344 */ 02345 if (uio->uio_iov->iov_len <= 0) { 02346 ZFS_EXIT(zfsvfs); 02347 return (EINVAL); 02348 } 02349 02350 /* 02351 * Quit if directory has been removed (posix) 02352 */ 02353 if ((*eofp = zp->z_unlinked) != 0) { 02354 ZFS_EXIT(zfsvfs); 02355 return (0); 02356 } 02357 02358 error = 0; 02359 os = zfsvfs->z_os; 02360 offset = uio->uio_loffset; 02361 prefetch = zp->z_zn_prefetch; 02362 02363 /* 02364 * Initialize the iterator cursor. 02365 */ 02366 if (offset <= 3) { 02367 /* 02368 * Start iteration from the beginning of the directory. 02369 */ 02370 zap_cursor_init(&zc, os, zp->z_id); 02371 } else { 02372 /* 02373 * The offset is a serialized cursor. 02374 */ 02375 zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 02376 } 02377 02378 /* 02379 * Get space to change directory entries into fs independent format. 02380 */ 02381 iovp = uio->uio_iov; 02382 bytes_wanted = iovp->iov_len; 02383 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 02384 bufsize = bytes_wanted; 02385 outbuf = kmem_alloc(bufsize, KM_SLEEP); 02386 odp = (struct dirent64 *)outbuf; 02387 } else { 02388 bufsize = bytes_wanted; 02389 odp = (struct dirent64 *)iovp->iov_base; 02390 } 02391 eodp = (struct edirent *)odp; 02392 02393 if (ncookies != NULL) { 02394 /* 02395 * Minimum entry size is dirent size and 1 byte for a file name. 
02396 */ 02397 ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 02398 cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 02399 *cookies = cooks; 02400 *ncookies = ncooks; 02401 } 02402 /* 02403 * If this VFS supports the system attribute view interface; and 02404 * we're looking at an extended attribute directory; and we care 02405 * about normalization conflicts on this vfs; then we must check 02406 * for normalization conflicts with the sysattr name space. 02407 */ 02408 #ifdef TODO 02409 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 02410 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 02411 (flags & V_RDDIR_ENTFLAGS); 02412 #else 02413 check_sysattrs = 0; 02414 #endif 02415 02416 /* 02417 * Transform to file-system independent format 02418 */ 02419 outcount = 0; 02420 while (outcount < bytes_wanted) { 02421 ino64_t objnum; 02422 ushort_t reclen; 02423 off64_t *next = NULL; 02424 02425 /* 02426 * Special case `.', `..', and `.zfs'. 02427 */ 02428 if (offset == 0) { 02429 (void) strcpy(zap.za_name, "."); 02430 zap.za_normalization_conflict = 0; 02431 objnum = zp->z_id; 02432 type = DT_DIR; 02433 } else if (offset == 1) { 02434 (void) strcpy(zap.za_name, ".."); 02435 zap.za_normalization_conflict = 0; 02436 objnum = parent; 02437 type = DT_DIR; 02438 } else if (offset == 2 && zfs_show_ctldir(zp)) { 02439 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 02440 zap.za_normalization_conflict = 0; 02441 objnum = ZFSCTL_INO_ROOT; 02442 type = DT_DIR; 02443 } else { 02444 /* 02445 * Grab next entry. 
02446 */ 02447 if (error = zap_cursor_retrieve(&zc, &zap)) { 02448 if ((*eofp = (error == ENOENT)) != 0) 02449 break; 02450 else 02451 goto update; 02452 } 02453 02454 if (zap.za_integer_length != 8 || 02455 zap.za_num_integers != 1) { 02456 cmn_err(CE_WARN, "zap_readdir: bad directory " 02457 "entry, obj = %lld, offset = %lld\n", 02458 (u_longlong_t)zp->z_id, 02459 (u_longlong_t)offset); 02460 error = ENXIO; 02461 goto update; 02462 } 02463 02464 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 02465 /* 02466 * MacOS X can extract the object type here such as: 02467 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 02468 */ 02469 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 02470 02471 if (check_sysattrs && !zap.za_normalization_conflict) { 02472 #ifdef TODO 02473 zap.za_normalization_conflict = 02474 xattr_sysattr_casechk(zap.za_name); 02475 #else 02476 panic("%s:%u: TODO", __func__, __LINE__); 02477 #endif 02478 } 02479 } 02480 02481 if (flags & V_RDDIR_ACCFILTER) { 02482 /* 02483 * If we have no access at all, don't include 02484 * this entry in the returned information 02485 */ 02486 znode_t *ezp; 02487 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 02488 goto skip_entry; 02489 if (!zfs_has_access(ezp, cr)) { 02490 VN_RELE(ZTOV(ezp)); 02491 goto skip_entry; 02492 } 02493 VN_RELE(ZTOV(ezp)); 02494 } 02495 02496 if (flags & V_RDDIR_ENTFLAGS) 02497 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 02498 else 02499 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 02500 02501 /* 02502 * Will this entry fit in the buffer? 02503 */ 02504 if (outcount + reclen > bufsize) { 02505 /* 02506 * Did we manage to fit anything in the buffer? 
02507 */ 02508 if (!outcount) { 02509 error = EINVAL; 02510 goto update; 02511 } 02512 break; 02513 } 02514 if (flags & V_RDDIR_ENTFLAGS) { 02515 /* 02516 * Add extended flag entry: 02517 */ 02518 eodp->ed_ino = objnum; 02519 eodp->ed_reclen = reclen; 02520 /* NOTE: ed_off is the offset for the *next* entry */ 02521 next = &(eodp->ed_off); 02522 eodp->ed_eflags = zap.za_normalization_conflict ? 02523 ED_CASE_CONFLICT : 0; 02524 (void) strncpy(eodp->ed_name, zap.za_name, 02525 EDIRENT_NAMELEN(reclen)); 02526 eodp = (edirent_t *)((intptr_t)eodp + reclen); 02527 } else { 02528 /* 02529 * Add normal entry: 02530 */ 02531 odp->d_ino = objnum; 02532 odp->d_reclen = reclen; 02533 odp->d_namlen = strlen(zap.za_name); 02534 (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 02535 odp->d_type = type; 02536 odp = (dirent64_t *)((intptr_t)odp + reclen); 02537 } 02538 outcount += reclen; 02539 02540 ASSERT(outcount <= bufsize); 02541 02542 /* Prefetch znode */ 02543 if (prefetch) 02544 dmu_prefetch(os, objnum, 0, 0); 02545 02546 skip_entry: 02547 /* 02548 * Move to the next entry, fill in the previous offset. 02549 */ 02550 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 02551 zap_cursor_advance(&zc); 02552 offset = zap_cursor_serialize(&zc); 02553 } else { 02554 offset += 1; 02555 } 02556 02557 if (cooks != NULL) { 02558 *cooks++ = offset; 02559 ncooks--; 02560 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 02561 } 02562 } 02563 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 02564 02565 /* Subtract unused cookies */ 02566 if (ncookies != NULL) 02567 *ncookies -= ncooks; 02568 02569 if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 02570 iovp->iov_base += outcount; 02571 iovp->iov_len -= outcount; 02572 uio->uio_resid -= outcount; 02573 } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 02574 /* 02575 * Reset the pointer. 
 */
		offset = uio->uio_loffset;
	}

update:
	zap_cursor_fini(&zc);
	/* The bounce buffer is only allocated for non-UIO_SYSSPACE callers. */
	if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
		kmem_free(outbuf, bufsize);

	/* ENOENT from the cursor simply means end-of-directory. */
	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	uio->uio_loffset = offset;
	ZFS_EXIT(zfsvfs);
	/* On error, hand back no cookies and free the ones we allocated. */
	if (error != 0 && cookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}

ulong_t zfs_fsync_sync_cnt = 4;

/*
 * Flush all dirty log records for the file to stable storage by
 * committing the intent log up to this object.  A no-op when sync is
 * disabled on the dataset.
 *
 *	RETURN:	always 0.
 */
static int
zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	/*
	 * Record per-thread fsync activity.  NOTE(review): the consumer of
	 * zfs_fsyncer_key is not visible in this chunk -- presumably the
	 * write/putpage path uses it as a sync hint; verify before relying
	 * on this comment.
	 */
	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);

	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		zil_commit(zfsvfs->z_log, zp->z_id);
		ZFS_EXIT(zfsvfs);
	}
	return (0);
}


/*
 * Get the basic (and optionally extended xvattr) attributes of a file.
 * Attribute data is gathered in a single SA bulk lookup, then copied
 * into *vap under z_lock.
 */
/* ARGSUSED */
static int
zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int	error = 0;
	uint32_t blksize;
	u_longlong_t nblocks;
	uint64_t links;
	uint64_t mtime[2], ctime[2], crtime[2], rdev;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ?
B_TRUE : B_FALSE; 02649 sa_bulk_attr_t bulk[4]; 02650 int count = 0; 02651 02652 ZFS_ENTER(zfsvfs); 02653 ZFS_VERIFY_ZP(zp); 02654 02655 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 02656 02657 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 02658 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 02659 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &crtime, 16); 02660 if (vp->v_type == VBLK || vp->v_type == VCHR) 02661 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 02662 &rdev, 8); 02663 02664 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 02665 ZFS_EXIT(zfsvfs); 02666 return (error); 02667 } 02668 02669 /* 02670 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 02671 * Also, if we are the owner don't bother, since owner should 02672 * always be allowed to read basic attributes of file. 02673 */ 02674 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 02675 (vap->va_uid != crgetuid(cr))) { 02676 if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 02677 skipaclchk, cr)) { 02678 ZFS_EXIT(zfsvfs); 02679 return (error); 02680 } 02681 } 02682 02683 /* 02684 * Return all attributes. It's cheaper to provide the answer 02685 * than to determine whether we were asked the question. 02686 */ 02687 02688 mutex_enter(&zp->z_lock); 02689 vap->va_type = IFTOVT(zp->z_mode); 02690 vap->va_mode = zp->z_mode & ~S_IFMT; 02691 #ifdef sun 02692 vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 02693 #else 02694 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 02695 #endif 02696 vap->va_nodeid = zp->z_id; 02697 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 02698 links = zp->z_links + 1; 02699 else 02700 links = zp->z_links; 02701 vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! 
*/ 02702 vap->va_size = zp->z_size; 02703 #ifdef sun 02704 vap->va_rdev = vp->v_rdev; 02705 #else 02706 if (vp->v_type == VBLK || vp->v_type == VCHR) 02707 vap->va_rdev = zfs_cmpldev(rdev); 02708 #endif 02709 vap->va_seq = zp->z_seq; 02710 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 02711 02712 /* 02713 * Add in any requested optional attributes and the create time. 02714 * Also set the corresponding bits in the returned attribute bitmap. 02715 */ 02716 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 02717 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 02718 xoap->xoa_archive = 02719 ((zp->z_pflags & ZFS_ARCHIVE) != 0); 02720 XVA_SET_RTN(xvap, XAT_ARCHIVE); 02721 } 02722 02723 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 02724 xoap->xoa_readonly = 02725 ((zp->z_pflags & ZFS_READONLY) != 0); 02726 XVA_SET_RTN(xvap, XAT_READONLY); 02727 } 02728 02729 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 02730 xoap->xoa_system = 02731 ((zp->z_pflags & ZFS_SYSTEM) != 0); 02732 XVA_SET_RTN(xvap, XAT_SYSTEM); 02733 } 02734 02735 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 02736 xoap->xoa_hidden = 02737 ((zp->z_pflags & ZFS_HIDDEN) != 0); 02738 XVA_SET_RTN(xvap, XAT_HIDDEN); 02739 } 02740 02741 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 02742 xoap->xoa_nounlink = 02743 ((zp->z_pflags & ZFS_NOUNLINK) != 0); 02744 XVA_SET_RTN(xvap, XAT_NOUNLINK); 02745 } 02746 02747 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 02748 xoap->xoa_immutable = 02749 ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 02750 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 02751 } 02752 02753 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 02754 xoap->xoa_appendonly = 02755 ((zp->z_pflags & ZFS_APPENDONLY) != 0); 02756 XVA_SET_RTN(xvap, XAT_APPENDONLY); 02757 } 02758 02759 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 02760 xoap->xoa_nodump = 02761 ((zp->z_pflags & ZFS_NODUMP) != 0); 02762 XVA_SET_RTN(xvap, XAT_NODUMP); 02763 } 02764 02765 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 02766 xoap->xoa_opaque = 02767 ((zp->z_pflags & ZFS_OPAQUE) 
!= 0); 02768 XVA_SET_RTN(xvap, XAT_OPAQUE); 02769 } 02770 02771 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 02772 xoap->xoa_av_quarantined = 02773 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 02774 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 02775 } 02776 02777 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 02778 xoap->xoa_av_modified = 02779 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 02780 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 02781 } 02782 02783 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 02784 vp->v_type == VREG) { 02785 zfs_sa_get_scanstamp(zp, xvap); 02786 } 02787 02788 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 02789 uint64_t times[2]; 02790 02791 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 02792 times, sizeof (times)); 02793 ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 02794 XVA_SET_RTN(xvap, XAT_CREATETIME); 02795 } 02796 02797 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 02798 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 02799 XVA_SET_RTN(xvap, XAT_REPARSE); 02800 } 02801 if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 02802 xoap->xoa_generation = zp->z_gen; 02803 XVA_SET_RTN(xvap, XAT_GEN); 02804 } 02805 02806 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 02807 xoap->xoa_offline = 02808 ((zp->z_pflags & ZFS_OFFLINE) != 0); 02809 XVA_SET_RTN(xvap, XAT_OFFLINE); 02810 } 02811 02812 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 02813 xoap->xoa_sparse = 02814 ((zp->z_pflags & ZFS_SPARSE) != 0); 02815 XVA_SET_RTN(xvap, XAT_SPARSE); 02816 } 02817 } 02818 02819 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 02820 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 02821 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 02822 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 02823 02824 mutex_exit(&zp->z_lock); 02825 02826 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 02827 vap->va_blksize = blksize; 02828 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 02829 02830 if (zp->z_blksz == 0) { 02831 /* 02832 * Block size hasn't been set; suggest maximal I/O transfers. 
		 */
		vap->va_blksize = zfsvfs->z_max_blksz;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Set the file attributes to the values contained in the vattr structure.
 *
 *	IN:	vp	- vnode of file to be modified.
 *		vap	- new attribute values.  If AT_XVATTR is set in
 *			  vap->va_mask, vap actually points to an xvattr_t
 *			  carrying optional (system) attributes as well.
 *		flags	- ATTR_NOACLCHECK set to bypass ACL checks
 *			  (CIFS context only).
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - ctime updated, mtime updated if size changed.
 */
/* ARGSUSED */
static int
zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	vattr_t		oldva;
	xvattr_t	tmpxvattr;	/* records optional attrs trimmed below */
	uint_t		mask = vap->va_mask;
	uint_t		saved_mask;
	uint64_t	saved_mode;
	int		trim_mask = 0;
	uint64_t	new_mode;
	uint64_t	new_uid, new_gid;
	uint64_t	xattr_obj;
	uint64_t	mtime[2], ctime[2];
	znode_t		*attrzp;	/* xattr dir znode, when owner changes */
	int		need_policy = FALSE;
	int		err, err2;
	zfs_fuid_info_t	*fuidp = NULL;
	xvattr_t	*xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t	*xoap;
	zfs_acl_t	*aclp;
	boolean_t	skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	boolean_t	fuid_dirtied = B_FALSE;
	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
	int		count = 0, xattr_count = 0;

	if (mask == 0)
		return (0);

	if (mask & AT_NOSET)
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zilog = zfsvfs->z_log;

	/*
	 * Make sure that if we have ephemeral uid/gid or xvattr specified
	 * that file system is at proper version level
	 */

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
	    (mask & AT_XVATTR))) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	if (mask & AT_SIZE && vp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (EISDIR);
	}

	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * If this is an xvattr_t, then get a pointer to the structure of
	 * optional attributes.  If this is NULL, then we have a vattr_t.
	 */
	xoap = xva_getxoptattr(xvap);

	xva_init(&tmpxvattr);

	/*
	 * Immutable files can only alter immutable bit and atime
	 */
	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	/*
	 * Verify timestamps doesn't overflow 32 bits.
	 * ZFS can handle large timestamps, but 32bit syscalls can't
	 * handle times greater than 2039.  This check should be removed
	 * once large timestamps are fully supported.
	 */
	if (mask & (AT_ATIME | AT_MTIME)) {
		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
			ZFS_EXIT(zfsvfs);
			return (EOVERFLOW);
		}
	}

top:
	/* Restart point for dmu_tx_assign() ERESTART retries. */
	attrzp = NULL;
	aclp = NULL;

	/* Can this be moved to before the top label? */
	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
		ZFS_EXIT(zfsvfs);
		return (EROFS);
	}

	/*
	 * First validate permissions
	 */

	if (mask & AT_SIZE) {
		/*
		 * XXX - Note, we are not providing any open
		 * mode flags here (like FNDELAY), so we may
		 * block if there are locks present... this
		 * should be addressed in openat().
		 */
		/* XXX - would it be OK to generate a log record here? */
		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	if (mask & (AT_ATIME|AT_MTIME) ||
	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
		    skipaclchk, cr);
	}

	if (mask & (AT_UID|AT_GID)) {
		int	idmask = (mask & (AT_UID|AT_GID));
		int	take_owner;
		int	take_group;

		/*
		 * NOTE: even if a new mode is being set,
		 * we may clear S_ISUID/S_ISGID bits.
		 */

		if (!(mask & AT_MODE))
			vap->va_mode = zp->z_mode;

		/*
		 * Take ownership or chgrp to group we are a member of
		 */

		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
		take_group = (mask & AT_GID) &&
		    zfs_groupmember(zfsvfs, vap->va_gid, cr);

		/*
		 * If both AT_UID and AT_GID are set then take_owner and
		 * take_group must both be set in order to allow taking
		 * ownership.
		 *
		 * Otherwise, send the check through secpolicy_vnode_setattr()
		 *
		 */

		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
		    ((idmask == AT_UID) && take_owner) ||
		    ((idmask == AT_GID) && take_group)) {
			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
			    skipaclchk, cr) == 0) {
				/*
				 * Remove setuid/setgid for non-privileged users
				 */
				secpolicy_setid_clear(vap, vp, cr);
				trim_mask = (mask & (AT_UID|AT_GID));
			} else {
				need_policy = TRUE;
			}
		} else {
			need_policy = TRUE;
		}
	}

	mutex_enter(&zp->z_lock);
	oldva.va_mode = zp->z_mode;
	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
	if (mask & AT_XVATTR) {
		/*
		 * Update xvattr mask to include only those attributes
		 * that are actually changing.
		 *
		 * the bits will be restored prior to actually setting
		 * the attributes so the caller thinks they were set.
		 */
		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			if (xoap->xoa_appendonly !=
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			if (xoap->xoa_nounlink !=
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			if (xoap->xoa_immutable !=
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			if (xoap->xoa_nodump !=
			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NODUMP);
				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			if (xoap->xoa_av_modified !=
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			if ((vp->v_type != VREG &&
			    xoap->xoa_av_quarantined) ||
			    xoap->xoa_av_quarantined !=
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
			}
		}

		/* The reparse flag can never be changed via setattr. */
		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			mutex_exit(&zp->z_lock);
			ZFS_EXIT(zfsvfs);
			return (EPERM);
		}

		if (need_policy == FALSE &&
		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
			need_policy = TRUE;
		}
	}

	mutex_exit(&zp->z_lock);

	if (mask & AT_MODE) {
		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
			err = secpolicy_setid_setsticky_clear(vp, vap,
			    &oldva, cr);
			if (err) {
				ZFS_EXIT(zfsvfs);
				return (err);
			}
			trim_mask |= AT_MODE;
		} else {
			need_policy = TRUE;
		}
	}

	if (need_policy) {
		/*
		 * If trim_mask is set then take ownership
		 * has been granted or write_acl is present and user
		 * has the ability to modify mode.  In that case remove
		 * UID|GID and or MODE from mask so that
		 * secpolicy_vnode_setattr() doesn't revoke it.
		 */

		if (trim_mask) {
			saved_mask = vap->va_mask;
			vap->va_mask &= ~trim_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Save the mode, as secpolicy_vnode_setattr()
				 * will overwrite it with ova.va_mode.
				 */
				saved_mode = vap->va_mode;
			}
		}
		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}

		if (trim_mask) {
			vap->va_mask |= saved_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Recover the mode after
				 * secpolicy_vnode_setattr().
				 */
				vap->va_mode = saved_mode;
			}
		}
	}

	/*
	 * secpolicy_vnode_setattr, or take ownership may have
	 * changed va_mask
	 */
	mask = vap->va_mask;

	if ((mask & (AT_UID | AT_GID))) {
		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
		    &xattr_obj, sizeof (xattr_obj));

		if (err == 0 && xattr_obj) {
			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
			if (err)
				goto out2;
		}
		/*
		 * NOTE: out2 (not out) is used for failures below because no
		 * DMU transaction has been created yet at these points.
		 */
		if (mask & AT_UID) {
			new_uid = zfs_fuid_create(zfsvfs,
			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
			if (new_uid != zp->z_uid &&
			    zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
				if (attrzp)
					VN_RELE(ZTOV(attrzp));
				err = EDQUOT;
				goto out2;
			}
		}

		if (mask & AT_GID) {
			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
			    cr, ZFS_GROUP, &fuidp);
			if (new_gid != zp->z_gid &&
			    zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
				if (attrzp)
					VN_RELE(ZTOV(attrzp));
				err = EDQUOT;
				goto out2;
			}
		}
	}
	tx = dmu_tx_create(zfsvfs->z_os);

	if (mask & AT_MODE) {
		uint64_t pmode = zp->z_mode;
		uint64_t acl_obj;
		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);

		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
			goto out;

		mutex_enter(&zp->z_lock);
		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
			/*
			 * Are we upgrading ACL from old V0 format
			 * to V1 format?
			 */
			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
			    zfs_znode_acl_version(zp) ==
			    ZFS_ACL_VERSION_INITIAL) {
				dmu_tx_hold_free(tx, acl_obj, 0,
				    DMU_OBJECT_END);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
				    0, aclp->z_acl_bytes);
			} else {
				dmu_tx_hold_write(tx, acl_obj, 0,
				    aclp->z_acl_bytes);
			}
		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, aclp->z_acl_bytes);
		}
		mutex_exit(&zp->z_lock);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
	} else {
		if ((mask & AT_XVATTR) &&
		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		else
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	}

	if (attrzp) {
		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
	}

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);

	zfs_sa_upgrade_txholds(tx, zp);

	err = dmu_tx_assign(tx, TXG_NOWAIT);
	if (err) {
		/* On ERESTART, wait for the txg then retry from "top". */
		if (err == ERESTART)
			dmu_tx_wait(tx);
		goto out;
	}

	count = 0;
	/*
	 * Set each attribute requested.
	 * We group settings according to the locks they need to acquire.
	 *
	 * Note: you cannot set ctime directly, although it will be
	 * updated as a side-effect of calling this function.
	 */


	if (mask & (AT_UID|AT_GID|AT_MODE))
		mutex_enter(&zp->z_acl_lock);
	mutex_enter(&zp->z_lock);

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));

	if (attrzp) {
		if (mask & (AT_UID|AT_GID|AT_MODE))
			mutex_enter(&attrzp->z_acl_lock);
		mutex_enter(&attrzp->z_lock);
		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
		    sizeof (attrzp->z_pflags));
	}

	if (mask & (AT_UID|AT_GID)) {

		if (mask & AT_UID) {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
			    &new_uid, sizeof (new_uid));
			zp->z_uid = new_uid;
			if (attrzp) {
				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
				    sizeof (new_uid));
				attrzp->z_uid = new_uid;
			}
		}

		if (mask & AT_GID) {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
			    NULL, &new_gid, sizeof (new_gid));
			zp->z_gid = new_gid;
			if (attrzp) {
				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
				    sizeof (new_gid));
				attrzp->z_gid = new_gid;
			}
		}
		if (!(mask & AT_MODE)) {
			/*
			 * The bulk entry stores &new_mode; the value assigned
			 * below is what sa_bulk_update() will actually write.
			 */
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
			    NULL, &new_mode, sizeof (new_mode));
			new_mode = zp->z_mode;
		}
		err = zfs_acl_chown_setattr(zp);
		ASSERT(err == 0);
		if (attrzp) {
			err = zfs_acl_chown_setattr(attrzp);
			ASSERT(err == 0);
		}
	}

	if (mask & AT_MODE) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
		    &new_mode, sizeof (new_mode));
		zp->z_mode = new_mode;
		ASSERT3U((uintptr_t)aclp, !=, 0);
		err = zfs_aclset_common(zp, aclp, cr, tx);
		ASSERT0(err);
		if (zp->z_acl_cached)
			zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = aclp;
		aclp = NULL;	/* ownership transferred to z_acl_cached */
	}


	if (mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
		    &zp->z_atime, sizeof (zp->z_atime));
	}

	if (mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
		    mtime, sizeof (mtime));
	}

	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
		    NULL, mtime, sizeof (mtime));
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
		    B_TRUE);
	} else if (mask != 0) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
		    B_TRUE);
		if (attrzp) {
			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
			    SA_ZPL_CTIME(zfsvfs), NULL,
			    &ctime, sizeof (ctime));
			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
			    mtime, ctime, B_TRUE);
		}
	}
	/*
	 * Do this after setting timestamps to prevent timestamp
	 * update from toggling bit
	 */

	if (xoap && (mask & AT_XVATTR)) {

		/*
		 * restore trimmed off masks
		 * so that return masks can be set for caller.
		 */

		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
			XVA_SET_REQ(xvap, XAT_APPENDONLY);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
			XVA_SET_REQ(xvap, XAT_NOUNLINK);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
			XVA_SET_REQ(xvap, XAT_NODUMP);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
			ASSERT(vp->v_type == VREG);

		zfs_xvattr_set(zp, xvap, tx);
	}

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (mask != 0)
		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);

	mutex_exit(&zp->z_lock);
	if (mask & (AT_UID|AT_GID|AT_MODE))
		mutex_exit(&zp->z_acl_lock);

	if (attrzp) {
		if (mask & (AT_UID|AT_GID|AT_MODE))
			mutex_exit(&attrzp->z_acl_lock);
		mutex_exit(&attrzp->z_lock);
	}
out:
	if (err == 0 && attrzp) {
		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
		    xattr_count, tx);
		ASSERT(err2 == 0);
	}

	if (attrzp)
		VN_RELE(ZTOV(attrzp));
	if (aclp)
		zfs_acl_free(aclp);

	if (fuidp) {
		zfs_fuid_info_free(fuidp);
		fuidp = NULL;
	}

	if (err) {
		dmu_tx_abort(tx);
		if (err == ERESTART)
			goto top;
	} else {
		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		dmu_tx_commit(tx);
	}

out2:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (err);
}

/*
 * List node used by zfs_rename_lock()/zfs_rename_unlock() to remember
 * the chain of parent locks (and znode holds) taken while walking up
 * the directory tree.
 */
typedef struct zfs_zlock {
	krwlock_t	*zl_rwlock;	/* lock we acquired */
	znode_t		*zl_znode;	/* znode we held */
	struct
zfs_zlock	*zl_next;	/* next in list */
} zfs_zlock_t;

/*
 * Drop locks and release vnodes that were held by zfs_rename_lock().
 */
static void
zfs_rename_unlock(zfs_zlock_t **zlpp)
{
	zfs_zlock_t *zl;

	while ((zl = *zlpp) != NULL) {
		if (zl->zl_znode != NULL)
			VN_RELE(ZTOV(zl->zl_znode));
		rw_exit(zl->zl_rwlock);
		*zlpp = zl->zl_next;
		kmem_free(zl, sizeof (*zl));
	}
}

/*
 * Search back through the directory tree, using the ".." entries.
 * Lock each directory in the chain to prevent concurrent renames.
 * Fail any attempt to move a directory into one of its own descendants.
 * On success *zlpp holds the chain of locks taken; on any return the
 * caller is responsible for releasing it with zfs_rename_unlock().
 */
static int
zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
{
	zfs_zlock_t	*zl;
	znode_t		*zp = tdzp;
	uint64_t	rootid = zp->z_zfsvfs->z_root;
	uint64_t	oidp = zp->z_id;
	krwlock_t	*rwlp = &szp->z_parent_lock;
	krw_t		rw = RW_WRITER;

	/*
	 * First pass write-locks szp and compares to zp->z_id.
	 * Later passes read-lock zp and compare to zp->z_parent.
	 */
	do {
		if (!rw_tryenter(rwlp, rw)) {
			/*
			 * Another thread is renaming in this path.
			 * Note that if we are a WRITER, we don't have any
			 * parent_locks held yet.
			 */
			if (rw == RW_READER && zp->z_id > szp->z_id) {
				/*
				 * Drop our locks and restart
				 */
				zfs_rename_unlock(&zl);
				*zlpp = NULL;
				zp = tdzp;
				oidp = zp->z_id;
				rwlp = &szp->z_parent_lock;
				rw = RW_WRITER;
				continue;
			} else {
				/*
				 * Wait for other thread to drop its locks
				 */
				rw_enter(rwlp, rw);
			}
		}

		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
		zl->zl_rwlock = rwlp;
		zl->zl_znode = NULL;
		zl->zl_next = *zlpp;
		*zlpp = zl;

		if (oidp == szp->z_id)		/* We're a descendant of szp */
			return (EINVAL);

		if (oidp == rootid)		/* We've hit the top */
			return (0);

		if (rw == RW_READER) {		/* i.e. not the first pass */
			int error = zfs_zget(zp->z_zfsvfs, oidp, &zp);
			if (error)
				return (error);
			zl->zl_znode = zp;
		}
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs),
		    &oidp, sizeof (oidp));
		rwlp = &zp->z_parent_lock;
		rw = RW_READER;

	} while (zp->z_id != sdzp->z_id);

	return (0);
}

/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 *	IN:	sdvp	- Source directory containing the "old entry".
 *		snm	- Old entry name.
 *		tdvp	- Target directory to contain the "new entry".
 *		tnm	- New entry name.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *		flags	- case flags (FIGNORECASE).
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = VTOZ(sdvp);
	zfsvfs_t	*zfsvfs = sdzp->z_zfsvfs;
	zilog_t		*zilog;
	vnode_t		*realvp;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	/*
	 * Make sure we have the real vp for the target directory.
	 */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0)
		tdvp = realvp;

	/* Cross-filesystem renames (including into .zfs) are not allowed. */
	if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) {
		ZFS_EXIT(zfsvfs);
		return (EXDEV);
	}

	tdzp = VTOZ(tdvp);
	ZFS_VERIFY_ZP(tdzp);
	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

top:
	/* Restart point for dmu_tx_assign() ERESTART retries. */
	szp = NULL;
	tzp = NULL;
	zl = NULL;

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Lock source and target directory entries.
 * To prevent deadlock,
	 * a lock ordering must be defined.  We lock the directory with
	 * the smallest object id first, or if it's a tie, the one with
	 * the lexically first name.
	 */
	if (sdzp->z_id < tdzp->z_id) {
		cmp = -1;
	} else if (sdzp->z_id > tdzp->z_id) {
		cmp = 1;
	} else {
		/*
		 * First compare the two name arguments without
		 * considering any case folding.
		 */
		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);

		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
		ASSERT(error == 0 || !zfsvfs->z_utf8);
		if (cmp == 0) {
			/*
			 * POSIX: "If the old argument and the new argument
			 * both refer to links to the same existing file,
			 * the rename() function shall return successfully
			 * and perform no other action."
			 */
			ZFS_EXIT(zfsvfs);
			return (0);
		}
		/*
		 * If the file system is case-folding, then we may
		 * have some more checking to do.  A case-folding file
		 * system is either supporting mixed case sensitivity
		 * access or is completely case-insensitive.  Note
		 * that the file system is always case preserving.
		 *
		 * In mixed sensitivity mode case sensitive behavior
		 * is the default.  FIGNORECASE must be used to
		 * explicitly request case insensitive behavior.
		 *
		 * If the source and target names provided differ only
		 * by case (e.g., a request to rename 'tim' to 'Tim'),
		 * we will treat this as a special case in the
		 * case-insensitive mode: as long as the source name
		 * is an exact match, we will allow this to proceed as
		 * a name-change request.
		 */
		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
		    flags & FIGNORECASE)) &&
		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
		    &error) == 0) {
			/*
			 * case preserving rename request, require exact
			 * name matches
			 */
			zflg |= ZCIEXACT;
			zflg &= ~ZCILOOK;
		}
	}

	/*
	 * If the source and destination directories are the same, we should
	 * grab the z_name_lock of that directory only once.
	 */
	if (sdzp == tdzp) {
		zflg |= ZHAVELOCK;
		rw_enter(&sdzp->z_name_lock, RW_READER);
	}

	/* Acquire the two dirent locks in the order decided above. */
	if (cmp < 0) {
		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
		    ZEXISTS | zflg, NULL, NULL);
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
	} else {
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, zflg, NULL, NULL);
		serr = zfs_dirent_lock(&sdl,
		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
		    NULL, NULL);
	}

	if (serr) {
		/*
		 * Source entry invalid or not there.
		 */
		if (!terr) {
			zfs_dirent_unlock(tdl);
			if (tzp)
				VN_RELE(ZTOV(tzp));
		}

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		/*
		 * FreeBSD: In OpenSolaris they only check if rename source is
		 * ".." here, because "." is handled in their lookup.  This is
		 * not the case for FreeBSD, so we check for "." explicitly.
		 */
		if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0)
			serr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (serr);
	}
	if (terr) {
		zfs_dirent_unlock(sdl);
		VN_RELE(ZTOV(szp));

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		if (strcmp(tnm, "..") == 0)
			terr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (terr);
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */

	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
		goto out;

	if (ZTOV(szp)->v_type == VDIR) {
		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if (ZTOV(szp)->v_type == VDIR) {
			if (ZTOV(tzp)->v_type != VDIR) {
				error = ENOTDIR;
				goto out;
			}
		} else {
			if (ZTOV(tzp)->v_type == VDIR) {
				error = EISDIR;
				goto out;
			}
		}
		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (szp->z_id == tzp->z_id) {
			error = 0;
			goto out;
		}
	}

	vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
	if (tzp)
		vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/* Drop everything; on ERESTART wait and retry from "top". */
		if (zl != NULL)
			zfs_rename_unlock(&zl);
		zfs_dirent_unlock(sdl);
		zfs_dirent_unlock(tdl);

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		VN_RELE(ZTOV(szp));
		if (tzp)
			VN_RELE(ZTOV(tzp));
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);

	if (error == 0) {
		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME |
				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
				    sdl->dl_name, tdzp, tdl->dl_name, szp);

				/*
				 * Update path information for the target vnode
				 */
				vn_renamepath(tdvp, ZTOV(szp), tnm,
				    strlen(tnm));
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
				    ZRENAMING, NULL), ==, 0);
			}
		}
#ifdef FREEBSD_NAMECACHE
		if (error == 0) {
			cache_purge(sdvp);
			cache_purge(tdvp);
			cache_purge(ZTOV(szp));
			if (tzp)
				cache_purge(ZTOV(tzp));
		}
#endif
	}

	dmu_tx_commit(tx);
out:
	if (zl != NULL)
		zfs_rename_unlock(&zl);

	zfs_dirent_unlock(sdl);
	zfs_dirent_unlock(tdl);

	if (sdzp == tdzp)
		rw_exit(&sdzp->z_name_lock);


	VN_RELE(ZTOV(szp));
	if (tzp)
		VN_RELE(ZTOV(tzp));

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 *	IN:	dvp	- Directory to contain new symbolic link.
 *		name	- Name of directory entry in dvp.
 *		vap	- Attributes of new entry.
 *		link	- Name for new symlink entry (the target path).
 *		cr	- credentials of caller.
 *		td	- calling thread.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link,
    cred_t *cr, kthread_t *td)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	len = strlen(link);
	int		error;
	int		zflg = ZNEW;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype = TX_SYMLINK;
	int		flags = 0;	/* Case flags */
03986 03987 ASSERT(vap->va_type == VLNK); 03988 03989 ZFS_ENTER(zfsvfs); 03990 ZFS_VERIFY_ZP(dzp); 03991 zilog = zfsvfs->z_log; 03992 03993 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 03994 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 03995 ZFS_EXIT(zfsvfs); 03996 return (EILSEQ); 03997 } 03998 if (flags & FIGNORECASE) 03999 zflg |= ZCILOOK; 04000 04001 if (len > MAXPATHLEN) { 04002 ZFS_EXIT(zfsvfs); 04003 return (ENAMETOOLONG); 04004 } 04005 04006 if ((error = zfs_acl_ids_create(dzp, 0, 04007 vap, cr, NULL, &acl_ids)) != 0) { 04008 ZFS_EXIT(zfsvfs); 04009 return (error); 04010 } 04011 top: 04012 /* 04013 * Attempt to lock directory; fail if entry already exists. 04014 */ 04015 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 04016 if (error) { 04017 zfs_acl_ids_free(&acl_ids); 04018 ZFS_EXIT(zfsvfs); 04019 return (error); 04020 } 04021 04022 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 04023 zfs_acl_ids_free(&acl_ids); 04024 zfs_dirent_unlock(dl); 04025 ZFS_EXIT(zfsvfs); 04026 return (error); 04027 } 04028 04029 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 04030 zfs_acl_ids_free(&acl_ids); 04031 zfs_dirent_unlock(dl); 04032 ZFS_EXIT(zfsvfs); 04033 return (EDQUOT); 04034 } 04035 tx = dmu_tx_create(zfsvfs->z_os); 04036 fuid_dirtied = zfsvfs->z_fuid_dirty; 04037 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 04038 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 04039 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 04040 ZFS_SA_BASE_ATTR_SIZE + len); 04041 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 04042 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 04043 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 04044 acl_ids.z_aclp->z_acl_bytes); 04045 } 04046 if (fuid_dirtied) 04047 zfs_fuid_txhold(zfsvfs, tx); 04048 error = dmu_tx_assign(tx, TXG_NOWAIT); 04049 if (error) { 04050 zfs_dirent_unlock(dl); 04051 if (error == ERESTART) { 04052 dmu_tx_wait(tx); 04053 dmu_tx_abort(tx); 04054 goto top; 04055 } 
04056 zfs_acl_ids_free(&acl_ids); 04057 dmu_tx_abort(tx); 04058 ZFS_EXIT(zfsvfs); 04059 return (error); 04060 } 04061 04062 /* 04063 * Create a new object for the symlink. 04064 * for version 4 ZPL datsets the symlink will be an SA attribute 04065 */ 04066 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 04067 04068 if (fuid_dirtied) 04069 zfs_fuid_sync(zfsvfs, tx); 04070 04071 mutex_enter(&zp->z_lock); 04072 if (zp->z_is_sa) 04073 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 04074 link, len, tx); 04075 else 04076 zfs_sa_symlink(zp, link, len, tx); 04077 mutex_exit(&zp->z_lock); 04078 04079 zp->z_size = len; 04080 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 04081 &zp->z_size, sizeof (zp->z_size), tx); 04082 /* 04083 * Insert the new object into the directory. 04084 */ 04085 (void) zfs_link_create(dl, zp, tx, ZNEW); 04086 04087 if (flags & FIGNORECASE) 04088 txtype |= TX_CI; 04089 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 04090 *vpp = ZTOV(zp); 04091 04092 zfs_acl_ids_free(&acl_ids); 04093 04094 dmu_tx_commit(tx); 04095 04096 zfs_dirent_unlock(dl); 04097 04098 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 04099 zil_commit(zilog, 0); 04100 04101 ZFS_EXIT(zfsvfs); 04102 return (error); 04103 } 04104 04120 /* ARGSUSED */ 04121 static int 04122 zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 04123 { 04124 znode_t *zp = VTOZ(vp); 04125 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04126 int error; 04127 04128 ZFS_ENTER(zfsvfs); 04129 ZFS_VERIFY_ZP(zp); 04130 04131 mutex_enter(&zp->z_lock); 04132 if (zp->z_is_sa) 04133 error = sa_lookup_uio(zp->z_sa_hdl, 04134 SA_ZPL_SYMLINK(zfsvfs), uio); 04135 else 04136 error = zfs_sa_readlink(zp, uio); 04137 mutex_exit(&zp->z_lock); 04138 04139 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 04140 04141 ZFS_EXIT(zfsvfs); 04142 return (error); 04143 } 04144 04161 /* ARGSUSED */ 04162 static int 04163 zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 04164 caller_context_t *ct, int flags) 04165 
{ 04166 znode_t *dzp = VTOZ(tdvp); 04167 znode_t *tzp, *szp; 04168 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 04169 zilog_t *zilog; 04170 zfs_dirlock_t *dl; 04171 dmu_tx_t *tx; 04172 vnode_t *realvp; 04173 int error; 04174 int zf = ZNEW; 04175 uint64_t parent; 04176 uid_t owner; 04177 04178 ASSERT(tdvp->v_type == VDIR); 04179 04180 ZFS_ENTER(zfsvfs); 04181 ZFS_VERIFY_ZP(dzp); 04182 zilog = zfsvfs->z_log; 04183 04184 if (VOP_REALVP(svp, &realvp, ct) == 0) 04185 svp = realvp; 04186 04187 /* 04188 * POSIX dictates that we return EPERM here. 04189 * Better choices include ENOTSUP or EISDIR. 04190 */ 04191 if (svp->v_type == VDIR) { 04192 ZFS_EXIT(zfsvfs); 04193 return (EPERM); 04194 } 04195 04196 if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 04197 ZFS_EXIT(zfsvfs); 04198 return (EXDEV); 04199 } 04200 04201 szp = VTOZ(svp); 04202 ZFS_VERIFY_ZP(szp); 04203 04204 /* Prevent links to .zfs/shares files */ 04205 04206 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 04207 &parent, sizeof (uint64_t))) != 0) { 04208 ZFS_EXIT(zfsvfs); 04209 return (error); 04210 } 04211 if (parent == zfsvfs->z_shares_dir) { 04212 ZFS_EXIT(zfsvfs); 04213 return (EPERM); 04214 } 04215 04216 if (zfsvfs->z_utf8 && u8_validate(name, 04217 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 04218 ZFS_EXIT(zfsvfs); 04219 return (EILSEQ); 04220 } 04221 if (flags & FIGNORECASE) 04222 zf |= ZCILOOK; 04223 04224 /* 04225 * We do not support links between attributes and non-attributes 04226 * because of the potential security risk of creating links 04227 * into "normal" file space in order to circumvent restrictions 04228 * imposed in attribute space. 
04229 */ 04230 if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 04231 ZFS_EXIT(zfsvfs); 04232 return (EINVAL); 04233 } 04234 04235 04236 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 04237 if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 04238 ZFS_EXIT(zfsvfs); 04239 return (EPERM); 04240 } 04241 04242 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 04243 ZFS_EXIT(zfsvfs); 04244 return (error); 04245 } 04246 04247 top: 04248 /* 04249 * Attempt to lock directory; fail if entry already exists. 04250 */ 04251 error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 04252 if (error) { 04253 ZFS_EXIT(zfsvfs); 04254 return (error); 04255 } 04256 04257 tx = dmu_tx_create(zfsvfs->z_os); 04258 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 04259 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 04260 zfs_sa_upgrade_txholds(tx, szp); 04261 zfs_sa_upgrade_txholds(tx, dzp); 04262 error = dmu_tx_assign(tx, TXG_NOWAIT); 04263 if (error) { 04264 zfs_dirent_unlock(dl); 04265 if (error == ERESTART) { 04266 dmu_tx_wait(tx); 04267 dmu_tx_abort(tx); 04268 goto top; 04269 } 04270 dmu_tx_abort(tx); 04271 ZFS_EXIT(zfsvfs); 04272 return (error); 04273 } 04274 04275 error = zfs_link_create(dl, szp, tx, 0); 04276 04277 if (error == 0) { 04278 uint64_t txtype = TX_LINK; 04279 if (flags & FIGNORECASE) 04280 txtype |= TX_CI; 04281 zfs_log_link(zilog, tx, txtype, dzp, szp, name); 04282 } 04283 04284 dmu_tx_commit(tx); 04285 04286 zfs_dirent_unlock(dl); 04287 04288 if (error == 0) { 04289 vnevent_link(svp, ct); 04290 } 04291 04292 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 04293 zil_commit(zilog, 0); 04294 04295 ZFS_EXIT(zfsvfs); 04296 return (error); 04297 } 04298 04299 #ifdef sun 04300 04304 /* ARGSUSED */ 04305 static int 04306 zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 04307 size_t *lenp, int flags, cred_t *cr) 04308 { 04309 pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 04310 return (0); 04311 } 04312 04329 /* 
ARGSUSED */ 04330 static int 04331 zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 04332 size_t *lenp, int flags, cred_t *cr) 04333 { 04334 znode_t *zp = VTOZ(vp); 04335 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04336 dmu_tx_t *tx; 04337 u_offset_t off, koff; 04338 size_t len, klen; 04339 int err; 04340 04341 off = pp->p_offset; 04342 len = PAGESIZE; 04343 /* 04344 * If our blocksize is bigger than the page size, try to kluster 04345 * multiple pages so that we write a full block (thus avoiding 04346 * a read-modify-write). 04347 */ 04348 if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 04349 klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 04350 koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 04351 ASSERT(koff <= zp->z_size); 04352 if (koff + klen > zp->z_size) 04353 klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 04354 pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 04355 } 04356 ASSERT3U(btop(len), ==, btopr(len)); 04357 04358 /* 04359 * Can't push pages past end-of-file. 
04360 */ 04361 if (off >= zp->z_size) { 04362 /* ignore all pages */ 04363 err = 0; 04364 goto out; 04365 } else if (off + len > zp->z_size) { 04366 int npages = btopr(zp->z_size - off); 04367 page_t *trunc; 04368 04369 page_list_break(&pp, &trunc, npages); 04370 /* ignore pages past end of file */ 04371 if (trunc) 04372 pvn_write_done(trunc, flags); 04373 len = zp->z_size - off; 04374 } 04375 04376 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 04377 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 04378 err = EDQUOT; 04379 goto out; 04380 } 04381 top: 04382 tx = dmu_tx_create(zfsvfs->z_os); 04383 dmu_tx_hold_write(tx, zp->z_id, off, len); 04384 04385 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 04386 zfs_sa_upgrade_txholds(tx, zp); 04387 err = dmu_tx_assign(tx, TXG_NOWAIT); 04388 if (err != 0) { 04389 if (err == ERESTART) { 04390 dmu_tx_wait(tx); 04391 dmu_tx_abort(tx); 04392 goto top; 04393 } 04394 dmu_tx_abort(tx); 04395 goto out; 04396 } 04397 04398 if (zp->z_blksz <= PAGESIZE) { 04399 caddr_t va = zfs_map_page(pp, S_READ); 04400 ASSERT3U(len, <=, PAGESIZE); 04401 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 04402 zfs_unmap_page(pp, va); 04403 } else { 04404 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 04405 } 04406 04407 if (err == 0) { 04408 uint64_t mtime[2], ctime[2]; 04409 sa_bulk_attr_t bulk[3]; 04410 int count = 0; 04411 04412 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 04413 &mtime, 16); 04414 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 04415 &ctime, 16); 04416 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 04417 &zp->z_pflags, 8); 04418 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 04419 B_TRUE); 04420 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 04421 } 04422 dmu_tx_commit(tx); 04423 04424 out: 04425 pvn_write_done(pp, (err ? 
B_ERROR : 0) | flags); 04426 if (offp) 04427 *offp = off; 04428 if (lenp) 04429 *lenp = len; 04430 04431 return (err); 04432 } 04433 04451 /*ARGSUSED*/ 04452 static int 04453 zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 04454 caller_context_t *ct) 04455 { 04456 znode_t *zp = VTOZ(vp); 04457 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04458 page_t *pp; 04459 size_t io_len; 04460 u_offset_t io_off; 04461 uint_t blksz; 04462 rl_t *rl; 04463 int error = 0; 04464 04465 ZFS_ENTER(zfsvfs); 04466 ZFS_VERIFY_ZP(zp); 04467 04468 /* 04469 * Align this request to the file block size in case we kluster. 04470 * XXX - this can result in pretty aggresive locking, which can 04471 * impact simultanious read/write access. One option might be 04472 * to break up long requests (len == 0) into block-by-block 04473 * operations to get narrower locking. 04474 */ 04475 blksz = zp->z_blksz; 04476 if (ISP2(blksz)) 04477 io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 04478 else 04479 io_off = 0; 04480 if (len > 0 && ISP2(blksz)) 04481 io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 04482 else 04483 io_len = 0; 04484 04485 if (io_len == 0) { 04486 /* 04487 * Search the entire vp list for pages >= io_off. 04488 */ 04489 rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 04490 error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 04491 goto out; 04492 } 04493 rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 04494 04495 if (off > zp->z_size) { 04496 /* past end of file */ 04497 zfs_range_unlock(rl); 04498 ZFS_EXIT(zfsvfs); 04499 return (0); 04500 } 04501 04502 len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 04503 04504 for (off = io_off; io_off < off + len; io_off += io_len) { 04505 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 04506 pp = page_lookup(vp, io_off, 04507 (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 04508 } else { 04509 pp = page_lookup_nowait(vp, io_off, 04510 (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 04511 } 04512 04513 if (pp != NULL && pvn_getdirty(pp, flags)) { 04514 int err; 04515 04516 /* 04517 * Found a dirty page to push 04518 */ 04519 err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 04520 if (err) 04521 error = err; 04522 } else { 04523 io_len = PAGESIZE; 04524 } 04525 } 04526 out: 04527 zfs_range_unlock(rl); 04528 if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 04529 zil_commit(zfsvfs->z_log, zp->z_id); 04530 ZFS_EXIT(zfsvfs); 04531 return (error); 04532 } 04533 #endif /* sun */ 04534 04535 /*ARGSUSED*/ 04536 void 04537 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 04538 { 04539 znode_t *zp = VTOZ(vp); 04540 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04541 int error; 04542 04543 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 04544 if (zp->z_sa_hdl == NULL) { 04545 /* 04546 * The fs has been unmounted, or we did a 04547 * suspend/resume and this file no longer exists. 04548 */ 04549 VI_LOCK(vp); 04550 ASSERT(vp->v_count <= 1); 04551 vp->v_count = 0; 04552 VI_UNLOCK(vp); 04553 vrecycle(vp); 04554 rw_exit(&zfsvfs->z_teardown_inactive_lock); 04555 return; 04556 } 04557 04558 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 04559 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 04560 04561 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 04562 zfs_sa_upgrade_txholds(tx, zp); 04563 error = dmu_tx_assign(tx, TXG_WAIT); 04564 if (error) { 04565 dmu_tx_abort(tx); 04566 } else { 04567 mutex_enter(&zp->z_lock); 04568 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 04569 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 04570 zp->z_atime_dirty = 0; 04571 mutex_exit(&zp->z_lock); 04572 dmu_tx_commit(tx); 04573 } 04574 } 04575 04576 zfs_zinactive(zp); 04577 rw_exit(&zfsvfs->z_teardown_inactive_lock); 04578 } 04579 04580 #ifdef sun 04581 04592 /* ARGSUSED */ 04593 static int 04594 zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 04595 caller_context_t *ct) 04596 { 04597 if (vp->v_type == VDIR) 04598 
return (0); 04599 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 04600 } 04601 04606 static int 04607 zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 04608 flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 04609 { 04610 znode_t *zp = VTOZ(vp); 04611 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04612 04613 ZFS_ENTER(zfsvfs); 04614 ZFS_VERIFY_ZP(zp); 04615 04616 /* 04617 * We are following the UFS semantics with respect to mapcnt 04618 * here: If we see that the file is mapped already, then we will 04619 * return an error, but we don't worry about races between this 04620 * function and zfs_map(). 04621 */ 04622 if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 04623 ZFS_EXIT(zfsvfs); 04624 return (EAGAIN); 04625 } 04626 ZFS_EXIT(zfsvfs); 04627 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 04628 } 04629 04637 static int 04638 zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 04639 caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 04640 { 04641 znode_t *zp = VTOZ(vp); 04642 page_t *pp, *cur_pp; 04643 objset_t *os = zp->z_zfsvfs->z_os; 04644 u_offset_t io_off, total; 04645 size_t io_len; 04646 int err; 04647 04648 if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 04649 /* 04650 * We only have a single page, don't bother klustering 04651 */ 04652 io_off = off; 04653 io_len = PAGESIZE; 04654 pp = page_create_va(vp, io_off, io_len, 04655 PG_EXCL | PG_WAIT, seg, addr); 04656 } else { 04657 /* 04658 * Try to find enough pages to fill the page list 04659 */ 04660 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 04661 &io_len, off, plsz, 0); 04662 } 04663 if (pp == NULL) { 04664 /* 04665 * The page already exists, nothing to do here. 04666 */ 04667 *pl = NULL; 04668 return (0); 04669 } 04670 04671 /* 04672 * Fill the pages in the kluster. 
04673 */ 04674 cur_pp = pp; 04675 for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 04676 caddr_t va; 04677 04678 ASSERT3U(io_off, ==, cur_pp->p_offset); 04679 va = zfs_map_page(cur_pp, S_WRITE); 04680 err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 04681 DMU_READ_PREFETCH); 04682 zfs_unmap_page(cur_pp, va); 04683 if (err) { 04684 /* On error, toss the entire kluster */ 04685 pvn_read_done(pp, B_ERROR); 04686 /* convert checksum errors into IO errors */ 04687 if (err == ECKSUM) 04688 err = EIO; 04689 return (err); 04690 } 04691 cur_pp = cur_pp->p_next; 04692 } 04693 04694 /* 04695 * Fill in the page list array from the kluster starting 04696 * from the desired offset `off'. 04697 * NOTE: the page list will always be null terminated. 04698 */ 04699 pvn_plist_init(pp, pl, plsz, off, io_len, rw); 04700 ASSERT(pl == NULL || (*pl)->p_offset == off); 04701 04702 return (0); 04703 } 04704 04731 /* ARGSUSED */ 04732 static int 04733 zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 04734 page_t *pl, size_t plsz, struct seg *seg, caddr_t addr, 04735 enum seg_rw rw, cred_t *cr, caller_context_t *ct) 04736 { 04737 znode_t *zp = VTOZ(vp); 04738 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04739 page_t **pl0 = pl; 04740 int err = 0; 04741 04742 /* we do our own caching, faultahead is unnecessary */ 04743 if (pl == NULL) 04744 return (0); 04745 else if (len > plsz) 04746 len = plsz; 04747 else 04748 len = P2ROUNDUP(len, PAGESIZE); 04749 ASSERT(plsz >= len); 04750 04751 ZFS_ENTER(zfsvfs); 04752 ZFS_VERIFY_ZP(zp); 04753 04754 if (protp) 04755 *protp = PROT_ALL; 04756 04757 /* 04758 * Loop through the requested range [off, off + len) looking 04759 * for pages. If we don't find a page, we will need to create 04760 * a new page and fill it with data from the file. 
04761 */ 04762 while (len > 0) { 04763 if (*pl = page_lookup(vp, off, SE_SHARED)) 04764 *(pl+1) = NULL; 04765 else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 04766 goto out; 04767 while (*pl) { 04768 ASSERT3U((*pl)->p_offset, ==, off); 04769 off += PAGESIZE; 04770 addr += PAGESIZE; 04771 if (len > 0) { 04772 ASSERT3U(len, >=, PAGESIZE); 04773 len -= PAGESIZE; 04774 } 04775 ASSERT3U(plsz, >=, PAGESIZE); 04776 plsz -= PAGESIZE; 04777 pl++; 04778 } 04779 } 04780 04781 /* 04782 * Fill out the page array with any pages already in the cache. 04783 */ 04784 while (plsz > 0 && 04785 (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 04786 off += PAGESIZE; 04787 plsz -= PAGESIZE; 04788 } 04789 out: 04790 if (err) { 04791 /* 04792 * Release any pages we have previously locked. 04793 */ 04794 while (pl > pl0) 04795 page_unlock(*--pl); 04796 } else { 04797 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 04798 } 04799 04800 *pl = NULL; 04801 04802 ZFS_EXIT(zfsvfs); 04803 return (err); 04804 } 04805 04815 /*ARGSUSED*/ 04816 static int 04817 zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 04818 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 04819 caller_context_t *ct) 04820 { 04821 znode_t *zp = VTOZ(vp); 04822 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04823 segvn_crargs_t vn_a; 04824 int error; 04825 04826 ZFS_ENTER(zfsvfs); 04827 ZFS_VERIFY_ZP(zp); 04828 04829 if ((prot & PROT_WRITE) && (zp->z_pflags & 04830 (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 04831 ZFS_EXIT(zfsvfs); 04832 return (EPERM); 04833 } 04834 04835 if ((prot & (PROT_READ | PROT_EXEC)) && 04836 (zp->z_pflags & ZFS_AV_QUARANTINED)) { 04837 ZFS_EXIT(zfsvfs); 04838 return (EACCES); 04839 } 04840 04841 if (vp->v_flag & VNOMAP) { 04842 ZFS_EXIT(zfsvfs); 04843 return (ENOSYS); 04844 } 04845 04846 if (off < 0 || len > MAXOFFSET_T - off) { 04847 ZFS_EXIT(zfsvfs); 04848 return (ENXIO); 04849 } 04850 04851 if (vp->v_type != VREG) { 04852 ZFS_EXIT(zfsvfs); 04853 return (ENODEV); 
04854 } 04855 04856 /* 04857 * If file is locked, disallow mapping. 04858 */ 04859 if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 04860 ZFS_EXIT(zfsvfs); 04861 return (EAGAIN); 04862 } 04863 04864 as_rangelock(as); 04865 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 04866 if (error != 0) { 04867 as_rangeunlock(as); 04868 ZFS_EXIT(zfsvfs); 04869 return (error); 04870 } 04871 04872 vn_a.vp = vp; 04873 vn_a.offset = (u_offset_t)off; 04874 vn_a.type = flags & MAP_TYPE; 04875 vn_a.prot = prot; 04876 vn_a.maxprot = maxprot; 04877 vn_a.cred = cr; 04878 vn_a.amp = NULL; 04879 vn_a.flags = flags & ~MAP_TYPE; 04880 vn_a.szc = 0; 04881 vn_a.lgrp_mem_policy_flags = 0; 04882 04883 error = as_map(as, *addrp, len, segvn_create, &vn_a); 04884 04885 as_rangeunlock(as); 04886 ZFS_EXIT(zfsvfs); 04887 return (error); 04888 } 04889 04890 /* ARGSUSED */ 04891 static int 04892 zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 04893 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 04894 caller_context_t *ct) 04895 { 04896 uint64_t pages = btopr(len); 04897 04898 atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 04899 return (0); 04900 } 04901 04922 /* ARGSUSED */ 04923 static int 04924 zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 04925 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 04926 caller_context_t *ct) 04927 { 04928 uint64_t pages = btopr(len); 04929 04930 ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 04931 atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 04932 04933 if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 04934 vn_has_cached_data(vp)) 04935 (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 04936 04937 return (0); 04938 } 04939 04960 /* ARGSUSED */ 04961 static int 04962 zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 04963 offset_t offset, cred_t *cr, caller_context_t *ct) 04964 { 04965 znode_t *zp = VTOZ(vp); 04966 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 04967 uint64_t off, len; 04968 
int error; 04969 04970 ZFS_ENTER(zfsvfs); 04971 ZFS_VERIFY_ZP(zp); 04972 04973 if (cmd != F_FREESP) { 04974 ZFS_EXIT(zfsvfs); 04975 return (EINVAL); 04976 } 04977 04978 if (error = convoff(vp, bfp, 0, offset)) { 04979 ZFS_EXIT(zfsvfs); 04980 return (error); 04981 } 04982 04983 if (bfp->l_len < 0) { 04984 ZFS_EXIT(zfsvfs); 04985 return (EINVAL); 04986 } 04987 04988 off = bfp->l_start; 04989 len = bfp->l_len; /* 0 means from off to end of file */ 04990 04991 error = zfs_freesp(zp, off, len, flag, TRUE); 04992 04993 ZFS_EXIT(zfsvfs); 04994 return (error); 04995 } 04996 #endif /* sun */ 04997 04998 CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 04999 CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 05000 05001 /*ARGSUSED*/ 05002 static int 05003 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 05004 { 05005 znode_t *zp = VTOZ(vp); 05006 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05007 uint32_t gen; 05008 uint64_t gen64; 05009 uint64_t object = zp->z_id; 05010 zfid_short_t *zfid; 05011 int size, i, error; 05012 05013 ZFS_ENTER(zfsvfs); 05014 ZFS_VERIFY_ZP(zp); 05015 05016 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 05017 &gen64, sizeof (uint64_t))) != 0) { 05018 ZFS_EXIT(zfsvfs); 05019 return (error); 05020 } 05021 05022 gen = (uint32_t)gen64; 05023 05024 size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 05025 fidp->fid_len = size; 05026 05027 zfid = (zfid_short_t *)fidp; 05028 05029 zfid->zf_len = size; 05030 05031 for (i = 0; i < sizeof (zfid->zf_object); i++) 05032 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 05033 05034 /* Must have a non-zero generation number to distinguish from .zfs */ 05035 if (gen == 0) 05036 gen = 1; 05037 for (i = 0; i < sizeof (zfid->zf_gen); i++) 05038 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 05039 05040 if (size == LONG_FID_LEN) { 05041 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 05042 zfid_long_t *zlfid; 05043 05044 zlfid = (zfid_long_t *)fidp; 05045 05046 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 05047 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 05048 05049 /* XXX - this should be the generation number for the objset */ 05050 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 05051 zlfid->zf_setgen[i] = 0; 05052 } 05053 05054 ZFS_EXIT(zfsvfs); 05055 return (0); 05056 } 05057 05058 static int 05059 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 05060 caller_context_t *ct) 05061 { 05062 znode_t *zp, *xzp; 05063 zfsvfs_t *zfsvfs; 05064 zfs_dirlock_t *dl; 05065 int error; 05066 05067 switch (cmd) { 05068 case _PC_LINK_MAX: 05069 *valp = INT_MAX; 05070 return (0); 05071 05072 case _PC_FILESIZEBITS: 05073 *valp = 64; 05074 return (0); 05075 #ifdef sun 05076 case _PC_XATTR_EXISTS: 05077 zp = VTOZ(vp); 05078 zfsvfs = zp->z_zfsvfs; 05079 ZFS_ENTER(zfsvfs); 05080 ZFS_VERIFY_ZP(zp); 05081 *valp = 0; 05082 error = zfs_dirent_lock(&dl, zp, "", &xzp, 05083 ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 05084 if (error == 0) { 05085 zfs_dirent_unlock(dl); 05086 if (!zfs_dirempty(xzp)) 05087 *valp = 1; 05088 VN_RELE(ZTOV(xzp)); 05089 } else if (error == ENOENT) { 05090 /* 05091 * If there aren't extended attributes, it's the 05092 * same as having zero of them. 
05093 */ 05094 error = 0; 05095 } 05096 ZFS_EXIT(zfsvfs); 05097 return (error); 05098 05099 case _PC_SATTR_ENABLED: 05100 case _PC_SATTR_EXISTS: 05101 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 05102 (vp->v_type == VREG || vp->v_type == VDIR); 05103 return (0); 05104 05105 case _PC_ACCESS_FILTERING: 05106 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 05107 vp->v_type == VDIR; 05108 return (0); 05109 05110 case _PC_ACL_ENABLED: 05111 *valp = _ACL_ACE_ENABLED; 05112 return (0); 05113 #endif /* sun */ 05114 case _PC_MIN_HOLE_SIZE: 05115 *valp = (int)SPA_MINBLOCKSIZE; 05116 return (0); 05117 #ifdef sun 05118 case _PC_TIMESTAMP_RESOLUTION: 05119 /* nanosecond timestamp resolution */ 05120 *valp = 1L; 05121 return (0); 05122 #endif /* sun */ 05123 case _PC_ACL_EXTENDED: 05124 *valp = 0; 05125 return (0); 05126 05127 case _PC_ACL_NFS4: 05128 *valp = 1; 05129 return (0); 05130 05131 case _PC_ACL_PATH_MAX: 05132 *valp = ACL_MAX_ENTRIES; 05133 return (0); 05134 05135 default: 05136 return (EOPNOTSUPP); 05137 } 05138 } 05139 05140 /*ARGSUSED*/ 05141 static int 05142 zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 05143 caller_context_t *ct) 05144 { 05145 znode_t *zp = VTOZ(vp); 05146 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05147 int error; 05148 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 05149 05150 ZFS_ENTER(zfsvfs); 05151 ZFS_VERIFY_ZP(zp); 05152 error = zfs_getacl(zp, vsecp, skipaclchk, cr); 05153 ZFS_EXIT(zfsvfs); 05154 05155 return (error); 05156 } 05157 05158 /*ARGSUSED*/ 05159 int 05160 zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 05161 caller_context_t *ct) 05162 { 05163 znode_t *zp = VTOZ(vp); 05164 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05165 int error; 05166 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 05167 zilog_t *zilog = zfsvfs->z_log; 05168 05169 ZFS_ENTER(zfsvfs); 05170 ZFS_VERIFY_ZP(zp); 05171 05172 error = zfs_setacl(zp, vsecp, skipaclchk, cr); 05173 05174 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 05175 zil_commit(zilog, 0); 05176 05177 ZFS_EXIT(zfsvfs); 05178 return (error); 05179 } 05180 05181 #ifdef sun 05182 05188 int zcr_blksz_min = (1 << 10); /* 1K */ 05196 int zcr_blksz_max = (1 << 17); /* 128K */ 05197 05198 /*ARGSUSED*/ 05199 static int 05200 zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 05201 caller_context_t *ct) 05202 { 05203 znode_t *zp = VTOZ(vp); 05204 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05205 int max_blksz = zfsvfs->z_max_blksz; 05206 uio_t *uio = &xuio->xu_uio; 05207 ssize_t size = uio->uio_resid; 05208 offset_t offset = uio->uio_loffset; 05209 int blksz; 05210 int fullblk, i; 05211 arc_buf_t *abuf; 05212 ssize_t maxsize; 05213 int preamble, postamble; 05214 05215 if (xuio->xu_type != UIOTYPE_ZEROCOPY) 05216 return (EINVAL); 05217 05218 ZFS_ENTER(zfsvfs); 05219 ZFS_VERIFY_ZP(zp); 05220 switch (ioflag) { 05221 case UIO_WRITE: 05222 /* 05223 * Loan out an arc_buf for write if write size is bigger than 05224 * max_blksz, and the file's block size is also max_blksz. 05225 */ 05226 blksz = max_blksz; 05227 if (size < blksz || zp->z_blksz != blksz) { 05228 ZFS_EXIT(zfsvfs); 05229 return (EINVAL); 05230 } 05231 /* 05232 * Caller requests buffers for write before knowing where the 05233 * write offset might be (e.g. NFS TCP write). 
05234 */ 05235 if (offset == -1) { 05236 preamble = 0; 05237 } else { 05238 preamble = P2PHASE(offset, blksz); 05239 if (preamble) { 05240 preamble = blksz - preamble; 05241 size -= preamble; 05242 } 05243 } 05244 05245 postamble = P2PHASE(size, blksz); 05246 size -= postamble; 05247 05248 fullblk = size / blksz; 05249 (void) dmu_xuio_init(xuio, 05250 (preamble != 0) + fullblk + (postamble != 0)); 05251 DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 05252 int, postamble, int, 05253 (preamble != 0) + fullblk + (postamble != 0)); 05254 05255 /* 05256 * Have to fix iov base/len for partial buffers. They 05257 * currently represent full arc_buf's. 05258 */ 05259 if (preamble) { 05260 /* data begins in the middle of the arc_buf */ 05261 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 05262 blksz); 05263 ASSERT(abuf); 05264 (void) dmu_xuio_add(xuio, abuf, 05265 blksz - preamble, preamble); 05266 } 05267 05268 for (i = 0; i < fullblk; i++) { 05269 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 05270 blksz); 05271 ASSERT(abuf); 05272 (void) dmu_xuio_add(xuio, abuf, 0, blksz); 05273 } 05274 05275 if (postamble) { 05276 /* data ends in the middle of the arc_buf */ 05277 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 05278 blksz); 05279 ASSERT(abuf); 05280 (void) dmu_xuio_add(xuio, abuf, 0, postamble); 05281 } 05282 break; 05283 case UIO_READ: 05284 /* 05285 * Loan out an arc_buf for read if the read size is larger than 05286 * the current file block size. Block alignment is not 05287 * considered. Partial arc_buf will be loaned out for read. 
05288 */ 05289 blksz = zp->z_blksz; 05290 if (blksz < zcr_blksz_min) 05291 blksz = zcr_blksz_min; 05292 if (blksz > zcr_blksz_max) 05293 blksz = zcr_blksz_max; 05294 /* avoid potential complexity of dealing with it */ 05295 if (blksz > max_blksz) { 05296 ZFS_EXIT(zfsvfs); 05297 return (EINVAL); 05298 } 05299 05300 maxsize = zp->z_size - uio->uio_loffset; 05301 if (size > maxsize) 05302 size = maxsize; 05303 05304 if (size < blksz || vn_has_cached_data(vp)) { 05305 ZFS_EXIT(zfsvfs); 05306 return (EINVAL); 05307 } 05308 break; 05309 default: 05310 ZFS_EXIT(zfsvfs); 05311 return (EINVAL); 05312 } 05313 05314 uio->uio_extflg = UIO_XUIO; 05315 XUIO_XUZC_RW(xuio) = ioflag; 05316 ZFS_EXIT(zfsvfs); 05317 return (0); 05318 } 05319 05320 /*ARGSUSED*/ 05321 static int 05322 zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 05323 { 05324 int i; 05325 arc_buf_t *abuf; 05326 int ioflag = XUIO_XUZC_RW(xuio); 05327 05328 ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 05329 05330 i = dmu_xuio_cnt(xuio); 05331 while (i-- > 0) { 05332 abuf = dmu_xuio_arcbuf(xuio, i); 05333 /* 05334 * if abuf == NULL, it must be a write buffer 05335 * that has been returned in zfs_write(). 05336 */ 05337 if (abuf) 05338 dmu_return_arcbuf(abuf); 05339 ASSERT(abuf || ioflag == UIO_WRITE); 05340 } 05341 05342 dmu_xuio_fini(xuio); 05343 return (0); 05344 } 05345 05346 /* 05347 * Predeclare these here so that the compiler assumes that 05348 * this is an "old style" function declaration that does 05349 * not include arguments => we won't get type mismatch errors 05350 * in the initializations that follow. 
05351 */ 05352 static int zfs_inval(); 05353 static int zfs_isdir(); 05354 05355 static int 05356 zfs_inval() 05357 { 05358 return (EINVAL); 05359 } 05360 05361 static int 05362 zfs_isdir() 05363 { 05364 return (EISDIR); 05365 } 05369 vnodeops_t *zfs_dvnodeops; 05370 const fs_operation_def_t zfs_dvnodeops_template[] = { 05371 VOPNAME_OPEN, { .vop_open = zfs_open }, 05372 VOPNAME_CLOSE, { .vop_close = zfs_close }, 05373 VOPNAME_READ, { .error = zfs_isdir }, 05374 VOPNAME_WRITE, { .error = zfs_isdir }, 05375 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 05376 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05377 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 05378 VOPNAME_ACCESS, { .vop_access = zfs_access }, 05379 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 05380 VOPNAME_CREATE, { .vop_create = zfs_create }, 05381 VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 05382 VOPNAME_LINK, { .vop_link = zfs_link }, 05383 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 05384 VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 05385 VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 05386 VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 05387 VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 05388 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 05389 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05390 VOPNAME_FID, { .vop_fid = zfs_fid }, 05391 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 05392 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05393 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 05394 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 05395 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05396 NULL, NULL 05397 }; 05398 05402 vnodeops_t *zfs_fvnodeops; 05403 const fs_operation_def_t zfs_fvnodeops_template[] = { 05404 VOPNAME_OPEN, { .vop_open = zfs_open }, 05405 VOPNAME_CLOSE, { .vop_close = zfs_close }, 05406 VOPNAME_READ, { .vop_read = zfs_read }, 05407 VOPNAME_WRITE, { .vop_write = zfs_write }, 05408 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 05409 
VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05410 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 05411 VOPNAME_ACCESS, { .vop_access = zfs_access }, 05412 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 05413 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 05414 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 05415 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05416 VOPNAME_FID, { .vop_fid = zfs_fid }, 05417 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 05418 VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 05419 VOPNAME_SPACE, { .vop_space = zfs_space }, 05420 VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 05421 VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 05422 VOPNAME_MAP, { .vop_map = zfs_map }, 05423 VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 05424 VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 05425 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05426 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 05427 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 05428 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05429 VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 05430 VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 05431 NULL, NULL 05432 }; 05433 05437 vnodeops_t *zfs_symvnodeops; 05438 const fs_operation_def_t zfs_symvnodeops_template[] = { 05439 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05440 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 05441 VOPNAME_ACCESS, { .vop_access = zfs_access }, 05442 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 05443 VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 05444 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05445 VOPNAME_FID, { .vop_fid = zfs_fid }, 05446 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05447 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05448 NULL, NULL 05449 }; 05450 05454 vnodeops_t *zfs_sharevnodeops; 05455 const fs_operation_def_t zfs_sharevnodeops_template[] = { 05456 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05457 
VOPNAME_ACCESS, { .vop_access = zfs_access }, 05458 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05459 VOPNAME_FID, { .vop_fid = zfs_fid }, 05460 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05461 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 05462 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 05463 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05464 NULL, NULL 05465 }; 05466 05479 vnodeops_t *zfs_xdvnodeops; 05480 const fs_operation_def_t zfs_xdvnodeops_template[] = { 05481 VOPNAME_OPEN, { .vop_open = zfs_open }, 05482 VOPNAME_CLOSE, { .vop_close = zfs_close }, 05483 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 05484 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 05485 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 05486 VOPNAME_ACCESS, { .vop_access = zfs_access }, 05487 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 05488 VOPNAME_CREATE, { .vop_create = zfs_create }, 05489 VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 05490 VOPNAME_LINK, { .vop_link = zfs_link }, 05491 VOPNAME_RENAME, { .vop_rename = zfs_rename }, 05492 VOPNAME_MKDIR, { .error = zfs_inval }, 05493 VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 05494 VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 05495 VOPNAME_SYMLINK, { .error = zfs_inval }, 05496 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 05497 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05498 VOPNAME_FID, { .vop_fid = zfs_fid }, 05499 VOPNAME_SEEK, { .vop_seek = zfs_seek }, 05500 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05501 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 05502 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 05503 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 05504 NULL, NULL 05505 }; 05506 05510 vnodeops_t *zfs_evnodeops; 05511 const fs_operation_def_t zfs_evnodeops_template[] = { 05512 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 05513 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 05514 NULL, NULL 05515 }; 05516 
#endif /* sun */ 05517 05518 static int 05519 ioflags(int ioflags) 05520 { 05521 int flags = 0; 05522 05523 if (ioflags & IO_APPEND) 05524 flags |= FAPPEND; 05525 if (ioflags & IO_NDELAY) 05526 flags |= FNONBLOCK; 05527 if (ioflags & IO_SYNC) 05528 flags |= (FSYNC | FDSYNC | FRSYNC); 05529 05530 return (flags); 05531 } 05532 05533 static int 05534 zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) 05535 { 05536 znode_t *zp = VTOZ(vp); 05537 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 05538 objset_t *os = zp->z_zfsvfs->z_os; 05539 vm_page_t mreq; 05540 vm_object_t object; 05541 caddr_t va; 05542 struct sf_buf *sf; 05543 int i, error; 05544 int pcount, size; 05545 05546 ZFS_ENTER(zfsvfs); 05547 ZFS_VERIFY_ZP(zp); 05548 05549 pcount = round_page(count) / PAGE_SIZE; 05550 mreq = m[reqpage]; 05551 object = mreq->object; 05552 error = 0; 05553 05554 KASSERT(vp->v_object == object, ("mismatching object")); 05555 05556 VM_OBJECT_LOCK(object); 05557 05558 for (i = 0; i < pcount; i++) { 05559 if (i != reqpage) { 05560 vm_page_lock(m[i]); 05561 vm_page_free(m[i]); 05562 vm_page_unlock(m[i]); 05563 } 05564 } 05565 05566 if (mreq->valid) { 05567 if (mreq->valid != VM_PAGE_BITS_ALL) 05568 vm_page_zero_invalid(mreq, TRUE); 05569 VM_OBJECT_UNLOCK(object); 05570 ZFS_EXIT(zfsvfs); 05571 return (VM_PAGER_OK); 05572 } 05573 05574 PCPU_INC(cnt.v_vnodein); 05575 PCPU_INC(cnt.v_vnodepgsin); 05576 05577 if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { 05578 VM_OBJECT_UNLOCK(object); 05579 ZFS_EXIT(zfsvfs); 05580 return (VM_PAGER_BAD); 05581 } 05582 05583 size = PAGE_SIZE; 05584 if (IDX_TO_OFF(mreq->pindex) + size > object->un_pager.vnp.vnp_size) 05585 size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mreq->pindex); 05586 05587 VM_OBJECT_UNLOCK(object); 05588 va = zfs_map_page(mreq, &sf); 05589 error = dmu_read(os, zp->z_id, IDX_TO_OFF(mreq->pindex), 05590 size, va, DMU_READ_PREFETCH); 05591 if (size != PAGE_SIZE) 05592 bzero(va + size, PAGE_SIZE - size); 05593 
zfs_unmap_page(sf); 05594 VM_OBJECT_LOCK(object); 05595 05596 if (!error) 05597 mreq->valid = VM_PAGE_BITS_ALL; 05598 KASSERT(mreq->dirty == 0, ("zfs_getpages: page %p is dirty", mreq)); 05599 05600 VM_OBJECT_UNLOCK(object); 05601 05602 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 05603 ZFS_EXIT(zfsvfs); 05604 return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 05605 } 05606 05607 static int 05608 zfs_freebsd_getpages(ap) 05609 struct vop_getpages_args /* { 05610 struct vnode *a_vp; 05611 vm_page_t *a_m; 05612 int a_count; 05613 int a_reqpage; 05614 vm_ooffset_t a_offset; 05615 } */ *ap; 05616 { 05617 05618 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); 05619 } 05620 05621 static int 05622 zfs_freebsd_open(ap) 05623 struct vop_open_args /* { 05624 struct vnode *a_vp; 05625 int a_mode; 05626 struct ucred *a_cred; 05627 struct thread *a_td; 05628 } */ *ap; 05629 { 05630 vnode_t *vp = ap->a_vp; 05631 znode_t *zp = VTOZ(vp); 05632 int error; 05633 05634 error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); 05635 if (error == 0) 05636 vnode_create_vobject(vp, zp->z_size, ap->a_td); 05637 return (error); 05638 } 05639 05640 static int 05641 zfs_freebsd_close(ap) 05642 struct vop_close_args /* { 05643 struct vnode *a_vp; 05644 int a_fflag; 05645 struct ucred *a_cred; 05646 struct thread *a_td; 05647 } */ *ap; 05648 { 05649 05650 return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred, NULL)); 05651 } 05652 05653 static int 05654 zfs_freebsd_ioctl(ap) 05655 struct vop_ioctl_args /* { 05656 struct vnode *a_vp; 05657 u_long a_command; 05658 caddr_t a_data; 05659 int a_fflag; 05660 struct ucred *cred; 05661 struct thread *td; 05662 } */ *ap; 05663 { 05664 05665 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 05666 ap->a_fflag, ap->a_cred, NULL, NULL)); 05667 } 05668 05669 static int 05670 zfs_freebsd_read(ap) 05671 struct vop_read_args /* { 05672 struct vnode *a_vp; 05673 struct uio *a_uio; 05674 int a_ioflag; 05675 struct ucred *a_cred; 05676 } */ 
*ap; 05677 { 05678 05679 return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 05680 ap->a_cred, NULL)); 05681 } 05682 05683 static int 05684 zfs_freebsd_write(ap) 05685 struct vop_write_args /* { 05686 struct vnode *a_vp; 05687 struct uio *a_uio; 05688 int a_ioflag; 05689 struct ucred *a_cred; 05690 } */ *ap; 05691 { 05692 05693 return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), 05694 ap->a_cred, NULL)); 05695 } 05696 05697 static int 05698 zfs_freebsd_access(ap) 05699 struct vop_access_args /* { 05700 struct vnode *a_vp; 05701 accmode_t a_accmode; 05702 struct ucred *a_cred; 05703 struct thread *a_td; 05704 } */ *ap; 05705 { 05706 vnode_t *vp = ap->a_vp; 05707 znode_t *zp = VTOZ(vp); 05708 accmode_t accmode; 05709 int error = 0; 05710 05711 /* 05712 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 05713 */ 05714 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 05715 if (accmode != 0) 05716 error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL); 05717 05718 /* 05719 * VADMIN has to be handled by vaccess(). 05720 */ 05721 if (error == 0) { 05722 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 05723 if (accmode != 0) { 05724 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 05725 zp->z_gid, accmode, ap->a_cred, NULL); 05726 } 05727 } 05728 05729 /* 05730 * For VEXEC, ensure that at least one execute bit is set for 05731 * non-directories. 
05732 */ 05733 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 05734 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 05735 error = EACCES; 05736 } 05737 05738 return (error); 05739 } 05740 05741 static int 05742 zfs_freebsd_lookup(ap) 05743 struct vop_lookup_args /* { 05744 struct vnode *a_dvp; 05745 struct vnode **a_vpp; 05746 struct componentname *a_cnp; 05747 } */ *ap; 05748 { 05749 struct componentname *cnp = ap->a_cnp; 05750 char nm[NAME_MAX + 1]; 05751 05752 ASSERT(cnp->cn_namelen < sizeof(nm)); 05753 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); 05754 05755 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 05756 cnp->cn_cred, cnp->cn_thread, 0)); 05757 } 05758 05759 static int 05760 zfs_freebsd_create(ap) 05761 struct vop_create_args /* { 05762 struct vnode *a_dvp; 05763 struct vnode **a_vpp; 05764 struct componentname *a_cnp; 05765 struct vattr *a_vap; 05766 } */ *ap; 05767 { 05768 struct componentname *cnp = ap->a_cnp; 05769 vattr_t *vap = ap->a_vap; 05770 int mode; 05771 05772 ASSERT(cnp->cn_flags & SAVENAME); 05773 05774 vattr_init_mask(vap); 05775 mode = vap->va_mode & ALLPERMS; 05776 05777 return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode, 05778 ap->a_vpp, cnp->cn_cred, cnp->cn_thread)); 05779 } 05780 05781 static int 05782 zfs_freebsd_remove(ap) 05783 struct vop_remove_args /* { 05784 struct vnode *a_dvp; 05785 struct vnode *a_vp; 05786 struct componentname *a_cnp; 05787 } */ *ap; 05788 { 05789 05790 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 05791 05792 return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr, 05793 ap->a_cnp->cn_cred, NULL, 0)); 05794 } 05795 05796 static int 05797 zfs_freebsd_mkdir(ap) 05798 struct vop_mkdir_args /* { 05799 struct vnode *a_dvp; 05800 struct vnode **a_vpp; 05801 struct componentname *a_cnp; 05802 struct vattr *a_vap; 05803 } */ *ap; 05804 { 05805 vattr_t *vap = ap->a_vap; 05806 05807 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 05808 05809 
vattr_init_mask(vap); 05810 05811 return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp, 05812 ap->a_cnp->cn_cred, NULL, 0, NULL)); 05813 } 05814 05815 static int 05816 zfs_freebsd_rmdir(ap) 05817 struct vop_rmdir_args /* { 05818 struct vnode *a_dvp; 05819 struct vnode *a_vp; 05820 struct componentname *a_cnp; 05821 } */ *ap; 05822 { 05823 struct componentname *cnp = ap->a_cnp; 05824 05825 ASSERT(cnp->cn_flags & SAVENAME); 05826 05827 return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); 05828 } 05829 05830 static int 05831 zfs_freebsd_readdir(ap) 05832 struct vop_readdir_args /* { 05833 struct vnode *a_vp; 05834 struct uio *a_uio; 05835 struct ucred *a_cred; 05836 int *a_eofflag; 05837 int *a_ncookies; 05838 u_long **a_cookies; 05839 } */ *ap; 05840 { 05841 05842 return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 05843 ap->a_ncookies, ap->a_cookies)); 05844 } 05845 05846 static int 05847 zfs_freebsd_fsync(ap) 05848 struct vop_fsync_args /* { 05849 struct vnode *a_vp; 05850 int a_waitfor; 05851 struct thread *a_td; 05852 } */ *ap; 05853 { 05854 05855 vop_stdfsync(ap); 05856 return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); 05857 } 05858 05859 static int 05860 zfs_freebsd_getattr(ap) 05861 struct vop_getattr_args /* { 05862 struct vnode *a_vp; 05863 struct vattr *a_vap; 05864 struct ucred *a_cred; 05865 } */ *ap; 05866 { 05867 vattr_t *vap = ap->a_vap; 05868 xvattr_t xvap; 05869 u_long fflags = 0; 05870 int error; 05871 05872 xva_init(&xvap); 05873 xvap.xva_vattr = *vap; 05874 xvap.xva_vattr.va_mask |= AT_XVATTR; 05875 05876 /* Convert chflags into ZFS-type flags. */ 05877 /* XXX: what about SF_SETTABLE?. 
*/ 05878 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 05879 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 05880 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 05881 XVA_SET_REQ(&xvap, XAT_NODUMP); 05882 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 05883 if (error != 0) 05884 return (error); 05885 05886 /* Convert ZFS xattr into chflags. */ 05887 #define FLAG_CHECK(fflag, xflag, xfield) do { \ 05888 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 05889 fflags |= (fflag); \ 05890 } while (0) 05891 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 05892 xvap.xva_xoptattrs.xoa_immutable); 05893 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 05894 xvap.xva_xoptattrs.xoa_appendonly); 05895 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 05896 xvap.xva_xoptattrs.xoa_nounlink); 05897 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 05898 xvap.xva_xoptattrs.xoa_nodump); 05899 #undef FLAG_CHECK 05900 *vap = xvap.xva_vattr; 05901 vap->va_flags = fflags; 05902 return (0); 05903 } 05904 05905 static int 05906 zfs_freebsd_setattr(ap) 05907 struct vop_setattr_args /* { 05908 struct vnode *a_vp; 05909 struct vattr *a_vap; 05910 struct ucred *a_cred; 05911 } */ *ap; 05912 { 05913 vnode_t *vp = ap->a_vp; 05914 vattr_t *vap = ap->a_vap; 05915 cred_t *cred = ap->a_cred; 05916 xvattr_t xvap; 05917 u_long fflags; 05918 uint64_t zflags; 05919 05920 vattr_init_mask(vap); 05921 vap->va_mask &= ~AT_NOSET; 05922 05923 xva_init(&xvap); 05924 xvap.xva_vattr = *vap; 05925 05926 zflags = VTOZ(vp)->z_pflags; 05927 05928 if (vap->va_flags != VNOVAL) { 05929 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 05930 int error; 05931 05932 if (zfsvfs->z_use_fuids == B_FALSE) 05933 return (EOPNOTSUPP); 05934 05935 fflags = vap->va_flags; 05936 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 05937 return (EOPNOTSUPP); 05938 /* 05939 * Unprivileged processes are not permitted to unset system 05940 * flags, or modify flags if any system flags are set. 
05941 * Privileged non-jail processes may not modify system flags 05942 * if securelevel > 0 and any existing system flags are set. 05943 * Privileged jail processes behave like privileged non-jail 05944 * processes if the security.jail.chflags_allowed sysctl is 05945 * is non-zero; otherwise, they behave like unprivileged 05946 * processes. 05947 */ 05948 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 05949 priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0) == 0) { 05950 if (zflags & 05951 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 05952 error = securelevel_gt(cred, 0); 05953 if (error != 0) 05954 return (error); 05955 } 05956 } else { 05957 /* 05958 * Callers may only modify the file flags on objects they 05959 * have VADMIN rights for. 05960 */ 05961 if ((error = VOP_ACCESS(vp, VADMIN, cred, curthread)) != 0) 05962 return (error); 05963 if (zflags & 05964 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 05965 return (EPERM); 05966 } 05967 if (fflags & 05968 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 05969 return (EPERM); 05970 } 05971 } 05972 05973 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 05974 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 05975 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 05976 XVA_SET_REQ(&xvap, (xflag)); \ 05977 (xfield) = ((fflags & (fflag)) != 0); \ 05978 } \ 05979 } while (0) 05980 /* Convert chflags into ZFS-type flags. */ 05981 /* XXX: what about SF_SETTABLE?. 
*/ 05982 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 05983 xvap.xva_xoptattrs.xoa_immutable); 05984 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 05985 xvap.xva_xoptattrs.xoa_appendonly); 05986 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 05987 xvap.xva_xoptattrs.xoa_nounlink); 05988 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 05989 xvap.xva_xoptattrs.xoa_nodump); 05990 #undef FLAG_CHANGE 05991 } 05992 return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL)); 05993 } 05994 05995 static int 05996 zfs_freebsd_rename(ap) 05997 struct vop_rename_args /* { 05998 struct vnode *a_fdvp; 05999 struct vnode *a_fvp; 06000 struct componentname *a_fcnp; 06001 struct vnode *a_tdvp; 06002 struct vnode *a_tvp; 06003 struct componentname *a_tcnp; 06004 } */ *ap; 06005 { 06006 vnode_t *fdvp = ap->a_fdvp; 06007 vnode_t *fvp = ap->a_fvp; 06008 vnode_t *tdvp = ap->a_tdvp; 06009 vnode_t *tvp = ap->a_tvp; 06010 int error; 06011 06012 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 06013 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 06014 06015 error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp, 06016 ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); 06017 06018 if (tdvp == tvp) 06019 VN_RELE(tdvp); 06020 else 06021 VN_URELE(tdvp); 06022 if (tvp) 06023 VN_URELE(tvp); 06024 VN_RELE(fdvp); 06025 VN_RELE(fvp); 06026 06027 return (error); 06028 } 06029 06030 static int 06031 zfs_freebsd_symlink(ap) 06032 struct vop_symlink_args /* { 06033 struct vnode *a_dvp; 06034 struct vnode **a_vpp; 06035 struct componentname *a_cnp; 06036 struct vattr *a_vap; 06037 char *a_target; 06038 } */ *ap; 06039 { 06040 struct componentname *cnp = ap->a_cnp; 06041 vattr_t *vap = ap->a_vap; 06042 06043 ASSERT(cnp->cn_flags & SAVENAME); 06044 06045 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 06046 vattr_init_mask(vap); 06047 06048 return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap, 06049 ap->a_target, cnp->cn_cred, cnp->cn_thread)); 06050 } 06051 06052 static int 06053 zfs_freebsd_readlink(ap) 06054 struct vop_readlink_args /* { 06055 struct vnode *a_vp; 06056 struct uio *a_uio; 06057 struct ucred *a_cred; 06058 } */ *ap; 06059 { 06060 06061 return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 06062 } 06063 06064 static int 06065 zfs_freebsd_link(ap) 06066 struct vop_link_args /* { 06067 struct vnode *a_tdvp; 06068 struct vnode *a_vp; 06069 struct componentname *a_cnp; 06070 } */ *ap; 06071 { 06072 struct componentname *cnp = ap->a_cnp; 06073 06074 ASSERT(cnp->cn_flags & SAVENAME); 06075 06076 return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); 06077 } 06078 06079 static int 06080 zfs_freebsd_inactive(ap) 06081 struct vop_inactive_args /* { 06082 struct vnode *a_vp; 06083 struct thread *a_td; 06084 } */ *ap; 06085 { 06086 vnode_t *vp = ap->a_vp; 06087 06088 zfs_inactive(vp, ap->a_td->td_ucred, NULL); 06089 return (0); 06090 } 06091 06092 static void 06093 zfs_reclaim_complete(void *arg, int pending) 06094 { 06095 znode_t *zp = arg; 06096 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 06097 06098 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 06099 if (zp->z_sa_hdl != NULL) { 06100 ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 06101 zfs_znode_dmu_fini(zp); 06102 ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 06103 } 06104 zfs_znode_free(zp); 06105 rw_exit(&zfsvfs->z_teardown_inactive_lock); 06106 /* 06107 * If the file system is being unmounted, there is a process waiting 06108 * for us, wake it up. 
06109 */ 06110 if (zfsvfs->z_unmounted) 06111 wakeup_one(zfsvfs); 06112 } 06113 06114 static int 06115 zfs_freebsd_reclaim(ap) 06116 struct vop_reclaim_args /* { 06117 struct vnode *a_vp; 06118 struct thread *a_td; 06119 } */ *ap; 06120 { 06121 vnode_t *vp = ap->a_vp; 06122 znode_t *zp = VTOZ(vp); 06123 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 06124 boolean_t rlocked; 06125 06126 rlocked = rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 06127 06128 ASSERT(zp != NULL); 06129 06130 /* 06131 * Destroy the vm object and flush associated pages. 06132 */ 06133 vnode_destroy_vobject(vp); 06134 06135 mutex_enter(&zp->z_lock); 06136 zp->z_vnode = NULL; 06137 mutex_exit(&zp->z_lock); 06138 06139 if (zp->z_unlinked) { 06140 ; /* Do nothing. */ 06141 } else if (!rlocked) { 06142 TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 06143 taskqueue_enqueue(taskqueue_thread, &zp->z_task); 06144 } else if (zp->z_sa_hdl == NULL) { 06145 zfs_znode_free(zp); 06146 } else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ { 06147 int locked; 06148 06149 locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 : 06150 ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id); 06151 if (locked == 0) { 06152 /* 06153 * Lock can't be obtained due to deadlock possibility, 06154 * so defer znode destruction. 
06155 */ 06156 TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); 06157 taskqueue_enqueue(taskqueue_thread, &zp->z_task); 06158 } else { 06159 zfs_znode_dmu_fini(zp); 06160 if (locked == 1) 06161 ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 06162 zfs_znode_free(zp); 06163 } 06164 } 06165 VI_LOCK(vp); 06166 vp->v_data = NULL; 06167 ASSERT(vp->v_holdcnt >= 1); 06168 VI_UNLOCK(vp); 06169 if (rlocked) 06170 rw_exit(&zfsvfs->z_teardown_inactive_lock); 06171 return (0); 06172 } 06173 06174 static int 06175 zfs_freebsd_fid(ap) 06176 struct vop_fid_args /* { 06177 struct vnode *a_vp; 06178 struct fid *a_fid; 06179 } */ *ap; 06180 { 06181 06182 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 06183 } 06184 06185 static int 06186 zfs_freebsd_pathconf(ap) 06187 struct vop_pathconf_args /* { 06188 struct vnode *a_vp; 06189 int a_name; 06190 register_t *a_retval; 06191 } */ *ap; 06192 { 06193 ulong_t val; 06194 int error; 06195 06196 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); 06197 if (error == 0) 06198 *ap->a_retval = val; 06199 else if (error == EOPNOTSUPP) 06200 error = vop_stdpathconf(ap); 06201 return (error); 06202 } 06203 06204 static int 06205 zfs_freebsd_fifo_pathconf(ap) 06206 struct vop_pathconf_args /* { 06207 struct vnode *a_vp; 06208 int a_name; 06209 register_t *a_retval; 06210 } */ *ap; 06211 { 06212 06213 switch (ap->a_name) { 06214 case _PC_ACL_EXTENDED: 06215 case _PC_ACL_NFS4: 06216 case _PC_ACL_PATH_MAX: 06217 case _PC_MAC_PRESENT: 06218 return (zfs_freebsd_pathconf(ap)); 06219 default: 06220 return (fifo_specops.vop_pathconf(ap)); 06221 } 06222 } 06223 06224 /* 06225 * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 06226 * extended attribute name: 06227 * 06228 * NAMESPACE PREFIX 06229 * system freebsd:system: 06230 * user (none, can be used to access ZFS fsattr(5) attributes 06231 * created on Solaris) 06232 */ 06233 static int 06234 zfs_create_attrname(int attrnamespace, const char *name, char 
*attrname, 06235 size_t size) 06236 { 06237 const char *namespace, *prefix, *suffix; 06238 06239 /* We don't allow '/' character in attribute name. */ 06240 if (strchr(name, '/') != NULL) 06241 return (EINVAL); 06242 /* We don't allow attribute names that start with "freebsd:" string. */ 06243 if (strncmp(name, "freebsd:", 8) == 0) 06244 return (EINVAL); 06245 06246 bzero(attrname, size); 06247 06248 switch (attrnamespace) { 06249 case EXTATTR_NAMESPACE_USER: 06250 #if 0 06251 prefix = "freebsd:"; 06252 namespace = EXTATTR_NAMESPACE_USER_STRING; 06253 suffix = ":"; 06254 #else 06255 /* 06256 * This is the default namespace by which we can access all 06257 * attributes created on Solaris. 06258 */ 06259 prefix = namespace = suffix = ""; 06260 #endif 06261 break; 06262 case EXTATTR_NAMESPACE_SYSTEM: 06263 prefix = "freebsd:"; 06264 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 06265 suffix = ":"; 06266 break; 06267 case EXTATTR_NAMESPACE_EMPTY: 06268 default: 06269 return (EINVAL); 06270 } 06271 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 06272 name) >= size) { 06273 return (ENAMETOOLONG); 06274 } 06275 return (0); 06276 } 06277 06281 static int 06282 zfs_getextattr(struct vop_getextattr_args *ap) 06283 /* 06284 vop_getextattr { 06285 IN struct vnode *a_vp; 06286 IN int a_attrnamespace; 06287 IN const char *a_name; 06288 INOUT struct uio *a_uio; 06289 OUT size_t *a_size; 06290 IN struct ucred *a_cred; 06291 IN struct thread *a_td; 06292 }; 06293 */ 06294 { 06295 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 06296 struct thread *td = ap->a_td; 06297 struct nameidata nd; 06298 char attrname[255]; 06299 struct vattr va; 06300 vnode_t *xvp = NULL, *vp; 06301 int error, flags; 06302 06303 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 06304 ap->a_cred, ap->a_td, VREAD); 06305 if (error != 0) 06306 return (error); 06307 06308 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 06309 sizeof(attrname)); 06310 if (error != 
0) 06311 return (error); 06312 06313 ZFS_ENTER(zfsvfs); 06314 06315 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 06316 LOOKUP_XATTR); 06317 if (error != 0) { 06318 ZFS_EXIT(zfsvfs); 06319 return (error); 06320 } 06321 06322 flags = FREAD; 06323 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 06324 xvp, td); 06325 error = vn_open_cred(&nd, &flags, 0, 0, ap->a_cred, NULL); 06326 vp = nd.ni_vp; 06327 NDFREE(&nd, NDF_ONLY_PNBUF); 06328 if (error != 0) { 06329 ZFS_EXIT(zfsvfs); 06330 if (error == ENOENT) 06331 error = ENOATTR; 06332 return (error); 06333 } 06334 06335 if (ap->a_size != NULL) { 06336 error = VOP_GETATTR(vp, &va, ap->a_cred); 06337 if (error == 0) 06338 *ap->a_size = (size_t)va.va_size; 06339 } else if (ap->a_uio != NULL) 06340 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 06341 06342 VOP_UNLOCK(vp, 0); 06343 vn_close(vp, flags, ap->a_cred, td); 06344 ZFS_EXIT(zfsvfs); 06345 06346 return (error); 06347 } 06348 06349 /* 06350 * Vnode operation to remove a named attribute. 
06351 */ 06352 int 06353 zfs_deleteextattr(struct vop_deleteextattr_args *ap) 06354 /* 06355 vop_deleteextattr { 06356 IN struct vnode *a_vp; 06357 IN int a_attrnamespace; 06358 IN const char *a_name; 06359 IN struct ucred *a_cred; 06360 IN struct thread *a_td; 06361 }; 06362 */ 06363 { 06364 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 06365 struct thread *td = ap->a_td; 06366 struct nameidata nd; 06367 char attrname[255]; 06368 struct vattr va; 06369 vnode_t *xvp = NULL, *vp; 06370 int error, flags; 06371 06372 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 06373 ap->a_cred, ap->a_td, VWRITE); 06374 if (error != 0) 06375 return (error); 06376 06377 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 06378 sizeof(attrname)); 06379 if (error != 0) 06380 return (error); 06381 06382 ZFS_ENTER(zfsvfs); 06383 06384 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 06385 LOOKUP_XATTR); 06386 if (error != 0) { 06387 ZFS_EXIT(zfsvfs); 06388 return (error); 06389 } 06390 06391 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 06392 UIO_SYSSPACE, attrname, xvp, td); 06393 error = namei(&nd); 06394 vp = nd.ni_vp; 06395 NDFREE(&nd, NDF_ONLY_PNBUF); 06396 if (error != 0) { 06397 ZFS_EXIT(zfsvfs); 06398 if (error == ENOENT) 06399 error = ENOATTR; 06400 return (error); 06401 } 06402 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 06403 06404 vput(nd.ni_dvp); 06405 if (vp == nd.ni_dvp) 06406 vrele(vp); 06407 else 06408 vput(vp); 06409 ZFS_EXIT(zfsvfs); 06410 06411 return (error); 06412 } 06413 06417 static int 06418 zfs_setextattr(struct vop_setextattr_args *ap) 06419 /* 06420 vop_setextattr { 06421 IN struct vnode *a_vp; 06422 IN int a_attrnamespace; 06423 IN const char *a_name; 06424 INOUT struct uio *a_uio; 06425 IN struct ucred *a_cred; 06426 IN struct thread *a_td; 06427 }; 06428 */ 06429 { 06430 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 06431 struct thread *td = ap->a_td; 06432 struct nameidata nd; 06433 char 
attrname[255]; 06434 struct vattr va; 06435 vnode_t *xvp = NULL, *vp; 06436 int error, flags; 06437 06438 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 06439 ap->a_cred, ap->a_td, VWRITE); 06440 if (error != 0) 06441 return (error); 06442 06443 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 06444 sizeof(attrname)); 06445 if (error != 0) 06446 return (error); 06447 06448 ZFS_ENTER(zfsvfs); 06449 06450 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 06451 LOOKUP_XATTR | CREATE_XATTR_DIR); 06452 if (error != 0) { 06453 ZFS_EXIT(zfsvfs); 06454 return (error); 06455 } 06456 06457 flags = FFLAGS(O_WRONLY | O_CREAT); 06458 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 06459 xvp, td); 06460 error = vn_open_cred(&nd, &flags, 0600, 0, ap->a_cred, NULL); 06461 vp = nd.ni_vp; 06462 NDFREE(&nd, NDF_ONLY_PNBUF); 06463 if (error != 0) { 06464 ZFS_EXIT(zfsvfs); 06465 return (error); 06466 } 06467 06468 VATTR_NULL(&va); 06469 va.va_size = 0; 06470 error = VOP_SETATTR(vp, &va, ap->a_cred); 06471 if (error == 0) 06472 VOP_WRITE(vp, ap->a_uio, IO_UNIT | IO_SYNC, ap->a_cred); 06473 06474 VOP_UNLOCK(vp, 0); 06475 vn_close(vp, flags, ap->a_cred, td); 06476 ZFS_EXIT(zfsvfs); 06477 06478 return (error); 06479 } 06480 06484 static int 06485 zfs_listextattr(struct vop_listextattr_args *ap) 06486 /* 06487 vop_listextattr { 06488 IN struct vnode *a_vp; 06489 IN int a_attrnamespace; 06490 INOUT struct uio *a_uio; 06491 OUT size_t *a_size; 06492 IN struct ucred *a_cred; 06493 IN struct thread *a_td; 06494 }; 06495 */ 06496 { 06497 zfsvfs_t *zfsvfs = VTOZ(ap->a_vp)->z_zfsvfs; 06498 struct thread *td = ap->a_td; 06499 struct nameidata nd; 06500 char attrprefix[16]; 06501 u_char dirbuf[sizeof(struct dirent)]; 06502 struct dirent *dp; 06503 struct iovec aiov; 06504 struct uio auio, *uio = ap->a_uio; 06505 size_t *sizep = ap->a_size; 06506 size_t plen; 06507 vnode_t *xvp = NULL, *vp; 06508 int done, error, eof, pos; 06509 06510 error 
= extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 06511 ap->a_cred, ap->a_td, VREAD); 06512 if (error != 0) 06513 return (error); 06514 06515 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 06516 sizeof(attrprefix)); 06517 if (error != 0) 06518 return (error); 06519 plen = strlen(attrprefix); 06520 06521 ZFS_ENTER(zfsvfs); 06522 06523 if (sizep != NULL) 06524 *sizep = 0; 06525 06526 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 06527 LOOKUP_XATTR); 06528 if (error != 0) { 06529 ZFS_EXIT(zfsvfs); 06530 /* 06531 * ENOATTR means that the EA directory does not yet exist, 06532 * i.e. there are no extended attributes there. 06533 */ 06534 if (error == ENOATTR) 06535 error = 0; 06536 return (error); 06537 } 06538 06539 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 06540 UIO_SYSSPACE, ".", xvp, td); 06541 error = namei(&nd); 06542 vp = nd.ni_vp; 06543 NDFREE(&nd, NDF_ONLY_PNBUF); 06544 if (error != 0) { 06545 ZFS_EXIT(zfsvfs); 06546 return (error); 06547 } 06548 06549 auio.uio_iov = &aiov; 06550 auio.uio_iovcnt = 1; 06551 auio.uio_segflg = UIO_SYSSPACE; 06552 auio.uio_td = td; 06553 auio.uio_rw = UIO_READ; 06554 auio.uio_offset = 0; 06555 06556 do { 06557 u_char nlen; 06558 06559 aiov.iov_base = (void *)dirbuf; 06560 aiov.iov_len = sizeof(dirbuf); 06561 auio.uio_resid = sizeof(dirbuf); 06562 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 06563 done = sizeof(dirbuf) - auio.uio_resid; 06564 if (error != 0) 06565 break; 06566 for (pos = 0; pos < done;) { 06567 dp = (struct dirent *)(dirbuf + pos); 06568 pos += dp->d_reclen; 06569 /* 06570 * XXX: Temporarily we also accept DT_UNKNOWN, as this 06571 * is what we get when attribute was created on Solaris. 
06572 */ 06573 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 06574 continue; 06575 if (plen == 0 && strncmp(dp->d_name, "freebsd:", 8) == 0) 06576 continue; 06577 else if (strncmp(dp->d_name, attrprefix, plen) != 0) 06578 continue; 06579 nlen = dp->d_namlen - plen; 06580 if (sizep != NULL) 06581 *sizep += 1 + nlen; 06582 else if (uio != NULL) { 06583 /* 06584 * Format of extattr name entry is one byte for 06585 * length and the rest for name. 06586 */ 06587 error = uiomove(&nlen, 1, uio->uio_rw, uio); 06588 if (error == 0) { 06589 error = uiomove(dp->d_name + plen, nlen, 06590 uio->uio_rw, uio); 06591 } 06592 if (error != 0) 06593 break; 06594 } 06595 } 06596 } while (!eof && error == 0); 06597 06598 vput(vp); 06599 ZFS_EXIT(zfsvfs); 06600 06601 return (error); 06602 } 06603 06604 int 06605 zfs_freebsd_getacl(ap) 06606 struct vop_getacl_args /* { 06607 struct vnode *vp; 06608 acl_type_t type; 06609 struct acl *aclp; 06610 struct ucred *cred; 06611 struct thread *td; 06612 } */ *ap; 06613 { 06614 int error; 06615 vsecattr_t vsecattr; 06616 06617 if (ap->a_type != ACL_TYPE_NFS4) 06618 return (EINVAL); 06619 06620 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 06621 if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)) 06622 return (error); 06623 06624 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt); 06625 if (vsecattr.vsa_aclentp != NULL) 06626 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 06627 06628 return (error); 06629 } 06630 06631 int 06632 zfs_freebsd_setacl(ap) 06633 struct vop_setacl_args /* { 06634 struct vnode *vp; 06635 acl_type_t type; 06636 struct acl *aclp; 06637 struct ucred *cred; 06638 struct thread *td; 06639 } */ *ap; 06640 { 06641 int error; 06642 vsecattr_t vsecattr; 06643 int aclbsize; /* size of acl list in bytes */ 06644 aclent_t *aaclp; 06645 06646 if (ap->a_type != ACL_TYPE_NFS4) 06647 return (EINVAL); 06648 06649 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 
06650 return (EINVAL); 06651 06652 /* 06653 * With NFSv4 ACLs, chmod(2) may need to add additional entries, 06654 * splitting every entry into two and appending "canonical six" 06655 * entries at the end. Don't allow for setting an ACL that would 06656 * cause chmod(2) to run out of ACL entries. 06657 */ 06658 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 06659 return (ENOSPC); 06660 06661 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 06662 if (error != 0) 06663 return (error); 06664 06665 vsecattr.vsa_mask = VSA_ACE; 06666 aclbsize = ap->a_aclp->acl_cnt * sizeof(ace_t); 06667 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 06668 aaclp = vsecattr.vsa_aclentp; 06669 vsecattr.vsa_aclentsz = aclbsize; 06670 06671 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 06672 error = zfs_setsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL); 06673 kmem_free(aaclp, aclbsize); 06674 06675 return (error); 06676 } 06677 06678 int 06679 zfs_freebsd_aclcheck(ap) 06680 struct vop_aclcheck_args /* { 06681 struct vnode *vp; 06682 acl_type_t type; 06683 struct acl *aclp; 06684 struct ucred *cred; 06685 struct thread *td; 06686 } */ *ap; 06687 { 06688 06689 return (EOPNOTSUPP); 06690 } 06691 06692 struct vop_vector zfs_vnodeops; 06693 struct vop_vector zfs_fifoops; 06694 struct vop_vector zfs_shareops; 06695 06696 struct vop_vector zfs_vnodeops = { 06697 .vop_default = &default_vnodeops, 06698 .vop_inactive = zfs_freebsd_inactive, 06699 .vop_reclaim = zfs_freebsd_reclaim, 06700 .vop_access = zfs_freebsd_access, 06701 #ifdef FREEBSD_NAMECACHE 06702 .vop_lookup = vfs_cache_lookup, 06703 .vop_cachedlookup = zfs_freebsd_lookup, 06704 #else 06705 .vop_lookup = zfs_freebsd_lookup, 06706 #endif 06707 .vop_getattr = zfs_freebsd_getattr, 06708 .vop_setattr = zfs_freebsd_setattr, 06709 .vop_create = zfs_freebsd_create, 06710 .vop_mknod = zfs_freebsd_create, 06711 .vop_mkdir = zfs_freebsd_mkdir, 06712 .vop_readdir = zfs_freebsd_readdir, 06713 
.vop_fsync = zfs_freebsd_fsync, 06714 .vop_open = zfs_freebsd_open, 06715 .vop_close = zfs_freebsd_close, 06716 .vop_rmdir = zfs_freebsd_rmdir, 06717 .vop_ioctl = zfs_freebsd_ioctl, 06718 .vop_link = zfs_freebsd_link, 06719 .vop_symlink = zfs_freebsd_symlink, 06720 .vop_readlink = zfs_freebsd_readlink, 06721 .vop_read = zfs_freebsd_read, 06722 .vop_write = zfs_freebsd_write, 06723 .vop_remove = zfs_freebsd_remove, 06724 .vop_rename = zfs_freebsd_rename, 06725 .vop_pathconf = zfs_freebsd_pathconf, 06726 .vop_bmap = VOP_EOPNOTSUPP, 06727 .vop_fid = zfs_freebsd_fid, 06728 .vop_getextattr = zfs_getextattr, 06729 .vop_deleteextattr = zfs_deleteextattr, 06730 .vop_setextattr = zfs_setextattr, 06731 .vop_listextattr = zfs_listextattr, 06732 .vop_getacl = zfs_freebsd_getacl, 06733 .vop_setacl = zfs_freebsd_setacl, 06734 .vop_aclcheck = zfs_freebsd_aclcheck, 06735 .vop_getpages = zfs_freebsd_getpages, 06736 }; 06737 06738 struct vop_vector zfs_fifoops = { 06739 .vop_default = &fifo_specops, 06740 .vop_fsync = zfs_freebsd_fsync, 06741 .vop_access = zfs_freebsd_access, 06742 .vop_getattr = zfs_freebsd_getattr, 06743 .vop_inactive = zfs_freebsd_inactive, 06744 .vop_read = VOP_PANIC, 06745 .vop_reclaim = zfs_freebsd_reclaim, 06746 .vop_setattr = zfs_freebsd_setattr, 06747 .vop_write = VOP_PANIC, 06748 .vop_pathconf = zfs_freebsd_fifo_pathconf, 06749 .vop_fid = zfs_freebsd_fid, 06750 .vop_getacl = zfs_freebsd_getacl, 06751 .vop_setacl = zfs_freebsd_setacl, 06752 .vop_aclcheck = zfs_freebsd_aclcheck, 06753 }; 06754 06755 /* 06756 * special share hidden files vnode operations template 06757 */ 06758 struct vop_vector zfs_shareops = { 06759 .vop_default = &default_vnodeops, 06760 .vop_access = zfs_freebsd_access, 06761 .vop_inactive = zfs_freebsd_inactive, 06762 .vop_reclaim = zfs_freebsd_reclaim, 06763 .vop_fid = zfs_freebsd_fid, 06764 .vop_pathconf = zfs_freebsd_pathconf, 06765 };