FreeBSD ZFS
The Zettabyte File System

zfs_replay.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 /*
00022  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
00023  */
00024 
00025 #include <sys/types.h>
00026 #include <sys/param.h>
00027 #include <sys/systm.h>
00028 #include <sys/sysmacros.h>
00029 #include <sys/cmn_err.h>
00030 #include <sys/kmem.h>
00031 #include <sys/file.h>
00032 #include <sys/fcntl.h>
00033 #include <sys/vfs.h>
00034 #include <sys/fs/zfs.h>
00035 #include <sys/zfs_znode.h>
00036 #include <sys/zfs_dir.h>
00037 #include <sys/zfs_acl.h>
00038 #include <sys/zfs_fuid.h>
00039 #include <sys/spa.h>
00040 #include <sys/zil.h>
00041 #include <sys/byteorder.h>
00042 #include <sys/stat.h>
00043 #include <sys/acl.h>
00044 #include <sys/atomic.h>
00045 #include <sys/cred.h>
00046 #include <sys/namei.h>
00047 
00055 static void
00056 zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode,
00057         uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid)
00058 {
00059         VATTR_NULL(vap);
00060         vap->va_mask = (uint_t)mask;
00061         if (mask & AT_TYPE)
00062                 vap->va_type = IFTOVT(mode);
00063         if (mask & AT_MODE)
00064                 vap->va_mode = mode & MODEMASK;
00065         if (mask & AT_UID)
00066                 vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid;
00067         if (mask & AT_GID)
00068                 vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid;
00069         vap->va_rdev = zfs_cmpldev(rdev);
00070         vap->va_nodeid = nodeid;
00071 }
00072 
00073 /* ARGSUSED */
00074 static int
00075 zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap)
00076 {
00077         return (ENOTSUP);
00078 }
00079 
00080 static void
00081 zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
00082 {
00083         xoptattr_t *xoap = NULL;
00084         uint64_t *attrs;
00085         uint64_t *crtime;
00086         uint32_t *bitmap;
00087         void *scanstamp;
00088         int i;
00089 
00090         xvap->xva_vattr.va_mask |= AT_XVATTR;
00091         if ((xoap = xva_getxoptattr(xvap)) == NULL) {
00092                 xvap->xva_vattr.va_mask &= ~AT_XVATTR; /* shouldn't happen */
00093                 return;
00094         }
00095 
00096         ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize);
00097 
00098         bitmap = &lrattr->lr_attr_bitmap;
00099         for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++)
00100                 xvap->xva_reqattrmap[i] = *bitmap;
00101 
00102         attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1);
00103         crtime = attrs + 1;
00104         scanstamp = (caddr_t)(crtime + 2);
00105 
00106         if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
00107                 xoap->xoa_hidden = ((*attrs & XAT0_HIDDEN) != 0);
00108         if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
00109                 xoap->xoa_system = ((*attrs & XAT0_SYSTEM) != 0);
00110         if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
00111                 xoap->xoa_archive = ((*attrs & XAT0_ARCHIVE) != 0);
00112         if (XVA_ISSET_REQ(xvap, XAT_READONLY))
00113                 xoap->xoa_readonly = ((*attrs & XAT0_READONLY) != 0);
00114         if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
00115                 xoap->xoa_immutable = ((*attrs & XAT0_IMMUTABLE) != 0);
00116         if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
00117                 xoap->xoa_nounlink = ((*attrs & XAT0_NOUNLINK) != 0);
00118         if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
00119                 xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0);
00120         if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
00121                 xoap->xoa_nodump = ((*attrs & XAT0_NODUMP) != 0);
00122         if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
00123                 xoap->xoa_opaque = ((*attrs & XAT0_OPAQUE) != 0);
00124         if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
00125                 xoap->xoa_av_modified = ((*attrs & XAT0_AV_MODIFIED) != 0);
00126         if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
00127                 xoap->xoa_av_quarantined =
00128                     ((*attrs & XAT0_AV_QUARANTINED) != 0);
00129         if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
00130                 ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime);
00131         if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
00132                 bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ);
00133         if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
00134                 xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0);
00135         if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
00136                 xoap->xoa_offline = ((*attrs & XAT0_OFFLINE) != 0);
00137         if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
00138                 xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0);
00139 }
00140 
/*
 * Count how many distinct FUID domains are referenced by a uid/gid pair.
 *
 * A non-zero FUID_INDEX() on the uid counts as one domain; a non-zero
 * index on the gid counts as another unless it equals the uid's index.
 * The result is therefore 0, 1 or 2.
 */
static int
zfs_replay_domain_cnt(uint64_t uid, uint64_t gid)
{
	uint64_t owner_idx = FUID_INDEX(uid);
	uint64_t group_idx = FUID_INDEX(gid);
	int count = (owner_idx != 0) ? 1 : 0;

	if (group_idx != 0 && group_idx != owner_idx)
		count++;

	return (count);
}
00157 
00158 static void *
00159 zfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start,
00160     int domcnt)
00161 {
00162         int i;
00163 
00164         for (i = 0; i != domcnt; i++) {
00165                 fuid_infop->z_domain_table[i] = start;
00166                 start = (caddr_t)start + strlen(start) + 1;
00167         }
00168 
00169         return (start);
00170 }
00171 
00175 static void
00176 zfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid)
00177 {
00178         /*
00179          * If owner or group are log specific FUIDs then slurp up
00180          * domain information and build zfs_fuid_info_t
00181          */
00182         if (IS_EPHEMERAL(uid))
00183                 fuid_infop->z_fuid_owner = uid;
00184 
00185         if (IS_EPHEMERAL(gid))
00186                 fuid_infop->z_fuid_group = gid;
00187 }
00188 
00192 static zfs_fuid_info_t *
00193 zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid)
00194 {
00195         int domcnt;
00196 
00197         zfs_fuid_info_t *fuid_infop;
00198 
00199         fuid_infop = zfs_fuid_info_alloc();
00200 
00201         domcnt = zfs_replay_domain_cnt(uid, gid);
00202 
00203         if (domcnt == 0)
00204                 return (fuid_infop);
00205 
00206         fuid_infop->z_domain_table =
00207             kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP);
00208 
00209         zfs_replay_fuid_ugid(fuid_infop, uid, gid);
00210 
00211         fuid_infop->z_domain_cnt = domcnt;
00212         *end = zfs_replay_fuid_domain_common(fuid_infop, buf, domcnt);
00213         return (fuid_infop);
00214 }
00215 
00219 static zfs_fuid_info_t *
00220 zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid,
00221     uint64_t gid)
00222 {
00223         uint64_t *log_fuid = (uint64_t *)start;
00224         zfs_fuid_info_t *fuid_infop;
00225         int i;
00226 
00227         fuid_infop = zfs_fuid_info_alloc();
00228         fuid_infop->z_domain_cnt = domcnt;
00229 
00230         fuid_infop->z_domain_table =
00231             kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP);
00232 
00233         for (i = 0; i != idcnt; i++) {
00234                 zfs_fuid_t *zfuid;
00235 
00236                 zfuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP);
00237                 zfuid->z_logfuid = *log_fuid;
00238                 zfuid->z_id = -1;
00239                 zfuid->z_domidx = 0;
00240                 list_insert_tail(&fuid_infop->z_fuids, zfuid);
00241                 log_fuid++;
00242         }
00243 
00244         zfs_replay_fuid_ugid(fuid_infop, uid, gid);
00245 
00246         *end = zfs_replay_fuid_domain_common(fuid_infop, log_fuid, domcnt);
00247         return (fuid_infop);
00248 }
00249 
00250 static void
00251 zfs_replay_swap_attrs(lr_attr_t *lrattr)
00252 {
00253         /* swap the lr_attr structure */
00254         byteswap_uint32_array(lrattr, sizeof (*lrattr));
00255         /* swap the bitmap */
00256         byteswap_uint32_array(lrattr + 1, (lrattr->lr_attr_masksize - 1) *
00257             sizeof (uint32_t));
00258         /* swap the attributes, create time + 64 bit word for attributes */
00259         byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) *
00260             (lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t));
00261 }
00262 
00267 static int
00268 zfs_replay_create_acl(zfsvfs_t *zfsvfs,
00269     lr_acl_create_t *lracl, boolean_t byteswap)
00270 {
00271         char *name = NULL;              /* location determined later */
00272         lr_create_t *lr = (lr_create_t *)lracl;
00273         znode_t *dzp;
00274         vnode_t *vp = NULL;
00275         xvattr_t xva;
00276         int vflg = 0;
00277         vsecattr_t vsec = { 0 };
00278         lr_attr_t *lrattr;
00279         void *aclstart;
00280         void *fuidstart;
00281         size_t xvatlen = 0;
00282         uint64_t txtype;
00283         int error;
00284 
00285         txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
00286         if (byteswap) {
00287                 byteswap_uint64_array(lracl, sizeof (*lracl));
00288                 if (txtype == TX_CREATE_ACL_ATTR ||
00289                     txtype == TX_MKDIR_ACL_ATTR) {
00290                         lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
00291                         zfs_replay_swap_attrs(lrattr);
00292                         xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
00293                 }
00294 
00295                 aclstart = (caddr_t)(lracl + 1) + xvatlen;
00296                 zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE);
00297                 /* swap fuids */
00298                 if (lracl->lr_fuidcnt) {
00299                         byteswap_uint64_array((caddr_t)aclstart +
00300                             ZIL_ACE_LENGTH(lracl->lr_acl_bytes),
00301                             lracl->lr_fuidcnt * sizeof (uint64_t));
00302                 }
00303         }
00304 
00305         if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
00306                 return (error);
00307 
00308         xva_init(&xva);
00309         zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
00310             lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
00311 
00312         /*
00313          * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
00314          * eventually end up in zfs_mknode(), which assigns the object's
00315          * creation time and generation number.  The generic VOP_CREATE()
00316          * doesn't have either concept, so we smuggle the values inside
00317          * the vattr's otherwise unused va_ctime and va_nblocks fields.
00318          */
00319         ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
00320         xva.xva_vattr.va_nblocks = lr->lr_gen;
00321 
00322         error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
00323         if (error != ENOENT)
00324                 goto bail;
00325 
00326         if (lr->lr_common.lrc_txtype & TX_CI)
00327                 vflg |= FIGNORECASE;
00328         switch (txtype) {
00329         case TX_CREATE_ACL:
00330                 aclstart = (caddr_t)(lracl + 1);
00331                 fuidstart = (caddr_t)aclstart +
00332                     ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
00333                 zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
00334                     (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
00335                     lr->lr_uid, lr->lr_gid);
00336                 /*FALLTHROUGH*/
00337         case TX_CREATE_ACL_ATTR:
00338                 if (name == NULL) {
00339                         lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
00340                         xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
00341                         xva.xva_vattr.va_mask |= AT_XVATTR;
00342                         zfs_replay_xvattr(lrattr, &xva);
00343                 }
00344                 vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
00345                 vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
00346                 vsec.vsa_aclcnt = lracl->lr_aclcnt;
00347                 vsec.vsa_aclentsz = lracl->lr_acl_bytes;
00348                 vsec.vsa_aclflags = lracl->lr_acl_flags;
00349                 if (zfsvfs->z_fuid_replay == NULL) {
00350                         fuidstart = (caddr_t)(lracl + 1) + xvatlen +
00351                             ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
00352                         zfsvfs->z_fuid_replay =
00353                             zfs_replay_fuids(fuidstart,
00354                             (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
00355                             lr->lr_uid, lr->lr_gid);
00356                 }
00357 
00358 #ifdef TODO
00359                 error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr,
00360                     0, 0, &vp, kcred, vflg, NULL, &vsec);
00361 #else
00362                 panic("%s:%u: unsupported condition", __func__, __LINE__);
00363 #endif
00364                 break;
00365         case TX_MKDIR_ACL:
00366                 aclstart = (caddr_t)(lracl + 1);
00367                 fuidstart = (caddr_t)aclstart +
00368                     ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
00369                 zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
00370                     (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
00371                     lr->lr_uid, lr->lr_gid);
00372                 /*FALLTHROUGH*/
00373         case TX_MKDIR_ACL_ATTR:
00374                 if (name == NULL) {
00375                         lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
00376                         xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
00377                         zfs_replay_xvattr(lrattr, &xva);
00378                 }
00379                 vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
00380                 vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
00381                 vsec.vsa_aclcnt = lracl->lr_aclcnt;
00382                 vsec.vsa_aclentsz = lracl->lr_acl_bytes;
00383                 vsec.vsa_aclflags = lracl->lr_acl_flags;
00384                 if (zfsvfs->z_fuid_replay == NULL) {
00385                         fuidstart = (caddr_t)(lracl + 1) + xvatlen +
00386                             ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
00387                         zfsvfs->z_fuid_replay =
00388                             zfs_replay_fuids(fuidstart,
00389                             (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
00390                             lr->lr_uid, lr->lr_gid);
00391                 }
00392 #ifdef TODO
00393                 error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr,
00394                     &vp, kcred, NULL, vflg, &vsec);
00395 #else
00396                 panic("%s:%u: unsupported condition", __func__, __LINE__);
00397 #endif
00398                 break;
00399         default:
00400                 error = ENOTSUP;
00401         }
00402 
00403 bail:
00404         if (error == 0 && vp != NULL)
00405                 VN_RELE(vp);
00406 
00407         VN_RELE(ZTOV(dzp));
00408 
00409         if (zfsvfs->z_fuid_replay)
00410                 zfs_fuid_info_free(zfsvfs->z_fuid_replay);
00411         zfsvfs->z_fuid_replay = NULL;
00412 
00413         return (error);
00414 }
00415 
00416 static int
00417 zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
00418 {
00419         char *name = NULL;              /* location determined later */
00420         char *link;                     /* symlink content follows name */
00421         znode_t *dzp;
00422         vnode_t *vp = NULL;
00423         xvattr_t xva;
00424         int vflg = 0;
00425         size_t lrsize = sizeof (lr_create_t);
00426         lr_attr_t *lrattr;
00427         void *start;
00428         size_t xvatlen;
00429         uint64_t txtype;
00430         struct componentname cn;
00431         int error;
00432 
00433         txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
00434         if (byteswap) {
00435                 byteswap_uint64_array(lr, sizeof (*lr));
00436                 if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR)
00437                         zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
00438         }
00439 
00440 
00441         if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
00442                 return (error);
00443 
00444         xva_init(&xva);
00445         zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
00446             lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
00447 
00448         /*
00449          * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
00450          * eventually end up in zfs_mknode(), which assigns the object's
00451          * creation time and generation number.  The generic VOP_CREATE()
00452          * doesn't have either concept, so we smuggle the values inside
00453          * the vattr's otherwise unused va_ctime and va_nblocks fields.
00454          */
00455         ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
00456         xva.xva_vattr.va_nblocks = lr->lr_gen;
00457 
00458         error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
00459         if (error != ENOENT)
00460                 goto out;
00461 
00462         if (lr->lr_common.lrc_txtype & TX_CI)
00463                 vflg |= FIGNORECASE;
00464 
00465         /*
00466          * Symlinks don't have fuid info, and CIFS never creates
00467          * symlinks.
00468          *
00469          * The _ATTR versions will grab the fuid info in their subcases.
00470          */
00471         if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK &&
00472             (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
00473             (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
00474                 start = (lr + 1);
00475                 zfsvfs->z_fuid_replay =
00476                     zfs_replay_fuid_domain(start, &start,
00477                     lr->lr_uid, lr->lr_gid);
00478         }
00479 
00480         cn.cn_cred = kcred;
00481         cn.cn_thread = curthread;
00482         cn.cn_flags = SAVENAME;
00483 
00484         vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY);
00485         switch (txtype) {
00486         case TX_CREATE_ATTR:
00487                 lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
00488                 xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
00489                 zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
00490                 start = (caddr_t)(lr + 1) + xvatlen;
00491                 zfsvfs->z_fuid_replay =
00492                     zfs_replay_fuid_domain(start, &start,
00493                     lr->lr_uid, lr->lr_gid);
00494                 name = (char *)start;
00495 
00496                 /*FALLTHROUGH*/
00497         case TX_CREATE:
00498                 if (name == NULL)
00499                         name = (char *)start;
00500 
00501                 cn.cn_nameptr = name;
00502                 error = VOP_CREATE(ZTOV(dzp), &vp, &cn, &xva.xva_vattr /*,vflg*/);
00503                 break;
00504         case TX_MKDIR_ATTR:
00505                 lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
00506                 xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
00507                 zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
00508                 start = (caddr_t)(lr + 1) + xvatlen;
00509                 zfsvfs->z_fuid_replay =
00510                     zfs_replay_fuid_domain(start, &start,
00511                     lr->lr_uid, lr->lr_gid);
00512                 name = (char *)start;
00513 
00514                 /*FALLTHROUGH*/
00515         case TX_MKDIR:
00516                 if (name == NULL)
00517                         name = (char *)(lr + 1);
00518 
00519                 cn.cn_nameptr = name;
00520                 error = VOP_MKDIR(ZTOV(dzp), &vp, &cn, &xva.xva_vattr /*,vflg*/);
00521                 break;
00522         case TX_MKXATTR:
00523                 error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred);
00524                 break;
00525         case TX_SYMLINK:
00526                 name = (char *)(lr + 1);
00527                 link = name + strlen(name) + 1;
00528                 cn.cn_nameptr = name;
00529                 error = VOP_SYMLINK(ZTOV(dzp), &vp, &cn, &xva.xva_vattr, link /*,vflg*/);
00530                 break;
00531         default:
00532                 error = ENOTSUP;
00533         }
00534         VOP_UNLOCK(ZTOV(dzp), 0);
00535 
00536 out:
00537         if (error == 0 && vp != NULL)
00538                 VN_URELE(vp);
00539 
00540         VN_RELE(ZTOV(dzp));
00541 
00542         if (zfsvfs->z_fuid_replay)
00543                 zfs_fuid_info_free(zfsvfs->z_fuid_replay);
00544         zfsvfs->z_fuid_replay = NULL;
00545         return (error);
00546 }
00547 
00548 static int
00549 zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap)
00550 {
00551         char *name = (char *)(lr + 1);  /* name follows lr_remove_t */
00552         znode_t *dzp;
00553         struct componentname cn;
00554         vnode_t *vp;
00555         int error;
00556         int vflg = 0;
00557 
00558         if (byteswap)
00559                 byteswap_uint64_array(lr, sizeof (*lr));
00560 
00561         if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
00562                 return (error);
00563 
00564         if (lr->lr_common.lrc_txtype & TX_CI)
00565                 vflg |= FIGNORECASE;
00566         cn.cn_nameptr = name;
00567         cn.cn_namelen = strlen(name);
00568         cn.cn_nameiop = DELETE;
00569         cn.cn_flags = ISLASTCN | SAVENAME;
00570         cn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
00571         cn.cn_cred = kcred;
00572         cn.cn_thread = curthread;
00573         vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY);
00574         error = VOP_LOOKUP(ZTOV(dzp), &vp, &cn);
00575         if (error != 0) {
00576                 VOP_UNLOCK(ZTOV(dzp), 0);
00577                 goto fail;
00578         }
00579 
00580         switch ((int)lr->lr_common.lrc_txtype) {
00581         case TX_REMOVE:
00582                 error = VOP_REMOVE(ZTOV(dzp), vp, &cn /*,vflg*/);
00583                 break;
00584         case TX_RMDIR:
00585                 error = VOP_RMDIR(ZTOV(dzp), vp, &cn /*,vflg*/);
00586                 break;
00587         default:
00588                 error = ENOTSUP;
00589         }
00590         vput(vp);
00591         VOP_UNLOCK(ZTOV(dzp), 0);
00592 
00593 fail:
00594         VN_RELE(ZTOV(dzp));
00595 
00596         return (error);
00597 }
00598 
00599 static int
00600 zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap)
00601 {
00602         char *name = (char *)(lr + 1);  /* name follows lr_link_t */
00603         znode_t *dzp, *zp;
00604         struct componentname cn;
00605         int error;
00606         int vflg = 0;
00607 
00608         if (byteswap)
00609                 byteswap_uint64_array(lr, sizeof (*lr));
00610 
00611         if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
00612                 return (error);
00613 
00614         if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) {
00615                 VN_RELE(ZTOV(dzp));
00616                 return (error);
00617         }
00618 
00619         if (lr->lr_common.lrc_txtype & TX_CI)
00620                 vflg |= FIGNORECASE;
00621 
00622         cn.cn_nameptr = name;
00623         cn.cn_cred = kcred;
00624         cn.cn_thread = curthread;
00625         cn.cn_flags = SAVENAME;
00626 
00627         vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY);
00628         vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
00629         error = VOP_LINK(ZTOV(dzp), ZTOV(zp), &cn /*,vflg*/);
00630         VOP_UNLOCK(ZTOV(zp), 0);
00631         VOP_UNLOCK(ZTOV(dzp), 0);
00632 
00633         VN_RELE(ZTOV(zp));
00634         VN_RELE(ZTOV(dzp));
00635 
00636         return (error);
00637 }
00638 
00639 static int
00640 zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap)
00641 {
00642         char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
00643         char *tname = sname + strlen(sname) + 1;
00644         znode_t *sdzp, *tdzp;
00645         struct componentname scn, tcn;
00646         vnode_t *svp, *tvp;
00647         kthread_t *td = curthread;
00648         int error;
00649         int vflg = 0;
00650 
00651         if (byteswap)
00652                 byteswap_uint64_array(lr, sizeof (*lr));
00653 
00654         if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
00655                 return (error);
00656 
00657         if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) {
00658                 VN_RELE(ZTOV(sdzp));
00659                 return (error);
00660         }
00661 
00662         if (lr->lr_common.lrc_txtype & TX_CI)
00663                 vflg |= FIGNORECASE;
00664         svp = tvp = NULL;
00665 
00666         scn.cn_nameptr = sname;
00667         scn.cn_namelen = strlen(sname);
00668         scn.cn_nameiop = DELETE;
00669         scn.cn_flags = ISLASTCN | SAVENAME;
00670         scn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
00671         scn.cn_cred = kcred;
00672         scn.cn_thread = td;
00673         vn_lock(ZTOV(sdzp), LK_EXCLUSIVE | LK_RETRY);
00674         error = VOP_LOOKUP(ZTOV(sdzp), &svp, &scn);
00675         VOP_UNLOCK(ZTOV(sdzp), 0);
00676         if (error != 0)
00677                 goto fail;
00678         VOP_UNLOCK(svp, 0);
00679 
00680         tcn.cn_nameptr = tname;
00681         tcn.cn_namelen = strlen(tname);
00682         tcn.cn_nameiop = RENAME;
00683         tcn.cn_flags = ISLASTCN | SAVENAME;
00684         tcn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
00685         tcn.cn_cred = kcred;
00686         tcn.cn_thread = td;
00687         vn_lock(ZTOV(tdzp), LK_EXCLUSIVE | LK_RETRY);
00688         error = VOP_LOOKUP(ZTOV(tdzp), &tvp, &tcn);
00689         if (error == EJUSTRETURN)
00690                 tvp = NULL;
00691         else if (error != 0) {
00692                 VOP_UNLOCK(ZTOV(tdzp), 0);
00693                 goto fail;
00694         }
00695 
00696         error = VOP_RENAME(ZTOV(sdzp), svp, &scn, ZTOV(tdzp), tvp, &tcn /*,vflg*/);
00697         return (error);
00698 fail:
00699         if (svp != NULL)
00700                 vrele(svp);
00701         if (tvp != NULL)
00702                 vrele(tvp);
00703         VN_RELE(ZTOV(tdzp));
00704         VN_RELE(ZTOV(sdzp));
00705 
00706         return (error);
00707 }
00708 
00709 static int
00710 zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
00711 {
00712         char *data = (char *)(lr + 1);  /* data follows lr_write_t */
00713         znode_t *zp;
00714         int error;
00715         ssize_t resid;
00716         uint64_t eod, offset, length;
00717 
00718         if (byteswap)
00719                 byteswap_uint64_array(lr, sizeof (*lr));
00720 
00721         if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
00722                 /*
00723                  * As we can log writes out of order, it's possible the
00724                  * file has been removed. In this case just drop the write
00725                  * and return success.
00726                  */
00727                 if (error == ENOENT)
00728                         error = 0;
00729                 return (error);
00730         }
00731 
00732         offset = lr->lr_offset;
00733         length = lr->lr_length;
00734         eod = offset + length;  /* end of data for this write */
00735 
00736         /*
00737          * This may be a write from a dmu_sync() for a whole block,
00738          * and may extend beyond the current end of the file.
00739          * We can't just replay what was written for this TX_WRITE as
00740          * a future TX_WRITE2 may extend the eof and the data for that
00741          * write needs to be there. So we write the whole block and
00742          * reduce the eof. This needs to be done within the single dmu
00743          * transaction created within vn_rdwr -> zfs_write. So a possible
00744          * new end of file is passed through in zfsvfs->z_replay_eof
00745          */
00746 
00747         zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */
00748 
00749         /* If it's a dmu_sync() block, write the whole block */
00750         if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
00751                 uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
00752                 if (length < blocksize) {
00753                         offset -= offset % blocksize;
00754                         length = blocksize;
00755                 }
00756                 if (zp->z_size < eod)
00757                         zfsvfs->z_replay_eof = eod;
00758         }
00759 
00760         error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, length, offset,
00761             UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
00762 
00763         VN_RELE(ZTOV(zp));
00764         zfsvfs->z_replay_eof = 0;       /* safety */
00765 
00766         return (error);
00767 }
00768 
00775 static int
00776 zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
00777 {
00778         znode_t *zp;
00779         int error;
00780         uint64_t end;
00781 
00782         if (byteswap)
00783                 byteswap_uint64_array(lr, sizeof (*lr));
00784 
00785         if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
00786                 return (error);
00787 
00788 top:
00789         end = lr->lr_offset + lr->lr_length;
00790         if (end > zp->z_size) {
00791                 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
00792 
00793                 zp->z_size = end;
00794                 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
00795                 error = dmu_tx_assign(tx, TXG_WAIT);
00796                 if (error) {
00797                         VN_RELE(ZTOV(zp));
00798                         if (error == ERESTART) {
00799                                 dmu_tx_wait(tx);
00800                                 dmu_tx_abort(tx);
00801                                 goto top;
00802                         }
00803                         dmu_tx_abort(tx);
00804                         return (error);
00805                 }
00806                 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
00807                     (void *)&zp->z_size, sizeof (uint64_t), tx);
00808 
00809                 /* Ensure the replayed seq is updated */
00810                 (void) zil_replaying(zfsvfs->z_log, tx);
00811 
00812                 dmu_tx_commit(tx);
00813         }
00814 
00815         VN_RELE(ZTOV(zp));
00816 
00817         return (error);
00818 }
00819 
00820 static int
00821 zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
00822 {
00823 #ifdef sun
00824         znode_t *zp;
00825         flock64_t fl;
00826         int error;
00827 
00828         if (byteswap)
00829                 byteswap_uint64_array(lr, sizeof (*lr));
00830 
00831         if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
00832                 return (error);
00833 
00834         bzero(&fl, sizeof (fl));
00835         fl.l_type = F_WRLCK;
00836         fl.l_whence = 0;
00837         fl.l_start = lr->lr_offset;
00838         fl.l_len = lr->lr_length;
00839 
00840         error = VOP_SPACE(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX,
00841             lr->lr_offset, kcred, NULL);
00842 
00843         VN_RELE(ZTOV(zp));
00844 
00845         return (error);
00846 #else   /* !sun */
00847         ZFS_LOG(0, "Unexpected code path, report to pjd@FreeBSD.org");
00848         return (EOPNOTSUPP);
00849 #endif  /* !sun */
00850 }
00851 
00852 static int
00853 zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap)
00854 {
00855         znode_t *zp;
00856         xvattr_t xva;
00857         vattr_t *vap = &xva.xva_vattr;
00858         vnode_t *vp;
00859         int error;
00860         void *start;
00861 
00862         xva_init(&xva);
00863         if (byteswap) {
00864                 byteswap_uint64_array(lr, sizeof (*lr));
00865 
00866                 if ((lr->lr_mask & AT_XVATTR) &&
00867                     zfsvfs->z_version >= ZPL_VERSION_INITIAL)
00868                         zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
00869         }
00870 
00871         if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
00872                 return (error);
00873 
00874         zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode,
00875             lr->lr_uid, lr->lr_gid, 0, lr->lr_foid);
00876 
00877         vap->va_size = lr->lr_size;
00878         ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime);
00879         ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime);
00880 
00881         /*
00882          * Fill in xvattr_t portions if necessary.
00883          */
00884 
00885         start = (lr_setattr_t *)(lr + 1);
00886         if (vap->va_mask & AT_XVATTR) {
00887                 zfs_replay_xvattr((lr_attr_t *)start, &xva);
00888                 start = (caddr_t)start +
00889                     ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize);
00890         } else
00891                 xva.xva_vattr.va_mask &= ~AT_XVATTR;
00892 
00893         zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
00894             lr->lr_uid, lr->lr_gid);
00895 
00896         vp = ZTOV(zp);
00897         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
00898         error = VOP_SETATTR(vp, vap, kcred);
00899         VOP_UNLOCK(vp, 0);
00900 
00901         zfs_fuid_info_free(zfsvfs->z_fuid_replay);
00902         zfsvfs->z_fuid_replay = NULL;
00903         VN_RELE(vp);
00904 
00905         return (error);
00906 }
00907 
00908 extern int zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
00909     caller_context_t *ct);
00910 
00911 static int
00912 zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
00913 {
00914         ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */
00915         vsecattr_t vsa;
00916         vnode_t *vp;
00917         znode_t *zp;
00918         int error;
00919 
00920         if (byteswap) {
00921                 byteswap_uint64_array(lr, sizeof (*lr));
00922                 zfs_oldace_byteswap(ace, lr->lr_aclcnt);
00923         }
00924 
00925         if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
00926                 return (error);
00927 
00928         bzero(&vsa, sizeof (vsa));
00929         vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
00930         vsa.vsa_aclcnt = lr->lr_aclcnt;
00931         vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt;
00932         vsa.vsa_aclflags = 0;
00933         vsa.vsa_aclentp = ace;
00934 
00935         vp = ZTOV(zp);
00936         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
00937         error = zfs_setsecattr(vp, &vsa, 0, kcred, NULL);
00938         VOP_UNLOCK(vp, 0);
00939 
00940         VN_RELE(vp);
00941 
00942         return (error);
00943 }
00944 
00959 static int
00960 zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
00961 {
00962         ace_t *ace = (ace_t *)(lr + 1);
00963         vsecattr_t vsa;
00964         znode_t *zp;
00965         vnode_t *vp;
00966         int error;
00967 
00968         if (byteswap) {
00969                 byteswap_uint64_array(lr, sizeof (*lr));
00970                 zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE);
00971                 if (lr->lr_fuidcnt) {
00972                         byteswap_uint64_array((caddr_t)ace +
00973                             ZIL_ACE_LENGTH(lr->lr_acl_bytes),
00974                             lr->lr_fuidcnt * sizeof (uint64_t));
00975                 }
00976         }
00977 
00978         if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
00979                 return (error);
00980 
00981         bzero(&vsa, sizeof (vsa));
00982         vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS;
00983         vsa.vsa_aclcnt = lr->lr_aclcnt;
00984         vsa.vsa_aclentp = ace;
00985         vsa.vsa_aclentsz = lr->lr_acl_bytes;
00986         vsa.vsa_aclflags = lr->lr_acl_flags;
00987 
00988         if (lr->lr_fuidcnt) {
00989                 void *fuidstart = (caddr_t)ace +
00990                     ZIL_ACE_LENGTH(lr->lr_acl_bytes);
00991 
00992                 zfsvfs->z_fuid_replay =
00993                     zfs_replay_fuids(fuidstart, &fuidstart,
00994                     lr->lr_fuidcnt, lr->lr_domcnt, 0, 0);
00995         }
00996 
00997         vp = ZTOV(zp);
00998         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
00999         error = zfs_setsecattr(vp, &vsa, 0, kcred, NULL);
01000         VOP_UNLOCK(vp, 0);
01001 
01002         if (zfsvfs->z_fuid_replay)
01003                 zfs_fuid_info_free(zfsvfs->z_fuid_replay);
01004 
01005         zfsvfs->z_fuid_replay = NULL;
01006         VN_RELE(vp);
01007 
01008         return (error);
01009 }
01010 
01014 zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = {
01015         zfs_replay_error,       /* 0 no such transaction type */
01016         zfs_replay_create,      /* TX_CREATE */
01017         zfs_replay_create,      /* TX_MKDIR */
01018         zfs_replay_create,      /* TX_MKXATTR */
01019         zfs_replay_create,      /* TX_SYMLINK */
01020         zfs_replay_remove,      /* TX_REMOVE */
01021         zfs_replay_remove,      /* TX_RMDIR */
01022         zfs_replay_link,        /* TX_LINK */
01023         zfs_replay_rename,      /* TX_RENAME */
01024         zfs_replay_write,       /* TX_WRITE */
01025         zfs_replay_truncate,    /* TX_TRUNCATE */
01026         zfs_replay_setattr,     /* TX_SETATTR */
01027         zfs_replay_acl_v0,      /* TX_ACL_V0 */
01028         zfs_replay_acl,         /* TX_ACL */
01029         zfs_replay_create_acl,  /* TX_CREATE_ACL */
01030         zfs_replay_create,      /* TX_CREATE_ATTR */
01031         zfs_replay_create_acl,  /* TX_CREATE_ACL_ATTR */
01032         zfs_replay_create_acl,  /* TX_MKDIR_ACL */
01033         zfs_replay_create,      /* TX_MKDIR_ATTR */
01034         zfs_replay_create_acl,  /* TX_MKDIR_ACL_ATTR */
01035         zfs_replay_write2,      /* TX_WRITE2 */
01036 };
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines