diff -urBN /usr/src.orig/sbin/mount_unionfs/mount_unionfs.8 /usr/src/sbin/mount_unionfs/mount_unionfs.8 --- /usr/src.orig/sbin/mount_unionfs/mount_unionfs.8 Tue Jan 18 19:09:36 2005 +++ /usr/src/sbin/mount_unionfs/mount_unionfs.8 Mon Jul 10 22:37:11 2006 @@ -31,7 +31,7 @@ .\" @(#)mount_union.8 8.6 (Berkeley) 3/27/94 .\" $FreeBSD: src/sbin/mount_unionfs/mount_unionfs.8,v 1.20 2005/01/18 10:09:36 ru Exp $ .\" -.Dd March 27, 1994 +.Dd June 20, 2006 .Dt MOUNT_UNIONFS 8 .Os .Sh NAME @@ -40,6 +40,11 @@ .Sh SYNOPSIS .Nm -.Op Fl br +.Op Fl b +.Op Fl c Ar copymode +.Op Fl M Ar mode +.Op Fl m Ar mode +.Op Fl u Ar uid +.Op Fl g Ar gid .Op Fl o Ar options .Ar directory .Ar uniondir @@ -72,16 +77,41 @@ However, .Ar uniondir remains the mount point. +.It Fl c Ar traditional | transparent | masquerade +Specifies how a file or a directory is created in the upper layer +automatically when needed. +.Ar traditional +behaves in the same way as the old unionfs for backward compatibility, and +.Ar transparent +copies the file and directory mode bits and the ownership in the +lower layer to the file created in the upper layer. +For the behavior of the +.Ar masquerade +mode, see +.Sx MASQUERADE MODE . +.It Fl m Ar mode +Specifies the file mode bits for +.Ar masquerade +mode. +.It Fl M Ar mode +Specifies the directory mode bits for +.Ar masquerade +mode. .It Fl o Options are specified with a .Fl o flag followed by a comma separated string of options. See the .Xr mount 8 -man page for possible options and their meanings. -.It Fl r -Hide the lower layer completely in the same way as mounting with -.Xr mount_nullfs 8 . +manual page for possible options and their meanings. +.It Fl u Ar uid +Specifies the user for +.Ar masquerade +mode. +.It Fl g Ar gid +Specifies the group for +.Ar masquerade +mode. .El .Pp To enforce file system security, the user mounting the file system @@ -91,6 +121,13 @@ .Va vfs.usermount .Xr sysctl 8 variable must be set to 1 to permit file system mounting by ordinary users. +However, note that the +.Ar transparent +and +.Ar masquerade +modes require +.Va vfs.usermount +to be set to 0 because the functionality depends on the superuser privilege. .Pp Filenames are looked up in the upper layer and then in the lower layer. @@ -98,8 +135,12 @@ in the upper layer, then a .Em shadow directory will be created in the upper layer. -It will be owned by the user who originally did the union mount, -with mode +The ownership and the mode bits are set depending on the +.Fl c +option. In +.Ar traditional +mode, it will be owned by the user who originally did the +union mount, with mode .Dq rwxrwxrwx (0777) modified by the umask in effect at that time. .Pp @@ -142,11 +183,70 @@ .Xr mount 8 which only applies the union operation to the mount point itself, and then only for lookups. +.Sh MASQUERADE MODE +When a file +.Pq or a directory +is created in the upper layer, the +.Ar masquerade +mode sets it to the fixed access mode bits given by the +.Fl m Pq for files +or +.Fl M Pq for directories +option and to the owner given by the +.Fl u +and +.Fl g +options, instead of those in the lower layer. Note that in +.Ar masquerade +mode, when the owner of the file or directory matches +the one specified in the +.Fl u +option, only the mode bits for the owner will be modified. +More specifically, the file mode bits in the upper layer will +be +.Pq mode in the lower layer +OR +.Pq Po mode given in Fl m +AND 0700 +.Pc , and the ownership will be the same as that in the lower layer.
+.Pp +The default values for +.Fl m , M , u , +and +.Fl g +are as follows: +.Pp +.Bl -bullet -compact +.It +If neither +.Fl m +nor +.Fl M +is specified, the access mode bits of the mount point will be used. +.It +If neither +.Fl u +nor +.Fl g +is specified, the ownership of the mount point will be used. +.It +If only one of +.Fl m +and +.Fl M +is specified, the other will be the same as the specified one. +.It +If only one of +.Fl u +and +.Fl g +is specified, the other will be the same as the specified one. +.El .Sh EXAMPLES The commands .Bd -literal -offset indent mount -t cd9660 -o ro /dev/cd0a /usr/src -mount -t unionfs /var/obj /usr/src +mount -t unionfs -o noatime /var/obj /usr/src .Ed .Pp mount the CD-ROM drive @@ -158,11 +258,42 @@ on top. For most purposes the effect of this is to make the source tree appear writable -even though it is stored on a CD-ROM. +even though it is stored on a CD-ROM. The +.Fl o Ar noatime +option is useful to avoid unnecessary copying from the lower to the +upper layer. +.Pp +The commands +.Bd -literal -offset indent +mount -t cd9660 -o ro /dev/cd0a /usr/src +chown builder /usr/src +mount -t unionfs -o noatime -c masquerade -u builder \\ + -M 755 -m 644 /var/obj /usr/src +.Ed +.Pp +also mount the CD-ROM drive +.Pa /dev/cd0a +on +.Pa /usr/src +and then attach +.Pa /var/obj +on top. Furthermore, the owner of all files and directories in /usr/src +appears to be the regular user +.Pq builder +when seen from the upper layer. Note that for the access mode bits, +those in the lower layer +.Pq on the CD-ROM, in this example +are still used without change. +Thus, write privilege to the upper layer can be controlled +independently of the access mode bits and ownership in the lower layer. +If a user does not have read privilege in the lower layer, +the user still cannot read the file even when the upper layer is mounted using +.Ar masquerade +mode. .Pp The command .Bd -literal -offset indent -mount -t unionfs -o -b /sys $HOME/sys +mount -t unionfs -o noatime -b /sys $HOME/sys .Ed .Pp attaches the system source tree below the @@ -186,8 +317,19 @@ .Nm utility first appeared in .Bx 4.4 . -It first worked in -.Fx Ns -(fill this in) . +.Pp +The +.Fl r +option for hiding the lower layer completely is no longer supported +because the same functionality is already provided by +.Xr mount_nullfs 8 . +.Sh AUTHORS +In +.Fx 7.0 , +.An Masanori OZAWA Aq ozawa@ongs.co.jp +reimplemented handling of locking, whiteouts, and file mode bits, and +.An Hiroki Sato Aq hrs@FreeBSD.org +documented the changes in this manual page. .Sh BUGS THIS FILE SYSTEM TYPE IS NOT YET FULLY SUPPORTED (READ: IT DOESN'T WORK) AND USING IT MAY, IN FACT, DESTROY DATA ON YOUR SYSTEM. @@ -214,3 +356,20 @@ .Xr find 1 over a union tree has the side-effect of creating a tree of shadow directories in the upper layer. +.Pp +The current implementation does not support copying extended attributes +for +.Xr acl 9 , +.Xr mac 9 , +and so on to the upper layer. Note that this may be a security issue. +.Pp +A shadow directory, that is, a directory automatically created in the upper +layer when it exists in the lower layer but not in the +upper layer, is always created with the superuser privilege. +However, a file copied up from the lower layer in the same way +is created by the user who accessed it. Because of this, +if the user is not the superuser, even in +.Ar transparent +mode the access mode bits of the copied file in the upper layer +will not always be the same as those in the lower layer. +This behavior should be fixed.
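As a worked illustration of the masquerade rule described in the MASQUERADE MODE section above (and implemented by unionfs_create_uppervattr_core() later in this patch), here is a minimal userland C sketch of the mode-bit computation for the case where the owner of the lower-layer file matches the uid given with -u; the function name and the sample values are hypothetical:

#include <stdio.h>
#include <sys/types.h>

/*
 * Masquerade rule for a matching owner: keep the group, other and
 * set-id bits from the lower layer and take only the owner bits from
 * the mode given with -m (or -M for directories), i.e.
 * (lower mode with owner bits cleared) OR (given mode AND 0700).
 */
static mode_t
masquerade_mode(mode_t lower_mode, mode_t given_mode)
{
	return ((lower_mode & 077077) | (given_mode & 0700));
}

int
main(void)
{
	/* Example: the lower file is 0444 and -m 644 was given. */
	printf("upper mode: %04o\n",
	    (unsigned int)masquerade_mode(0444, 0644));
	return (0);
}

With a lower mode of 0444 and -m 644, the result is 0644: the owner bits come from -m while the group and other bits stay as they are in the lower layer.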
diff -urBN /usr/src.orig/sbin/mount_unionfs/mount_unionfs.c /usr/src/sbin/mount_unionfs/mount_unionfs.c --- /usr/src.orig/sbin/mount_unionfs/mount_unionfs.c Fri Jun 10 18:51:43 2005 +++ /usr/src/sbin/mount_unionfs/mount_unionfs.c Mon Jul 10 22:37:50 2006 @@ -1,6 +1,9 @@ -/* +/*- * Copyright (c) 1992, 1993, 1994 - * The Regents of the University of California. All rights reserved. + * The Regents of the University of California. + * Copyright (c) 2005, 2006 Masanori Ozawa , ONGS Inc. + * Copyright (c) 2006 Daichi Goto + * All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. @@ -48,6 +51,7 @@ #include #include #include +#include #include #include @@ -55,6 +59,10 @@ #include #include #include +#include +#include + +#include #include "mntopts.h" @@ -65,44 +73,105 @@ static int subdir(const char *, const char *); static void usage (void) __dead2; +static void check_root(); +static gid_t parse_gid(const char *s); +static uid_t parse_uid(const char *s); +static mode_t parse_mask(const char *s); -int -main(argc, argv) - int argc; - char *argv[]; -{ - struct iovec iov[8]; - int ch, mntflags; - char source[MAXPATHLEN]; - char target[MAXPATHLEN]; - int iovcnt; +int +main(int argc, char *argv[]) +{ + struct iovec iov[18]; + int ch , mntflags; + char source [MAXPATHLEN]; + char target [MAXPATHLEN]; + int iovcnt; + unionfs_copymode copymode; + uid_t uid; + gid_t gid; + mode_t ufile; + mode_t udir; iovcnt = 6; mntflags = 0; - while ((ch = getopt(argc, argv, "bo:r")) != -1) + while ((ch = getopt(argc, argv, "bc:o:m:M:u:g:")) != -1) { switch (ch) { case 'b': - iov[6].iov_base = "below"; - iov[6].iov_len = strlen(iov[6].iov_base) + 1; - iov[7].iov_base = NULL; - iov[7].iov_len = 0; - iovcnt = 8; + iov[iovcnt].iov_base = "below"; + iov[iovcnt].iov_len = strlen(iov[iovcnt].iov_base) + 1; + iov[iovcnt + 1].iov_base = NULL; + iov[iovcnt + 1].iov_len = 0; + iovcnt += 2; + break; + case 'c': + if (!optarg) + usage(); + else if (!strcasecmp("traditional", optarg)) + copymode = UNIONFS_TRADITIONAL; + else if (!strcasecmp("masquerade", optarg)) + copymode = UNIONFS_MASQUERADE; + else if (!strcasecmp("transparent", optarg)) + copymode = UNIONFS_TRANSPARENT; + else + usage(); + check_root(); + iov[iovcnt].iov_base = "copymode"; + iov[iovcnt].iov_len = strlen(iov[iovcnt].iov_base) + 1; + iov[iovcnt + 1].iov_base = ©mode; + iov[iovcnt + 1].iov_len = sizeof(copymode); + iovcnt += 2; break; case 'o': getmntopts(optarg, mopts, &mntflags, 0); break; - case 'r': - iov[6].iov_base = "replace"; - iov[6].iov_len = strlen(iov[6].iov_base) + 1; - iov[7].iov_base = NULL; - iov[7].iov_len = 0; - iovcnt = 8; + case 'm': + if (!optarg) + usage(); + ufile = parse_mask(optarg); + iov[iovcnt].iov_base = "ufile"; + iov[iovcnt].iov_len = strlen(iov[iovcnt].iov_base) + 1; + iov[iovcnt + 1].iov_base = &ufile; + iov[iovcnt + 1].iov_len = sizeof(ufile); + iovcnt += 2; + break; + case 'M': + if (!optarg) + usage(); + udir = parse_mask(optarg); + iov[iovcnt].iov_base = "udir"; + iov[iovcnt].iov_len = strlen(iov[iovcnt].iov_base) + 1; + iov[iovcnt + 1].iov_base = &udir; + iov[iovcnt + 1].iov_len = sizeof(udir); + iovcnt += 2; + break; + case 'u': + check_root(); + if (!optarg) + usage(); + uid = parse_uid(optarg); + iov[iovcnt].iov_base = "uid"; + iov[iovcnt].iov_len = strlen(iov[iovcnt].iov_base) + 1; + iov[iovcnt + 1].iov_base = &uid; + iov[iovcnt + 1].iov_len = sizeof(uid); + iovcnt += 2; + break; + case 'g': + check_root(); + if (!optarg) + usage(); + gid = parse_gid(optarg); + 
iov[iovcnt].iov_base = "gid"; + iov[iovcnt].iov_len = strlen(iov[iovcnt].iov_base) + 1; + iov[iovcnt + 1].iov_base = &gid; + iov[iovcnt + 1].iov_len = sizeof(gid); + iovcnt += 2; break; case '?': default: usage(); /* NOTREACHED */ } + } argc -= optind; argv += optind; @@ -115,7 +184,7 @@ if (subdir(target, source) || subdir(source, target)) errx(EX_USAGE, "%s (%s) and %s (%s) are not distinct paths", - argv[0], target, argv[1], source); + argv[0], target, argv[1], source); iov[0].iov_base = "fstype"; iov[0].iov_len = strlen(iov[0].iov_base) + 1; @@ -134,12 +205,10 @@ exit(0); } -int -subdir(p, dir) - const char *p; - const char *dir; +static int +subdir(const char *p, const char *dir) { - int l; + int l; l = strlen(dir); if (l <= 1) @@ -151,10 +220,85 @@ return (0); } -void +static void usage() { (void)fprintf(stderr, - "usage: mount_unionfs [-br] [-o options] directory uniondir\n"); + "usage: mount_unionfs [-b] [-c ]\n" + " [-m mask] [-M mask] [-u uid] [-g gid]\n" + " [-o options] directory uniondir\n"); exit(EX_USAGE); +} + +static void +check_root() +{ + if (getuid()) { + errno = EACCES; + perror("you need the root account."); + } +} + +static gid_t +parse_gid(const char *s) +{ + struct group *gr; + const char *gname; + gid_t gid; + + if ((gr = getgrnam(s)) != NULL) + gid = gr->gr_gid; + else { + for (gname = s; *s && isdigit((int)*s); s++); + if (!*s) + gid = atoi(gname); + else { + errx(EX_NOUSER, "unknown group id: %s", gname); + usage(); + } + } + + return gid; +} + +static uid_t +parse_uid(const char *s) +{ + struct passwd *pw; + const char *uname; + uid_t uid; + + if ((pw = getpwnam(s)) != NULL) + uid = pw->pw_uid; + else { + for (uname = s; *s && isdigit((int)*s); s++); + if (!*s) + uid = atoi(uname); + else { + errx(EX_NOUSER, "unknown user id: %s", uname); + usage(); + } + } + + return uid; +} + +static mode_t +parse_mask(const char *s) +{ + int done, rv; + char *ep; + + done = 0; + rv = -1; + if (*s >= '0' && *s <= '7') { + done = 1; + rv = strtol(optarg, &ep, 8); + } + if (!done || rv < 0 || *ep) { + errx(EX_USAGE, "invalid file mode: %s", s); + usage(); + } + + return rv; } diff -urBN /usr/src.orig/sys/fs/unionfs/union.h /usr/src/sys/fs/unionfs/union.h --- /usr/src.orig/sys/fs/unionfs/union.h Wed Aug 10 16:10:02 2005 +++ /usr/src/sys/fs/unionfs/union.h Mon Jul 10 22:37:35 2006 @@ -1,140 +1,117 @@ /*- + * union.h + * * Copyright (c) 1994 The Regents of the University of California. * Copyright (c) 1994 Jan-Simon Pendry. + * Copyright (c) 2005, 2006 Masanori Ozawa , ONGS Inc. + * Copyright (c) 2006 Daichi Goto * All rights reserved. * - * This code is derived from software donated to Berkeley by - * Jan-Simon Pendry. + * This code includes some code from pre union.h derived from + * software donated to Berkeley by Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: + * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. * - * @(#)union.h 8.9 (Berkeley) 12/10/94 - * $FreeBSD: src/sys/fs/unionfs/union.h,v 1.32 2005/08/10 07:10:02 obrien Exp $ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#define UNMNT_ABOVE 0x0001 /* Target appears above mount point */ -#define UNMNT_BELOW 0x0002 /* Target appears below mount point */ -#define UNMNT_REPLACE 0x0003 /* Target replaces mount point */ - -struct union_mount { - struct vnode *um_uppervp; /* UN_ULOCK holds locking state */ - struct vnode *um_lowervp; /* Left unlocked */ - struct ucred *um_cred; /* Credentials of user calling mount */ - int um_cmode; /* cmask from mount process */ - int um_op; /* Operation mode */ - dev_t um_upperdev; /* Upper root node fsid[0]*/ +/* copy method of attr from lower to upper */ +typedef enum _unionfs_copymode { + UNIONFS_TRADITIONAL = 0, + UNIONFS_TRANSPARENT, + UNIONFS_MASQUERADE +} unionfs_copymode; + +struct unionfs_mount { + struct vnode *um_lowervp; /* VREFed once */ + struct vnode *um_uppervp; /* VREFed once */ + struct vnode *um_rootvp; /* ROOT vnode */ + unionfs_copymode um_copymode; + uid_t um_uid; + gid_t um_gid; + u_short um_udir; + u_short um_ufile; }; #ifdef _KERNEL - -#ifndef DIAGNOSTIC -#define DIAGNOSTIC -#endif +/* A cache of vnode references */ +struct unionfs_node { + LIST_ENTRY(unionfs_node) un_hash; /* Hash list */ + struct vnode *un_lowervp; /* lower side vnode */ + struct vnode *un_uppervp; /* upper side vnode */ + struct vnode *un_dvp; /* parent unionfs vnode */ + struct vnode *un_vnode; /* Back pointer */ + char *un_path; /* path */ + int un_lower_opencnt; /* open count for lowervp */ + int un_upper_opencnt; /* open count for uppervp */ + int un_readdir_flag; /* status flag for readdir */ + int un_flag; /* unionfs node flag */ +}; /* - * DEFDIRMODE is the mode bits used to create a shadow directory. 
+ * unionfs node flags */ -#define VRWXMODE (VREAD|VWRITE|VEXEC) -#define VRWMODE (VREAD|VWRITE) -#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6)) -#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6)) +#define UNIONFS_CACHED 0x01 /* is chached */ +#define UNIONFS_OPENEXTL 0x02 /* openextattr (lower) */ +#define UNIONFS_OPENEXTU 0x04 /* openextattr (upper) */ + +#define MOUNTTOUNIONFSMOUNT(mp) ((struct unionfs_mount *)((mp)->mnt_data)) +#define VTOUNIONFS(vp) ((struct unionfs_node *)(vp)->v_data) +#define UNIONFSTOV(xp) ((xp)->un_vnode) + +int unionfs_init(struct vfsconf *vfsp); +int unionfs_uninit(struct vfsconf *vfsp); +int unionfs_nodeget(struct mount *mp, struct vnode *uppervp, struct vnode *lowervp, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct thread *td); +void unionfs_hashrem(struct vnode *vp, struct thread *td); +void unionfs_create_uppervattr_core(struct unionfs_mount *ump, struct vattr *lva, struct vattr *uva, struct thread *td); +int unionfs_create_uppervattr(struct unionfs_mount *ump, struct vnode *lvp, struct vattr *uva, struct ucred *cred, struct thread *td); +int unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp, struct thread *td); +int unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp, struct thread *td); +int unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp, struct thread *td); +int unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *duvp, struct unionfs_node *unp, struct componentname *cnp, struct thread *td); +int unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp, struct thread *td, char *path); +int unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred, struct thread *td); +int unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td); -/* - * A cache of vnode references (hangs off v_data) - */ -struct union_node { - LIST_ENTRY(union_node) un_cache; /* Hash chain */ - struct vnode *un_vnode; /* Back pointer */ - struct vnode *un_uppervp; /* overlaying object */ - struct vnode *un_lowervp; /* underlying object */ - struct vnode *un_dirvp; /* Parent dir of uppervp */ - struct vnode *un_pvp; /* Parent vnode */ - char *un_path; /* saved component name */ - int un_openl; /* # of opens on lowervp */ - int un_exclcnt; /* exclusive count */ - unsigned int un_flags; - struct vnode **un_dircache; /* cached union stack */ - off_t un_uppersz; /* size of upper object */ - off_t un_lowersz; /* size of lower object */ #ifdef DIAGNOSTIC - pid_t un_pid; +struct vnode *unionfs_checklowervp(struct vnode *vp, char *fil, int lno); +struct vnode *unionfs_checkuppervp(struct vnode *vp, char *fil, int lno); +#define UNIONFSVPTOLOWERVP(vp) unionfs_checklowervp((vp), __FILE__, __LINE__) +#define UNIONFSVPTOUPPERVP(vp) unionfs_checkuppervp((vp), __FILE__, __LINE__) +#else +#define UNIONFSVPTOLOWERVP(vp) (VTOUNIONFS(vp)->un_lowervp) +#define UNIONFSVPTOUPPERVP(vp) (VTOUNIONFS(vp)->un_uppervp) #endif -}; -/* - * XXX UN_ULOCK - indicates that the uppervp is locked - * - * UN_CACHED - node is in the union cache - */ - -/*#define UN_ULOCK 0x04*/ /* Upper node is locked */ -#define UN_CACHED 0x10 /* In union cache */ +extern struct vop_vector unionfs_vnodeops; -/* - * Hash table locking flags - */ - -#define UNVP_WANT 0x01 -#define UNVP_LOCKED 0x02 - -extern int union_allocvp(struct vnode **, struct mount *, - struct vnode *, - struct vnode *, - struct componentname *, struct vnode *, - struct vnode *, int); -extern int 
union_freevp(struct vnode *); -extern struct vnode *union_dircache_get(struct vnode *, struct thread *); -extern void union_dircache_free(struct union_node *); -extern int union_copyup(struct union_node *, int, struct ucred *, - struct thread *); -extern int union_dowhiteout(struct union_node *, struct ucred *, - struct thread *); -extern int union_mkshadow(struct union_mount *, struct vnode *, - struct componentname *, struct vnode **); -extern int union_mkwhiteout(struct union_mount *, struct vnode *, - struct componentname *, char *); -extern int union_cn_close(struct vnode *, int, struct ucred *, - struct thread *); -extern void union_removed_upper(struct union_node *un); -extern struct vnode *union_lowervp(struct vnode *); -extern void union_newsize(struct vnode *, off_t, off_t); - -extern int (*union_dircheckp)(struct thread *, struct vnode **, - struct file *); - -#define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data)) -#define VTOUNION(vp) ((struct union_node *)(vp)->v_data) -#define UNIONTOV(un) ((un)->un_vnode) -#define LOWERVP(vp) (VTOUNION(vp)->un_lowervp) -#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) -#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp)) - -#define UDEBUG(x) if (uniondebug) printf x -#define UDEBUG_ENABLED 1 +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_UNIONFSNODE); +MALLOC_DECLARE(M_UNIONFSPATH); +#endif -extern struct vop_vector union_vnodeops; -extern int uniondebug; +#ifdef UNIONFS_DEBUG +#define UNIONFSDEBUG(format, args...) printf(format ,## args) +#else +#define UNIONFSDEBUG(format, args...) +#endif /* UNIONFS_DEBUG */ -#endif /* _KERNEL */ +#endif /* _KERNEL */ diff -urBN /usr/src.orig/sys/fs/unionfs/union_subr.c /usr/src/sys/fs/unionfs/union_subr.c --- /usr/src.orig/sys/fs/unionfs/union_subr.c Wed Apr 27 18:06:06 2005 +++ /usr/src/sys/fs/unionfs/union_subr.c Mon Jul 10 22:37:35 2006 @@ -1,817 +1,604 @@ /*- - * Copyright (c) 1994 Jan-Simon Pendry - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. + * union_subr.c * - * This code is derived from software contributed to Berkeley by - * Jan-Simon Pendry. + * Copyright (c) 2005, 2006 Masanori Ozawa , ONGS Inc. + * Copyright (c) 2006 Daichi Goto + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: + * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. * - * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 - * $FreeBSD: src/sys/fs/unionfs/union_subr.c,v 1.86 2005/04/27 09:06:06 jeff Exp $ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include -#include -#include -#include #include #include +#include #include -#include #include -#include #include -#include +#include #include +#include +#include +#include +#include +#include + +#ifdef MAC +#include +#endif -#include -#include /* for vnode_pager_setsize */ -#include /* for vm cache coherency */ #include #include -#include - -extern int union_init(void); +#define NUNIONFSNODECACHE 32 -/* must be power of two, otherwise change UNION_HASH() */ -#define NHASH 32 +#define UNIONFS_NHASH(upper, lower) \ + (&unionfs_node_hashtbl[(((uintptr_t)upper + (uintptr_t)lower) >> 8) & unionfs_node_hash]) -/* unsigned int ... 
*/ -#define UNION_HASH(u, l) \ - (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1)) - -static MALLOC_DEFINE(M_UNPATH, "unpath", "UNION path component"); -static MALLOC_DEFINE(M_UNDCACHE, "undcac", "UNION directory cache"); - -static LIST_HEAD(unhead, union_node) unhead[NHASH]; -static int unvplock[NHASH]; - -static void union_dircache_r(struct vnode *vp, struct vnode ***vppp, - int *cntp); -static int union_list_lock(int ix); -static void union_list_unlock(int ix); -static int union_relookup(struct union_mount *um, struct vnode *dvp, - struct vnode **vpp, - struct componentname *cnp, - struct componentname *cn, char *path, - int pathlen); -static void union_updatevp(struct union_node *un, - struct vnode *uppervp, - struct vnode *lowervp); -static void union_newlower(struct union_node *, struct vnode *); -static void union_newupper(struct union_node *, struct vnode *); -static int union_copyfile(struct vnode *, struct vnode *, - struct ucred *, struct thread *); -static int union_vn_create(struct vnode **, struct union_node *, - struct thread *); -static int union_vn_close(struct vnode *, int, struct ucred *, - struct thread *); +static LIST_HEAD(unionfs_node_hashhead, unionfs_node) *unionfs_node_hashtbl; +static u_long unionfs_node_hash; +struct mtx unionfs_hashmtx; + +static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table"); +MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part"); +MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part"); -int -union_init() +/* + * Initialize cache headers + */ +int +unionfs_init(struct vfsconf *vfsp) { - int i; + UNIONFSDEBUG("unionfs_init\n"); /* printed during system boot */ + unionfs_node_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH, &unionfs_node_hash); + mtx_init(&unionfs_hashmtx, "unionfs", NULL, MTX_DEF); - for (i = 0; i < NHASH; i++) - LIST_INIT(&unhead[i]); - bzero((caddr_t)unvplock, sizeof(unvplock)); return (0); } -static int -union_list_lock(ix) - int ix; +/* + * Destroy cache headers + */ +int +unionfs_uninit(struct vfsconf *vfsp) { - if (unvplock[ix] & UNVP_LOCKED) { - unvplock[ix] |= UNVP_WANT; - (void) tsleep( &unvplock[ix], PINOD, "unllck", 0); - return (1); - } - unvplock[ix] |= UNVP_LOCKED; + mtx_destroy(&unionfs_hashmtx); + free(unionfs_node_hashtbl, M_UNIONFSHASH); return (0); } -static void -union_list_unlock(ix) - int ix; -{ - unvplock[ix] &= ~UNVP_LOCKED; - - if (unvplock[ix] & UNVP_WANT) { - unvplock[ix] &= ~UNVP_WANT; - wakeup( &unvplock[ix]); - } -} - /* - * union_updatevp: - * - * The uppervp, if not NULL, must be referenced and not locked by us - * The lowervp, if not NULL, must be referenced. - * - * If uppervp and lowervp match pointers already installed, then - * nothing happens. The passed vp's (when matching) are not adjusted. - * - * This routine may only be called by union_newupper() and - * union_newlower(). + * Return a VREF'ed alias for unionfs vnode if already exists, else 0. 
*/ +static struct vnode * +unionfs_hashget(struct mount *mp, struct vnode *uppervp, + struct vnode *lowervp, struct vnode *dvp, char *path, + int lkflags, struct thread *td) +{ + struct unionfs_node_hashhead *hd; + struct unionfs_node *unp; + struct vnode *vp; + + if (lkflags & LK_TYPE_MASK) + lkflags |= LK_RETRY; + hd = UNIONFS_NHASH(uppervp, lowervp); -static void -union_updatevp(un, uppervp, lowervp) - struct union_node *un; - struct vnode *uppervp; - struct vnode *lowervp; -{ - int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); - int nhash = UNION_HASH(uppervp, lowervp); - int docache = (lowervp != NULLVP || uppervp != NULLVP); - int lhash, uhash; - - /* - * Ensure locking is ordered from lower to higher - * to avoid deadlocks. - */ - if (nhash < ohash) { - lhash = nhash; - uhash = ohash; - } else { - lhash = ohash; - uhash = nhash; - } - - if (lhash != uhash) { - while (union_list_lock(lhash)) - continue; - } - - while (union_list_lock(uhash)) - continue; - - if (ohash != nhash || !docache) { - if (un->un_flags & UN_CACHED) { - un->un_flags &= ~UN_CACHED; - LIST_REMOVE(un, un_cache); - } - } - - if (ohash != nhash) - union_list_unlock(ohash); +loop: + mtx_lock(&unionfs_hashmtx); + LIST_FOREACH(unp, hd, un_hash) { + if (unp->un_uppervp == uppervp && + unp->un_lowervp == lowervp && + unp->un_dvp == dvp && + UNIONFSTOV(unp)->v_mount == mp && + (!path || !(unp->un_path) || !strcmp(unp->un_path, path))) { + vp = UNIONFSTOV(unp); + VI_LOCK(vp); - if (un->un_lowervp != lowervp) { - if (un->un_lowervp) { - vrele(un->un_lowervp); - if (un->un_path) { - free(un->un_path, M_UNPATH); - un->un_path = 0; + /* + * If the unionfs node is being recycled we have to + * wait until it finishes prior to scanning again. + */ + mtx_unlock(&unionfs_hashmtx); + if (vp->v_iflag & VI_DOOMED) { + /* Wait for recycling to finish. */ + vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); + VOP_UNLOCK(vp, 0, td); + goto loop; } - } - un->un_lowervp = lowervp; - un->un_lowersz = VNOVAL; - } - - if (un->un_uppervp != uppervp) { - if (un->un_uppervp) - vrele(un->un_uppervp); - un->un_uppervp = uppervp; - un->un_uppersz = VNOVAL; - } + /* + * We need to clear the OWEINACT flag here as this + * may lead vget() to try to lock our vnode which is + * already locked via vp. + */ + vp->v_iflag &= ~VI_OWEINACT; + vget(vp, lkflags | LK_INTERLOCK, td); - if (docache && (ohash != nhash)) { - LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); - un->un_flags |= UN_CACHED; + return (vp); + } } - union_list_unlock(nhash); -} - -/* - * Set a new lowervp. The passed lowervp must be referenced and will be - * stored in the vp in a referenced state. - */ - -static void -union_newlower(un, lowervp) - struct union_node *un; - struct vnode *lowervp; -{ - union_updatevp(un, un->un_uppervp, lowervp); -} + mtx_unlock(&unionfs_hashmtx); -/* - * Set a new uppervp. The passed uppervp must be locked and will be - * stored in the vp in a locked state. The caller should not unlock - * uppervp. - */ - -static void -union_newupper(un, uppervp) - struct union_node *un; - struct vnode *uppervp; -{ - union_updatevp(un, uppervp, un->un_lowervp); + return (NULLVP); } /* - * Keep track of size changes in the underlying vnodes. - * If the size changes, then callback to the vm layer - * giving priority to the upper layer size. + * Act like unionfs_hashget, but add passed unionfs_node to hash if no existing + * node found. 
*/ -void -union_newsize(vp, uppersz, lowersz) - struct vnode *vp; - off_t uppersz, lowersz; -{ - struct union_node *un; - off_t sz; +static struct vnode * +unionfs_hashins(struct mount *mp, struct unionfs_node *uncp, + char *path, int lkflags, struct thread *td) +{ + struct unionfs_node_hashhead *hd; + struct unionfs_node *unp; + struct vnode *vp; + + if (lkflags & LK_TYPE_MASK) + lkflags |= LK_RETRY; + hd = UNIONFS_NHASH(uncp->un_uppervp, uncp->un_lowervp); - /* only interested in regular files */ - if (vp->v_type != VREG) - return; - - un = VTOUNION(vp); - sz = VNOVAL; +loop: + mtx_lock(&unionfs_hashmtx); + LIST_FOREACH(unp, hd, un_hash) { + if (unp->un_uppervp == uncp->un_uppervp && + unp->un_lowervp == uncp->un_lowervp && + unp->un_dvp == uncp->un_dvp && + UNIONFSTOV(unp)->v_mount == mp && + (!path || !(unp->un_path) || !strcmp(unp->un_path, path))) { + vp = UNIONFSTOV(unp); + VI_LOCK(vp); + + mtx_unlock(&unionfs_hashmtx); + if (vp->v_iflag & VI_DOOMED) { + /* Wait for recycling to finish. */ + vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); + VOP_UNLOCK(vp, 0, td); + goto loop; + } + vp->v_iflag &= ~VI_OWEINACT; + vget(vp, lkflags | LK_INTERLOCK, td); - if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) { - un->un_uppersz = uppersz; - if (sz == VNOVAL) - sz = un->un_uppersz; + return (vp); + } } - if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) { - un->un_lowersz = lowersz; - if (sz == VNOVAL) - sz = un->un_lowersz; - } + LIST_INSERT_HEAD(hd, uncp, un_hash); + uncp->un_flag |= UNIONFS_CACHED; + mtx_unlock(&unionfs_hashmtx); - if (sz != VNOVAL) { - UDEBUG(("union: %s size now %ld\n", - (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); - /* - * There is no need to change size of non-existent object. - */ - /* vnode_pager_setsize(vp, sz); */ - } + return (NULLVP); } /* - * union_allocvp: allocate a union_node and associate it with a - * parent union_node and one or two vnodes. - * - * vpp Holds the returned vnode locked and referenced if no - * error occurs. - * - * mp Holds the mount point. mp may or may not be busied. - * allocvp() makes no changes to mp. - * - * dvp Holds the parent union_node to the one we wish to create. - * XXX may only be used to traverse an uncopied lowervp-based - * tree? XXX - * - * dvp may or may not be locked. allocvp() makes no changes - * to dvp. - * - * upperdvp Holds the parent vnode to uppervp, generally used along - * with path component information to create a shadow of - * lowervp when uppervp does not exist. - * - * upperdvp is referenced but unlocked on entry, and will be - * dereferenced on return. - * - * uppervp Holds the new uppervp vnode to be stored in the - * union_node we are allocating. uppervp is referenced but - * not locked, and will be dereferenced on return. - * - * lowervp Holds the new lowervp vnode to be stored in the - * union_node we are allocating. lowervp is referenced but - * not locked, and will be dereferenced on return. + * Make a new or get existing unionfs node. * - * cnp Holds path component information to be coupled with - * lowervp and upperdvp to allow unionfs to create an uppervp - * later on. Only used if lowervp is valid. The contents - * of cnp is only valid for the duration of the call. - * - * docache Determine whether this node should be entered in the - * cache or whether it should be destroyed as soon as possible. - * - * All union_nodes are maintained on a singly-linked - * list. New nodes are only allocated when they cannot - * be found on this list. 
Entries on the list are - * removed when the vfs reclaim entry is called. - * - * A single lock is kept for the entire list. This is - * needed because the getnewvnode() function can block - * waiting for a vnode to become free, in which case there - * may be more than one process trying to get the same - * vnode. This lock is only taken if we are going to - * call getnewvnode(), since the kernel itself is single-threaded. - * - * If an entry is found on the list, then call vget() to - * take a reference. This is done because there may be - * zero references to it and so it needs to removed from - * the vnode free list. + * uppervp and lowervp should be unlocked. Because if new unionfs vnode is + * locked, uppervp or lowervp is locked too. In order to prevent dead lock, + * you should not lock plurality simultaneously. */ - int -union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) - struct vnode **vpp; - struct mount *mp; - struct vnode *dvp; /* parent union vnode */ - struct vnode *upperdvp; /* parent vnode of uppervp */ - struct componentname *cnp; /* may be null */ - struct vnode *uppervp; /* may be null */ - struct vnode *lowervp; /* may be null */ - int docache; -{ - int error; - struct union_node *un = 0; - struct union_mount *um = MOUNTTOUNIONMOUNT(mp); - struct thread *td = (cnp) ? cnp->cn_thread : curthread; - int hash = 0; - int vflag; - int try; - - if (uppervp == NULLVP && lowervp == NULLVP) - panic("union: unidentifiable allocation"); +unionfs_nodeget(struct mount *mp, struct vnode *uppervp, + struct vnode *lowervp, struct vnode *dvp, + struct vnode **vpp, struct componentname *cnp, + struct thread *td) +{ + struct unionfs_mount *ump; + struct unionfs_node *unp; + struct vnode *vp; + int error; + int lkflags; + char *path; + + ump = MOUNTTOUNIONFSMOUNT(mp); + lkflags = (cnp ? cnp->cn_lkflags : 0); + path = (cnp ? cnp->cn_nameptr : ""); + + if (NULLVP == uppervp && NULLVP == lowervp) + panic("unionfs_nodeget: upper and lower is null"); + + /* If it has no ISLASTCN flag, path check is skipped. */ + if (!cnp || !(cnp->cn_flags & ISLASTCN)) + path = NULL; + + /* Lookup the hash firstly */ + *vpp = unionfs_hashget(mp, uppervp, lowervp, dvp, path, lkflags, td); + if (*vpp != NULLVP) + return (0); - if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { - vrele(lowervp); - lowervp = NULLVP; + if ((uppervp == NULLVP || ump->um_uppervp != uppervp) || + (lowervp == NULLVP || ump->um_lowervp != lowervp)) { + if (NULLVP == dvp) + return (EINVAL); } - /* detect the root vnode (and aliases) */ - vflag = 0; - if ((uppervp == um->um_uppervp) && - ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { - if (lowervp == NULLVP) { - lowervp = um->um_lowervp; - if (lowervp != NULLVP) - VREF(lowervp); - } - vflag = VV_ROOT; - } - -loop: - if (!docache) { - un = 0; - } else for (try = 0; try < 3; try++) { - switch (try) { - case 0: - if (lowervp == NULLVP) - continue; - hash = UNION_HASH(uppervp, lowervp); - break; - - case 1: - if (uppervp == NULLVP) - continue; - hash = UNION_HASH(uppervp, NULLVP); - break; - - case 2: - if (lowervp == NULLVP) - continue; - hash = UNION_HASH(NULLVP, lowervp); - break; - } - - while (union_list_lock(hash)) - continue; - - LIST_FOREACH(un, &unhead[hash], un_cache) { - if ((un->un_lowervp == lowervp || - un->un_lowervp == NULLVP) && - (un->un_uppervp == uppervp || - un->un_uppervp == NULLVP) && - (UNIONTOV(un)->v_mount == mp)) { - if (vget(UNIONTOV(un), 0, - cnp ? 
cnp->cn_thread : NULL)) { - union_list_unlock(hash); - goto loop; - } - break; - } - } - - union_list_unlock(hash); + /* + * Do the MALLOC before the getnewvnode since doing so afterward + * might cause a bogus v_data pointer to get dereferenced elsewhere + * if MALLOC should block. + */ + MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node), + M_UNIONFSNODE, M_WAITOK | M_ZERO); - if (un) - break; + error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp); + if (error) { + FREE(unp, M_UNIONFSNODE); + return (error); } + if (NULLVP != dvp) + vref(dvp); + if (NULLVP != uppervp) + vref(uppervp); + if (NULLVP != lowervp) + vref(lowervp); + + unp->un_vnode = vp; + unp->un_uppervp = uppervp; + unp->un_lowervp = lowervp; + unp->un_dvp = dvp; + if (NULLVP != uppervp) + vp->v_vnlock = uppervp->v_vnlock; + else + vp->v_vnlock = lowervp->v_vnlock; - if (un) { - /* - * Obtain a lock on the union_node. Everything is unlocked - * except for dvp, so check that case. If they match, our - * new un is already locked. Otherwise we have to lock our - * new un. - * - * A potential deadlock situation occurs when we are holding - * one lock while trying to get another. We must follow - * strict ordering rules to avoid it. We try to locate dvp - * by scanning up from un_vnode, since the most likely - * scenario is un being under dvp. - */ - - if (dvp && un->un_vnode != dvp) { - struct vnode *scan = un->un_vnode; - - do { - scan = VTOUNION(scan)->un_pvp; - } while (scan && scan->v_op == &union_vnodeops && - scan != dvp); - if (scan != dvp) { - /* - * our new un is above dvp (we never saw dvp - * while moving up the tree). - */ - VREF(dvp); - VOP_UNLOCK(dvp, 0, td); - error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); - vrele(dvp); - } else { - /* - * our new un is under dvp - */ - error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td); - } - } else if (dvp == NULLVP) { - /* - * dvp is NULL, we need to lock un. - */ - error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td); - } else { - /* - * dvp == un->un_vnode, we are already locked. - */ - error = 0; - } - - if (error) - goto loop; - - /* - * At this point, the union_node is locked and referenced. - * - * uppervp is locked and referenced or NULL, lowervp is - * referenced or NULL. - */ - UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n", - un, un->un_vnode, un->un_uppervp, - (un->un_uppervp ? vrefcnt(un->un_uppervp) : -99), - uppervp, - (uppervp ? vrefcnt(uppervp) : -99) - )); - - if (uppervp != un->un_uppervp) { - KASSERT(uppervp == NULL || vrefcnt(uppervp) > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", vrefcnt(uppervp))); - union_newupper(un, uppervp); - } else if (uppervp) { - KASSERT(vrefcnt(uppervp) > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", vrefcnt(uppervp))); + if (cnp) { + unp->un_path = (char *) + malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK | M_ZERO); + bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen); + unp->un_path[cnp->cn_namelen] = '\0'; + } + vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type); + vp->v_data = unp; + + if ((uppervp != NULLVP && ump->um_uppervp == uppervp) && + (lowervp != NULLVP && ump->um_lowervp == lowervp)) + vp->v_vflag |= VV_ROOT; + + *vpp = unionfs_hashins(mp, unp, path, lkflags, td); + if (NULLVP != *vpp) { + if (NULLVP != dvp) + vrele(dvp); + if (NULLVP != uppervp) vrele(uppervp); - } - - /* - * Save information about the lower layer. 
- * This needs to keep track of pathname - * and directory information which union_vn_create() - * might need. - */ - if (lowervp != un->un_lowervp) { - union_newlower(un, lowervp); - if (cnp && (lowervp != NULLVP)) { - un->un_path = malloc(cnp->cn_namelen+1, - M_UNPATH, M_WAITOK); - bcopy(cnp->cn_nameptr, un->un_path, - cnp->cn_namelen); - un->un_path[cnp->cn_namelen] = '\0'; - } - } else if (lowervp) { + if (NULLVP != lowervp) vrele(lowervp); - } - /* - * and upperdvp - */ - if (upperdvp != un->un_dirvp) { - if (un->un_dirvp) - vrele(un->un_dirvp); - un->un_dirvp = upperdvp; - } else if (upperdvp) { - vrele(upperdvp); - } + unp->un_uppervp = unp->un_lowervp = unp->un_dvp = NULLVP; + vrele(vp); - *vpp = UNIONTOV(un); return (0); } - if (docache) { - /* - * Otherwise lock the vp list while we call getnewvnode() - * since that can block. - */ - hash = UNION_HASH(uppervp, lowervp); + if (lkflags & LK_TYPE_MASK) + vn_lock(vp, lkflags | LK_RETRY, td); - if (union_list_lock(hash)) - goto loop; - } + *vpp = vp; + + return (0); +} + +/* + * Remove node from hash. + */ +void +unionfs_hashrem(struct vnode *vp, struct thread *td) +{ + int vfslocked; + struct unionfs_node *unp; + struct lock *vnlock; + struct vnode *lowervp; + struct vnode *uppervp; /* - * Create new node rather than replace old node. + * Use the interlock to protect the clearing of v_data to + * prevent faults in unionfs_lock(). */ + VI_LOCK(vp); + unp = VTOUNIONFS(vp); + lowervp = unp->un_lowervp; + uppervp = unp->un_uppervp; + unp->un_lowervp = unp->un_uppervp = NULLVP; + + vnlock = vp->v_vnlock; + vp->v_vnlock = &(vp->v_lock); + vp->v_data = NULL; + lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td); + lockmgr(vnlock, LK_RELEASE, NULL, td); + + mtx_lock(&unionfs_hashmtx); + if (unp->un_flag & UNIONFS_CACHED) { + LIST_REMOVE(unp, un_hash); + unp->un_flag &= ~UNIONFS_CACHED; + } + mtx_unlock(&unionfs_hashmtx); + vp->v_object = NULL; - error = getnewvnode("union", mp, &union_vnodeops, vpp); - if (error) { - /* - * If an error occurs, clear out vnodes. - */ - if (lowervp) - vrele(lowervp); - if (uppervp) - vrele(uppervp); - if (upperdvp) - vrele(upperdvp); - *vpp = NULL; - goto out; + if (NULLVP != lowervp) { + vfslocked = VFS_LOCK_GIANT(lowervp->v_mount); + vrele(lowervp); + VFS_UNLOCK_GIANT(vfslocked); } + if (NULLVP != uppervp) { + vfslocked = VFS_LOCK_GIANT(uppervp->v_mount); + vrele(uppervp); + VFS_UNLOCK_GIANT(vfslocked); + } + if (NULLVP != unp->un_dvp) { + vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount); + vrele(unp->un_dvp); + VFS_UNLOCK_GIANT(vfslocked); + unp->un_dvp = NULLVP; + } + if (unp->un_path) { + free(unp->un_path, M_UNIONFSPATH); + unp->un_path = NULL; + } + FREE(unp, M_UNIONFSNODE); +} - MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), - M_TEMP, M_WAITOK); - - (*vpp)->v_vflag |= vflag; - if (uppervp) - (*vpp)->v_type = uppervp->v_type; - else - (*vpp)->v_type = lowervp->v_type; +/* + * Create upper node attr. + */ +void +unionfs_create_uppervattr_core(struct unionfs_mount *ump, + struct vattr *lva, + struct vattr *uva, + struct thread *td) +{ + VATTR_NULL(uva); + uva->va_type = lva->va_type; + uva->va_atime = lva->va_atime; + uva->va_mtime = lva->va_mtime; + uva->va_ctime = lva->va_ctime; + + switch (ump->um_copymode) { + case UNIONFS_TRANSPARENT: + uva->va_mode = lva->va_mode; + uva->va_uid = lva->va_uid; + uva->va_gid = lva->va_gid; + break; + case UNIONFS_MASQUERADE: + if (ump->um_uid == lva->va_uid) { + uva->va_mode = lva->va_mode & 077077; + uva->va_mode |= (lva->va_type == VDIR ? 
ump->um_udir : ump->um_ufile) & 0700; + uva->va_uid = lva->va_uid; + uva->va_gid = lva->va_gid; + } else { + uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile); + uva->va_uid = ump->um_uid; + uva->va_gid = ump->um_gid; + } + break; + default: /* UNIONFS_TRADITIONAL */ + FILEDESC_LOCK_FAST(td->td_proc->p_fd); + uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + uva->va_uid = ump->um_uid; + uva->va_gid = ump->um_gid; + break; + } +} - un = VTOUNION(*vpp); - bzero(un, sizeof(*un)); +/* + * Create upper node attr. + */ +int +unionfs_create_uppervattr(struct unionfs_mount *ump, + struct vnode *lvp, + struct vattr *uva, + struct ucred *cred, + struct thread *td) +{ + int error; + struct vattr lva; - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); + if ((error = VOP_GETATTR(lvp, &lva, cred, td))) + return (error); - un->un_vnode = *vpp; - un->un_uppervp = uppervp; - un->un_uppersz = VNOVAL; - un->un_lowervp = lowervp; - un->un_lowersz = VNOVAL; - un->un_dirvp = upperdvp; - un->un_pvp = dvp; /* only parent dir in new allocation */ - if (dvp != NULLVP) - VREF(dvp); - un->un_dircache = NULL; - un->un_openl = 0; - - if (cnp && (lowervp != NULLVP)) { - un->un_path = malloc(cnp->cn_namelen+1, M_UNPATH, M_WAITOK); - bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); - un->un_path[cnp->cn_namelen] = '\0'; - } else { - un->un_path = NULL; - un->un_dirvp = NULL; - } - - if (docache) { - LIST_INSERT_HEAD(&unhead[hash], un, un_cache); - un->un_flags |= UN_CACHED; - } - -out: - if (docache) - union_list_unlock(hash); + unionfs_create_uppervattr_core(ump, &lva, uva, td); return (error); } -int -union_freevp(vp) - struct vnode *vp; +/* + * relookup + * + * dvp should be locked on entry and will be locked on return. + * + * If an error is returned, *vpp will be invalid, otherwise it will hold a + * locked, referenced vnode. If *vpp == dvp then remember that only one + * LK_EXCLUSIVE lock is held. 
+ */ +static int +unionfs_relookup(struct vnode *dvp, struct vnode **vpp, + struct componentname *cnp, struct componentname *cn, + struct thread *td, char *path, int pathlen, u_long nameiop) { - struct union_node *un = VTOUNION(vp); + int error; - if (un->un_flags & UN_CACHED) { - un->un_flags &= ~UN_CACHED; - LIST_REMOVE(un, un_cache); - } + cn->cn_namelen = pathlen; + cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); + bcopy(path, cn->cn_pnbuf, pathlen); + cn->cn_pnbuf[pathlen] = '\0'; - if (un->un_pvp != NULLVP) { - vrele(un->un_pvp); - un->un_pvp = NULL; - } - if (un->un_uppervp != NULLVP) { - vrele(un->un_uppervp); - un->un_uppervp = NULL; - } - if (un->un_lowervp != NULLVP) { - vrele(un->un_lowervp); - un->un_lowervp = NULL; - } - if (un->un_dirvp != NULLVP) { - vrele(un->un_dirvp); - un->un_dirvp = NULL; - } - if (un->un_path) { - free(un->un_path, M_UNPATH); - un->un_path = NULL; - } + cn->cn_nameiop = nameiop; + cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN); + cn->cn_lkflags = LK_EXCLUSIVE; + cn->cn_thread = td; + cn->cn_cred = cnp->cn_cred; - FREE(vp->v_data, M_TEMP); - vp->v_data = 0; - vp->v_object = NULL; + cn->cn_nameptr = cn->cn_pnbuf; + cn->cn_consume = cnp->cn_consume; - return (0); + if (DELETE == nameiop) + cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART)); + else if (RENAME == nameiop) + cn->cn_flags |= (cnp->cn_flags & SAVESTART); + + vref(dvp); + VOP_UNLOCK(dvp, 0, td); + + if ((error = relookup(dvp, vpp, cn))) { + uma_zfree(namei_zone, cn->cn_pnbuf); + cn->cn_flags &= ~HASBUF; + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); + } else + vrele(dvp); + + return (error); } /* - * copyfile. Copy the vnode (fvp) to the vnode (tvp) - * using a sequence of reads and writes. Both (fvp) - * and (tvp) are locked on entry and exit. + * relookup for CREATE namei operation. + * + * dvp is unionfs vnode. dvp should be locked. * - * fvp and tvp are both exclusive locked on call, but their refcount's - * haven't been bumped at all. + * If it called 'unionfs_copyfile' function by unionfs_link etc, + * VOP_LOOKUP information is broken. + * So it need relookup in order to create link etc. */ -static int -union_copyfile(fvp, tvp, cred, td) - struct vnode *fvp; - struct vnode *tvp; - struct ucred *cred; - struct thread *td; -{ - char *buf; - struct uio uio; - struct iovec iov; - int error = 0; +int +unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp, + struct thread *td) +{ + int error; + struct vnode *udvp; + struct vnode *vp; + struct componentname cn; - /* - * strategy: - * Allocate a buffer of size MAXBSIZE. - * Loop doing reads and writes, keeping track - * of the current uio offset. - * Give up at the first sign of trouble. - */ + udvp = UNIONFSVPTOUPPERVP(dvp); + vp = NULLVP; - bzero(&uio, sizeof(uio)); + error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, + strlen(cnp->cn_nameptr), CREATE); + if (error) + return (error); - uio.uio_td = td; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_offset = 0; + if (NULLVP != vp) { + if (udvp == vp) + vrele(vp); + else + vput(vp); - VOP_LEASE(fvp, td, cred, LEASE_READ); - VOP_LEASE(tvp, td, cred, LEASE_WRITE); + error = EEXIST; + } - buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); + if (cn.cn_flags & HASBUF) { + uma_zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } - /* ugly loop follows... 
*/ - do { - off_t offset = uio.uio_offset; - int count; - int bufoffset; + if (!error) { + cn.cn_flags |= (cnp->cn_flags & HASBUF); + cnp->cn_flags = cn.cn_flags; + } - /* - * Setup for big read. - */ - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - iov.iov_base = buf; - iov.iov_len = MAXBSIZE; - uio.uio_resid = iov.iov_len; - uio.uio_rw = UIO_READ; + return (error); +} - if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) - break; +/* + * relookup for DELETE namei operation. + * + * dvp is unionfs vnode. dvp should be locked. + */ +int +unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp, + struct thread *td) +{ + int error; + struct vnode *udvp; + struct vnode *vp; + struct componentname cn; - /* - * Get bytes read, handle read eof case and setup for - * write loop. - */ - if ((count = MAXBSIZE - uio.uio_resid) == 0) - break; - bufoffset = 0; + udvp = UNIONFSVPTOUPPERVP(dvp); + vp = NULLVP; - /* - * Write until an error occurs or our buffer has been - * exhausted, then update the offset for the next read. - */ - while (bufoffset < count) { - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - iov.iov_base = buf + bufoffset; - iov.iov_len = count - bufoffset; - uio.uio_offset = offset + bufoffset; - uio.uio_rw = UIO_WRITE; - uio.uio_resid = iov.iov_len; + error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, + strlen(cnp->cn_nameptr), DELETE); + if (error) + return (error); - if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) - break; - bufoffset += (count - bufoffset) - uio.uio_resid; - } - uio.uio_offset = offset + bufoffset; - } while (error == 0); + if (NULLVP == vp) + error = ENOENT; + else { + if (udvp == vp) + vrele(vp); + else + vput(vp); + } + + if (cn.cn_flags & HASBUF) { + uma_zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } + + if (!error) { + cn.cn_flags |= (cnp->cn_flags & HASBUF); + cnp->cn_flags = cn.cn_flags; + } - free(buf, M_TEMP); return (error); } /* + * relookup for RENAME namei operation. * - * un's vnode is assumed to be locked on entry and remains locked on exit. + * dvp is unionfs vnode. dvp should be locked. */ - int -union_copyup(un, docopy, cred, td) - struct union_node *un; - int docopy; - struct ucred *cred; - struct thread *td; +unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp, + struct thread *td) { int error; - struct mount *mp; - struct vnode *lvp, *uvp; + struct vnode *udvp; + struct vnode *vp; + struct componentname cn; - /* - * If the user does not have read permission, the vnode should not - * be copied to upper layer. 
- */ - vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td); - VOP_UNLOCK(un->un_lowervp, 0, td); + udvp = UNIONFSVPTOUPPERVP(dvp); + vp = NULLVP; + + error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, + strlen(cnp->cn_nameptr), RENAME); if (error) return (error); - if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0) - return (error); - if ((error = union_vn_create(&uvp, un, td)) != 0) { - vn_finished_write(mp); - return (error); + if (NULLVP != vp) { + if (udvp == vp) + vrele(vp); + else + vput(vp); } - lvp = un->un_lowervp; - - KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); - if (docopy) { - /* - * XX - should not ignore errors - * from VOP_CLOSE() - */ - vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_OPEN(lvp, FREAD, cred, td, -1); - if (error == 0) { - error = union_copyfile(lvp, uvp, cred, td); - VOP_UNLOCK(lvp, 0, td); - (void) VOP_CLOSE(lvp, FREAD, cred, td); - } - if (error == 0) - UDEBUG(("union: copied up %s\n", un->un_path)); - + if (cn.cn_flags & HASBUF) { + uma_zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; } - VOP_UNLOCK(uvp, 0, td); - vn_finished_write(mp); - union_newupper(un, uvp); - KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); - union_vn_close(uvp, FWRITE, cred, td); - KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); - /* - * Subsequent IOs will go to the top layer, so - * call close on the lower vnode and open on the - * upper vnode to ensure that the filesystem keeps - * its references counts right. This doesn't do - * the right thing with (cred) and (FREAD) though. - * Ignoring error returns is not right, either. - */ - if (error == 0) { - int i; - for (i = 0; i < un->un_openl; i++) { - (void) VOP_CLOSE(lvp, FREAD, cred, td); - (void) VOP_OPEN(uvp, FREAD, cred, td, -1); - } - un->un_openl = 0; + if (!error) { + cn.cn_flags |= (cnp->cn_flags & HASBUF); + cnp->cn_flags = cn.cn_flags; } return (error); @@ -819,550 +605,549 @@ } /* - * union_relookup: - * - * dvp should be locked on entry and will be locked on return. No - * net change in the ref count will occur. - * - * If an error is returned, *vpp will be invalid, otherwise it - * will hold a locked, referenced vnode. If *vpp == dvp then - * remember that only one exclusive lock is held. + * Update the unionfs_node. + * + * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the + * uvp's lock and lower's lock will be unlocked. */ - -static int -union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) - struct union_mount *um; - struct vnode *dvp; - struct vnode **vpp; - struct componentname *cnp; - struct componentname *cn; - char *path; - int pathlen; +static void +unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp, + struct thread *td) { - int error; - - /* - * A new componentname structure must be faked up because - * there is no way to know where the upper level cnp came - * from or what it is being used for. This must duplicate - * some of the work done by NDINIT(), some of the work done - * by namei(), some of the work done by lookup() and some of - * the work done by VOP_LOOKUP() when given a CREATE flag. - * Conclusion: Horrible. 
- */ - cn->cn_namelen = pathlen; - cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); - bcopy(path, cn->cn_pnbuf, cn->cn_namelen); - cn->cn_pnbuf[cn->cn_namelen] = '\0'; - - cn->cn_nameiop = CREATE; - cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); - cn->cn_thread = cnp->cn_thread; - if (um->um_op == UNMNT_ABOVE) - cn->cn_cred = cnp->cn_cred; - else - cn->cn_cred = um->um_cred; - cn->cn_nameptr = cn->cn_pnbuf; - cn->cn_consume = cnp->cn_consume; + int count, lockcnt; + struct vnode *vp; + struct vnode *lvp; - VREF(dvp); - VOP_UNLOCK(dvp, 0, cnp->cn_thread); + vp = UNIONFSTOV(unp); + lvp = unp->un_lowervp; /* - * Pass dvp unlocked and referenced on call to relookup(). - * - * If an error occurs, dvp will be returned unlocked and dereferenced. + * lock update */ - - if ((error = relookup(dvp, vpp, cn)) != 0) { - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread); - return(error); - } + VI_LOCK(vp); + unp->un_uppervp = uvp; + lockcnt = lvp->v_vnlock->lk_exclusivecount; + for (count = 0; count < lockcnt; count++) + VOP_UNLOCK(lvp, 0, td); + vp->v_vnlock = uvp->v_vnlock; + VI_UNLOCK(vp); + for (count = 1; count < lockcnt; count++) + vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td); /* - * If no error occurs, dvp will be returned locked with the reference - * left as before, and vpp will be returned referenced and locked. - * - * We want to return with dvp as it was passed to us, so we get - * rid of our reference. + * cache update */ - vrele(dvp); - return (0); + mtx_lock(&unionfs_hashmtx); + if (unp->un_flag & UNIONFS_CACHED) + LIST_REMOVE(unp, un_hash); + LIST_INSERT_HEAD(UNIONFS_NHASH(uvp, lvp), unp, un_hash); + unp->un_flag |= UNIONFS_CACHED; + mtx_unlock(&unionfs_hashmtx); } /* - * Create a shadow directory in the upper layer. - * The new vnode is returned locked. - * - * (um) points to the union mount structure for access to the - * the mounting process's credentials. - * (dvp) is the directory in which to create the shadow directory, - * It is locked (but not ref'd) on entry and return. - * (cnp) is the component name to be created. - * (vpp) is the returned newly created shadow directory, which - * is returned locked and ref'd + * Create a new shadow dir. + * + * udvp should be locked on entry and will be locked on return. + * + * If no error returned, unp will be updated. 
*/ int -union_mkshadow(um, dvp, cnp, vpp) - struct union_mount *um; - struct vnode *dvp; - struct componentname *cnp; - struct vnode **vpp; -{ - int error; - struct vattr va; - struct thread *td = cnp->cn_thread; +unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp, + struct unionfs_node *unp, struct componentname *cnp, + struct thread *td) +{ + int error; + struct vnode *lvp; + struct vnode *uvp; + struct vattr va; + struct vattr lva; struct componentname cn; - struct mount *mp; + struct mount *mp; + struct ucred *cred; + struct ucred *credbk; + struct uidinfo *rootinfo; - if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0) - return (error); - if ((error = union_relookup(um, dvp, vpp, cnp, &cn, - cnp->cn_nameptr, cnp->cn_namelen)) != 0) { - vn_finished_write(mp); - return (error); - } + if (NULLVP != unp->un_uppervp) + return (EEXIST); - if (*vpp) { - if (cn.cn_flags & HASBUF) { - uma_zfree(namei_zone, cn.cn_pnbuf); - cn.cn_flags &= ~HASBUF; - } - if (dvp == *vpp) - vrele(*vpp); + lvp = unp->un_lowervp; + uvp = NULLVP; + credbk = cnp->cn_cred; + + /* Change the credentials to root */ + rootinfo = uifind((uid_t)0); + cred = crdup(cnp->cn_cred); + chgproccnt(cred->cr_ruidinfo, 1, 0); + change_euid(cred, rootinfo); + change_ruid(cred, rootinfo); + change_svuid(cred, (uid_t)0); + uifree(rootinfo); + cnp->cn_cred = cred; + + memset(&cn, 0, sizeof(cn)); + + if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td))) + goto unionfs_mkshadowdir_abort; + + if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE))) + goto unionfs_mkshadowdir_abort; + if (NULLVP != uvp) { + if (udvp == uvp) + vrele(uvp); else - vput(*vpp); + vput(uvp); + + error = EEXIST; + goto unionfs_mkshadowdir_free_out; + } + + if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH))) + goto unionfs_mkshadowdir_free_out; + if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) { vn_finished_write(mp); - *vpp = NULLVP; - return (EEXIST); + goto unionfs_mkshadowdir_free_out; } + unionfs_create_uppervattr_core(ump, &lva, &va, td); - /* - * Policy: when creating the shadow directory in the - * upper layer, create it owned by the user who did - * the mount, group from parent directory, and mode - * 777 modified by umask (ie mostly identical to the - * mkdir syscall). (jsp, kb) - */ + error = VOP_MKDIR(udvp, &uvp, &cn, &va); - VATTR_NULL(&va); - va.va_type = VDIR; - va.va_mode = um->um_cmode; + if (!error) { + unionfs_node_update(unp, uvp, td); - /* VOP_LEASE: dvp is locked */ - VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE); + /* + * XXX This corrects the problem that the uid/gid could not be set. + * Ignore errors. + */ + va.va_type = VNON; + VOP_SETATTR(uvp, &va, cn.cn_cred, td); + } + vn_finished_write(mp); - error = VOP_MKDIR(dvp, vpp, &cn, &va); +unionfs_mkshadowdir_free_out: if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } - /*vput(dvp);*/ - vn_finished_write(mp); + +unionfs_mkshadowdir_abort: + cnp->cn_cred = credbk; + chgproccnt(cred->cr_ruidinfo, -1, 0); + crfree(cred); + return (error); } /* - * Create a whiteout entry in the upper layer. - * - * (um) points to the union mount structure for access to the - * the mounting process's credentials. - * (dvp) is the directory in which to create the whiteout. - * It is locked on entry and return. - * (cnp) is the component name to be created. + * Create a new whiteout. + * + * dvp should be locked on entry and will be locked on return.
*/ int -union_mkwhiteout(um, dvp, cnp, path) - struct union_mount *um; - struct vnode *dvp; - struct componentname *cnp; - char *path; +unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp, + struct thread *td, char *path) { - int error; - struct thread *td = cnp->cn_thread; - struct vnode *wvp; + int error; + struct vnode *wvp; struct componentname cn; - struct mount *mp; + struct mount *mp; - if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0) - return (error); - error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); - if (error) { - vn_finished_write(mp); - return (error); - } + if (!path) + path = cnp->cn_nameptr; - if (wvp) { + if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE))) + return (error); + if (NULLVP != wvp) { if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } - if (wvp == dvp) + if (dvp == wvp) vrele(wvp); else vput(wvp); - vn_finished_write(mp); + return (EEXIST); } - /* VOP_LEASE: dvp is locked */ - VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE); + if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH))) + goto unionfs_mkwhiteout_free_out; + if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE))) + error = VOP_WHITEOUT(dvp, &cn, CREATE); + + vn_finished_write(mp); - error = VOP_WHITEOUT(dvp, &cn, CREATE); +unionfs_mkwhiteout_free_out: if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } - vn_finished_write(mp); + return (error); } /* - * union_vn_create: creates and opens a new shadow file - * on the upper union layer. This function is similar - * in spirit to calling vn_open() but it avoids calling namei(). - * The problem with calling namei() is that a) it locks too many - * things, and b) it doesn't start at the "right" directory, - * whereas relookup() is told where to start. - * - * On entry, the vnode associated with un is locked. It remains locked - * on return. - * - * If no error occurs, *vpp contains a locked referenced vnode for your - * use. If an error occurs *vpp iis undefined. + * Create a new vnode for create a new shadow file. + * + * If an error is returned, *vpp will be invalid, otherwise it will hold a + * locked, referenced and opened vnode. + * + * unp is never updated. */ static int -union_vn_create(vpp, un, td) - struct vnode **vpp; - struct union_node *un; - struct thread *td; +unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp, + struct unionfs_node *unp, struct thread *td) { - struct vnode *vp; - struct ucred *cred = td->td_ucred; - struct vattr vat; - struct vattr *vap = &vat; - int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); - int error; - int cmode; + struct unionfs_mount *ump; + struct vnode *vp; + struct vnode *lvp; + struct ucred *cred; + struct vattr va; + struct vattr lva; + int fmode; + int error; struct componentname cn; - *vpp = NULLVP; - FILEDESC_LOCK_FAST(td->td_proc->p_fd); - cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask; - FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount); + vp = NULLVP; + lvp = unp->un_lowervp; + cred = td->td_ucred; + fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL); + error = 0; - /* - * Build a new componentname structure (for the same - * reasons outlines in union_mkshadow()). 
- * The difference here is that the file is owned by - * the current user, rather than by the person who - * did the mount, since the current user needs to be - * able to write the file (that's why it is being - * copied in the first place). - */ - cn.cn_namelen = strlen(un->un_path); + if ((error = VOP_GETATTR(lvp, &lva, cred, td))) + return (error); + unionfs_create_uppervattr_core(ump, &lva, &va, td); + + if (!unp->un_path) + panic("unionfs: un_path is null"); + + cn.cn_namelen = strlen(unp->un_path); cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); - bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); + bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1); cn.cn_nameiop = CREATE; - cn.cn_flags = ISOPEN|LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN; + cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN); + cn.cn_lkflags = LK_EXCLUSIVE; cn.cn_thread = td; - cn.cn_cred = td->td_ucred; + cn.cn_cred = cred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_consume = 0; - /* - * Pass dvp unlocked and referenced on call to relookup(). - * - * If an error occurs, dvp will be returned unlocked and dereferenced. - */ - VREF(un->un_dirvp); - error = relookup(un->un_dirvp, &vp, &cn); - if (error) - return (error); + vref(udvp); + if ((error = relookup(udvp, &vp, &cn))) + goto unionfs_vn_create_on_upper_free_out2; + vrele(udvp); - /* - * If no error occurs, dvp will be returned locked with the reference - * left as before, and vpp will be returned referenced and locked. - */ - if (vp) { - vput(un->un_dirvp); - if (cn.cn_flags & HASBUF) { - uma_zfree(namei_zone, cn.cn_pnbuf); - cn.cn_flags &= ~HASBUF; - } - if (vp == un->un_dirvp) + if (NULLVP != vp) { + if (vp == udvp) vrele(vp); else vput(vp); - return (EEXIST); + error = EEXIST; + goto unionfs_vn_create_on_upper_free_out1; } - /* - * Good - there was no race to create the file - * so go ahead and create it. The permissions - * on the file will be 0666 modified by the - * current user's umask. Access to the file, while - * it is unioned, will require access to the top *and* - * bottom files. Access when not unioned will simply - * require access to the top-level file. - * TODO: confirm choice of access permissions. 
- */ - VATTR_NULL(vap); - vap->va_type = VREG; - vap->va_mode = cmode; - VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE); - error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); - if (cn.cn_flags & HASBUF) { - uma_zfree(namei_zone, cn.cn_pnbuf); - cn.cn_flags &= ~HASBUF; - } - vput(un->un_dirvp); - if (error) - return (error); + if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE))) + goto unionfs_vn_create_on_upper_free_out1; - error = VOP_OPEN(vp, fmode, cred, td, -1); - if (error) { + if ((error = VOP_CREATE(udvp, &vp, &cn, &va))) + goto unionfs_vn_create_on_upper_free_out1; + + if ((error = VOP_OPEN(vp, fmode, cred, td, -1))) { vput(vp); - return (error); + goto unionfs_vn_create_on_upper_free_out1; } vp->v_writecount++; *vpp = vp; - return (0); -} -static int -union_vn_close(vp, fmode, cred, td) - struct vnode *vp; - int fmode; - struct ucred *cred; - struct thread *td; -{ +unionfs_vn_create_on_upper_free_out1: + VOP_UNLOCK(udvp, 0, td); + +unionfs_vn_create_on_upper_free_out2: + if (cn.cn_flags & HASBUF) { + uma_zfree(namei_zone, cn.cn_pnbuf); + cn.cn_flags &= ~HASBUF; + } - if (fmode & FWRITE) - --vp->v_writecount; - return (VOP_CLOSE(vp, fmode, cred, td)); + return (error); } /* - * union_removed_upper: - * - * An upper-only file/directory has been removed; un-cache it so - * that unionfs vnode gets reclaimed and the last uppervp reference - * disappears. - * - * Called with union_node unlocked. + * Copy from lvp to uvp. + * + * lvp and uvp should be locked and opened on entry and will be locked and + * opened on return. */ - -void -union_removed_upper(un) - struct union_node *un; +static int +unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp, + struct ucred *cred, struct thread *td) { - if (un->un_flags & UN_CACHED) { - int hash = UNION_HASH(un->un_uppervp, un->un_lowervp); + int error; + off_t offset; + int count; + int bufoffset; + char *buf; + struct uio uio; + struct iovec iov; - while (union_list_lock(hash)) - continue; - un->un_flags &= ~UN_CACHED; - LIST_REMOVE(un, un_cache); - union_list_unlock(hash); - } -} + error = 0; + memset(&uio, 0, sizeof(uio)); -/* - * Determine whether a whiteout is needed - * during a remove/rmdir operation. 
- */ -int -union_dowhiteout(un, cred, td) - struct union_node *un; - struct ucred *cred; - struct thread *td; -{ - struct vattr va; - - if (un->un_lowervp != NULLVP) - return (1); - - if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 && - (va.va_flags & OPAQUE)) - return (1); + uio.uio_td = td; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_offset = 0; - return (0); -} + if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ))) + return (error); + if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE))) + return (error); + buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); -static void -union_dircache_r(vp, vppp, cntp) - struct vnode *vp; - struct vnode ***vppp; - int *cntp; -{ - struct union_node *un; + while (!error) { + offset = uio.uio_offset; - if (vp->v_op != &union_vnodeops) { - if (vppp) { - VREF(vp); - *(*vppp)++ = vp; - if (--(*cntp) == 0) - panic("union: dircache table too small"); - } else { - (*cntp)++; - } - } else { - un = VTOUNION(vp); - if (un->un_uppervp != NULLVP) - union_dircache_r(un->un_uppervp, vppp, cntp); - if (un->un_lowervp != NULLVP) - union_dircache_r(un->un_lowervp, vppp, cntp); - } -} + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + iov.iov_base = buf; + iov.iov_len = MAXBSIZE; + uio.uio_resid = iov.iov_len; + uio.uio_rw = UIO_READ; -struct vnode * -union_dircache_get(vp, td) - struct vnode *vp; - struct thread *td; -{ - int cnt; - struct vnode *nvp; - struct vnode **vpp; - struct vnode **dircache, **newdircache; - struct union_node *un; - int error; + if ((error = VOP_READ(lvp, &uio, 0, cred))) + break; + if ((count = MAXBSIZE - uio.uio_resid) == 0) + break; + + bufoffset = 0; + while (bufoffset < count) { + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + iov.iov_base = buf + bufoffset; + iov.iov_len = count - bufoffset; + uio.uio_offset = offset + bufoffset; + uio.uio_resid = iov.iov_len; + uio.uio_rw = UIO_WRITE; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - un = VTOUNION(vp); - dircache = un->un_dircache; - newdircache = NULL; - - nvp = NULLVP; - - if (dircache == NULL) { - cnt = 0; - union_dircache_r(vp, 0, &cnt); - cnt++; - newdircache = dircache = malloc(cnt * sizeof(struct vnode *), - M_UNDCACHE, M_WAITOK); - vpp = dircache; - union_dircache_r(vp, &vpp, &cnt); - *vpp = NULLVP; - vpp = dircache + 1; - } else { - vpp = dircache; - do { - if (*vpp++ == un->un_uppervp) + if ((error = VOP_WRITE(uvp, &uio, 0, cred))) break; - } while (*vpp != NULLVP); + + bufoffset += (count - bufoffset) - uio.uio_resid; + } + + uio.uio_offset = offset + bufoffset; } - if (*vpp == NULLVP) - goto out; + free(buf, M_TEMP); + + return (error); +} + +/* + * Copy a file from the lower layer to the upper layer. + * + * Pass 1 in docopy to copy the file contents as well; pass 0 to create + * the upper vnode without copying the contents. + * + * If no error is returned, unp will be updated. + */ +int +unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred, + struct thread *td) +{ + int error; + struct mount *mp; + struct vnode *udvp; + struct vnode *lvp; + struct vnode *uvp; + + lvp = unp->un_lowervp; + uvp = NULLVP; + + if (NULLVP == unp->un_dvp) + return (EINVAL); + if (NULLVP != unp->un_uppervp) + return (EEXIST); + udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp; + if (NULLVP == udvp) + return (EROFS); - /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/ - UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? vrefcnt(*vpp) : -99))); - VREF(*vpp); - error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); - UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ?
vrefcnt(*vpp) : -99))); + error = VOP_ACCESS(lvp, VREAD, cred, td); if (error) - goto out; + return (error); - un->un_dircache = NULL; - VTOUNION(nvp)->un_dircache = dircache; - newdircache = NULL; + if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH))) + return (error); + if ((error = unionfs_vn_create_on_upper(&uvp, udvp, unp, td))) { + vn_finished_write(mp); + return (error); + } -out: - /* - * If we allocated a new dircache and couldn't attach - * it to a new vp, free the resources we allocated. - */ - if (newdircache) { - for (vpp = newdircache; *vpp != NULLVP; vpp++) - vrele(*vpp); - free(newdircache, M_UNDCACHE); + if (docopy) { + error = VOP_OPEN(lvp, FREAD, cred, td, -1); + if (!error) { + error = unionfs_copyfile_core(lvp, uvp, cred, td); + VOP_CLOSE(lvp, FREAD, cred, td); + } } + VOP_CLOSE(uvp, FWRITE, cred, td); + uvp->v_writecount--; - VOP_UNLOCK(vp, 0, td); - return (nvp); -} + vn_finished_write(mp); -void -union_dircache_free(struct union_node *un) -{ - struct vnode **vpp; + unionfs_node_update(unp, uvp, td); - for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) - vrele(*vpp); - free(un->un_dircache, M_UNDCACHE); - un->un_dircache = NULL; + return (error); } /* - * Module glue to remove #ifdef UNION from vfs_syscalls.c + * Check whether the directory vp is empty and may be removed (rmdir). + * + * vp is a unionfs vnode. + * vp should be locked. */ -static int -union_dircheck(struct thread *td, struct vnode **vp, struct file *fp) +int +unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td) { - int error = 0; + int error; + int eofflag; + int lookuperr; + struct vnode *uvp; + struct vnode *lvp; + struct vnode *tvp; + struct vattr va; + struct componentname cn; + /* + * The size of buf needs to be larger than DIRBLKSIZ. + */ + char buf[256 * 6]; + struct dirent *dp; + struct dirent *edp; + struct uio uio; + struct iovec iov; + + eofflag = 0; + uvp = UNIONFSVPTOUPPERVP(vp); + lvp = UNIONFSVPTOLOWERVP(vp); + + /* check opaque */ + if ((error = VOP_GETATTR(uvp, &va, cred, td))) + return (error); + if (va.va_flags & OPAQUE) + return (0); + + uio.uio_rw = UIO_READ; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_td = td; + uio.uio_offset = 0; - if ((*vp)->v_op == &union_vnodeops) { - struct vnode *lvp; +#ifdef MAC + error = mac_check_vnode_readdir(td->td_ucred, lvp); +#endif + while (!error && !eofflag) { + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_resid = iov.iov_len; + + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); + error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL); + VOP_UNLOCK(lvp, 0, td); + if (error) + break; - lvp = union_dircache_get(*vp, td); - if (lvp != NULLVP) { - struct vattr va; + edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid]; + for (dp = (struct dirent*)buf; !error && dp < edp; + dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) { + if (DT_WHT == dp->d_type || + (1 == dp->d_namlen && dp->d_name[0] == '.') || + (2 == dp->d_namlen && !bcmp(dp->d_name, "..", 2))) + continue; + + cn.cn_namelen = dp->d_namlen; + cn.cn_pnbuf = NULL; + cn.cn_nameptr = dp->d_name; + cn.cn_nameiop = LOOKUP; + cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN); + cn.cn_lkflags = LK_EXCLUSIVE; + cn.cn_thread = td; + cn.cn_cred = cred; + cn.cn_consume = 0; /* - * If the directory is opaque, - * then don't show lower entries + * check the entry in the lower layer, since + * readdir sometimes returns an entry that + * does not actually exist.
*/ - error = VOP_GETATTR(*vp, &va, fp->f_cred, td); - if (va.va_flags & OPAQUE) { - vput(lvp); - lvp = NULLVP; - } - } - - if (lvp != NULLVP) { - error = VOP_OPEN(lvp, FREAD, fp->f_cred, td, -1); - if (error) { - vput(lvp); - return (error); - } + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); + lookuperr = VOP_LOOKUP(lvp, &tvp, &cn); VOP_UNLOCK(lvp, 0, td); - FILE_LOCK(fp); - fp->f_vnode = lvp; - fp->f_data = lvp; - fp->f_offset = 0; - FILE_UNLOCK(fp); - error = vn_close(*vp, FREAD, fp->f_cred, td); - if (error) - return (error); - *vp = lvp; - return -1; /* goto unionread */ + + if (!lookuperr) + vput(tvp); + else + continue; /* skip entry */ + + /* + * check entry + * If it has no exist/whiteout entry in upper, + * directory is not empty. + */ + cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN); + lookuperr = VOP_LOOKUP(uvp, &tvp, &cn); + + if (!lookuperr) + vput(tvp); + + /* ignore exist or whiteout entry */ + if (!lookuperr || + (ENOENT == lookuperr && (cn.cn_flags & ISWHITEOUT))) + continue; + + error = ENOTEMPTY; } } - return error; + + return (error); } -static int -union_modevent(module_t mod, int type, void *data) +#ifdef DIAGNOSTIC + +struct vnode * +unionfs_checkuppervp(struct vnode *vp, char *fil, int lno) { - switch (type) { - case MOD_LOAD: - union_dircheckp = union_dircheck; - break; - case MOD_UNLOAD: - union_dircheckp = NULL; - break; - default: - return EOPNOTSUPP; - break; - } - return 0; -} + struct unionfs_node *unp; -static moduledata_t union_mod = { - "union_dircheck", - union_modevent, - NULL -}; + unp = VTOUNIONFS(vp); -DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); +#ifdef notyet + if (vp->v_op != unionfs_vnodeop_p) { + printf("unionfs_checkuppervp: on non-unionfs-node.\n"); +#ifdef KDB + kdb_enter("unionfs_checkuppervp: on non-unionfs-node.\n"); +#endif + panic("unionfs_checkuppervp"); + }; +#endif + return (unp->un_uppervp); +} + +struct vnode * +unionfs_checklowervp(struct vnode *vp, char *fil, int lno) +{ + struct unionfs_node *unp; + + unp = VTOUNIONFS(vp); + +#ifdef notyet + if (vp->v_op != unionfs_vnodeop_p) { + printf("unionfs_checklowervp: on non-unionfs-node.\n"); +#ifdef KDB + kdb_enter("unionfs_checklowervp: on non-unionfs-node.\n"); +#endif + panic("unionfs_checklowervp"); + }; +#endif + return (unp->un_lowervp); +} +#endif diff -urBN /usr/src.orig/sys/fs/unionfs/union_vfsops.c /usr/src/sys/fs/unionfs/union_vfsops.c --- /usr/src.orig/sys/fs/unionfs/union_vfsops.c Tue Nov 1 00:41:22 2005 +++ /usr/src/sys/fs/unionfs/union_vfsops.c Mon Jul 10 22:37:35 2006 @@ -1,92 +1,134 @@ /*- - * Copyright (c) 1994, 1995 The Regents of the University of California. - * Copyright (c) 1994, 1995 Jan-Simon Pendry. - * All rights reserved. + * union_vfsops.c * - * This code is derived from software donated to Berkeley by - * Jan-Simon Pendry. + * Copyright (c) 2005, 2006 Masanori Ozawa , ONGS Inc. + * Copyright (c) 2006 Daichi Goto + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: + * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95 - * $FreeBSD: src/sys/fs/unionfs/union_vfsops.c,v 1.77 2005/10/31 15:41:22 rwatson Exp $ - */ - -/* - * Union Layer + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include +#include #include #include -#include -#include -#include +#include #include #include -#include -#include +#include +#include +#include + #include -static MALLOC_DEFINE(M_UNIONFSMNT, "union_mount", "UNION mount structure"); +static MALLOC_DEFINE(M_UNIONFSMNT, "UNIONFS mount", "UNIONFS mount structure"); + +static vfs_fhtovp_t unionfs_fhtovp; +static vfs_checkexp_t unionfs_checkexp; +static vfs_mount_t unionfs_domount; +static vfs_quotactl_t unionfs_quotactl; +static vfs_root_t unionfs_root; +static vfs_sync_t unionfs_sync; +static vfs_statfs_t unionfs_statfs; +static vfs_unmount_t unionfs_unmount; +static vfs_vget_t unionfs_vget; +static vfs_vptofh_t unionfs_vptofh; +static vfs_extattrctl_t unionfs_extattrctl; + +static struct vfsops unionfs_vfsops; + +/* + * Exchange from userland file mode to vmode. + */ +static u_short +mode2vmode(mode_t mode) +{ + u_short ret; + + ret = 0; -extern vfs_init_t union_init; -static vfs_root_t union_root; -static vfs_mount_t union_mount; -static vfs_statfs_t union_statfs; -static vfs_unmount_t union_unmount; + /* other */ + if (mode & S_IXOTH) + ret |= VEXEC >> 6; + if (mode & S_IWOTH) + ret |= VWRITE >> 6; + if (mode & S_IROTH) + ret |= VREAD >> 6; + + /* group */ + if (mode & S_IXGRP) + ret |= VEXEC >> 3; + if (mode & S_IWGRP) + ret |= VWRITE >> 3; + if (mode & S_IRGRP) + ret |= VREAD >> 3; + + /* owner */ + if (mode & S_IXUSR) + ret |= VEXEC; + if (mode & S_IWUSR) + ret |= VWRITE; + if (mode & S_IRUSR) + ret |= VREAD; + + return (ret); +} /* - * Mount union filesystem. + * Mount unionfs layer. 
*/ static int -union_mount(mp, td) - struct mount *mp; - struct thread *td; -{ - int error = 0; - struct vfsoptlist *opts; - struct vnode *lowerrootvp = NULLVP; - struct vnode *upperrootvp = NULLVP; - struct union_mount *um = 0; - struct vattr va; - char *cp = 0, *target; - int op; - int len; - size_t size; +unionfs_domount(struct mount *mp, struct thread *td) +{ + int error; + struct vnode *lowerrootvp; + struct vnode *upperrootvp; + struct unionfs_mount *ump; + char *target; + char *tmp; + int len; + size_t done; + int below; + uid_t uid; + gid_t gid; + u_short udir; + u_short ufile; + unionfs_copymode copymode; struct componentname fakecn; - struct nameidata nd, *ndp = &nd; + struct nameidata nd, *ndp; + struct vattr va; - UDEBUG(("union_mount(mp = %p)\n", (void *)mp)); + UNIONFSDEBUG("unionfs_mount(mp = %p)\n", (void *)mp); - opts = mp->mnt_optnew; - /* - * Disable clustered write, otherwise system becomes unstable. - */ - mp->mnt_flag |= MNT_NOCLUSTERW; + error = 0; + below = 0; + uid = 0; + gid = 0; + udir = 0; + ufile = 0; + copymode = UNIONFS_TRADITIONAL; /* default */ + ndp = &nd; if (mp->mnt_flag & MNT_ROOTFS) return (EOPNOTSUPP); @@ -90,278 +132,237 @@ if (mp->mnt_flag & MNT_ROOTFS) return (EOPNOTSUPP); + /* - * Update is a no-op + * Update is a no operation. */ if (mp->mnt_flag & MNT_UPDATE) - /* - * Need to provide: - * 1. a way to convert between rdonly and rdwr mounts. - * 2. support for nfs exports. - */ return (EOPNOTSUPP); /* - * Get arguments. + * Check multi unionfs mount to avoid `lock against myself' panic. */ - error = vfs_getopt(opts, "target", (void **)&target, &len); + lowerrootvp = mp->mnt_vnodecovered; + if (lowerrootvp->v_mount->mnt_op == &unionfs_vfsops) { + UNIONFSDEBUG("unionfs_mount: multi unionfs mount?\n"); + return (EDEADLK); + } + /* + * Get argument (part 1) + */ + error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len); if (error || target[len - 1] != '\0') return (EINVAL); - - op = 0; - if (vfs_getopt(opts, "below", NULL, NULL) == 0) - op = UNMNT_BELOW; - if (vfs_getopt(opts, "replace", NULL, NULL) == 0) { - /* These options are mutually exclusive. */ - if (op) + if (0 == vfs_getopt(mp->mnt_optnew, "below", NULL, NULL)) + below = 1; + if (0 == vfs_getopt(mp->mnt_optnew, "udir", (void **)&tmp, &len)) { + if (len != sizeof(mode_t)) + return (EINVAL); + udir = mode2vmode(*((mode_t *) tmp)); + } + if (0 == vfs_getopt(mp->mnt_optnew, "ufile", (void **)&tmp, &len)) { + if (len != sizeof(mode_t)) return (EINVAL); - op = UNMNT_REPLACE; + ufile = mode2vmode(*((mode_t *) tmp)); } + /* check umask, uid and gid */ + if (!udir && ufile) + udir = ufile; + if (!ufile && udir) + ufile = udir; + + vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY, td); + error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred, td); + if (!error) { + if (!udir) + udir = va.va_mode; + if (!ufile) + ufile = va.va_mode; + uid = va.va_uid; + gid = va.va_gid; + } + VOP_UNLOCK(mp->mnt_vnodecovered, 0, td); + if (error) + return (error); + /* - * UNMNT_ABOVE is the default. 
+ * Get argument (part 2) */ - if (op == 0) - op = UNMNT_ABOVE; + if (0 == mp->mnt_cred->cr_ruid) { /* root only */ + if (0 == vfs_getopt(mp->mnt_optnew, "uid", (void **)&tmp, &len)) { + if (len != sizeof(uid_t)) + return (EINVAL); + uid = *((uid_t *) tmp); + } + if (0 == vfs_getopt(mp->mnt_optnew, "gid", (void **)&tmp, &len)) { + if (len != sizeof(gid_t)) + return (EINVAL); + gid = *((gid_t *) tmp); + } + if (0 == vfs_getopt(mp->mnt_optnew, "copymode", (void **)&tmp, &len)) { + if (len != sizeof(unionfs_copymode)) + return (EINVAL); + copymode = *((unionfs_copymode *) tmp); + } + } + /* If copymode is UNIONFS_TRADITIONAL, uid/gid is mounted user. */ + if (UNIONFS_TRADITIONAL == copymode) { + uid = mp->mnt_cred->cr_ruid; + gid = mp->mnt_cred->cr_rgid; + } /* - * Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered. - * We need to reference it but not lock it. + * Find upper node */ - lowerrootvp = mp->mnt_vnodecovered; - VREF(lowerrootvp); + NDINIT(ndp, LOOKUP, FOLLOW | WANTPARENT | LOCKLEAF, UIO_SYSSPACE, target, td); + if ((error = namei(ndp))) + return (error); + + NDFREE(ndp, NDF_ONLY_PNBUF); + /* - * Obtain upper vnode by calling namei() on the path. The - * upperrootvp will be turned referenced and locked. + * get root vnodes */ - NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, td); - error = namei(ndp); - if (error) - goto bad; - NDFREE(ndp, NDF_ONLY_PNBUF); upperrootvp = ndp->ni_vp; - UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, - VOP_ISLOCKED(upperrootvp, NULL))); + vrele(ndp->ni_dvp); + ndp->ni_dvp = NULLVP; /* - * Check multi union mount to avoid `lock myself again' panic. - * Also require that it be a directory. + * Check multi unionfs mount to avoid `lock against myself' panic. + * + * XXX: It is not being checked whether it operates safely. + * + * if (upperrootvp->v_mount->mnt_op == &unionfs_vfsops) { + * UNIONFSDEBUG("unionfs_mount: multi unionfs mount?\n"); + * vput(upperrootvp); return (EDEADLK); } */ - if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) { -#ifdef DIAGNOSTIC - printf("union_mount: multi union mount?\n"); -#endif - error = EDEADLK; - goto bad; - } - if (upperrootvp->v_type != VDIR) { - error = EINVAL; - goto bad; - } + ump = (struct unionfs_mount *)malloc(sizeof(struct unionfs_mount), + M_UNIONFSMNT, M_WAITOK | M_ZERO); /* - * Allocate our union_mount structure and populate the fields. - * The vnode references are stored in the union_mount as held, - * unlocked references. Depending on the _BELOW flag, the - * filesystems are viewed in a different order. In effect this - * is the same as providing a mount-under option to the mount - * syscall. 
+ * Save reference */ - - um = (struct union_mount *) malloc(sizeof(struct union_mount), - M_UNIONFSMNT, M_WAITOK | M_ZERO); - - um->um_op = op; - - error = VOP_GETATTR(upperrootvp, &va, td->td_ucred, td); - if (error) - goto bad; - - um->um_upperdev = va.va_fsid; - - switch (um->um_op) { - case UNMNT_ABOVE: - um->um_lowervp = lowerrootvp; - um->um_uppervp = upperrootvp; - upperrootvp = NULL; - lowerrootvp = NULL; - break; - - case UNMNT_BELOW: + if (below) { VOP_UNLOCK(upperrootvp, 0, td); - vn_lock(lowerrootvp, LK_RETRY|LK_EXCLUSIVE, td); - um->um_lowervp = upperrootvp; - um->um_uppervp = lowerrootvp; - upperrootvp = NULL; - lowerrootvp = NULL; - break; - - case UNMNT_REPLACE: - vrele(lowerrootvp); - lowerrootvp = NULL; - um->um_uppervp = upperrootvp; - um->um_lowervp = lowerrootvp; - upperrootvp = NULL; - break; - - default: - error = EINVAL; - goto bad; + vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, td); + ump->um_lowervp = upperrootvp; + ump->um_uppervp = lowerrootvp; + } else { + ump->um_lowervp = lowerrootvp; + ump->um_uppervp = upperrootvp; } + ump->um_rootvp = NULLVP; + ump->um_uid = uid; + ump->um_gid = gid; + ump->um_udir = udir; + ump->um_ufile = ufile; + ump->um_copymode = copymode; + + mp->mnt_data = (qaddr_t)ump; /* - * Unless the mount is readonly, ensure that the top layer - * supports whiteout operations. + * Copy upper layer's RDONLY flag. + */ + mp->mnt_flag |= ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY; + + /* + * Check whiteout */ if ((mp->mnt_flag & MNT_RDONLY) == 0) { - /* - * XXX Fake up a struct componentname with only cn_nameiop - * and cn_thread valid; union_whiteout() needs to use the - * thread pointer to lock the vnode. - */ - bzero(&fakecn, sizeof(fakecn)); + memset(&fakecn, 0, sizeof(fakecn)); fakecn.cn_nameiop = LOOKUP; fakecn.cn_thread = td; - error = VOP_WHITEOUT(um->um_uppervp, &fakecn, LOOKUP); - if (error) - goto bad; + error = VOP_WHITEOUT(ump->um_uppervp, &fakecn, LOOKUP); + if (error) { + if (below) { + VOP_UNLOCK(ump->um_uppervp, 0, td); + vrele(upperrootvp); + } else + vput(ump->um_uppervp); + free(ump, M_UNIONFSMNT); + mp->mnt_data = NULL; + return (error); + } } - VOP_UNLOCK(um->um_uppervp, 0, td); - um->um_cred = crhold(td->td_ucred); - FILEDESC_LOCK_FAST(td->td_proc->p_fd); - um->um_cmode = UN_DIRMODE &~ td->td_proc->p_fd->fd_cmask; - FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); + /* + * Unlock the node + */ + VOP_UNLOCK(ump->um_uppervp, 0, td); /* - * Depending on what you think the MNT_LOCAL flag might mean, - * you may want the && to be || on the conditional below. - * At the moment it has been defined that the filesystem is - * only local if it is all local, ie the MNT_LOCAL flag implies - * that the entire namespace is local. If you think the MNT_LOCAL - * flag implies that some of the files might be stored locally - * then you will want to change the conditional. + * Get the unionfs root vnode. */ - if (um->um_op == UNMNT_ABOVE) { - if (((um->um_lowervp == NULLVP) || - (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) && - (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL)) - mp->mnt_flag |= MNT_LOCAL; + error = unionfs_nodeget(mp, ump->um_uppervp, ump->um_lowervp, + NULLVP, &(ump->um_rootvp), NULL, td); + if (error) { + vrele(upperrootvp); + free(ump, M_UNIONFSMNT); + mp->mnt_data = NULL; + return (error); } /* - * Copy in the upper layer's RDONLY flag. This is for the benefit - * of lookup() which explicitly checks the flag, rather than asking - * the filesystem for its own opinion. 
This means, that an update - * mount of the underlying filesystem to go from rdonly to rdwr - * will leave the unioned view as read-only. + * Check mnt_flag */ - mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY); + if ((ump->um_lowervp->v_mount->mnt_flag & MNT_LOCAL) && + (ump->um_uppervp->v_mount->mnt_flag & MNT_LOCAL)) + mp->mnt_flag |= MNT_LOCAL; - mp->mnt_data = (qaddr_t) um; + /* + * Get new fsid + */ vfs_getnewfsid(mp); - switch (um->um_op) { - case UNMNT_ABOVE: - cp = ":"; - break; - case UNMNT_BELOW: - cp = ":"; - break; - case UNMNT_REPLACE: - cp = ""; - break; - } - len = strlen(cp); - bcopy(cp, mp->mnt_stat.f_mntfromname, len); - - cp = mp->mnt_stat.f_mntfromname + len; - len = MNAMELEN - len; + len = MNAMELEN - 1; + tmp = mp->mnt_stat.f_mntfromname; + copystr((below ? ":" : ":"), tmp, len, &done); + len -= done - 1; + tmp += done - 1; + copystr(target, tmp, len, NULL); - (void) copystr(target, cp, len - 1, &size); - bzero(cp + size, len - size); + UNIONFSDEBUG("unionfs_mount: from %s, on %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); - UDEBUG(("union_mount: from %s, on %s\n", - mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname)); return (0); - -bad: - if (um) { - if (um->um_uppervp) - vput(um->um_uppervp); - if (um->um_lowervp) - vrele(um->um_lowervp); - /* XXX other fields */ - free(um, M_UNIONFSMNT); - } - if (upperrootvp) - vput(upperrootvp); - if (lowerrootvp) - vrele(lowerrootvp); - return (error); } /* - * Free reference to union layer. + * Free reference to unionfs layer */ static int -union_unmount(mp, mntflags, td) - struct mount *mp; - int mntflags; - struct thread *td; -{ - struct union_mount *um = MOUNTTOUNIONMOUNT(mp); - int error; - int freeing; - int flags = 0; +unionfs_unmount(struct mount *mp, int mntflags, struct thread *td) +{ + struct unionfs_mount *ump; + int error; + int num; + int freeing; + int flags; + + UNIONFSDEBUG("unionfs_unmount: mp = %p\n", (void *)mp); - UDEBUG(("union_unmount(mp = %p)\n", (void *)mp)); + ump = MOUNTTOUNIONFSMOUNT(mp); + flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; - /* - * Keep flushing vnodes from the mount list. - * This is needed because of the un_pvp held - * reference to the parent vnode. - * If more vnodes have been freed on a given pass, - * the try again. The loop will iterate at most - * (d) times, where (d) is the maximum tree depth - * in the filesystem. - */ - for (freeing = 0; (error = vflush(mp, 0, flags, td)) != 0;) { - int n; - - /* count #vnodes held on mount list */ - n = mp->mnt_nvnodelistsize; - - /* if this is unchanged then stop */ - if (n == freeing) + /* vflush (no need to call vrele) */ + for (freeing = 0; (error = vflush(mp, 1, flags, td)) != 0;) { + num = mp->mnt_nvnodelistsize; + if (num == freeing) break; - - /* otherwise try once more time */ - freeing = n; + freeing = num; } - /* - * If the most recent vflush failed, the filesystem is still busy. - */ if (error) return (error); - /* - * Discard references to upper and lower target vnodes. - */ - if (um->um_lowervp) - vrele(um->um_lowervp); - vrele(um->um_uppervp); - crfree(um->um_cred); - /* - * Finally, throw away the union_mount structure. 
- */ - free(mp->mnt_data, M_UNIONFSMNT); /* XXX */ + free(ump, M_UNIONFSMNT); mp->mnt_data = 0; + return (0); } @@ -366,98 +367,95 @@ } static int -union_root(mp, flags, vpp, td) - struct mount *mp; - int flags; - struct vnode **vpp; - struct thread *td; +unionfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td) { - struct union_mount *um = MOUNTTOUNIONMOUNT(mp); - int error; + struct unionfs_mount *ump; + struct vnode *vp; - /* - * Supply an unlocked reference to um_uppervp and to um_lowervp. It - * is possible for um_uppervp to be locked without the associated - * root union_node being locked. We let union_allocvp() deal with - * it. - */ - UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, - VOP_ISLOCKED(um->um_uppervp, NULL))); + ump = MOUNTTOUNIONFSMOUNT(mp); + vp = ump->um_rootvp; - VREF(um->um_uppervp); - if (um->um_lowervp) - VREF(um->um_lowervp); + UNIONFSDEBUG("unionfs_root: rootvp=%p locked=%d\n", + vp, VOP_ISLOCKED(vp, NULL)); - error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL, - um->um_uppervp, um->um_lowervp, 1); - UDEBUG(("error %d\n", error)); - UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, - VOP_ISLOCKED(um->um_uppervp, NULL))); + vref(vp); + if (flags & LK_TYPE_MASK) + vn_lock(vp, flags, td); - return (error); + *vpp = vp; + + return (0); } static int -union_statfs(mp, sbp, td) - struct mount *mp; - struct statfs *sbp; - struct thread *td; +unionfs_quotactl(struct mount *mp, int cmd, uid_t uid, +#if __FreeBSD_version >= 700000 + void *arg, +#else + caddr_t arg, +#endif + struct thread *td) { - int error; - struct union_mount *um = MOUNTTOUNIONMOUNT(mp); - struct statfs mstat; - int lbsize; + struct unionfs_mount *ump; - UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", - (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp)); + ump = MOUNTTOUNIONFSMOUNT(mp); + + /* + * Writing is always performed to upper vnode. + */ + return (VFS_QUOTACTL(ump->um_uppervp->v_mount, cmd, uid, arg, td)); +} + +static int +unionfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) +{ + struct unionfs_mount *ump; + int error; + struct statfs mstat; + uint64_t lbsize; + + ump = MOUNTTOUNIONFSMOUNT(mp); + + UNIONFSDEBUG("unionfs_statfs(mp = %p, lvp = %p, uvp = %p)\n", + (void *)mp, (void *)ump->um_lowervp, (void *)ump->um_uppervp); bzero(&mstat, sizeof(mstat)); - if (um->um_lowervp) { - error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, td); - if (error) - return (error); - } + error = VFS_STATFS(ump->um_lowervp->v_mount, &mstat, td); + if (error) + return (error); - /* - * Now copy across the "interesting" information and fake the rest. - */ + /* now copy across the "interesting" information and fake the rest */ #if 0 sbp->f_type = mstat.f_type; sbp->f_flags = mstat.f_flags; sbp->f_bsize = mstat.f_bsize; sbp->f_iosize = mstat.f_iosize; -#endif - lbsize = mstat.f_bsize; - sbp->f_blocks = mstat.f_blocks; sbp->f_bfree = mstat.f_bfree; sbp->f_bavail = mstat.f_bavail; - sbp->f_files = mstat.f_files; sbp->f_ffree = mstat.f_ffree; +#endif + sbp->f_blocks = mstat.f_blocks; + sbp->f_files = mstat.f_files; + + lbsize = mstat.f_bsize; - error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, td); + error = VFS_STATFS(ump->um_uppervp->v_mount, &mstat, td); if (error) return (error); + /* + * The FS type etc is copy from upper vfs. 
+ * (write able vfs have priority) + */ + sbp->f_type = mstat.f_type; sbp->f_flags = mstat.f_flags; sbp->f_bsize = mstat.f_bsize; sbp->f_iosize = mstat.f_iosize; - /* - * If the lower and upper blocksizes differ, then frig the - * block counts so that the sizes reported by df make some - * kind of sense. None of this makes sense though. - */ - if (mstat.f_bsize != lbsize) - sbp->f_blocks = ((off_t) sbp->f_blocks * lbsize) / mstat.f_bsize; + sbp->f_blocks = ((off_t)sbp->f_blocks * lbsize) / mstat.f_bsize; - /* - * The "total" fields count total resources in all layers, - * the "free" fields count only those resources which are - * free in the upper layer (since only the upper layer - * is writeable). - */ sbp->f_blocks += mstat.f_blocks; sbp->f_bfree = mstat.f_bfree; sbp->f_bavail = mstat.f_bavail; @@ -466,12 +465,71 @@ return (0); } -static struct vfsops union_vfsops = { - .vfs_init = union_init, - .vfs_mount = union_mount, - .vfs_root = union_root, - .vfs_statfs = union_statfs, - .vfs_unmount = union_unmount, +static int +unionfs_sync(struct mount *mp, int waitfor, struct thread *td) +{ + /* nothing to do */ + return (0); +} + +static int +unionfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) +{ + return (EOPNOTSUPP); +} + +static int +unionfs_fhtovp(struct mount *mp, struct fid *fidp, struct vnode **vpp) +{ + return (EOPNOTSUPP); +} + +static int +unionfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp, + struct ucred **credanonp) +{ + return (EOPNOTSUPP); +} + +static int +unionfs_vptofh(struct vnode *vp, struct fid *fhp) +{ + return (EOPNOTSUPP); +} + +static int +unionfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, + int namespace, const char *attrname, struct thread *td) +{ + struct unionfs_mount *ump; + struct unionfs_node *unp; + + ump = MOUNTTOUNIONFSMOUNT(mp); + unp = VTOUNIONFS(filename_vp); + + if (NULLVP != unp->un_uppervp) { + return (VFS_EXTATTRCTL(ump->um_uppervp->v_mount, cmd, + unp->un_uppervp, namespace, attrname, td)); + } else { + return (VFS_EXTATTRCTL(ump->um_lowervp->v_mount, cmd, + unp->un_lowervp, namespace, attrname, td)); + } +} + +static struct vfsops unionfs_vfsops = { + .vfs_checkexp = unionfs_checkexp, + .vfs_extattrctl = unionfs_extattrctl, + .vfs_fhtovp = unionfs_fhtovp, + .vfs_init = unionfs_init, + .vfs_mount = unionfs_domount, + .vfs_quotactl = unionfs_quotactl, + .vfs_root = unionfs_root, + .vfs_statfs = unionfs_statfs, + .vfs_sync = unionfs_sync, + .vfs_uninit = unionfs_uninit, + .vfs_unmount = unionfs_unmount, + .vfs_vget = unionfs_vget, + .vfs_vptofh = unionfs_vptofh, }; -VFS_SET(union_vfsops, unionfs, VFCF_LOOPBACK); +VFS_SET(unionfs_vfsops, unionfs, VFCF_LOOPBACK); diff -urBN /usr/src.orig/sys/fs/unionfs/union_vnops.c /usr/src/sys/fs/unionfs/union_vnops.c --- /usr/src.orig/sys/fs/unionfs/union_vnops.c Thu Jul 6 23:01:25 2006 +++ /usr/src/sys/fs/unionfs/union_vnops.c Mon Jul 10 22:37:35 2006 @@ -1,826 +1,587 @@ /*- - * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry. - * Copyright (c) 1992, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. + * union_vnops.c * - * This code is derived from software contributed to Berkeley by - * Jan-Simon Pendry. + * Copyright (c) 2005, 2006 Masanori Ozawa , ONGS Inc. + * Copyright (c) 2006 Daichi Goto + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: + * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. * - * @(#)union_vnops.c 8.32 (Berkeley) 6/23/95 - * $FreeBSD: src/sys/fs/unionfs/union_vnops.c,v 1.134 2006/07/06 13:25:01 rwatson Exp $ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include -#include -#include +#include #include -#include +#include +#include #include +#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include + #include #include -#include - -#include +#include #include +#include -int uniondebug = 0; - -#if UDEBUG_ENABLED -SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, ""); +#if 0 +#define UNIONFS_INTERNAL_DEBUG(msg, args...) printf(msg, ## args) #else -SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, ""); +#define UNIONFS_INTERNAL_DEBUG(msg, args...) 
#endif -static vop_access_t union_access; -static vop_aclcheck_t union_aclcheck; -static vop_advlock_t union_advlock; -static vop_close_t union_close; -static vop_closeextattr_t union_closeextattr; -static vop_create_t union_create; -static vop_deleteextattr_t union_deleteextattr; -static vop_fsync_t union_fsync; -static vop_getacl_t union_getacl; -static vop_getattr_t union_getattr; -static vop_getextattr_t union_getextattr; -static vop_inactive_t union_inactive; -static vop_ioctl_t union_ioctl; -static vop_lease_t union_lease; -static vop_link_t union_link; -static vop_listextattr_t union_listextattr; -static vop_lookup_t union_lookup; -static int union_lookup1(struct vnode *udvp, struct vnode **dvp, - struct vnode **vpp, - struct componentname *cnp); -static vop_mkdir_t union_mkdir; -static vop_mknod_t union_mknod; -static vop_open_t union_open; -static vop_openextattr_t union_openextattr; -static vop_pathconf_t union_pathconf; -static vop_print_t union_print; -static vop_read_t union_read; -static vop_readdir_t union_readdir; -static vop_readlink_t union_readlink; -static vop_getwritemount_t union_getwritemount; -static vop_reclaim_t union_reclaim; -static vop_remove_t union_remove; -static vop_rename_t union_rename; -static vop_rmdir_t union_rmdir; -static vop_poll_t union_poll; -static vop_setacl_t union_setacl; -static vop_setattr_t union_setattr; -static vop_setlabel_t union_setlabel; -static vop_setextattr_t union_setextattr; -static vop_strategy_t union_strategy; -static vop_symlink_t union_symlink; -static vop_whiteout_t union_whiteout; -static vop_write_t union_write; - -static __inline -struct vnode * -union_lock_upper(struct union_node *un, struct thread *td) -{ - struct vnode *uppervp; - - if ((uppervp = un->un_uppervp) != NULL) { - VREF(uppervp); - vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td); - } - KASSERT((uppervp == NULL || vrefcnt(uppervp) > 0), ("uppervp usecount is 0")); - return(uppervp); -} - -static __inline -void -union_unlock_upper(struct vnode *uppervp, struct thread *td) -{ - vput(uppervp); -} - -static __inline -struct vnode * -union_lock_other(struct union_node *un, struct thread *td) -{ - struct vnode *vp; - - if (un->un_uppervp != NULL) { - vp = union_lock_upper(un, td); - } else if ((vp = un->un_lowervp) != NULL) { - VREF(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td); - } - return(vp); -} - -static __inline -void -union_unlock_other(struct vnode *vp, struct thread *td) -{ - vput(vp); -} - -/* - * union_lookup: - * - * udvp must be exclusively locked on call and will remain - * exclusively locked on return. This is the mount point - * for our filesystem. - * - * dvp Our base directory, locked and referenced. - * The passed dvp will be dereferenced and unlocked on return - * and a new dvp will be returned which is locked and - * referenced in the same variable. - * - * vpp is filled in with the result if no error occured, - * locked and ref'd. - * - * If an error is returned, *vpp is set to NULLVP. If no - * error occurs, *vpp is returned with a reference and an - * exclusive lock. 
- */ - static int -union_lookup1(udvp, pdvp, vpp, cnp) - struct vnode *udvp; - struct vnode **pdvp; - struct vnode **vpp; - struct componentname *cnp; +unionfs_lookup(struct vop_lookup_args *ap) { - int error; - struct thread *td = cnp->cn_thread; - struct vnode *dvp = *pdvp; - struct vnode *tdvp; - struct mount *mp; + int iswhiteout; + int lockflag; + int error , uerror, lerror; + u_long nameiop; + u_long cnflags, cnflagsbk; + struct unionfs_node *dunp; + struct vnode *dvp, *udvp, *ldvp, *vp, *uvp, *lvp, *dtmpvp; + struct vattr va; + struct componentname *cnp; + struct thread *td; - /* - * If stepping up the directory tree, check for going - * back across the mount point, in which case do what - * lookup would do by stepping back down the mount - * hierarchy. - */ - if (cnp->cn_flags & ISDOTDOT) { - while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) { - /* - * Don't do the NOCROSSMOUNT check - * at this level. By definition, - * union fs deals with namespaces, not - * filesystems. - */ - tdvp = dvp; - dvp = dvp->v_mount->mnt_vnodecovered; - VREF(dvp); - vput(tdvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); - } - } + iswhiteout = 0; + lockflag = 0; + error = uerror = lerror = ENOENT; + cnp = ap->a_cnp; + nameiop = cnp->cn_nameiop; + cnflags = cnp->cn_flags; + dvp = ap->a_dvp; + dunp = VTOUNIONFS(dvp); + udvp = dunp->un_uppervp; + ldvp = dunp->un_lowervp; + vp = uvp = lvp = NULLVP; + td = curthread; + *(ap->a_vpp) = NULLVP; + + UNIONFS_INTERNAL_DEBUG("unionfs_lookup: enter: nameiop=%ld, flags=%lx, path=%s\n", nameiop, cnflags, cnp->cn_nameptr); - /* - * Set return dvp to be the upperdvp 'parent directory. - */ - *pdvp = dvp; + if (dvp->v_type != VDIR) + return (ENOTDIR); /* - * If the VOP_LOOKUP() call generates an error, tdvp is invalid and - * no changes will have been made to dvp, so we are set to return. + * If read-only and op is not LOOKUP, will return EROFS. */ - - error = VOP_LOOKUP(dvp, &tdvp, cnp); - if (error) { - UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags)); - *vpp = NULL; - return (error); - } - UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags)); + if ((cnflags & ISLASTCN) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + LOOKUP != nameiop) + return (EROFS); /* - * Lastly check if the current node is a mount point in - * which case walk up the mount hierarchy making sure not to - * bump into the root of the mount tree (ie. dvp != udvp). - * - * We use dvp as a temporary variable here, it is no longer related - * to the dvp above. However, we have to ensure that both *pdvp and - * tdvp are locked on return. + * lookup dotdot */ + if (cnflags & ISDOTDOT) { + if (LOOKUP != nameiop && NULLVP == udvp) + return (EROFS); - dvp = tdvp; - while ( - dvp != udvp && - (dvp->v_type == VDIR) && - (mp = dvp->v_mountedhere) - ) { - int relock_pdvp = 0; - - if (vfs_busy(mp, 0, 0, td)) - continue; - - if (dvp == *pdvp) - relock_pdvp = 1; - vput(dvp); - dvp = NULL; - error = VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td); + if (NULLVP != udvp) + dtmpvp = udvp; + else + dtmpvp = ldvp; - vfs_unbusy(mp, td); + error = VOP_LOOKUP(dtmpvp, &vp, cnp); - if (relock_pdvp) - vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, td); + if (!error) { + /* + * Exchange lock and reference from vp to + * dunp->un_dvp. vp is upper/lower vnode, but it + * will need to return the unionfs vnode. 
+ */ + if (DELETE == nameiop || RENAME == nameiop || + (cnp->cn_lkflags & LK_TYPE_MASK)) + VOP_UNLOCK(vp, 0, td); + vrele(vp); + + VOP_UNLOCK(dvp, 0, td); + *(ap->a_vpp) = dunp->un_dvp; + vref(dunp->un_dvp); + + if (DELETE == nameiop || RENAME == nameiop) + vn_lock(dunp->un_dvp, LK_EXCLUSIVE | LK_RETRY, td); + else if (cnp->cn_lkflags & LK_TYPE_MASK) + vn_lock(dunp->un_dvp, cnp->cn_lkflags | LK_RETRY, td); - if (error) { - *vpp = NULL; - return (error); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); } - } - *vpp = dvp; - return (0); -} -static int -union_lookup(ap) - struct vop_lookup_args /* { - struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; -{ - int error; - int uerror, lerror; - struct vnode *uppervp, *lowervp; - struct vnode *upperdvp, *lowerdvp; - struct vnode *dvp = ap->a_dvp; /* starting dir */ - struct union_node *dun = VTOUNION(dvp); /* associated union node */ - struct componentname *cnp = ap->a_cnp; - struct thread *td = cnp->cn_thread; - struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount); - struct ucred *saved_cred = NULL; - int iswhiteout; - struct vattr va; - - *ap->a_vpp = NULLVP; + UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error); - /* - * Disallow write attempts to the filesystem mounted read-only. - */ - if ((cnp->cn_flags & ISLASTCN) && - (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { - return (EROFS); + return (error); } /* - * For any lookups we do, always return with the parent locked. + * lookup upper layer */ - cnp->cn_flags |= LOCKPARENT; - - lowerdvp = dun->un_lowervp; - uppervp = NULLVP; - lowervp = NULLVP; - iswhiteout = 0; - - uerror = ENOENT; - lerror = ENOENT; + if (NULLVP != udvp) { + uerror = VOP_LOOKUP(udvp, &uvp, cnp); - /* - * Get a private lock on uppervp and a reference, effectively - * taking it out of the union_node's control. - * - * We must lock upperdvp while holding our lock on dvp - * to avoid a deadlock. - */ - upperdvp = union_lock_upper(dun, td); + if (!uerror) { + if (udvp == uvp) { /* is dot */ + vrele(uvp); + *(ap->a_vpp) = dvp; + vref(dvp); - /* - * Do the lookup in the upper level. - * If that level consumes additional pathnames, - * then assume that something special is going - * on and just return that vnode. - */ - if (upperdvp != NULLVP) { - /* - * We do not have to worry about the DOTDOT case, we've - * already unlocked dvp. - */ - UDEBUG(("A %p\n", upperdvp)); + UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", uerror); - /* - * Do the lookup. We must supply a locked and referenced - * upperdvp to the function and will get a new locked and - * referenced upperdvp back, with the old having been - * dereferenced. - * - * If an error is returned, uppervp will be NULLVP. If no - * error occurs, uppervp will be the locked and referenced. - * Return vnode, or possibly NULL, depending on what is being - * requested. It is possible that the returned uppervp - * will be the same as upperdvp. - */ - uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); - UDEBUG(( - "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n", - uerror, - upperdvp, - vrefcnt(upperdvp), - VOP_ISLOCKED(upperdvp, NULL), - uppervp, - (uppervp ? vrefcnt(uppervp) : -99), - (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99) - )); - - /* - * Disallow write attempts to the filesystem mounted read-only. 
- */ - if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && - (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { - error = EROFS; - goto out; - } - - /* - * Special case: If cn_consume != 0 then skip out. The result - * of the lookup is transfered to our return variable. If - * an error occured we have to throw away the results. - */ - - if (cnp->cn_consume != 0) { - if ((error = uerror) == 0) { - *ap->a_vpp = uppervp; - uppervp = NULL; + return (uerror); } - goto out; + if (DELETE == nameiop || RENAME == nameiop || + (cnp->cn_lkflags & LK_TYPE_MASK)) + VOP_UNLOCK(uvp, 0, td); } - /* - * Calculate whiteout, fall through. - */ - - if (uerror == ENOENT || uerror == EJUSTRETURN) { - if (cnp->cn_flags & ISWHITEOUT) { - iswhiteout = 1; - } else if (lowerdvp != NULLVP) { - int terror; - - terror = VOP_GETATTR(upperdvp, &va, - cnp->cn_cred, cnp->cn_thread); - if (terror == 0 && (va.va_flags & OPAQUE)) - iswhiteout = 1; - } - } + /* check whiteout */ + if (uerror == ENOENT || uerror == EJUSTRETURN) + if (cnp->cn_flags & ISWHITEOUT) + iswhiteout = 1; /* don't lookup lower */ + if (!iswhiteout && NULLVP != ldvp) + if (!VOP_GETATTR(udvp, &va, cnp->cn_cred, td) && + (va.va_flags & OPAQUE)) + iswhiteout = 1; /* don't lookup lower */ +#if 0 + UNIONFS_INTERNAL_DEBUG("unionfs_lookup: debug: whiteout=%d, path=%s\n", iswhiteout, cnp->cn_nameptr); +#endif } /* - * In a similar way to the upper layer, do the lookup - * in the lower layer. This time, if there is some - * component magic going on, then vput whatever we got - * back from the upper layer and return the lower vnode - * instead. + * lookup lower layer */ - - if (lowerdvp != NULLVP && !iswhiteout) { - int nameiop; - - UDEBUG(("B %p\n", lowerdvp)); - - /* - * Force only LOOKUPs on the lower node, since - * we won't be making changes to it anyway. - */ - nameiop = cnp->cn_nameiop; + if (NULLVP != ldvp && !(cnflags & DOWHITEOUT) && !iswhiteout) { + /* always op is LOOKUP */ cnp->cn_nameiop = LOOKUP; - if (um->um_op == UNMNT_BELOW) { - saved_cred = cnp->cn_cred; - cnp->cn_cred = um->um_cred; - } + cnflagsbk = cnp->cn_flags; + cnp->cn_flags = cnflags; + if (NULLVP != udvp) + vn_lock(ldvp, LK_EXCLUSIVE | LK_RETRY, td); + lerror = VOP_LOOKUP(ldvp, &lvp, cnp); + if (NULLVP != udvp) + VOP_UNLOCK(ldvp, 0, td); + cnp->cn_nameiop = nameiop; + if (NULLVP != udvp && (!uerror || EJUSTRETURN == uerror)) + cnp->cn_flags = cnflagsbk; - /* - * We shouldn't have to worry about locking interactions - * between the lower layer and our union layer (w.r.t. - * `..' processing) because we don't futz with lowervp - * locks in the union-node instantiation code path. - * - * union_lookup1() requires lowervp to be locked on entry, - * and it will be unlocked on return. The ref count will - * not change. On return lowervp doesn't represent anything - * to us so we NULL it out. - */ - VREF(lowerdvp); - vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, td); - lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); - if (lowerdvp == lowervp) - vrele(lowerdvp); - else - vput(lowerdvp); - lowerdvp = NULL; /* lowerdvp invalid after vput */ + if (!lerror) { + if (ldvp == lvp) { /* is dot */ + if (NULLVP != uvp) + vrele(uvp); /* no need? 
*/ + vrele(lvp); + *(ap->a_vpp) = dvp; + vref(dvp); - if (um->um_op == UNMNT_BELOW) - cnp->cn_cred = saved_cred; - cnp->cn_nameiop = nameiop; + UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", lerror); - if (cnp->cn_consume != 0 || lerror == EACCES) { - if ((error = lerror) == 0) { - *ap->a_vpp = lowervp; - lowervp = NULL; - } - goto out; - } - } else { - UDEBUG(("C %p\n", lowerdvp)); - if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { - if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) { - VREF(lowervp); - vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, td); - lerror = 0; + return (lerror); } + if (cnp->cn_lkflags & LK_TYPE_MASK) + VOP_UNLOCK(lvp, 0, td); } } /* - * Ok. Now we have uerror, uppervp, upperdvp, lerror, and lowervp. - * - * 1. If both layers returned an error, select the upper layer. - * - * 2. If the upper layer failed and the bottom layer succeeded, - * two subcases occur: - * - * a. The bottom vnode is not a directory, in which case - * just return a new union vnode referencing an - * empty top layer and the existing bottom layer. - * - * b. The bottom vnode is a directory, in which case - * create a new directory in the top layer and - * and fall through to case 3. - * - * 3. If the top layer succeeded, then return a new union - * vnode referencing whatever the new top layer and - * whatever the bottom layer returned. + * check lookup result */ - - /* case 1. */ - if ((uerror != 0) && (lerror != 0)) { - error = uerror; - goto out; + if (NULLVP == uvp && NULLVP == lvp) { + UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", + (NULLVP != udvp ? uerror : lerror)); + return (NULLVP != udvp ? uerror : lerror); } - /* case 2. */ - if (uerror != 0 /* && (lerror == 0) */ ) { - if (lowervp->v_type == VDIR) { /* case 2b. */ - KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL")); - /* - * Oops, uppervp has a problem, we may have to shadow. - */ - uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); - if (uerror) { - error = uerror; - goto out; - } - } + /* + * check vnode type + */ + if (NULLVP != uvp && NULLVP != lvp && uvp->v_type != lvp->v_type) { + vrele(lvp); + lvp = NULLVP; } /* - * Must call union_allocvp() with both the upper and lower vnodes - * referenced and the upper vnode locked. ap->a_vpp is returned - * referenced and locked. lowervp, uppervp, and upperdvp are - * absorbed by union_allocvp() whether it succeeds or fails. - * - * upperdvp is the parent directory of uppervp which may be - * different, depending on the path, from dvp->un_uppervp. That's - * why it is a separate argument. Note that it must be unlocked. - * - * dvp must be locked on entry to the call and will be locked on - * return. + * check shadow dir */ + if (uerror && uerror != EJUSTRETURN && NULLVP != udvp && + !lerror && NULLVP != lvp && lvp->v_type == VDIR && + !(dvp->v_mount->mnt_flag & MNT_RDONLY) && + (1 < cnp->cn_namelen || '.' != *(cnp->cn_nameptr))) { + /* get unionfs vnode for create a new shadow dir. */ + error = unionfs_nodeget(dvp->v_mount, NULLVP, lvp, dvp, &vp, cnp, td); + if (error) + goto unionfs_lookup_out; - if (uppervp && uppervp != upperdvp) - VOP_UNLOCK(uppervp, 0, td); - if (lowervp) - VOP_UNLOCK(lowervp, 0, td); - if (upperdvp) - VOP_UNLOCK(upperdvp, 0, td); - - error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, - uppervp, lowervp, 1); - - UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? 
vrefcnt(*ap->a_vpp) : -99)); - - uppervp = NULL; - upperdvp = NULL; - lowervp = NULL; - - /* - * Termination Code - * - * - put away any extra junk laying around. Note that lowervp - * (if not NULL) will never be the same as *ap->a_vp and - * neither will uppervp, because when we set that state we - * NULL-out lowervp or uppervp. On the otherhand, upperdvp - * may match uppervp or *ap->a_vpp. - * - * - relock/unlock dvp if appropriate. + if (LK_SHARED == (cnp->cn_lkflags & LK_TYPE_MASK)) + VOP_UNLOCK(vp, 0, td); + if (LK_EXCLUSIVE != VOP_ISLOCKED(vp, td)) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + lockflag = 1; + } + error = unionfs_mkshadowdir(MOUNTTOUNIONFSMOUNT(dvp->v_mount), + udvp, VTOUNIONFS(vp), cnp, td); + if (lockflag) + VOP_UNLOCK(vp, 0, td); + if (error) { + UNIONFSDEBUG("unionfs_lookup: Unable to create shadow dir."); + if (LK_EXCLUSIVE == (cnp->cn_lkflags & LK_TYPE_MASK)) + vput(vp); + else + vrele(vp); + goto unionfs_lookup_out; + } + if (LK_SHARED == (cnp->cn_lkflags & LK_TYPE_MASK)) + vn_lock(vp, LK_SHARED | LK_RETRY, td); + uerror = lerror; + } + /* + * get unionfs vnode. */ - -out: - if (upperdvp) { - if (upperdvp == uppervp || upperdvp == *ap->a_vpp) - vrele(upperdvp); - else - vput(upperdvp); + else { + error = unionfs_nodeget(dvp->v_mount, uvp, lvp, dvp, &vp, cnp, td); + if (error) { + UNIONFSDEBUG("unionfs_lookup: Unable to create unionfs vnode."); + goto unionfs_lookup_out; + } + if ((DELETE == nameiop || RENAME == nameiop) && + !(cnp->cn_lkflags & LK_TYPE_MASK)) + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); } - if (uppervp) - vput(uppervp); + *(ap->a_vpp) = vp; - if (lowervp) - vput(lowervp); + if (NULLVP != uvp) + error = uerror; + else + error = lerror; - UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp, - ((*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99), - lowervp, uppervp)); - - if (error == 0 || error == EJUSTRETURN) { - if (cnp->cn_namelen == 1 && - cnp->cn_nameptr[0] == '.' && - *ap->a_vpp != dvp) { -#ifdef DIAGNOSTIC - vprint("union_lookup: vp", *ap->a_vpp); - vprint("union_lookup: dvp", dvp); -#endif - panic("union_lookup returning . (%p) != startdir (%p)", - *ap->a_vpp, dvp); - } - } +unionfs_lookup_out: + if (NULLVP != uvp) + vrele(uvp); + if (NULLVP != lvp) + vrele(lvp); + + UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error); return (error); } -/* - * union_create: - * - * a_dvp is locked on entry and remains locked on return. a_vpp is returned - * locked if no error occurs, otherwise it is garbage. 
- */ - static int -union_create(ap) - struct vop_create_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; -{ - struct union_node *dun = VTOUNION(ap->a_dvp); - struct componentname *cnp = ap->a_cnp; - struct thread *td = cnp->cn_thread; - struct vnode *dvp; - int error = EROFS; - - if ((dvp = union_lock_upper(dun, td)) != NULL) { - struct vnode *vp; - struct mount *mp; - - error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); - if (error == 0) { - mp = ap->a_dvp->v_mount; +unionfs_create(struct vop_create_args *ap) +{ + struct unionfs_node *dunp; + struct componentname *cnp; + struct thread *td; + struct vnode *udvp; + struct vnode *vp; + int error; + + UNIONFS_INTERNAL_DEBUG("unionfs_create: enter\n"); + + dunp = VTOUNIONFS(ap->a_dvp); + cnp = ap->a_cnp; + td = curthread; + udvp = dunp->un_uppervp; + error = EROFS; + + if (NULLVP != udvp) { + if (!(error = VOP_CREATE(udvp, &vp, cnp, ap->a_vap))) { VOP_UNLOCK(vp, 0, td); - UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vrefcnt(vp))); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, - cnp, vp, NULLVP, 1); - UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp))); + error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP, + ap->a_dvp, ap->a_vpp, cnp, td); + vrele(vp); } - union_unlock_upper(dvp, td); } + + UNIONFS_INTERNAL_DEBUG("unionfs_create: leave (%d)\n", error); + return (error); } static int -union_whiteout(ap) - struct vop_whiteout_args /* { - struct vnode *a_dvp; - struct componentname *a_cnp; - int a_flags; - } */ *ap; +unionfs_whiteout(struct vop_whiteout_args *ap) { - struct union_node *un = VTOUNION(ap->a_dvp); - struct componentname *cnp = ap->a_cnp; - struct vnode *uppervp; - int error; + struct unionfs_node *dunp; + struct componentname *cnp; + struct vnode *udvp; + int error; - switch (ap->a_flags) { - case CREATE: - case DELETE: - uppervp = union_lock_upper(un, cnp->cn_thread); - if (uppervp != NULLVP) { - error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags); - union_unlock_upper(uppervp, cnp->cn_thread); - } else - error = EOPNOTSUPP; - break; - case LOOKUP: - error = EOPNOTSUPP; - break; - default: - panic("union_whiteout: unknown op"); + UNIONFS_INTERNAL_DEBUG("unionfs_whiteout: enter\n"); + + dunp = VTOUNIONFS(ap->a_dvp); + cnp = ap->a_cnp; + udvp = dunp->un_uppervp; + error = EOPNOTSUPP; + + if (NULLVP != udvp) { + switch (ap->a_flags) { + case CREATE: + case DELETE: + case LOOKUP: + error = VOP_WHITEOUT(udvp, cnp, ap->a_flags); + break; + default: + error = EINVAL; + break; + } } + + UNIONFS_INTERNAL_DEBUG("unionfs_whiteout: leave (%d)\n", error); + return (error); } -/* - * union_mknod: - * - * a_dvp is locked on entry and should remain locked on return. - * a_vpp is garbage whether an error occurs or not. 
- */ - static int -union_mknod(ap) - struct vop_mknod_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; -{ - struct union_node *dun = VTOUNION(ap->a_dvp); - struct componentname *cnp = ap->a_cnp; - struct vnode *dvp; - int error = EROFS; - - if ((dvp = union_lock_upper(dun, cnp->cn_thread)) != NULL) { - error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap); - union_unlock_upper(dvp, cnp->cn_thread); +unionfs_mknod(struct vop_mknod_args *ap) +{ + struct unionfs_node *dunp; + struct componentname *cnp; + struct thread *td; + struct vnode *udvp; + struct vnode *vp; + int error; + + UNIONFS_INTERNAL_DEBUG("unionfs_mknod: enter\n"); + + dunp = VTOUNIONFS(ap->a_dvp); + cnp = ap->a_cnp; + td = curthread; + udvp = dunp->un_uppervp; + error = EROFS; + + if (NULLVP != udvp) { + if (!(error = VOP_MKNOD(udvp, &vp, cnp, ap->a_vap))) { + VOP_UNLOCK(vp, 0, td); + error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP, + ap->a_dvp, ap->a_vpp, cnp, td); + vrele(vp); + } } + + UNIONFS_INTERNAL_DEBUG("unionfs_mknod: leave (%d)\n", error); + return (error); } -/* - * union_open: - * - * run open VOP. When opening the underlying vnode we have to mimic - * vn_open(). What we *really* need to do to avoid screwups if the - * open semantics change is to call vn_open(). For example, ufs blows - * up if you open a file but do not vmio it prior to writing. - */ - static int -union_open(ap) - struct vop_open_args /* { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *tvp; - int mode = ap->a_mode; - struct ucred *cred = ap->a_cred; - struct thread *td = ap->a_td; - int error = 0; - int tvpisupper = 1; +unionfs_open(struct vop_open_args *ap) +{ + int error; + int lvplocked; + struct unionfs_node *unp; + struct vnode *uvp; + struct vnode *lvp; + struct vnode *targetvp; + struct ucred *cred; + struct thread *td; - /* - * If there is an existing upper vp then simply open that. - * The upper vp takes precedence over the lower vp. When opening - * a lower vp for writing copy it to the uppervp and then open the - * uppervp. - * - * At the end of this section tvp will be left locked. - */ - if ((tvp = union_lock_upper(un, td)) == NULLVP) { - /* - * If the lower vnode is being opened for writing, then - * copy the file contents to the upper vnode and open that, - * otherwise can simply open the lower vnode. - */ - tvp = un->un_lowervp; - if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { - int docopy = !(mode & O_TRUNC); - error = union_copyup(un, docopy, cred, td); - tvp = union_lock_upper(un, td); - } else { - un->un_openl++; - VREF(tvp); - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td); - tvpisupper = 0; + UNIONFS_INTERNAL_DEBUG("unionfs_open: enter\n"); + + error = 0; + lvplocked = 0; + unp = VTOUNIONFS(ap->a_vp); + uvp = unp->un_uppervp; + lvp = unp->un_lowervp; + targetvp = NULLVP; + cred = ap->a_cred; + td = ap->a_td; + + if (unp->un_lower_opencnt || unp->un_upper_opencnt) { + /* vnode is already opend. 
*/ + if (unp->un_upper_opencnt) + targetvp = uvp; + else + targetvp = lvp; + + if (targetvp == lvp && + (ap->a_mode & FWRITE) && (lvp->v_type == VREG)) + targetvp = NULLVP; + if (targetvp == lvp && NULLVP != uvp) { + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); + lvplocked = 1; } } + if (NULLVP == targetvp) { + if (NULLVP == uvp) { + if ((ap->a_mode & FWRITE) && (lvp->v_type == VREG)) { + error = unionfs_copyfile(unp, !(ap->a_mode & O_TRUNC), cred, td); + if (error) + return (error); + targetvp = uvp = unp->un_uppervp; + } + } else + targetvp = uvp; + } + if (NULLVP == targetvp) + targetvp = lvp; - /* - * We are holding the correct vnode, open it. - */ - - if (error == 0) - error = VOP_OPEN(tvp, mode, cred, td, -1); - if (error == 0) - ap->a_vp->v_object = tvp->v_object; - /* - * Release any locks held. - */ - if (tvpisupper) { - if (tvp) - union_unlock_upper(tvp, td); - } else { - vput(tvp); + error = VOP_OPEN(targetvp, ap->a_mode, cred, td, ap->a_fdidx); + if (!error) { + if (targetvp == uvp) + unp->un_upper_opencnt++; + else + unp->un_lower_opencnt++; + ap->a_vp->v_object = targetvp->v_object; } + if (lvplocked) + VOP_UNLOCK(lvp, 0, td); + + UNIONFS_INTERNAL_DEBUG("unionfs_open: leave (%d)\n", error); + return (error); } -/* - * union_close: - * - * It is unclear whether a_vp is passed locked or unlocked. Whatever - * the case we do not change it. - */ - static int -union_close(ap) - struct vop_close_args /* { - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *vp; - - if ((vp = un->un_uppervp) == NULLVP) { -#ifdef UNION_DIAGNOSTIC - if (un->un_openl <= 0) - panic("union: un_openl cnt"); +unionfs_close(struct vop_close_args *ap) +{ + struct unionfs_node *unp; + struct ucred *cred; + struct thread *td; + struct vnode *ovp; + int error; + + UNIONFS_INTERNAL_DEBUG("unionfs_close: enter\n"); + + unp = VTOUNIONFS(ap->a_vp); + cred = ap->a_cred; + td = ap->a_td; + + if (!(unp->un_lower_opencnt) && !(unp->un_upper_opencnt)) { +#ifdef DIAGNOSTIC + panic("unionfs: open count is 0"); #endif - --un->un_openl; - vp = un->un_lowervp; + return (EINVAL); + } + if (unp->un_upper_opencnt) + ovp = unp->un_uppervp; + else + ovp = unp->un_lowervp; + + error = VOP_CLOSE(ovp, ap->a_fflag, cred, td); + + if (!error) { + ap->a_vp->v_object = ovp->v_object; + + if (ovp == unp->un_uppervp) + unp->un_upper_opencnt--; + else + unp->un_lower_opencnt--; } - ap->a_vp = vp; - return (VOP_CLOSE_AP(ap)); + + UNIONFS_INTERNAL_DEBUG("unionfs_close: leave (%d)\n", error); + + return (error); } /* - * Check access permission on the union vnode. - * The access check being enforced is to check - * against both the underlying vnode, and any - * copied vnode. This ensures that no additional - * file permissions are given away simply because - * the user caused an implicit file copy. + * Check the access mode toward shadow file/dir. 
*/ static int -union_access(ap) - struct vop_access_args /* { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct union_node *un = VTOUNION(ap->a_vp); - struct thread *td = ap->a_td; - int error = EACCES; - struct vnode *vp; +unionfs_check_corrected_access(u_short mode, + struct vattr *va, + struct ucred *cred) +{ + int count; + uid_t uid; /* upper side vnode's uid */ + gid_t gid; /* upper side vnode's gid */ + u_short vmode; /* upper side vnode's mode */ + gid_t *gp; + u_short mask; + + mask = 0; + uid = va->va_uid; + gid = va->va_gid; + vmode = va->va_mode; + + /* check owner */ + if (cred->cr_uid == uid) { + if (mode & VEXEC) + mask |= S_IXUSR; + if (mode & VREAD) + mask |= S_IRUSR; + if (mode & VWRITE) + mask |= S_IWUSR; + return ((vmode & mask) == mask ? 0 : EACCES); + } + + /* check group */ + count = 0; + gp = cred->cr_groups; + for (; count < cred->cr_ngroups; count++, gp++) { + if (gid == *gp) { + if (mode & VEXEC) + mask |= S_IXGRP; + if (mode & VREAD) + mask |= S_IRGRP; + if (mode & VWRITE) + mask |= S_IWGRP; + return ((vmode & mask) == mask ? 0 : EACCES); + } + } + + /* check other */ + if (mode & VEXEC) + mask |= S_IXOTH; + if (mode & VREAD) + mask |= S_IROTH; + if (mode & VWRITE) + mask |= S_IWOTH; + + return ((vmode & mask) == mask ? 0 : EACCES); +} + +static int +unionfs_access(struct vop_access_args *ap) +{ + struct unionfs_mount *ump; + struct unionfs_node *unp; + struct vnode *uvp; + struct vnode *lvp; + struct thread *td; + struct vattr va; + int mode; + int error; + + UNIONFS_INTERNAL_DEBUG("unionfs_access: enter\n"); + + ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount); + unp = VTOUNIONFS(ap->a_vp); + uvp = unp->un_uppervp; + lvp = unp->un_lowervp; + td = ap->a_td; + mode = ap->a_mode; + error = EACCES; - /* - * Disallow write attempts on filesystems mounted read-only. - */ - if ((ap->a_mode & VWRITE) && + if ((mode & VWRITE) && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { - case VREG: + case VREG: case VDIR: case VLNK: return (EROFS); @@ -829,1177 +590,1460 @@ } } - if ((vp = union_lock_upper(un, td)) != NULLVP) { - ap->a_vp = vp; - error = VOP_ACCESS_AP(ap); - union_unlock_upper(vp, td); - return(error); - } + if (NULLVP != uvp) { + error = VOP_ACCESS(uvp, mode, ap->a_cred, td); - if ((vp = un->un_lowervp) != NULLVP) { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - ap->a_vp = vp; + UNIONFS_INTERNAL_DEBUG("unionfs_access: leave (%d)\n", error); - /* - * Remove VWRITE from a_mode if our mount point is RW, because - * we want to allow writes and lowervp may be read-only. 
- */ - if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0) - ap->a_mode &= ~VWRITE; + return (error); + } - error = VOP_ACCESS_AP(ap); - if (error == 0) { - struct union_mount *um; - - um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount); - - if (um->um_op == UNMNT_BELOW) { - ap->a_cred = um->um_cred; - error = VOP_ACCESS_AP(ap); + if (NULLVP != lvp) { + if (mode & VWRITE) { + if (ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY) { + switch (ap->a_vp->v_type) { + case VREG: + case VDIR: + case VLNK: + return (EROFS); + default: + break; + } + } else if (ap->a_vp->v_type == VREG || ap->a_vp->v_type == VDIR) { + /* check shadow file/dir */ + if (UNIONFS_TRANSPARENT != ump->um_copymode) { + error = unionfs_create_uppervattr(ump, + lvp, &va, ap->a_cred, td); + if (error) + return (error); + + error = unionfs_check_corrected_access( + mode, &va, ap->a_cred); + if (error) + return (error); + } } + mode &= ~VWRITE; + mode |= VREAD; /* will copy to upper */ } - VOP_UNLOCK(vp, 0, td); + error = VOP_ACCESS(lvp, mode, ap->a_cred, td); } - return(error); -} -/* - * We handle getattr only to change the fsid and - * track object sizes - * - * It's not clear whether VOP_GETATTR is to be - * called with the vnode locked or not. stat() calls - * it with (vp) locked, and fstat() calls it with - * (vp) unlocked. - * - * Because of this we cannot use our normal locking functions - * if we do not intend to lock the main a_vp node. At the moment - * we are running without any specific locking at all, but beware - * to any programmer that care must be taken if locking is added - * to this function. - */ + UNIONFS_INTERNAL_DEBUG("unionfs_access: leave (%d)\n", error); + + return (error); +} static int -union_getattr(ap) - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; +unionfs_getattr(struct vop_getattr_args *ap) { - int error; - struct union_node *un = VTOUNION(ap->a_vp); - struct union_mount *um = MOUNTTOUNIONMOUNT(ap->a_vp->v_mount); - struct vnode *vp; - struct vattr *vap; - struct vattr va; + int error; + struct unionfs_node *unp; + struct unionfs_mount *ump; + struct vnode *uvp; + struct vnode *lvp; + struct thread *td; + struct vattr va; - /* - * Some programs walk the filesystem hierarchy by counting - * links to directories to avoid stat'ing all the time. - * This means the link count on directories needs to be "correct". - * The only way to do that is to call getattr on both layers - * and fix up the link count. The link count will not necessarily - * be accurate but will be large enough to defeat the tree walkers. - */ + UNIONFS_INTERNAL_DEBUG("unionfs_getattr: enter\n"); - vap = ap->a_vap; + unp = VTOUNIONFS(ap->a_vp); + ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount); + uvp = unp->un_uppervp; + lvp = unp->un_lowervp; + td = ap->a_td; - if ((vp = un->un_uppervp) != NULLVP) { - error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td); - if (error) - return (error); - /* XXX isn't this dangerous without a lock? 
*/ - union_newsize(ap->a_vp, vap->va_size, VNOVAL); - } + if (NULLVP != uvp) { + if (!(error = VOP_GETATTR(uvp, ap->a_vap, ap->a_cred, td))) + ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; - if (vp == NULLVP) { - vp = un->un_lowervp; - } else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) { - vp = un->un_lowervp; - vap = &va; - } else { - vp = NULLVP; + UNIONFS_INTERNAL_DEBUG("unionfs_getattr: leave mode=%o, uid=%d, gid=%d (%d)\n", + ap->a_vap->va_mode, ap->a_vap->va_uid, + ap->a_vap->va_gid, error); + + return (error); } - if (vp != NULLVP) { - error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td); - if (error) - return (error); - /* XXX isn't this dangerous without a lock? */ - union_newsize(ap->a_vp, VNOVAL, vap->va_size); + error = VOP_GETATTR(lvp, ap->a_vap, ap->a_cred, td); + + if (!error && !(ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY)) { + /* correct the attr toward shadow file/dir. */ + if ((ap->a_vp->v_type == VREG || ap->a_vp->v_type == VDIR)) { + unionfs_create_uppervattr_core(ump, ap->a_vap, &va, td); + ap->a_vap->va_mode = va.va_mode; + ap->a_vap->va_uid = va.va_uid; + ap->a_vap->va_gid = va.va_gid; + } } - if (ap->a_vap->va_fsid == um->um_upperdev) + if (!error) ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; - if ((vap != ap->a_vap) && (vap->va_type == VDIR)) - ap->a_vap->va_nlink += vap->va_nlink; - return (0); + UNIONFS_INTERNAL_DEBUG("unionfs_getattr: leave mode=%o, uid=%d, gid=%d (%d)\n", + ap->a_vap->va_mode, ap->a_vap->va_uid, ap->a_vap->va_gid, error); + + return (error); } static int -union_setattr(ap) - struct vop_setattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct union_node *un = VTOUNION(ap->a_vp); - struct thread *td = ap->a_td; - struct vattr *vap = ap->a_vap; - struct vnode *uppervp; - int error; +unionfs_setattr(struct vop_setattr_args *ap) +{ + int error; + struct unionfs_node *unp; + struct vnode *uvp; + struct vnode *lvp; + struct thread *td; + struct vattr *vap; + + UNIONFS_INTERNAL_DEBUG("unionfs_setattr: enter\n"); + + error = EROFS; + unp = VTOUNIONFS(ap->a_vp); + uvp = unp->un_uppervp; + lvp = unp->un_lowervp; + td = ap->a_td; + vap = ap->a_vap; - /* - * Disallow write attempts on filesystems mounted read-only. - */ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && - (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || - vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || - vap->va_mtime.tv_sec != VNOVAL || - vap->va_mode != (mode_t)VNOVAL)) { + (vap->va_flags != VNOVAL || vap->va_uid != (uid_t) VNOVAL || + vap->va_gid != (gid_t) VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t) VNOVAL)) return (EROFS); - } - /* - * Handle case of truncating lower object to zero size - * by creating a zero length upper object. This is to - * handle the case of open with O_TRUNC and O_CREAT. - */ - if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) { - error = union_copyup(un, (ap->a_vap->va_size != 0), - ap->a_cred, ap->a_td); - if (error) + if (NULLVP == uvp && lvp->v_type == VREG) { + if ((error = unionfs_copyfile(unp, (vap->va_size != 0), ap->a_cred, td))) return (error); + uvp = unp->un_uppervp; } - /* - * Try to set attributes in upper layer, - * otherwise return read-only filesystem error. 
- */ - error = EROFS; - if ((uppervp = union_lock_upper(un, td)) != NULLVP) { - error = VOP_SETATTR(un->un_uppervp, ap->a_vap, - ap->a_cred, ap->a_td); - if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) - union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); - union_unlock_upper(uppervp, td); + if (NULLVP != uvp) + error = VOP_SETATTR(uvp, vap, ap->a_cred, td); + + UNIONFS_INTERNAL_DEBUG("unionfs_setattr: leave (%d)\n", error); + + return (error); +} + +static int +unionfs_read(struct vop_read_args *ap) +{ + int error; + struct unionfs_node *unp; + struct vnode *ovp; + + /* UNIONFS_INTERNAL_DEBUG("unionfs_read: enter\n"); */ + + unp = VTOUNIONFS(ap->a_vp); + ovp = (unp->un_upper_opencnt ? unp->un_uppervp : unp->un_lowervp); + + if (NULLVP == ovp) { +#ifdef DIAGNOSTIC + panic("unionfs: no open vnode"); +#endif + return (EBADF); } + + error = VOP_READ(ovp, ap->a_uio, ap->a_ioflag, ap->a_cred); + + /* UNIONFS_INTERNAL_DEBUG("unionfs_read: leave (%d)\n", error); */ + return (error); } static int -union_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; +unionfs_write(struct vop_write_args *ap) +{ + int error; + struct unionfs_node *unp; + struct vnode *ovp; + + /* UNIONFS_INTERNAL_DEBUG("unionfs_write: enter\n"); */ + + unp = VTOUNIONFS(ap->a_vp); + ovp = (unp->un_upper_opencnt ? unp->un_uppervp : unp->un_lowervp); + + if (NULLVP == ovp) { +#ifdef DIAGNOSTIC + panic("unionfs: no open vnode"); +#endif + return (EBADF); + } + if (ovp != unp->un_uppervp && ovp->v_type != VSOCK) + return (EROFS); + + error = VOP_WRITE(ovp, ap->a_uio, ap->a_ioflag, ap->a_cred); + + /* UNIONFS_INTERNAL_DEBUG("unionfs_write: leave (%d)\n", error); */ + + return (error); +} + +static int +unionfs_lease(struct vop_lease_args *ap) { - struct union_node *un = VTOUNION(ap->a_vp); - struct thread *td = ap->a_uio->uio_td; - struct vnode *uvp; int error; + struct unionfs_node *unp; + struct vnode *vp; - uvp = union_lock_other(un, td); - KASSERT(uvp != NULL, ("union_read: backing vnode missing!")); + UNIONFS_INTERNAL_DEBUG("unionfs_lease: enter\n"); - error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred); - union_unlock_other(uvp, td); + unp = VTOUNIONFS(ap->a_vp); + vp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp); - /* - * XXX - * Perhaps the size of the underlying object has changed under - * our feet. Take advantage of the offset information present - * in the uio structure. 
- */
-	if (error == 0) {
-		struct union_node *un = VTOUNION(ap->a_vp);
-		off_t cur = ap->a_uio->uio_offset;
+	error = VOP_LEASE(vp, ap->a_td, ap->a_cred, ap->a_flag);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_lease: leave (%d)\n", error);
 
-		if (uvp == un->un_uppervp) {
-			if (cur > un->un_uppersz)
-				union_newsize(ap->a_vp, cur, VNOVAL);
-		} else {
-			if (cur > un->un_lowersz)
-				union_newsize(ap->a_vp, VNOVAL, cur);
-		}
-	}
 	return (error);
 }
 
 static int
-union_write(ap)
-	struct vop_write_args /* {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		int a_ioflag;
-		struct ucred *a_cred;
-	} */ *ap;
+unionfs_ioctl(struct vop_ioctl_args *ap)
 {
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct thread *td = ap->a_uio->uio_td;
-	struct vnode *uppervp;
 	int error;
+	struct unionfs_node *unp;
+	struct vnode *ovp;
 
-	if ((uppervp = union_lock_upper(un, td)) == NULLVP)
-		panic("union: missing upper layer in write");
+	UNIONFS_INTERNAL_DEBUG("unionfs_ioctl: enter\n");
 
-	error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	unp = VTOUNIONFS(ap->a_vp);
+	ovp = (unp->un_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
 
-	/*
-	 * The size of the underlying object may be changed by the
-	 * write.
-	 */
-	if (error == 0) {
-		off_t cur = ap->a_uio->uio_offset;
+	if (NULLVP == ovp)
+		return (EBADF);
+
+	error = VOP_IOCTL(ovp, ap->a_command, ap->a_data, ap->a_fflag,
+	    ap->a_cred, ap->a_td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_ioctl: leave (%d)\n", error);
 
-		if (cur > un->un_uppersz)
-			union_newsize(ap->a_vp, cur, VNOVAL);
-	}
-	union_unlock_upper(uppervp, td);
 	return (error);
 }
 
 static int
-union_lease(ap)
-	struct vop_lease_args /* {
-		struct vnode *a_vp;
-		struct thread *a_td;
-		struct ucred *a_cred;
-		int a_flag;
-	} */ *ap;
+unionfs_poll(struct vop_poll_args *ap)
 {
-	struct vnode *ovp = OTHERVP(ap->a_vp);
+	struct unionfs_node *unp;
+	struct vnode *ovp;
 
-	ap->a_vp = ovp;
-	return (VOP_LEASE_AP(ap));
+	unp = VTOUNIONFS(ap->a_vp);
+	ovp = (unp->un_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
+
+	if (NULLVP == ovp)
+		return (EBADF);
+
+	return (VOP_POLL(ovp, ap->a_events, ap->a_cred, ap->a_td));
 }
 
 static int
-union_ioctl(ap)
-	struct vop_ioctl_args /* {
-		struct vnode *a_vp;
-		u_long a_command;
-		caddr_t a_data;
-		int a_fflag;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_fsync(struct vop_fsync_args *ap)
 {
-	struct vnode *ovp = OTHERVP(ap->a_vp);
+	struct unionfs_node *unp;
+	struct vnode *ovp;
+
+	unp = VTOUNIONFS(ap->a_vp);
+	ovp = (unp->un_upper_opencnt ?
unp->un_uppervp : unp->un_lowervp); - ap->a_vp = ovp; - return (VOP_IOCTL_AP(ap)); + if (NULLVP == ovp) + return (EBADF); + + return (VOP_FSYNC(ovp, ap->a_waitfor, ap->a_td)); } static int -union_poll(ap) - struct vop_poll_args /* { - struct vnode *a_vp; - int a_events; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; +unionfs_remove(struct vop_remove_args *ap) { - struct vnode *ovp = OTHERVP(ap->a_vp); + int error; + struct unionfs_node *dunp; + struct unionfs_node *unp; + struct vnode *udvp; + struct vnode *uvp; + struct vnode *lvp; + struct componentname *cnp; + struct thread *td; + + UNIONFS_INTERNAL_DEBUG("unionfs_remove: enter\n"); + + error = 0; + dunp = VTOUNIONFS(ap->a_dvp); + unp = VTOUNIONFS(ap->a_vp); + udvp = dunp->un_uppervp; + uvp = unp->un_uppervp; + lvp = unp->un_lowervp; + cnp = ap->a_cnp; + td = curthread; - ap->a_vp = ovp; - return (VOP_POLL_AP(ap)); + if (NULLVP == udvp) + return (EROFS); + + if (NULLVP != uvp) { + cnp->cn_flags |= DOWHITEOUT; + error = VOP_REMOVE(udvp, uvp, cnp); + } + else if (NULLVP != lvp) + error = unionfs_mkwhiteout(udvp, cnp, td, unp->un_path); + + UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error); + + return (error); } static int -union_fsync(ap) - struct vop_fsync_args /* { - struct vnode *a_vp; - struct ucred *a_cred; - int a_waitfor; - struct thread *a_td; - } */ *ap; +unionfs_link(struct vop_link_args *ap) { - int error = 0; - struct thread *td = ap->a_td; - struct vnode *targetvp; - struct union_node *un = VTOUNION(ap->a_vp); + int error; + int needrelookup; + struct unionfs_node *dunp; + struct unionfs_node *unp; + struct vnode *udvp; + struct vnode *uvp; + struct componentname *cnp; + struct thread *td; + + UNIONFS_INTERNAL_DEBUG("unionfs_link: enter\n"); + + error = 0; + needrelookup = 0; + dunp = VTOUNIONFS(ap->a_tdvp); + unp = NULL; + udvp = dunp->un_uppervp; + uvp = NULLVP; + cnp = ap->a_cnp; + td = curthread; - if ((targetvp = union_lock_other(un, td)) != NULLVP) { - error = VOP_FSYNC(targetvp, ap->a_waitfor, td); - union_unlock_other(targetvp, td); + if (NULLVP == udvp) + return (EROFS); + + if (ap->a_vp->v_op != &unionfs_vnodeops) + uvp = ap->a_vp; + else { + unp = VTOUNIONFS(ap->a_vp); + + if (NULLVP == unp->un_uppervp) { + if (ap->a_vp->v_type != VREG) + return (EOPNOTSUPP); + + if ((error = unionfs_copyfile(unp, 1, cnp->cn_cred, td))) + return (error); + needrelookup = 1; + } + uvp = unp->un_uppervp; } + if (needrelookup) + error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td); + + if (!error) + error = VOP_LINK(udvp, uvp, cnp); + + UNIONFS_INTERNAL_DEBUG("unionfs_link: leave (%d)\n", error); + return (error); } -/* - * union_remove: - * - * Remove the specified cnp. The dvp and vp are passed to us locked - * and must remain locked on return. 
- */ +static int +unionfs_rename(struct vop_rename_args *ap) +{ + int error; + struct vnode *fdvp; + struct vnode *fvp; + struct componentname *fcnp; + struct vnode *tdvp; + struct vnode *tvp; + struct componentname *tcnp; + struct thread *td; + + /* rename target vnodes */ + struct vnode *rfdvp; + struct vnode *rfvp; + struct vnode *rtdvp; + struct vnode *rtvp; + + int needrelookup; + struct unionfs_mount *ump; + struct unionfs_node *unp; + + UNIONFS_INTERNAL_DEBUG("unionfs_rename: enter\n"); + + error = 0; + fdvp = ap->a_fdvp; + fvp = ap->a_fvp; + fcnp = ap->a_fcnp; + tdvp = ap->a_tdvp; + tvp = ap->a_tvp; + tcnp = ap->a_tcnp; + td = curthread; + rfdvp = fdvp; + rfvp = fvp; + rtdvp = tdvp; + rtvp = tvp; + needrelookup = 0; + +#ifdef DIAGNOSTIC + if ((fcnp->cn_flags & HASBUF) == 0 || (tcnp->cn_flags & HASBUF) == 0) + panic("unionfs_rename: no name"); +#endif + + /* check for cross device rename */ + if ((fvp->v_mount != tdvp->v_mount) || + (NULLVP != tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto unionfs_rename_abort; + } + + /* Renaming a file to itself has no effect. */ + if (fvp == tvp) + goto unionfs_rename_abort; + + /* + * from/to vnode is unionfs node. + */ + + unp = VTOUNIONFS(fdvp); + if (NULLVP == unp->un_uppervp) { + error = ENODEV; + goto unionfs_rename_abort; + } + rfdvp = unp->un_uppervp; + vref(rfdvp); + + unp = VTOUNIONFS(fvp); + ump = MOUNTTOUNIONFSMOUNT(fvp->v_mount); + if (NULLVP == unp->un_uppervp) { + switch (fvp->v_type) { + case VREG: + if ((error = vn_lock(fvp, LK_EXCLUSIVE, td)) != 0) + goto unionfs_rename_abort; + error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td); + VOP_UNLOCK(fvp, 0, td); + if (error) + goto unionfs_rename_abort; + break; + case VDIR: + if ((error = vn_lock(fvp, LK_EXCLUSIVE, td)) != 0) + goto unionfs_rename_abort; + error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td); + VOP_UNLOCK(fvp, 0, td); + if (error) + goto unionfs_rename_abort; + break; + default: + error = ENODEV; + goto unionfs_rename_abort; + } + + needrelookup = 1; + } + + if (NULLVP != unp->un_lowervp) + fcnp->cn_flags |= DOWHITEOUT; + rfvp = unp->un_uppervp; + vref(rfvp); + + unp = VTOUNIONFS(tdvp); + if (NULLVP == unp->un_uppervp) { + error = ENODEV; + goto unionfs_rename_abort; + } + rtdvp = unp->un_uppervp; + vref(rtdvp); + + if (tdvp == tvp) { + rtvp = rtdvp; + vref(rtvp); + } else if (NULLVP != tvp) { + unp = VTOUNIONFS(tvp); + if (NULLVP == unp->un_uppervp) + rtvp = NULLVP; + else { + if (tvp->v_type == VDIR) { + error = EINVAL; + goto unionfs_rename_abort; + } + rtvp = unp->un_uppervp; + vref(rtvp); + } + } + + if (needrelookup) { + if ((error = vn_lock(fdvp, LK_EXCLUSIVE, td)) != 0) + goto unionfs_rename_abort; + error = unionfs_relookup_for_delete(fdvp, fcnp, td); + VOP_UNLOCK(fdvp, 0, td); + if (error) + goto unionfs_rename_abort; + + /* Locke of tvp is canceled in order to avoid recursive lock. 
*/ + if (NULLVP != tvp && tvp != tdvp) + VOP_UNLOCK(tvp, 0, td); + error = unionfs_relookup_for_rename(tdvp, tcnp, td); + if (NULLVP != tvp && tvp != tdvp) + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td); + if (error) + goto unionfs_rename_abort; + } + + error = VOP_RENAME(rfdvp, rfvp, fcnp, rtdvp, rtvp, tcnp); + + if (fdvp != rfdvp) + vrele(fdvp); + if (fvp != rfvp) + vrele(fvp); + if (tdvp != rtdvp) + vrele(tdvp); + if (tvp != rtvp && NULLVP != tvp) { + if (NULLVP == rtvp) + vput(tvp); + else + vrele(tvp); + } + + UNIONFS_INTERNAL_DEBUG("unionfs_rename: leave (%d)\n", error); + + return (error); + +unionfs_rename_abort: + if (fdvp != rfdvp) + vrele(rfdvp); + if (fvp != rfvp) + vrele(rfvp); + if (tdvp != rtdvp) + vrele(rtdvp); + vput(tdvp); + if (tvp != rtvp && NULLVP != rtvp) + vrele(rtvp); + if (NULLVP != tvp) { + if (tdvp != tvp) + vput(tvp); + else + vrele(tvp); + } + vrele(fdvp); + vrele(fvp); + + UNIONFS_INTERNAL_DEBUG("unionfs_rename: leave (%d)\n", error); + + return (error); +} static int -union_remove(ap) - struct vop_remove_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - } */ *ap; -{ - struct union_node *dun = VTOUNION(ap->a_dvp); - struct union_node *un = VTOUNION(ap->a_vp); - struct componentname *cnp = ap->a_cnp; - struct thread *td = cnp->cn_thread; - struct vnode *uppervp; - struct vnode *upperdvp; - int error; +unionfs_mkdir(struct vop_mkdir_args *ap) +{ + int error; + struct unionfs_node *dunp; + struct componentname *cnp; + struct thread *td; + struct vnode *udvp; + struct vnode *uvp; + struct vattr va; + + UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: enter\n"); + + error = EROFS; + dunp = VTOUNIONFS(ap->a_dvp); + cnp = ap->a_cnp; + td = curthread; + udvp = dunp->un_uppervp; + + if (NULLVP != udvp) { + /* check opaque */ + if (!(cnp->cn_flags & ISWHITEOUT)) { + if ((error = VOP_GETATTR(udvp, &va, cnp->cn_cred, td))) + return (error); + if (va.va_flags & OPAQUE) + cnp->cn_flags |= ISWHITEOUT; /* create with opaque */ + } + + if (!(error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap))) { + VOP_UNLOCK(uvp, 0, td); + error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP, + ap->a_dvp, ap->a_vpp, cnp, td); + vrele(uvp); + } + } + + UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: leave (%d)\n", error); + + return (error); +} + +static int +unionfs_rmdir(struct vop_rmdir_args *ap) +{ + int error; + struct unionfs_node *dunp; + struct unionfs_node *unp; + struct componentname *cnp; + struct thread *td; + struct vnode *udvp; + struct vnode *uvp; + struct vnode *lvp; + + UNIONFS_INTERNAL_DEBUG("unionfs_rmdir: enter\n"); + + error = 0; + dunp = VTOUNIONFS(ap->a_dvp); + unp = VTOUNIONFS(ap->a_vp); + cnp = ap->a_cnp; + td = curthread; + udvp = dunp->un_uppervp; + uvp = unp->un_uppervp; + lvp = unp->un_lowervp; + + if (NULLVP == udvp) + return (EROFS); + + if (udvp == uvp) + return (EOPNOTSUPP); + + if (NULLVP != uvp) { + if (NULLVP != lvp && + (error = unionfs_check_rmdir(ap->a_vp, cnp->cn_cred, td))) + return (error); + cnp->cn_flags |= DOWHITEOUT; + error = VOP_RMDIR(udvp, uvp, cnp); + } + else if (NULLVP != lvp) + error = unionfs_mkwhiteout(udvp, cnp, td, unp->un_path); - if ((upperdvp = union_lock_upper(dun, td)) == NULLVP) - panic("union remove: null upper vnode"); + UNIONFS_INTERNAL_DEBUG("unionfs_rmdir: leave (%d)\n", error); - if ((uppervp = union_lock_upper(un, td)) != NULLVP) { - if (union_dowhiteout(un, cnp->cn_cred, td)) - cnp->cn_flags |= DOWHITEOUT; - if (cnp->cn_flags & DOWHITEOUT) /* XXX fs corruption */ - error = EOPNOTSUPP; - else - error = 
VOP_REMOVE(upperdvp, uppervp, cnp); - if (!error) - union_removed_upper(un); - union_unlock_upper(uppervp, td); - } else { - error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), - upperdvp, ap->a_cnp, un->un_path); - } - union_unlock_upper(upperdvp, td); return (error); } -/* - * union_link: - * - * tdvp and vp will be locked on entry. - * tdvp and vp should remain locked on return. - */ - static int -union_link(ap) - struct vop_link_args /* { - struct vnode *a_tdvp; - struct vnode *a_vp; - struct componentname *a_cnp; - } */ *ap; -{ - struct componentname *cnp = ap->a_cnp; - struct thread *td = cnp->cn_thread; - struct union_node *dun = VTOUNION(ap->a_tdvp); - struct vnode *vp; - struct vnode *tdvp; - int error = 0; - - if (ap->a_tdvp->v_op != ap->a_vp->v_op) { - vp = ap->a_vp; - } else { - struct union_node *tun = VTOUNION(ap->a_vp); +unionfs_symlink(struct vop_symlink_args *ap) +{ + int error; + struct unionfs_node *dunp; + struct componentname *cnp; + struct thread *td; + struct vnode *udvp; + struct vnode *uvp; - if (tun->un_uppervp == NULLVP) { -#if 0 - if (dun->un_uppervp == tun->un_dirvp) { - if (dun->un_flags & UN_ULOCK) { - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(dun->un_uppervp, 0, td); - } - } -#endif - error = union_copyup(tun, 1, cnp->cn_cred, td); -#if 0 - if (dun->un_uppervp == tun->un_dirvp) { - vn_lock(dun->un_uppervp, - LK_EXCLUSIVE | LK_RETRY, td); - dun->un_flags |= UN_ULOCK; - } -#endif - if (error) - return (error); - } - vp = tun->un_uppervp; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - } + UNIONFS_INTERNAL_DEBUG("unionfs_symlink: enter\n"); - /* - * Make sure upper is locked, then unlock the union directory we were - * called with to avoid a deadlock while we are calling VOP_LINK() on - * the upper (with tdvp locked and vp not locked). Our ap->a_tdvp - * is expected to be locked on return. - */ + error = EROFS; + dunp = VTOUNIONFS(ap->a_dvp); + cnp = ap->a_cnp; + td = curthread; + udvp = dunp->un_uppervp; - if ((tdvp = union_lock_upper(dun, td)) == NULLVP) - return (EROFS); + if (NULLVP != udvp) { + if (!(error = VOP_SYMLINK(udvp, &uvp, cnp, ap->a_vap, ap->a_target))) { + VOP_UNLOCK(uvp, 0, td); + error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP, + ap->a_dvp, ap->a_vpp, cnp, td); + vrele(uvp); + } + } - VOP_UNLOCK(ap->a_tdvp, 0, td); /* unlock calling node */ - error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */ + UNIONFS_INTERNAL_DEBUG("unionfs_symlink: leave (%d)\n", error); - /* - * Unlock tun->un_uppervp if we locked it above. - */ - if (ap->a_tdvp->v_op == ap->a_vp->v_op) - VOP_UNLOCK(vp, 0, td); - /* - * We have to unlock tdvp prior to relocking our calling node in - * order to avoid a deadlock. We also have to unlock ap->a_vp - * before relocking the directory, but then we have to relock - * ap->a_vp as our caller expects. 
- */ - VOP_UNLOCK(ap->a_vp, 0, td); - union_unlock_upper(tdvp, td); - vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, td); - vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td); return (error); } static int -union_rename(ap) - struct vop_rename_args /* { - struct vnode *a_fdvp; - struct vnode *a_fvp; - struct componentname *a_fcnp; - struct vnode *a_tdvp; - struct vnode *a_tvp; - struct componentname *a_tcnp; - } */ *ap; +unionfs_readdir(struct vop_readdir_args *ap) { - int error; - struct vnode *fdvp = ap->a_fdvp; - struct vnode *fvp = ap->a_fvp; - struct vnode *tdvp = ap->a_tdvp; - struct vnode *tvp = ap->a_tvp; + int error; + int eofflag; + struct unionfs_node *unp; + struct uio *uio; + struct vnode *uvp; + struct vnode *lvp; + struct thread *td; + struct vattr va; - /* - * Figure out what fdvp to pass to our upper or lower vnode. If we - * replace the fdvp, release the original one and ref the new one. - */ + int ncookies_bk; + u_long *cookies_bk; - if (fdvp->v_op == &union_vnodeops) { /* always true */ - struct union_node *un = VTOUNION(fdvp); - if (un->un_uppervp == NULLVP) { - /* - * this should never happen in normal - * operation but might if there was - * a problem creating the top-level shadow - * directory. - */ - error = EXDEV; - goto bad; - } - fdvp = un->un_uppervp; - VREF(fdvp); - vrele(ap->a_fdvp); + UNIONFS_INTERNAL_DEBUG("unionfs_readdir: enter\n"); + + error = 0; + eofflag = 0; + unp = VTOUNIONFS(ap->a_vp); + uio = ap->a_uio; + uvp = unp->un_uppervp; + lvp = unp->un_lowervp; + td = uio->uio_td; + ncookies_bk = 0; + cookies_bk = NULL; + + if (ap->a_vp->v_type != VDIR) + return (ENOTDIR); + + /* check opaque */ + if (NULLVP != uvp && NULLVP != lvp) { + if ((error = VOP_GETATTR(uvp, &va, ap->a_cred, td))) + return (error); + if (va.va_flags & OPAQUE) + lvp = NULLVP; } - /* - * Figure out what fvp to pass to our upper or lower vnode. If we - * replace the fvp, release the original one and ref the new one. - */ + /* upper only */ + if (NULLVP != uvp && NULLVP == lvp) { + error = VOP_READDIR(uvp, uio, ap->a_cred, ap->a_eofflag, + ap->a_ncookies, ap->a_cookies); - if (fvp->v_op == &union_vnodeops) { /* always true */ - struct union_node *un = VTOUNION(fvp); -#if 0 - struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount); -#endif + UNIONFS_INTERNAL_DEBUG("unionfs_readdir: leave (%d)\n", error); - if (un->un_uppervp == NULLVP) { - switch(fvp->v_type) { - case VREG: - vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread); - error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_thread); - VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_thread); - if (error) - goto bad; - break; - case VDIR: - /* - * XXX not yet. - * - * There is only one way to rename a directory - * based in the lowervp, and that is to copy - * the entire directory hierarchy. Otherwise - * it would not last across a reboot. 
- */ -#if 0 - vrele(fvp); - fvp = NULL; - vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread); - error = union_mkshadow(um, fdvp, - ap->a_fcnp, &un->un_uppervp); - VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_thread); - if (un->un_uppervp) - VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_thread); - if (error) - goto bad; - break; -#endif - default: - error = EXDEV; - goto bad; - } - } + return (error); + } + + /* lower only */ + if (NULLVP == uvp && NULLVP != lvp) { + error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag, + ap->a_ncookies, ap->a_cookies); - if (un->un_lowervp != NULLVP) - ap->a_fcnp->cn_flags |= DOWHITEOUT; - fvp = un->un_uppervp; - VREF(fvp); - vrele(ap->a_fvp); + UNIONFS_INTERNAL_DEBUG("unionfs_readdir: leave (%d)\n", error); + + return (error); } /* - * Figure out what tdvp (destination directory) to pass to the - * lower level. If we replace it with uppervp, we need to vput the - * old one. The exclusive lock is transfered to what we will pass - * down in the VOP_RENAME() and we replace uppervp with a simple - * reference. + * readdir upper and lower */ + if (0 == uio->uio_offset) + unp->un_readdir_flag = 0; - if (tdvp->v_op == &union_vnodeops) { - struct union_node *un = VTOUNION(tdvp); + if (0 == unp->un_readdir_flag) { + /* read upper */ + error = VOP_READDIR(uvp, uio, ap->a_cred, &eofflag, + ap->a_ncookies, ap->a_cookies); - if (un->un_uppervp == NULLVP) { - /* - * This should never happen in normal - * operation but might if there was - * a problem creating the top-level shadow - * directory. - */ - error = EXDEV; - goto bad; + if (error || !eofflag) { + UNIONFS_INTERNAL_DEBUG("unionfs_readdir: leave (%d)\n", error); + return (error); } + unp->un_readdir_flag = 1; /* - * New tdvp is a lock and reference on uppervp. - * Put away the old tdvp. + * ufs(and other fs) needs size of uio_resid larger than DIRBLKSIZ. + * size of DIRBLKSIZ equals DEV_BSIZE. + * (see: ufs/ufs/ufs_vnops.c ufs_readdir func , ufs/ufs/dir.h) */ - tdvp = union_lock_upper(un, ap->a_tcnp->cn_thread); - vput(ap->a_tdvp); - } - - /* - * Figure out what tvp (destination file) to pass to the - * lower level. - * - * If the uppervp file does not exist, put away the (wrong) - * file and change tvp to NULL. - */ + if (uio->uio_resid <= (uio->uio_resid & (DEV_BSIZE -1))) { + UNIONFS_INTERNAL_DEBUG("unionfs_readdir: leave (%d)\n", error); + return (0); + } - if (tvp != NULLVP && tvp->v_op == &union_vnodeops) { - struct union_node *un = VTOUNION(tvp); + /* + * backup cookies + * It prepares to readdir in lower. + */ + if (ap->a_ncookies) { + ncookies_bk = *(ap->a_ncookies); + *(ap->a_ncookies) = 0; + } + if (ap->a_cookies) { + cookies_bk = *(ap->a_cookies); + *(ap->a_cookies) = NULL; + } + } - tvp = union_lock_upper(un, ap->a_tcnp->cn_thread); - vput(ap->a_tvp); - /* note: tvp may be NULL */ + /* initialize for readdir in lower */ + if (1 == unp->un_readdir_flag) { + unp->un_readdir_flag = 2; + uio->uio_offset = 0; } - /* - * VOP_RENAME() releases/vputs prior to returning, so we have no - * cleanup to do. - */ + /* read lower */ + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); + error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag, + ap->a_ncookies, ap->a_cookies); + VOP_UNLOCK(lvp, 0, td); - return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); + if (cookies_bk) { + /* merge cookies */ + int size; + u_long *newcookies, *pos; - /* - * Error. We still have to release / vput the various elements. 
-	 */
+		size = *(ap->a_ncookies) + ncookies_bk;
+		pos = newcookies = (u_long *) malloc(size * sizeof(u_long), M_TEMP, M_WAITOK);
-bad:
-	vrele(fdvp);
-	if (fvp)
-		vrele(fvp);
-	vput(tdvp);
-	if (tvp != NULLVP) {
-		if (tvp != tdvp)
-			vput(tvp);
-		else
-			vrele(tvp);
+		memcpy(pos, cookies_bk, ncookies_bk * sizeof(u_long));
+		pos += ncookies_bk * sizeof(u_long);
+		memcpy(pos, *(ap->a_cookies), *(ap->a_ncookies) * sizeof(u_long));
+		free(cookies_bk, M_TEMP);
+		free(*(ap->a_cookies), M_TEMP);
+		*(ap->a_ncookies) = size;
+		*(ap->a_cookies) = newcookies;
 	}
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_readdir: leave (%d)\n", error);
+
 	return (error);
 }
 static int
-union_mkdir(ap)
-	struct vop_mkdir_args /* {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vattr *a_vap;
-	} */ *ap;
-{
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *upperdvp;
-	int error = EROFS;
+unionfs_readlink(struct vop_readlink_args *ap)
+{
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *vp;
-	if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) {
-		struct vnode *vp;
+	UNIONFS_INTERNAL_DEBUG("unionfs_readlink: enter\n");
-		error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
-		union_unlock_upper(upperdvp, td);
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp);
+
+	error = VOP_READLINK(vp, ap->a_uio, ap->a_cred);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_readlink: leave (%d)\n", error);
-		if (error == 0) {
-			VOP_UNLOCK(vp, 0, td);
-			UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vrefcnt(vp)));
-			error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
-				ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
-			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
-		}
-	}
 	return (error);
 }
 static int
-union_rmdir(ap)
-	struct vop_rmdir_args /* {
-		struct vnode *a_dvp;
-		struct vnode *a_vp;
-		struct componentname *a_cnp;
-	} */ *ap;
+unionfs_getwritemount(struct vop_getwritemount_args *ap)
 {
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *upperdvp;
-	struct vnode *uppervp;
-	int error;
+	int error;
+	struct vnode *uvp;
+	struct vnode *vp;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: enter\n");
-	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
-		panic("union rmdir: null upper vnode");
+	error = 0;
+	vp = ap->a_vp;
-	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
-		if (union_dowhiteout(un, cnp->cn_cred, td))
-			cnp->cn_flags |= DOWHITEOUT;
-		if (cnp->cn_flags & DOWHITEOUT)	/* XXX fs corruption */
+	if (NULLVP == vp || (vp->v_mount->mnt_flag & MNT_RDONLY))
+		return (EACCES);
+
+	uvp = UNIONFSVPTOUPPERVP(vp);
+	if (NULLVP == uvp && VREG == vp->v_type)
+		uvp = UNIONFSVPTOUPPERVP(VTOUNIONFS(vp)->un_dvp);
+
+	if (NULLVP != uvp)
+		error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp);
+	else {
+		VI_LOCK(vp);
+		if (vp->v_iflag & VI_FREE)
 			error = EOPNOTSUPP;
 		else
-			error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
-		if (!error)
-			union_removed_upper(un);
-		union_unlock_upper(uppervp, td);
-	} else {
-		error = union_mkwhiteout(
-		    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
-		    dun->un_uppervp, ap->a_cnp, un->un_path);
+			error = EACCES;
+		VI_UNLOCK(vp);
 	}
-	union_unlock_upper(upperdvp, td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: leave (%d)\n", error);
+
 	return (error);
 }
-/*
- * union_symlink:
- *
- *	dvp is locked on entry and remains locked on return. a_vpp is garbage
- *	(unused).
- */
-
 static int
-union_symlink(ap)
-	struct vop_symlink_args /* {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vattr *a_vap;
-		char *a_target;
-	} */ *ap;
-{
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct componentname *cnp = ap->a_cnp;
-	struct thread *td = cnp->cn_thread;
-	struct vnode *dvp;
-	int error = EROFS;
-
-	if ((dvp = union_lock_upper(dun, td)) != NULLVP) {
-		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
-		    ap->a_target);
-		union_unlock_upper(dvp, td);
-	}
-	return (error);
+unionfs_inactive(struct vop_inactive_args *ap)
+{
+	struct unionfs_node *unp;
+
+	unp = VTOUNIONFS(ap->a_vp);
+
+	if (NULL == unp || !(unp->un_flag & UNIONFS_CACHED))
+		vgone(ap->a_vp);
+
+	return (0);
 }
-/*
- * union_readdir ()works in concert with getdirentries() and
- * readdir(3) to provide a list of entries in the unioned
- * directories. getdirentries() is responsible for walking
- * down the union stack. readdir(3) is responsible for
- * eliminating duplicate names from the returned data stream.
- */
 static int
-union_readdir(ap)
-	struct vop_readdir_args /* {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		struct ucred *a_cred;
-		int *a_eofflag;
-		u_long *a_cookies;
-		int a_ncookies;
-	} */ *ap;
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct thread *td = ap->a_uio->uio_td;
-	struct vnode *uvp;
-	int error = 0;
-
-	if ((uvp = union_lock_upper(un, td)) != NULLVP) {
-		ap->a_vp = uvp;
-		error = VOP_READDIR_AP(ap);
-		union_unlock_upper(uvp, td);
-	}
-	return(error);
+unionfs_reclaim(struct vop_reclaim_args *ap)
+{
+	/* UNIONFS_INTERNAL_DEBUG("unionfs_reclaim: enter\n"); */
+
+	unionfs_hashrem(ap->a_vp, ap->a_td);
+
+	/* UNIONFS_INTERNAL_DEBUG("unionfs_reclaim: leave\n"); */
+
+	return (0);
 }
 static int
-union_readlink(ap)
-	struct vop_readlink_args /* {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		struct ucred *a_cred;
-	} */ *ap;
+unionfs_print(struct vop_print_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct uio *uio = ap->a_uio;
-	struct thread *td = uio->uio_td;
-	struct vnode *vp;
+	struct unionfs_node *unp;
-	vp = union_lock_other(un, td);
-	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));
+	unp = VTOUNIONFS(ap->a_vp);
-	ap->a_vp = vp;
-	error = VOP_READLINK_AP(ap);
-	union_unlock_other(vp, td);
+	printf("unionfs_vp=%p, uppervp=%p, lowervp=%p\n",
+	    ap->a_vp, unp->un_uppervp, unp->un_lowervp);
+	printf("unionfs opencnt: uppervp=%d, lowervp=%d\n",
+	    unp->un_upper_opencnt, unp->un_lower_opencnt);
-	return (error);
+	if (NULLVP != unp->un_uppervp)
+		vprint("unionfs: upper", unp->un_uppervp);
+	if (NULLVP != unp->un_lowervp)
+		vprint("unionfs: lower", unp->un_lowervp);
+
+	return (0);
 }
 static int
-union_getwritemount(ap)
-	struct vop_getwritemount_args /* {
-		struct vnode *a_vp;
-		struct mount **a_mpp;
-	} */ *ap;
+unionfs_lock(struct vop_lock_args *ap)
 {
-	struct vnode *vp = ap->a_vp;
-	struct vnode *uvp = UPPERVP(vp);
+	int error;
+	int flags;
+	struct vnode *vp;
+	struct vnode *tvp;
+	struct vnode *uvp;
+	struct unionfs_node *unp;
-	if (uvp == NULL) {
+	flags = ap->a_flags;
+	vp = ap->a_vp;
+
+	if (LK_RELEASE == (flags & LK_TYPE_MASK))
+		return (VOP_UNLOCK(vp, flags, ap->a_td));
+
+	if (!(flags & LK_INTERLOCK))
 		VI_LOCK(vp);
-		if (vp->v_iflag & VI_FREE) {
-			VI_UNLOCK(vp);
-			return (EOPNOTSUPP);
-		}
+
+	unp = VTOUNIONFS(vp);
+	if (NULL == unp)
+		goto unionfs_lock_null_vnode;
+	tvp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp);
+	if (NULLVP == tvp)
+		goto unionfs_lock_null_vnode;
+
+	VI_LOCK_FLAGS(tvp, MTX_DUPOK);
+	flags |= LK_INTERLOCK;
+	vholdl(tvp);
+
+	VI_UNLOCK(vp);
+	ap->a_flags &= ~LK_INTERLOCK;
+
+	error = VOP_LOCK(tvp, flags, ap->a_td);
+
+	VI_LOCK(vp);
+	unp = VTOUNIONFS(vp);
+	if (NULL == unp && !error) {
+		VOP_UNLOCK(tvp, 0, ap->a_td);
 		VI_UNLOCK(vp);
-		return (EACCES);
+		vdrop(tvp);
+		return (vop_stdlock(ap));
 	}
-	return(VOP_GETWRITEMOUNT(uvp, ap->a_mpp));
-}
-/*
- * union_inactive:
- *
- *	Called with the vnode locked. We are expected to unlock the vnode.
- */
+	if (!error && tvp == unp->un_lowervp &&
+	    NULLVP != unp->un_uppervp) {
+		VOP_UNLOCK(tvp, 0, ap->a_td);
+		uvp = unp->un_uppervp;
+		VI_LOCK_FLAGS(uvp, MTX_DUPOK);
+		vholdl(uvp);
+		VI_UNLOCK(vp);
+		error = VOP_LOCK(uvp, flags, ap->a_td);
+
+		VI_LOCK(vp);
+		unp = VTOUNIONFS(vp);
+		if (NULL == unp && !error) {
+			VOP_UNLOCK(uvp, 0, ap->a_td);
+			VI_UNLOCK(vp);
+			vdrop(uvp);
+			vdrop(tvp);
+			return (vop_stdlock(ap));
+		} else
+			VI_UNLOCK(vp);
+		vdrop(uvp);
+	} else
+		VI_UNLOCK(vp);
+	vdrop(tvp);
+
+	return (error);
+
+unionfs_lock_null_vnode:
+	ap->a_flags |= LK_INTERLOCK;
+	return (vop_stdlock(ap));
+}
 static int
-union_inactive(ap)
-	struct vop_inactive_args /* {
-		struct vnode *a_vp;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_unlock(struct vop_unlock_args *ap)
 {
-	struct vnode *vp = ap->a_vp;
-	struct union_node *un = VTOUNION(vp);
+	int flags;
+	int ismtxlock;
+	struct vnode *vp;
+	struct vnode *tvp;
+	struct unionfs_node *unp;
-	/*
-	 * Do nothing (and _don't_ bypass).
-	 * Wait to vrele lowervp until reclaim,
-	 * so that until then our union_node is in the
-	 * cache and reusable.
-	 *
-	 */
+	flags = ap->a_flags;
+	ismtxlock = 0;
+	vp = ap->a_vp;
-	if (un->un_dircache != NULL)
-		union_dircache_free(un);
+	if (!mtx_owned(VI_MTX(vp))) {
+		VI_LOCK(vp);
+		ismtxlock = 1;
+	}
-#if 0
-	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
-		un->un_flags &= ~UN_ULOCK;
-		VOP_UNLOCK(un->un_uppervp, 0, td);
+	unp = VTOUNIONFS(vp);
+	if (NULL == unp)
+		goto unionfs_unlock_null_vnode;
+	tvp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp);
+	if (NULLVP == tvp)
+		goto unionfs_unlock_null_vnode;
+
+	VI_LOCK_FLAGS(tvp, MTX_DUPOK);
+	flags |= LK_INTERLOCK;
+	if (ismtxlock || (ap->a_flags & LK_INTERLOCK)) {
+		VI_UNLOCK(vp);
+		ap->a_flags &= ~LK_INTERLOCK;
 	}
-#endif
-	if ((un->un_flags & UN_CACHED) == 0)
-		vgone(vp);
+	return (VOP_UNLOCK(tvp, flags, ap->a_td));
-	return (0);
+unionfs_unlock_null_vnode:
+	if (ismtxlock)
+		VI_UNLOCK(vp);
+	return (vop_stdunlock(ap));
 }
 static int
-union_reclaim(ap)
-	struct vop_reclaim_args /* {
-		struct vnode *a_vp;
-	} */ *ap;
+unionfs_pathconf(struct vop_pathconf_args *ap)
 {
-	union_freevp(ap->a_vp);
-
-	return (0);
-}
+	struct unionfs_node *unp;
+	struct vnode *vp;
-static int
-union_print(ap)
-	struct vop_print_args /* {
-		struct vnode *a_vp;
-	} */ *ap;
-{
-	struct vnode *vp = ap->a_vp;
-
-	printf("\tvp=%p, uppervp=%p, lowervp=%p\n",
-	    vp, UPPERVP(vp), LOWERVP(vp));
-	if (UPPERVP(vp) != NULLVP)
-		vprint("union: upper", UPPERVP(vp));
-	if (LOWERVP(vp) != NULLVP)
-		vprint("union: lower", LOWERVP(vp));
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp);
-	return (0);
+	return (VOP_PATHCONF(vp, ap->a_name, ap->a_retval));
 }
 static int
-union_pathconf(ap)
-	struct vop_pathconf_args /* {
-		struct vnode *a_vp;
-		int a_name;
-		int *a_retval;
-	} */ *ap;
+unionfs_advlock(struct vop_advlock_args *ap)
 {
-	int error;
-	struct thread *td = curthread;	/* XXX */
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *vp;
+	struct vnode *uvp;
+	struct thread *td;
-	vp = union_lock_other(un, td);
-	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
+	error = 0;
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = ap->a_vp;
+	uvp = unp->un_uppervp;
+	td = curthread;
-	ap->a_vp = vp;
-	error = VOP_PATHCONF_AP(ap);
-	union_unlock_other(vp, td);
+	if (VREG != vp->v_type)
+		return (EOPNOTSUPP);
-	return (error);
-}
+	UNIONFS_INTERNAL_DEBUG("unionfs_advlock: enter\n");
-static int
-union_advlock(ap)
-	struct vop_advlock_args /* {
-		struct vnode *a_vp;
-		caddr_t a_id;
-		int a_op;
-		struct flock *a_fl;
-		int a_flags;
-	} */ *ap;
-{
-	register struct vnode *ovp = OTHERVP(ap->a_vp);
+	if (NULLVP == uvp) {
+		/* vp is always unlocked. */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
-	ap->a_vp = ovp;
-	return (VOP_ADVLOCK_AP(ap));
-}
+		/*
+		 * To keep advisory locking consistent, the lock is always
+		 * taken on the upper file, so copy the file to the upper
+		 * layer first when it exists only in the lower layer.
+		 */
+		if (NULLVP == unp->un_uppervp)
+			error = unionfs_copyfile(unp, 1, td->td_ucred, td);
+		VOP_UNLOCK(vp, 0, td);
+		if (error)
+			goto unionfs_advlock_abort;
+		uvp = unp->un_uppervp;
+	}
+
+	error = VOP_ADVLOCK(uvp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags);
+
+unionfs_advlock_abort:
+	UNIONFS_INTERNAL_DEBUG("unionfs_advlock: leave (%d)\n", error);
+
+	return (error);
+}
-/*
- * XXX - vop_strategy must be hand coded because it has no
- * YYY - and it is not coherent with anything
- *
- * vnode in its arguments.
- * This goes away with a merged VM/buffer cache.
- */
 static int
-union_strategy(ap)
-	struct vop_strategy_args /* {
-		struct vnode *a_vp;
-		struct buf *a_bp;
-	} */ *ap;
+unionfs_strategy(struct vop_strategy_args *ap)
 {
-	struct buf *bp = ap->a_bp;
-	struct vnode *othervp = OTHERVP(ap->a_vp);
+	struct unionfs_node *unp;
+	struct vnode *vp;
+
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp);
 #ifdef DIAGNOSTIC
-	if (othervp == NULLVP)
-		panic("union_strategy: nil vp");
-	if ((bp->b_iocmd == BIO_WRITE) &&
-	    (othervp == LOWERVP(ap->a_vp)))
-		panic("union_strategy: writing to lowervp");
+	if (NULLVP == vp)
+		panic("unionfs_strategy: nullvp");
+
+	if (ap->a_bp->b_iocmd == BIO_WRITE && vp == unp->un_lowervp)
+		panic("unionfs_strategy: writing to lowervp");
 #endif
-	return (VOP_STRATEGY(othervp, bp));
+
+	return (VOP_STRATEGY(vp, ap->a_bp));
 }
 static int
-union_getacl(ap)
-	struct vop_getacl_args /* {
-		struct vnode *a_vp;
-		acl_type_t a_type;
-		struct acl *a_aclp;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_getacl(struct vop_getacl_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *vp;
+
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp);
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_GETACL_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	UNIONFS_INTERNAL_DEBUG("unionfs_getacl: enter\n");
+
+	error = VOP_GETACL(vp, ap->a_type, ap->a_aclp, ap->a_cred, ap->a_td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_getacl: leave (%d)\n", error);
 	return (error);
 }
 static int
-union_setacl(ap)
-	struct vop_setacl_args /* {
-		struct vnode *a_vp;
-		acl_type_t a_type;
-		struct acl *a_aclp;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_setacl(struct vop_setacl_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *uvp;
+	struct vnode *lvp;
+	struct thread *td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setacl: enter\n");
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	td = ap->a_td;
+
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	if (NULLVP == uvp && lvp->v_type == VREG) {
+		if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)))
+			return (error);
+		uvp = unp->un_uppervp;
+	}
+
+	if (NULLVP != uvp)
+		error = VOP_SETACL(uvp, ap->a_type, ap->a_aclp, ap->a_cred, td);
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_SETACL_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	UNIONFS_INTERNAL_DEBUG("unionfs_setacl: leave (%d)\n", error);
 	return (error);
 }
 static int
-union_aclcheck(ap)
-	struct vop_aclcheck_args /* {
-		struct vnode *a_vp;
-		acl_type_t a_type;
-		struct acl *a_aclp;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_aclcheck(struct vop_aclcheck_args *ap)
 {
-	struct vnode *ovp = OTHERVP(ap->a_vp);
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *vp;
-	ap->a_vp = ovp;
-	return (VOP_ACLCHECK_AP(ap));
+	UNIONFS_INTERNAL_DEBUG("unionfs_aclcheck: enter\n");
+
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp);
+
+	error = VOP_ACLCHECK(vp, ap->a_type, ap->a_aclp, ap->a_cred, ap->a_td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_aclcheck: leave (%d)\n", error);
+
+	return (error);
 }
 static int
-union_closeextattr(ap)
-	struct vop_closeextattr_args /* {
-		struct vnode *a_vp;
-		int a_commit;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_openextattr(struct vop_openextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *vp;
+
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = (NULLVP != unp->un_uppervp ? unp->un_uppervp : unp->un_lowervp);
+
+	if ((vp == unp->un_uppervp && (unp->un_flag & UNIONFS_OPENEXTU)) ||
+	    (vp == unp->un_lowervp && (unp->un_flag & UNIONFS_OPENEXTL)))
+		return (EBUSY);
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_CLOSEEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	error = VOP_OPENEXTATTR(vp, ap->a_cred, ap->a_td);
+
+	if (!error) {
+		if (vp == unp->un_uppervp)
+			unp->un_flag |= UNIONFS_OPENEXTU;
+		else
+			unp->un_flag |= UNIONFS_OPENEXTL;
+	}
 	return (error);
 }
 static int
-union_getextattr(ap)
-	struct vop_getextattr_args /* {
-		struct vnode *a_vp;
-		int a_attrnamespace;
-		const char *a_name;
-		struct uio *a_uio;
-		size_t *a_size;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_closeextattr(struct vop_closeextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *vp;
+
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = NULLVP;
+
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		vp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		vp = unp->un_lowervp;
+
+	if (NULLVP == vp)
+		return (EOPNOTSUPP);
+
+	error = VOP_CLOSEEXTATTR(vp, ap->a_commit, ap->a_cred, ap->a_td);
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_CLOSEEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	if (!error) {
+		if (vp == unp->un_uppervp)
+			unp->un_flag &= ~UNIONFS_OPENEXTU;
+		else
+			unp->un_flag &= ~UNIONFS_OPENEXTL;
+	}
 	return (error);
 }
 static int
-union_listextattr(ap)
-	struct vop_listextattr_args /* {
-		struct vnode *a_vp;
-		int a_attrnamespace;
-		struct uio *a_uio;
-		size_t *a_size;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_getextattr(struct vop_getextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	struct unionfs_node *unp;
+	struct vnode *vp;
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_LISTEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = NULLVP;
-	return (error);
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		vp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		vp = unp->un_lowervp;
+
+	if (NULLVP == vp)
+		return (EOPNOTSUPP);
+
+	return (VOP_GETEXTATTR(vp, ap->a_attrnamespace, ap->a_name,
+	    ap->a_uio, ap->a_size, ap->a_cred, ap->a_td));
 }
 static int
-union_openextattr(ap)
-	struct vop_openextattr_args /* {
-		struct vnode *a_vp;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_setextattr(struct vop_setextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *uvp;
+	struct vnode *lvp;
+	struct vnode *ovp;
+	struct ucred *cred;
+	struct thread *td;
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	ovp = NULLVP;
+	cred = ap->a_cred;
+	td = ap->a_td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: enter (un_flag=%x)\n", unp->un_flag);
+
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		ovp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		ovp = unp->un_lowervp;
+
+	if (NULLVP == ovp)
+		return (EOPNOTSUPP);
+
+	if (ovp == lvp && lvp->v_type == VREG) {
+		VOP_CLOSEEXTATTR(lvp, 0, cred, td);
+		if (NULLVP == uvp &&
+		    (error = unionfs_copyfile(unp, 1, cred, td))) {
+unionfs_setextattr_reopen:
+			if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+			    VOP_OPENEXTATTR(lvp, cred, td)) {
+#ifdef DIAGNOSTIC
+				panic("unionfs: VOP_OPENEXTATTR failed");
+#endif
+				unp->un_flag &= ~UNIONFS_OPENEXTL;
+			}
+			goto unionfs_setextattr_abort;
+		}
+		uvp = unp->un_uppervp;
+		if ((error = VOP_OPENEXTATTR(uvp, cred, td)))
+			goto unionfs_setextattr_reopen;
+		unp->un_flag &= ~UNIONFS_OPENEXTL;
+		unp->un_flag |= UNIONFS_OPENEXTU;
+		ovp = uvp;
+	}
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_OPENEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	if (ovp == uvp)
+		error = VOP_SETEXTATTR(ovp, ap->a_attrnamespace, ap->a_name,
+		    ap->a_uio, cred, td);
+
+unionfs_setextattr_abort:
+	UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: leave (%d)\n", error);
 	return (error);
 }
 static int
-union_deleteextattr(ap)
-	struct vop_deleteextattr_args /* {
-		struct vnode *a_vp;
-		int a_attrnamespace;
-		const char *a_name;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_listextattr(struct vop_listextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	struct unionfs_node *unp;
+	struct vnode *vp;
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_DELETEEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	unp = VTOUNIONFS(ap->a_vp);
+	vp = NULLVP;
-	return (error);
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		vp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		vp = unp->un_lowervp;
+
+	if (NULLVP == vp)
+		return (EOPNOTSUPP);
+
+	return (VOP_LISTEXTATTR(vp, ap->a_attrnamespace, ap->a_uio,
+	    ap->a_size, ap->a_cred, ap->a_td));
 }
 static int
-union_setextattr(ap)
-	struct vop_setextattr_args /* {
-		struct vnode *a_vp;
-		int a_attrnamespace;
-		const char *a_name;
-		struct uio *a_uio;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_deleteextattr(struct vop_deleteextattr_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *uvp;
+	struct vnode *lvp;
+	struct vnode *ovp;
+	struct ucred *cred;
+	struct thread *td;
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	ovp = NULLVP;
+	cred = ap->a_cred;
+	td = ap->a_td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_deleteextattr: enter (un_flag=%x)\n", unp->un_flag);
+
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	if (unp->un_flag & UNIONFS_OPENEXTU)
+		ovp = unp->un_uppervp;
+	else if (unp->un_flag & UNIONFS_OPENEXTL)
+		ovp = unp->un_lowervp;
+
+	if (NULLVP == ovp)
+		return (EOPNOTSUPP);
+
+	if (ovp == lvp && lvp->v_type == VREG) {
+		VOP_CLOSEEXTATTR(lvp, 0, cred, td);
+		if (NULLVP == uvp &&
+		    (error = unionfs_copyfile(unp, 1, cred, td))) {
+unionfs_deleteextattr_reopen:
+			if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+			    VOP_OPENEXTATTR(lvp, cred, td)) {
+#ifdef DIAGNOSTIC
+				panic("unionfs: VOP_OPENEXTATTR failed");
+#endif
+				unp->un_flag &= ~UNIONFS_OPENEXTL;
+			}
+			goto unionfs_deleteextattr_abort;
+		}
+		uvp = unp->un_uppervp;
+		if ((error = VOP_OPENEXTATTR(uvp, cred, td)))
+			goto unionfs_deleteextattr_reopen;
+		unp->un_flag &= ~UNIONFS_OPENEXTL;
+		unp->un_flag |= UNIONFS_OPENEXTU;
+		ovp = uvp;
+	}
+
+	if (ovp == uvp)
+		error = VOP_DELETEEXTATTR(ovp, ap->a_attrnamespace, ap->a_name,
+		    ap->a_cred, ap->a_td);
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_SETEXTATTR_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+unionfs_deleteextattr_abort:
+	UNIONFS_INTERNAL_DEBUG("unionfs_deleteextattr: leave (%d)\n", error);
 	return (error);
 }
 static int
-union_setlabel(ap)
-	struct vop_setlabel_args /* {
-		struct vnode *a_vp;
-		struct label *a_label;
-		struct ucred *a_cred;
-		struct thread *a_td;
-	} */ *ap;
+unionfs_setlabel(struct vop_setlabel_args *ap)
 {
-	int error;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
+	int error;
+	struct unionfs_node *unp;
+	struct vnode *uvp;
+	struct vnode *lvp;
+	struct thread *td;
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setlabel: enter\n");
+
+	error = EROFS;
+	unp = VTOUNIONFS(ap->a_vp);
+	uvp = unp->un_uppervp;
+	lvp = unp->un_lowervp;
+	td = ap->a_td;
-	vp = union_lock_other(un, ap->a_td);
-	ap->a_vp = vp;
-	error = VOP_SETLABEL_AP(ap);
-	union_unlock_other(vp, ap->a_td);
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	if (NULLVP == uvp && lvp->v_type == VREG) {
+		if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)))
+			return (error);
+		uvp = unp->un_uppervp;
+	}
+
+	if (NULLVP != uvp)
+		error = VOP_SETLABEL(uvp, ap->a_label, ap->a_cred, td);
+
+	UNIONFS_INTERNAL_DEBUG("unionfs_setlabel: leave (%d)\n", error);
 	return (error);
 }
-/*
- * Global vfs data structures
- */
-struct vop_vector union_vnodeops = {
+struct vop_vector unionfs_vnodeops = {
 	.vop_default = &default_vnodeops,
-	.vop_access = union_access,
-	.vop_aclcheck = union_aclcheck,
-	.vop_advlock = union_advlock,
+	.vop_access = unionfs_access,
+	.vop_aclcheck = unionfs_aclcheck,
+	.vop_advlock = unionfs_advlock,
 	.vop_bmap = VOP_EOPNOTSUPP,
-	.vop_close = union_close,
-	.vop_closeextattr = union_closeextattr,
-	.vop_create = union_create,
-	.vop_deleteextattr = union_deleteextattr,
-	.vop_fsync = union_fsync,
-	.vop_getacl = union_getacl,
-	.vop_getattr = union_getattr,
-	.vop_getextattr = union_getextattr,
-	.vop_getwritemount = union_getwritemount,
-	.vop_inactive = union_inactive,
-	.vop_ioctl = union_ioctl,
-	.vop_lease = union_lease,
-	.vop_link = union_link,
-	.vop_listextattr = union_listextattr,
-	.vop_lookup = union_lookup,
-	.vop_mkdir = union_mkdir,
-	.vop_mknod = union_mknod,
-	.vop_open = union_open,
-	.vop_openextattr = union_openextattr,
-	.vop_pathconf = union_pathconf,
-	.vop_poll = union_poll,
-	.vop_print = union_print,
-	.vop_read = union_read,
-	.vop_readdir = union_readdir,
-	.vop_readlink = union_readlink,
-	.vop_reclaim = union_reclaim,
-	.vop_remove = union_remove,
-	.vop_rename = union_rename,
-	.vop_rmdir = union_rmdir,
-	.vop_setacl = union_setacl,
-	.vop_setattr = union_setattr,
-	.vop_setextattr = union_setextattr,
-	.vop_setlabel = union_setlabel,
-	.vop_strategy = union_strategy,
-	.vop_symlink = union_symlink,
-	.vop_whiteout = union_whiteout,
-	.vop_write = union_write,
+	.vop_close = unionfs_close,
+	.vop_closeextattr = unionfs_closeextattr,
+	.vop_create = unionfs_create,
+	.vop_deleteextattr = unionfs_deleteextattr,
+	.vop_fsync = unionfs_fsync,
+	.vop_getacl = unionfs_getacl,
+	.vop_getattr = unionfs_getattr,
+	.vop_getextattr = unionfs_getextattr,
+	.vop_getwritemount = unionfs_getwritemount,
+	.vop_inactive = unionfs_inactive,
+	.vop_ioctl = unionfs_ioctl,
+	.vop_lease = unionfs_lease,
+	.vop_link = unionfs_link,
+	.vop_listextattr = unionfs_listextattr,
+	.vop_lock = unionfs_lock,
+	.vop_lookup = unionfs_lookup,
+	.vop_mkdir = unionfs_mkdir,
+	.vop_mknod = unionfs_mknod,
+	.vop_open = unionfs_open,
+	.vop_openextattr = unionfs_openextattr,
+	.vop_pathconf = unionfs_pathconf,
+	.vop_poll = unionfs_poll,
+	.vop_print = unionfs_print,
+	.vop_read = unionfs_read,
+	.vop_readdir = unionfs_readdir,
+	.vop_readlink = unionfs_readlink,
+	.vop_reclaim = unionfs_reclaim,
+	.vop_remove = unionfs_remove,
+	.vop_rename = unionfs_rename,
+	.vop_rmdir = unionfs_rmdir,
+	.vop_setacl = unionfs_setacl,
+	.vop_setattr = unionfs_setattr,
+	.vop_setextattr = unionfs_setextattr,
+	.vop_setlabel = unionfs_setlabel,
+	.vop_strategy = unionfs_strategy,
+	.vop_symlink = unionfs_symlink,
+	.vop_unlock = unionfs_unlock,
+	.vop_whiteout = unionfs_whiteout,
+	.vop_write = unionfs_write,
 };