Index: kern_descrip.c =================================================================== RCS file: /usr/home/ncvs/src/sys/kern/kern_descrip.c,v retrieving revision 1.311 diff -u -r1.311 kern_descrip.c --- kern_descrip.c 16 Jun 2007 23:41:43 -0000 1.311 +++ kern_descrip.c 2 Jul 2007 19:35:33 -0000 @@ -341,6 +341,18 @@ return (error); } +static inline struct file * +fdtofp(int fd, struct filedesc *fdp) +{ + struct file *fp; + + FILEDESC_LOCK_ASSERT(fdp); + if ((unsigned)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) + return (NULL); + return (fp); +} + int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { @@ -352,46 +364,23 @@ struct vnode *vp; u_int newmin; int error, flg, tmp; - int giant_locked; - - /* - * XXXRW: Some fcntl() calls require Giant -- others don't. Try to - * avoid grabbing Giant for calls we know don't need it. - */ - switch (cmd) { - case F_DUPFD: - case F_GETFD: - case F_SETFD: - case F_GETFL: - giant_locked = 0; - break; - - default: - giant_locked = 1; - mtx_lock(&Giant); - } + int vfslocked; + vfslocked = 0; error = 0; flg = F_POSIX; p = td->td_proc; fdp = p->p_fd; - /* - * XXXRW: It could be an exclusive lock is not [always] needed here. - */ - FILEDESC_XLOCK(fdp); - if ((unsigned)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) { - FILEDESC_XUNLOCK(fdp); - error = EBADF; - goto done2; - } - pop = &fdp->fd_ofileflags[fd]; - switch (cmd) { case F_DUPFD: - /* mtx_assert(&Giant, MA_NOTOWNED); */ - FILEDESC_XUNLOCK(fdp); + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } + FILEDESC_SUNLOCK(fdp); newmin = arg; PROC_LOCK(p); if (newmin >= lim_cur(p, RLIMIT_NOFILE) || @@ -405,34 +394,56 @@ break; case F_GETFD: - /* mtx_assert(&Giant, MA_NOTOWNED); */ + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } + pop = &fdp->fd_ofileflags[fd]; td->td_retval[0] = (*pop & UF_EXCLOSE) ? 
FD_CLOEXEC : 0; - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); break; case F_SETFD: - /* mtx_assert(&Giant, MA_NOTOWNED); */ + FILEDESC_XLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_XUNLOCK(fdp); + error = EBADF; + break; + } + pop = &fdp->fd_ofileflags[fd]; *pop = (*pop &~ UF_EXCLOSE) | (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); FILEDESC_XUNLOCK(fdp); break; case F_GETFL: - /* mtx_assert(&Giant, MA_NOTOWNED); */ + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } FILE_LOCK(fp); td->td_retval[0] = OFLAGS(fp->f_flag); FILE_UNLOCK(fp); - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); break; case F_SETFL: - mtx_assert(&Giant, MA_OWNED); + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } FILE_LOCK(fp); fhold_locked(fp); fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; FILE_UNLOCK(fp); - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); if (error) { @@ -454,9 +465,14 @@ break; case F_GETOWN: - mtx_assert(&Giant, MA_OWNED); + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } fhold(fp); - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; @@ -464,33 +480,41 @@ break; case F_SETOWN: - mtx_assert(&Giant, MA_OWNED); + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } fhold(fp); - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); break; case F_SETLKW: - mtx_assert(&Giant, MA_OWNED); flg |= F_WAIT; /* FALLTHROUGH F_SETLK */ case F_SETLK: - mtx_assert(&Giant, MA_OWNED); + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, 
fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); error = EBADF; break; } - flp = (struct flock *)arg; if (flp->l_whence == SEEK_CUR) { if (fp->f_offset < 0 || (flp->l_start > 0 && fp->f_offset > OFF_MAX - flp->l_start)) { - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; break; } @@ -501,9 +525,9 @@ * VOP_ADVLOCK() may block. */ fhold(fp); - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; - + vfslocked = VFS_LOCK_GIANT(vp->v_mount); switch (flp->l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) { @@ -535,33 +559,43 @@ error = EINVAL; break; } + VFS_UNLOCK_GIANT(vfslocked); + vfslocked = 0; /* Check for race with close */ - FILEDESC_XLOCK(fdp); + FILEDESC_SLOCK(fdp); if ((unsigned) fd >= fdp->fd_nfiles || fp != fdp->fd_ofiles[fd]) { - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; flp->l_type = F_UNLCK; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); + VFS_UNLOCK_GIANT(vfslocked); + vfslocked = 0; } else - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); fdrop(fp, td); break; case F_GETLK: - mtx_assert(&Giant, MA_OWNED); + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); error = EBADF; break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); error = EINVAL; break; } @@ -570,7 +604,7 @@ fp->f_offset > OFF_MAX - flp->l_start) || (flp->l_start < 0 && fp->f_offset < OFF_MIN - flp->l_start)) { - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; break; } @@ -580,20 +614,20 @@ * VOP_ADVLOCK() may block. 
*/ fhold(fp); - FILEDESC_XUNLOCK(fdp); + FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); + VFS_UNLOCK_GIANT(vfslocked); + vfslocked = 0; fdrop(fp, td); break; default: - FILEDESC_XUNLOCK(fdp); error = EINVAL; break; } -done2: - if (giant_locked) - mtx_unlock(&Giant); + VFS_UNLOCK_GIANT(vfslocked); return (error); } @@ -2174,6 +2208,7 @@ struct file *fp; struct vnode *vp; struct flock lf; + int vfslocked; int error; if ((error = fget(td, uap->fd, &fp)) != 0) @@ -2183,8 +2218,8 @@ return (EOPNOTSUPP); } - mtx_lock(&Giant); vp = fp->f_vnode; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; @@ -2211,7 +2246,7 @@ (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); done2: fdrop(fp, td); - mtx_unlock(&Giant); + VFS_UNLOCK_GIANT(vfslocked); return (error); } /* Index: kern_lockf.c =================================================================== RCS file: /usr/home/ncvs/src/sys/kern/kern_lockf.c,v retrieving revision 1.55 diff -u -r1.55 kern_lockf.c --- kern_lockf.c 5 Jun 2007 00:00:54 -0000 1.55 +++ kern_lockf.c 2 Jul 2007 19:35:33 -0000 @@ -73,14 +73,14 @@ #define NOLOCKF (struct lockf *)0 #define SELF 0x1 #define OTHERS 0x2 -static int lf_clearlock(struct lockf *); +static int lf_clearlock(struct lockf *, struct lockf **); static int lf_findoverlap(struct lockf *, struct lockf *, int, struct lockf ***, struct lockf **); static struct lockf * lf_getblock(struct lockf *); static int lf_getlock(struct lockf *, struct flock *); -static int lf_setlock(struct lockf *); -static void lf_split(struct lockf *, struct lockf *); +static int lf_setlock(struct lockf *, struct vnode *, struct lockf **); +static void lf_split(struct lockf *, struct lockf *, struct lockf **); static void lf_wakelock(struct lockf *); #ifdef LOCKF_DEBUG static void lf_print(char *, struct lockf *); @@ -102,12 +102,13 @@ struct lockf **head; u_quad_t 
size; { - register struct flock *fl = ap->a_fl; - register struct lockf *lock; + struct flock *fl = ap->a_fl; + struct lockf *lock; + struct vnode *vp = ap->a_vp; off_t start, end, oadd; + struct lockf *split; int error; - mtx_lock(&Giant); /* * Convert the flock structure into a start and end. */ @@ -124,40 +125,29 @@ case SEEK_END: if (size > OFF_MAX || - (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) { - error = EOVERFLOW; - goto out; - } + (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) + return (EOVERFLOW); start = size + fl->l_start; break; default: - error = EINVAL; - goto out; - } - if (start < 0) { - error = EINVAL; - goto out; + return (EINVAL); } + if (start < 0) + return (EINVAL); if (fl->l_len < 0) { - if (start == 0) { - error = EINVAL; - goto out; - } + if (start == 0) + return (EINVAL); end = start - 1; start += fl->l_len; - if (start < 0) { - error = EINVAL; - goto out; - } + if (start < 0) + return (EINVAL); } else if (fl->l_len == 0) end = -1; else { oadd = fl->l_len - 1; - if (oadd > OFF_MAX - start) { - error = EOVERFLOW; - goto out; - } + if (oadd > OFF_MAX - start) + return (EOVERFLOW); end = start + oadd; } /* @@ -166,11 +156,16 @@ if (*head == (struct lockf *)0) { if (ap->a_op != F_SETLK) { fl->l_type = F_UNLCK; - error = 0; - goto out; + return (0); } } /* + * Allocate a spare structure in case we have to split. + */ + split = NULL; + if (ap->a_op == F_SETLK || ap->a_op == F_UNLCK) + MALLOC(split, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); + /* * Create the lockf structure */ MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); @@ -192,29 +187,30 @@ /* * Do the requested operation. 
*/ + VI_LOCK(vp); switch(ap->a_op) { case F_SETLK: - error = lf_setlock(lock); - goto out; + error = lf_setlock(lock, vp, &split); + break; case F_UNLCK: - error = lf_clearlock(lock); + error = lf_clearlock(lock, &split); FREE(lock, M_LOCKF); - goto out; + break; case F_GETLK: error = lf_getlock(lock, fl); FREE(lock, M_LOCKF); - goto out; + break; default: free(lock, M_LOCKF); error = EINVAL; - goto out; + break; } - /* NOTREACHED */ -out: - mtx_unlock(&Giant); + VI_UNLOCK(vp); + if (split) + FREE(split, M_LOCKF); return (error); } @@ -222,10 +218,12 @@ * Set a byte-range lock. */ static int -lf_setlock(lock) - register struct lockf *lock; +lf_setlock(lock, vp, split) + struct lockf *lock; + struct vnode *vp; + struct lockf **split; { - register struct lockf *block; + struct lockf *block; struct lockf **head = lock->lf_head; struct lockf **prev, *overlap, *ltmp; static char lockstr[] = "lockf"; @@ -310,7 +308,7 @@ if ((lock->lf_flags & F_FLOCK) && lock->lf_type == F_WRLCK) { lock->lf_type = F_UNLCK; - (void) lf_clearlock(lock); + (void) lf_clearlock(lock, split); lock->lf_type = F_WRLCK; } /* @@ -325,7 +323,7 @@ lf_printlist("lf_setlock", block); } #endif /* LOCKF_DEBUG */ - error = tsleep(lock, priority, lockstr, 0); + error = msleep(lock, VI_MTX(vp), priority, lockstr, 0); /* * We may have been awakened by a signal and/or by a * debugger continuing us (in which cases we must remove @@ -402,7 +400,7 @@ lock->lf_next = overlap; overlap->lf_start = lock->lf_end + 1; } else - lf_split(overlap, lock); + lf_split(overlap, lock, split); lf_wakelock(overlap); break; @@ -479,8 +477,9 @@ * and remove it (or shrink it), then wakeup anyone we can. 
*/
 static int
-lf_clearlock(unlock)
-	register struct lockf *unlock;
+lf_clearlock(unlock, split)
+	struct lockf *unlock;
+	struct lockf **split;
 {
 	struct lockf **head = unlock->lf_head;
 	register struct lockf *lf = *head;
@@ -514,7 +513,7 @@
 				overlap->lf_start = unlock->lf_end + 1;
 				break;
 			}
-			lf_split(overlap, unlock);
+			lf_split(overlap, unlock, split);
 			overlap->lf_next = unlock->lf_next;
 			break;
 
@@ -722,11 +721,12 @@
  * two or three locks as necessary.
  */
 static void
-lf_split(lock1, lock2)
-	register struct lockf *lock1;
-	register struct lockf *lock2;
+lf_split(lock1, lock2, split)
+	struct lockf *lock1;
+	struct lockf *lock2;
+	struct lockf **split;
 {
-	register struct lockf *splitlock;
+	struct lockf *splitlock;
 
 #ifdef LOCKF_DEBUG
 	if (lockf_debug & 2) {
@@ -750,9 +750,15 @@
 	}
 	/*
 	 * Make a new lock consisting of the last part of
-	 * the encompassing lock
+	 * the encompassing lock.  We use the preallocated
+	 * splitlock so we don't have to block: lf_advlock()
+	 * allocates one spare before taking the vnode interlock
+	 * and holds the interlock across this call.  Take the
+	 * spare and clear *split so lf_advlock() does not free it.
 	 */
-	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
+	splitlock = *split;
+	KASSERT(splitlock != NULL, ("lf_split: no preallocated split lock"));
+	*split = NULL;
 	bcopy(lock1, splitlock, sizeof *splitlock);
 	splitlock->lf_start = lock2->lf_end + 1;
 	TAILQ_INIT(&splitlock->lf_blkhd);