--- /usr/src/sys/kern/kern_descrip.c 2008-05-25 16:57:43.000000000 +0200 +++ src/sys/kern/kern_descrip.c 2008-05-26 16:23:32.000000000 +0200 @@ -1449,6 +1449,7 @@ * descriptor to the list of open files at that point, otherwise * put it at the front of the list of open files. */ + sx_init(&fp->f_lock, "f_offset lock"); refcount_init(&fp->f_count, 1); if (resultfp) fhold(fp); @@ -2227,6 +2228,7 @@ */ if (fp->f_cdevpriv != NULL) devfs_fpdrop(fp); + sx_destroy(&fp->f_lock); atomic_subtract_int(&openfiles, 1); crfree(fp->f_cred); uma_zfree(file_zone, fp); --- /usr/src/sys/kern/vfs_vnops.c 2008-04-01 21:26:46.000000000 +0200 +++ src/sys/kern/vfs_vnops.c 2008-05-26 16:45:16.000000000 +0200 @@ -511,45 +511,23 @@ ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; + ioflag |= sequential_heuristic(uio, fp); + if ((flags & FOF_OFFSET) == 0) + uio->uio_offset = fp->f_offset; vfslocked = VFS_LOCK_GIANT(vp->v_mount); VOP_LEASE(vp, td, fp->f_cred, LEASE_READ); - /* - * According to McKusick the vn lock was protecting f_offset here. - * It is now protected by the FOFFSET_LOCKED flag. - */ - if ((flags & FOF_OFFSET) == 0) { - mtxp = mtx_pool_find(mtxpool_sleep, fp); - mtx_lock(mtxp); - while(fp->f_vnread_flags & FOFFSET_LOCKED) { - fp->f_vnread_flags |= FOFFSET_LOCK_WAITING; - msleep(&fp->f_vnread_flags, mtxp, PUSER -1, - "vnread offlock", 0); - } - fp->f_vnread_flags |= FOFFSET_LOCKED; - mtx_unlock(mtxp); - vn_lock(vp, LK_SHARED | LK_RETRY); - uio->uio_offset = fp->f_offset; - } else - vn_lock(vp, LK_SHARED | LK_RETRY); - - ioflag |= sequential_heuristic(uio, fp); + vn_lock(vp, LK_SHARED | LK_RETRY); #ifdef MAC error = mac_vnode_check_read(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_READ(vp, uio, ioflag, fp->f_cred); - if ((flags & FOF_OFFSET) == 0) { - fp->f_offset = uio->uio_offset; - mtx_lock(mtxp); - if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING) - wakeup(&fp->f_vnread_flags); - fp->f_vnread_flags = 0; - mtx_unlock(mtxp); - } - fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); + if ((flags & FOF_OFFSET) == 0) + fp->f_offset = uio->uio_offset; + fp->f_nextoff = uio->uio_offset; return (error); } @@ -571,6 +549,9 @@ KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); + sx_xlock(&fp->f_lock); + if ((flags & FOF_OFFSET) == 0) + uio->uio_offset = fp->f_offset; vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); if (vp->v_type == VREG) @@ -585,28 +566,29 @@ if ((fp->f_flag & O_FSYNC) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; + ioflag |= sequential_heuristic(uio, fp); mp = NULL; if (vp->v_type != VCHR && - (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + VFS_UNLOCK_GIANT(vfslocked); goto unlock; + } VOP_LEASE(vp, td, fp->f_cred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - if ((flags & FOF_OFFSET) == 0) - uio->uio_offset = fp->f_offset; - ioflag |= sequential_heuristic(uio, fp); #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_WRITE(vp, uio, ioflag, fp->f_cred); - if ((flags & FOF_OFFSET) == 0) - fp->f_offset = uio->uio_offset; - fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0); if (vp->v_type != VCHR) vn_finished_write(mp); -unlock: VFS_UNLOCK_GIANT(vfslocked); + if ((flags & FOF_OFFSET) == 0) + fp->f_offset = uio->uio_offset; + fp->f_nextoff = uio->uio_offset; +unlock: + sx_xunlock(&fp->f_lock); return (error); } --- /usr/src/sys/kern/vfs_syscalls.c 2008-05-26 02:29:28.000000000 +0200 +++ src/sys/kern/vfs_syscalls.c 2008-05-26 16:23:32.000000000 +0200 @@ -1909,7 +1909,7 @@ struct file *fp; struct vnode *vp; struct vattr vattr; - off_t offset; + off_t offset, toff; int error, noneg; int vfslocked; @@ -1923,15 +1923,15 @@ vfslocked = VFS_LOCK_GIANT(vp->v_mount); noneg = (vp->v_type != VCHR); offset = uap->offset; + toff = fp->f_offset; switch (uap->whence) { case L_INCR: if (noneg && - (fp->f_offset < 0 || - (offset > 0 && fp->f_offset > OFF_MAX - offset))) { + (toff < 0 || (offset > 0 && toff > OFF_MAX - offset))) { error = EOVERFLOW; break; } - offset += fp->f_offset; + offset += toff; break; case L_XTND: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); @@ -1963,7 +1963,7 @@ if (error != 0) goto drop; fp->f_offset = offset; - *(off_t *)(td->td_retval) = fp->f_offset; + *(off_t *)(td->td_retval) = offset; drop: fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); --- /usr/src/sys/sys/file.h 2008-05-25 16:57:43.000000000 +0200 +++ src/sys/sys/file.h 2008-05-26 16:39:59.000000000 +0200 @@ -42,6 +42,7 @@ #include #include #include +#include struct stat; struct thread; @@ -104,39 +105,36 @@ * * Below is the list of locks that protects members in struct file. * - * (f) protected with mtx_lock(mtx_pool_find(fp)) * (d) cdevpriv_mtx + * (o) f_lock for write accesses * none not locked */ struct file { + struct sx f_lock; /* f_offset writes lock. */ void *f_data; /* file descriptor specific data */ struct fileops *f_ops; /* File operations */ struct ucred *f_cred; /* associated credentials. */ struct vnode *f_vnode; /* NULL or applicable vnode */ short f_type; /* descriptor type */ - short f_vnread_flags; /* (f) Sleep lock for f_offset */ volatile u_int f_flag; /* see fcntl.h */ volatile u_int f_count; /* reference count */ /* * DTYPE_VNODE specific fields. */ - int f_seqcount; /* Count of sequential accesses. */ - off_t f_nextoff; /* next expected read/write offset. */ + int f_seqcount; /* (o) Count of sequential accesses. */ + off_t f_nextoff; /* (o) Next expected r/w offset. */ struct cdev_privdata *f_cdevpriv; /* (d) Private data for the cdev. */ /* * DFLAG_SEEKABLE specific fields */ - off_t f_offset; + off_t f_offset; /* (o) Moving offset within the file. */ /* * Mandatory Access control information. */ void *f_label; /* Place-holder for MAC label. */ }; -#define FOFFSET_LOCKED 0x1 -#define FOFFSET_LOCK_WAITING 0x2 - #endif /* _KERNEL */ /*