diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index 0791d2e..d75559f 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -947,7 +947,9 @@ unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp, vput(vp); goto unionfs_vn_create_on_upper_free_out1; } + VI_LOCK(vp); vp->v_writecount++; + VI_UNLOCK(vp); *vpp = vp; unionfs_vn_create_on_upper_free_out1: @@ -1082,7 +1084,9 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred, } } VOP_CLOSE(uvp, FWRITE, cred, td); + VI_LOCK(uvp); uvp->v_writecount--; + VI_UNLOCK(uvp); vn_finished_write(mp); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index d92555f..2801334 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -2423,6 +2423,19 @@ loop: * vnodes open for writing. */ if (flags & WRITECLOSE) { + if (vp->v_object != NULL) { + VM_OBJECT_LOCK(vp->v_object); + vm_object_page_clean(vp->v_object, 0, 0, 0); + VM_OBJECT_UNLOCK(vp->v_object); + } + error = VOP_FSYNC(vp, MNT_WAIT, td); + if (error != 0) { + VOP_UNLOCK(vp, 0); + vdrop(vp); + MNT_VNODE_FOREACH_ABORT(mp, mvp); + return (error); + } + error = VOP_GETATTR(vp, &vattr, td->td_ucred); VI_LOCK(vp); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 0a8ef46..33c1c3a 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -4470,16 +4470,22 @@ fhopen(td, uap) if (error) goto bad; - if (fmode & FWRITE) + if (fmode & FWRITE) { + VI_LOCK(vp); vp->v_writecount++; + VI_UNLOCK(vp); + } /* * end of vn_open code */ if ((error = falloc(td, &nfp, &indx)) != 0) { - if (fmode & FWRITE) + if (fmode & FWRITE) { + VI_LOCK(vp); vp->v_writecount--; + VI_UNLOCK(vp); + } goto bad; } /* An extra reference on `nfp' has been held for us by falloc(). */ diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 03e8d93..eaa115d 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -234,8 +234,11 @@ restart: if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0) goto bad; - if (fmode & FWRITE) + if (fmode & FWRITE) { + VI_LOCK(vp); vp->v_writecount++; + VI_UNLOCK(vp); + } *flagp = fmode; ASSERT_VOP_LOCKED(vp, "vn_open_cred"); if (!mpsafe) @@ -297,7 +300,9 @@ vn_close(vp, flags, file_cred, td) if (flags & FWRITE) { VNASSERT(vp->v_writecount > 0, vp, ("vn_close: negative writecount")); + VI_LOCK(vp); vp->v_writecount--; + VI_UNLOCK(vp); } error = VOP_CLOSE(vp, flags, file_cred, td); vput(vp); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index b38c1d0..b2e47ce 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -155,7 +155,12 @@ struct vnode { int v_usecount; /* i ref count of users */ u_long v_iflag; /* i vnode flags (see below) */ u_long v_vflag; /* v vnode flags */ - int v_writecount; /* v ref count of writers */ + int v_writecount; /* i/v ref count of writers */ + /* + * v_writecount is protected by vnode interlock. When + * v_writecount is decremented from 1 to 0, or + * incremented from 0 to 1, vnode lock must be held. + */ /* * The machinery of being a vnode diff --git a/sys/ufs/ufs/ufs_extattr.c b/sys/ufs/ufs/ufs_extattr.c index 032d9cc..e40f379 100644 --- a/sys/ufs/ufs/ufs_extattr.c +++ b/sys/ufs/ufs/ufs_extattr.c @@ -334,7 +334,9 @@ ufs_extattr_enable_with_open(struct ufsmount *ump, struct vnode *vp, return (error); } + VI_LOCK(vp); vp->v_writecount++; + VI_UNLOCK(vp); vref(vp); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 414d4e6..026843d 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -458,13 +459,20 @@ _vm_map_lock(vm_map_t map, const char *file, int line) } void -_vm_map_unlock(vm_map_t map, const char *file, int line) +_vm_map_unlock(vm_map_t map, boolean_t process_freelist, const char *file, + int line) { vm_map_entry_t free_entry, entry; vm_object_t object; + struct vnode *vp; + struct mount *mp; + int vfslocked; - free_entry = map->deferred_freelist; - map->deferred_freelist = NULL; + if (process_freelist) { + free_entry = map->deferred_freelist; + map->deferred_freelist = NULL; + } else + free_entry = NULL; if (map->system_map) _mtx_unlock_flags(&map->system_mtx, 0, file, line); @@ -475,9 +483,30 @@ _vm_map_unlock(vm_map_t map, const char *file, int line) entry = free_entry; free_entry = free_entry->next; - if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { - object = entry->object.vm_object; + if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && + (object = entry->object.vm_object) != NULL) { + mp = NULL; + vfslocked = 0; + VM_OBJECT_LOCK(object); + if (object->type == OBJT_VNODE && + entry->eflags & MAP_ENTRY_VN_WRITECNT) { + vp = object->handle; + vhold(vp); + VM_OBJECT_UNLOCK(object); + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + vn_start_write(vp, &mp, V_WAIT); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + VI_LOCK(vp); + vp->v_writecount--; + VI_UNLOCK(vp); + VOP_UNLOCK(vp, 0); + vdrop(vp); + } else + VM_OBJECT_UNLOCK(object); vm_object_deallocate(object); + if (mp != NULL) + vn_finished_write(mp); + VFS_UNLOCK_GIANT(vfslocked); } vm_map_entry_dispose(map, entry); @@ -634,7 +663,7 @@ vm_map_unlock_and_wait(vm_map_t map, int timo) { mtx_lock(&map_sleep_mtx); - vm_map_unlock(map); + _vm_map_unlock(map, FALSE, LOCK_FILE, LOCK_LINE); return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", timo)); } @@ -1080,6 +1109,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_map_entry_t prev_entry; vm_map_entry_t temp_entry; vm_eflags_t protoeflags; + struct vnode *vp; struct uidinfo *uip; boolean_t charge_prev_obj; @@ -1238,6 +1268,16 @@ charged: */ vm_map_entry_link(map, prev_entry, new_entry); map->size += new_entry->end - new_entry->start; + if (object != NULL && object->type == OBJT_VNODE && + (new_entry->max_protection & VM_PROT_WRITE) != 0 && + (cow & MAP_COPY_ON_WRITE) == 0) { + vp = object->handle; + ASSERT_VOP_ELOCKED(vp, "vm_map_insert inc v_writecount"); + VI_LOCK(vp); + vp->v_writecount++; + VI_UNLOCK(vp); + new_entry->eflags |= MAP_ENTRY_VN_WRITECNT; + } #if 0 /* @@ -1449,20 +1489,10 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) if (entry->prev != &map->header) vm_map_entry_resize_free(map, entry->prev); - /* - * If the backing object is a vnode object, - * vm_object_deallocate() calls vrele(). - * However, vrele() does not lock the vnode - * because the vnode has additional - * references. Thus, the map lock can be kept - * without causing a lock-order reversal with - * the vnode lock. - */ - if (prev->object.vm_object) - vm_object_deallocate(prev->object.vm_object); + prev->next = map->deferred_freelist; + map->deferred_freelist = prev; if (prev->uip != NULL) uifree(prev->uip); - vm_map_entry_dispose(map, prev); } } @@ -1483,14 +1513,10 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) entry->end = next->end; vm_map_entry_resize_free(map, entry); - /* - * See comment above. - */ - if (next->object.vm_object) - vm_object_deallocate(next->object.vm_object); + next->next = map->deferred_freelist; + map->deferred_freelist = next; if (next->uip != NULL) uifree(next->uip); - vm_map_entry_dispose(map, next); } } } @@ -1515,6 +1541,8 @@ static void _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) { vm_map_entry_t new_entry; + vm_object_t object; + struct vnode *vp; VM_MAP_ASSERT_LOCKED(map); @@ -1533,7 +1561,6 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) * put this improvement. */ if (entry->object.vm_object == NULL && !map->system_map) { - vm_object_t object; object = vm_object_allocate(OBJT_DEFAULT, atop(entry->end - entry->start)); entry->object.vm_object = object; @@ -1567,7 +1594,16 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) vm_map_entry_link(map, entry->prev, new_entry); if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { - vm_object_reference(new_entry->object.vm_object); + object = new_entry->object.vm_object; + vm_object_reference(object); + if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) { + vp = object->handle; + VI_LOCK(vp); + KASSERT(vp->v_writecount > 0, + ("vm_map_clip_start: v_writecnt")); + vp->v_writecount++; + VI_UNLOCK(vp); + } } } @@ -1592,6 +1628,8 @@ static void _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) { vm_map_entry_t new_entry; + vm_object_t object; + struct vnode *vp; VM_MAP_ASSERT_LOCKED(map); @@ -1603,7 +1641,6 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) * put this improvement. */ if (entry->object.vm_object == NULL && !map->system_map) { - vm_object_t object; object = vm_object_allocate(OBJT_DEFAULT, atop(entry->end - entry->start)); entry->object.vm_object = object; @@ -1639,7 +1676,16 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) vm_map_entry_link(map, entry, new_entry); if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { - vm_object_reference(new_entry->object.vm_object); + object = new_entry->object.vm_object; + vm_object_reference(object); + if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) { + vp = object->handle; + VI_LOCK(vp); + KASSERT(vp->v_writecount > 0, + ("vm_map_clip_end: v_writecnt")); + vp->v_writecount++; + VI_UNLOCK(vp); + } } } @@ -2836,6 +2882,7 @@ vm_map_copy_entry( vm_ooffset_t *fork_charge) { vm_object_t src_object; + vm_map_entry_t fake_entry; vm_offset_t size; struct uidinfo *uip; int charged; @@ -2901,6 +2948,27 @@ vm_map_copy_entry( src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); dst_entry->offset = src_entry->offset; + if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) { + /* + * MAP_ENTRY_VN_WRITECNT cannot + * indicate write reference from + * src_entry, since the entry is + * marked as needs copy. + * Allocate a fake entry that is used + * to decrement v_writecount at the + * appropriate time. + * Attach fake_entry to the dst_map, + * since it is unlocked last in + * vmspace_fork(). + */ + fake_entry = vm_map_entry_create(dst_map); + fake_entry->eflags = MAP_ENTRY_VN_WRITECNT; + src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT; + vm_object_reference(src_object); + fake_entry->object.vm_object = src_object; + fake_entry->next = dst_map->deferred_freelist; + dst_map->deferred_freelist = fake_entry; + } } else { dst_entry->object.vm_object = NULL; dst_entry->offset = 0; @@ -2974,6 +3042,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) vm_map_entry_t old_entry; vm_map_entry_t new_entry; vm_object_t object; + struct vnode *vp; int locked; vm_map_lock(old_map); @@ -3056,6 +3125,14 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION); new_entry->wired_count = 0; + if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) { + vp = new_entry->object.vm_object->handle; + VI_LOCK(vp); + KASSERT(vp->v_writecount > 0, + ("vmspace_fork: v_writecount")); + vp->v_writecount++; + VI_UNLOCK(vp); + } /* * Insert the entry into the new map -- we know we're @@ -3080,8 +3157,11 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) */ new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; + /* + * Copied entry is COW over the old object. + */ new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | - MAP_ENTRY_IN_TRANSITION); + MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT); new_entry->wired_count = 0; new_entry->object.vm_object = NULL; new_entry->uip = NULL; diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 0c586ab..5f81d36 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -139,6 +139,7 @@ struct vm_map_entry { #define MAP_ENTRY_GROWS_UP 0x2000 /* Bottom-up stacks */ #define MAP_ENTRY_WIRE_SKIPPED 0x4000 +#define MAP_ENTRY_VN_WRITECNT 0x8000 /* writeable vnode mapping */ #ifdef _KERNEL static __inline u_char @@ -266,7 +267,8 @@ vmspace_pmap(struct vmspace *vmspace) */ void _vm_map_lock(vm_map_t map, const char *file, int line); -void _vm_map_unlock(vm_map_t map, const char *file, int line); +void _vm_map_unlock(vm_map_t map, boolean_t process_freelist, const char *file, + int line); void _vm_map_lock_read(vm_map_t map, const char *file, int line); void _vm_map_unlock_read(vm_map_t map, const char *file, int line); int _vm_map_trylock(vm_map_t map, const char *file, int line); @@ -278,7 +280,7 @@ int vm_map_unlock_and_wait(vm_map_t map, int timo); void vm_map_wakeup(vm_map_t map); #define vm_map_lock(map) _vm_map_lock(map, LOCK_FILE, LOCK_LINE) -#define vm_map_unlock(map) _vm_map_unlock(map, LOCK_FILE, LOCK_LINE) +#define vm_map_unlock(map) _vm_map_unlock(map, TRUE, LOCK_FILE, LOCK_LINE) #define vm_map_lock_read(map) _vm_map_lock_read(map, LOCK_FILE, LOCK_LINE) #define vm_map_unlock_read(map) _vm_map_unlock_read(map, LOCK_FILE, LOCK_LINE) #define vm_map_trylock(map) _vm_map_trylock(map, LOCK_FILE, LOCK_LINE) diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 4963a60..9d17adc 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -117,7 +117,8 @@ vmmapentry_rsrc_init(dummy) } static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, - int *, struct vnode *, vm_ooffset_t *, vm_object_t *); + int *, struct vnode *, vm_ooffset_t *, vm_object_t *, struct vnode **, + int *); static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct cdev *, vm_ooffset_t *, vm_object_t *); static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, @@ -1143,25 +1144,36 @@ munlock(td, uap) * * Helper function for vm_mmap. Perform sanity check specific for mmap * operations on vnodes. + * + * For regular files, function returns with *rvp locked, since + * vm_map_insert shall bump v_writecount for shared writable mappings. + * + * For VCHR vnodes, lock is held over call to vm_mmap_cdev() to keep + * vp->v_rdev valid. */ int vm_mmap_vnode(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, - struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp) + struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp, + struct vnode **rvp, int *vfslocked) { struct vattr va; vm_object_t obj; vm_offset_t foff; struct mount *mp; struct ucred *cred; - int error, flags; - int vfslocked; + int error, flags, locktype; mp = vp->v_mount; cred = td->td_ucred; - vfslocked = VFS_LOCK_GIANT(mp); - if ((error = vget(vp, LK_SHARED, td)) != 0) { - VFS_UNLOCK_GIANT(vfslocked); + + if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED)) + locktype = LK_EXCLUSIVE; + else + locktype = LK_SHARED; + *vfslocked = VFS_LOCK_GIANT(mp); + if ((error = vget(vp, locktype, td)) != 0) { + VFS_UNLOCK_GIANT(*vfslocked); return (error); } foff = *foffp; @@ -1178,11 +1190,15 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize, if (obj->handle != vp) { vput(vp); vp = (struct vnode*)obj->handle; - vget(vp, LK_SHARED, td); + vget(vp, locktype, td); } + *rvp = vp; } else if (vp->v_type == VCHR) { error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp, vp->v_rdev, foffp, objp); + vput(vp); + VFS_UNLOCK_GIANT(*vfslocked); + *rvp = NULL; if (error == 0) goto mark_atime; goto done; @@ -1226,8 +1242,11 @@ mark_atime: vfs_mark_atime(vp, cred); done: - vput(vp); - VFS_UNLOCK_GIANT(vfslocked); + if (error != 0 && *rvp != NULL) { + vput(vp); + VFS_UNLOCK_GIANT(*vfslocked); + *rvp = NULL; + } return (error); } @@ -1350,8 +1369,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, boolean_t fitit; vm_object_t object = NULL; int rv = KERN_SUCCESS; - int docow, error; + int docow, error, vfslocked; struct thread *td = curthread; + struct vnode *vp; if (size == 0) return (0); @@ -1385,6 +1405,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, return (EINVAL); fitit = FALSE; } + vfslocked = 0; + vp = NULL; + /* * Lookup/allocate object. */ @@ -1395,7 +1418,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, break; case OBJT_VNODE: error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, - handle, &foff, &object); + handle, &foff, &object, &vp, &vfslocked); break; case OBJT_SWAP: error = vm_mmap_shm(td, size, prot, &maxprot, &flags, @@ -1443,6 +1466,10 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, rv = vm_map_fixed(map, object, foff, *addr, size, prot, maxprot, docow); + if (vp != NULL) { + vput(vp); + VFS_UNLOCK_GIANT(vfslocked); + } if (rv != KERN_SUCCESS) { /* * Lose the object reference. Will destroy the