Index: vm/vm_kern.c
===================================================================
--- vm/vm_kern.c        (.../vmcontention/sys)        (revision 254189)
+++ vm/vm_kern.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -397,14 +397,14 @@ kmem_unback(vm_object_t object, vm_offset_t addr,
             ("kmem_unback: only supports kernel objects."));
 
         offset = addr - VM_MIN_KERNEL_ADDRESS;
-        VM_OBJECT_WLOCK(object);
+        VM_OBJECT_RLOCK(object);
         pmap_remove(kernel_pmap, addr, addr + size);
         for (i = 0; i < size; i += PAGE_SIZE) {
                 m = vm_page_lookup(object, OFF_TO_IDX(offset + i));
                 vm_page_unwire(m, 0);
                 vm_page_free(m);
         }
-        VM_OBJECT_WUNLOCK(object);
+        VM_OBJECT_RUNLOCK(object);
 }
 
 /*
Index: vm/vm_fault.c
===================================================================
--- vm/vm_fault.c        (.../vmcontention/sys)        (revision 254189)
+++ vm/vm_fault.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -392,7 +392,8 @@ RetryFault:;
                         unlock_map(&fs);
                         if (fs.m == vm_page_lookup(fs.object, fs.pindex)) {
-                                vm_page_sleep_if_busy(fs.m, "vmpfw");
+                                vm_page_sleep_if_busy(fs.m, "vmpfw",
+                                    VM_ALLOC_NOBUSY, FALSE);
                         }
                         vm_object_pip_wakeup(fs.object);
                         VM_OBJECT_WUNLOCK(fs.object);
Index: vm/vm_glue.c
===================================================================
--- vm/vm_glue.c        (.../vmcontention/sys)        (revision 254189)
+++ vm/vm_glue.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -231,10 +231,19 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset
         vm_pindex_t pindex;
         int rv;
 
-        VM_OBJECT_WLOCK(object);
+        VM_OBJECT_RLOCK(object);
         pindex = OFF_TO_IDX(offset);
+retry:
         m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
         if (m->valid != VM_PAGE_BITS_ALL) {
+                if (!VM_OBJECT_LOCK_TRYUPGRADE(object)) {
+                        VM_OBJECT_RUNLOCK(object);
+                        VM_OBJECT_WLOCK(object);
+                        vm_page_lock(m);
+                        vm_page_free(m);
+                        vm_page_unlock(m);
+                        goto retry;
+                }
                 ma[0] = m;
                 rv = vm_pager_get_pages(object, ma, 1, 0);
                 m = vm_page_lookup(object, pindex);
@@ -253,7 +262,10 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset
         vm_page_hold(m);
         vm_page_unlock(m);
 out:
-        VM_OBJECT_WUNLOCK(object);
+        if (VM_OBJECT_WOWNED(object))
+                VM_OBJECT_WUNLOCK(object);
+        else
+                VM_OBJECT_RUNLOCK(object);
         return (m);
 }
 
@@ -500,7 +512,7 @@ vm_thread_swapout(struct thread *td)
         pages = td->td_kstack_pages;
         ksobj = td->td_kstack_obj;
         pmap_qremove(td->td_kstack, pages);
-        VM_OBJECT_WLOCK(ksobj);
+        VM_OBJECT_RLOCK(ksobj);
         for (i = 0; i < pages; i++) {
                 m = vm_page_lookup(ksobj, i);
                 if (m == NULL)
@@ -510,7 +522,7 @@ vm_thread_swapout(struct thread *td)
                 vm_page_unwire(m, 0);
                 vm_page_unlock(m);
         }
-        VM_OBJECT_WUNLOCK(ksobj);
+        VM_OBJECT_RUNLOCK(ksobj);
 }
 
 /*
Index: vm/vm_object.c
===================================================================
--- vm/vm_object.c        (.../vmcontention/sys)        (revision 254189)
+++ vm/vm_object.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -872,7 +872,8 @@ rescan:
                 np = TAILQ_NEXT(p, listq);
                 if (p->valid == 0)
                         continue;
-                if (vm_page_sleep_if_busy(p, "vpcwai")) {
+                if (vm_page_sleep_if_busy(p, "vpcwai", VM_ALLOC_NOBUSY,
+                    FALSE)) {
                         if (object->generation != curgeneration) {
                                 if ((flags & OBJPC_SYNC) != 0)
                                         goto rescan;
@@ -1213,15 +1214,15 @@ vm_object_shadow(
          * Don't create the new object if the old object isn't shared.
          */
         if (source != NULL) {
-                VM_OBJECT_WLOCK(source);
+                VM_OBJECT_RLOCK(source);
                 if (source->ref_count == 1 &&
                     source->handle == NULL &&
                     (source->type == OBJT_DEFAULT ||
                      source->type == OBJT_SWAP)) {
-                        VM_OBJECT_WUNLOCK(source);
+                        VM_OBJECT_RUNLOCK(source);
                         return;
                 }
-                VM_OBJECT_WUNLOCK(source);
+                VM_OBJECT_RUNLOCK(source);
         }
 
         /*
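Note: vm_imgact_hold_page() above, and tmpfs_nocacheread()/tmpfs_mappedwrite() further down, all adopt the same idiom: take the object lock shared for the fast path where the page is already valid, and upgrade to exclusive only when pager I/O is required. Since the try-upgrade fails whenever other readers hold the lock, the fallback has to drop the lock entirely, which invalidates the grab, so the page is freed and the lookup restarted under the write lock. A minimal sketch of the idiom, with obj and pindex as placeholder names and the pager step elided:

        VM_OBJECT_RLOCK(obj);
retry:
        m = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
        if (m->valid != VM_PAGE_BITS_ALL) {
                if (!VM_OBJECT_LOCK_TRYUPGRADE(obj)) {
                        /*
                         * Other readers blocked the upgrade.  Dropping
                         * the lock invalidates the grab, so free the
                         * page and retry under the exclusive lock.
                         */
                        VM_OBJECT_RUNLOCK(obj);
                        VM_OBJECT_WLOCK(obj);
                        vm_page_lock(m);
                        vm_page_free(m);
                        vm_page_unlock(m);
                        goto retry;
                }
                /* ... pager I/O, now under the exclusive lock ... */
        }
        /* Release whichever lock mode ended up being held. */
        if (VM_OBJECT_WOWNED(obj))
                VM_OBJECT_WUNLOCK(obj);
        else
                VM_OBJECT_RUNLOCK(obj);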
Index: vm/vm_object.h
===================================================================
--- vm/vm_object.h        (.../vmcontention/sys)        (revision 254189)
+++ vm/vm_object.h        (.../vmobj-readlock/sys)        (revision 254189)
@@ -226,6 +226,8 @@ extern struct vm_object kmem_object_store;
         rw_assert(&(object)->lock, RA_WLOCKED)
 #define VM_OBJECT_LOCK_DOWNGRADE(object)                        \
         rw_downgrade(&(object)->lock)
+#define VM_OBJECT_LOCK_TRYUPGRADE(object)                       \
+        rw_try_upgrade(&(object)->lock)
 #define VM_OBJECT_RLOCK(object)                                 \
         rw_rlock(&(object)->lock)
 #define VM_OBJECT_RUNLOCK(object)                               \
@@ -238,6 +240,8 @@ extern struct vm_object kmem_object_store;
         rw_try_wlock(&(object)->lock)
 #define VM_OBJECT_WLOCK(object)                                 \
         rw_wlock(&(object)->lock)
+#define VM_OBJECT_WOWNED(object)                                \
+        rw_wowned(&(object)->lock)
 #define VM_OBJECT_WUNLOCK(object)                               \
         rw_wunlock(&(object)->lock)
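Note: both new macros are thin wrappers over rw(9). rw_try_upgrade() converts a shared hold into an exclusive one without sleeping and only succeeds when the caller is the last remaining reader; rw_wowned() reports whether the current thread owns the lock exclusively. Together they let code that entered under VM_OBJECT_RLOCK() detect and restore its entry lock state, as vm_page_grab() does below. A standalone illustration on a bare rwlock (the lock variable is invented for the example):

        struct rwlock l;

        rw_init(&l, "example");
        rw_rlock(&l);
        if (rw_try_upgrade(&l)) {
                /* Sole reader: the hold is now exclusive. */
                KASSERT(rw_wowned(&l), ("upgrade did not grant write"));
                rw_wunlock(&l);
        } else {
                /* Contended: drop and re-acquire exclusively. */
                rw_runlock(&l);
                rw_wlock(&l);
                rw_wunlock(&l);
        }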
Index: vm/vm_page.c
===================================================================
--- vm/vm_page.c        (.../vmcontention/sys)        (revision 254189)
+++ vm/vm_page.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -875,26 +875,52 @@ vm_page_readahead_finish(vm_page_t m)
  * be locked.
  */
 int
-vm_page_sleep_if_busy(vm_page_t m, const char *msg)
+vm_page_sleep_if_busy(vm_page_t m, const char *msg, int busyflags,
+    boolean_t pref)
 {
         vm_object_t obj;
+        int cond, iswowned;
 
         vm_page_lock_assert(m, MA_NOTOWNED);
-        VM_OBJECT_ASSERT_WLOCKED(m->object);
-        if (vm_page_busied(m)) {
+
+        /*
+         * The page-specific object must be cached because page
+         * identity can change during the sleep, causing the
+         * re-lock of a different object.
+         * It is assumed that a reference to the object is already
+         * held by the callers.
+         */
+        obj = m->object;
+        VM_OBJECT_ASSERT_LOCKED(obj);
+        iswowned = VM_OBJECT_WOWNED(obj);
+        KASSERT((busyflags & VM_ALLOC_NOBUSY) == 0 || iswowned,
+            ("vm_page_sleep_if_busy: VM_ALLOC_NOBUSY with read object lock"));
+
+        if ((busyflags & VM_ALLOC_NOBUSY) != 0)
+                cond = vm_page_busy_locked(m);
+        else if ((busyflags & VM_ALLOC_RBUSY) != 0)
+                cond = !vm_page_busy_tryrlock(m);
+        else
+                cond = !vm_page_busy_trywlock(m);
+        if (cond) {
                 /*
-                 * The page-specific object must be cached because page
-                 * identity can change during the sleep, causing the
-                 * re-lock of a different object.
-                 * It is assumed that a reference to the object is already
-                 * held by the callers.
+                 * Some consumers may want to reference the page before
+                 * unlocking and sleeping so that the page daemon is less
+                 * likely to reclaim it.
                  */
-                obj = m->object;
+                if (pref)
+                        vm_page_aflag_set(m, PGA_REFERENCED);
                 vm_page_lock(m);
-                VM_OBJECT_WUNLOCK(obj);
+                if (iswowned)
+                        VM_OBJECT_WUNLOCK(obj);
+                else
+                        VM_OBJECT_RUNLOCK(obj);
                 vm_page_busy_sleep(m, msg);
-                VM_OBJECT_WLOCK(obj);
+                if (iswowned)
+                        VM_OBJECT_WLOCK(obj);
+                else
+                        VM_OBJECT_RLOCK(obj);
                 return (TRUE);
         }
         return (FALSE);
@@ -2691,52 +2717,52 @@ vm_page_t
 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 {
         vm_page_t m;
-        int sleep;
+        int origwlock;
 
-        VM_OBJECT_ASSERT_WLOCKED(object);
+        VM_OBJECT_ASSERT_LOCKED(object);
+        origwlock = VM_OBJECT_WOWNED(object);
         KASSERT((allocflags & VM_ALLOC_RETRY) != 0,
             ("vm_page_grab: VM_ALLOC_RETRY is required"));
-        KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
-            (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
-            ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
+        KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 || origwlock != 0,
+            ("vm_page_grab: VM_ALLOC_NOBUSY with object read lock"));
 retrylookup:
         if ((m = vm_page_lookup(object, pindex)) != NULL) {
-                sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ?
-                    vm_page_xbusied(m) : vm_page_busied(m);
-                if (sleep) {
-                        /*
-                         * Reference the page before unlocking and
-                         * sleeping so that the page daemon is less
-                         * likely to reclaim it.
-                         */
-                        vm_page_aflag_set(m, PGA_REFERENCED);
-                        vm_page_lock(m);
-                        VM_OBJECT_WUNLOCK(object);
-                        vm_page_busy_sleep(m, "pgrbwt");
-                        VM_OBJECT_WLOCK(object);
+                if (vm_page_sleep_if_busy(m, "pgrbwt", allocflags &
+                    (VM_ALLOC_NOBUSY | VM_ALLOC_RBUSY), TRUE))
                         goto retrylookup;
-                } else {
+                else {
                         if ((allocflags & VM_ALLOC_WIRED) != 0) {
                                 vm_page_lock(m);
                                 vm_page_wire(m);
                                 vm_page_unlock(m);
                         }
-                        if ((allocflags &
-                            (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
-                                vm_page_xbusy(m);
-                        if ((allocflags & VM_ALLOC_SBUSY) != 0)
-                                vm_page_sbusy(m);
+
+                        /*
+                         * If the lock state changed in the meanwhile,
+                         * unwind back.
+                         */
+                        if (VM_OBJECT_WOWNED(object) != origwlock)
+                                VM_OBJECT_LOCK_DOWNGRADE(object);
                         return (m);
                 }
         }
-        m = vm_page_alloc(object, pindex, allocflags & ~(VM_ALLOC_RETRY |
-            VM_ALLOC_IGN_SBUSY));
+        if (!VM_OBJECT_WOWNED(object) && !VM_OBJECT_LOCK_TRYUPGRADE(object)) {
+                VM_OBJECT_RUNLOCK(object);
+                VM_OBJECT_WLOCK(object);
+                goto retrylookup;
+        }
+        m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
         if (m == NULL) {
                 VM_OBJECT_WUNLOCK(object);
                 VM_WAIT;
                 VM_OBJECT_WLOCK(object);
                 goto retrylookup;
-        } else if (m->valid != 0)
+        }
+
+        /*
+         * If the lock state changed in the meanwhile, unwind back.
+         */
+        if (VM_OBJECT_WOWNED(object) != origwlock)
+                VM_OBJECT_LOCK_DOWNGRADE(object);
+        if (m->valid != 0)
                 return (m);
         if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
                 pmap_zero_page(m);
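Note: after this rewrite vm_page_grab() accepts either lock mode and returns in the mode the caller entered with: origwlock records the entry state, the allocation path upgrades (or relocks) to exclusive as needed, and the "unwind back" downgrades restore the shared hold. What a read-locked caller can now rely on, sketched under the assumption (taken from the zfs and vfs_bio hunks below) that VM_ALLOC_RBUSY returns the page shared-busied; obj and pindex are placeholders:

        VM_OBJECT_RLOCK(obj);
        m = vm_page_grab(obj, pindex,
            VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_RBUSY);
        /*
         * Any internal upgrade has been undone here: the object is
         * again read-locked and m is held shared-busy.
         */
        vm_page_busy_runlock(m);
        VM_OBJECT_RUNLOCK(obj);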
Index: vm/vm_page.h
===================================================================
--- vm/vm_page.h        (.../vmcontention/sys)        (revision 254189)
+++ vm/vm_page.h        (.../vmobj-readlock/sys)        (revision 254189)
@@ -395,7 +395,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 #define VM_ALLOC_NOBUSY         0x0200  /* Do not busy the page */
 #define VM_ALLOC_IFCACHED       0x0400  /* Fail if the page is not cached */
 #define VM_ALLOC_IFNOTCACHED    0x0800  /* Fail if the page is cached */
-#define VM_ALLOC_IGN_SBUSY      0x1000  /* vm_page_grab() only */
+#define VM_ALLOC_UNUSED13       0x1000  /* -- available -- */
 #define VM_ALLOC_NODUMP         0x2000  /* don't include in dump */
 #define VM_ALLOC_SBUSY          0x4000  /* Shared busy the page */
 
@@ -466,7 +466,8 @@ void vm_page_requeue(vm_page_t m);
 void vm_page_requeue_locked(vm_page_t m);
 int vm_page_sbusied(vm_page_t m);
 void vm_page_set_valid_range(vm_page_t m, int base, int size);
-int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
+int vm_page_sleep_if_busy(vm_page_t m, const char *msg, int busyflags,
+    boolean_t pref);
 vm_offset_t vm_page_startup(vm_offset_t vaddr);
 void vm_page_sunbusy(vm_page_t m);
 int vm_page_trysbusy(vm_page_t m);
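Note: the new vm_page_sleep_if_busy() arguments select what happens when the page turns out not to be busy: VM_ALLOC_NOBUSY (write lock required) merely confirms the page is unbusied without acquiring busy state, VM_ALLOC_RBUSY takes the busy lock shared, and 0 takes it exclusive; the final boolean sets PGA_REFERENCED before sleeping so the page daemon is less likely to reclaim the page. The canonical lookup/retry loop, as used in the shm_dotruncate() and tmpfs_reg_resize() hunks below (object and idx are placeholders):

        VM_OBJECT_WLOCK(object);
retry:
        m = vm_page_lookup(object, idx);
        if (m != NULL) {
                /* Sleeps, relocks, and returns TRUE if m was busied. */
                if (vm_page_sleep_if_busy(m, "example", VM_ALLOC_NOBUSY,
                    FALSE))
                        goto retry;
                /* Here m is present and was not busied. */
        }
        VM_OBJECT_WUNLOCK(object);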
Index: cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
===================================================================
--- cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c        (.../vmcontention/sys)        (revision 254189)
+++ cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -330,25 +330,14 @@ page_busy(vnode_t *vp, int64_t start, int64_t off,
         vm_page_t pp;
 
         obj = vp->v_object;
-        zfs_vmobject_assert_wlocked(obj);
+        zfs_vmobject_assert_locked(obj);
 
         for (;;) {
                 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
                     pp->valid) {
-                        if (vm_page_xbusied(pp)) {
-                                /*
-                                 * Reference the page before unlocking and
-                                 * sleeping so that the page daemon is less
-                                 * likely to reclaim it.
-                                 */
-                                vm_page_reference(pp);
-                                vm_page_lock(pp);
-                                zfs_vmobject_wunlock(obj);
-                                vm_page_busy_sleep(pp, "zfsmwb");
-                                zfs_vmobject_wlock(obj);
+                        if (vm_page_sleep_if_busy(pp, "zfsmwb",
+                            VM_ALLOC_RBUSY, TRUE))
                                 continue;
-                        }
-                        vm_page_sbusy(pp);
                 } else if (pp == NULL) {
                         pp = vm_page_alloc(obj, OFF_TO_IDX(start),
                             VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
@@ -533,21 +522,21 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
         ASSERT(obj != NULL);
         ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);
 
-        zfs_vmobject_wlock(obj);
+        zfs_vmobject_rlock(obj);
         for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
                 int bytes = MIN(PAGESIZE, len);
 
-                pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY |
-                    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY);
+                pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_RBUSY |
+                    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
                 if (pp->valid == 0) {
-                        zfs_vmobject_wunlock(obj);
+                        zfs_vmobject_runlock(obj);
                         va = zfs_map_page(pp, &sf);
                         error = dmu_read(os, zp->z_id, start, bytes, va,
                             DMU_READ_PREFETCH);
                         if (bytes != PAGESIZE && error == 0)
                                 bzero(va + bytes, PAGESIZE - bytes);
                         zfs_unmap_page(sf);
-                        zfs_vmobject_wlock(obj);
+                        zfs_vmobject_rlock(obj);
                         vm_page_sunbusy(pp);
                         vm_page_lock(pp);
                         if (error) {
@@ -567,7 +556,7 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
                 uio->uio_offset += bytes;
                 len -= bytes;
         }
-        zfs_vmobject_wunlock(obj);
+        zfs_vmobject_runlock(obj);
         return (error);
 }
 
@@ -599,7 +588,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
         start = uio->uio_loffset;
         off = start & PAGEOFFSET;
-        zfs_vmobject_wlock(obj);
+        zfs_vmobject_rlock(obj);
         for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
                 vm_page_t pp;
                 uint64_t bytes = MIN(PAGESIZE - off, len);
 
@@ -608,23 +597,23 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
                         struct sf_buf *sf;
                         caddr_t va;
 
-                        zfs_vmobject_wunlock(obj);
+                        zfs_vmobject_runlock(obj);
                         va = zfs_map_page(pp, &sf);
                         error = uiomove(va + off, bytes, UIO_READ, uio);
                         zfs_unmap_page(sf);
-                        zfs_vmobject_wlock(obj);
+                        zfs_vmobject_rlock(obj);
                         page_unhold(pp);
                 } else {
-                        zfs_vmobject_wunlock(obj);
+                        zfs_vmobject_runlock(obj);
                         error = dmu_read_uio(os, zp->z_id, uio, bytes);
-                        zfs_vmobject_wlock(obj);
+                        zfs_vmobject_rlock(obj);
                 }
                 len -= bytes;
                 off = 0;
                 if (error)
                         break;
         }
-        zfs_vmobject_wunlock(obj);
+        zfs_vmobject_runlock(obj);
         return (error);
 }
Index: cddl/compat/opensolaris/sys/vm.h
===================================================================
--- cddl/compat/opensolaris/sys/vm.h        (.../vmcontention/sys)        (revision 254189)
+++ cddl/compat/opensolaris/sys/vm.h        (.../vmobj-readlock/sys)        (revision 254189)
@@ -35,7 +35,10 @@ extern const int zfs_vm_pagerret_bad;
 extern const int zfs_vm_pagerret_error;
 extern const int zfs_vm_pagerret_ok;
 
+void zfs_vmobject_assert_locked(vm_object_t object);
 void zfs_vmobject_assert_wlocked(vm_object_t object);
+void zfs_vmobject_rlock(vm_object_t object);
+void zfs_vmobject_runlock(vm_object_t object);
 void zfs_vmobject_wlock(vm_object_t object);
 void zfs_vmobject_wunlock(vm_object_t object);
Index: cddl/compat/opensolaris/kern/opensolaris_vm.c
===================================================================
--- cddl/compat/opensolaris/kern/opensolaris_vm.c        (.../vmcontention/sys)        (revision 254189)
+++ cddl/compat/opensolaris/kern/opensolaris_vm.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -41,19 +41,39 @@
 const int zfs_vm_pagerret_bad = VM_PAGER_BAD;
 const int zfs_vm_pagerret_error = VM_PAGER_ERROR;
 const int zfs_vm_pagerret_ok = VM_PAGER_OK;
 
+/*
+ * For the assertions, skipping FILE/LINE makes the reports less helpful,
+ * but these must be real functions for compatibility reasons.
+ */
 void
+zfs_vmobject_assert_locked(vm_object_t object)
+{
+
+        VM_OBJECT_ASSERT_LOCKED(object);
+}
+
+void
 zfs_vmobject_assert_wlocked(vm_object_t object)
 {
 
-        /*
-         * This is not ideal because FILE/LINE used by assertions will not
-         * be too helpful, but it must be an hard function for
-         * compatibility reasons.
-         */
         VM_OBJECT_ASSERT_WLOCKED(object);
 }
 
 void
+zfs_vmobject_rlock(vm_object_t object)
+{
+
+        VM_OBJECT_RLOCK(object);
+}
+
+void
+zfs_vmobject_runlock(vm_object_t object)
+{
+
+        VM_OBJECT_RUNLOCK(object);
+}
+
+void
 zfs_vmobject_wlock(vm_object_t object)
 {
Index: kern/vfs_bio.c
===================================================================
--- kern/vfs_bio.c        (.../vmcontention/sys)        (revision 254189)
+++ kern/vfs_bio.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -3305,7 +3305,7 @@ int
 allocbuf(struct buf *bp, int size)
 {
         int newbsize, mbsize;
-        int i;
+        int i, onpages;
 
         BUF_ASSERT_HELD(bp);
 
@@ -3433,7 +3433,7 @@ allocbuf(struct buf *bp, int size)
                                     (bp->b_npages - desiredpages));
                         } else
                                 BUF_CHECK_UNMAPPED(bp);
-                        VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
+                        VM_OBJECT_RLOCK(bp->b_bufobj->bo_object);
                         for (i = desiredpages; i < bp->b_npages; i++) {
                                 /*
                                  * the page is not freed here -- it
@@ -3443,16 +3443,17 @@ allocbuf(struct buf *bp, int size)
                                 m = bp->b_pages[i];
                                 KASSERT(m != bogus_page,
                                     ("allocbuf: bogus page found"));
-                                while (vm_page_sleep_if_busy(m,
-                                    "biodep"))
+                                while (vm_page_sleep_if_busy(m, "biodep", 0,
+                                    FALSE))
                                         continue;
 
                                 bp->b_pages[i] = NULL;
                                 vm_page_lock(m);
                                 vm_page_unwire(m, 0);
                                 vm_page_unlock(m);
+                                vm_page_busy_wunlock(m);
                         }
-                        VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
+                        VM_OBJECT_RUNLOCK(bp->b_bufobj->bo_object);
                         bp->b_npages = desiredpages;
                 }
         } else if (size > bp->b_bcount) {
@@ -3473,7 +3474,8 @@ allocbuf(struct buf *bp, int size)
 
                 obj = bp->b_bufobj->bo_object;
 
-                VM_OBJECT_WLOCK(obj);
+                VM_OBJECT_RLOCK(obj);
+                onpages = bp->b_npages;
                 while (bp->b_npages < desiredpages) {
                         vm_page_t m;
 
@@ -3488,9 +3490,9 @@ allocbuf(struct buf *bp, int size)
                          * pages are vfs_busy_pages().
                          */
                         m = vm_page_grab(obj, OFF_TO_IDX(bp->b_offset) +
-                            bp->b_npages, VM_ALLOC_NOBUSY |
+                            bp->b_npages, VM_ALLOC_RBUSY |
                             VM_ALLOC_SYSTEM | VM_ALLOC_WIRED |
-                            VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY |
+                            VM_ALLOC_RETRY |
                             VM_ALLOC_COUNT(desiredpages - bp->b_npages));
                         if (m->valid == 0)
                                 bp->b_flags &= ~B_CACHE;
@@ -3535,8 +3537,15 @@ allocbuf(struct buf *bp, int size)
                         toff += tinc;
                         tinc = PAGE_SIZE;
                 }
-                VM_OBJECT_WUNLOCK(obj);
+                while ((bp->b_npages - onpages) != 0) {
+                        vm_page_t m;
+
+                        m = bp->b_pages[onpages];
+                        vm_page_busy_runlock(m);
+                        ++onpages;
+                }
+                VM_OBJECT_RUNLOCK(obj);
+
                 /*
                  * Step 3, fixup the KVM pmap.
                  */
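Note on the allocbuf() change: only a write-locked caller may ask vm_page_grab() or vm_page_sleep_if_busy() for an unbusied page (see the KASSERTs in vm_page.c above), so the now read-locked grow path switches from VM_ALLOC_NOBUSY to VM_ALLOC_RBUSY and pays for it by leaving every newly grabbed page shared-busied; onpages marks where the new range begins so the busy locks can be dropped in one batch before the object lock is released. The shrink path is symmetric: vm_page_sleep_if_busy(m, "biodep", 0, FALSE) returning FALSE leaves the page exclusively busied, hence the new vm_page_busy_wunlock() after the unwire. The two release flavors, paired with how the busy state was acquired (m is a placeholder):

        /* Grow path: grabbed with VM_ALLOC_RBUSY -> shared busy. */
        vm_page_busy_runlock(m);

        /* Shrink path: sleep_if_busy with busyflags 0 -> exclusive busy. */
        vm_page_busy_wunlock(m);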
Index: kern/subr_uio.c
===================================================================
--- kern/subr_uio.c        (.../vmcontention/sys)        (revision 254189)
+++ kern/subr_uio.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -108,7 +108,8 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_o
         VM_OBJECT_WLOCK(uobject);
 retry:
         if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
-                if (vm_page_sleep_if_busy(user_pg, "vm_pgmoveco"))
+                if (vm_page_sleep_if_busy(user_pg, "vm_pgmoveco",
+                    VM_ALLOC_NOBUSY, FALSE))
                         goto retry;
                 vm_page_lock(user_pg);
                 pmap_remove_all(user_pg);
Index: kern/vfs_cluster.c
===================================================================
--- kern/vfs_cluster.c        (.../vmcontention/sys)        (revision 254189)
+++ kern/vfs_cluster.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -414,20 +414,20 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize
                          */
                        off = tbp->b_offset;
                        tsize = size;
-                       VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
+                       VM_OBJECT_RLOCK(tbp->b_bufobj->bo_object);
                        for (j = 0; tsize > 0; j++) {
                                toff = off & PAGE_MASK;
                                tinc = tsize;
                                if (toff + tinc > PAGE_SIZE)
                                        tinc = PAGE_SIZE - toff;
-                               VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object);
+                               VM_OBJECT_ASSERT_RLOCKED(tbp->b_pages[j]->object);
                                if ((tbp->b_pages[j]->valid &
                                    vm_page_bits(toff, tinc)) != 0)
                                        break;
                                off += tinc;
                                tsize -= tinc;
                        }
-                       VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
+                       VM_OBJECT_RUNLOCK(tbp->b_bufobj->bo_object);
                        if (tsize > 0) {
                                bqrelse(tbp);
                                break;
@@ -494,13 +494,13 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize
         * Fully valid pages in the cluster are already good and do not need
         * to be re-read from disk. Replace the page with bogus_page
         */
-       VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
+       VM_OBJECT_RLOCK(bp->b_bufobj->bo_object);
        for (j = 0; j < bp->b_npages; j++) {
-               VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object);
+               VM_OBJECT_ASSERT_RLOCKED(bp->b_pages[j]->object);
                if (bp->b_pages[j]->valid == VM_PAGE_BITS_ALL)
                        bp->b_pages[j] = bogus_page;
        }
-       VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
+       VM_OBJECT_RUNLOCK(bp->b_bufobj->bo_object);
        if (bp->b_bufsize > bp->b_kvasize)
                panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
                    bp->b_bufsize, bp->b_kvasize);
Index: kern/uipc_shm.c
===================================================================
--- kern/uipc_shm.c        (.../vmcontention/sys)        (revision 254189)
+++ kern/uipc_shm.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -281,7 +281,8 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
 retry:
                m = vm_page_lookup(object, idx);
                if (m != NULL) {
-                       if (vm_page_sleep_if_busy(m, "shmtrc"))
+                       if (vm_page_sleep_if_busy(m, "shmtrc",
+                           VM_ALLOC_NOBUSY, FALSE))
                                goto retry;
                } else if (vm_pager_has_page(object, idx, NULL, NULL)) {
                        m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
Index: fs/tmpfs/tmpfs_vnops.c
===================================================================
--- fs/tmpfs/tmpfs_vnops.c        (.../vmcontention/sys)        (revision 254189)
+++ fs/tmpfs/tmpfs_vnops.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -445,7 +445,7 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t id
        vm_page_t m;
        int error, rv;
 
-       VM_OBJECT_WLOCK(tobj);
+       VM_OBJECT_RLOCK(tobj);
 
        /*
         * Parallel reads of the page content from disk are prevented
@@ -457,8 +457,17 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t id
         * lock to page out tobj's pages because tobj is a OBJT_SWAP
         * type object.
         */
+retry:
        m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
        if (m->valid != VM_PAGE_BITS_ALL) {
+               if (!VM_OBJECT_LOCK_TRYUPGRADE(tobj)) {
+                       VM_OBJECT_RUNLOCK(tobj);
+                       VM_OBJECT_WLOCK(tobj);
+                       vm_page_lock(m);
+                       vm_page_free(m);
+                       vm_page_unlock(m);
+                       goto retry;
+               }
                if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
                        rv = vm_pager_get_pages(tobj, &m, 1, 0);
                        m = vm_page_lookup(tobj, idx);
@@ -486,7 +495,10 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t id
        vm_page_lock(m);
        vm_page_hold(m);
        vm_page_unlock(m);
-       VM_OBJECT_WUNLOCK(tobj);
+       if (VM_OBJECT_WOWNED(tobj))
+               VM_OBJECT_WUNLOCK(tobj);
+       else
+               VM_OBJECT_RUNLOCK(tobj);
        error = uiomove_fromphys(&m, offset, tlen, uio);
        vm_page_lock(m);
        vm_page_unhold(m);
@@ -571,9 +583,18 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, st
        offset = addr & PAGE_MASK;
        tlen = MIN(PAGE_SIZE - offset, len);
 
-       VM_OBJECT_WLOCK(tobj);
+       VM_OBJECT_RLOCK(tobj);
+retry:
        tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
        if (tpg->valid != VM_PAGE_BITS_ALL) {
+               if (!VM_OBJECT_LOCK_TRYUPGRADE(tobj)) {
+                       VM_OBJECT_RUNLOCK(tobj);
+                       VM_OBJECT_WLOCK(tobj);
+                       vm_page_lock(tpg);
+                       vm_page_free(tpg);
+                       vm_page_unlock(tpg);
+                       goto retry;
+               }
                if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
                        rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
                        tpg = vm_page_lookup(tobj, idx);
@@ -601,7 +622,10 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, st
        vm_page_lock(tpg);
        vm_page_hold(tpg);
        vm_page_unlock(tpg);
-       VM_OBJECT_WUNLOCK(tobj);
+       if (VM_OBJECT_WOWNED(tobj))
+               VM_OBJECT_WUNLOCK(tobj);
+       else
+               VM_OBJECT_RUNLOCK(tobj);
        error = uiomove_fromphys(&tpg, offset, tlen, uio);
        VM_OBJECT_WLOCK(tobj);
        if (error == 0)
Index: fs/tmpfs/tmpfs_subr.c
===================================================================
--- fs/tmpfs/tmpfs_subr.c        (.../vmcontention/sys)        (revision 254189)
+++ fs/tmpfs/tmpfs_subr.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -1355,7 +1355,8 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize,
 retry:
                m = vm_page_lookup(uobj, idx);
                if (m != NULL) {
-                       if (vm_page_sleep_if_busy(m, "tmfssz"))
+                       if (vm_page_sleep_if_busy(m, "tmfssz",
+                           VM_ALLOC_NOBUSY, FALSE))
                                goto retry;
                        MPASS(m->valid == VM_PAGE_BITS_ALL);
                } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
Index: dev/drm2/i915/i915_gem.c
===================================================================
--- dev/drm2/i915/i915_gem.c        (.../vmcontention/sys)        (revision 254189)
+++ dev/drm2/i915/i915_gem.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -2342,7 +2342,8 @@ retry:
                m = vm_page_lookup(devobj, i);
                if (m == NULL)
                        continue;
-               if (vm_page_sleep_if_busy(m, "915unm"))
+               if (vm_page_sleep_if_busy(m, "915unm", VM_ALLOC_NOBUSY,
+                   FALSE))
                        goto retry;
                cdev_pager_free_page(devobj, m);
        }
Index: dev/agp/agp.c
===================================================================
--- dev/agp/agp.c        (.../vmcontention/sys)        (revision 254189)
+++ dev/agp/agp.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -545,7 +545,7 @@ agp_generic_bind_memory(device_t dev, struct agp_m
         * because vm_page_grab() may sleep and we can't hold a mutex
         * while sleeping.
         */
-       VM_OBJECT_WLOCK(mem->am_obj);
+       VM_OBJECT_RLOCK(mem->am_obj);
       for (i = 0; i < mem->am_size; i += PAGE_SIZE) {
               /*
                * Find a page from the object and wire it
@@ -558,14 +558,14 @@ agp_generic_bind_memory(device_t dev, struct agp_m
                   VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
               AGP_DPF("found page pa=%#jx\n", (uintmax_t)VM_PAGE_TO_PHYS(m));
       }
-       VM_OBJECT_WUNLOCK(mem->am_obj);
+       VM_OBJECT_RUNLOCK(mem->am_obj);
 
       mtx_lock(&sc->as_lock);
 
       if (mem->am_is_bound) {
               device_printf(dev, "memory already bound\n");
               error = EINVAL;
-               VM_OBJECT_WLOCK(mem->am_obj);
+               VM_OBJECT_RLOCK(mem->am_obj);
               i = 0;
               goto bad;
       }
@@ -574,7 +574,7 @@ agp_generic_bind_memory(device_t dev, struct agp_m
        * Bind the individual pages and flush the chipset's
        * TLB.
        */
-       VM_OBJECT_WLOCK(mem->am_obj);
+       VM_OBJECT_RLOCK(mem->am_obj);
       for (i = 0; i < mem->am_size; i += PAGE_SIZE) {
               m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(i));
@@ -602,7 +602,7 @@ agp_generic_bind_memory(device_t dev, struct agp_m
               }
               vm_page_xunbusy(m);
       }
-       VM_OBJECT_WUNLOCK(mem->am_obj);
+       VM_OBJECT_RUNLOCK(mem->am_obj);
 
       /*
        * Flush the cpu cache since we are providing a new mapping
@@ -623,7 +623,7 @@ agp_generic_bind_memory(device_t dev, struct agp_m
       return 0;
 bad:
       mtx_unlock(&sc->as_lock);
-       VM_OBJECT_ASSERT_WLOCKED(mem->am_obj);
+       VM_OBJECT_ASSERT_LOCKED(mem->am_obj);
       for (k = 0; k < mem->am_size; k += PAGE_SIZE) {
               m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k));
               if (k >= i)
@@ -632,7 +632,7 @@ bad:
               vm_page_unwire(m, 0);
               vm_page_unlock(m);
       }
-       VM_OBJECT_WUNLOCK(mem->am_obj);
+       VM_OBJECT_RUNLOCK(mem->am_obj);
       return error;
 }
 
@@ -659,14 +659,14 @@ agp_generic_unbind_memory(device_t dev, struct agp
        */
       for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE)
               AGP_UNBIND_PAGE(dev, mem->am_offset + i);
-       VM_OBJECT_WLOCK(mem->am_obj);
+       VM_OBJECT_RLOCK(mem->am_obj);
       for (i = 0; i < mem->am_size; i += PAGE_SIZE) {
               m = vm_page_lookup(mem->am_obj, atop(i));
               vm_page_lock(m);
               vm_page_unwire(m, 0);
               vm_page_unlock(m);
       }
-       VM_OBJECT_WUNLOCK(mem->am_obj);
+       VM_OBJECT_RUNLOCK(mem->am_obj);
 
       agp_flush_cache();
       AGP_FLUSH_TLB(dev);
Index: dev/agp/agp_i810.c
===================================================================
--- dev/agp/agp_i810.c        (.../vmcontention/sys)        (revision 254189)
+++ dev/agp/agp_i810.c        (.../vmobj-readlock/sys)        (revision 254189)
@@ -2006,12 +2006,12 @@ agp_i810_free_memory(device_t dev, struct agp_memo
               /*
                * Unwire the page which we wired in alloc_memory.
                */
-               VM_OBJECT_WLOCK(mem->am_obj);
+               VM_OBJECT_RLOCK(mem->am_obj);
               m = vm_page_lookup(mem->am_obj, 0);
               vm_page_lock(m);
               vm_page_unwire(m, 0);
               vm_page_unlock(m);
-               VM_OBJECT_WUNLOCK(mem->am_obj);
+               VM_OBJECT_RUNLOCK(mem->am_obj);
       } else {
               contigfree(sc->argb_cursor, mem->am_size, M_AGP);
               sc->argb_cursor = NULL;