Index: i386/i386/pmap.c =================================================================== --- i386/i386/pmap.c (.../vmcontention/sys) (revision 253964) +++ i386/i386/pmap.c (.../vmobj-readlock/sys) (revision 253964) @@ -3460,7 +3460,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0) + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); mpte = NULL; @@ -4554,13 +4554,12 @@ pmap_is_modified(vm_page_t m) ("pmap_is_modified: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || @@ -4689,13 +4688,12 @@ pmap_remove_write(vm_page_t m) ("pmap_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); @@ -4846,13 +4844,13 @@ pmap_clear_modify(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("pmap_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; Index: i386/xen/pmap.c =================================================================== --- i386/xen/pmap.c (.../vmcontention/sys) (revision 253964) +++ i386/xen/pmap.c (.../vmobj-readlock/sys) (revision 253964) @@ -2667,7 +2667,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0) + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); mpte = NULL; @@ -3696,13 +3696,12 @@ pmap_is_modified(vm_page_t m) rv = FALSE; /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. 
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (rv); rw_wlock(&pvh_global_lock); sched_pin(); @@ -3827,13 +3826,12 @@ pmap_remove_write(vm_page_t m) ("pmap_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); @@ -3933,13 +3931,13 @@ pmap_clear_modify(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("pmap_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; Index: cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (.../vmcontention/sys) (revision 253964) +++ cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (.../vmobj-readlock/sys) (revision 253964) @@ -335,20 +335,24 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, for (;;) { if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && pp->valid) { - if ((pp->oflags & VPO_BUSY) != 0) { + if (vm_page_xbusied(pp)) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_reference(pp); - vm_page_sleep(pp, "zfsmwb"); + vm_page_lock(pp); + zfs_vmobject_wunlock(obj); + vm_page_busy_sleep(pp, "zfsmwb"); + zfs_vmobject_wlock(obj); continue; } + vm_page_sbusy(pp); } else if (pp == NULL) { pp = vm_page_alloc(obj, OFF_TO_IDX(start), VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | - VM_ALLOC_NOBUSY); + VM_ALLOC_SBUSY); } else { ASSERT(pp != NULL && !pp->valid); pp = NULL; @@ -357,7 +361,6 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, if (pp != NULL) { ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); vm_object_pip_add(obj, 1); - vm_page_io_start(pp); pmap_remove_write(pp); vm_page_clear_dirty(pp, off, nbytes); } @@ -370,7 +373,7 @@ static void page_unbusy(vm_page_t pp) { - vm_page_io_finish(pp); + vm_page_sunbusy(pp); vm_object_pip_subtract(pp->object, 1); } @@ -386,14 +389,17 @@ page_hold(vnode_t *vp, int64_t start) for (;;) { if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && pp->valid) { - if ((pp->oflags & VPO_BUSY) != 0) { + if (vm_page_xbusied(pp)) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. 
*/ vm_page_reference(pp); - vm_page_sleep(pp, "zfsmwb"); + vm_page_lock(pp); + zfs_vmobject_wunlock(obj); + vm_page_busy_sleep(pp, "zfsmwb"); + zfs_vmobject_wlock(obj); continue; } @@ -467,7 +473,7 @@ update_pages(vnode_t *vp, int64_t start, int len, ("zfs update_pages: unaligned data in putpages case")); KASSERT(pp->valid == VM_PAGE_BITS_ALL, ("zfs update_pages: invalid page in putpages case")); - KASSERT(pp->busy > 0, + KASSERT(vm_page_sbusied(pp), ("zfs update_pages: unbusy page in putpages case")); KASSERT(!pmap_page_is_write_mapped(pp), ("zfs update_pages: writable page in putpages case")); @@ -503,7 +509,7 @@ update_pages(vnode_t *vp, int64_t start, int len, * ZFS to populate a range of page cache pages with data. * * NOTE: this function could be optimized to pre-allocate - * all pages in advance, drain VPO_BUSY on all of them, + * all pages in advance, drain exclusive busy on all of them, * map them into contiguous KVA region and populate them * in one single dmu_read() call. */ @@ -531,10 +537,9 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { int bytes = MIN(PAGESIZE, len); - pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY | + pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY); if (pp->valid == 0) { - vm_page_io_start(pp); zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); error = dmu_read(os, zp->z_id, start, bytes, va, @@ -543,18 +548,19 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) bzero(va + bytes, PAGESIZE - bytes); zfs_unmap_page(sf); zfs_vmobject_wlock(obj); - vm_page_io_finish(pp); + vm_page_sunbusy(pp); vm_page_lock(pp); if (error) { if (pp->wire_count == 0 && pp->valid == 0 && - pp->busy == 0 && !(pp->oflags & VPO_BUSY)) + !vm_page_busied(pp)) vm_page_free(pp); } else { pp->valid = VM_PAGE_BITS_ALL; vm_page_activate(pp); } vm_page_unlock(pp); - } + } else + vm_page_sunbusy(pp); if (error) break; uio->uio_resid -= bytes; Index: dev/drm2/i915/i915_gem.c =================================================================== --- dev/drm2/i915/i915_gem.c (.../vmcontention/sys) (revision 253964) +++ dev/drm2/i915/i915_gem.c (.../vmobj-readlock/sys) (revision 253964) @@ -1356,9 +1356,8 @@ i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffse *mres = NULL; } else oldm = NULL; + VM_OBJECT_WUNLOCK(vm_obj); retry: - VM_OBJECT_WUNLOCK(vm_obj); -unlocked_vmobj: cause = ret = 0; m = NULL; @@ -1379,9 +1378,11 @@ retry: VM_OBJECT_WLOCK(vm_obj); m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); if (m != NULL) { - if ((m->flags & VPO_BUSY) != 0) { + if (vm_page_busied(m)) { DRM_UNLOCK(dev); - vm_page_sleep(m, "915pee"); + vm_page_lock(m); + VM_OBJECT_WUNLOCK(vm_obj); + vm_page_busy_sleep(m, "915pee"); goto retry; } goto have_page; @@ -1435,16 +1436,18 @@ retry: ("not fictitious %p", m)); KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); - if ((m->flags & VPO_BUSY) != 0) { + if (vm_page_busied(m)) { DRM_UNLOCK(dev); - vm_page_sleep(m, "915pbs"); + vm_page_lock(m); + VM_OBJECT_WUNLOCK(vm_obj); + vm_page_busy_sleep(m, "915pbs"); goto retry; } m->valid = VM_PAGE_BITS_ALL; vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); have_page: *mres = m; - vm_page_busy(m); + vm_page_xbusy(m); CTR4(KTR_DRM, "fault %p %jx %x phys %x", gem_obj, offset, prot, m->phys_addr); @@ -1465,7 +1468,7 @@ out: -ret, cause); if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) { kern_yield(PRI_USER); - goto unlocked_vmobj; + goto retry; } VM_OBJECT_WLOCK(vm_obj); 
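The conversions above — ZFS's page_busy()/page_hold() and the i915 fault handler — all replace vm_page_sleep() with the same sequence: lock the page, drop the object lock, and call vm_page_busy_sleep(), which releases the page lock while sleeping; the caller then retakes the object lock and retries the lookup. A minimal sketch of that pattern, not taken from the patch, using a hypothetical wait_unbusied() helper and the two-argument vm_page_busy_sleep() shown in these hunks:

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

/*
 * Illustrative only: the retry loop the patch converts sleepers to.
 * vm_page_busy_sleep() is entered with the page lock held and drops
 * it while sleeping; the object lock must not be held across it.
 */
static vm_page_t
wait_unbusied(vm_object_t obj, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_WLOCK(obj);
retry:
	m = vm_page_lookup(obj, pindex);
	if (m != NULL && vm_page_busied(m)) {
		vm_page_lock(m);
		VM_OBJECT_WUNLOCK(obj);
		vm_page_busy_sleep(m, "exwait");
		VM_OBJECT_WLOCK(obj);
		goto retry;
	}
	return (m);	/* Object lock is still held on return. */
}

The retry is mandatory: once both locks have been dropped, the page may have been freed, replaced, or busied again before the lookup is repeated.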
vm_object_pip_wakeup(vm_obj); @@ -2330,7 +2333,7 @@ retry: m = vm_page_lookup(devobj, i); if (m == NULL) continue; - if (vm_page_sleep_if_busy(m, true, "915unm")) + if (vm_page_sleep_if_busy(m, "915unm")) goto retry; cdev_pager_free_page(devobj, m); } @@ -2504,10 +2507,8 @@ i915_gem_wire_page(vm_object_t object, vm_pindex_t int rv; VM_OBJECT_ASSERT_WLOCKED(object); - m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | - VM_ALLOC_RETRY); + m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { - vm_page_busy(m); if (vm_pager_has_page(object, pindex, NULL, NULL)) { rv = vm_pager_get_pages(object, &m, 1, 0); m = vm_page_lookup(object, pindex); @@ -2524,11 +2525,11 @@ i915_gem_wire_page(vm_object_t object, vm_pindex_t m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } - vm_page_wakeup(m); } vm_page_lock(m); vm_page_wire(m); vm_page_unlock(m); + vm_page_xunbusy(m); atomic_add_long(&i915_gem_wired_pages_cnt, 1); return (m); } Index: dev/drm2/ttm/ttm_tt.c =================================================================== --- dev/drm2/ttm/ttm_tt.c (.../vmcontention/sys) (revision 253964) +++ dev/drm2/ttm/ttm_tt.c (.../vmobj-readlock/sys) (revision 253964) @@ -288,10 +288,8 @@ int ttm_tt_swapin(struct ttm_tt *ttm) VM_OBJECT_WLOCK(obj); vm_object_pip_add(obj, 1); for (i = 0; i < ttm->num_pages; ++i) { - from_page = vm_page_grab(obj, i, VM_ALLOC_NOBUSY | - VM_ALLOC_RETRY); + from_page = vm_page_grab(obj, i, VM_ALLOC_RETRY); if (from_page->valid != VM_PAGE_BITS_ALL) { - vm_page_busy(from_page); if (vm_pager_has_page(obj, i, NULL, NULL)) { rv = vm_pager_get_pages(obj, &from_page, 1, 0); if (rv != VM_PAGER_OK) { @@ -303,8 +301,8 @@ int ttm_tt_swapin(struct ttm_tt *ttm) } } else vm_page_zero_invalid(from_page, TRUE); - vm_page_wakeup(from_page); } + vm_page_xunbusy(from_page); to_page = ttm->pages[i]; if (unlikely(to_page == NULL)) { ret = -ENOMEM; @@ -357,7 +355,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, vm_object_t pmap_copy_page(from_page, to_page); vm_page_dirty(to_page); to_page->valid = VM_PAGE_BITS_ALL; - vm_page_wakeup(to_page); + vm_page_xunbusy(to_page); } vm_object_pip_wakeup(obj); VM_OBJECT_WUNLOCK(obj); Index: dev/drm2/ttm/ttm_bo_vm.c =================================================================== --- dev/drm2/ttm/ttm_bo_vm.c (.../vmcontention/sys) (revision 253964) +++ dev/drm2/ttm/ttm_bo_vm.c (.../vmobj-readlock/sys) (revision 253964) @@ -212,8 +212,11 @@ reserve: } VM_OBJECT_WLOCK(vm_obj); - if ((m->flags & VPO_BUSY) != 0) { - vm_page_sleep(m, "ttmpbs"); + if (vm_page_busied(m)) { + vm_page_lock(m); + VM_OBJECT_WUNLOCK(vm_obj); + vm_page_busy_sleep(m, "ttmpbs"); + VM_OBJECT_WLOCK(vm_obj); ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); goto retry; @@ -228,7 +231,7 @@ reserve: ("inconsistent insert bo %p m %p m1 %p offset %jx", bo, m, m1, (uintmax_t)offset)); } - vm_page_busy(m); + vm_page_xbusy(m); if (oldm != NULL) { vm_page_lock(oldm); Index: dev/md/md.c =================================================================== --- dev/md/md.c (.../vmcontention/sys) (revision 253964) +++ dev/md/md.c (.../vmobj-readlock/sys) (revision 253964) @@ -834,7 +834,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp) else rv = vm_pager_get_pages(sc->object, &m, 1, 0); if (rv == VM_PAGER_ERROR) { - vm_page_wakeup(m); + vm_page_xunbusy(m); break; } else if (rv == VM_PAGER_FAIL) { /* @@ -859,7 +859,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp) else rv = VM_PAGER_OK; if (rv == VM_PAGER_ERROR) { - vm_page_wakeup(m); + vm_page_xunbusy(m); 
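The vm_page_grab() call sites in this stretch (i915_gem_wire_page(), ttm_tt_swapin(), and the tmpfs, exec, and swap-pager sites later in the diff) drop VM_ALLOC_NOBUSY, so the grabbed page now comes back exclusive busied and the old vm_page_busy()/vm_page_wakeup() bracket collapses into a single vm_page_xunbusy() once the page is valid. A sketch of the resulting shape, illustrative only, with the hypothetical name fill_valid_page() standing in for the per-driver wrappers:

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

/*
 * Illustrative only: grab a page exclusive busied, page it in if it
 * is not fully valid, then drop the exclusive busy.
 */
static vm_page_t
fill_valid_page(vm_object_t obj, vm_pindex_t pindex)
{
	vm_page_t m;
	int rv;

	VM_OBJECT_ASSERT_WLOCKED(obj);
	m = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
	if (m->valid != VM_PAGE_BITS_ALL) {
		if (vm_pager_has_page(obj, pindex, NULL, NULL)) {
			rv = vm_pager_get_pages(obj, &m, 1, 0);
			m = vm_page_lookup(obj, pindex);
			if (m == NULL || rv != VM_PAGER_OK)
				return (NULL);	/* error cleanup elided */
		} else
			vm_page_zero_invalid(m, TRUE);
	}
	vm_page_xunbusy(m);	/* grab busied it; was vm_page_wakeup() */
	return (m);
}

The md(4) and agp(4) hunks are the degenerate case of the same change: the grab already returned the page busied, so only the vm_page_wakeup() to vm_page_xunbusy() rename is visible there.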
break; } if ((bp->bio_flags & BIO_UNMAPPED) != 0) { @@ -875,7 +875,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp) else rv = VM_PAGER_OK; if (rv == VM_PAGER_ERROR) { - vm_page_wakeup(m); + vm_page_xunbusy(m); break; } if (len != PAGE_SIZE) { @@ -885,7 +885,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp) } else vm_pager_page_unswapped(m); } - vm_page_wakeup(m); + vm_page_xunbusy(m); vm_page_lock(m); if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE) vm_page_free(m); Index: dev/agp/agp.c =================================================================== --- dev/agp/agp.c (.../vmcontention/sys) (revision 253964) +++ dev/agp/agp.c (.../vmobj-readlock/sys) (revision 253964) @@ -600,7 +600,7 @@ agp_generic_bind_memory(device_t dev, struct agp_m goto bad; } } - vm_page_wakeup(m); + vm_page_xunbusy(m); } VM_OBJECT_WUNLOCK(mem->am_obj); @@ -627,7 +627,7 @@ bad: for (k = 0; k < mem->am_size; k += PAGE_SIZE) { m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k)); if (k >= i) - vm_page_wakeup(m); + vm_page_xunbusy(m); vm_page_lock(m); vm_page_unwire(m, 0); vm_page_unlock(m); Index: kern/subr_uio.c =================================================================== --- kern/subr_uio.c (.../vmcontention/sys) (revision 253964) +++ kern/subr_uio.c (.../vmobj-readlock/sys) (revision 253964) @@ -107,7 +107,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_o VM_OBJECT_WLOCK(uobject); retry: if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { - if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) + if (vm_page_sleep_if_busy(user_pg, "vm_pgmoveco")) goto retry; vm_page_lock(user_pg); pmap_remove_all(user_pg); Index: kern/uipc_shm.c =================================================================== --- kern/uipc_shm.c (.../vmcontention/sys) (revision 253964) +++ kern/uipc_shm.c (.../vmobj-readlock/sys) (revision 253964) @@ -281,11 +281,8 @@ shm_dotruncate(struct shmfd *shmfd, off_t length) retry: m = vm_page_lookup(object, idx); if (m != NULL) { - if ((m->oflags & VPO_BUSY) != 0 || - m->busy != 0) { - vm_page_sleep(m, "shmtrc"); + if (vm_page_sleep_if_busy(m, "shmtrc")) goto retry; - } } else if (vm_pager_has_page(object, idx, NULL, NULL)) { m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL); if (m == NULL) { @@ -305,7 +302,7 @@ retry: if (rv == VM_PAGER_OK) { vm_page_deactivate(m); vm_page_unlock(m); - vm_page_wakeup(m); + vm_page_xunbusy(m); } else { vm_page_free(m); vm_page_unlock(m); Index: kern/vfs_cluster.c =================================================================== --- kern/vfs_cluster.c (.../vmcontention/sys) (revision 253964) +++ kern/vfs_cluster.c (.../vmobj-readlock/sys) (revision 253964) @@ -466,7 +466,7 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; - vm_page_io_start(m); + vm_page_sbusy(m); vm_object_pip_add(m->object, 1); if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages-1] != m)) { @@ -947,7 +947,7 @@ cluster_wbuild(struct vnode *vp, long size, daddr_ if (i != 0) { /* if not first buffer */ for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; - if (m->oflags & VPO_BUSY) { + if (vm_page_xbusied(m)) { VM_OBJECT_WUNLOCK( tbp->b_object); bqrelse(tbp); @@ -957,7 +957,7 @@ cluster_wbuild(struct vnode *vp, long size, daddr_ } for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; - vm_page_io_start(m); + vm_page_sbusy(m); vm_object_pip_add(m->object, 1); if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages - 1] != m)) { Index: kern/kern_exec.c 
=================================================================== --- kern/kern_exec.c (.../vmcontention/sys) (revision 253964) +++ kern/kern_exec.c (.../vmobj-readlock/sys) (revision 253964) @@ -937,10 +937,8 @@ exec_map_first_page(imgp) object->pg_color = 0; } #endif - ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | - VM_ALLOC_RETRY); + ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (ma[0]->valid != VM_PAGE_BITS_ALL) { - vm_page_busy(ma[0]); initial_pagein = VM_INITIAL_PAGEIN; if (initial_pagein > object->size) initial_pagein = object->size; @@ -948,9 +946,8 @@ exec_map_first_page(imgp) if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) break; - if ((ma[i]->oflags & VPO_BUSY) || ma[i]->busy) + if (vm_page_tryxbusy(ma[i])) break; - vm_page_busy(ma[i]); } else { ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED); @@ -970,8 +967,8 @@ exec_map_first_page(imgp) VM_OBJECT_WUNLOCK(object); return (EIO); } - vm_page_wakeup(ma[0]); } + vm_page_xunbusy(ma[0]); vm_page_lock(ma[0]); vm_page_hold(ma[0]); vm_page_unlock(ma[0]); Index: kern/uipc_syscalls.c =================================================================== --- kern/uipc_syscalls.c (.../vmcontention/sys) (revision 253964) +++ kern/uipc_syscalls.c (.../vmobj-readlock/sys) (revision 253964) @@ -2272,7 +2272,7 @@ retry_space: * then free it. */ if (pg->wire_count == 0 && pg->valid == 0 && - pg->busy == 0 && !(pg->oflags & VPO_BUSY)) + !vm_page_busied(pg)) vm_page_free(pg); vm_page_unlock(pg); VM_OBJECT_WUNLOCK(obj); Index: kern/vfs_bio.c =================================================================== --- kern/vfs_bio.c (.../vmcontention/sys) (revision 253964) +++ kern/vfs_bio.c (.../vmobj-readlock/sys) (revision 253964) @@ -584,7 +584,7 @@ vfs_buf_test_cache(struct buf *bp, vm_page_t m) { - VM_OBJECT_ASSERT_WLOCKED(m->object); + VM_OBJECT_ASSERT_LOCKED(m->object); if (bp->b_flags & B_CACHE) { int base = (foff + off) & PAGE_MASK; if (vm_page_is_valid(m, base, size) == 0) @@ -1852,26 +1852,19 @@ vfs_vmio_release(struct buf *bp) */ vm_page_lock(m); vm_page_unwire(m, 0); + /* - * We don't mess with busy pages, it is - * the responsibility of the process that - * busied the pages to deal with them. + * Might as well free the page if we can and it has + * no valid data. We also free the page if the + * buffer was used for direct I/O */ - if ((m->oflags & VPO_BUSY) == 0 && m->busy == 0 && - m->wire_count == 0) { - /* - * Might as well free the page if we can and it has - * no valid data. We also free the page if the - * buffer was used for direct I/O - */ - if ((bp->b_flags & B_ASYNC) == 0 && !m->valid) { + if ((bp->b_flags & B_ASYNC) == 0 && !m->valid) { + if (m->wire_count == 0 && !vm_page_busied(m)) vm_page_free(m); - } else if (bp->b_flags & B_DIRECT) { - vm_page_try_to_free(m); - } else if (buf_vm_page_count_severe()) { - vm_page_try_to_cache(m); - } - } + } else if (bp->b_flags & B_DIRECT) + vm_page_try_to_free(m); + else if (buf_vm_page_count_severe()) + vm_page_try_to_cache(m); vm_page_unlock(m); } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); @@ -3450,7 +3443,7 @@ allocbuf(struct buf *bp, int size) m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); - while (vm_page_sleep_if_busy(m, TRUE, + while (vm_page_sleep_if_busy(m, "biodep")) continue; @@ -3489,10 +3482,10 @@ allocbuf(struct buf *bp, int size) * here could interfere with paging I/O, no * matter which process we are. * - * We can only test VPO_BUSY here. 
Blocking on - * m->busy might lead to a deadlock: - * vm_fault->getpages->cluster_read->allocbuf - * Thus, we specify VM_ALLOC_IGN_SBUSY. + * Only exclusive busy can be tested here. + * Blocking on shared busy might lead to + * deadlocks once allocbuf() is called after + * pages are vfs_busy_pages(). */ m = vm_page_grab(obj, OFF_TO_IDX(bp->b_offset) + bp->b_npages, VM_ALLOC_NOBUSY | @@ -3852,7 +3845,7 @@ bufdone_finish(struct buf *bp) vfs_page_set_valid(bp, foff, m); } - vm_page_io_finish(m); + vm_page_sunbusy(m); vm_object_pip_subtract(obj, 1); foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; iosize -= resid; @@ -3914,7 +3907,7 @@ vfs_unbusy_pages(struct buf *bp) BUF_CHECK_UNMAPPED(bp); } vm_object_pip_subtract(obj, 1); - vm_page_io_finish(m); + vm_page_sunbusy(m); } vm_object_pip_wakeupn(obj, 0); VM_OBJECT_WUNLOCK(obj); @@ -3987,8 +3980,8 @@ vfs_page_set_validclean(struct buf *bp, vm_ooffset } /* - * Ensure that all buffer pages are not busied by VPO_BUSY flag. If - * any page is busy, drain the flag. + * Ensure that all buffer pages are not exclusive busied. If any page is + * exclusive busy, drain it. */ static void vfs_drain_busy_pages(struct buf *bp) @@ -4000,22 +3993,26 @@ vfs_drain_busy_pages(struct buf *bp) last_busied = 0; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; - if ((m->oflags & VPO_BUSY) != 0) { + if (vm_page_xbusied(m)) { for (; last_busied < i; last_busied++) - vm_page_busy(bp->b_pages[last_busied]); - while ((m->oflags & VPO_BUSY) != 0) - vm_page_sleep(m, "vbpage"); + vm_page_xbusy(bp->b_pages[last_busied]); + while (vm_page_xbusied(m)) { + vm_page_lock(m); + VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); + vm_page_busy_sleep(m, "vbpage"); + VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); + } } } for (i = 0; i < last_busied; i++) - vm_page_wakeup(bp->b_pages[i]); + vm_page_xunbusy(bp->b_pages[i]); } /* * This routine is called before a device strategy routine. * It is used to tell the VM system that paging I/O is in * progress, and treat the pages associated with the buffer - * almost as being VPO_BUSY. Also the object paging_in_progress + * almost as being exclusive busy. Also the object paging_in_progress * flag is handled to make sure that the object doesn't become * inconsistant. 
* @@ -4048,7 +4045,7 @@ vfs_busy_pages(struct buf *bp, int clear_modify) if ((bp->b_flags & B_CLUSTER) == 0) { vm_object_pip_add(obj, 1); - vm_page_io_start(m); + vm_page_sbusy(m); } /* * When readying a buffer for a read ( i.e @@ -4268,7 +4265,7 @@ vm_hold_free_pages(struct buf *bp, int newbsize) for (index = newnpages; index < bp->b_npages; index++) { p = bp->b_pages[index]; bp->b_pages[index] = NULL; - if (p->busy != 0) + if (vm_page_sbusied(p)) printf("vm_hold_free_pages: blkno: %jd, lblkno: %jd\n", (intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno); p->wire_count--; Index: ia64/ia64/pmap.c =================================================================== --- ia64/ia64/pmap.c (.../vmcontention/sys) (revision 253964) +++ ia64/ia64/pmap.c (.../vmobj-readlock/sys) (revision 253964) @@ -1677,7 +1677,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t va &= ~PAGE_MASK; KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); - KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0, + KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m), ("pmap_enter: page %p is not busy", m)); /* @@ -2234,13 +2234,12 @@ pmap_is_modified(vm_page_t m) rv = FALSE; /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can be dirty. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (rv); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { @@ -2323,13 +2322,13 @@ pmap_clear_modify(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("pmap_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can be modified. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; @@ -2396,13 +2395,12 @@ pmap_remove_write(vm_page_t m) ("pmap_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { Index: fs/tmpfs/tmpfs_vnops.c =================================================================== --- fs/tmpfs/tmpfs_vnops.c (.../vmcontention/sys) (revision 253964) +++ fs/tmpfs/tmpfs_vnops.c (.../vmobj-readlock/sys) (revision 253964) @@ -449,7 +449,7 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t id /* * Parallel reads of the page content from disk are prevented - * by VPO_BUSY. + * by exclusive busy. 
* * Although the tmpfs vnode lock is held here, it is * nonetheless safe to sleep waiting for a free page. The @@ -457,10 +457,8 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t id * lock to page out tobj's pages because tobj is a OBJT_SWAP * type object. */ - m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | - VM_ALLOC_NOBUSY); + m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { - vm_page_busy(m); if (vm_pager_has_page(tobj, idx, NULL, NULL)) { rv = vm_pager_get_pages(tobj, &m, 1, 0); m = vm_page_lookup(tobj, idx); @@ -483,8 +481,8 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t id } } else vm_page_zero_invalid(m, TRUE); - vm_page_wakeup(m); } + vm_page_xunbusy(m); vm_page_lock(m); vm_page_hold(m); vm_page_unlock(m); @@ -574,10 +572,8 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, st tlen = MIN(PAGE_SIZE - offset, len); VM_OBJECT_WLOCK(tobj); - tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | - VM_ALLOC_RETRY); + tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (tpg->valid != VM_PAGE_BITS_ALL) { - vm_page_busy(tpg); if (vm_pager_has_page(tobj, idx, NULL, NULL)) { rv = vm_pager_get_pages(tobj, &tpg, 1, 0); tpg = vm_page_lookup(tobj, idx); @@ -600,8 +596,8 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, st } } else vm_page_zero_invalid(tpg, TRUE); - vm_page_wakeup(tpg); } + vm_page_xunbusy(tpg); vm_page_lock(tpg); vm_page_hold(tpg); vm_page_unlock(tpg); Index: fs/tmpfs/tmpfs_subr.c =================================================================== --- fs/tmpfs/tmpfs_subr.c (.../vmcontention/sys) (revision 253964) +++ fs/tmpfs/tmpfs_subr.c (.../vmobj-readlock/sys) (revision 253964) @@ -1331,11 +1331,8 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, retry: m = vm_page_lookup(uobj, idx); if (m != NULL) { - if ((m->oflags & VPO_BUSY) != 0 || - m->busy != 0) { - vm_page_sleep(m, "tmfssz"); + if (vm_page_sleep_if_busy(m, "tmfssz")) goto retry; - } MPASS(m->valid == VM_PAGE_BITS_ALL); } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) { m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL); @@ -1355,7 +1352,7 @@ retry: if (rv == VM_PAGER_OK) { vm_page_deactivate(m); vm_page_unlock(m); - vm_page_wakeup(m); + vm_page_xunbusy(m); } else { vm_page_free(m); vm_page_unlock(m); Index: fs/fuse/fuse_vnops.c =================================================================== --- fs/fuse/fuse_vnops.c (.../vmcontention/sys) (revision 253964) +++ fs/fuse/fuse_vnops.c (.../vmobj-readlock/sys) (revision 253964) @@ -1854,36 +1854,8 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) */ ; } - if (i != ap->a_reqpage) { - /* - * Whether or not to leave the page activated is up in - * the air, but we should put the page on a page queue - * somewhere (it already is in the object). Result: - * It appears that emperical results show that - * deactivating pages is best. - */ - - /* - * Just in case someone was asking for this page we - * now tell them that it is ok to use. 
- */ - if (!error) { - if (m->oflags & VPO_WANTED) { - fuse_vm_page_lock(m); - vm_page_activate(m); - fuse_vm_page_unlock(m); - } else { - fuse_vm_page_lock(m); - vm_page_deactivate(m); - fuse_vm_page_unlock(m); - } - vm_page_wakeup(m); - } else { - fuse_vm_page_lock(m); - vm_page_free(m); - fuse_vm_page_unlock(m); - } - } + if (i != ap->a_reqpage) + vm_page_readahead_finish(m); } fuse_vm_page_unlock_queues(); VM_OBJECT_WUNLOCK(vp->v_object); Index: mips/mips/pmap.c =================================================================== --- mips/mips/pmap.c (.../vmcontention/sys) (revision 253964) +++ mips/mips/pmap.c (.../vmobj-readlock/sys) (revision 253964) @@ -2014,7 +2014,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); - KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0, + KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m), ("pmap_enter: page %p is not busy", m)); pa = VM_PAGE_TO_PHYS(m); newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, access, prot); @@ -2812,13 +2812,12 @@ pmap_remove_write(vm_page_t m) ("pmap_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { @@ -2878,13 +2877,12 @@ pmap_is_modified(vm_page_t m) ("pmap_is_modified: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PTE_D set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_testbit(m, PTE_D); @@ -2931,13 +2929,13 @@ pmap_clear_modify(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("pmap_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * write busied, then PGA_WRITEABLE cannot be concurrently set. 
*/ if ((m->aflags & PGA_WRITEABLE) == 0) return; Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (.../vmcontention/sys) (revision 253964) +++ amd64/amd64/pmap.c (.../vmobj-readlock/sys) (revision 253964) @@ -3489,7 +3489,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0) + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); newpte = (pt_entry_t)(pa | PG_A | PG_V); @@ -4597,13 +4597,12 @@ pmap_is_modified(vm_page_t m) ("pmap_is_modified: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || @@ -4728,13 +4727,12 @@ pmap_remove_write(vm_page_t m) ("pmap_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); if ((m->flags & PG_FICTITIOUS) != 0) @@ -4877,13 +4875,13 @@ pmap_clear_modify(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("pmap_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 
*/ if ((m->aflags & PGA_WRITEABLE) == 0) return; Index: arm/arm/pmap-v6.c =================================================================== --- arm/arm/pmap-v6.c (.../vmcontention/sys) (revision 253964) +++ arm/arm/pmap-v6.c (.../vmobj-readlock/sys) (revision 253964) @@ -2671,8 +2671,8 @@ pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_ pa = systempage.pv_pa; m = NULL; } else { - KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || - (flags & M_NOWAIT) != 0, + KASSERT((m->oflags & VPO_UNMANAGED) != 0 || + vm_page_xbusied(m) || (flags & M_NOWAIT) != 0, ("pmap_enter_locked: page %p is not busy", m)); pa = VM_PAGE_TO_PHYS(m); } @@ -3935,13 +3935,12 @@ pmap_is_modified(vm_page_t m) ("pmap_is_modified: page %p is not managed", m)); rv = FALSE; /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (rv); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { @@ -3969,13 +3968,13 @@ pmap_clear_modify(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("pmap_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no mappings can be modified. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; @@ -4010,13 +4009,12 @@ pmap_remove_write(vm_page_t m) ("pmap_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. 
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) != 0 || - (m->aflags & PGA_WRITEABLE) != 0) + if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) pmap_clearbit(m, PVF_WRITE); } Index: arm/arm/pmap.c =================================================================== --- arm/arm/pmap.c (.../vmcontention/sys) (revision 253964) +++ arm/arm/pmap.c (.../vmobj-readlock/sys) (revision 253964) @@ -3319,8 +3319,8 @@ pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_ pa = systempage.pv_pa; m = NULL; } else { - KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || - (flags & M_NOWAIT) != 0, + KASSERT((m->oflags & VPO_UNMANAGED) != 0 || + vm_page_xbusied(m) || (flags & M_NOWAIT) != 0, ("pmap_enter_locked: page %p is not busy", m)); pa = VM_PAGE_TO_PHYS(m); } @@ -4555,13 +4555,13 @@ pmap_clear_modify(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("pmap_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no mappings can be modified. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; @@ -4612,13 +4612,12 @@ pmap_remove_write(vm_page_t m) ("pmap_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) != 0 || - (m->aflags & PGA_WRITEABLE) != 0) + if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) pmap_clearbit(m, PVF_WRITE); } Index: powerpc/aim/mmu_oea64.c =================================================================== --- powerpc/aim/mmu_oea64.c (.../vmcontention/sys) (revision 253964) +++ powerpc/aim/mmu_oea64.c (.../vmobj-readlock/sys) (revision 253964) @@ -1260,7 +1260,7 @@ moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t v pvo_flags = PVO_MANAGED; } - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0) + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); /* XXX change the pvo head for fake pages */ @@ -1522,13 +1522,12 @@ moea64_is_modified(mmu_t mmu, vm_page_t m) ("moea64_is_modified: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have LPTE_CHG set. 
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (moea64_query_bit(mmu, m, LPTE_CHG)); } @@ -1562,13 +1561,13 @@ moea64_clear_modify(mmu_t mmu, vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("moea64_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("moea64_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG * set. If the object containing the page is locked and the page is - * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; @@ -1590,13 +1589,12 @@ moea64_remove_write(mmu_t mmu, vm_page_t m) ("moea64_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; powerpc_sync(); LOCK_TABLE_RD(); Index: powerpc/aim/mmu_oea.c =================================================================== --- powerpc/aim/mmu_oea.c (.../vmcontention/sys) (revision 253964) +++ powerpc/aim/mmu_oea.c (.../vmobj-readlock/sys) (revision 253964) @@ -1158,7 +1158,7 @@ moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_ if (pmap_bootstrapped) rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0) + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); /* XXX change the pvo head for fake pages */ @@ -1326,13 +1326,12 @@ moea_is_modified(mmu_t mmu, vm_page_t m) ("moea_is_modified: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PTE_CHG set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = moea_query_bit(m, PTE_CHG); @@ -1371,13 +1370,13 @@ moea_clear_modify(mmu_t mmu, vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("moea_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("moea_clear_modify: page %p is exclusive busy", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_CHG * set. If the object containing the page is locked and the page is - * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 
*/ if ((m->aflags & PGA_WRITEABLE) == 0) return; @@ -1401,13 +1400,12 @@ moea_remove_write(mmu_t mmu, vm_page_t m) ("moea_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); lo = moea_attr_fetch(m); Index: powerpc/booke/pmap.c =================================================================== --- powerpc/booke/pmap.c (.../vmcontention/sys) (revision 253964) +++ powerpc/booke/pmap.c (.../vmobj-readlock/sys) (revision 253964) @@ -1563,7 +1563,7 @@ mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_ KASSERT((va <= VM_MAXUSER_ADDRESS), ("mmu_booke_enter_locked: user pmap, non user va")); } - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0) + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -1959,13 +1959,12 @@ mmu_booke_remove_write(mmu_t mmu, vm_page_t m) ("mmu_booke_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { @@ -2204,13 +2203,12 @@ mmu_booke_is_modified(mmu_t mmu, vm_page_t m) rv = FALSE; /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can be modified. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (rv); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { @@ -2281,13 +2279,13 @@ mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("mmu_booke_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("mmu_booke_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PG_AWRITEABLE, then no PTEs can be modified. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PG_AWRITEABLE cannot be concurrently set. + * exclusive busied, then PG_AWRITEABLE cannot be concurrently set. 
*/ if ((m->aflags & PGA_WRITEABLE) == 0) return; Index: sparc64/sparc64/pmap.c =================================================================== --- sparc64/sparc64/pmap.c (.../vmcontention/sys) (revision 253964) +++ sparc64/sparc64/pmap.c (.../vmobj-readlock/sys) (revision 253964) @@ -1493,7 +1493,7 @@ pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_pa rw_assert(&tte_list_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pm, MA_OWNED); - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0) + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); PMAP_STATS_INC(pmap_nenter); pa = VM_PAGE_TO_PHYS(m); @@ -2067,13 +2067,12 @@ pmap_is_modified(vm_page_t m) rv = FALSE; /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no TTEs can have TD_W set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (rv); rw_wlock(&tte_list_global_lock); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { @@ -2140,13 +2139,13 @@ pmap_clear_modify(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("pmap_clear_modify: page %p is busy", m)); + KASSERT(!vm_page_xbusied(m), + ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set. * If the object containing the page is locked and the page is not - * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. + * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; @@ -2190,13 +2189,12 @@ pmap_remove_write(vm_page_t m) ("pmap_remove_write: page %p is not managed", m)); /* - * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by - * another thread while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no page table entries need updating. + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && - (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&tte_list_global_lock); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { Index: vm/vm_pageout.c =================================================================== --- vm/vm_pageout.c (.../vmcontention/sys) (revision 253964) +++ vm/vm_pageout.c (.../vmobj-readlock/sys) (revision 253964) @@ -228,8 +228,8 @@ static void vm_pageout_page_stats(void); /* * Initialize a dummy page for marking the caller's place in the specified * paging queue. In principle, this function only needs to set the flag - * PG_MARKER. Nonetheless, it sets the flag VPO_BUSY and initializes the hold - * count to one as safety precautions. + * PG_MARKER. Nonetheless, it write busies and initializes the hold count + * to one as safety precautions. 
*/ static void vm_pageout_init_marker(vm_page_t marker, u_short queue) @@ -237,7 +237,7 @@ vm_pageout_init_marker(vm_page_t marker, u_short q bzero(marker, sizeof(*marker)); marker->flags = PG_MARKER; - marker->oflags = VPO_BUSY; + marker->busy_lock = VPB_SINGLE_EXCLUSIVER; marker->queue = queue; marker->hold_count = 1; } @@ -357,8 +357,7 @@ vm_pageout_clean(vm_page_t m) /* * Can't clean the page if it's busy or held. */ - KASSERT(m->busy == 0 && (m->oflags & VPO_BUSY) == 0, - ("vm_pageout_clean: page %p is busy", m)); + vm_page_assert_unbusied(m); KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m)); vm_page_unlock(m); @@ -396,8 +395,7 @@ more: break; } - if ((p = vm_page_prev(pb)) == NULL || - (p->oflags & VPO_BUSY) != 0 || p->busy != 0) { + if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { ib = 0; break; } @@ -426,8 +424,7 @@ more: pindex + is < object->size) { vm_page_t p; - if ((p = vm_page_next(ps)) == NULL || - (p->oflags & VPO_BUSY) != 0 || p->busy != 0) + if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) break; vm_page_lock(p); vm_page_test_dirty(p); @@ -497,7 +494,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int fla KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); - vm_page_io_start(mc[i]); + vm_page_sbusy(mc[i]); pmap_remove_write(mc[i]); } vm_object_pip_add(object, count); @@ -553,7 +550,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int fla */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); - vm_page_io_finish(mt); + vm_page_sunbusy(mt); if (vm_page_count_severe()) { vm_page_lock(mt); vm_page_try_to_cache(mt); @@ -594,8 +591,7 @@ vm_pageout_launder(int queue, int tries, vm_paddr_ object = m->object; if ((!VM_OBJECT_TRYWLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || - m->hold_count != 0)) || (m->oflags & VPO_BUSY) != 0 || - m->busy != 0) { + m->hold_count != 0)) || vm_page_busied(m)) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); continue; @@ -730,7 +726,7 @@ vm_pageout_object_deactivate_pages(pmap_t pmap, vm TAILQ_FOREACH(p, &object->memq, listq) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; - if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0) + if (vm_page_busied(p)) continue; PCPU_INC(cnt.v_pdpages); vm_page_lock(p); @@ -972,7 +968,7 @@ vm_pageout_scan(int pass) * pages, because they may leave the inactive queue * shortly after page scan is finished. */ - if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) { + if (vm_page_busied(m)) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); addl_page_shortage++; @@ -1191,7 +1187,7 @@ vm_pageout_scan(int pass) * page back onto the end of the queue so that * statistics are more correct if we don't. */ - if (m->busy || (m->oflags & VPO_BUSY)) { + if (vm_page_busied(m)) { vm_page_unlock(m); goto unlock_and_continue; } @@ -1301,9 +1297,7 @@ relock_queues: /* * Don't deactivate pages that are busy. */ - if ((m->busy != 0) || - (m->oflags & VPO_BUSY) || - (m->hold_count != 0)) { + if (vm_page_busied(m) || m->hold_count != 0) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); vm_page_requeue_locked(m); @@ -1562,9 +1556,7 @@ vm_pageout_page_stats(void) /* * Don't deactivate pages that are busy. 
*/ - if ((m->busy != 0) || - (m->oflags & VPO_BUSY) || - (m->hold_count != 0)) { + if (vm_page_busied(m) || m->hold_count != 0) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); vm_page_requeue_locked(m); Index: vm/vm_phys.c =================================================================== --- vm/vm_phys.c (.../vmcontention/sys) (revision 253964) +++ vm/vm_phys.c (.../vmobj-readlock/sys) (revision 253964) @@ -559,7 +559,8 @@ vm_phys_fictitious_reg_range(vm_paddr_t start, vm_ } for (i = 0; i < page_count; i++) { vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr); - fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED); + fp[i].oflags &= ~VPO_UNMANAGED; + fp[i].busy_lock = VPB_UNBUSIED; } mtx_lock(&vm_phys_fictitious_reg_mtx); for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) { Index: vm/swap_pager.c =================================================================== --- vm/swap_pager.c (.../vmcontention/sys) (revision 253964) +++ vm/swap_pager.c (.../vmobj-readlock/sys) (revision 253964) @@ -1219,9 +1219,10 @@ swap_pager_getpages(vm_object_t object, vm_page_t */ VM_OBJECT_WLOCK(object); while ((mreq->oflags & VPO_SWAPINPROG) != 0) { - mreq->oflags |= VPO_WANTED; + mreq->oflags |= VPO_SWAPSLEEP; PCPU_INC(cnt.v_intrans); - if (VM_OBJECT_SLEEP(object, mreq, PSWP, "swread", hz * 20)) { + if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP, + "swread", hz * 20)) { printf( "swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n", bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount); @@ -1459,12 +1460,6 @@ swap_pager_putpages(vm_object_t object, vm_page_t * Completion routine for asynchronous reads and writes from/to swap. * Also called manually by synchronous code to finish up a bp. * - * For READ operations, the pages are VPO_BUSY'd. For WRITE operations, - * the pages are vm_page_t->busy'd. For READ operations, we VPO_BUSY - * unbusy all pages except the 'main' request page. For WRITE - * operations, we vm_page_t->busy'd unbusy all pages ( we can do this - * because we marked them all VM_PAGER_PEND on return from putpages ). - * * This routine may not sleep. */ static void @@ -1514,6 +1509,10 @@ swp_pager_async_iodone(struct buf *bp) vm_page_t m = bp->b_pages[i]; m->oflags &= ~VPO_SWAPINPROG; + if (m->oflags & VPO_SWAPSLEEP) { + m->oflags &= ~VPO_SWAPSLEEP; + wakeup(&object->paging_in_progress); + } if (bp->b_ioflags & BIO_ERROR) { /* @@ -1542,8 +1541,11 @@ swp_pager_async_iodone(struct buf *bp) m->valid = 0; if (i != bp->b_pager.pg_reqpage) swp_pager_free_nrpage(m); - else + else { + vm_page_lock(m); vm_page_flash(m); + vm_page_unlock(m); + } /* * If i == bp->b_pager.pg_reqpage, do not wake * the page up. The caller needs to. @@ -1558,7 +1560,7 @@ swp_pager_async_iodone(struct buf *bp) vm_page_lock(m); vm_page_activate(m); vm_page_unlock(m); - vm_page_io_finish(m); + vm_page_sunbusy(m); } } else if (bp->b_iocmd == BIO_READ) { /* @@ -1575,7 +1577,7 @@ swp_pager_async_iodone(struct buf *bp) * Note that the requested page, reqpage, is left * busied, but we still have to wake it up. The * other pages are released (unbusied) by - * vm_page_wakeup(). + * vm_page_xunbusy(). 
*/ KASSERT(!pmap_page_is_mapped(m), ("swp_pager_async_iodone: page %p is mapped", m)); @@ -1595,9 +1597,12 @@ swp_pager_async_iodone(struct buf *bp) vm_page_lock(m); vm_page_deactivate(m); vm_page_unlock(m); - vm_page_wakeup(m); - } else + vm_page_xunbusy(m); + } else { + vm_page_lock(m); vm_page_flash(m); + vm_page_unlock(m); + } } else { /* * For write success, clear the dirty @@ -1608,7 +1613,7 @@ swp_pager_async_iodone(struct buf *bp) ("swp_pager_async_iodone: page %p is not write" " protected", m)); vm_page_undirty(m); - vm_page_io_finish(m); + vm_page_sunbusy(m); if (vm_page_count_severe()) { vm_page_lock(m); vm_page_try_to_cache(m); @@ -1706,19 +1711,18 @@ swp_pager_force_pagein(vm_object_t object, vm_pind vm_page_t m; vm_object_pip_add(object, 1); - m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | - VM_ALLOC_NOBUSY); + m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid == VM_PAGE_BITS_ALL) { vm_object_pip_subtract(object, 1); vm_page_dirty(m); vm_page_lock(m); vm_page_activate(m); vm_page_unlock(m); + vm_page_xunbusy(m); vm_pager_page_unswapped(m); return; } - vm_page_busy(m); if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_subtract(object, 1); @@ -1726,7 +1730,7 @@ swp_pager_force_pagein(vm_object_t object, vm_pind vm_page_lock(m); vm_page_deactivate(m); vm_page_unlock(m); - vm_page_wakeup(m); + vm_page_xunbusy(m); vm_pager_page_unswapped(m); } Index: vm/vm_glue.c =================================================================== --- vm/vm_glue.c (.../vmcontention/sys) (revision 253964) +++ vm/vm_glue.c (.../vmobj-readlock/sys) (revision 253964) @@ -231,10 +231,8 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset VM_OBJECT_WLOCK(object); pindex = OFF_TO_IDX(offset); - m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | - VM_ALLOC_NOBUSY); + m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { - vm_page_busy(m); ma[0] = m; rv = vm_pager_get_pages(object, ma, 1, 0); m = vm_page_lookup(object, pindex); @@ -247,8 +245,8 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset m = NULL; goto out; } - vm_page_wakeup(m); } + vm_page_xunbusy(m); vm_page_lock(m); vm_page_hold(m); vm_page_unlock(m); @@ -529,13 +527,11 @@ vm_thread_swapin(struct thread *td) VM_ALLOC_WIRED); for (i = 0; i < pages; i++) { if (ma[i]->valid != VM_PAGE_BITS_ALL) { - KASSERT(ma[i]->oflags & VPO_BUSY, - ("lost busy 1")); + vm_page_assert_xbusied(ma[i]); vm_object_pip_add(ksobj, 1); for (j = i + 1; j < pages; j++) { - KASSERT(ma[j]->valid == VM_PAGE_BITS_ALL || - (ma[j]->oflags & VPO_BUSY), - ("lost busy 2")); + if (ma[j]->valid != VM_PAGE_BITS_ALL) + vm_page_assert_xbusied(ma[j]); if (ma[j]->valid == VM_PAGE_BITS_ALL) break; } @@ -546,9 +542,9 @@ vm_thread_swapin(struct thread *td) vm_object_pip_wakeup(ksobj); for (k = i; k < j; k++) ma[k] = vm_page_lookup(ksobj, k); - vm_page_wakeup(ma[i]); - } else if (ma[i]->oflags & VPO_BUSY) - vm_page_wakeup(ma[i]); + vm_page_xunbusy(ma[i]); + } else if (vm_page_xbusied(ma[i])) + vm_page_xunbusy(ma[i]); } VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(td->td_kstack, ma, pages); Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (.../vmcontention/sys) (revision 253964) +++ vm/vm_object.c (.../vmobj-readlock/sys) (revision 253964) @@ -744,8 +744,7 @@ vm_object_terminate(vm_object_t object) * the object, the page and object are 
reset to any empty state. */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { - KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, - ("vm_object_terminate: freeing busy page %p", p)); + vm_page_assert_unbusied(p); vm_page_lock(p); /* * Optimize the page's removal from the object by resetting @@ -871,7 +870,7 @@ rescan: np = TAILQ_NEXT(p, listq); if (p->valid == 0) continue; - if (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { + if (vm_page_sleep_if_busy(p, "vpcwai")) { if (object->generation != curgeneration) { if ((flags & OBJPC_SYNC) != 0) goto rescan; @@ -939,7 +938,7 @@ vm_object_page_collect_flush(vm_object_t object, v for (tp = p; count < vm_pageout_page_count; count++) { tp = vm_page_next(tp); - if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0) + if (tp == NULL || vm_page_busied(tp)) break; if (!vm_object_page_remove_write(tp, flags, clearobjflags)) break; @@ -947,7 +946,7 @@ vm_object_page_collect_flush(vm_object_t object, v for (p_first = p; count < vm_pageout_page_count; count++) { tp = vm_page_prev(p_first); - if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0) + if (tp == NULL || vm_page_busied(tp)) break; if (!vm_object_page_remove_write(tp, flags, clearobjflags)) break; @@ -1156,7 +1155,7 @@ shadowlookup: ("vm_object_madvise: page %p is fictitious", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_object_madvise: page %p is not managed", m)); - if ((m->oflags & VPO_BUSY) || m->busy) { + if (vm_page_busied(m)) { if (advise == MADV_WILLNEED) { /* * Reference the page before unlocking and @@ -1165,11 +1164,10 @@ shadowlookup: */ vm_page_aflag_set(m, PGA_REFERENCED); } - vm_page_unlock(m); if (object != tobject) VM_OBJECT_WUNLOCK(object); - m->oflags |= VPO_WANTED; - VM_OBJECT_SLEEP(tobject, m, PDROP | PVM, "madvpo", 0); + VM_OBJECT_WUNLOCK(tobject); + vm_page_busy_sleep(m, "madvpo"); VM_OBJECT_WLOCK(object); goto relookup; } @@ -1344,10 +1342,12 @@ retry: * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. */ - if ((m->oflags & VPO_BUSY) || m->busy) { + if (vm_page_busied(m)) { VM_OBJECT_WUNLOCK(new_object); - m->oflags |= VPO_WANTED; - VM_OBJECT_SLEEP(orig_object, m, PVM, "spltwt", 0); + vm_page_lock(m); + VM_OBJECT_WUNLOCK(orig_object); + vm_page_busy_sleep(m, "spltwt"); + VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } @@ -1371,7 +1371,7 @@ retry: vm_page_unlock(m); /* page automatically made dirty by rename and cache handled */ if (orig_object->type == OBJT_SWAP) - vm_page_busy(m); + vm_page_xbusy(m); } if (orig_object->type == OBJT_SWAP) { /* @@ -1380,7 +1380,7 @@ retry: */ swap_pager_copy(orig_object, new_object, offidxstart, 0); TAILQ_FOREACH(m, &new_object->memq, listq) - vm_page_wakeup(m); + vm_page_xunbusy(m); /* * Transfer any cached pages from orig_object to new_object. 
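The vm_object.c hunks above and below replace the old VPO_WANTED/VM_OBJECT_SLEEP() idiom with vm_page_busy_sleep(). A minimal sketch of the new wait-and-retry idiom follows; it is not part of the patch, and the function name, arguments, and wait message are invented for illustration. The point is the lock ordering: the page lock is taken before the object lock is dropped, and vm_page_busy_sleep() re-checks the busy state and sets VPB_BIT_WAITERS under that page lock, while the unbusy paths deliver their wakeup holding the same lock, so the wakeup cannot be lost.

/*
 * Illustrative sketch only, not part of this patch: the retry idiom
 * introduced wherever a thread must wait for a busied page.
 */
static vm_page_t
example_lookup_unbusied(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_ASSERT_WLOCKED(object);
retry:
	m = vm_page_lookup(object, pindex);
	if (m != NULL && vm_page_busied(m)) {
		/* Page lock first, then drop the object lock. */
		vm_page_lock(m);
		VM_OBJECT_WUNLOCK(object);
		/* Re-checks busy and sleeps; the page lock is dropped either way. */
		vm_page_busy_sleep(m, "exlkwt");
		VM_OBJECT_WLOCK(object);
		goto retry;
	}
	return (m);
}

After re-locking the object the lookup must be repeated, since the page identity may have changed while the object lock was dropped.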
@@ -1496,18 +1496,16 @@ vm_object_backing_scan(vm_object_t object, int op) vm_page_t pp; if (op & OBSC_COLLAPSE_NOWAIT) { - if ((p->oflags & VPO_BUSY) || - !p->valid || - p->busy) { + if (!p->valid || vm_page_busied(p)) { p = next; continue; } } else if (op & OBSC_COLLAPSE_WAIT) { - if ((p->oflags & VPO_BUSY) || p->busy) { + if (vm_page_busied(p)) { VM_OBJECT_WUNLOCK(object); - p->oflags |= VPO_WANTED; - VM_OBJECT_SLEEP(backing_object, p, - PDROP | PVM, "vmocol", 0); + vm_page_lock(p); + VM_OBJECT_WUNLOCK(backing_object); + vm_page_busy_sleep(p, "vmocol"); VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); /* @@ -1905,8 +1903,12 @@ again: } goto next; } - if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) + if (vm_page_busied(p)) { + VM_OBJECT_WUNLOCK(object); + vm_page_busy_sleep(p, "vmopar"); + VM_OBJECT_WLOCK(object); goto again; + } KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { @@ -2033,7 +2035,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t if (pindex > start) { m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { - vm_page_wakeup(m); + vm_page_xunbusy(m); m = TAILQ_NEXT(m, listq); } } Index: vm/vm_fault.c =================================================================== --- vm/vm_fault.c (.../vmcontention/sys) (revision 253964) +++ vm/vm_fault.c (.../vmobj-readlock/sys) (revision 253964) @@ -141,7 +141,7 @@ static inline void release_page(struct faultstate *fs) { - vm_page_wakeup(fs->m); + vm_page_xunbusy(fs->m); vm_page_lock(fs->m); vm_page_deactivate(fs->m); vm_page_unlock(fs->m); @@ -353,21 +353,21 @@ RetryFault:; /* * Wait/Retry if the page is busy. We have to do this - * if the page is busy via either VPO_BUSY or - * vm_page_t->busy because the vm_pager may be using - * vm_page_t->busy for pageouts ( and even pageins if - * it is the vnode pager ), and we could end up trying - * to pagein and pageout the same page simultaneously. + * if the page is either exclusive or shared busy + * because the vm_pager may be using read busy for + * pageouts (and even pageins if it is the vnode + * pager), and we could end up trying to pagein and + * pageout the same page simultaneously. * * We can theoretically allow the busy case on a read * fault if the page is marked valid, but since such * pages are typically already pmap'd, putting that * special case in might be more effort then it is * worth. We cannot under any circumstances mess - * around with a vm_page_t->busy page except, perhaps, + * around with a shared busied page except, perhaps, * to pmap it. */ - if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { + if (vm_page_busied(fs.m)) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less @@ -392,8 +392,7 @@ RetryFault:; unlock_map(&fs); if (fs.m == vm_page_lookup(fs.object, fs.pindex)) { - vm_page_sleep_if_busy(fs.m, TRUE, - "vmpfw"); + vm_page_sleep_if_busy(fs.m, "vmpfw"); } vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); @@ -410,7 +409,7 @@ RetryFault:; * (readable), jump to readrest, else break-out ( we * found the page ). */ - vm_page_busy(fs.m); + vm_page_xbusy(fs.m); if (fs.m->valid != VM_PAGE_BITS_ALL) goto readrest; break; @@ -516,7 +515,7 @@ readrest: /* * Call the pager to retrieve the data, if any, after * releasing the lock on the map. We hold a ref on - * fs.object and the pages are VPO_BUSY'd. + * fs.object and the pages are exclusive busied. 
*/ unlock_map(&fs); @@ -565,7 +564,7 @@ vnode_locked: * return value is the index into the marray for the * vm_page_t passed to the routine. * - * fs.m plus the additional pages are VPO_BUSY'd. + * fs.m plus the additional pages are exclusive busied. */ faultcount = vm_fault_additional_pages( fs.m, behind, ahead, marray, &reqpage); @@ -691,8 +690,7 @@ vnode_locked: } } - KASSERT((fs.m->oflags & VPO_BUSY) != 0, - ("vm_fault: not busy after main loop")); + vm_page_assert_xbusied(fs.m); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock @@ -757,7 +755,7 @@ vnode_locked: vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); vm_page_unlock(fs.m); - vm_page_busy(fs.m); + vm_page_xbusy(fs.m); fs.first_m = fs.m; fs.m = NULL; PCPU_INC(cnt.v_cow_optim); @@ -905,12 +903,9 @@ vnode_locked: } } + vm_page_assert_xbusied(fs.m); + /* - * Page had better still be busy - */ - KASSERT(fs.m->oflags & VPO_BUSY, - ("vm_fault: page %p not busy!", fs.m)); - /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ @@ -946,7 +941,7 @@ vnode_locked: vm_page_hold(fs.m); } vm_page_unlock(fs.m); - vm_page_wakeup(fs.m); + vm_page_xunbusy(fs.m); /* * Unlock everything, and return @@ -991,13 +986,12 @@ vm_fault_cache_behind(const struct faultstate *fs, if (pindex < OFF_TO_IDX(fs->entry->offset)) pindex = OFF_TO_IDX(fs->entry->offset); m = first_object != object ? fs->first_m : fs->m; - KASSERT((m->oflags & VPO_BUSY) != 0, - ("vm_fault_cache_behind: page %p is not busy", m)); + vm_page_assert_xbusied(m); m_prev = vm_page_prev(m); while ((m = m_prev) != NULL && m->pindex >= pindex && m->valid == VM_PAGE_BITS_ALL) { m_prev = vm_page_prev(m); - if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) + if (vm_page_busied(m)) continue; vm_page_lock(m); if (m->hold_count == 0 && m->wire_count == 0) { @@ -1378,7 +1372,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src vm_page_activate(dst_m); vm_page_unlock(dst_m); } - vm_page_wakeup(dst_m); + vm_page_xunbusy(dst_m); } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (.../vmcontention/sys) (revision 253964) +++ vm/vm_page.c (.../vmobj-readlock/sys) (revision 253964) @@ -469,66 +469,170 @@ vm_page_reference(vm_page_t m) vm_page_aflag_set(m, PGA_REFERENCED); } +/* + * vm_page_busy_downgrade: + * + * Downgrade an exclusive busy page into a single shared busy page. + */ void -vm_page_busy(vm_page_t m) +vm_page_busy_downgrade(vm_page_t m) { + u_int x; - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("vm_page_busy: page already busy!!!")); - m->oflags |= VPO_BUSY; + vm_page_assert_xbusied(m); + + for (;;) { + x = m->busy_lock; + x &= VPB_BIT_WAITERS; + if (atomic_cmpset_rel_int(&m->busy_lock, + VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1) | x)) + break; + } } /* - * vm_page_flash: + * vm_page_sbusied: * - * wakeup anyone waiting for the page. + * Return a positive value if the page is shared busied, 0 otherwise. */ +int +vm_page_sbusied(vm_page_t m) +{ + u_int x; + + x = m->busy_lock; + return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED); +} + +/* + * vm_page_sunbusy: + * + * Shared unbusy a page. 
+ */ void -vm_page_flash(vm_page_t m) +vm_page_sunbusy(vm_page_t m) { + u_int x; - VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->oflags & VPO_WANTED) { - m->oflags &= ~VPO_WANTED; + vm_page_assert_sbusied(m); + + for (;;) { + x = m->busy_lock; + if (VPB_SHARERS(x) > 1) { + if (atomic_cmpset_int(&m->busy_lock, x, + x - VPB_ONE_SHARER)) + break; + continue; + } + if ((x & VPB_BIT_WAITERS) == 0) { + KASSERT(x == VPB_SHARERS_WORD(1), + ("vm_page_sunbusy: invalid lock state")); + if (atomic_cmpset_int(&m->busy_lock, + VPB_SHARERS_WORD(1), VPB_UNBUSIED)) + break; + continue; + } + KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS), + ("vm_page_sunbusy: invalid lock state for waiters")); + + vm_page_lock(m); + if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) { + vm_page_unlock(m); + continue; + } wakeup(m); + vm_page_unlock(m); + break; } } /* - * vm_page_wakeup: + * vm_page_busy_sleep: * - * clear the VPO_BUSY flag and wakeup anyone waiting for the - * page. + * Sleep and release the page lock, using the page pointer as wchan. + * This is used to implement the hard-path of busying mechanism. * + * The given page must be locked. */ void -vm_page_wakeup(vm_page_t m) +vm_page_busy_sleep(vm_page_t m, const char *wmesg) { + u_int x; - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT(m->oflags & VPO_BUSY, ("vm_page_wakeup: page not busy!!!")); - m->oflags &= ~VPO_BUSY; - vm_page_flash(m); + vm_page_lock_assert(m, MA_OWNED); + + x = m->busy_lock; + if (x == VPB_UNBUSIED) { + vm_page_unlock(m); + return; + } + if ((x & VPB_BIT_WAITERS) == 0 && + !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS)) { + vm_page_unlock(m); + return; + } + msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0); } +/* + * vm_page_trysbusy: + * + * Try to shared busy a page. + * If the operation succeeds 1 is returned otherwise 0. + * The operation never sleeps. + */ +int +vm_page_trysbusy(vm_page_t m) +{ + u_int x; + + x = m->busy_lock; + return ((x & VPB_BIT_SHARED) != 0 && + atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER)); +} + +/* + * vm_page_xunbusy_hard: + * + * Called after the first try the exclusive unbusy of a page failed. + * It is assumed that the waiters bit is on. + */ void -vm_page_io_start(vm_page_t m) +vm_page_xunbusy_hard(vm_page_t m) { - VM_OBJECT_ASSERT_WLOCKED(m->object); - m->busy++; + vm_page_assert_xbusied(m); + + vm_page_lock(m); + atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); + wakeup(m); + vm_page_unlock(m); } +/* + * vm_page_flash: + * + * Wakeup anyone waiting for the page. + * The ownership bits do not change. + * + * The given page must be locked. + */ void -vm_page_io_finish(vm_page_t m) +vm_page_flash(vm_page_t m) { + u_int x; - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT(m->busy > 0, ("vm_page_io_finish: page %p is not busy", m)); - m->busy--; - if (m->busy == 0) - vm_page_flash(m); + vm_page_lock_assert(m, MA_OWNED); + + for (;;) { + x = m->busy_lock; + if ((x & VPB_BIT_WAITERS) == 0) + return; + if (atomic_cmpset_int(&m->busy_lock, x, + x & (~VPB_BIT_WAITERS))) + break; + } + wakeup(m); } /* @@ -643,7 +747,8 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm /* Fictitious pages don't use "segind". */ m->flags = PG_FICTITIOUS; /* Fictitious pages don't use "order" or "pool". 
*/ - m->oflags = VPO_BUSY | VPO_UNMANAGED; + m->oflags = VPO_UNMANAGED; + m->busy_lock = VPB_SINGLE_EXCLUSIVER; m->wire_count = 1; pmap_page_init(m); memattr: @@ -723,16 +828,13 @@ vm_page_readahead_finish(vm_page_t m) * deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ - if (m->oflags & VPO_WANTED) { - vm_page_lock(m); + vm_page_lock(m); + if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); - vm_page_unlock(m); - } else { - vm_page_lock(m); + else vm_page_deactivate(m); - vm_page_unlock(m); - } - vm_page_wakeup(m); + vm_page_unlock(m); + vm_page_xunbusy(m); } else { /* * Free the completely invalid page. Such page state @@ -747,29 +849,38 @@ vm_page_readahead_finish(vm_page_t m) } /* - * vm_page_sleep: + * vm_page_sleep_if_busy: * - * Sleep and release the page lock. + * Sleep and release the page queues lock if the page is busied. + * Returns TRUE if the thread slept. * - * The object containing the given page must be locked. + * The given page must be unlocked and object containing it must + * be locked. */ -void -vm_page_sleep(vm_page_t m, const char *msg) +int +vm_page_sleep_if_busy(vm_page_t m, const char *msg) { + vm_object_t obj; + vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); - if (mtx_owned(vm_page_lockptr(m))) - vm_page_unlock(m); - /* - * It's possible that while we sleep, the page will get - * unbusied and freed. If we are holding the object - * lock, we will assume we hold a reference to the object - * such that even if m->object changes, we can re-lock - * it. - */ - m->oflags |= VPO_WANTED; - VM_OBJECT_SLEEP(m->object, m, PVM, msg, 0); + if (vm_page_busied(m)) { + /* + * The page-specific object must be cached because page + * identity can change during the sleep, causing the + * re-lock of a different object. + * It is assumed that a reference to the object is already + * held by the callers. + */ + obj = m->object; + vm_page_lock(m); + VM_OBJECT_WUNLOCK(obj); + vm_page_busy_sleep(m, msg); + VM_OBJECT_WLOCK(obj); + return (TRUE); + } + return (FALSE); } /* @@ -894,15 +1005,24 @@ void vm_page_remove(vm_page_t m) { vm_object_t object; + boolean_t lockacq; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); if ((object = m->object) == NULL) return; VM_OBJECT_ASSERT_WLOCKED(object); - if (m->oflags & VPO_BUSY) { - m->oflags &= ~VPO_BUSY; + if (vm_page_xbusied(m)) { + lockacq = FALSE; + if ((m->oflags & VPO_UNMANAGED) != 0 && + !mtx_owned(vm_page_lockptr(m))) { + lockacq = TRUE; + vm_page_lock(m); + } vm_page_flash(m); + atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); + if (lockacq) + vm_page_unlock(m); } /* @@ -1171,8 +1291,7 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t * vm_page_alloc: * * Allocate and return a page that is associated with the specified - * object and offset pair. By default, this page has the flag VPO_BUSY - * set. + * object and offset pair. By default, this page is exclusive busied. * * The caller must always specify an allocation class. 
* @@ -1187,10 +1306,11 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t * VM_ALLOC_IFCACHED return page only if it is cached * VM_ALLOC_IFNOTCACHED return NULL, do not reactivate if the page * is cached - * VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page + * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and - * should not have the flag VPO_BUSY set + * should not be exclusive busy + * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * @@ -1205,8 +1325,12 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind int flags, req_class; mpred = 0; /* XXX: pacify gcc */ - KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0), - ("vm_page_alloc: inconsistent object/req")); + KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && + (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && + ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != + (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), + ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object, + req)); if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); @@ -1287,7 +1411,8 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue)); KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m)); KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m)); - KASSERT(m->busy == 0, ("vm_page_alloc: page %p is busy", m)); + KASSERT(!vm_page_sbusied(m), + ("vm_page_alloc: page %p is busy", m)); KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("vm_page_alloc: page %p has unexpected memattr %d", m, @@ -1331,8 +1456,11 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind m->aflags = 0; m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? 
VPO_UNMANAGED : 0; - if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ)) == 0) - m->oflags |= VPO_BUSY; + m->busy_lock = VPB_UNBUSIED; + if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) + m->busy_lock = VPB_SINGLE_EXCLUSIVER; + if ((req & VM_ALLOC_SBUSY) != 0) + m->busy_lock = VPB_SHARERS_WORD(1); if (req & VM_ALLOC_WIRED) { /* * The page lock is not required for wiring a page until that @@ -1400,9 +1528,10 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: - * VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page + * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NOOBJ page is not associated with an object and - * should not have the flag VPO_BUSY set + * should not be exclusive busy + * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * @@ -1418,8 +1547,12 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex u_int flags, oflags; int req_class; - KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0), - ("vm_page_alloc_contig: inconsistent object/req")); + KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && + (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && + ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != + (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), + ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object, + req)); if (object != NULL) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_PHYS, @@ -1495,8 +1628,6 @@ retry: atomic_add_int(&cnt.v_wire_count, npages); oflags = VPO_UNMANAGED; if (object != NULL) { - if ((req & VM_ALLOC_NOBUSY) == 0) - oflags |= VPO_BUSY; if (object->memattr != VM_MEMATTR_DEFAULT && memattr == VM_MEMATTR_DEFAULT) memattr = object->memattr; @@ -1504,6 +1635,13 @@ retry: for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; m->flags = (m->flags | PG_NODUMP) & flags; + m->busy_lock = VPB_UNBUSIED; + if (object != NULL) { + if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) + m->busy_lock = VPB_SINGLE_EXCLUSIVER; + if ((req & VM_ALLOC_SBUSY) != 0) + m->busy_lock = VPB_SHARERS_WORD(1); + } if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 1; /* Unmanaged pages don't use "act_count". 
*/ @@ -1546,7 +1684,7 @@ vm_page_alloc_init(vm_page_t m) ("vm_page_alloc_init: page %p is wired", m)); KASSERT(m->hold_count == 0, ("vm_page_alloc_init: page %p is held", m)); - KASSERT(m->busy == 0, + KASSERT(!vm_page_sbusied(m), ("vm_page_alloc_init: page %p is busy", m)); KASSERT(m->dirty == 0, ("vm_page_alloc_init: page %p is dirty", m)); @@ -1905,7 +2043,7 @@ vm_page_free_toq(vm_page_t m) if (VM_PAGE_IS_FREE(m)) panic("vm_page_free: freeing free page %p", m); - else if (m->busy != 0) + else if (vm_page_sbusied(m)) panic("vm_page_free: freeing busy page %p", m); /* @@ -2116,8 +2254,8 @@ vm_page_try_to_cache(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->dirty || m->hold_count || m->busy || m->wire_count || - (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0) + if (m->dirty || m->hold_count || m->wire_count || + (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) return (0); pmap_remove_all(m); if (m->dirty) @@ -2139,8 +2277,8 @@ vm_page_try_to_free(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); if (m->object != NULL) VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->dirty || m->hold_count || m->busy || m->wire_count || - (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0) + if (m->dirty || m->hold_count || m->wire_count || + (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) return (0); pmap_remove_all(m); if (m->dirty) @@ -2165,7 +2303,7 @@ vm_page_cache(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) || m->busy || + if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) || m->hold_count || m->wire_count) panic("vm_page_cache: attempting to cache busy page"); KASSERT(!pmap_page_is_mapped(m), @@ -2351,21 +2489,29 @@ vm_page_t vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; + int sleep; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((allocflags & VM_ALLOC_RETRY) != 0, ("vm_page_grab: VM_ALLOC_RETRY is required")); + KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || + (allocflags & VM_ALLOC_IGN_SBUSY) != 0, + ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { - if ((m->oflags & VPO_BUSY) != 0 || - ((allocflags & VM_ALLOC_IGN_SBUSY) == 0 && m->busy != 0)) { + sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? + vm_page_xbusied(m) : vm_page_busied(m); + if (sleep) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(m, PGA_REFERENCED); - vm_page_sleep(m, "pgrbwt"); + vm_page_lock(m); + VM_OBJECT_WUNLOCK(object); + vm_page_busy_sleep(m, "pgrbwt"); + VM_OBJECT_WLOCK(object); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { @@ -2373,8 +2519,11 @@ retrylookup: vm_page_wire(m); vm_page_unlock(m); } - if ((allocflags & VM_ALLOC_NOBUSY) == 0) - vm_page_busy(m); + if ((allocflags & + (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) + vm_page_xbusy(m); + if ((allocflags & VM_ALLOC_SBUSY) != 0) + vm_page_sbusy(m); return (m); } } @@ -2482,12 +2631,12 @@ vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits #endif /* - * If the object is locked and the page is neither VPO_BUSY nor + * If the object is locked and the page is neither exclusive busy nor * write mapped, then the page's dirty field cannot possibly be * set by a concurrent pmap operation. 
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && !pmap_page_is_write_mapped(m)) + if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) m->dirty &= ~pagebits; else { /* @@ -2696,7 +2845,7 @@ vm_page_is_valid(vm_page_t m, int base, int size) { vm_page_bits_t bits; - VM_OBJECT_ASSERT_WLOCKED(m->object); + VM_OBJECT_ASSERT_LOCKED(m->object); bits = vm_page_bits(base, size); return (m->valid != 0 && (m->valid & bits) == bits); } @@ -2856,12 +3005,11 @@ vm_page_object_lock_assert(vm_page_t m) /* * Certain of the page's fields may only be modified by the - * holder of the containing object's lock or the setter of the - * page's VPO_BUSY flag. Unfortunately, the setter of the - * VPO_BUSY flag is not recorded, and thus cannot be checked - * here. + * holder of the containing object's lock or the exclusive busy + * holder. Unfortunately, the holder of the exclusive busy is + * not recorded, and thus cannot be checked here. */ - if (m->object != NULL && (m->oflags & VPO_BUSY) == 0) + if (m->object != NULL && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); } #endif @@ -2919,9 +3067,9 @@ DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) m = (vm_page_t)addr; db_printf( "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" - " af 0x%x of 0x%x f 0x%x act %d busy %d valid 0x%x dirty 0x%x\n", + " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, - m->flags, m->act_count, m->busy, m->valid, m->dirty); + m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (.../vmcontention/sys) (revision 253964) +++ vm/vm_page.h (.../vmobj-readlock/sys) (revision 253964) @@ -144,11 +144,12 @@ struct vm_page { uint8_t oflags; /* page VPO_* flags (O) */ uint16_t flags; /* page PG_* flags (P) */ u_char act_count; /* page usage count (P) */ - u_char busy; /* page busy count (O) */ + u_char __pad0; /* unused padding */ /* NOTE that these must support one bit per DEV_BSIZE in a page!!! */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ + volatile u_int busy_lock; /* busy owners lock */ }; /* @@ -165,12 +166,35 @@ struct vm_page { * mappings, and such pages are also not on any PQ queue. * */ -#define VPO_BUSY 0x01 /* page is in transit */ -#define VPO_WANTED 0x02 /* someone is waiting for page */ +#define VPO_UNUSED01 0x01 /* --available-- */ +#define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */ #define VPO_UNMANAGED 0x04 /* no PV management for page */ #define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */ #define VPO_NOSYNC 0x10 /* do not collect for syncer */ +/* + * Busy page implementation details. + * The algorithm is taken mostly from the rwlock(9) and sx(9) lock + * implementations, although support for owner identity is removed because + * of size constraints. Checks on lock recursion are therefore not possible, + * and the effectiveness of the lock assertions is somewhat reduced.
+ */ +#define VPB_BIT_SHARED 0x01 +#define VPB_BIT_EXCLUSIVE 0x02 +#define VPB_BIT_WAITERS 0x04 +#define VPB_BIT_FLAGMASK \ + (VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS) + +#define VPB_SHARERS_SHIFT 3 +#define VPB_SHARERS(x) \ + (((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT) +#define VPB_SHARERS_WORD(x) ((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED) +#define VPB_ONE_SHARER (1 << VPB_SHARERS_SHIFT) + +#define VPB_SINGLE_EXCLUSIVER VPB_BIT_EXCLUSIVE + +#define VPB_UNBUSIED VPB_SHARERS_WORD(0) + #define PQ_NONE 255 #define PQ_INACTIVE 0 #define PQ_ACTIVE 1 @@ -248,8 +272,9 @@ extern struct mtx_padalign pa_lock[]; * directly set this flag. They should call vm_page_reference() instead. * * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). When it - * does so, the page must be VPO_BUSY. The MI VM layer must never access this - * flag directly. Instead, it should call pmap_page_is_write_mapped(). + * does so, the page must be exclusive busied. The MI VM layer must never + * access this flag directly. Instead, it should call + * pmap_page_is_write_mapped(). * * PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has * at least one executable mapping. It is not consumed by the MI VM layer. @@ -336,6 +361,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); #define VM_ALLOC_IFNOTCACHED 0x0800 /* Fail if the page is cached */ #define VM_ALLOC_IGN_SBUSY 0x1000 /* vm_page_grab() only */ #define VM_ALLOC_NODUMP 0x2000 /* don't include in dump */ +#define VM_ALLOC_SBUSY 0x4000 /* Shared busy the page */ #define VM_ALLOC_COUNT_SHIFT 16 #define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT) @@ -359,15 +385,13 @@ malloc2vm_flags(int malloc_flags) } #endif -void vm_page_busy(vm_page_t m); +void vm_page_busy_downgrade(vm_page_t m); +void vm_page_busy_sleep(vm_page_t m, const char *msg); void vm_page_flash(vm_page_t m); -void vm_page_io_start(vm_page_t m); -void vm_page_io_finish(vm_page_t m); void vm_page_hold(vm_page_t mem); void vm_page_unhold(vm_page_t mem); void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); -void vm_page_wakeup(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); @@ -401,13 +425,17 @@ void vm_page_remove (vm_page_t); void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); void vm_page_requeue(vm_page_t m); void vm_page_requeue_locked(vm_page_t m); +int vm_page_sbusied(vm_page_t m); void vm_page_set_valid_range(vm_page_t m, int base, int size); -void vm_page_sleep(vm_page_t m, const char *msg); +int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); +void vm_page_sunbusy(vm_page_t m); +int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unwire (vm_page_t, int); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_wire (vm_page_t); +void vm_page_xunbusy_hard(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); void vm_page_clear_dirty (vm_page_t, int, int); void vm_page_set_invalid (vm_page_t, int, int); @@ -430,6 +458,48 @@ void vm_page_assert_locked_KBI(vm_page_t m, const void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line); #endif +#define vm_page_assert_sbusied(m) \ + KASSERT(vm_page_sbusied(m), \ + ("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \ + (void *)m, __FILE__, __LINE__)); + +#define vm_page_assert_unbusied(m) \ + KASSERT(!vm_page_busied(m), \ + ("vm_page_assert_unbusied: page %p busy @ 
%s:%d", \ + (void *)m, __FILE__, __LINE__)); + +#define vm_page_assert_xbusied(m) \ + KASSERT(vm_page_xbusied(m), \ + ("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \ + (void *)m, __FILE__, __LINE__)); + +#define vm_page_busied(m) \ + ((m)->busy_lock != VPB_UNBUSIED) + +#define vm_page_sbusy(m) do { \ + if (!vm_page_trysbusy(m)) \ + panic("%s: page %p failed shared busing", __func__, m); \ +} while (0) + +#define vm_page_tryxbusy(m) \ + (atomic_cmpset_acq_int(&m->busy_lock, VPB_UNBUSIED, \ + VPB_SINGLE_EXCLUSIVER)) + +#define vm_page_xbusied(m) \ + ((m->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0) + +#define vm_page_xbusy(m) do { \ + if (!vm_page_tryxbusy(m)) \ + panic("%s: page %p failed exclusive busing", __func__, \ + m); \ +} while (0) + +#define vm_page_xunbusy(m) do { \ + if (!atomic_cmpset_rel_int(&(m)->busy_lock, \ + VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) \ + vm_page_xunbusy_hard(m); \ +} while (0) + #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m); #define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m) @@ -484,11 +554,11 @@ vm_page_aflag_set(vm_page_t m, uint8_t bits) /* * The PGA_WRITEABLE flag can only be set if the page is managed and - * VPO_BUSY. Currently, this flag is only set by pmap_enter(). + * exclusive busied. Currently, this flag is only set by pmap_enter(). */ KASSERT((bits & PGA_WRITEABLE) == 0 || - (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY, - ("vm_page_aflag_set: PGA_WRITEABLE and !VPO_BUSY")); + (m->oflags & VPO_UNMANAGED) == 0 || vm_page_xbusied(m), + ("vm_page_aflag_set: PGA_WRITEABLE and not exclusive busy")); /* * Access the whole 32-bit word containing the aflags field with an @@ -544,27 +614,6 @@ vm_page_remque(vm_page_t m) } /* - * vm_page_sleep_if_busy: - * - * Sleep and release the page queues lock if VPO_BUSY is set or, - * if also_m_busy is TRUE, busy is non-zero. Returns TRUE if the - * thread slept and the page queues lock was released. - * Otherwise, retains the page queues lock and returns FALSE. - * - * The object containing the given page must be locked. - */ -static __inline int -vm_page_sleep_if_busy(vm_page_t m, int also_m_busy, const char *msg) -{ - - if ((m->oflags & VPO_BUSY) || (also_m_busy && m->busy)) { - vm_page_sleep(m, msg); - return (TRUE); - } - return (FALSE); -} - -/* * vm_page_undirty: * * Set page to not be dirty. Note: does not clear pmap modify bits Index: vm/vnode_pager.c =================================================================== --- vm/vnode_pager.c (.../vmcontention/sys) (revision 253964) +++ vm/vnode_pager.c (.../vmobj-readlock/sys) (revision 253964) @@ -1135,8 +1135,7 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_ * pmap operation. 
*/ m = ma[ncount - 1]; - KASSERT(m->busy > 0, - ("vnode_pager_generic_putpages: page %p is not busy", m)); + vm_page_assert_sbusied(m); KASSERT(!pmap_page_is_write_mapped(m), ("vnode_pager_generic_putpages: page %p is not read-only", m)); vm_page_clear_dirty(m, pgoff, PAGE_SIZE - Index: vm/vm_kern.c =================================================================== --- vm/vm_kern.c (.../vmcontention/sys) (revision 253964) +++ vm/vm_kern.c (.../vmobj-readlock/sys) (revision 253964) @@ -563,7 +563,7 @@ retry: */ pmap_enter(kernel_pmap, addr + i, VM_PROT_ALL, m, VM_PROT_ALL, TRUE); - vm_page_wakeup(m); + vm_page_xunbusy(m); } VM_OBJECT_WUNLOCK(kmem_object); Index: vm/phys_pager.c =================================================================== --- vm/phys_pager.c (.../vmcontention/sys) (revision 253964) +++ vm/phys_pager.c (.../vmobj-readlock/sys) (revision 253964) @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -152,10 +153,12 @@ phys_pager_getpages(vm_object_t object, vm_page_t KASSERT(m[i]->dirty == 0, ("phys_pager_getpages: dirty page %p", m[i])); /* The requested page must remain busy, the others not. */ - if (i == reqpage) + if (i == reqpage) { + vm_page_lock(m[i]); vm_page_flash(m[i]); - else - vm_page_wakeup(m[i]); + vm_page_unlock(m[i]); + } else + vm_page_xunbusy(m[i]); } return (VM_PAGER_OK); }
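The vm_page.h hunk above documents the busy_lock word only in prose. The standalone userland program below is an illustrative model, not part of the patch: it copies the VPB_* macros from that hunk and substitutes C11 compare-and-exchange for the kernel's atomic_cmpset_int(), in order to show how the sharer count is packed alongside the flag bits, mirroring vm_page_trysbusy() and the uncontested path of vm_page_sunbusy().

/*
 * Userland model of the vm_page busy_lock encoding -- illustration only,
 * not part of the patch.  The VPB_* definitions are copied from the
 * vm_page.h hunk above.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

#define	VPB_BIT_SHARED		0x01
#define	VPB_BIT_EXCLUSIVE	0x02
#define	VPB_BIT_WAITERS		0x04
#define	VPB_BIT_FLAGMASK \
	(VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS)

#define	VPB_SHARERS_SHIFT	3
#define	VPB_SHARERS(x)		(((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT)
#define	VPB_SHARERS_WORD(x)	((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED)
#define	VPB_ONE_SHARER		(1 << VPB_SHARERS_SHIFT)
#define	VPB_UNBUSIED		VPB_SHARERS_WORD(0)

static _Atomic unsigned int busy_lock = VPB_UNBUSIED;

/* Model of vm_page_trysbusy(): add one sharer unless exclusive busied. */
static int
try_shared_busy(void)
{
	unsigned int x = busy_lock;

	return ((x & VPB_BIT_SHARED) != 0 &&
	    atomic_compare_exchange_strong(&busy_lock, &x, x + VPB_ONE_SHARER));
}

/* Model of the uncontested path of vm_page_sunbusy(): drop one sharer. */
static void
shared_unbusy(void)
{
	unsigned int x = busy_lock;

	assert(VPB_SHARERS(x) > 0);
	while (!atomic_compare_exchange_weak(&busy_lock, &x, x - VPB_ONE_SHARER))
		;
}

int
main(void)
{
	assert(try_shared_busy());
	assert(try_shared_busy());
	/* Two sharers: busy_lock == VPB_SHARERS_WORD(2). */
	printf("sharers: %u\n", VPB_SHARERS(busy_lock));
	shared_unbusy();
	shared_unbusy();
	/* Back to the unbusied state, which keeps VPB_BIT_SHARED set. */
	assert(busy_lock == VPB_UNBUSIED);
	return (0);
}

The kernel routines additionally manage VPB_BIT_WAITERS, waking sleepers on the final unbusy under the page lock; the model omits that path for brevity.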