Index: i386/i386/pmap.c
===================================================================
--- i386/i386/pmap.c	(.../vmcontention/sys)	(revision 252418)
+++ i386/i386/pmap.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -3422,7 +3422,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
 	    va));
-	if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
+	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_busy_wlocked(m))
 		VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	mpte = NULL;
@@ -4516,13 +4516,12 @@ pmap_is_modified(vm_page_t m)
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have PG_M set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = pmap_is_modified_pvh(&m->md) ||
@@ -4651,13 +4650,12 @@ pmap_remove_write(vm_page_t m)
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
@@ -4808,13 +4806,13 @@ pmap_clear_modify(vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("pmap_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
Index: i386/xen/pmap.c
===================================================================
--- i386/xen/pmap.c	(.../vmcontention/sys)	(revision 252418)
+++ i386/xen/pmap.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -2667,7 +2667,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
 	    va));
-	if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
+	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_busy_wlocked(m))
 		VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	mpte = NULL;
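The same busy-API substitution recurs throughout this patch; the correspondence, summarized from the hunks themselves, is roughly:

/*
 * Old VPO_BUSY/m->busy construct            New busy_lock primitive
 * (m->oflags & VPO_BUSY) != 0          ->   vm_page_busy_wlocked(m)
 * m->busy != 0                         ->   vm_page_busy_rlocked(m)
 * busied in either way                 ->   vm_page_busy_locked(m)
 * vm_page_busy(m) / vm_page_wakeup(m)  ->   vm_page_busy_wlock(m) / vm_page_busy_wunlock(m)
 * vm_page_io_start(m) / _io_finish(m)  ->   vm_page_busy_rlock(m) / vm_page_busy_runlock(m)
 * VM_ALLOC_IGN_SBUSY / VM_ALLOC_NOBUSY ->   VM_ALLOC_IGN_RBUSY / VM_ALLOC_RBUSY where a read
 *                                           busy is wanted; elsewhere VM_ALLOC_NOBUSY is simply
 *                                           dropped and vm_page_grab() returns the page write busied
 */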
@@ -3696,13 +3696,12 @@ pmap_is_modified(vm_page_t m)
 	rv = FALSE;
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have PG_M set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (rv);
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
@@ -3827,13 +3826,12 @@ pmap_remove_write(vm_page_t m)
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
@@ -3933,13 +3931,13 @@ pmap_clear_modify(vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("pmap_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
Index: cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
===================================================================
--- cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	(.../vmcontention/sys)	(revision 252418)
+++ cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -324,7 +324,8 @@ zfs_ioctl(vnode_t *vp, u_long com, intptr_t data,
 }
 
 static vm_page_t
-page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
+page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
+    boolean_t alloc)
 {
 	vm_object_t obj;
 	vm_page_t pp;
@@ -335,20 +336,26 @@ static vm_page_t
 	for (;;) {
 		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
 		    pp->valid) {
-			if ((pp->oflags & VPO_BUSY) != 0) {
+			if (vm_page_busy_wlocked(pp)) {
 				/*
 				 * Reference the page before unlocking and
 				 * sleeping so that the page daemon is less
 				 * likely to reclaim it.
 				 */
 				vm_page_reference(pp);
-				vm_page_sleep(pp, "zfsmwb");
+				vm_page_lock(pp);
+				zfs_vmobject_wunlock(obj);
+				vm_page_busy_sleep(pp, "zfsmwb");
+				zfs_vmobject_wlock(obj);
 				continue;
 			}
+			vm_page_busy_rlock(pp);
 		} else if (pp == NULL) {
+			if (!alloc)
+				break;
 			pp = vm_page_alloc(obj, OFF_TO_IDX(start),
 			    VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
-			    VM_ALLOC_NOBUSY);
+			    VM_ALLOC_RBUSY);
 		} else {
 			ASSERT(pp != NULL && !pp->valid);
 			pp = NULL;
@@ -356,8 +363,9 @@ static vm_page_t
 
 		if (pp != NULL) {
 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+			if (!alloc)
+				break;
 			vm_object_pip_add(obj, 1);
-			vm_page_io_start(pp);
 			pmap_remove_write(pp);
 			vm_page_clear_dirty(pp, off, nbytes);
 		}
@@ -367,57 +375,14 @@ static vm_page_t
 }
 
 static void
-page_unbusy(vm_page_t pp)
+page_unbusy(vm_page_t pp, boolean_t unalloc)
 {
 
-	vm_page_io_finish(pp);
-	vm_object_pip_subtract(pp->object, 1);
+	vm_page_busy_runlock(pp);
+	if (unalloc)
+		vm_object_pip_subtract(pp->object, 1);
 }
 
-static vm_page_t
-page_hold(vnode_t *vp, int64_t start)
-{
-	vm_object_t obj;
-	vm_page_t pp;
-
-	obj = vp->v_object;
-	zfs_vmobject_assert_wlocked(obj);
-
-	for (;;) {
-		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
-		    pp->valid) {
-			if ((pp->oflags & VPO_BUSY) != 0) {
-				/*
-				 * Reference the page before unlocking and
-				 * sleeping so that the page daemon is less
-				 * likely to reclaim it.
-				 */
-				vm_page_reference(pp);
-				vm_page_sleep(pp, "zfsmwb");
-				continue;
-			}
-
-			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-			vm_page_lock(pp);
-			vm_page_hold(pp);
-			vm_page_unlock(pp);
-
-		} else
-			pp = NULL;
-		break;
-	}
-	return (pp);
-}
-
-static void
-page_unhold(vm_page_t pp)
-{
-
-	vm_page_lock(pp);
-	vm_page_unhold(pp);
-	vm_page_unlock(pp);
-}
-
 static caddr_t
 zfs_map_page(vm_page_t pp, struct sf_buf **sfp)
 {
@@ -467,7 +432,7 @@ update_pages(vnode_t *vp, int64_t start, int len,
 			    ("zfs update_pages: unaligned data in putpages case"));
 			KASSERT(pp->valid == VM_PAGE_BITS_ALL,
 			    ("zfs update_pages: invalid page in putpages case"));
-			KASSERT(pp->busy > 0,
+			KASSERT(vm_page_busy_rlocked(pp),
 			    ("zfs update_pages: unbusy page in putpages case"));
 			KASSERT(!pmap_page_is_write_mapped(pp),
 			    ("zfs update_pages: writable page in putpages case"));
@@ -479,7 +444,8 @@ update_pages(vnode_t *vp, int64_t start, int len,
 
 			zfs_vmobject_wlock(obj);
 			vm_page_undirty(pp);
-		} else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
+		} else if ((pp = page_busy(vp, start, off, nbytes,
+		    TRUE)) != NULL) {
 			zfs_vmobject_wunlock(obj);
 
 			va = zfs_map_page(pp, &sf);
@@ -488,7 +454,7 @@ update_pages(vnode_t *vp, int64_t start, int len,
 			zfs_unmap_page(sf);
 
 			zfs_vmobject_wlock(obj);
-			page_unbusy(pp);
+			page_unbusy(pp, TRUE);
 		}
 		len -= nbytes;
 		off = 0;
@@ -503,7 +469,7 @@ update_pages(vnode_t *vp, int64_t start, int len,
 * ZFS to populate a range of page cache pages with data.
 *
 * NOTE: this function could be optimized to pre-allocate
- * all pages in advance, drain VPO_BUSY on all of them,
+ * all pages in advance, drain write busy on all of them,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
 */
@@ -531,10 +497,9 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
 	for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
 		int bytes = MIN(PAGESIZE, len);
 
-		pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_NOBUSY |
-		    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY);
+		pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_RBUSY |
+		    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_IGN_RBUSY);
 		if (pp->valid == 0) {
-			vm_page_io_start(pp);
 			zfs_vmobject_wunlock(obj);
 			va = zfs_map_page(pp, &sf);
 			error = dmu_read(os, zp->z_id, start, bytes, va,
@@ -543,7 +508,7 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
 				bzero(va + bytes, PAGESIZE - bytes);
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock(obj);
-			vm_page_io_finish(pp);
+			vm_page_busy_runlock(pp);
 			vm_page_lock(pp);
 			if (error) {
 				vm_page_free(pp);
@@ -552,7 +517,8 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
 				vm_page_activate(pp);
 			}
 			vm_page_unlock(pp);
-		}
+		} else
+			vm_page_busy_runlock(pp);
 		if (error)
 			break;
 		uio->uio_resid -= bytes;
@@ -596,7 +562,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
 		vm_page_t pp;
 		uint64_t bytes = MIN(PAGESIZE - off, len);
 
-		if (pp = page_hold(vp, start)) {
+		if (pp = page_busy(vp, start, 0, 0, FALSE)) {
 			struct sf_buf *sf;
 			caddr_t va;
 
@@ -605,7 +571,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
 			error = uiomove(va + off, bytes, UIO_READ, uio);
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock(obj);
-			page_unhold(pp);
+			page_unbusy(pp, FALSE);
 		} else {
 			zfs_vmobject_wunlock(obj);
 			error = dmu_read_uio(os, zp->z_id, uio, bytes);
Index: dev/agp/agp.c
===================================================================
--- dev/agp/agp.c	(.../vmcontention/sys)	(revision 252418)
+++ dev/agp/agp.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -600,7 +600,7 @@ agp_generic_bind_memory(device_t dev, struct agp_m
 				goto bad;
 			}
 		}
-		vm_page_wakeup(m);
+		vm_page_busy_wunlock(m);
 	}
 	VM_OBJECT_WUNLOCK(mem->am_obj);
 
@@ -627,7 +627,7 @@ bad:
 	for (k = 0; k < mem->am_size; k += PAGE_SIZE) {
 		m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k));
 		if (k >= i)
-			vm_page_wakeup(m);
+			vm_page_busy_wunlock(m);
 		vm_page_lock(m);
 		vm_page_unwire(m, 0);
 		vm_page_unlock(m);
Index: dev/md/md.c
===================================================================
--- dev/md/md.c	(.../vmcontention/sys)	(revision 252418)
+++ dev/md/md.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -834,7 +834,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 		else
 			rv = vm_pager_get_pages(sc->object, &m, 1, 0);
 		if (rv == VM_PAGER_ERROR) {
-			vm_page_wakeup(m);
+			vm_page_busy_wunlock(m);
 			break;
 		} else if (rv == VM_PAGER_FAIL) {
 			/*
@@ -859,7 +859,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 			else
 				rv = VM_PAGER_OK;
 			if (rv == VM_PAGER_ERROR) {
-				vm_page_wakeup(m);
+				vm_page_busy_wunlock(m);
 				break;
 			}
 			if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
@@ -875,7 +875,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 			else
 				rv = VM_PAGER_OK;
 			if (rv == VM_PAGER_ERROR) {
-				vm_page_wakeup(m);
+				vm_page_busy_wunlock(m);
 				break;
 			}
 			if (len != PAGE_SIZE) {
@@ -885,7 +885,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 			} else
 				vm_pager_page_unswapped(m);
 		}
-		vm_page_wakeup(m);
+		vm_page_busy_wunlock(m);
 		vm_page_lock(m);
 		if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE)
 			vm_page_free(m);
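The drivers converted below (ttm, i915) and the zfs code above all adopt the same sleep protocol when they find a busied page: reference the page, take the page lock, drop the object lock, and let vm_page_busy_sleep() put the thread to sleep before retrying the lookup.  A condensed sketch of that loop ("xxbusy" and the obj/pindex names are placeholders; vm_page_busy_sleep() is assumed to release the page lock):

	for (;;) {
		m = vm_page_lookup(obj, pindex);
		if (m == NULL || !vm_page_busy_locked(m))
			break;
		vm_page_reference(m);	/* make the page daemon less likely to reclaim it */
		vm_page_lock(m);
		VM_OBJECT_WUNLOCK(obj);
		vm_page_busy_sleep(m, "xxbusy");
		VM_OBJECT_WLOCK(obj);
	}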
Index: dev/drm2/ttm/ttm_bo_vm.c
===================================================================
--- dev/drm2/ttm/ttm_bo_vm.c	(.../vmcontention/sys)	(revision 252418)
+++ dev/drm2/ttm/ttm_bo_vm.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -212,8 +212,11 @@ reserve:
 	}
 
 	VM_OBJECT_WLOCK(vm_obj);
-	if ((m->flags & VPO_BUSY) != 0) {
-		vm_page_sleep(m, "ttmpbs");
+	if (vm_page_busy_locked(m)) {
+		vm_page_lock(m);
+		VM_OBJECT_WUNLOCK(vm_obj);
+		vm_page_busy_sleep(m, "ttmpbs");
+		VM_OBJECT_WLOCK(vm_obj);
 		ttm_mem_io_unlock(man);
 		ttm_bo_unreserve(bo);
 		goto retry;
@@ -223,7 +226,7 @@ reserve:
 		vm_page_lock(m);
 		vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
 		vm_page_unlock(m);
-		vm_page_busy(m);
+		vm_page_busy_wlock(m);
 
 	if (oldm != NULL) {
 		vm_page_lock(oldm);
Index: dev/drm2/ttm/ttm_tt.c
===================================================================
--- dev/drm2/ttm/ttm_tt.c	(.../vmcontention/sys)	(revision 252418)
+++ dev/drm2/ttm/ttm_tt.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -288,10 +288,8 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
 	VM_OBJECT_WLOCK(obj);
 	vm_object_pip_add(obj, 1);
 	for (i = 0; i < ttm->num_pages; ++i) {
-		from_page = vm_page_grab(obj, i, VM_ALLOC_NOBUSY |
-		    VM_ALLOC_RETRY);
+		from_page = vm_page_grab(obj, i, VM_ALLOC_RETRY);
 		if (from_page->valid != VM_PAGE_BITS_ALL) {
-			vm_page_busy(from_page);
 			if (vm_pager_has_page(obj, i, NULL, NULL)) {
 				rv = vm_pager_get_pages(obj, &from_page, 1, 0);
 				if (rv != VM_PAGER_OK) {
@@ -303,8 +301,8 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
 			}
 			} else
 				vm_page_zero_invalid(from_page, TRUE);
-			vm_page_wakeup(from_page);
 		}
+		vm_page_busy_wunlock(from_page);
 		to_page = ttm->pages[i];
 		if (unlikely(to_page == NULL)) {
 			ret = -ENOMEM;
@@ -357,7 +355,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, vm_object_t
 		pmap_copy_page(from_page, to_page);
 		vm_page_dirty(to_page);
 		to_page->valid = VM_PAGE_BITS_ALL;
-		vm_page_wakeup(to_page);
+		vm_page_busy_wunlock(to_page);
 	}
 	vm_object_pip_wakeup(obj);
 	VM_OBJECT_WUNLOCK(obj);
Index: dev/drm2/i915/i915_gem.c
===================================================================
--- dev/drm2/i915/i915_gem.c	(.../vmcontention/sys)	(revision 252418)
+++ dev/drm2/i915/i915_gem.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -1356,9 +1356,8 @@ i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffse
 		*mres = NULL;
 	} else
 		oldm = NULL;
+	VM_OBJECT_WUNLOCK(vm_obj);
 retry:
-	VM_OBJECT_WUNLOCK(vm_obj);
-unlocked_vmobj:
 	cause = ret = 0;
 	m = NULL;
 
@@ -1379,9 +1378,11 @@ retry:
 	VM_OBJECT_WLOCK(vm_obj);
 	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
 	if (m != NULL) {
-		if ((m->flags & VPO_BUSY) != 0) {
+		if (vm_page_busy_locked(m)) {
 			DRM_UNLOCK(dev);
-			vm_page_sleep(m, "915pee");
+			vm_page_lock(m);
+			VM_OBJECT_WUNLOCK(vm_obj);
+			vm_page_busy_sleep(m, "915pee");
 			goto retry;
 		}
 		goto have_page;
@@ -1435,16 +1436,18 @@ retry:
 	    ("not fictitious %p", m));
 	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
-	if ((m->flags & VPO_BUSY) != 0) {
+	if (vm_page_busy_locked(m)) {
 		DRM_UNLOCK(dev);
-		vm_page_sleep(m, "915pbs");
+		vm_page_lock(m);
+		VM_OBJECT_WUNLOCK(vm_obj);
+		vm_page_busy_sleep(m, "915pbs");
 		goto retry;
 	}
 	m->valid = VM_PAGE_BITS_ALL;
 	vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
 
have_page:
 	*mres = m;
-	vm_page_busy(m);
+	vm_page_busy_wlock(m);
 
 	CTR4(KTR_DRM, "fault %p %jx %x phys %x", gem_obj, offset, prot,
 	    m->phys_addr);
@@ -1465,7 +1468,7 @@ out:
 	    -ret, cause);
 	if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) {
 		kern_yield(PRI_USER);
-		goto unlocked_vmobj;
+		goto retry;
 	}
 	VM_OBJECT_WLOCK(vm_obj);
 	vm_object_pip_wakeup(vm_obj);
@@ -2330,7 +2333,7 @@ retry:
 		m = vm_page_lookup(devobj, i);
 		if (m == NULL)
 			continue;
-		if (vm_page_sleep_if_busy(m, true, "915unm"))
+		if (vm_page_sleep_if_busy(m, "915unm"))
 			goto retry;
 		cdev_pager_free_page(devobj, m);
 	}
@@ -2504,10 +2507,8 @@ i915_gem_wire_page(vm_object_t object, vm_pindex_t
 	int rv;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
-	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
-	    VM_ALLOC_RETRY);
+	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 	if (m->valid != VM_PAGE_BITS_ALL) {
-		vm_page_busy(m);
 		if (vm_pager_has_page(object, pindex, NULL, NULL)) {
 			rv = vm_pager_get_pages(object, &m, 1, 0);
 			m = vm_page_lookup(object, pindex);
@@ -2524,11 +2525,11 @@ i915_gem_wire_page(vm_object_t object, vm_pindex_t
 			m->valid = VM_PAGE_BITS_ALL;
 			m->dirty = 0;
 		}
-		vm_page_wakeup(m);
 	}
 	vm_page_lock(m);
 	vm_page_wire(m);
 	vm_page_unlock(m);
+	vm_page_busy_wunlock(m);
 	atomic_add_long(&i915_gem_wired_pages_cnt, 1);
 	return (m);
 }
Index: kern/vfs_bio.c
===================================================================
--- kern/vfs_bio.c	(.../vmcontention/sys)	(revision 252418)
+++ kern/vfs_bio.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -582,7 +582,7 @@ vfs_buf_test_cache(struct buf *bp, vm_page_t m)
 {
 
-	VM_OBJECT_ASSERT_WLOCKED(m->object);
+	VM_OBJECT_ASSERT_LOCKED(m->object);
 	if (bp->b_flags & B_CACHE) {
 		int base = (foff + off) & PAGE_MASK;
 		if (vm_page_is_valid(m, base, size) == 0)
@@ -1849,26 +1849,19 @@ vfs_vmio_release(struct buf *bp)
 		 */
 		vm_page_lock(m);
 		vm_page_unwire(m, 0);
+
 		/*
-		 * We don't mess with busy pages, it is
-		 * the responsibility of the process that
-		 * busied the pages to deal with them.
+		 * Might as well free the page if we can and it has
+		 * no valid data.  We also free the page if the
+		 * buffer was used for direct I/O
 		 */
-		if ((m->oflags & VPO_BUSY) == 0 && m->busy == 0 &&
-		    m->wire_count == 0) {
-			/*
-			 * Might as well free the page if we can and it has
-			 * no valid data.  We also free the page if the
-			 * buffer was used for direct I/O
-			 */
-			if ((bp->b_flags & B_ASYNC) == 0 && !m->valid) {
+		if ((bp->b_flags & B_ASYNC) == 0 && !m->valid) {
+			if (m->wire_count == 0 && !vm_page_busy_locked(m))
 				vm_page_free(m);
-			} else if (bp->b_flags & B_DIRECT) {
-				vm_page_try_to_free(m);
-			} else if (buf_vm_page_count_severe()) {
-				vm_page_try_to_cache(m);
-			}
-		}
+		} else if (bp->b_flags & B_DIRECT)
+			vm_page_try_to_free(m);
+		else if (buf_vm_page_count_severe())
+			vm_page_try_to_cache(m);
 		vm_page_unlock(m);
 	}
 	VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
@@ -3447,7 +3440,7 @@ allocbuf(struct buf *bp, int size)
 			m = bp->b_pages[i];
 			KASSERT(m != bogus_page,
 			    ("allocbuf: bogus page found"));
-			while (vm_page_sleep_if_busy(m, TRUE,
+			while (vm_page_sleep_if_busy(m,
 			    "biodep"))
 				continue;
 
@@ -3486,15 +3479,15 @@ allocbuf(struct buf *bp, int size)
 			 * here could interfere with paging I/O, no
 			 * matter which process we are.
 			 *
-			 * We can only test VPO_BUSY here. Blocking on
-			 * m->busy might lead to a deadlock:
-			 *   vm_fault->getpages->cluster_read->allocbuf
-			 * Thus, we specify VM_ALLOC_IGN_SBUSY.
+			 * We can only test write busy here.
+			 * Blocking on read busy might lead to
+			 * deadlocks once allocbuf() is called after
+			 * the pages have been busied by vfs_busy_pages().
 			 */
 			m = vm_page_grab(obj, OFF_TO_IDX(bp->b_offset) +
 			    bp->b_npages, VM_ALLOC_NOBUSY |
 			    VM_ALLOC_SYSTEM | VM_ALLOC_WIRED |
-			    VM_ALLOC_RETRY | VM_ALLOC_IGN_SBUSY |
+			    VM_ALLOC_RETRY | VM_ALLOC_IGN_RBUSY |
 			    VM_ALLOC_COUNT(desiredpages - bp->b_npages));
 			if (m->valid == 0)
 				bp->b_flags &= ~B_CACHE;
@@ -3849,7 +3842,7 @@ bufdone_finish(struct buf *bp)
 				vfs_page_set_valid(bp, foff, m);
 			}
 
-			vm_page_io_finish(m);
+			vm_page_busy_runlock(m);
 			vm_object_pip_subtract(obj, 1);
 			foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
 			iosize -= resid;
@@ -3911,7 +3904,7 @@ vfs_unbusy_pages(struct buf *bp)
 			BUF_CHECK_UNMAPPED(bp);
 		}
 		vm_object_pip_subtract(obj, 1);
-		vm_page_io_finish(m);
+		vm_page_busy_runlock(m);
 	}
 	vm_object_pip_wakeupn(obj, 0);
 	VM_OBJECT_WUNLOCK(obj);
@@ -3984,8 +3977,8 @@ vfs_page_set_validclean(struct buf *bp, vm_ooffset
 }
 
 /*
- * Ensure that all buffer pages are not busied by VPO_BUSY flag.  If
- * any page is busy, drain the flag.
+ * Ensure that all buffer pages are not write busied.  If any page is write
+ * busy, drain it.
 */
static void
vfs_drain_busy_pages(struct buf *bp)
@@ -3997,22 +3990,26 @@ vfs_drain_busy_pages(struct buf *bp)
 	last_busied = 0;
 	for (i = 0; i < bp->b_npages; i++) {
 		m = bp->b_pages[i];
-		if ((m->oflags & VPO_BUSY) != 0) {
+		if (vm_page_busy_wlocked(m)) {
 			for (; last_busied < i; last_busied++)
-				vm_page_busy(bp->b_pages[last_busied]);
-			while ((m->oflags & VPO_BUSY) != 0)
-				vm_page_sleep(m, "vbpage");
+				vm_page_busy_wlock(bp->b_pages[last_busied]);
+			while (vm_page_busy_wlocked(m)) {
+				vm_page_lock(m);
+				VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
+				vm_page_busy_sleep(m, "vbpage");
+				VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
+			}
 		}
 	}
 	for (i = 0; i < last_busied; i++)
-		vm_page_wakeup(bp->b_pages[i]);
+		vm_page_busy_wunlock(bp->b_pages[i]);
 }
 
 /*
 * This routine is called before a device strategy routine.
 * It is used to tell the VM system that paging I/O is in
 * progress, and treat the pages associated with the buffer
- * almost as being VPO_BUSY.  Also the object paging_in_progress
+ * almost as being write busy.  Also the object paging_in_progress
 * flag is handled to make sure that the object doesn't become
 * inconsistant.
 *
@@ -4045,7 +4042,7 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
 
 		if ((bp->b_flags & B_CLUSTER) == 0) {
 			vm_object_pip_add(obj, 1);
-			vm_page_io_start(m);
+			vm_page_busy_rlock(m);
 		}
 		/*
 		 * When readying a buffer for a read ( i.e
@@ -4265,7 +4262,7 @@ vm_hold_free_pages(struct buf *bp, int newbsize)
 	for (index = newnpages; index < bp->b_npages; index++) {
 		p = bp->b_pages[index];
 		bp->b_pages[index] = NULL;
-		if (p->busy != 0)
+		if (vm_page_busy_rlocked(p))
 			printf("vm_hold_free_pages: blkno: %jd, lblkno: %jd\n",
 			    (intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno);
 		p->wire_count--;
Index: kern/sys_process.c
===================================================================
--- kern/sys_process.c	(.../vmcontention/sys)	(revision 252418)
+++ kern/sys_process.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -263,6 +263,7 @@ proc_rwmem(struct proc *p, struct uio *uio)
 	writing = uio->uio_rw == UIO_WRITE;
 	reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
 	fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
+	fault_flags |= VM_FAULT_IOBUSY;
 
 	/*
 	 * Only map in one page at a time.  We don't have to, but it
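With VM_FAULT_IOBUSY set, vm_fault_handle() (see the vm_fault.c hunks further down) hands the page back read busied instead of held, so the caller releases it with a busy unlock under the object lock rather than with vm_page_unhold().  A sketch of the calling convention used by proc_rwmem(), error handling elided:

	error = vm_fault_handle(map, pageno, reqprot,
	    fault_flags | VM_FAULT_IOBUSY, &m);
	if (error == KERN_SUCCESS) {
		/* ... move data through the page ... */
		VM_OBJECT_WLOCK(m->object);
		vm_page_busy_runlock(m);	/* drop the read busy taken by the fault */
		VM_OBJECT_WUNLOCK(m->object);
	}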
@@ -287,9 +288,9 @@ proc_rwmem(struct proc *p, struct uio *uio)
 		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
 		/*
-		 * Fault and hold the page on behalf of the process.
+		 * Fault and busy the page on behalf of the process.
 		 */
-		error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
+		error = vm_fault_handle(map, pageno, reqprot, fault_flags, &m);
 		if (error != KERN_SUCCESS) {
 			if (error == KERN_RESOURCE_SHORTAGE)
 				error = ENOMEM;
@@ -315,9 +316,9 @@ proc_rwmem(struct proc *p, struct uio *uio)
 		/*
 		 * Release the page.
 		 */
-		vm_page_lock(m);
-		vm_page_unhold(m);
-		vm_page_unlock(m);
+		VM_OBJECT_WLOCK(m->object);
+		vm_page_busy_runlock(m);
+		VM_OBJECT_WUNLOCK(m->object);
 
 	} while (error == 0 && uio->uio_resid > 0);
Index: kern/subr_uio.c
===================================================================
--- kern/subr_uio.c	(.../vmcontention/sys)	(revision 252418)
+++ kern/subr_uio.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -107,7 +107,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_o
 	VM_OBJECT_WLOCK(uobject);
retry:
 	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
-		if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
+		if (vm_page_sleep_if_busy(user_pg, "vm_pgmoveco"))
 			goto retry;
 		vm_page_lock(user_pg);
 		pmap_remove_all(user_pg);
Index: kern/imgact_elf.c
===================================================================
--- kern/imgact_elf.c	(.../vmcontention/sys)	(revision 252418)
+++ kern/imgact_elf.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -378,7 +378,7 @@ __elfN(map_partial)(vm_map_t map, vm_object_t obje
 		off = offset - trunc_page(offset);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
 		    end - start);
-		vm_imgact_unmap_page(sf);
+		vm_imgact_unmap_page(object, sf);
 		if (error) {
 			return (KERN_FAILURE);
 		}
@@ -433,7 +433,7 @@ __elfN(map_insert)(vm_map_t map, vm_object_t objec
 			sz = PAGE_SIZE - off;
 		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
 		    sz);
-		vm_imgact_unmap_page(sf);
+		vm_imgact_unmap_page(object, sf);
 		if (error) {
 			return (KERN_FAILURE);
 		}
@@ -553,7 +553,7 @@ __elfN(load_section)(struct image_params *imgp, vm
 		    trunc_page(offset + filsz);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off,
 		    (caddr_t)map_addr, copy_len);
-		vm_imgact_unmap_page(sf);
+		vm_imgact_unmap_page(object, sf);
 		if (error) {
 			return (error);
 		}
Index: kern/uipc_shm.c
===================================================================
--- kern/uipc_shm.c	(.../vmcontention/sys)	(revision 252418)
+++ kern/uipc_shm.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -281,11 +281,8 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
retry:
 		m = vm_page_lookup(object, idx);
 		if (m != NULL) {
-			if ((m->oflags & VPO_BUSY) != 0 ||
-			    m->busy != 0) {
-				vm_page_sleep(m, "shmtrc");
+			if (vm_page_sleep_if_busy(m, "shmtrc"))
 				goto retry;
-			}
 		} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
 			m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
 			if (m == NULL) {
@@ -305,7 +302,7 @@ retry:
 			if (rv == VM_PAGER_OK) {
 				vm_page_deactivate(m);
 				vm_page_unlock(m);
-				vm_page_wakeup(m);
+				vm_page_busy_wunlock(m);
 			} else {
 				vm_page_free(m);
 				vm_page_unlock(m);
Index: kern/vfs_cluster.c
===================================================================
--- kern/vfs_cluster.c	(.../vmcontention/sys)	(revision 252418)
+++ kern/vfs_cluster.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -466,7 +466,7 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize
 			for (j = 0; j < tbp->b_npages; j += 1) {
 				vm_page_t m;
 				m = tbp->b_pages[j];
-				vm_page_io_start(m);
+				vm_page_busy_rlock(m);
 				vm_object_pip_add(m->object, 1);
 				if ((bp->b_npages == 0) ||
 				    (bp->b_pages[bp->b_npages-1] != m)) {
@@ -947,7 +947,7 @@ cluster_wbuild(struct vnode *vp, long size, daddr_
 			if (i != 0) { /* if not first buffer */
 				for (j = 0; j < tbp->b_npages; j += 1) {
 					m = tbp->b_pages[j];
-					if (m->oflags & VPO_BUSY) {
+					if (vm_page_busy_wlocked(m)) {
 						VM_OBJECT_WUNLOCK(
 						    tbp->b_bufobj->bo_object);
 						bqrelse(tbp);
@@ -957,7 +957,7 @@ cluster_wbuild(struct vnode *vp, long size, daddr_
 			}
 			for (j = 0; j < tbp->b_npages; j += 1) {
 				m = tbp->b_pages[j];
-				vm_page_io_start(m);
+				vm_page_busy_rlock(m);
 				vm_object_pip_add(m->object, 1);
 				if ((bp->b_npages == 0) ||
 				    (bp->b_pages[bp->b_npages - 1] != m)) {
Index: kern/kern_exec.c
===================================================================
--- kern/kern_exec.c	(.../vmcontention/sys)	(revision 252418)
+++ kern/kern_exec.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -937,10 +937,8 @@ exec_map_first_page(imgp)
 		object->pg_color = 0;
 	}
#endif
-	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
-	    VM_ALLOC_RETRY);
+	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 	if (ma[0]->valid != VM_PAGE_BITS_ALL) {
-		vm_page_busy(ma[0]);
 		initial_pagein = VM_INITIAL_PAGEIN;
 		if (initial_pagein > object->size)
 			initial_pagein = object->size;
@@ -948,9 +946,8 @@ exec_map_first_page(imgp)
 			if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
 				if (ma[i]->valid)
 					break;
-				if ((ma[i]->oflags & VPO_BUSY) || ma[i]->busy)
+				if (!vm_page_busy_trywlock(ma[i]))
 					break;
-				vm_page_busy(ma[i]);
 			} else {
 				ma[i] = vm_page_alloc(object, i,
 				    VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
@@ -970,8 +967,8 @@ exec_map_first_page(imgp)
 			VM_OBJECT_WUNLOCK(object);
 			return (EIO);
 		}
-		vm_page_wakeup(ma[0]);
 	}
+	vm_page_busy_wunlock(ma[0]);
 	vm_page_lock(ma[0]);
 	vm_page_hold(ma[0]);
 	vm_page_unlock(ma[0]);
Index: kern/uipc_syscalls.c
===================================================================
--- kern/uipc_syscalls.c	(.../vmcontention/sys)	(revision 252418)
+++ kern/uipc_syscalls.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -2219,11 +2219,6 @@ retry_space:
 			else {
 				ssize_t resid;
 
-				/*
-				 * Ensure that our page is still around
-				 * when the I/O completes.
-				 */
-				vm_page_io_start(pg);
 				VM_OBJECT_WUNLOCK(obj);
 
 				/*
@@ -2237,11 +2232,9 @@ retry_space:
 				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 				    td->td_ucred, NOCRED, &resid, td);
-				VM_OBJECT_WLOCK(obj);
-				vm_page_io_finish(pg);
-				if (!error)
-					VM_OBJECT_WUNLOCK(obj);
 				mbstat.sf_iocnt++;
+				if (error)
+					VM_OBJECT_WLOCK(obj);
 			}
 			if (error) {
 				vm_page_lock(pg);
@@ -2252,7 +2245,7 @@ retry_space:
 				 * then free it.
 				 */
 				if (pg->wire_count == 0 && pg->valid == 0 &&
-				    pg->busy == 0 && !(pg->oflags & VPO_BUSY))
+				    !vm_page_busy_locked(pg))
 					vm_page_free(pg);
 				vm_page_unlock(pg);
 				VM_OBJECT_WUNLOCK(obj);
Index: ia64/ia64/pmap.c
===================================================================
--- ia64/ia64/pmap.c	(.../vmcontention/sys)	(revision 252418)
+++ ia64/ia64/pmap.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -1677,7 +1677,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 	va &= ~PAGE_MASK;
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
-	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
+	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_busy_wlocked(m),
 	    ("pmap_enter: page %p is not busy", m));
 
 	/*
@@ -2234,13 +2234,12 @@ pmap_is_modified(vm_page_t m)
 	rv = FALSE;
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can be dirty.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (rv);
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
@@ -2323,13 +2322,13 @@ pmap_clear_modify(vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("pmap_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
@@ -2396,13 +2395,12 @@ pmap_remove_write(vm_page_t m)
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
Index: mips/mips/pmap.c
===================================================================
--- mips/mips/pmap.c	(.../vmcontention/sys)	(revision 252418)
+++ mips/mips/pmap.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -2014,7 +2014,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
 	    va >= kmi.clean_eva,
 	    ("pmap_enter: managed mapping within the clean submap"));
-	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
+	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_busy_wlocked(m),
 	    ("pmap_enter: page %p is not busy", m));
 	pa = VM_PAGE_TO_PHYS(m);
 	newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, access, prot);
@@ -2812,13 +2812,12 @@ pmap_remove_write(vm_page_t m)
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
@@ -2878,13 +2877,12 @@ pmap_is_modified(vm_page_t m)
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have PTE_D set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = pmap_testbit(m, PTE_D);
@@ -2931,13 +2929,13 @@ pmap_clear_modify(vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("pmap_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
Index: fs/fuse/fuse_vnops.c
===================================================================
--- fs/fuse/fuse_vnops.c	(.../vmcontention/sys)	(revision 252418)
+++ fs/fuse/fuse_vnops.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -1854,36 +1854,8 @@ fuse_vnop_getpages(struct vop_getpages_args *ap)
 			 */
 			;
 		}
-		if (i != ap->a_reqpage) {
-			/*
-			 * Whether or not to leave the page activated is up in
-			 * the air, but we should put the page on a page queue
-			 * somewhere (it already is in the object).  Result:
-			 * It appears that emperical results show that
-			 * deactivating pages is best.
-			 */
-
-			/*
-			 * Just in case someone was asking for this page we
-			 * now tell them that it is ok to use.
-			 */
-			if (!error) {
-				if (m->oflags & VPO_WANTED) {
-					fuse_vm_page_lock(m);
-					vm_page_activate(m);
-					fuse_vm_page_unlock(m);
-				} else {
-					fuse_vm_page_lock(m);
-					vm_page_deactivate(m);
-					fuse_vm_page_unlock(m);
-				}
-				vm_page_wakeup(m);
-			} else {
-				fuse_vm_page_lock(m);
-				vm_page_free(m);
-				fuse_vm_page_unlock(m);
-			}
-		}
+		if (i != ap->a_reqpage)
+			vm_page_readahead_finish(m);
 	}
 	fuse_vm_page_unlock_queues();
 	VM_OBJECT_WUNLOCK(vp->v_object);
Index: fs/tmpfs/tmpfs_vnops.c
===================================================================
--- fs/tmpfs/tmpfs_vnops.c	(.../vmcontention/sys)	(revision 252418)
+++ fs/tmpfs/tmpfs_vnops.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -448,21 +448,14 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t id
 	VM_OBJECT_WLOCK(tobj);
 
 	/*
-	 * The kern_sendfile() code calls vn_rdwr() with the page
-	 * soft-busied.  Ignore the soft-busy state here.  Parallel
-	 * reads of the page content from disk are prevented by
-	 * VPO_BUSY.
-	 *
 	 * Although the tmpfs vnode lock is held here, it is
 	 * nonetheless safe to sleep waiting for a free page.  The
 	 * pageout daemon does not need to acquire the tmpfs vnode
 	 * lock to page out tobj's pages because tobj is a OBJT_SWAP
 	 * type object.
 	 */
-	m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
-	    VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOBUSY);
+	m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 	if (m->valid != VM_PAGE_BITS_ALL) {
-		vm_page_busy(m);
 		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
 			rv = vm_pager_get_pages(tobj, &m, 1, 0);
 			m = vm_page_lookup(tobj, idx);
@@ -485,15 +478,14 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t id
 			}
 		} else
 			vm_page_zero_invalid(m, TRUE);
-		vm_page_wakeup(m);
 	}
-	vm_page_lock(m);
-	vm_page_hold(m);
-	vm_page_unlock(m);
+	vm_page_busy_downgrade(m);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&m, offset, tlen, uio);
+	VM_OBJECT_WLOCK(tobj);
+	vm_page_busy_runlock(m);
+	VM_OBJECT_WUNLOCK(tobj);
 	vm_page_lock(m);
-	vm_page_unhold(m);
 	if (m->queue == PQ_NONE) {
 		vm_page_deactivate(m);
 	} else {
@@ -576,10 +568,8 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, st
 	tlen = MIN(PAGE_SIZE - offset, len);
 
 	VM_OBJECT_WLOCK(tobj);
-	tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
-	    VM_ALLOC_RETRY);
+	tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 	if (tpg->valid != VM_PAGE_BITS_ALL) {
-		vm_page_busy(tpg);
 		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
 			rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
 			tpg = vm_page_lookup(tobj, idx);
@@ -602,18 +592,15 @@ tmpfs_mappedwrite(vm_object_t tobj, size_t len, st
 			}
 		} else
 			vm_page_zero_invalid(tpg, TRUE);
-		vm_page_wakeup(tpg);
 	}
-	vm_page_lock(tpg);
-	vm_page_hold(tpg);
-	vm_page_unlock(tpg);
+	vm_page_busy_downgrade(tpg);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&tpg, offset, tlen, uio);
 	VM_OBJECT_WLOCK(tobj);
+	vm_page_busy_runlock(tpg);
 	if (error == 0)
 		vm_page_dirty(tpg);
 	vm_page_lock(tpg);
-	vm_page_unhold(tpg);
 	if (tpg->queue == PQ_NONE) {
 		vm_page_deactivate(tpg);
 	} else {
Index: fs/tmpfs/tmpfs_subr.c
===================================================================
--- fs/tmpfs/tmpfs_subr.c	(.../vmcontention/sys)	(revision 252418)
+++ fs/tmpfs/tmpfs_subr.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -1331,11 +1331,8 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize,
retry:
 		m = vm_page_lookup(uobj, idx);
 		if (m != NULL) {
-			if ((m->oflags & VPO_BUSY) != 0 ||
-			    m->busy != 0) {
-				vm_page_sleep(m, "tmfssz");
+			if (vm_page_sleep_if_busy(m, "tmfssz"))
 				goto retry;
-			}
 			MPASS(m->valid == VM_PAGE_BITS_ALL);
 		} else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
 			m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL);
@@ -1355,7 +1352,7 @@ retry:
 			if (rv == VM_PAGER_OK) {
 				vm_page_deactivate(m);
 				vm_page_unlock(m);
-				vm_page_wakeup(m);
+				vm_page_busy_wunlock(m);
 			} else {
 				vm_page_free(m);
 				vm_page_unlock(m);
Index: amd64/amd64/pmap.c
===================================================================
--- amd64/amd64/pmap.c	(.../vmcontention/sys)	(revision 252418)
+++ amd64/amd64/pmap.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -3451,7 +3451,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
 	    va >= kmi.clean_eva,
 	    ("pmap_enter: managed mapping within the clean submap"));
-	if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
+	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_busy_wlocked(m))
 		VM_OBJECT_ASSERT_WLOCKED(m->object);
 	pa = VM_PAGE_TO_PHYS(m);
 	newpte = (pt_entry_t)(pa | PG_A | PG_V);
@@ -4538,13 +4538,12 @@ pmap_is_modified(vm_page_t m)
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have PG_M set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = pmap_is_modified_pvh(&m->md) ||
@@ -4669,13 +4668,12 @@ pmap_remove_write(vm_page_t m)
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	if ((m->flags & PG_FICTITIOUS) != 0)
@@ -4818,13 +4816,13 @@ pmap_clear_modify(vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("pmap_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
Index: arm/arm/pmap-v6.c
===================================================================
--- arm/arm/pmap-v6.c	(.../vmcontention/sys)	(revision 252418)
+++ arm/arm/pmap-v6.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -2672,8 +2672,8 @@ pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_
 		pa = systempage.pv_pa;
 		m = NULL;
 	} else {
-		KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
-		    (flags & M_NOWAIT) != 0,
+		KASSERT((m->oflags & VPO_UNMANAGED) != 0 ||
+		    vm_page_busy_wlocked(m) || (flags & M_NOWAIT) != 0,
 		    ("pmap_enter_locked: page %p is not busy", m));
 		pa = VM_PAGE_TO_PHYS(m);
 	}
@@ -3931,13 +3931,12 @@ pmap_is_modified(vm_page_t m)
 	    ("pmap_is_modified: page %p is not managed", m));
 	rv = FALSE;
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have PG_M set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (rv);
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
@@ -3965,13 +3964,13 @@ pmap_clear_modify(vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("pmap_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no mappings can be modified.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
@@ -4006,13 +4005,12 @@ pmap_remove_write(vm_page_t m)
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) != 0 ||
-	    (m->aflags & PGA_WRITEABLE) != 0)
+	if (vm_page_busy_wlocked(m) || (m->aflags & PGA_WRITEABLE) != 0)
 		pmap_clearbit(m, PVF_WRITE);
 }
Index: arm/arm/pmap.c
===================================================================
--- arm/arm/pmap.c	(.../vmcontention/sys)	(revision 252418)
+++ arm/arm/pmap.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -3319,8 +3319,8 @@ pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_
 		pa = systempage.pv_pa;
 		m = NULL;
 	} else {
-		KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
-		    (flags & M_NOWAIT) != 0,
+		KASSERT((m->oflags & VPO_UNMANAGED) != 0 ||
+		    vm_page_busy_wlocked(m) || (flags & M_NOWAIT) != 0,
 		    ("pmap_enter_locked: page %p is not busy", m));
 		pa = VM_PAGE_TO_PHYS(m);
 	}
@@ -4555,13 +4555,13 @@ pmap_clear_modify(vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("pmap_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no mappings can be modified.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
@@ -4612,13 +4612,12 @@ pmap_remove_write(vm_page_t m)
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) != 0 ||
-	    (m->aflags & PGA_WRITEABLE) != 0)
+	if (vm_page_busy_wlocked(m) || (m->aflags & PGA_WRITEABLE) != 0)
 		pmap_clearbit(m, PVF_WRITE);
 }
Index: powerpc/aim/mmu_oea.c
===================================================================
--- powerpc/aim/mmu_oea.c	(.../vmcontention/sys)	(revision 252418)
+++ powerpc/aim/mmu_oea.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -1158,7 +1158,7 @@ moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_
 	if (pmap_bootstrapped)
 		rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
+	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_busy_wlocked(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 
 	/* XXX change the pvo head for fake pages */
@@ -1326,13 +1326,12 @@ moea_is_modified(mmu_t mmu, vm_page_t m)
 	    ("moea_is_modified: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have PTE_CHG set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = moea_query_bit(m, PTE_CHG);
@@ -1371,13 +1370,13 @@ moea_clear_modify(mmu_t mmu, vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("moea_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("moea_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("moea_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_CHG
 	 * set.  If the object containing the page is locked and the page is
-	 * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * not write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
@@ -1401,13 +1400,12 @@ moea_remove_write(mmu_t mmu, vm_page_t m)
 	    ("moea_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	lo = moea_attr_fetch(m);
Index: powerpc/aim/mmu_oea64.c
===================================================================
--- powerpc/aim/mmu_oea64.c	(.../vmcontention/sys)	(revision 252418)
+++ powerpc/aim/mmu_oea64.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -1260,7 +1260,7 @@ moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t v
 		pvo_flags = PVO_MANAGED;
 	}
 
-	if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
+	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_busy_wlocked(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 
 	/* XXX change the pvo head for fake pages */
@@ -1522,13 +1522,12 @@ moea64_is_modified(mmu_t mmu, vm_page_t m)
 	    ("moea64_is_modified: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have LPTE_CHG set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 	return (moea64_query_bit(mmu, m, LPTE_CHG));
 }
@@ -1562,13 +1561,13 @@ moea64_clear_modify(mmu_t mmu, vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("moea64_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("moea64_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("moea64_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG
 	 * set.  If the object containing the page is locked and the page is
-	 * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * not write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
@@ -1590,13 +1589,12 @@ moea64_remove_write(mmu_t mmu, vm_page_t m)
 	    ("moea64_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	powerpc_sync();
 	LOCK_TABLE_RD();
Index: powerpc/booke/pmap.c
===================================================================
--- powerpc/booke/pmap.c	(.../vmcontention/sys)	(revision 252418)
+++ powerpc/booke/pmap.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -1563,7 +1563,7 @@ mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_
 		KASSERT((va <= VM_MAXUSER_ADDRESS),
 		    ("mmu_booke_enter_locked: user pmap, non user va"));
 	}
-	if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
+	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_busy_wlocked(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -1959,13 +1959,12 @@ mmu_booke_remove_write(mmu_t mmu, vm_page_t m)
 	    ("mmu_booke_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
@@ -2204,13 +2203,12 @@ mmu_booke_is_modified(mmu_t mmu, vm_page_t m)
 	rv = FALSE;
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can be modified.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (rv);
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
@@ -2281,13 +2279,13 @@ mmu_booke_clear_modify(mmu_t mmu, vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("mmu_booke_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("mmu_booke_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("mmu_booke_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PG_AWRITEABLE, then no PTEs can be modified.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PG_AWRITEABLE cannot be concurrently set.
+	 * write busied, then PG_AWRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
Index: sparc64/sparc64/pmap.c
===================================================================
--- sparc64/sparc64/pmap.c	(.../vmcontention/sys)	(revision 252418)
+++ sparc64/sparc64/pmap.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -1494,7 +1494,7 @@ pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_pa
 
 	rw_assert(&tte_list_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pm, MA_OWNED);
-	if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
+	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_busy_wlocked(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 	PMAP_STATS_INC(pmap_nenter);
 	pa = VM_PAGE_TO_PHYS(m);
@@ -2068,13 +2068,12 @@ pmap_is_modified(vm_page_t m)
 	rv = FALSE;
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no TTEs can have TD_W set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (rv);
 	rw_wlock(&tte_list_global_lock);
 	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
@@ -2141,13 +2140,13 @@ pmap_clear_modify(vm_page_t m)
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	KASSERT((m->oflags & VPO_BUSY) == 0,
-	    ("pmap_clear_modify: page %p is busy", m));
+	KASSERT(!vm_page_busy_wlocked(m),
+	    ("pmap_clear_modify: page %p is write locked", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
 	 * If the object containing the page is locked and the page is not
-	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
@@ -2191,13 +2190,12 @@ pmap_remove_write(vm_page_t m)
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
-	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * If the page is not write busied, then PGA_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
-	if ((m->oflags & VPO_BUSY) == 0 &&
-	    (m->aflags & PGA_WRITEABLE) == 0)
+	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&tte_list_global_lock);
 	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
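Every pmap_is_modified(), pmap_clear_modify(), and pmap_remove_write() conversion above leans on the same invariant: PGA_WRITEABLE is only ever set by a thread that holds the page write busied or the object lock, so with the object locked and the page not write busied a clear PGA_WRITEABLE cannot change underneath us.  The shared early-exit test therefore reduces to:

	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_busy_wlocked(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;		/* no managed mapping can be writeable, nothing to do */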
Index: vm/vm_fault.c
===================================================================
--- vm/vm_fault.c	(.../vmcontention/sys)	(revision 252418)
+++ vm/vm_fault.c	(.../vmobj-readlock/sys)	(revision 252418)
@@ -141,7 +141,7 @@ static inline void
 release_page(struct faultstate *fs)
 {
 
-	vm_page_wakeup(fs->m);
+	vm_page_busy_wunlock(fs->m);
 	vm_page_lock(fs->m);
 	vm_page_deactivate(fs->m);
 	vm_page_unlock(fs->m);
@@ -221,8 +221,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
 		ktrfault(vaddr, fault_type);
#endif
-	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
-	    NULL);
+	result = vm_fault_handle(map, trunc_page(vaddr), fault_type,
+	    fault_flags, NULL);
#ifdef KTRACE
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
 		ktrfaultend(result);
@@ -231,7 +231,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_
 }
 
 int
-vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	vm_prot_t prot;
@@ -340,21 +340,21 @@ RetryFault:;
 
 			/*
 			 * Wait/Retry if the page is busy.  We have to do this
-			 * if the page is busy via either VPO_BUSY or
-			 * vm_page_t->busy because the vm_pager may be using
-			 * vm_page_t->busy for pageouts ( and even pageins if
-			 * it is the vnode pager ), and we could end up trying
-			 * to pagein and pageout the same page simultaneously.
+			 * if the page is either write or read busy because
+			 * the vm_pager may be using read busy for pageouts
+			 * (and even pageins if it is the vnode pager), and we
+			 * could end up trying to pagein and pageout the same
+			 * page simultaneously.
 			 *
 			 * We can theoretically allow the busy case on a read
 			 * fault if the page is marked valid, but since such
 			 * pages are typically already pmap'd, putting that
 			 * special case in might be more effort then it is
 			 * worth.  We cannot under any circumstances mess
-			 * around with a vm_page_t->busy page except, perhaps,
+			 * around with a read busied page except, perhaps,
 			 * to pmap it.
 			 */
-			if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) {
+			if (vm_page_busy_locked(fs.m)) {
 				/*
 				 * Reference the page before unlocking and
 				 * sleeping so that the page daemon is less
@@ -379,8 +379,7 @@ RetryFault:;
 				unlock_map(&fs);
 				if (fs.m == vm_page_lookup(fs.object,
 				    fs.pindex)) {
-					vm_page_sleep_if_busy(fs.m, TRUE,
-					    "vmpfw");
+					vm_page_sleep_if_busy(fs.m, "vmpfw");
 				}
 				vm_object_pip_wakeup(fs.object);
 				VM_OBJECT_WUNLOCK(fs.object);
@@ -397,7 +396,7 @@ RetryFault:;
 			 * (readable), jump to readrest, else break-out ( we
 			 * found the page ).
 			 */
-			vm_page_busy(fs.m);
+			vm_page_busy_wlock(fs.m);
 			if (fs.m->valid != VM_PAGE_BITS_ALL)
 				goto readrest;
 			break;
@@ -503,7 +502,7 @@ readrest:
 		/*
 		 * Call the pager to retrieve the data, if any, after
 		 * releasing the lock on the map.  We hold a ref on
-		 * fs.object and the pages are VPO_BUSY'd.
+		 * fs.object and the pages are write busied.
 		 */
 		unlock_map(&fs);
 
@@ -552,7 +551,7 @@ vnode_locked:
 			 * return value is the index into the marray for the
 			 * vm_page_t passed to the routine.
 			 *
-			 * fs.m plus the additional pages are VPO_BUSY'd.
+			 * fs.m plus the additional pages are write busied.
*/ faultcount = vm_fault_additional_pages( fs.m, behind, ahead, marray, &reqpage); @@ -678,8 +677,7 @@ vnode_locked: } } - KASSERT((fs.m->oflags & VPO_BUSY) != 0, - ("vm_fault: not busy after main loop")); + vm_page_busy_assert_wlocked(fs.m); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock @@ -744,7 +742,7 @@ vnode_locked: vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); vm_page_unlock(fs.m); - vm_page_busy(fs.m); + vm_page_busy_wlock(fs.m); fs.first_m = fs.m; fs.m = NULL; PCPU_INC(cnt.v_cow_optim); @@ -892,12 +890,9 @@ vnode_locked: } } + vm_page_busy_assert_wlocked(fs.m); + /* - * Page had better still be busy - */ - KASSERT(fs.m->oflags & VPO_BUSY, - ("vm_fault: page %p not busy!", fs.m)); - /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ @@ -930,10 +925,14 @@ vnode_locked: vm_page_activate(fs.m); if (m_hold != NULL) { *m_hold = fs.m; - vm_page_hold(fs.m); + if (fault_flags & VM_FAULT_IOBUSY) + vm_page_busy_downgrade(fs.m); + else + vm_page_hold(fs.m); } vm_page_unlock(fs.m); - vm_page_wakeup(fs.m); + if (m_hold == NULL || (fault_flags & VM_FAULT_IOBUSY) == 0) + vm_page_busy_wunlock(fs.m); /* * Unlock everything, and return @@ -978,13 +977,12 @@ vm_fault_cache_behind(const struct faultstate *fs, if (pindex < OFF_TO_IDX(fs->entry->offset)) pindex = OFF_TO_IDX(fs->entry->offset); m = first_object != object ? fs->first_m : fs->m; - KASSERT((m->oflags & VPO_BUSY) != 0, - ("vm_fault_cache_behind: page %p is not busy", m)); + vm_page_busy_assert_wlocked(m); m_prev = vm_page_prev(m); while ((m = m_prev) != NULL && m->pindex >= pindex && m->valid == VM_PAGE_BITS_ALL) { m_prev = vm_page_prev(m); - if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) + if (vm_page_busy_locked(m)) continue; vm_page_lock(m); if (m->hold_count == 0 && m->wire_count == 0) { @@ -1132,7 +1130,7 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_ * and hold these pages. */ for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) - if (*mp == NULL && vm_fault_hold(map, va, prot, + if (*mp == NULL && vm_fault_handle(map, va, prot, VM_FAULT_NORMAL, mp) != KERN_SUCCESS) goto error; } @@ -1360,12 +1358,13 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src vm_page_lock(dst_m); vm_page_wire(dst_m); vm_page_unlock(dst_m); + vm_page_busy_wunlock(dst_m); } else { vm_page_lock(dst_m); vm_page_activate(dst_m); vm_page_unlock(dst_m); + vm_page_busy_wunlock(dst_m); } - vm_page_wakeup(dst_m); } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { Index: vm/vm_phys.c =================================================================== --- vm/vm_phys.c (.../vmcontention/sys) (revision 252418) +++ vm/vm_phys.c (.../vmobj-readlock/sys) (revision 252418) @@ -560,7 +560,8 @@ vm_phys_fictitious_reg_range(vm_paddr_t start, vm_ for (i = 0; i < page_count; i++) { vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr); pmap_page_init(&fp[i]); - fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED); + fp[i].oflags &= ~VPO_UNMANAGED; + fp[i].busy_lock = VPB_UNLOCKED; } mtx_lock(&vm_phys_fictitious_reg_mtx); for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) { Index: vm/vm_glue.c =================================================================== --- vm/vm_glue.c (.../vmcontention/sys) (revision 252418) +++ vm/vm_glue.c (.../vmobj-readlock/sys) (revision 252418) @@ -233,7 +233,7 @@ vsunlock(void *addr, size_t len) * Return the pinned page if successful; otherwise, return NULL. 
*/ static vm_page_t -vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) +vm_imgact_page_iostart(vm_object_t object, vm_ooffset_t offset) { vm_page_t m, ma[1]; vm_pindex_t pindex; @@ -241,10 +241,8 @@ static vm_page_t VM_OBJECT_WLOCK(object); pindex = OFF_TO_IDX(offset); - m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | - VM_ALLOC_NOBUSY); + m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { - vm_page_busy(m); ma[0] = m; rv = vm_pager_get_pages(object, ma, 1, 0); m = vm_page_lookup(object, pindex); @@ -257,11 +255,9 @@ static vm_page_t m = NULL; goto out; } - vm_page_wakeup(m); } - vm_page_lock(m); - vm_page_hold(m); - vm_page_unlock(m); + vm_page_busy_wunlock(m); + vm_page_busy_rlock(m); out: VM_OBJECT_WUNLOCK(object); return (m); @@ -276,7 +272,7 @@ vm_imgact_map_page(vm_object_t object, vm_ooffset_ { vm_page_t m; - m = vm_imgact_hold_page(object, offset); + m = vm_imgact_page_iostart(object, offset); if (m == NULL) return (NULL); sched_pin(); @@ -287,16 +283,16 @@ vm_imgact_map_page(vm_object_t object, vm_ooffset_ * Destroy the given CPU private mapping and unpin the page that it mapped. */ void -vm_imgact_unmap_page(struct sf_buf *sf) +vm_imgact_unmap_page(vm_object_t object, struct sf_buf *sf) { vm_page_t m; m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); - vm_page_lock(m); - vm_page_unhold(m); - vm_page_unlock(m); + VM_OBJECT_WLOCK(object); + vm_page_busy_runlock(m); + VM_OBJECT_WUNLOCK(object); } void @@ -539,13 +535,11 @@ vm_thread_swapin(struct thread *td) VM_ALLOC_WIRED); for (i = 0; i < pages; i++) { if (ma[i]->valid != VM_PAGE_BITS_ALL) { - KASSERT(ma[i]->oflags & VPO_BUSY, - ("lost busy 1")); + vm_page_busy_assert_wlocked(ma[i]); vm_object_pip_add(ksobj, 1); for (j = i + 1; j < pages; j++) { - KASSERT(ma[j]->valid == VM_PAGE_BITS_ALL || - (ma[j]->oflags & VPO_BUSY), - ("lost busy 2")); + if (ma[j]->valid != VM_PAGE_BITS_ALL) + vm_page_busy_assert_wlocked(ma[j]); if (ma[j]->valid == VM_PAGE_BITS_ALL) break; } @@ -556,9 +550,9 @@ vm_thread_swapin(struct thread *td) vm_object_pip_wakeup(ksobj); for (k = i; k < j; k++) ma[k] = vm_page_lookup(ksobj, k); - vm_page_wakeup(ma[i]); - } else if (ma[i]->oflags & VPO_BUSY) - vm_page_wakeup(ma[i]); + vm_page_busy_wunlock(ma[i]); + } else if (vm_page_busy_wlocked(ma[i])) + vm_page_busy_wunlock(ma[i]); } VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(td->td_kstack, ma, pages); Index: vm/phys_pager.c =================================================================== --- vm/phys_pager.c (.../vmcontention/sys) (revision 252418) +++ vm/phys_pager.c (.../vmobj-readlock/sys) (revision 252418) @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -152,10 +153,12 @@ phys_pager_getpages(vm_object_t object, vm_page_t KASSERT(m[i]->dirty == 0, ("phys_pager_getpages: dirty page %p", m[i])); /* The requested page must remain busy, the others not. */ - if (i == reqpage) + if (i == reqpage) { + vm_page_lock(m[i]); vm_page_flash(m[i]); - else - vm_page_wakeup(m[i]); + vm_page_unlock(m[i]); + } else + vm_page_busy_wunlock(m[i]); } return (VM_PAGER_OK); } Index: vm/vm_pageout.c =================================================================== --- vm/vm_pageout.c (.../vmcontention/sys) (revision 252418) +++ vm/vm_pageout.c (.../vmobj-readlock/sys) (revision 252418) @@ -232,8 +232,8 @@ static void vm_pageout_page_stats(void); /* * Initialize a dummy page for marking the caller's place in the specified * paging queue. 
In principle, this function only needs to set the flag - * PG_MARKER. Nonetheless, it sets the flag VPO_BUSY and initializes the hold - * count to one as safety precautions. + * PG_MARKER. Nonetheless, it write busies and initializes the hold count + * to one as safety precautions. */ static void vm_pageout_init_marker(vm_page_t marker, u_short queue) @@ -241,7 +241,7 @@ vm_pageout_init_marker(vm_page_t marker, u_short q bzero(marker, sizeof(*marker)); marker->flags = PG_MARKER; - marker->oflags = VPO_BUSY; + marker->busy_lock = VPB_SINGLE_WRITER; marker->queue = queue; marker->hold_count = 1; } @@ -361,8 +361,7 @@ vm_pageout_clean(vm_page_t m) /* * Can't clean the page if it's busy or held. */ - KASSERT(m->busy == 0 && (m->oflags & VPO_BUSY) == 0, - ("vm_pageout_clean: page %p is busy", m)); + vm_page_busy_assert_unlocked(m); KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m)); vm_page_unlock(m); @@ -400,8 +399,7 @@ more: break; } - if ((p = vm_page_prev(pb)) == NULL || - (p->oflags & VPO_BUSY) != 0 || p->busy != 0) { + if ((p = vm_page_prev(pb)) == NULL || vm_page_busy_locked(p)) { ib = 0; break; } @@ -430,8 +428,7 @@ more: pindex + is < object->size) { vm_page_t p; - if ((p = vm_page_next(ps)) == NULL || - (p->oflags & VPO_BUSY) != 0 || p->busy != 0) + if ((p = vm_page_next(ps)) == NULL || vm_page_busy_locked(p)) break; vm_page_lock(p); vm_page_test_dirty(p); @@ -501,7 +498,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int fla KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); - vm_page_io_start(mc[i]); + vm_page_busy_rlock(mc[i]); pmap_remove_write(mc[i]); } vm_object_pip_add(object, count); @@ -557,7 +554,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int fla */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); - vm_page_io_finish(mt); + vm_page_busy_runlock(mt); if (vm_page_count_severe()) { vm_page_lock(mt); vm_page_try_to_cache(mt); @@ -598,8 +595,7 @@ vm_pageout_launder(int queue, int tries, vm_paddr_ object = m->object; if ((!VM_OBJECT_TRYWLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || - m->hold_count != 0)) || (m->oflags & VPO_BUSY) != 0 || - m->busy != 0) { + m->hold_count != 0)) || vm_page_busy_locked(m)) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); continue; } @@ -734,7 +730,7 @@ vm_pageout_object_deactivate_pages(pmap_t pmap, vm TAILQ_FOREACH(p, &object->memq, listq) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; - if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0) + if (vm_page_busy_locked(p)) continue; PCPU_INC(cnt.v_pdpages); vm_page_lock(p); @@ -978,7 +974,7 @@ vm_pageout_scan(int pass) * pages, because they may leave the inactive queue * shortly after page scan is finished. */ - if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) { + if (vm_page_busy_locked(m)) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); addl_page_shortage++; @@ -1201,7 +1197,7 @@ vm_pageout_scan(int pass) * page back onto the end of the queue so that * statistics are more correct if we don't. */ - if (m->busy || (m->oflags & VPO_BUSY)) { + if (vm_page_busy_locked(m)) { vm_page_unlock(m); goto unlock_and_continue; } @@ -1311,9 +1307,7 @@ relock_queues: /* * Don't deactivate pages that are busy. 
*/ - if ((m->busy != 0) || - (m->oflags & VPO_BUSY) || - (m->hold_count != 0)) { + if (vm_page_busy_locked(m) || m->hold_count != 0) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); vm_page_requeue_locked(m); @@ -1583,9 +1577,7 @@ vm_pageout_page_stats(void) /* * Don't deactivate pages that are busy. */ - if ((m->busy != 0) || - (m->oflags & VPO_BUSY) || - (m->hold_count != 0)) { + if (vm_page_busy_locked(m) || m->hold_count != 0) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); vm_page_requeue_locked(m); Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (.../vmcontention/sys) (revision 252418) +++ vm/vm_object.c (.../vmobj-readlock/sys) (revision 252418) @@ -744,8 +744,7 @@ vm_object_terminate(vm_object_t object) * the object, the page and object are reset to any empty state. */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { - KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, - ("vm_object_terminate: freeing busy page %p", p)); + vm_page_busy_assert_unlocked(p); vm_page_lock(p); /* * Optimize the page's removal from the object by resetting @@ -871,7 +870,7 @@ rescan: np = TAILQ_NEXT(p, listq); if (p->valid == 0) continue; - if (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { + if (vm_page_sleep_if_busy(p, "vpcwai")) { if (object->generation != curgeneration) { if ((flags & OBJPC_SYNC) != 0) goto rescan; @@ -939,7 +938,7 @@ vm_object_page_collect_flush(vm_object_t object, v for (tp = p; count < vm_pageout_page_count; count++) { tp = vm_page_next(tp); - if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0) + if (tp == NULL || vm_page_busy_locked(tp)) break; if (!vm_object_page_remove_write(tp, flags, clearobjflags)) break; @@ -947,7 +946,7 @@ vm_object_page_collect_flush(vm_object_t object, v for (p_first = p; count < vm_pageout_page_count; count++) { tp = vm_page_prev(p_first); - if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0) + if (tp == NULL || vm_page_busy_locked(tp)) break; if (!vm_object_page_remove_write(tp, flags, clearobjflags)) break; @@ -1156,7 +1155,7 @@ shadowlookup: ("vm_object_madvise: page %p is fictitious", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_object_madvise: page %p is not managed", m)); - if ((m->oflags & VPO_BUSY) || m->busy) { + if (vm_page_busy_locked(m)) { if (advise == MADV_WILLNEED) { /* * Reference the page before unlocking and @@ -1165,11 +1164,10 @@ shadowlookup: */ vm_page_aflag_set(m, PGA_REFERENCED); } - vm_page_unlock(m); if (object != tobject) VM_OBJECT_WUNLOCK(object); - m->oflags |= VPO_WANTED; - VM_OBJECT_SLEEP(tobject, m, PDROP | PVM, "madvpo", 0); + VM_OBJECT_WUNLOCK(tobject); + vm_page_busy_sleep(m, "madvpo"); VM_OBJECT_WLOCK(object); goto relookup; } @@ -1344,10 +1342,12 @@ retry: * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. 
*/ - if ((m->oflags & VPO_BUSY) || m->busy) { + if (vm_page_busy_locked(m)) { VM_OBJECT_WUNLOCK(new_object); - m->oflags |= VPO_WANTED; - VM_OBJECT_SLEEP(orig_object, m, PVM, "spltwt", 0); + vm_page_lock(m); + VM_OBJECT_WUNLOCK(orig_object); + vm_page_busy_sleep(m, "spltwt"); + VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } @@ -1371,7 +1371,7 @@ retry: vm_page_unlock(m); /* page automatically made dirty by rename and cache handled */ if (orig_object->type == OBJT_SWAP) - vm_page_busy(m); + vm_page_busy_wlock(m); } if (orig_object->type == OBJT_SWAP) { /* @@ -1380,7 +1380,7 @@ retry: */ swap_pager_copy(orig_object, new_object, offidxstart, 0); TAILQ_FOREACH(m, &new_object->memq, listq) - vm_page_wakeup(m); + vm_page_busy_wunlock(m); /* * Transfer any cached pages from orig_object to new_object. @@ -1496,18 +1496,16 @@ vm_object_backing_scan(vm_object_t object, int op) vm_page_t pp; if (op & OBSC_COLLAPSE_NOWAIT) { - if ((p->oflags & VPO_BUSY) || - !p->valid || - p->busy) { + if (!p->valid || vm_page_busy_locked(p)) { p = next; continue; } } else if (op & OBSC_COLLAPSE_WAIT) { - if ((p->oflags & VPO_BUSY) || p->busy) { + if (vm_page_busy_locked(p)) { VM_OBJECT_WUNLOCK(object); - p->oflags |= VPO_WANTED; - VM_OBJECT_SLEEP(backing_object, p, - PDROP | PVM, "vmocol", 0); + vm_page_lock(p); + VM_OBJECT_WUNLOCK(backing_object); + vm_page_busy_sleep(p, "vmocol"); VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); /* @@ -1905,8 +1903,12 @@ again: vm_page_unlock(p); continue; } - if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) + if (vm_page_busy_locked(p)) { + VM_OBJECT_WUNLOCK(object); + vm_page_busy_sleep(p, "vmopar"); + VM_OBJECT_WLOCK(object); goto again; + } KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { @@ -2032,7 +2034,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t if (pindex > start) { m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { - vm_page_wakeup(m); + vm_page_busy_wunlock(m); m = TAILQ_NEXT(m, listq); } } Index: vm/vm_extern.h =================================================================== --- vm/vm_extern.h (.../vmcontention/sys) (revision 252418) +++ vm/vm_extern.h (.../vmobj-readlock/sys) (revision 252418) @@ -63,7 +63,7 @@ void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_ma vm_ooffset_t *); int vm_fault_disable_pagefaults(void); void vm_fault_enable_pagefaults(int save); -int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, +int vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold); int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count); @@ -87,7 +87,7 @@ void vnode_pager_setsize(struct vnode *, vm_ooffse int vslock(void *, size_t); void vsunlock(void *, size_t); struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset); -void vm_imgact_unmap_page(struct sf_buf *sf); +void vm_imgact_unmap_page(vm_object_t, struct sf_buf *sf); void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); int vm_mlock(struct proc *, struct ucred *, const void *, size_t); Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (.../vmcontention/sys) (revision 252418) +++ vm/vm_page.c (.../vmobj-readlock/sys) (revision 252418) @@ -470,65 +470,147 @@ vm_page_reference(vm_page_t m) } void 
-vm_page_busy(vm_page_t m) +vm_page_busy_downgrade(vm_page_t m) { + u_int retry, x; - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("vm_page_busy: page already busy!!!")); - m->oflags |= VPO_BUSY; + vm_page_busy_assert_wlocked(m); + + retry = 0; + for (;;) { + if (retry++ > 10000) + panic("vm_page_busy_downgrade: failed loop %p", m); + x = m->busy_lock; + x &= VPB_LOCK_WAITERS; + if (atomic_cmpset_rel_int(&m->busy_lock, + VPB_SINGLE_WRITER | x, VPB_READERS_LOCK(1) | x)) + break; + } } -/* - * vm_page_flash: - * - * wakeup anyone waiting for the page. - */ +int +vm_page_busy_rlocked(vm_page_t m) +{ + u_int x; + + x = m->busy_lock; + return ((x & VPB_LOCK_READ) != 0 && x != VPB_UNLOCKED); +} + void -vm_page_flash(vm_page_t m) +vm_page_busy_runlock(vm_page_t m) { + u_int retry, x; - VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->oflags & VPO_WANTED) { - m->oflags &= ~VPO_WANTED; + vm_page_busy_assert_rlocked(m); + + retry = 0; + for (;;) { + if (retry++ > 10000) + panic("vm_page_busy_runlock: failed loop %p", m); + x = m->busy_lock; + if (VPB_READERS(x) > 1) { + if (atomic_cmpset_int(&m->busy_lock, x, + x - VPB_ONE_READER)) + break; + continue; + } + if ((x & VPB_LOCK_WAITERS) == 0) { + KASSERT(x == VPB_READERS_LOCK(1), + ("vm_page_busy_runlock: invalid lock state")); + if (atomic_cmpset_int(&m->busy_lock, + VPB_READERS_LOCK(1), VPB_UNLOCKED)) + break; + continue; + } + KASSERT(x == (VPB_READERS_LOCK(1) | VPB_LOCK_WAITERS), + ("vm_page_busy_runlock: invalid lock state for waiters")); + + vm_page_lock(m); + if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNLOCKED)) { + vm_page_unlock(m); + continue; + } wakeup(m); + vm_page_unlock(m); + break; } } /* - * vm_page_wakeup: + * vm_page_busy_sleep: * - * clear the VPO_BUSY flag and wakeup anyone waiting for the - * page. + * Sleep and release the page lock, using the page pointer as wchan. + * This is used to implement the hard path of the busying mechanism. * + * The given page must be locked. */ void -vm_page_wakeup(vm_page_t m) +vm_page_busy_sleep(vm_page_t m, const char *wmesg) { + u_int x; - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT(m->oflags & VPO_BUSY, ("vm_page_wakeup: page not busy!!!")); - m->oflags &= ~VPO_BUSY; - vm_page_flash(m); + vm_page_lock_assert(m, MA_OWNED); + + x = m->busy_lock; + if (x == VPB_UNLOCKED) { + vm_page_unlock(m); + return; + } + if ((x & VPB_LOCK_WAITERS) == 0 && + !atomic_cmpset_int(&m->busy_lock, x, x | VPB_LOCK_WAITERS)) { + vm_page_unlock(m); + return; + } + msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0); } +int +vm_page_busy_tryrlock(vm_page_t m) +{ + u_int x; + + x = m->busy_lock; + return ((x & VPB_LOCK_READ) != 0 && + atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_READER)); +} + void -vm_page_io_start(vm_page_t m) +vm_page_busy_wunlock_hard(vm_page_t m) { - VM_OBJECT_ASSERT_WLOCKED(m->object); - m->busy++; + vm_page_busy_assert_wlocked(m); + + vm_page_lock(m); + atomic_store_rel_int(&m->busy_lock, VPB_UNLOCKED); + wakeup(m); + vm_page_unlock(m); } +/* + * vm_page_flash: + * + * wakeup anyone waiting for the page. 
+ */ void -vm_page_io_finish(vm_page_t m) +vm_page_flash(vm_page_t m) { + u_int retry, x; - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT(m->busy > 0, ("vm_page_io_finish: page %p is not busy", m)); - m->busy--; - if (m->busy == 0) - vm_page_flash(m); + vm_page_lock_assert(m, MA_OWNED); + + retry = 0; + for (;;) { + if (retry++ > 1000) + panic("vm_page_flash: failed loop %p", m); + x = m->busy_lock; + if ((x & VPB_LOCK_WAITERS) == 0) + return; + if (atomic_cmpset_int(&m->busy_lock, x, + x & (~VPB_LOCK_WAITERS))) + break; + } + wakeup(m); } /* @@ -643,7 +725,8 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm /* Fictitious pages don't use "segind". */ m->flags = PG_FICTITIOUS; /* Fictitious pages don't use "order" or "pool". */ - m->oflags = VPO_BUSY | VPO_UNMANAGED; + m->oflags = VPO_UNMANAGED; + m->busy_lock = VPB_SINGLE_WRITER; m->wire_count = 1; memattr: pmap_page_set_memattr(m, memattr); @@ -722,16 +805,13 @@ vm_page_readahead_finish(vm_page_t m) * deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ - if (m->oflags & VPO_WANTED) { - vm_page_lock(m); + vm_page_lock(m); + if ((m->busy_lock & VPB_LOCK_WAITERS) != 0) vm_page_activate(m); - vm_page_unlock(m); - } else { - vm_page_lock(m); + else vm_page_deactivate(m); - vm_page_unlock(m); - } - vm_page_wakeup(m); + vm_page_unlock(m); + vm_page_busy_wunlock(m); } else { /* * Free the completely invalid page. Such page state @@ -746,29 +826,38 @@ vm_page_readahead_finish(vm_page_t m) } /* - * vm_page_sleep: + * vm_page_sleep_if_busy: * - * Sleep and release the page lock. + * Sleep and release the page queues lock if the page is busied. + * Returns TRUE if the thread slept. * - * The object containing the given page must be locked. + * The given page must be unlocked and object containing it must + * be locked. */ -void -vm_page_sleep(vm_page_t m, const char *msg) +int +vm_page_sleep_if_busy(vm_page_t m, const char *msg) { + vm_object_t obj; + vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); - if (mtx_owned(vm_page_lockptr(m))) - vm_page_unlock(m); - /* - * It's possible that while we sleep, the page will get - * unbusied and freed. If we are holding the object - * lock, we will assume we hold a reference to the object - * such that even if m->object changes, we can re-lock - * it. - */ - m->oflags |= VPO_WANTED; - VM_OBJECT_SLEEP(m->object, m, PVM, msg, 0); + if (vm_page_busy_locked(m)) { + /* + * The page-specific object must be cached because page + * identity can change during the sleep, causing the + * re-lock of a different object. + * It is assumed that a reference to the object is already + * held by the callers. 
+ */ + obj = m->object; + vm_page_lock(m); + VM_OBJECT_WUNLOCK(obj); + vm_page_busy_sleep(m, msg); + VM_OBJECT_WLOCK(obj); + return (TRUE); + } + return (FALSE); } /* @@ -893,15 +982,24 @@ void vm_page_remove(vm_page_t m) { vm_object_t object; + boolean_t lockacq; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); if ((object = m->object) == NULL) return; VM_OBJECT_ASSERT_WLOCKED(object); - if (m->oflags & VPO_BUSY) { - m->oflags &= ~VPO_BUSY; + if (vm_page_busy_wlocked(m)) { + lockacq = FALSE; + if ((m->oflags & VPO_UNMANAGED) != 0 && + !mtx_owned(vm_page_lockptr(m))) { + lockacq = TRUE; + vm_page_lock(m); + } vm_page_flash(m); + atomic_store_rel_int(&m->busy_lock, VPB_UNLOCKED); + if (lockacq) + vm_page_unlock(m); } /* @@ -1170,8 +1268,7 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t * vm_page_alloc: * * Allocate and return a page that is associated with the specified - * object and offset pair. By default, this page has the flag VPO_BUSY - * set. + * object and offset pair. By default, this page is write busied. * * The caller must always specify an allocation class. * @@ -1186,10 +1283,11 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t * VM_ALLOC_IFCACHED return page only if it is cached * VM_ALLOC_IFNOTCACHED return NULL, do not reactivate if the page * is cached - * VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page + * VM_ALLOC_NOBUSY do not write busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and - * should not have the flag VPO_BUSY set + * should not be write busy + * VM_ALLOC_RBUSY read busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * @@ -1204,8 +1302,12 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind int flags, req_class; mpred = 0; /* XXX: pacify gcc */ - KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0), - ("vm_page_alloc: inconsistent object/req")); + KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && + (object != NULL || (req & VM_ALLOC_RBUSY) == 0) && + ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_RBUSY)) != + (VM_ALLOC_NOBUSY | VM_ALLOC_RBUSY)), + ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object, + req)); if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); @@ -1286,7 +1388,8 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue)); KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m)); KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m)); - KASSERT(m->busy == 0, ("vm_page_alloc: page %p is busy", m)); + KASSERT(!vm_page_busy_rlocked(m), + ("vm_page_alloc: page %p is busy", m)); KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("vm_page_alloc: page %p has unexpected memattr %d", m, @@ -1330,8 +1433,11 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind m->aflags = 0; m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? 
VPO_UNMANAGED : 0; - if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ)) == 0) - m->oflags |= VPO_BUSY; + m->busy_lock = VPB_UNLOCKED; + if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_RBUSY)) == 0) + m->busy_lock = VPB_SINGLE_WRITER; + if ((req & VM_ALLOC_RBUSY) != 0) + m->busy_lock = VPB_READERS_LOCK(1); if (req & VM_ALLOC_WIRED) { /* * The page lock is not required for wiring a page until that @@ -1399,9 +1505,10 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: - * VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page + * VM_ALLOC_NOBUSY do not write busy the page * VM_ALLOC_NOOBJ page is not associated with an object and - * should not have the flag VPO_BUSY set + * should not be write busy + * VM_ALLOC_RBUSY read busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * @@ -1417,8 +1524,12 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex u_int flags, oflags; int req_class; - KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0), - ("vm_page_alloc_contig: inconsistent object/req")); + KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && + (object != NULL || (req & VM_ALLOC_RBUSY) == 0) && + ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_RBUSY)) != + (VM_ALLOC_NOBUSY | VM_ALLOC_RBUSY)), + ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object, + req)); if (object != NULL) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_PHYS, @@ -1494,8 +1605,6 @@ retry: atomic_add_int(&cnt.v_wire_count, npages); oflags = VPO_UNMANAGED; if (object != NULL) { - if ((req & VM_ALLOC_NOBUSY) == 0) - oflags |= VPO_BUSY; if (object->memattr != VM_MEMATTR_DEFAULT && memattr == VM_MEMATTR_DEFAULT) memattr = object->memattr; @@ -1503,6 +1612,13 @@ retry: for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; m->flags = (m->flags | PG_NODUMP) & flags; + m->busy_lock = VPB_UNLOCKED; + if (object != NULL) { + if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_RBUSY)) == 0) + m->busy_lock = VPB_SINGLE_WRITER; + if ((req & VM_ALLOC_RBUSY) != 0) + m->busy_lock = VPB_READERS_LOCK(1); + } if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 1; /* Unmanaged pages don't use "act_count". 
*/ @@ -1545,7 +1661,7 @@ vm_page_alloc_init(vm_page_t m) ("vm_page_alloc_init: page %p is wired", m)); KASSERT(m->hold_count == 0, ("vm_page_alloc_init: page %p is held", m)); - KASSERT(m->busy == 0, + KASSERT(!vm_page_busy_rlocked(m), ("vm_page_alloc_init: page %p is busy", m)); KASSERT(m->dirty == 0, ("vm_page_alloc_init: page %p is dirty", m)); @@ -1904,7 +2020,7 @@ vm_page_free_toq(vm_page_t m) if (VM_PAGE_IS_FREE(m)) panic("vm_page_free: freeing free page %p", m); - else if (m->busy != 0) + else if (vm_page_busy_rlocked(m)) panic("vm_page_free: freeing busy page %p", m); /* @@ -2115,8 +2231,8 @@ vm_page_try_to_cache(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->dirty || m->hold_count || m->busy || m->wire_count || - (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0) + if (m->dirty || m->hold_count || m->wire_count || + (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busy_locked(m)) return (0); pmap_remove_all(m); if (m->dirty) @@ -2138,8 +2254,8 @@ vm_page_try_to_free(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); if (m->object != NULL) VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->dirty || m->hold_count || m->busy || m->wire_count || - (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0) + if (m->dirty || m->hold_count || m->wire_count || + (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busy_locked(m)) return (0); pmap_remove_all(m); if (m->dirty) @@ -2164,7 +2280,7 @@ vm_page_cache(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); - if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) || m->busy || + if (vm_page_busy_locked(m) || (m->oflags & VPO_UNMANAGED) || m->hold_count || m->wire_count) panic("vm_page_cache: attempting to cache busy page"); KASSERT(!pmap_page_is_mapped(m), @@ -2350,21 +2466,29 @@ vm_page_t vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; + int sleep; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((allocflags & VM_ALLOC_RETRY) != 0, ("vm_page_grab: VM_ALLOC_RETRY is required")); + KASSERT((allocflags & VM_ALLOC_RBUSY) == 0 || + (allocflags & VM_ALLOC_IGN_RBUSY) != 0, + ("vm_page_grab: VM_ALLOC_RBUSY/VM_ALLOC_IGN_RBUSY mismatch")); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { - if ((m->oflags & VPO_BUSY) != 0 || - ((allocflags & VM_ALLOC_IGN_SBUSY) == 0 && m->busy != 0)) { + sleep = (allocflags & VM_ALLOC_IGN_RBUSY) != 0 ? + vm_page_busy_wlocked(m) : vm_page_busy_locked(m); + if (sleep) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. 
*/ vm_page_aflag_set(m, PGA_REFERENCED); - vm_page_sleep(m, "pgrbwt"); + vm_page_lock(m); + VM_OBJECT_WUNLOCK(object); + vm_page_busy_sleep(m, "pgrbwt"); + VM_OBJECT_WLOCK(object); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { @@ -2372,13 +2496,16 @@ retrylookup: vm_page_wire(m); vm_page_unlock(m); } - if ((allocflags & VM_ALLOC_NOBUSY) == 0) - vm_page_busy(m); + if ((allocflags & + (VM_ALLOC_NOBUSY | VM_ALLOC_RBUSY)) == 0) + vm_page_busy_wlock(m); + if ((allocflags & VM_ALLOC_RBUSY) != 0) + vm_page_busy_rlock(m); return (m); } } m = vm_page_alloc(object, pindex, allocflags & ~(VM_ALLOC_RETRY | - VM_ALLOC_IGN_SBUSY)); + VM_ALLOC_IGN_RBUSY)); if (m == NULL) { VM_OBJECT_WUNLOCK(object); VM_WAIT; @@ -2481,12 +2608,12 @@ vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits #endif /* - * If the object is locked and the page is neither VPO_BUSY nor + * If the object is locked and the page is neither write busy nor * write mapped, then the page's dirty field cannot possibly be * set by a concurrent pmap operation. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if ((m->oflags & VPO_BUSY) == 0 && !pmap_page_is_write_mapped(m)) + if (!vm_page_busy_wlocked(m) && !pmap_page_is_write_mapped(m)) m->dirty &= ~pagebits; else { /* @@ -2630,8 +2757,7 @@ vm_page_set_invalid(vm_page_t m, int base, int siz vm_page_bits_t bits; VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_BUSY) == 0, - ("vm_page_set_invalid: page %p is busy", m)); + vm_page_busy_assert_unlocked(m); bits = vm_page_bits(base, size); if (m->valid == VM_PAGE_BITS_ALL && bits != 0) pmap_remove_all(m); @@ -2697,7 +2823,7 @@ vm_page_is_valid(vm_page_t m, int base, int size) { vm_page_bits_t bits; - VM_OBJECT_ASSERT_WLOCKED(m->object); + VM_OBJECT_ASSERT_LOCKED(m->object); bits = vm_page_bits(base, size); return (m->valid != 0 && (m->valid & bits) == bits); } @@ -2857,12 +2983,11 @@ vm_page_object_lock_assert(vm_page_t m) /* * Certain of the page's fields may only be modified by the - * holder of the containing object's lock or the setter of the - * page's VPO_BUSY flag. Unfortunately, the setter of the - * VPO_BUSY flag is not recorded, and thus cannot be checked - * here. + * holder of the containing object's lock or the write busy. + * holder. Unfortunately, the holder of the write busy is + * not recorded, and thus cannot be checked here. */ - if (m->object != NULL && (m->oflags & VPO_BUSY) == 0) + if (m->object != NULL && !vm_page_busy_wlocked(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); } #endif @@ -2920,9 +3045,9 @@ DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) m = (vm_page_t)addr; db_printf( "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" - " af 0x%x of 0x%x f 0x%x act %d busy %d valid 0x%x dirty 0x%x\n", + " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, - m->flags, m->act_count, m->busy, m->valid, m->dirty); + m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (.../vmcontention/sys) (revision 252418) +++ vm/vm_page.h (.../vmobj-readlock/sys) (revision 252418) @@ -109,6 +109,17 @@ * contains the dirty field. In the machine-independent layer, * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). + * + * Page content access is regulated (mostly) by the busy mechanism. 
+ * When the page content is changing (for example, during a page READ + * operation) the page owner must acquire a hard busy token. Similarly, + * when the page content is just being accessed for reading purposes + * (for example, during a page WRITE operation) the page owner must + * acquire a soft busy token. + * The hard (write) busy mechanism is controlled through the + * vm_page_busy_wlock() and vm_page_busy_wunlock() interfaces. Likewise, + * the soft (read) busy mechanism is controlled through + * vm_page_busy_rlock() and vm_page_busy_runlock(). */ #if PAGE_SIZE == 4096 @@ -144,11 +155,12 @@ struct vm_page { uint8_t oflags; /* page VPO_* flags (O) */ uint16_t flags; /* page PG_* flags (P) */ u_char act_count; /* page usage count (P) */ - u_char busy; /* page busy count (O) */ + u_char busy; /* page busy count (O) */ /* NOTE that these must support one bit per DEV_BSIZE in a page!!! */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ + u_int busy_lock; /* busy owners lock */ }; /* @@ -165,12 +177,31 @@ struct vm_page { * mappings, and such pages are also not on any PQ queue. * */ -#define VPO_BUSY 0x01 /* page is in transit */ -#define VPO_WANTED 0x02 /* someone is waiting for page */ +#define VPO_UNUSED01 0x01 /* --available-- */ +#define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */ #define VPO_UNMANAGED 0x04 /* no PV management for page */ #define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */ #define VPO_NOSYNC 0x10 /* do not collect for syncer */ +/* + * The busy_lock word encodes the page busy state: the low bits are flags + * (read mode, single write owner, sleeping waiters) and the upper bits + * count the read busy holders. + */ +#define VPB_LOCK_READ 0x01 +#define VPB_LOCK_WRITE 0x02 +#define VPB_LOCK_WAITERS 0x04 +#define VPB_LOCK_FLAGMASK \ + (VPB_LOCK_READ | VPB_LOCK_WRITE | VPB_LOCK_WAITERS) + +#define VPB_READERS_SHIFT 3 +#define VPB_READERS(x) \ + (((x) & ~VPB_LOCK_FLAGMASK) >> VPB_READERS_SHIFT) +#define VPB_READERS_LOCK(x) ((x) << VPB_READERS_SHIFT | VPB_LOCK_READ) +#define VPB_ONE_READER (1 << VPB_READERS_SHIFT) + +#define VPB_SINGLE_WRITER VPB_LOCK_WRITE + +#define VPB_UNLOCKED VPB_READERS_LOCK(0) + #define PQ_NONE 255 #define PQ_INACTIVE 0 #define PQ_ACTIVE 1 @@ -248,8 +279,8 @@ extern struct mtx_padalign pa_lock[]; * directly set this flag. They should call vm_page_reference() instead. * * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). When it - * does so, the page must be VPO_BUSY. The MI VM layer must never access this - * flag directly. Instead, it should call pmap_page_is_write_mapped(). + * does so, the page must be write busied. The MI VM layer must never access + * this flag directly. Instead, it should call pmap_page_is_write_mapped(). * * PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has * at least one executable mapping. It is not consumed by the MI VM layer. 
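The VPB_* macros added above pack the entire busy state of a page into the single busy_lock word: the three low bits are flags and the remaining bits hold the read busy (reader) count. The following standalone userland sketch only mirrors those macro values to show the resulting encodings; it is illustrative and is not part of the patch:

#include <stdio.h>

/* Mirror of the VPB_* encoding added to vm_page.h by this patch. */
#define	VPB_LOCK_READ		0x01
#define	VPB_LOCK_WRITE		0x02
#define	VPB_LOCK_WAITERS	0x04
#define	VPB_LOCK_FLAGMASK	\
	(VPB_LOCK_READ | VPB_LOCK_WRITE | VPB_LOCK_WAITERS)
#define	VPB_READERS_SHIFT	3
#define	VPB_READERS(x)		(((x) & ~VPB_LOCK_FLAGMASK) >> VPB_READERS_SHIFT)
#define	VPB_READERS_LOCK(x)	((x) << VPB_READERS_SHIFT | VPB_LOCK_READ)
#define	VPB_ONE_READER		(1 << VPB_READERS_SHIFT)
#define	VPB_SINGLE_WRITER	VPB_LOCK_WRITE
#define	VPB_UNLOCKED		VPB_READERS_LOCK(0)

int
main(void)
{
	unsigned int x;

	/* Unlocked: VPB_LOCK_READ set with a reader count of zero. */
	x = VPB_UNLOCKED;
	printf("unlocked      %#04x  readers %u\n", x, VPB_READERS(x));

	/* Read busied; each tryrlock holder adds VPB_ONE_READER. */
	x = VPB_READERS_LOCK(1);
	printf("one reader    %#04x  readers %u\n", x, VPB_READERS(x));
	x += VPB_ONE_READER;
	printf("two readers   %#04x  readers %u\n", x, VPB_READERS(x));

	/* Write busied: only VPB_LOCK_WRITE, plus VPB_LOCK_WAITERS if any. */
	x = VPB_SINGLE_WRITER | VPB_LOCK_WAITERS;
	printf("writer+waiter %#04x  readers %u\n", x, VPB_READERS(x));
	return (0);
}

Built with any C compiler, this prints 0x01 for the unlocked word, 0x09 and 0x11 for one and two read busy holders, and 0x06 for a write busy owner with a waiter, which are the states the new primitives move between with atomic_cmpset.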
@@ -334,8 +365,9 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); #define VM_ALLOC_NOBUSY 0x0200 /* Do not busy the page */ #define VM_ALLOC_IFCACHED 0x0400 /* Fail if the page is not cached */ #define VM_ALLOC_IFNOTCACHED 0x0800 /* Fail if the page is cached */ -#define VM_ALLOC_IGN_SBUSY 0x1000 /* vm_page_grab() only */ +#define VM_ALLOC_IGN_RBUSY 0x1000 /* vm_page_grab() only */ #define VM_ALLOC_NODUMP 0x2000 /* don't include in dump */ +#define VM_ALLOC_RBUSY 0x4000 /* Read busy the page */ #define VM_ALLOC_COUNT_SHIFT 16 #define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT) @@ -359,15 +391,17 @@ malloc2vm_flags(int malloc_flags) } #endif -void vm_page_busy(vm_page_t m); +void vm_page_busy_downgrade(vm_page_t m); +int vm_page_busy_rlocked(vm_page_t m); +void vm_page_busy_runlock(vm_page_t m); +void vm_page_busy_sleep(vm_page_t m, const char *msg); +int vm_page_busy_tryrlock(vm_page_t m); +void vm_page_busy_wunlock_hard(vm_page_t m); void vm_page_flash(vm_page_t m); -void vm_page_io_start(vm_page_t m); -void vm_page_io_finish(vm_page_t m); void vm_page_hold(vm_page_t mem); void vm_page_unhold(vm_page_t mem); void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); -void vm_page_wakeup(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); @@ -402,7 +436,7 @@ void vm_page_rename (vm_page_t, vm_object_t, vm_pi void vm_page_requeue(vm_page_t m); void vm_page_requeue_locked(vm_page_t m); void vm_page_set_valid_range(vm_page_t m, int base, int size); -void vm_page_sleep(vm_page_t m, const char *msg); +int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unwire (vm_page_t, int); @@ -430,6 +464,46 @@ void vm_page_assert_locked_KBI(vm_page_t m, const void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line); #endif +#define vm_page_busy_assert_rlocked(m) \ + KASSERT(vm_page_busy_rlocked(m), \ + ("vm_page_busy_assert_rlocked: page %p not read busy @ %s:%d", \ + (void *)m, __FILE__, __LINE__)); + +#define vm_page_busy_assert_unlocked(m) \ + KASSERT(!vm_page_busy_locked(m), \ + ("vm_page_busy_assert_unlocked: page %p busy @ %s:%d", \ + (void *)m, __FILE__, __LINE__)); + +#define vm_page_busy_assert_wlocked(m) \ + KASSERT(vm_page_busy_wlocked(m), \ + ("vm_page_busy_assert_wlocked: page %p not write busy @ %s:%d", \ + (void *)m, __FILE__, __LINE__)); + +#define vm_page_busy_locked(m) \ + ((m)->busy_lock != VPB_UNLOCKED) + +#define vm_page_busy_rlock(m) do { \ + if (!vm_page_busy_tryrlock(m)) \ + panic("%s: page %p failed read busing", __func__, m); \ +} while (0) + +#define vm_page_busy_trywlock(m) \ + (atomic_cmpset_acq_int(&m->busy_lock, VPB_UNLOCKED, VPB_SINGLE_WRITER)) + +#define vm_page_busy_wlock(m) do { \ + if (!vm_page_busy_trywlock(m)) \ + panic("%s: page %p failed write busing", __func__, m); \ +} while (0) + +#define vm_page_busy_wlocked(m) \ + ((m->busy_lock & VPB_SINGLE_WRITER) != 0) + +#define vm_page_busy_wunlock(m) do { \ + if (!atomic_cmpset_rel_int(&(m)->busy_lock, VPB_SINGLE_WRITER, \ + VPB_UNLOCKED)) \ + vm_page_busy_wunlock_hard(m); \ +} while (0) + #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m); #define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m) @@ -484,11 +558,11 @@ vm_page_aflag_set(vm_page_t m, uint8_t bits) /* * The PGA_WRITEABLE flag can only be set if the page is managed and 
+ * write busied. Currently, this flag is only set by pmap_enter(). */ KASSERT((bits & PGA_WRITEABLE) == 0 || - (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY, - ("vm_page_aflag_set: PGA_WRITEABLE and !VPO_BUSY")); + ((m->oflags & VPO_UNMANAGED) == 0 && vm_page_busy_wlocked(m)), + ("vm_page_aflag_set: PGA_WRITEABLE and not write busy")); /* * Access the whole 32-bit word containing the aflags field with an @@ -544,27 +618,6 @@ vm_page_remque(vm_page_t m) } /* - * vm_page_sleep_if_busy: - * - * Sleep and release the page queues lock if VPO_BUSY is set or, - * if also_m_busy is TRUE, busy is non-zero. Returns TRUE if the - * thread slept and the page queues lock was released. - * Otherwise, retains the page queues lock and returns FALSE. - * - * The object containing the given page must be locked. - */ -static __inline int -vm_page_sleep_if_busy(vm_page_t m, int also_m_busy, const char *msg) -{ - - if ((m->oflags & VPO_BUSY) || (also_m_busy && m->busy)) { - vm_page_sleep(m, msg); - return (TRUE); - } - return (FALSE); -} - -/* * vm_page_undirty: * * Set page to not be dirty. Note: does not clear pmap modify bits Index: vm/vm_map.h =================================================================== --- vm/vm_map.h (.../vmcontention/sys) (revision 252418) +++ vm/vm_map.h (.../vmobj-readlock/sys) (revision 252418) @@ -328,6 +328,7 @@ long vmspace_resident_count(struct vmspace *vmspac #define VM_FAULT_NORMAL 0 /* Nothing special */ #define VM_FAULT_CHANGE_WIRING 1 /* Change the wiring as appropriate */ #define VM_FAULT_DIRTY 2 /* Dirty the page; use w/VM_PROT_COPY */ +#define VM_FAULT_IOBUSY 4 /* Busy the faulted page */ /* * Initially, mappings are slightly sequential. The maximum window size must Index: vm/swap_pager.c =================================================================== --- vm/swap_pager.c (.../vmcontention/sys) (revision 252418) +++ vm/swap_pager.c (.../vmobj-readlock/sys) (revision 252418) @@ -1219,9 +1219,10 @@ swap_pager_getpages(vm_object_t object, vm_page_t */ VM_OBJECT_WLOCK(object); while ((mreq->oflags & VPO_SWAPINPROG) != 0) { - mreq->oflags |= VPO_WANTED; + mreq->oflags |= VPO_SWAPSLEEP; PCPU_INC(cnt.v_intrans); - if (VM_OBJECT_SLEEP(object, mreq, PSWP, "swread", hz * 20)) { + if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP, + "swread", hz * 20)) { printf( "swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n", bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount); @@ -1459,12 +1460,6 @@ swap_pager_putpages(vm_object_t object, vm_page_t * Completion routine for asynchronous reads and writes from/to swap. * Also called manually by synchronous code to finish up a bp. * - * For READ operations, the pages are VPO_BUSY'd. For WRITE operations, - * the pages are vm_page_t->busy'd. For READ operations, we VPO_BUSY - * unbusy all pages except the 'main' request page. For WRITE - * operations, we vm_page_t->busy'd unbusy all pages ( we can do this - * because we marked them all VM_PAGER_PEND on return from putpages ). - * * This routine may not sleep. 
*/ static void @@ -1514,6 +1509,10 @@ swp_pager_async_iodone(struct buf *bp) vm_page_t m = bp->b_pages[i]; m->oflags &= ~VPO_SWAPINPROG; + if (m->oflags & VPO_SWAPSLEEP) { + m->oflags &= ~VPO_SWAPSLEEP; + wakeup(&object->paging_in_progress); + } if (bp->b_ioflags & BIO_ERROR) { /* @@ -1542,8 +1541,11 @@ swp_pager_async_iodone(struct buf *bp) m->valid = 0; if (i != bp->b_pager.pg_reqpage) swp_pager_free_nrpage(m); - else + else { + vm_page_lock(m); vm_page_flash(m); + vm_page_unlock(m); + } /* * If i == bp->b_pager.pg_reqpage, do not wake * the page up. The caller needs to. @@ -1558,7 +1560,7 @@ swp_pager_async_iodone(struct buf *bp) vm_page_lock(m); vm_page_activate(m); vm_page_unlock(m); - vm_page_io_finish(m); + vm_page_busy_runlock(m); } } else if (bp->b_iocmd == BIO_READ) { /* @@ -1575,7 +1577,7 @@ swp_pager_async_iodone(struct buf *bp) * Note that the requested page, reqpage, is left * busied, but we still have to wake it up. The * other pages are released (unbusied) by - * vm_page_wakeup(). + * vm_page_busy_wunlock(). */ KASSERT(!pmap_page_is_mapped(m), ("swp_pager_async_iodone: page %p is mapped", m)); @@ -1595,9 +1597,12 @@ swp_pager_async_iodone(struct buf *bp) vm_page_lock(m); vm_page_deactivate(m); vm_page_unlock(m); - vm_page_wakeup(m); - } else + vm_page_busy_wunlock(m); + } else { + vm_page_lock(m); vm_page_flash(m); + vm_page_unlock(m); + } } else { /* * For write success, clear the dirty @@ -1608,7 +1613,7 @@ swp_pager_async_iodone(struct buf *bp) ("swp_pager_async_iodone: page %p is not write" " protected", m)); vm_page_undirty(m); - vm_page_io_finish(m); + vm_page_busy_runlock(m); if (vm_page_count_severe()) { vm_page_lock(m); vm_page_try_to_cache(m); @@ -1706,19 +1711,18 @@ swp_pager_force_pagein(vm_object_t object, vm_pind vm_page_t m; vm_object_pip_add(object, 1); - m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | - VM_ALLOC_NOBUSY); + m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid == VM_PAGE_BITS_ALL) { vm_object_pip_subtract(object, 1); vm_page_dirty(m); vm_page_lock(m); vm_page_activate(m); vm_page_unlock(m); + vm_page_busy_wunlock(m); vm_pager_page_unswapped(m); return; } - vm_page_busy(m); if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_subtract(object, 1); @@ -1726,7 +1730,7 @@ swp_pager_force_pagein(vm_object_t object, vm_pind vm_page_lock(m); vm_page_deactivate(m); vm_page_unlock(m); - vm_page_wakeup(m); + vm_page_busy_wunlock(m); vm_pager_page_unswapped(m); } Index: vm/vnode_pager.c =================================================================== --- vm/vnode_pager.c (.../vmcontention/sys) (revision 252418) +++ vm/vnode_pager.c (.../vmobj-readlock/sys) (revision 252418) @@ -1140,8 +1140,7 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_ * pmap operation. 
*/ m = ma[ncount - 1]; - KASSERT(m->busy > 0, - ("vnode_pager_generic_putpages: page %p is not busy", m)); + vm_page_busy_assert_rlocked(m); KASSERT(!pmap_page_is_write_mapped(m), ("vnode_pager_generic_putpages: page %p is not read-only", m)); vm_page_clear_dirty(m, pgoff, PAGE_SIZE - Index: vm/vm_kern.c =================================================================== --- vm/vm_kern.c (.../vmcontention/sys) (revision 252418) +++ vm/vm_kern.c (.../vmobj-readlock/sys) (revision 252418) @@ -563,7 +563,7 @@ retry: */ pmap_enter(kernel_pmap, addr + i, VM_PROT_ALL, m, VM_PROT_ALL, TRUE); - vm_page_wakeup(m); + vm_page_busy_wunlock(m); } VM_OBJECT_WUNLOCK(kmem_object); Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /user/attilio/vmcontention/sys:r248156-252416
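The busy primitives introduced in vm_page.c above are small compare-and-swap loops over busy_lock. As a closing illustration, the self-contained C11 sketch below models the reader-side transitions performed by vm_page_busy_tryrlock() and by the fast path of vm_page_busy_runlock(); the waiter handling (the page lock, VPB_LOCK_WAITERS, and wakeup(9)) is deliberately omitted, so treat it as an approximation of the algorithm rather than code from the patch:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Mirror of the VPB_* encoding from vm_page.h as modified by this patch. */
#define	VPB_LOCK_READ		0x01
#define	VPB_LOCK_WRITE		0x02
#define	VPB_LOCK_WAITERS	0x04
#define	VPB_LOCK_FLAGMASK	\
	(VPB_LOCK_READ | VPB_LOCK_WRITE | VPB_LOCK_WAITERS)
#define	VPB_READERS_SHIFT	3
#define	VPB_READERS(x)		(((x) & ~VPB_LOCK_FLAGMASK) >> VPB_READERS_SHIFT)
#define	VPB_READERS_LOCK(x)	((x) << VPB_READERS_SHIFT | VPB_LOCK_READ)
#define	VPB_ONE_READER		(1 << VPB_READERS_SHIFT)
#define	VPB_UNLOCKED		VPB_READERS_LOCK(0)

/* Stand-in for the busy_lock word embedded in struct vm_page. */
static _Atomic unsigned int busy_lock = VPB_UNLOCKED;

/* Model of vm_page_busy_tryrlock(): add one reader unless write busied. */
static bool
model_tryrlock(void)
{
	unsigned int x;

	x = atomic_load(&busy_lock);
	return ((x & VPB_LOCK_READ) != 0 &&
	    atomic_compare_exchange_strong(&busy_lock, &x,
	    x + VPB_ONE_READER));
}

/* Model of the fast path of vm_page_busy_runlock(): drop one reader. */
static void
model_runlock(void)
{
	unsigned int x;

	for (;;) {
		x = atomic_load(&busy_lock);
		if (VPB_READERS(x) > 1) {
			if (atomic_compare_exchange_strong(&busy_lock, &x,
			    x - VPB_ONE_READER))
				break;
		} else if (atomic_compare_exchange_strong(&busy_lock, &x,
		    VPB_UNLOCKED))
			break;		/* Last reader; waiter wakeup elided. */
	}
}

int
main(void)
{

	if (model_tryrlock() && model_tryrlock())
		printf("readers held: %u\n",
		    VPB_READERS(atomic_load(&busy_lock)));
	model_runlock();
	model_runlock();
	printf("final busy_lock word: %#x\n", atomic_load(&busy_lock));
	return (0);
}

The kernel-side loops follow the same pattern, extended with the VPB_LOCK_WAITERS bit and the page lock so that the last reader or the write busy owner can wake up threads sleeping in vm_page_busy_sleep().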