Index: nfsclient/nfs_bio.c =================================================================== --- nfsclient/nfs_bio.c (revision 188509) +++ nfsclient/nfs_bio.c (working copy) @@ -135,19 +135,19 @@ nfs_getpages(struct vop_getpages_args *ap) vm_page_t m = pages[ap->a_reqpage]; VM_OBJECT_LOCK(object); - vm_page_lock_queues(); if (m->valid != 0) { /* handled by vm_fault now */ /* vm_page_zero_invalid(m, TRUE); */ for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return(0); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); } @@ -180,12 +180,13 @@ nfs_getpages(struct vop_getpages_args *ap) if (error && (uio.uio_resid == count)) { nfs_printf("nfs_getpages: error %d\n", error); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return VM_PAGER_ERROR; } @@ -198,12 +199,12 @@ nfs_getpages(struct vop_getpages_args *ap) size = count - uio.uio_resid; VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; m = pages[i]; + vm_page_lock(m); if (nextoff <= size) { /* * Read operation filled an entire page @@ -249,6 +250,6 @@ nfs_getpages(struct vop_getpages_args *ap) vm_page_free(m); } } + vm_page_unlock(m); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); Index: ufs/ffs/ffs_vnops.c =================================================================== --- ufs/ffs/ffs_vnops.c (revision 188509) +++ ufs/ffs/ffs_vnops.c (working copy) @@ -838,13 +838,13 @@ ffs_getpages(ap) if (mreq->valid) { if (mreq->valid != VM_PAGE_BITS_ALL) vm_page_zero_invalid(mreq, TRUE); - vm_page_lock_queues(); for (i = 0; i < 
pcount; i++) { if (i != ap->a_reqpage) { + vm_page_lock(ap->a_m[i]); vm_page_free(ap->a_m[i]); + vm_page_unlock(ap->a_m[i]); } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(mreq->object); return VM_PAGER_OK; } Index: kern/uipc_syscalls.c =================================================================== --- kern/uipc_syscalls.c (revision 188509) +++ kern/uipc_syscalls.c (working copy) @@ -1695,7 +1695,7 @@ sf_buf_mext(void *addr, void *args) m = sf_buf_page(args); sf_buf_free(args); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); /* * Check for the object going away on us. This can @@ -1704,7 +1704,7 @@ sf_buf_mext(void *addr, void *args) */ if (m->wire_count == 0 && m->object == NULL) vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); if (addr == NULL) return; sfs = addr; @@ -2094,7 +2094,7 @@ retry_space: mbstat.sf_iocnt++; } if (error) { - vm_page_lock_queues(); + vm_page_lock(pg); vm_page_unwire(pg, 0); /* * See if anyone else might know about @@ -2106,7 +2106,7 @@ retry_space: pg->hold_count == 0) { vm_page_free(pg); } - vm_page_unlock_queues(); + vm_page_unlock(pg); VM_OBJECT_UNLOCK(obj); if (error == EAGAIN) error = 0; /* not a real error */ @@ -2120,14 +2120,14 @@ retry_space: if ((sf = sf_buf_alloc(pg, (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) { mbstat.sf_allocfail++; - vm_page_lock_queues(); + vm_page_lock(pg); vm_page_unwire(pg, 0); /* * XXX: Not same check as above!? */ if (pg->wire_count == 0 && pg->object == NULL) vm_page_free(pg); - vm_page_unlock_queues(); + vm_page_unlock(pg); error = (mnw ? 
EAGAIN : EINTR); break; } Index: kern/vfs_bio.c =================================================================== --- kern/vfs_bio.c (revision 188509) +++ kern/vfs_bio.c (working copy) @@ -1287,9 +1287,9 @@ brelse(struct buf *bp) (PAGE_SIZE - poffset) : resid; KASSERT(presid >= 0, ("brelse: extra page")); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_set_invalid(m, poffset, presid); - vm_page_unlock_queues(); + vm_page_unlock(m); if (had_bogus) printf("avoided corruption bug in bogus_page/brelse code\n"); } @@ -1497,10 +1497,10 @@ vfs_vmio_release(struct buf *bp) vm_page_t m; VM_OBJECT_LOCK(bp->b_bufobj->bo_object); - vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; + vm_page_lock(m); /* * In order to keep page LRU ordering consistent, put * everything on the inactive queue. @@ -1511,8 +1511,10 @@ vfs_vmio_release(struct buf *bp) * the responsibility of the process that * busied the pages to deal with them. */ - if ((m->oflags & VPO_BUSY) || (m->busy != 0)) + if ((m->oflags & VPO_BUSY) || (m->busy != 0)) { + vm_page_unlock(m); continue; + } if (m->wire_count == 0) { /* @@ -1529,8 +1531,8 @@ vfs_vmio_release(struct buf *bp) vm_page_try_to_cache(m); } } + vm_page_unlock(m); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); @@ -2309,13 +2311,15 @@ vfs_setdirty_locked_object(struct buf *bp) vm_offset_t boffset; vm_offset_t eoffset; - vm_page_lock_queues(); /* * test the pages to see if they have been modified directly * by users through the VM system. 
*/ - for (i = 0; i < bp->b_npages; i++) + for (i = 0; i < bp->b_npages; i++) { + vm_page_lock(bp->b_pages[i]); vm_page_test_dirty(bp->b_pages[i]); + vm_page_unlock(bp->b_pages[i]); + } /* * Calculate the encompassing dirty range, boffset and eoffset, @@ -2335,7 +2339,6 @@ vfs_setdirty_locked_object(struct buf *bp) } eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); - vm_page_unlock_queues(); /* * Fit it to the buffer. */ @@ -2801,7 +2804,6 @@ allocbuf(struct buf *bp, int size) vm_page_t m; VM_OBJECT_LOCK(bp->b_bufobj->bo_object); - vm_page_lock_queues(); for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it @@ -2811,13 +2813,14 @@ allocbuf(struct buf *bp, int size) m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); + vm_page_lock(m); while (vm_page_sleep_if_busy(m, TRUE, "biodep")) - vm_page_lock_queues(); + vm_page_lock(m); bp->b_pages[i] = NULL; vm_page_unwire(m, 0); + vm_page_unlock(m); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); pmap_qremove((vm_offset_t) trunc_page((vm_offset_t)bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); @@ -2891,9 +2894,9 @@ allocbuf(struct buf *bp, int size) /* * We have a good page. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wire(m); - vm_page_unlock_queues(); + vm_page_unlock(m); bp->b_pages[bp->b_npages] = m; ++bp->b_npages; } @@ -3162,7 +3165,6 @@ bufdone_finish(struct buf *bp) vm_object_t obj; int iosize; struct vnode *vp = bp->b_vp; - boolean_t are_queues_locked; obj = bp->b_bufobj->bo_object; @@ -3199,11 +3201,6 @@ bufdone_finish(struct buf *bp) !(bp->b_ioflags & BIO_ERROR)) { bp->b_flags |= B_CACHE; } - if (bp->b_iocmd == BIO_READ) { - vm_page_lock_queues(); - are_queues_locked = TRUE; - } else - are_queues_locked = FALSE; for (i = 0; i < bp->b_npages; i++) { int bogusflag = 0; int resid; @@ -3239,7 +3236,9 @@ bufdone_finish(struct buf *bp) * only need to do this here in the read case. 
*/ if ((bp->b_iocmd == BIO_READ) && !bogusflag && resid > 0) { + vm_page_lock(m); vfs_page_set_valid(bp, foff, m); + vm_page_unlock(m); } /* @@ -3272,8 +3271,6 @@ bufdone_finish(struct buf *bp) foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; iosize -= resid; } - if (are_queues_locked) - vm_page_unlock_queues(); vm_object_pip_wakeupn(obj, 0); VM_OBJECT_UNLOCK(obj); } @@ -3341,7 +3338,7 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t of { vm_ooffset_t soff, eoff; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); /* * Start and end offsets in buffer. eoff - soff may not cross a * page boundry or cross the end of the buffer. The end of the @@ -3404,10 +3401,10 @@ retry: goto retry; } bogus = 0; - vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; + vm_page_lock(m); if ((bp->b_flags & B_CLUSTER) == 0) { vm_object_pip_add(obj, 1); vm_page_io_start(m); @@ -3435,9 +3432,9 @@ retry: bp->b_pages[i] = bogus_page; bogus++; } + vm_page_unlock(m); foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(obj); if (bogus) pmap_qenter(trunc_page((vm_offset_t)bp->b_data), @@ -3466,7 +3463,6 @@ vfs_clean_pages(struct buf *bp) KASSERT(bp->b_offset != NOOFFSET, ("vfs_clean_pages: no buffer offset")); VM_OBJECT_LOCK(bp->b_bufobj->bo_object); - vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; @@ -3474,11 +3470,12 @@ vfs_clean_pages(struct buf *bp) if (eoff > bp->b_offset + bp->b_bufsize) eoff = bp->b_offset + bp->b_bufsize; + vm_page_lock(m); vfs_page_set_valid(bp, foff, m); /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */ + vm_page_unlock(m); foff = noff; } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); } @@ -3509,17 +3506,17 @@ vfs_bio_set_validclean(struct buf *bp, int base, i n = PAGE_SIZE - (base & PAGE_MASK); VM_OBJECT_LOCK(bp->b_bufobj->bo_object); - vm_page_lock_queues(); 
for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { m = bp->b_pages[i]; if (n > size) n = size; + vm_page_lock(m); vm_page_set_validclean(m, base & PAGE_MASK, n); + vm_page_unlock(m); base += n; size -= n; n = PAGE_SIZE; } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); } @@ -3659,10 +3656,10 @@ vm_hold_free_pages(struct buf *bp, vm_offset_t fro } bp->b_pages[index] = NULL; pmap_qremove(pg, 1); - vm_page_lock_queues(); + vm_page_lock(p); vm_page_unwire(p, 0); vm_page_free(p); - vm_page_unlock_queues(); + vm_page_unlock(p); } } VM_OBJECT_UNLOCK(kernel_object); @@ -3709,12 +3706,15 @@ vmapbuf(struct buf *bp) retry: if (vm_fault_quick(addr >= bp->b_data ? addr : bp->b_data, prot) < 0) { - vm_page_lock_queues(); for (i = 0; i < pidx; ++i) { - vm_page_unhold(bp->b_pages[i]); + vm_page_t m; + + m = bp->b_pages[i]; bp->b_pages[i] = NULL; + vm_page_lock(m); + vm_page_unhold(m); + vm_page_unlock(m); } - vm_page_unlock_queues(); return(-1); } m = pmap_extract_and_hold(pmap, (vm_offset_t)addr, prot); @@ -3740,15 +3740,18 @@ retry: void vunmapbuf(struct buf *bp) { + vm_page_t m; int pidx; int npages; npages = bp->b_npages; pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); - vm_page_lock_queues(); - for (pidx = 0; pidx < npages; pidx++) - vm_page_unhold(bp->b_pages[pidx]); - vm_page_unlock_queues(); + for (pidx = 0; pidx < npages; pidx++) { + m = bp->b_pages[pidx]; + vm_page_lock(m); + vm_page_unhold(m); + vm_page_unlock(m); + } bp->b_data = bp->b_saveaddr; } Index: kern/kern_subr.c =================================================================== --- kern/kern_subr.c (revision 188509) +++ kern/kern_subr.c (working copy) @@ -107,9 +107,10 @@ retry: if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) goto retry; - vm_page_lock_queues(); + vm_page_lock(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); + vm_page_unlock(user_pg); } else { /* * Even if a physical 
page does not exist in the @@ -118,11 +119,11 @@ retry: */ if (uobject->backing_object != NULL) pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE); - vm_page_lock_queues(); } + vm_page_lock(kern_pg); vm_page_insert(kern_pg, uobject, upindex); vm_page_dirty(kern_pg); - vm_page_unlock_queues(); + vm_page_unlock(kern_pg); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); Index: kern/kern_exec.c =================================================================== --- kern/kern_exec.c (revision 188509) +++ kern/kern_exec.c (working copy) @@ -941,17 +941,17 @@ exec_map_first_page(imgp) if ((rv != VM_PAGER_OK) || (ma[0] == NULL) || (ma[0]->valid == 0)) { if (ma[0]) { - vm_page_lock_queues(); + vm_page_lock(ma[0]); vm_page_free(ma[0]); - vm_page_unlock_queues(); + vm_page_unlock(ma[0]); } VM_OBJECT_UNLOCK(object); return (EIO); } } - vm_page_lock_queues(); + vm_page_lock(ma[0]); vm_page_hold(ma[0]); - vm_page_unlock_queues(); + vm_page_unlock(ma[0]); vm_page_wakeup(ma[0]); VM_OBJECT_UNLOCK(object); @@ -971,9 +971,9 @@ exec_unmap_first_page(imgp) m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } } Index: kern/uipc_shm.c =================================================================== --- kern/uipc_shm.c (revision 188509) +++ kern/uipc_shm.c (working copy) @@ -286,11 +286,11 @@ shm_dotruncate(struct shmfd *shmfd, off_t length) int size = PAGE_SIZE - base; pmap_zero_page_area(m, base, size); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_set_validclean(m, base, size); if (m->dirty != 0) m->dirty = VM_PAGE_BITS_ALL; - vm_page_unlock_queues(); + vm_page_unlock(m); } else if ((length & PAGE_MASK) && __predict_false(object->cache != NULL)) { vm_page_cache_free(object, OFF_TO_IDX(length), Index: kern/sys_pipe.c =================================================================== --- kern/sys_pipe.c 
(revision 188509) +++ kern/sys_pipe.c (working copy) @@ -762,17 +762,16 @@ pipe_build_write_buffer(wpipe, uio) endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { - /* - * vm_fault_quick() can sleep. Consequently, - * vm_page_lock_queue() and vm_page_unlock_queue() - * should not be performed outside of this loop. - */ race: if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { - vm_page_lock_queues(); - for (j = 0; j < i; j++) - vm_page_unhold(wpipe->pipe_map.ms[j]); - vm_page_unlock_queues(); + for (j = 0; j < i; j++) { + vm_page_t m; + + m = wpipe->pipe_map.ms[j]; + vm_page_lock(m); + vm_page_unhold(m); + vm_page_unlock(m); + } return (EFAULT); } wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, @@ -809,14 +808,16 @@ static void pipe_destroy_write_buffer(wpipe) struct pipe *wpipe; { + vm_page_t m; int i; PIPE_LOCK_ASSERT(wpipe, MA_OWNED); - vm_page_lock_queues(); for (i = 0; i < wpipe->pipe_map.npages; i++) { + m = wpipe->pipe_map.ms[i]; + vm_page_lock(m); vm_page_unhold(wpipe->pipe_map.ms[i]); + vm_page_unlock(m); } - vm_page_unlock_queues(); wpipe->pipe_map.npages = 0; } Index: kern/uipc_cow.c =================================================================== --- kern/uipc_cow.c (revision 188509) +++ kern/uipc_cow.c (working copy) @@ -80,7 +80,7 @@ socow_iodone(void *addr, void *args) pp = sf_buf_page(sf); sf_buf_free(sf); /* remove COW mapping */ - vm_page_lock_queues(); + vm_page_lock(pp); vm_page_cowclear(pp); vm_page_unwire(pp, 0); /* @@ -90,7 +90,7 @@ socow_iodone(void *addr, void *args) */ if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); socow_stats.iodone++; } @@ -128,10 +128,10 @@ socow_setup(struct mbuf *m0, struct uio *uio) /* * set up COW */ - vm_page_lock_queues(); + vm_page_lock(pp); if (vm_page_cowsetup(pp) != 0) { vm_page_unhold(pp); - 
vm_page_unlock_queues(); + vm_page_unlock(pp); return (0); } @@ -140,14 +140,14 @@ socow_setup(struct mbuf *m0, struct uio *uio) */ vm_page_wire(pp); vm_page_unhold(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); /* * Allocate an sf buf */ sf = sf_buf_alloc(pp, SFB_CATCH); if (!sf) { - vm_page_lock_queues(); + vm_page_lock(pp); vm_page_cowclear(pp); vm_page_unwire(pp, 0); /* @@ -157,7 +157,7 @@ socow_setup(struct mbuf *m0, struct uio *uio) */ if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); socow_stats.fail_sf_buf++; return(0); } Index: kern/sys_process.c =================================================================== --- kern/sys_process.c (revision 188509) +++ kern/sys_process.c (working copy) @@ -310,9 +310,9 @@ proc_rwmem(struct proc *p, struct uio *uio) /* * Hold the page in memory. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_hold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); /* * We're done with tmap now. @@ -327,9 +327,9 @@ proc_rwmem(struct proc *p, struct uio *uio) /* * Release the page. 
*/ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); Index: dev/md/md.c =================================================================== --- dev/md/md.c (revision 188509) +++ dev/md/md.c (working copy) @@ -629,9 +629,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp) if (rv == VM_PAGER_ERROR) { sf_buf_free(sf); sched_unpin(); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wakeup(m); - vm_page_unlock_queues(); + vm_page_unlock(m); break; } bcopy((void *)(sf_buf_kva(sf) + offs), p, len); @@ -641,9 +641,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp) if (rv == VM_PAGER_ERROR) { sf_buf_free(sf); sched_unpin(); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wakeup(m); - vm_page_unlock_queues(); + vm_page_unlock(m); break; } bcopy(p, (void *)(sf_buf_kva(sf) + offs), len); @@ -655,9 +655,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp) if (rv == VM_PAGER_ERROR) { sf_buf_free(sf); sched_unpin(); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wakeup(m); - vm_page_unlock_queues(); + vm_page_unlock(m); break; } bzero((void *)(sf_buf_kva(sf) + offs), len); @@ -667,12 +667,12 @@ mdstart_swap(struct md_s *sc, struct bio *bp) } sf_buf_free(sf); sched_unpin(); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wakeup(m); vm_page_activate(m); if (bp->bio_cmd == BIO_WRITE) vm_page_dirty(m); - vm_page_unlock_queues(); + vm_page_unlock(m); /* Actions on further pages start at offset 0 */ p += PAGE_SIZE - offs; Index: vm/vm_kern.c =================================================================== --- vm/vm_kern.c (revision 188509) +++ vm/vm_kern.c (working copy) @@ -349,10 +349,10 @@ retry: i -= PAGE_SIZE; m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(kmem_object); freelist = NULL; Index: 
vm/vm_pageout.c =================================================================== --- vm/vm_pageout.c (revision 188509) +++ vm/vm_pageout.c (working copy) @@ -286,7 +286,7 @@ vm_pageout_clean(m) int ib, is, page_base; vm_pindex_t pindex = m->pindex; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); /* @@ -305,7 +305,9 @@ vm_pageout_clean(m) ((m->busy != 0) || (m->oflags & VPO_BUSY))) { return 0; } - + vm_page_io_start(m); + pmap_remove_write(m); + vm_page_unlock(m); mc[vm_pageout_page_count] = m; pageout_count = 1; page_base = vm_pageout_page_count; @@ -349,14 +351,19 @@ more: ib = 0; break; } + vm_page_lock(p); vm_page_test_dirty(p); if ((p->dirty & p->valid) == 0 || p->queue != PQ_INACTIVE || p->wire_count != 0 || /* may be held by buf cache */ p->hold_count != 0) { /* may be undergoing I/O */ ib = 0; + vm_page_unlock(p); break; } + vm_page_io_start(p); + pmap_remove_write(p); + vm_page_unlock(p); mc[--page_base] = p; ++pageout_count; ++ib; @@ -377,13 +384,18 @@ more: if ((p->oflags & VPO_BUSY) || p->busy) { break; } + vm_page_lock(p); vm_page_test_dirty(p); if ((p->dirty & p->valid) == 0 || p->queue != PQ_INACTIVE || p->wire_count != 0 || /* may be held by buf cache */ p->hold_count != 0) { /* may be undergoing I/O */ + vm_page_unlock(p); break; } + vm_page_io_start(p); + pmap_remove_write(p); + vm_page_unlock(p); mc[page_base + pageout_count] = p; ++pageout_count; ++is; @@ -420,12 +432,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int fla int numpagedout = 0; int i; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); /* - * Initiate I/O. Bump the vm_page_t->busy counter and - * mark the pages read-only. - * * We do not have to fixup the clean/dirty bits here... we can * allow the pager to do it after the I/O completes. 
* @@ -436,18 +444,15 @@ vm_pageout_flush(vm_page_t *mc, int count, int fla KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); - vm_page_io_start(mc[i]); - pmap_remove_write(mc[i]); } - vm_page_unlock_queues(); vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); - vm_page_lock_queues(); for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; + vm_page_lock(mt); KASSERT(pageout_status[i] == VM_PAGER_PEND || (mt->flags & PG_WRITEABLE) == 0, ("vm_pageout_flush: page %p is not write protected", mt)); @@ -490,6 +495,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int fla if (vm_page_count_severe()) vm_page_try_to_cache(mt); } + vm_page_unlock(mt); } return numpagedout; } @@ -532,20 +538,19 @@ vm_pageout_object_deactivate_pages(pmap, first_obj */ rcount = object->resident_page_count; p = TAILQ_FIRST(&object->memq); - vm_page_lock_queues(); while (p && (rcount-- > 0)) { - if (pmap_resident_count(pmap) <= desired) { - vm_page_unlock_queues(); + if (pmap_resident_count(pmap) <= desired) goto unlock_return; - } next = TAILQ_NEXT(p, listq); cnt.v_pdpages++; + vm_page_lock(p); if (p->wire_count != 0 || p->hold_count != 0 || p->busy != 0 || (p->oflags & VPO_BUSY) || (p->flags & PG_UNMANAGED) || !pmap_page_exists_quick(pmap, p)) { + vm_page_unlock(p); p = next; continue; } @@ -570,18 +575,20 @@ vm_pageout_object_deactivate_pages(pmap, first_obj vm_page_requeue(p); } } else { - vm_page_activate(p); + vm_page_lock_queues(); + vm_page_activate_locked(p); vm_page_flag_clear(p, PG_REFERENCED); if (p->act_count < (ACT_MAX - ACT_ADVANCE)) p->act_count += ACT_ADVANCE; - vm_page_requeue(p); + vm_page_requeue_locked(p); + vm_page_unlock_queues(); } } else if (p->queue == PQ_INACTIVE) { pmap_remove_all(p); } + vm_page_unlock(p); p = next; } - vm_page_unlock_queues(); if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_LOCK(backing_object); @@ -732,11 
+739,10 @@ vm_pageout_scan(int pass) maxlaunder = 1; if (pass) maxlaunder = 10000; - vm_page_lock_queues(); rescan0: addl_page_shortage = addl_page_shortage_init; maxscan = cnt.v_inactive_count; - + vm_page_lock_queues(); for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl); m != NULL && maxscan-- > 0 && page_shortage > 0; m = next) { @@ -760,26 +766,32 @@ rescan0: * A held page may be undergoing I/O, so skip it. */ if (m->hold_count) { - vm_page_requeue(m); + vm_page_requeue_locked(m); addl_page_shortage++; continue; } + /* * Don't mess with busy pages, keep in the front of the * queue, most likely are being paged out. */ if (!VM_OBJECT_TRYLOCK(object) && - (!vm_pageout_fallback_object_lock(m, &next) || - m->hold_count != 0)) { + !vm_pageout_fallback_object_lock(m, &next)) { VM_OBJECT_UNLOCK(object); addl_page_shortage++; continue; } - if (m->busy || (m->oflags & VPO_BUSY)) { + if (vm_page_trylock(m) == 0) { VM_OBJECT_UNLOCK(object); addl_page_shortage++; continue; } + if (m->busy || (m->oflags & VPO_BUSY) || m->hold_count) { + vm_page_unlock(m); + VM_OBJECT_UNLOCK(object); + addl_page_shortage++; + continue; + } /* * If the object is not being used, we ignore previous @@ -800,9 +812,10 @@ rescan0: */ } else if (((m->flags & PG_REFERENCED) == 0) && (actcount = pmap_ts_referenced(m))) { - vm_page_activate(m); + vm_page_activate_locked(m); + m->act_count += (actcount + ACT_ADVANCE); + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); - m->act_count += (actcount + ACT_ADVANCE); continue; } @@ -815,9 +828,10 @@ rescan0: if ((m->flags & PG_REFERENCED) != 0) { vm_page_flag_clear(m, PG_REFERENCED); actcount = pmap_ts_referenced(m); - vm_page_activate(m); + vm_page_activate_locked(m); + m->act_count += (actcount + ACT_ADVANCE + 1); + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); - m->act_count += (actcount + ACT_ADVANCE + 1); continue; } @@ -875,7 +889,7 @@ rescan0: * the thrash point for a heavily loaded machine. 
*/ vm_page_flag_set(m, PG_WINATCFLS); - vm_page_requeue(m); + vm_page_requeue_locked(m); } else if (maxlaunder > 0) { /* * We always want to try to flush some dirty pages if @@ -902,8 +916,9 @@ rescan0: * Those objects are in a "rundown" state. */ if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); - vm_page_requeue(m); + vm_page_requeue_locked(m); continue; } @@ -941,6 +956,8 @@ rescan0: * of time. */ if (object->type == OBJT_VNODE) { + vm_page_unlock(m); + vm_page_unlock_queues(); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { @@ -948,23 +965,24 @@ rescan0: ++pageout_lock_miss; if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; + vm_page_lock_queues(); goto unlock_and_continue; } - vm_page_unlock_queues(); vm_object_reference_locked(object); VM_OBJECT_UNLOCK(object); vfslocked = VFS_LOCK_GIANT(vp->v_mount); if (vget(vp, LK_EXCLUSIVE | LK_TIMELOCK, curthread)) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); ++pageout_lock_miss; if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; vp = NULL; + vm_page_lock_queues(); goto unlock_and_continue; } VM_OBJECT_LOCK(object); + vm_page_lock(m); vm_page_lock_queues(); /* * The page might have been moved to another @@ -975,6 +993,7 @@ rescan0: if (VM_PAGE_GETQUEUE(m) != PQ_INACTIVE || m->object != object || TAILQ_NEXT(m, pageq) != &marker) { + vm_page_unlock(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; goto unlock_and_continue; @@ -987,6 +1006,7 @@ rescan0: * statistics are more correct if we don't. */ if (m->busy || (m->oflags & VPO_BUSY)) { + vm_page_unlock(m); goto unlock_and_continue; } @@ -995,7 +1015,8 @@ rescan0: * be undergoing I/O, so skip it */ if (m->hold_count) { - vm_page_requeue(m); + vm_page_unlock(m); + vm_page_requeue_locked(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; goto unlock_and_continue; @@ -1012,20 +1033,23 @@ rescan0: * the (future) cleaned page. 
Otherwise we could wind * up laundering or cleaning too many pages. */ + vm_page_unlock_queues(); if (vm_pageout_clean(m) != 0) { --page_shortage; --maxlaunder; - } + } else + vm_page_unlock(m); + vm_page_lock_queues(); unlock_and_continue: VM_OBJECT_UNLOCK(object); if (mp != NULL) { vm_page_unlock_queues(); if (vp != NULL) vput(vp); VFS_UNLOCK_GIANT(vfslocked); vm_object_deallocate(object); vn_finished_write(mp); vm_page_lock_queues(); } next = TAILQ_NEXT(&marker, pageq); TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, @@ -1050,6 +1074,7 @@ unlock_and_continue: */ pcount = cnt.v_active_count; m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { @@ -1069,14 +1094,20 @@ unlock_and_continue: continue; } + if (vm_page_trylock(m) == 0) { + VM_OBJECT_UNLOCK(object); + m = next; + continue; + } /* * Don't deactivate pages that are busy. */ if ((m->busy != 0) || (m->oflags & VPO_BUSY) || (m->hold_count != 0)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); - vm_page_requeue(m); + vm_page_requeue_locked(m); m = next; continue; } @@ -1113,7 +1144,7 @@ unlock_and_continue: * page activation count stats. */ if (actcount && (object->ref_count != 0)) { - vm_page_requeue(m); + vm_page_requeue_locked(m); } else { m->act_count -= min(m->act_count, ACT_DECLINE); if (vm_pageout_algorithm || @@ -1125,14 +1156,15 @@ unlock_and_continue: if (m->dirty == 0) vm_page_cache(m); else - vm_page_deactivate(m); + vm_page_deactivate_locked(m); } else { - vm_page_deactivate(m); + vm_page_deactivate_locked(m); } } else { - vm_page_requeue(m); + vm_page_requeue_locked(m); } } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } @@ -1316,6 +1348,11 @@ vm_pageout_page_stats() m = next; continue; } + if (vm_page_trylock(m) == 0) { + VM_OBJECT_UNLOCK(object); + m = next; + continue; + } /* * Don't deactivate pages that are busy. 
@@ -1323,8 +1360,9 @@ vm_pageout_page_stats() if ((m->busy != 0) || (m->oflags & VPO_BUSY) || (m->hold_count != 0)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); - vm_page_requeue(m); + vm_page_requeue_locked(m); m = next; continue; } @@ -1340,7 +1378,7 @@ vm_pageout_page_stats() m->act_count += ACT_ADVANCE + actcount; if (m->act_count > ACT_MAX) m->act_count = ACT_MAX; - vm_page_requeue(m); + vm_page_requeue_locked(m); } else { if (m->act_count == 0) { /* @@ -1353,12 +1391,13 @@ vm_pageout_page_stats() * of doing the operation. */ pmap_remove_all(m); - vm_page_deactivate(m); + vm_page_deactivate_locked(m); } else { m->act_count -= min(m->act_count, ACT_DECLINE); - vm_page_requeue(m); + vm_page_requeue_locked(m); } } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } Index: vm/vm_map.c =================================================================== --- vm/vm_map.c (revision 188509) +++ vm/vm_map.c (working copy) @@ -1576,7 +1576,6 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_offset_t start; vm_page_t p, p_start; vm_pindex_t psize, tmpidx; - boolean_t are_queues_locked; if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL) return; @@ -1600,7 +1599,6 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, psize = object->size - pindex; } - are_queues_locked = FALSE; start = 0; p_start = NULL; @@ -1635,25 +1633,15 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, p_start = p; } } else if (p_start != NULL) { - if (!are_queues_locked) { - are_queues_locked = TRUE; - vm_page_lock_queues(); - } pmap_enter_object(map->pmap, start, addr + ptoa(tmpidx), p_start, prot); p_start = NULL; } } if (p_start != NULL) { - if (!are_queues_locked) { - are_queues_locked = TRUE; - vm_page_lock_queues(); - } pmap_enter_object(map->pmap, start, addr + ptoa(psize), p_start, prot); } - if (are_queues_locked) - vm_page_unlock_queues(); unlock_return: VM_OBJECT_UNLOCK(object); } Index: vm/swap_pager.c 
=================================================================== --- vm/swap_pager.c (revision 188509) +++ vm/swap_pager.c (working copy) @@ -985,12 +985,16 @@ swap_pager_getpages(vm_object_t object, vm_page_t if (0 < i || j < count) { int k; - vm_page_lock_queues(); - for (k = 0; k < i; ++k) + for (k = 0; k < i; ++k) { + vm_page_lock(m[k]); vm_page_free(m[k]); - for (k = j; k < count; ++k) + vm_page_unlock(m[k]); + } + for (k = j; k < count; ++k) { + vm_page_lock(m[k]); vm_page_free(m[k]); - vm_page_unlock_queues(); + vm_page_unlock(m[k]); + } } /* @@ -1067,9 +1071,9 @@ swap_pager_getpages(vm_object_t object, vm_page_t VM_OBJECT_LOCK(object); while ((mreq->oflags & VPO_SWAPINPROG) != 0) { mreq->oflags |= VPO_WANTED; - vm_page_lock_queues(); + vm_page_lock(mreq); vm_page_flag_set(mreq, PG_REFERENCED); - vm_page_unlock_queues(); + vm_page_unlock(mreq); PCPU_INC(cnt.v_intrans); if (msleep(mreq, VM_OBJECT_MTX(object), PSWP, "swread", hz*20)) { printf( @@ -1352,7 +1356,6 @@ swp_pager_async_iodone(struct buf *bp) object = bp->b_pages[0]->object; VM_OBJECT_LOCK(object); } - vm_page_lock_queues(); /* * cleanup pages. If an error occurs writing to swap, we are in * very serious trouble. If it happens to be a disk error, though, @@ -1364,6 +1367,7 @@ swp_pager_async_iodone(struct buf *bp) for (i = 0; i < bp->b_npages; ++i) { vm_page_t m = bp->b_pages[i]; + vm_page_lock(m); m->oflags &= ~VPO_SWAPINPROG; if (bp->b_ioflags & BIO_ERROR) { @@ -1461,8 +1465,8 @@ swp_pager_async_iodone(struct buf *bp) if (vm_page_count_severe()) vm_page_try_to_cache(m); } + vm_page_unlock(m); } - vm_page_unlock_queues(); /* * adjust pip. 
NOTE: the original parent may still have its own @@ -1558,10 +1562,10 @@ swp_pager_force_pagein(vm_object_t object, vm_pind m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL|VM_ALLOC_RETRY); if (m->valid == VM_PAGE_BITS_ALL) { vm_object_pip_subtract(object, 1); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_activate(m); vm_page_dirty(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_pager_page_unswapped(m); return; @@ -1570,10 +1574,10 @@ swp_pager_force_pagein(vm_object_t object, vm_pind if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_subtract(object, 1); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_dirty(m); vm_page_dontneed(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_pager_page_unswapped(m); } Index: vm/vm_mmap.c =================================================================== --- vm/vm_mmap.c (revision 188509) +++ vm/vm_mmap.c (working copy) @@ -869,7 +869,7 @@ RestartScan: */ if (m != NULL && m->valid != 0) { mincoreinfo = MINCORE_INCORE; - vm_page_lock_queues(); + vm_page_lock(m); if (m->dirty || pmap_is_modified(m)) mincoreinfo |= MINCORE_MODIFIED_OTHER; @@ -878,7 +878,7 @@ RestartScan: vm_page_flag_set(m, PG_REFERENCED); mincoreinfo |= MINCORE_REFERENCED_OTHER; } - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(current->object.vm_object); } Index: vm/vm_glue.c =================================================================== --- vm/vm_glue.c (revision 188509) +++ vm/vm_glue.c (working copy) @@ -260,16 +260,16 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset if (m == NULL) goto out; if (m->valid == 0 || rv != VM_PAGER_OK) { - vm_page_lock_queues(); + vm_page_lock(m); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); m = NULL; goto out; } } - vm_page_lock_queues(); + vm_page_lock(m); vm_page_hold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); 
out: VM_OBJECT_UNLOCK(object); @@ -303,9 +303,9 @@ vm_imgact_unmap_page(struct sf_buf *sf) m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } #ifndef KSTACK_MAX_PAGES @@ -396,10 +396,10 @@ vm_thread_dispose(struct thread *td) m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(ksobj); vm_object_deallocate(ksobj); @@ -427,10 +427,10 @@ vm_thread_swapout(struct thread *td) m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_swapout: kstack already missing?"); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_dirty(m); vm_page_unwire(m, 0); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(ksobj); } @@ -458,9 +458,9 @@ vm_thread_swapin(struct thread *td) m->valid = VM_PAGE_BITS_ALL; } ma[i] = m; - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wire(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); } VM_OBJECT_UNLOCK(ksobj); Index: vm/pmap.h =================================================================== --- vm/pmap.h (revision 188509) +++ vm/pmap.h (working copy) @@ -132,6 +132,7 @@ void pmap_zero_page_area(vm_page_t, int off, int void pmap_zero_page_idle(vm_page_t); int pmap_mincore(pmap_t pmap, vm_offset_t addr); void pmap_activate(struct thread *td); +struct mtx *pmap_page_lockptr(vm_page_t); #define pmap_resident_count(pm) ((pm)->pm_stats.resident_count) #define pmap_wired_count(pm) ((pm)->pm_stats.wired_count) Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (revision 188509) +++ vm/vm_object.c (working copy) @@ -668,8 +668,8 @@ vm_object_terminate(vm_object_t object) * removes them from paging queues. 
Don't free wired pages, just * remove them from the object. */ - vm_page_lock_queues(); while ((p = TAILQ_FIRST(&object->memq)) != NULL) { + vm_page_lock(p); KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, ("vm_object_terminate: freeing busy page %p " "p->busy = %d, p->oflags %x\n", p, p->busy, p->oflags)); @@ -679,8 +679,8 @@ vm_object_terminate(vm_object_t object) } else { vm_page_remove(p); } + vm_page_unlock(p); } - vm_page_unlock_queues(); #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) @@ -740,7 +740,6 @@ vm_object_page_clean(vm_object_t object, vm_pindex tend = end; } - vm_page_lock_queues(); /* * If the caller is smart and only msync()s a range he knows is * dirty, we may be able to avoid an object scan. This results in @@ -769,8 +768,10 @@ vm_object_page_clean(vm_object_t object, vm_pindex ++tscan; continue; } + vm_page_lock(p); vm_page_test_dirty(p); if ((p->dirty & p->valid) == 0) { + vm_page_unlock(p); if (--scanlimit == 0) break; ++tscan; @@ -781,6 +782,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex * this is a nosync page, we can't continue. */ if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) { + vm_page_unlock(p); if (--scanlimit == 0) break; ++tscan; @@ -790,7 +792,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex /* * This returns 0 if it was unable to busy the first - * page (i.e. had to sleep). + * page (i.e. had to sleep) and always unlocks p. */ tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags); } @@ -802,7 +804,6 @@ vm_object_page_clean(vm_object_t object, vm_pindex * return immediately. 
*/ if (tscan >= tend && (tstart || tend < object->size)) { - vm_page_unlock_queues(); vm_object_clear_flag(object, OBJ_CLEANING); return; } @@ -820,10 +821,13 @@ vm_object_page_clean(vm_object_t object, vm_pindex clearobjflags = 1; TAILQ_FOREACH(p, &object->memq, listq) { p->oflags |= VPO_CLEANCHK; - if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) + if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) { clearobjflags = 0; - else + } else { + vm_page_lock(p); pmap_remove_write(p); + vm_page_unlock(p); + } } if (clearobjflags && (tstart == 0) && (tend == object->size)) { @@ -856,8 +860,10 @@ again: continue; } + vm_page_lock(p); vm_page_test_dirty(p); if ((p->dirty & p->valid) == 0) { + vm_page_unlock(p); p->oflags &= ~VPO_CLEANCHK; continue; } @@ -868,10 +874,11 @@ again: * not cleared in this case so we do not have to set them. */ if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) { + vm_page_unlock(p); p->oflags &= ~VPO_CLEANCHK; continue; } - + /* Always unlocks p. */ n = vm_object_page_collect_flush(object, p, curgeneration, pagerflags); if (n == 0) @@ -889,7 +896,6 @@ again: goto again; } } - vm_page_unlock_queues(); #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc); #endif @@ -911,14 +917,18 @@ vm_object_page_collect_flush(vm_object_t object, v vm_page_t mab[vm_pageout_page_count]; vm_page_t ma[vm_pageout_page_count]; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(p, MA_OWNED); pi = p->pindex; while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { - vm_page_lock_queues(); if (object->generation != curgeneration) { return(0); } + vm_page_lock(p); } + vm_page_io_start(p); + pmap_remove_write(p); + vm_page_unlock(p); + maxf = 0; for(i = 1; i < vm_pageout_page_count; i++) { vm_page_t tp; @@ -929,11 +939,15 @@ vm_object_page_collect_flush(vm_object_t object, v (tp->oflags & VPO_CLEANCHK) == 0) || (tp->busy != 0)) break; + vm_page_lock(tp); vm_page_test_dirty(tp); if ((tp->dirty & tp->valid) == 0) { + 
vm_page_unlock(tp); tp->oflags &= ~VPO_CLEANCHK; break; } + vm_page_io_start(tp); + pmap_remove_write(tp); maf[ i - 1 ] = tp; maxf++; continue; @@ -953,11 +967,16 @@ vm_object_page_collect_flush(vm_object_t object, v (tp->oflags & VPO_CLEANCHK) == 0) || (tp->busy != 0)) break; + vm_page_lock(tp); vm_page_test_dirty(tp); if ((tp->dirty & tp->valid) == 0) { + vm_page_unlock(tp); tp->oflags &= ~VPO_CLEANCHK; break; } + vm_page_io_start(tp); + pmap_remove_write(tp); + vm_page_unlock(tp); mab[ i - 1 ] = tp; maxb++; continue; @@ -983,7 +1002,9 @@ vm_object_page_collect_flush(vm_object_t object, v vm_pageout_flush(ma, runlen, pagerflags); for (i = 0; i < runlen; i++) { if (ma[i]->valid & ma[i]->dirty) { + vm_page_lock(ma[i]); pmap_remove_write(ma[i]); + vm_page_unlock(ma[i]); ma[i]->oflags |= VPO_CLEANCHK; /* @@ -1157,17 +1178,17 @@ shadowlookup: * page queues to mess with. Things can break if we mess * with pages in any of the below states. */ - vm_page_lock_queues(); + vm_page_lock(m); if (m->hold_count || m->wire_count || (m->flags & PG_UNMANAGED) || m->valid != VM_PAGE_BITS_ALL) { - vm_page_unlock_queues(); + vm_page_unlock(m); goto unlock_tobject; } if ((m->oflags & VPO_BUSY) || m->busy) { vm_page_flag_set(m, PG_REFERENCED); - vm_page_unlock_queues(); + vm_page_unlock(m); if (object != tobject) VM_OBJECT_UNLOCK(object); m->oflags |= VPO_WANTED; @@ -1200,7 +1221,7 @@ shadowlookup: m->act_count = 0; vm_page_dontneed(m); } - vm_page_unlock_queues(); + vm_page_unlock(m); if (advise == MADV_FREE && tobject->type == OBJT_SWAP) swap_pager_freespace(tobject, tpindex, 1); unlock_tobject: @@ -1355,7 +1376,6 @@ retry: m = TAILQ_NEXT(m, listq); } } - vm_page_lock_queues(); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); @@ -1367,9 +1387,10 @@ retry: * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. 
*/ + vm_page_lock(m); if ((m->oflags & VPO_BUSY) || m->busy) { vm_page_flag_set(m, PG_REFERENCED); - vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_UNLOCK(new_object); m->oflags |= VPO_WANTED; msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0); @@ -1379,8 +1400,8 @@ retry: vm_page_rename(m, new_object, idx); /* page automatically made dirty by rename and cache handled */ vm_page_busy(m); + vm_page_unlock(m); } - vm_page_unlock_queues(); if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's @@ -1506,9 +1527,9 @@ vm_object_backing_scan(vm_object_t object, int op) } } else if (op & OBSC_COLLAPSE_WAIT) { if ((p->oflags & VPO_BUSY) || p->busy) { - vm_page_lock_queues(); + vm_page_lock(p); vm_page_flag_set(p, PG_REFERENCED); - vm_page_unlock_queues(); + vm_page_unlock(p); VM_OBJECT_UNLOCK(object); p->oflags |= VPO_WANTED; msleep(p, VM_OBJECT_MTX(backing_object), @@ -1551,14 +1572,14 @@ vm_object_backing_scan(vm_object_t object, int op) * Page is out of the parent object's range, we * can simply destroy it. */ - vm_page_lock_queues(); + vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); - vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -1575,14 +1596,14 @@ vm_object_backing_scan(vm_object_t object, int op) * * Leave the parent's page alone */ - vm_page_lock_queues(); + vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); - vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -1602,9 +1623,9 @@ vm_object_backing_scan(vm_object_t object, int op) * If the page was mapped to a process, it can remain * mapped through the rename. 
*/ - vm_page_lock_queues(); + vm_page_lock(p); vm_page_rename(p, object, new_pindex); - vm_page_unlock_queues(); + vm_page_unlock(p); /* page automatically made dirty by rename */ } p = next; @@ -1869,7 +1890,6 @@ vm_object_page_remove(vm_object_t object, vm_pinde vm_object_pip_add(object, 1); again: - vm_page_lock_queues(); if ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->pindex < start) { p = vm_page_splay(start, object->root); @@ -1887,6 +1907,7 @@ again: p = next) { next = TAILQ_NEXT(p, listq); + vm_page_lock(p); /* * If the page is wired for any reason besides the * existence of managed, wired mappings, then it cannot @@ -1902,6 +1923,7 @@ again: pmap_remove_all(p); /* Account for removal of managed, wired mappings. */ p->wire_count -= wirings; + vm_page_unlock(p); if (!clean_only) p->valid = 0; continue; @@ -1912,16 +1934,18 @@ again: ("vm_object_page_remove: page %p is fictitious", p)); if (clean_only && p->valid) { pmap_remove_write(p); - if (p->valid & p->dirty) + if (p->valid & p->dirty) { + vm_page_unlock(p); continue; + } } pmap_remove_all(p); /* Account for removal of managed, wired mappings. 
*/ if (wirings != 0) p->wire_count -= wirings; vm_page_free(p); + vm_page_unlock(p); } - vm_page_unlock_queues(); vm_object_pip_wakeup(object); skipmemq: if (__predict_false(object->cache != NULL)) Index: vm/vm_fault.c =================================================================== --- vm/vm_fault.c (revision 188509) +++ vm/vm_fault.c (working copy) @@ -136,12 +136,14 @@ struct faultstate { static inline void release_page(struct faultstate *fs) { + vm_page_t m; - vm_page_wakeup(fs->m); - vm_page_lock_queues(); - vm_page_deactivate(fs->m); - vm_page_unlock_queues(); + m = fs->m; fs->m = NULL; + vm_page_wakeup(m); + vm_page_lock(m); + vm_page_deactivate(m); + vm_page_unlock(m); } static inline void @@ -157,17 +159,19 @@ unlock_map(struct faultstate *fs) static void unlock_and_deallocate(struct faultstate *fs) { + vm_page_t m; vm_object_pip_wakeup(fs->object); VM_OBJECT_UNLOCK(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_LOCK(fs->first_object); - vm_page_lock_queues(); - vm_page_free(fs->first_m); - vm_page_unlock_queues(); + m = fs->first_m; + fs->first_m = NULL; + vm_page_lock(m); + vm_page_free(m); + vm_page_unlock(m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_UNLOCK(fs->first_object); - fs->first_m = NULL; } vm_object_deallocate(fs->first_object); unlock_map(fs); @@ -212,7 +216,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_ { vm_prot_t prot; int is_first_object_locked, result; - boolean_t are_queues_locked, growstack, wired; + boolean_t growstack, wired; int map_generation; vm_object_t next_object; vm_page_t marray[VM_FAULT_READ]; @@ -333,12 +337,12 @@ RetryFault:; * removes the page from the backing object, * which is not what we want. 
*/ - vm_page_lock_queues(); + vm_page_lock(fs.m); if ((fs.m->cow) && (fault_type & VM_PROT_WRITE) && (fs.object == fs.first_object)) { vm_page_cowfault(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); unlock_and_deallocate(&fs); goto RetryFault; } @@ -360,13 +364,13 @@ RetryFault:; * to pmap it. */ if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { - vm_page_unlock_queues(); + vm_page_unlock(fs.m); VM_OBJECT_UNLOCK(fs.object); if (fs.object != fs.first_object) { VM_OBJECT_LOCK(fs.first_object); - vm_page_lock_queues(); + vm_page_lock(fs.first_m); vm_page_free(fs.first_m); - vm_page_unlock_queues(); + vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_UNLOCK(fs.first_object); fs.first_m = NULL; @@ -385,7 +389,7 @@ RetryFault:; goto RetryFault; } vm_pageq_remove(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the @@ -480,7 +484,6 @@ readrest: else firstpindex = fs.first_pindex - 2 * VM_FAULT_READ; - are_queues_locked = FALSE; /* * note: partially valid pages cannot be * included in the lookahead - NFS piecemeal @@ -497,22 +500,17 @@ readrest: if (mt->busy || (mt->oflags & VPO_BUSY)) continue; - if (!are_queues_locked) { - are_queues_locked = TRUE; - vm_page_lock_queues(); + vm_page_lock(mt); + if (mt->hold_count == 0 && + mt->wire_count == 0) { + pmap_remove_all(mt); + if (mt->dirty) + vm_page_deactivate(mt); + else + vm_page_cache(mt); } - if (mt->hold_count || - mt->wire_count) - continue; - pmap_remove_all(mt); - if (mt->dirty) { - vm_page_deactivate(mt); - } else { - vm_page_cache(mt); - } + vm_page_unlock(mt); } - if (are_queues_locked) - vm_page_unlock_queues(); ahead += behind; behind = 0; } @@ -641,17 +639,17 @@ vnode_locked: */ if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { - vm_page_lock_queues(); + vm_page_lock(fs.m); vm_page_free(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return 
((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (fs.object != fs.first_object) { - vm_page_lock_queues(); + vm_page_lock(fs.m); vm_page_free(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); fs.m = NULL; /* * XXX - we cannot just fall out at this @@ -764,18 +762,20 @@ vnode_locked: * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { - vm_page_lock_queues(); /* * get rid of the unnecessary page */ + vm_page_lock(fs.first_m); vm_page_free(fs.first_m); + vm_page_unlock(fs.first_m); /* * grab the page and put it into the * process'es object. The page is * automatically made dirty. */ + vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; @@ -932,7 +932,7 @@ vnode_locked: vm_fault_prefault(fs.map->pmap, vaddr, fs.entry); } VM_OBJECT_LOCK(fs.object); - vm_page_lock_queues(); + vm_page_lock(fs.m); vm_page_flag_set(fs.m, PG_REFERENCED); /* @@ -947,7 +947,7 @@ vnode_locked: } else { vm_page_activate(fs.m); } - vm_page_unlock_queues(); + vm_page_unlock(fs.m); vm_page_wakeup(fs.m); /* @@ -1026,9 +1026,9 @@ vm_fault_prefault(pmap_t pmap, vm_offset_t addra, (m->busy == 0) && (m->flags & PG_FICTITIOUS) == 0) { - vm_page_lock_queues(); + vm_page_lock(m); pmap_enter_quick(pmap, addr, m, entry->protection); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(lobject); } @@ -1093,6 +1093,7 @@ vm_fault_unwire(vm_map_t map, vm_offset_t start, v { vm_paddr_t pa; vm_offset_t va; + vm_page_t m; pmap_t pmap; pmap = vm_map_pmap(map); @@ -1106,9 +1107,10 @@ vm_fault_unwire(vm_map_t map, vm_offset_t start, v if (pa != 0) { pmap_change_wiring(pmap, va, FALSE); if (!fictitious) { - vm_page_lock_queues(); - vm_page_unwire(PHYS_TO_VM_PAGE(pa), 1); - vm_page_unlock_queues(); + m = PHYS_TO_VM_PAGE(pa); + vm_page_lock(m); + vm_page_unwire(m, 1); + vm_page_unlock(m); } } } @@ -1225,9 +1227,9 
@@ vm_fault_copy_entry(dst_map, src_map, dst_entry, s * Mark it no longer busy, and put it on the active list. */ VM_OBJECT_LOCK(dst_object); - vm_page_lock_queues(); + vm_page_lock(dst_m); vm_page_activate(dst_m); - vm_page_unlock_queues(); + vm_page_unlock(dst_m); vm_page_wakeup(dst_m); } VM_OBJECT_UNLOCK(dst_object); Index: vm/device_pager.c =================================================================== --- vm/device_pager.c (revision 188509) +++ vm/device_pager.c (working copy) @@ -242,12 +242,13 @@ dev_pager_getpages(object, m, count, reqpage) VM_OBJECT_LOCK(object); dev_pager_updatefake(m[reqpage], paddr); if (count > 1) { - vm_page_lock_queues(); for (i = 0; i < count; i++) { - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); + vm_page_unlock(m[i]); + } } - vm_page_unlock_queues(); } } else { /* @@ -257,10 +258,11 @@ dev_pager_getpages(object, m, count, reqpage) page = dev_pager_getfake(paddr); VM_OBJECT_LOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, page, pageq); - vm_page_lock_queues(); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } vm_page_insert(page, object, offset); m[reqpage] = page; } Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (revision 188509) +++ vm/vm_page.c (working copy) @@ -406,7 +406,7 @@ void vm_page_flag_set(vm_page_t m, unsigned short bits) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); m->flags |= bits; } @@ -414,7 +414,7 @@ void vm_page_flag_clear(vm_page_t m, unsigned short bits) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); m->flags &= ~bits; } @@ -489,7 +489,7 @@ void vm_page_hold(vm_page_t mem) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(mem, MA_OWNED); mem->hold_count++; } @@ -497,7 +497,7 @@ void 
vm_page_unhold(vm_page_t mem) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(mem, MA_OWNED); --mem->hold_count; KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!")); if (mem->hold_count == 0 && VM_PAGE_INQUEUE2(mem, PQ_HOLD)) @@ -542,10 +542,10 @@ vm_page_sleep(vm_page_t m, const char *msg) { VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); - if (!mtx_owned(&vm_page_queue_mtx)) - vm_page_lock_queues(); + if (!mtx_owned(vm_page_lockptr(m))) + vm_page_lock(m); vm_page_flag_set(m, PG_REFERENCED); - vm_page_unlock_queues(); + vm_page_unlock(m); /* * It's possible that while we sleep, the page will get @@ -728,7 +728,7 @@ vm_page_remove(vm_page_t m) m->oflags &= ~VPO_BUSY; vm_page_flash(m); } - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); /* * Now remove from the object's list of backed pages. @@ -1236,11 +1236,13 @@ vm_waitpfault(void) * The page queues must be locked. */ void -vm_page_requeue(vm_page_t m) +vm_page_requeue_locked(vm_page_t m) { - int queue = VM_PAGE_GETQUEUE(m); + int queue; struct vpgqueues *vpq; + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + queue = VM_PAGE_GETQUEUE(m); if (queue != PQ_NONE) { vpq = &vm_page_queues[queue]; TAILQ_REMOVE(&vpq->pl, m, pageq); @@ -1248,6 +1250,14 @@ void } } +void +vm_page_requeue(vm_page_t m) +{ + vm_page_lock_queues(); + vm_page_requeue_locked(m); + vm_page_unlock_queues(); +} + /* * vm_pageq_remove: * @@ -1257,11 +1267,13 @@ void * This routine may not block. 
*/ void -vm_pageq_remove(vm_page_t m) +vm_pageq_remove_locked(vm_page_t m) { - int queue = VM_PAGE_GETQUEUE(m); + int queue; struct vpgqueues *pq; + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + queue = VM_PAGE_GETQUEUE(m); if (queue != PQ_NONE) { VM_PAGE_SETQUEUE2(m, PQ_NONE); pq = &vm_page_queues[queue]; @@ -1269,6 +1281,13 @@ void (*pq->cnt)--; } } +void +vm_pageq_remove(vm_page_t m) +{ + vm_page_lock_queues(); + vm_pageq_remove_locked(m); + vm_page_unlock_queues(); +} /* * vm_page_enqueue: @@ -1278,16 +1297,25 @@ void * The page queues must be locked. */ static void -vm_page_enqueue(int queue, vm_page_t m) +vm_page_enqueue_locked(int queue, vm_page_t m) { struct vpgqueues *vpq; + mtx_assert(&vm_page_queue_mtx, MA_OWNED); vpq = &vm_page_queues[queue]; VM_PAGE_SETQUEUE2(m, queue); TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); ++*vpq->cnt; } +static void +vm_page_enqueue(int queue, vm_page_t m) +{ + vm_page_lock_queues(); + vm_page_enqueue_locked(queue, m); + vm_page_unlock_queues(); +} + /* * vm_page_activate: * @@ -1299,9 +1327,10 @@ static void * This routine may not block. 
*/ void -vm_page_activate(vm_page_t m) +vm_page_activate_locked(vm_page_t m) { + vm_page_lock_assert(m, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (VM_PAGE_GETKNOWNQUEUE2(m) != PQ_ACTIVE) { vm_pageq_remove(m); @@ -1316,6 +1345,14 @@ void } } +void +vm_page_activate(vm_page_t m) +{ + vm_page_lock_queues(); + vm_page_activate_locked(m); + vm_page_unlock_queues(); +} + /* * vm_page_free_wakeup: * @@ -1365,11 +1402,14 @@ void vm_page_free_toq(vm_page_t m) { - if (VM_PAGE_GETQUEUE(m) != PQ_NONE) - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - KASSERT(!pmap_page_is_mapped(m), - ("vm_page_free_toq: freeing mapped page %p", m)); PCPU_INC(cnt.v_tfree); +#ifdef INVARIANTS + if (VM_PAGE_GETQUEUE(m) != PQ_NONE || m->object != NULL || + m->hold_count) { + vm_page_lock_assert(m, MA_OWNED); + KASSERT(!pmap_page_is_mapped(m), + ("vm_page_free_toq: freeing mapped page %p", m)); + } if (m->busy || VM_PAGE_IS_FREE(m)) { printf( @@ -1381,6 +1421,10 @@ vm_page_free_toq(vm_page_t m) else panic("vm_page_free: freeing busy page"); } + KASSERT(m->wire_count == 0, + ("vm_page_free: freeing wired page. Count: %d, pindex: 0x%lx", + m->wire_count, (long)m->pindex)); +#endif /* * unqueue, then remove page. Note that we cannot destroy @@ -1388,7 +1432,6 @@ vm_page_free_toq(vm_page_t m) * callback routine until after we've put the page on the * appropriate free queue. */ - vm_pageq_remove(m); vm_page_remove(m); /* @@ -1396,23 +1439,21 @@ vm_page_free_toq(vm_page_t m) * return, otherwise delay object association removal. 
*/ if ((m->flags & PG_FICTITIOUS) != 0) { + vm_pageq_remove(m); return; } m->valid = 0; vm_page_undirty(m); - if (m->wire_count != 0) { - if (m->wire_count > 1) { - panic("vm_page_free: invalid wire count (%d), pindex: 0x%lx", - m->wire_count, (long)m->pindex); - } - panic("vm_page_free: freeing wired page"); - } if (m->hold_count != 0) { m->flags &= ~PG_ZERO; - vm_page_enqueue(PQ_HOLD, m); + vm_page_lock_queues(); + vm_pageq_remove_locked(m); + vm_page_enqueue_locked(PQ_HOLD, m); + vm_page_unlock_queues(); } else { + vm_pageq_remove(m); mtx_lock(&vm_page_queue_free_mtx); m->flags |= PG_FREE; cnt.v_free_count++; @@ -1450,7 +1491,7 @@ vm_page_wire(vm_page_t m) * and only unqueue the page if it is on some queue (if it is unmanaged * it is already off the queues). */ - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->flags & PG_FICTITIOUS) return; if (m->wire_count == 0) { @@ -1494,7 +1535,7 @@ void vm_page_unwire(vm_page_t m, int activate) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->flags & PG_FICTITIOUS) return; if (m->wire_count > 0) { @@ -1531,7 +1572,7 @@ _vm_page_deactivate(vm_page_t m, int athead) { mtx_assert(&vm_page_queue_mtx, MA_OWNED); - + vm_page_lock_assert(m, MA_OWNED); /* * Ignore if already inactive. 
*/ @@ -1539,7 +1580,7 @@ _vm_page_deactivate(vm_page_t m, int athead) return; if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { vm_page_flag_clear(m, PG_WINATCFLS); - vm_pageq_remove(m); + vm_pageq_remove_locked(m); if (athead) TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); else @@ -1550,9 +1591,19 @@ _vm_page_deactivate(vm_page_t m, int athead) } void +vm_page_deactivate_locked(vm_page_t m) +{ + + _vm_page_deactivate(m, 0); +} + +void vm_page_deactivate(vm_page_t m) { - _vm_page_deactivate(m, 0); + + vm_page_lock_queues(); + _vm_page_deactivate(m, 0); + vm_page_unlock_queues(); } /* @@ -1564,7 +1615,7 @@ int vm_page_try_to_cache(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (m->dirty || m->hold_count || m->busy || m->wire_count || (m->oflags & VPO_BUSY) || (m->flags & PG_UNMANAGED)) { @@ -1587,7 +1638,7 @@ int vm_page_try_to_free(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->object != NULL) VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (m->dirty || m->hold_count || m->busy || m->wire_count || @@ -1614,7 +1665,7 @@ vm_page_cache(vm_page_t m) vm_object_t object; vm_page_t root; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if ((m->flags & PG_UNMANAGED) || (m->oflags & VPO_BUSY) || m->busy || @@ -1740,7 +1791,7 @@ vm_page_dontneed(vm_page_t m) int dnw; int head; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); dnw = ++dnweight; /* @@ -1776,7 +1827,9 @@ vm_page_dontneed(vm_page_t m) */ head = 1; } + vm_page_lock_queues(); _vm_page_deactivate(m, head); + vm_page_unlock_queues(); } /* @@ -1801,9 +1854,9 @@ retrylookup: goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wire(m); - 
vm_page_unlock_queues(); + vm_page_unlock(m); } if ((allocflags & VM_ALLOC_NOBUSY) == 0) vm_page_busy(m); @@ -1870,7 +1923,7 @@ vm_page_set_validclean(vm_page_t m, int base, int int frag; int endoff; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (size == 0) /* handle degenerate case */ return; @@ -1929,7 +1982,7 @@ void vm_page_clear_dirty(vm_page_t m, int base, int size) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); m->dirty &= ~vm_page_bits(base, size); } @@ -1948,7 +2001,7 @@ vm_page_set_invalid(vm_page_t m, int base, int siz VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); bits = vm_page_bits(base, size); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->valid == VM_PAGE_BITS_ALL && bits != 0) pmap_remove_all(m); m->valid &= ~bits; @@ -2050,6 +2103,8 @@ vm_page_cowfault(vm_page_t m) vm_object_t object; vm_pindex_t pindex; + /* XXX Not properly locked. */ + panic("vm_page_cowfault: Not properly locked\n"); object = m->object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->paging_in_progress != 0, @@ -2063,18 +2118,18 @@ vm_page_cowfault(vm_page_t m) mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); if (mnew == NULL) { vm_page_insert(m, object, pindex); - vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); VM_WAIT; VM_OBJECT_LOCK(object); if (m == vm_page_lookup(object, pindex)) { - vm_page_lock_queues(); + vm_page_lock(m); goto retry_alloc; } else { /* * Page disappeared during the wait. 
*/ - vm_page_lock_queues(); + vm_page_lock(m); return; } } @@ -2101,7 +2156,7 @@ void vm_page_cowclear(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->cow) { m->cow--; /* @@ -2117,7 +2172,7 @@ int vm_page_cowsetup(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->cow == USHRT_MAX - 1) return (EBUSY); m->cow++; Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (revision 188509) +++ vm/vm_page.h (working copy) @@ -265,7 +265,14 @@ PHYS_TO_VM_PAGE(vm_paddr_t pa) extern struct mtx vm_page_queue_mtx; #define vm_page_lock_queues() mtx_lock(&vm_page_queue_mtx) #define vm_page_unlock_queues() mtx_unlock(&vm_page_queue_mtx) +#define vm_page_trylock_queues() mtx_trylock(&vm_page_queue_mtx) +#define vm_page_lockptr(m) pmap_page_lockptr(m) +#define vm_page_lock(m) mtx_lock(vm_page_lockptr((m))) +#define vm_page_unlock(m) mtx_unlock(vm_page_lockptr((m))) +#define vm_page_trylock(m) mtx_trylock(vm_page_lockptr((m))) +#define vm_page_lock_assert(m, a) mtx_assert(vm_page_lockptr((m)), (a)) + #if PAGE_SIZE == 4096 #define VM_PAGE_BITS_ALL 0xffu #elif PAGE_SIZE == 8192 @@ -304,8 +311,10 @@ void vm_page_dirty(vm_page_t m); void vm_page_wakeup(vm_page_t m); void vm_pageq_remove(vm_page_t m); +void vm_pageq_remove_locked(vm_page_t m); void vm_page_activate (vm_page_t); +void vm_page_activate_locked (vm_page_t); vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); void vm_page_cache (register vm_page_t); @@ -316,11 +325,13 @@ int vm_page_try_to_cache (vm_page_t); int vm_page_try_to_free (vm_page_t); void vm_page_dontneed (register vm_page_t); void vm_page_deactivate (vm_page_t); +void vm_page_deactivate_locked (vm_page_t); void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); void vm_page_remove (vm_page_t); void 
vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); void vm_page_requeue(vm_page_t m); +void vm_page_requeue_locked(vm_page_t m); void vm_page_sleep(vm_page_t m, const char *msg); vm_page_t vm_page_splay(vm_pindex_t, vm_page_t); vm_offset_t vm_page_startup(vm_offset_t vaddr); @@ -342,7 +353,7 @@ void vm_page_cowclear (vm_page_t); /* * vm_page_sleep_if_busy: * - * Sleep and release the page queues lock if VPO_BUSY is set or, + * Sleep and release the page lock if VPO_BUSY is set or, * if also_m_busy is TRUE, busy is non-zero. Returns TRUE if the * thread slept and the page queues lock was released. * Otherwise, retains the page queues lock and returns FALSE. Index: vm/vm_contig.c =================================================================== --- vm/vm_contig.c (revision 188509) +++ vm/vm_contig.c (working copy) @@ -103,6 +103,11 @@ vm_contig_launder_page(vm_page_t m, vm_page_t *nex VM_OBJECT_UNLOCK(object); return (EAGAIN); } + if (vm_page_trylock(m) == 0) { + VM_OBJECT_UNLOCK(object); + return (EAGAIN); + } + vm_page_unlock_queues(); if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) { VM_OBJECT_UNLOCK(object); vm_page_lock_queues(); @@ -113,11 +118,13 @@ vm_contig_launder_page(vm_page_t m, vm_page_t *nex pmap_remove_all(m); if (m->dirty) { if ((object->flags & OBJ_DEAD) != 0) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); + vm_page_lock_queues(); return (EAGAIN); } if (object->type == OBJT_VNODE) { - vm_page_unlock_queues(); + vm_page_unlock(m); vp = object->handle; vm_object_reference_locked(object); VM_OBJECT_UNLOCK(object); @@ -138,11 +145,14 @@ vm_contig_launder_page(vm_page_t m, vm_page_t *nex m_tmp = m; vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC); VM_OBJECT_UNLOCK(object); + vm_page_lock_queues(); return (0); } } else if (m->hold_count == 0) vm_page_cache(m); + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); + vm_page_lock_queues(); return (0); } Index: vm/vnode_pager.c =================================================================== --- 
vm/vnode_pager.c (revision 188509) +++ vm/vnode_pager.c (working copy) @@ -415,11 +415,11 @@ vnode_pager_setsize(vp, nsize) * bits. This would prevent bogus_page * replacement from working properly. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_set_validclean(m, base, size); if (m->dirty != 0) m->dirty = VM_PAGE_BITS_ALL; - vm_page_unlock_queues(); + vm_page_unlock(m); } else if ((nsize & PAGE_MASK) && __predict_false(object->cache != NULL)) { vm_page_cache_free(object, OFF_TO_IDX(nsize), @@ -545,23 +545,23 @@ vnode_pager_input_smlfs(object, m) break; VM_OBJECT_LOCK(object); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize); - vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); } else { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize); - vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize); } } sf_buf_free(sf); - vm_page_lock_queues(); + vm_page_lock(m); pmap_clear_modify(m); - vm_page_unlock_queues(); + vm_page_unlock(m); if (error) { return VM_PAGER_ERROR; } @@ -630,10 +630,10 @@ vnode_pager_input_old(object, m) VM_OBJECT_LOCK(object); } - vm_page_lock_queues(); + vm_page_lock(m); pmap_clear_modify(m); vm_page_undirty(m); - vm_page_unlock_queues(); + vm_page_unlock(m); if (!error) m->valid = VM_PAGE_BITS_ALL; return error ? 
VM_PAGER_ERROR : VM_PAGER_OK; @@ -721,11 +721,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, req error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL); if (error == EOPNOTSUPP) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } PCPU_INC(cnt.v_vnodein); PCPU_INC(cnt.v_vnodepgsin); error = vnode_pager_input_old(object, m[reqpage]); @@ -733,11 +734,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, req return (error); } else if (error != 0) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); @@ -749,11 +751,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, req } else if ((PAGE_SIZE / bsize) > 1 && (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); PCPU_INC(cnt.v_vnodein); PCPU_INC(cnt.v_vnodepgsin); @@ -767,22 +770,24 @@ vnode_pager_generic_getpages(vp, m, bytecount, req */ VM_OBJECT_LOCK(object); if (m[reqpage]->valid == VM_PAGE_BITS_ALL) { - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return VM_PAGER_OK; } else if (reqblock == -1) { pmap_zero_page(m[reqpage]); vm_page_undirty(m[reqpage]); m[reqpage]->valid = VM_PAGE_BITS_ALL; - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - 
vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_OK); } @@ -801,11 +806,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, req if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr, &runpg) != 0) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); } @@ -819,9 +825,9 @@ vnode_pager_generic_getpages(vp, m, bytecount, req (object->un_pager.vnp.vnp_size >> 32), (uintmax_t)object->un_pager.vnp.vnp_size); } - vm_page_lock_queues(); + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); VM_OBJECT_UNLOCK(object); runend = i + 1; first = runend; @@ -830,18 +836,20 @@ vnode_pager_generic_getpages(vp, m, bytecount, req runend = i + runpg; if (runend <= reqpage) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); - for (j = i; j < runend; j++) + for (j = i; j < runend; j++) { + vm_page_lock(m[j]); vm_page_free(m[j]); - vm_page_unlock_queues(); + vm_page_unlock(m[j]); + } VM_OBJECT_UNLOCK(object); } else { if (runpg < (count - first)) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); - for (i = first + runpg; i < count; i++) + for (i = first + runpg; i < count; i++) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); count = first + runpg; } @@ -932,13 +940,13 @@ vnode_pager_generic_getpages(vp, m, bytecount, req relpbuf(bp, &vnode_pbuf_freecnt); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) { vm_page_t mt; nextoff = tfoff + PAGE_SIZE; mt = m[i]; + vm_page_lock(mt); if (nextoff <= object->un_pager.vnp.vnp_size) { /* * Read filled up entire page. 
@@ -987,8 +995,8 @@ vnode_pager_generic_getpages(vp, m, bytecount, req vm_page_free(mt); } } + vm_page_unlock(mt); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); if (error) { printf("vnode_pager_getpages: I/O read error\n"); @@ -1114,10 +1122,13 @@ vnode_pager_generic_putpages(vp, m, bytecount, fla maxsize = object->un_pager.vnp.vnp_size - poffset; ncount = btoc(maxsize); if ((pgoff = (int)maxsize & PAGE_MASK) != 0) { - vm_page_lock_queues(); - vm_page_clear_dirty(m[ncount - 1], pgoff, + vm_page_t p; + + p = m[ncount - 1]; + vm_page_lock(p); + vm_page_clear_dirty(p, pgoff, PAGE_SIZE - pgoff); - vm_page_unlock_queues(); + vm_page_unlock(p); } } else { maxsize = 0; Index: vm/uma_core.c =================================================================== --- vm/uma_core.c (revision 188509) +++ vm/uma_core.c (working copy) @@ -1022,10 +1022,10 @@ obj_alloc(uma_zone_t zone, int bytes, u_int8_t *fl while (pages != startpages) { pages--; p = TAILQ_LAST(&object->memq, pglist); - vm_page_lock_queues(); + vm_page_lock(p); vm_page_unwire(p, 0); vm_page_free(p); - vm_page_unlock_queues(); + vm_page_unlock(p); } retkva = 0; goto done; Index: amd64/include/pmap.h =================================================================== --- amd64/include/pmap.h (revision 188509) +++ amd64/include/pmap.h (working copy) @@ -251,6 +251,7 @@ struct pmap { /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ vm_page_t pm_root; /* spare page table pages */ + vm_page_t pm_free; /* Temporary free pages. 
*/ }; typedef struct pmap *pmap_t; Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (revision 188509) +++ amd64/amd64/pmap.c (working copy) @@ -168,6 +168,17 @@ __FBSDID("$FreeBSD$"); #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) +#define PA_LOCKPTR(pa) &pa_lock[pa_index((pa)) % PA_LOCK_COUNT] +#define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa)) +#define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa)) +#define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa)) +#define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a)) + +#define PA_LOCK_COUNT 64 + +struct mtx pa_lock[PA_LOCK_COUNT]; +struct mtx pv_lock; + struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ @@ -184,6 +195,14 @@ static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RD, &pg_ps_enabled, 0, "Are large page mappings enabled?"); +static int pmap_tryrelock_calls; +SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_calls, CTLFLAG_RD, + &pmap_tryrelock_calls, 0, "Number of tryrelock calls"); + +static int pmap_tryrelock_restart; +SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_restart, CTLFLAG_RD, + &pmap_tryrelock_restart, 0, "Number of tryrelock restarts"); + static u_int64_t KPTphys; /* phys addr of kernel level 1 */ static u_int64_t KPDphys; /* phys addr of kernel level 2 */ u_int64_t KPDPphys; /* phys addr of kernel level 3 */ @@ -252,10 +271,13 @@ static void pmap_insert_entry(pmap_t pmap, vm_offs static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); -static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags); -static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); +static vm_page_t pmap_allocpde(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, + int flags); +static vm_page_t pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, + int flags); -static vm_page_t _pmap_allocpte(pmap_t pmap, 
vm_pindex_t ptepindex, int flags); +static vm_page_t _pmap_allocpte(pmap_t pmap, vm_paddr_t pa, + vm_pindex_t ptepindex, int flags); static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t* free); static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *); @@ -413,6 +435,37 @@ vtopde(vm_offset_t va) return (PDmap + ((va >> PDRSHIFT) & mask)); } +/* + * Lock physical address pa while the pmap is locked, releasing any other pa + * lock cached in *locked and caching pa there. If the trylock fails we drop + * the pmap lock, block on the pa lock, relock the pmap and return EAGAIN; the + * caller should then restart in case the virtual to physical mapping changed. + */ +static int +pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) +{ + vm_paddr_t lockpa; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + atomic_add_int((volatile int *)&pmap_tryrelock_calls, 1); + lockpa = *locked; + *locked = pa; + if (lockpa) { + PA_LOCK_ASSERT(lockpa, MA_OWNED); + if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa)) + return (0); + PA_UNLOCK(lockpa); + } + if (PA_TRYLOCK(pa)) + return 0; + PMAP_UNLOCK(pmap); + PA_LOCK(pa); + PMAP_LOCK(pmap); + atomic_add_int((volatile int *)&pmap_tryrelock_restart, 1); + + return (EAGAIN); +} + static u_int64_t allocpages(vm_paddr_t *firstaddr, int n) { @@ -522,6 +575,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; + int i; /* * Create an initial set of page tables to run the kernel in. @@ -544,6 +598,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys); kernel_pmap->pm_root = NULL; + kernel_pmap->pm_free = NULL; kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); @@ -580,6 +635,11 @@ pmap_bootstrap(vm_paddr_t *firstaddr) /* Initialize the PAT MSR. */ pmap_init_pat(); + + /* Setup page locks.
*/ + for (i = 0; i < PA_LOCK_COUNT; i++) + mtx_init(&pa_lock[i], "page lock", NULL, MTX_DEF | MTX_RECURSE); + mtx_init(&pv_lock, "pv list lock", NULL, MTX_DEF); } /* @@ -634,6 +694,13 @@ pmap_page_init(vm_page_t m) TAILQ_INIT(&m->md.pv_list); } +struct mtx * +pmap_page_lockptr(vm_page_t m) +{ + KASSERT(m != NULL, ("pmap_page_lockptr: NULL page")); + return (PA_LOCKPTR(VM_PAGE_TO_PHYS(m))); +} + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -1023,29 +1090,35 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, { pd_entry_t pde, *pdep; pt_entry_t pte; + vm_paddr_t pa; vm_page_t m; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | - (va & PDRMASK)); + if (pa_tryrelock(pmap, pde & PG_PS_FRAME, &pa)) + goto retry; + m = PHYS_TO_VM_PAGE(pa | (va & PDRMASK)); vm_page_hold(m); } } else { pte = *pmap_pde_to_pte(pdep, va); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - m = PHYS_TO_VM_PAGE(pte & PG_FRAME); + if (pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; + m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } } - vm_page_unlock_queues(); + if (pa) + PA_UNLOCK(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1398,6 +1471,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys); pmap->pm_root = NULL; + pmap->pm_free = NULL; pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1435,6 +1509,7 @@ pmap_pinit(pmap_t pmap) pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; pmap->pm_root = NULL; + pmap->pm_free = NULL; pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1452,7 +1527,7 @@ pmap_pinit(pmap_t pmap) * race conditions. 
*/ static vm_page_t -_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags) +_pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_pindex_t ptepindex, int flags) { vm_page_t m, pdppg, pdpg; @@ -1467,9 +1542,9 @@ static vm_page_t VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (flags & M_WAITOK) { PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); + PA_UNLOCK(pa); VM_WAIT; - vm_page_lock_queues(); + PA_LOCK(pa); PMAP_LOCK(pmap); } @@ -1511,7 +1586,7 @@ static vm_page_t pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pdp, recurse */ - if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index, + if (_pmap_allocpte(pmap, pa, NUPDE + NUPDPE + pml4index, flags) == NULL) { --m->wire_count; vm_page_free(m); @@ -1543,7 +1618,7 @@ static vm_page_t pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pd, recurse */ - if (_pmap_allocpte(pmap, NUPDE + pdpindex, + if (_pmap_allocpte(pmap, pa, NUPDE + pdpindex, flags) == NULL) { --m->wire_count; vm_page_free(m); @@ -1556,7 +1631,7 @@ static vm_page_t pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; if ((*pdp & PG_V) == 0) { /* Have to allocate a new pd, recurse */ - if (_pmap_allocpte(pmap, NUPDE + pdpindex, + if (_pmap_allocpte(pmap, pa, NUPDE + pdpindex, flags) == NULL) { --m->wire_count; vm_page_free(m); @@ -1579,7 +1654,7 @@ static vm_page_t } static vm_page_t -pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags) +pmap_allocpde(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, int flags) { vm_pindex_t pdpindex, ptepindex; pdp_entry_t *pdpe; @@ -1598,7 +1673,7 @@ retry: /* Allocate a pd page. 
*/ ptepindex = pmap_pde_pindex(va); pdpindex = ptepindex >> NPDPEPGSHIFT; - pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, flags); + pdpg = _pmap_allocpte(pmap, pa, NUPDE + pdpindex, flags); if (pdpg == NULL && (flags & M_WAITOK)) goto retry; } @@ -1606,10 +1681,11 @@ retry: } static vm_page_t -pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) +pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, int flags) { vm_pindex_t ptepindex; pd_entry_t *pd; + vm_paddr_t lockedpa; vm_page_t m; KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || @@ -1619,6 +1695,7 @@ static vm_page_t /* * Calculate pagetable page index */ + lockedpa = pa; ptepindex = pmap_pde_pindex(va); retry: /* @@ -1631,6 +1708,8 @@ retry: * normal 4K page. */ if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { + if (pa_tryrelock(pmap, *pd & PG_FRAME, &lockedpa)) + goto retry; if (!pmap_demote_pde(pmap, pd, va)) { /* * Invalidation of the 2MB page mapping may have caused @@ -1639,6 +1718,13 @@ retry: pd = NULL; } } + if (pa) { + if (pa_tryrelock(pmap, pa, &lockedpa)) + goto retry; + } else if (lockedpa) { + PA_UNLOCK(lockedpa); + lockedpa = 0; + } /* * If the page table page is mapped, we just increment the @@ -1652,7 +1738,7 @@ retry: * Here if the pte page isn't mapped, or if it has been * deallocated. 
*/ - m = _pmap_allocpte(pmap, ptepindex, flags); + m = _pmap_allocpte(pmap, pa, ptepindex, flags); if (m == NULL && (flags & M_WAITOK)) goto retry; } @@ -1869,9 +1955,14 @@ pmap_collect(pmap_t locked_pmap, struct vpgqueues vm_offset_t va; vm_page_t m, free; + vm_page_lock_queues(); TAILQ_FOREACH(m, &vpq->pl, pageq) { - if (m->hold_count || m->busy) + if (m->hold_count || m->busy || vm_page_trylock(m) == 0) continue; + if (m->hold_count || m->busy) { + vm_page_unlock(m); + continue; + } TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); @@ -1906,7 +1997,9 @@ pmap_collect(pmap_t locked_pmap, struct vpgqueues if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } + vm_page_unlock(m); } + vm_page_unlock_queues(); } @@ -1920,8 +2013,8 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv) struct pv_chunk *pc; int idx, field, bit; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_lock(&pv_lock); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; @@ -1934,8 +2027,10 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv) TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || - pc->pc_map[2] != PC_FREE2) + pc->pc_map[2] != PC_FREE2) { + mtx_unlock(&pv_lock); return; + } PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); @@ -1943,7 +2038,8 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv) TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); - vm_page_unwire(m, 0); + m->wire_count--; + mtx_unlock(&pv_lock); vm_page_free(m); } @@ -1964,7 +2060,7 @@ get_pv_entry(pmap_t pmap, int try) vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + mtx_lock(&pv_lock); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) @@ -1992,6 +2088,7 
@@ retry: TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(pv_entry_spare--); + mtx_unlock(&pv_lock); return (pv); } } @@ -2003,6 +2100,7 @@ retry: if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); + mtx_unlock(&pv_lock); return (NULL); } /* @@ -2033,6 +2131,7 @@ retry: pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); + mtx_unlock(&pv_lock); return (pv); } @@ -2047,7 +2146,6 @@ pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, { pv_entry_t pv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); @@ -2070,7 +2168,7 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm vm_offset_t va_last; vm_page_t m; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PA_LOCK_ASSERT(pa, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 2mpage aligned")); @@ -2108,7 +2206,7 @@ pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, v vm_offset_t va_last; vm_page_t m; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PA_LOCK_ASSERT(pa, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 2mpage aligned")); @@ -2154,7 +2252,7 @@ pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_off { struct md_page *pvh; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -2173,7 +2271,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_ pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); @@ -2188,7 +2286,7 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + 
vm_page_lock_assert(m, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; @@ -2207,7 +2305,7 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm struct md_page *pvh; pv_entry_t pv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PA_LOCK_ASSERT(pa, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; @@ -2361,6 +2459,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_o pmap_invalidate_page(kernel_pmap, sva); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; if (oldpde & PG_MANAGED) { + PA_LOCK_ASSERT(oldpde & PG_PS_FRAME, MA_OWNED); pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; @@ -2392,6 +2491,9 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_o return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); } +#define PMAP_REMOVE_LAST 0x1 +#define PMAP_REMOVE_UNLOCKED 0x2 + /* * pmap_remove_pte: do the things to unmap a page in a process */ @@ -2401,8 +2503,10 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_o { pt_entry_t oldpte; vm_page_t m; + int ret; PMAP_LOCK_ASSERT(pmap, MA_OWNED); + ret = 0; oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; @@ -2413,15 +2517,23 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_o if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; + if (pmap_unuse_pt(pmap, va, ptepde, free)) + ret = PMAP_REMOVE_LAST; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); + if (vm_page_trylock(m) == 0) { + PMAP_UNLOCK(pmap); + vm_page_lock(m); + PMAP_LOCK(pmap); + } if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); pmap_remove_entry(pmap, m, va); + vm_page_unlock(m); } - return (pmap_unuse_pt(pmap, va, ptepde, free)); + return (ret); } /* @@ -2457,6 +2569,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse 
pd_entry_t ptpaddr, *pde; pt_entry_t *pte; vm_page_t free = NULL; + vm_paddr_t pa; int anyvalid; /* @@ -2465,11 +2578,12 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse if (pmap->pm_stats.resident_count == 0) return; + pa = 0; anyvalid = 0; - vm_page_lock_queues(); PMAP_LOCK(pmap); +restart: /* * special handling of removing one page. a very * common operation and easy to short circuit some @@ -2524,6 +2638,10 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { + if (pa_tryrelock(pmap, ptpaddr & PG_FRAME, &pa)) { + va_next = sva; + continue; + } /* * Are we removing the entire large page? If not, * demote the mapping and fall through. @@ -2540,9 +2658,13 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse } else if (!pmap_demote_pde(pmap, pde, sva)) { /* The large page mapping was destroyed. */ continue; - } else - ptpaddr = *pde; + } + ptpaddr = *pde; } + if (pa) { + PA_UNLOCK(pa); + pa = 0; + } /* * Limit our scan to either the end of the va represented @@ -2554,6 +2676,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { + int ret; if (*pte == 0) continue; @@ -2563,14 +2686,20 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse */ if ((*pte & PG_G) == 0) anyvalid = 1; - if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free)) + ret = pmap_remove_pte(pmap, pte, sva, ptpaddr, &free); + if (ret & PMAP_REMOVE_LAST) break; + if (ret & PMAP_REMOVE_UNLOCKED) { + va_next = sva + PAGE_SIZE; + goto restart; + } } } + if (pa) + PA_UNLOCK(pa); out: if (anyvalid) pmap_invalidate_all(pmap); - vm_page_unlock_queues(); PMAP_UNLOCK(pmap); pmap_free_zero_pages(free); } @@ -2601,7 +2730,7 @@ pmap_remove_all(vm_page_t m) KASSERT((m->flags & PG_FICTITIOUS) == 0, ("pmap_remove_all: page %p is fictitious", m)); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = 
TAILQ_FIRST(&pvh->pv_list)) != NULL) { va = pv->pv_va; @@ -2702,6 +2831,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offs pd_entry_t ptpaddr, *pde; pt_entry_t *pte; int anychanged; + vm_paddr_t pa; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); @@ -2712,10 +2842,10 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offs (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; + pa = 0; anychanged = 0; - - vm_page_lock_queues(); PMAP_LOCK(pmap); +restart: for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); @@ -2783,6 +2913,8 @@ retry: continue; if (pbits & PG_MANAGED) { m = NULL; + if (pa_tryrelock(pmap, pbits & PG_FRAME, &pa)) + goto restart; if (pbits & PG_A) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_flag_set(m, PG_REFERENCED); @@ -2811,9 +2943,10 @@ retry: } } } + if (pa) + PA_UNLOCK(pa); if (anychanged) pmap_invalidate_all(pmap); - vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -2949,7 +3082,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t vm_paddr_t pa; pd_entry_t *pde; pt_entry_t *pte; - vm_paddr_t opa; + vm_paddr_t opa, lockedpa; pt_entry_t origpte, newpte; vm_page_t mpte, om; boolean_t invlva; @@ -2961,16 +3094,16 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t mpte = NULL; - vm_page_lock_queues(); + lockedpa = pa = VM_PAGE_TO_PHYS(m); + PA_LOCK(pa); PMAP_LOCK(pmap); - +restart: /* * In the case that a page table page is not * resident, we are creating it here. */ - if (va < VM_MAXUSER_ADDRESS) { - mpte = pmap_allocpte(pmap, va, M_WAITOK); - } + if (va < VM_MAXUSER_ADDRESS) + mpte = pmap_allocpte(pmap, lockedpa, va, M_WAITOK); pde = pmap_pde(pmap, va); if (pde != NULL && (*pde & PG_V) != 0) { @@ -2980,10 +3113,11 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t } else panic("pmap_enter: invalid page directory va=%#lx", va); - pa = VM_PAGE_TO_PHYS(m); om = NULL; origpte = *pte; opa = origpte & PG_FRAME; + if (pa_tryrelock(pmap, opa ? 
opa : pa, &lockedpa)) + goto restart; /* * Mapping has not changed, must be protection or wiring change. @@ -3021,11 +3155,18 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t * handle validating new mapping. */ if (opa) { + origpte = pte_load_clear(pte); if (origpte & PG_W) pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); + vm_page_lock_assert(om, MA_OWNED); pmap_remove_entry(pmap, om, va); + if (origpte & PG_A) + vm_page_flag_set(om, PG_REFERENCED); + if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) + vm_page_dirty(om); + om = NULL; } if (mpte != NULL) { mpte->wire_count--; @@ -3033,9 +3174,15 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } + if (origpte & PG_A || + (origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) + pmap_invalidate_page(pmap, va); + origpte = 0; } else pmap->pm_stats.resident_count++; + if (pa_tryrelock(pmap, pa, &lockedpa)) + goto restart; /* * Enter on the PV list if part of our managed memory. */ @@ -3053,6 +3200,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t pmap->pm_stats.wired_count++; validate: + vm_page_lock_assert(m, MA_OWNED); /* * Now validate mapping with desired protection/wiring. 
*/ @@ -3083,14 +3231,14 @@ validate: origpte = pte_load_store(pte, newpte); if (origpte & PG_A) { if (origpte & PG_MANAGED) - vm_page_flag_set(om, PG_REFERENCED); - if (opa != VM_PAGE_TO_PHYS(m) || ((origpte & - PG_NX) == 0 && (newpte & PG_NX))) + vm_page_flag_set(m, PG_REFERENCED); + if (((origpte & PG_NX) == 0 && + (newpte & PG_NX))) invlva = TRUE; } if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) - vm_page_dirty(om); + vm_page_dirty(m); if ((newpte & PG_RW) == 0) invlva = TRUE; } @@ -3108,7 +3256,7 @@ validate: pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va); - vm_page_unlock_queues(); + PA_UNLOCK(pa); PMAP_UNLOCK(pmap); } @@ -3124,9 +3272,9 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_pag pd_entry_t *pde, newpde; vm_page_t free, mpde; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if ((mpde = pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) { + if ((mpde = pmap_allocpde(pmap, 0, va, M_NOWAIT)) == NULL) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); @@ -3204,9 +3352,10 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, psize = atop(end - start); mpte = NULL; m = m_start; - PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); + vm_page_lock(m); + PMAP_LOCK(pmap); if ((va & PDRMASK) == 0 && va + NBPDR <= end && (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 && pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 && @@ -3215,9 +3364,10 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); + PMAP_UNLOCK(pmap); + vm_page_unlock(m); m = TAILQ_NEXT(m, listq); } - PMAP_UNLOCK(pmap); } /* @@ -3249,7 +3399,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t v KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 
("pmap_enter_quick_locked: managed mapping within the clean submap")); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* @@ -3282,7 +3432,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t v mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME); mpte->wire_count++; } else { - mpte = _pmap_allocpte(pmap, ptepindex, + mpte = _pmap_allocpte(pmap, 0, ptepindex, M_NOWAIT); if (mpte == NULL) return (mpte); @@ -3393,16 +3543,16 @@ retry: m[0] = p; if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { - vm_page_lock_queues(); + vm_page_lock(p); vm_page_free(p); - vm_page_unlock_queues(); + vm_page_unlock(p); return; } p = vm_page_lookup(object, pindex); - vm_page_lock_queues(); + vm_page_lock(p); vm_page_wakeup(p); - vm_page_unlock_queues(); + vm_page_unlock(p); } ptepa = VM_PAGE_TO_PHYS(p); @@ -3414,17 +3564,17 @@ retry: PMAP_LOCK(pmap); for (va = addr; va < addr + size; va += NBPDR) { while ((pdpg = - pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) { + pmap_allocpde(pmap, 0, va, M_NOWAIT)) == NULL) { PMAP_UNLOCK(pmap); - vm_page_lock_queues(); + vm_page_lock(p); vm_page_busy(p); - vm_page_unlock_queues(); + vm_page_unlock(p); VM_OBJECT_UNLOCK(object); VM_WAIT; VM_OBJECT_LOCK(object); - vm_page_lock_queues(); + vm_page_lock(p); vm_page_wakeup(p); - vm_page_unlock_queues(); + vm_page_unlock(p); PMAP_LOCK(pmap); } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); @@ -3460,27 +3610,20 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, bo { pd_entry_t *pde; pt_entry_t *pte; - boolean_t are_queues_locked; + vm_paddr_t pa; - are_queues_locked = FALSE; - /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. 
*/ + pa = 0; + PMAP_LOCK(pmap); retry: - PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) { if (!wired != ((*pde & PG_W) == 0)) { - if (!are_queues_locked) { - are_queues_locked = TRUE; - if (!mtx_trylock(&vm_page_queue_mtx)) { - PMAP_UNLOCK(pmap); - vm_page_lock_queues(); - goto retry; - } - } + if (pa_tryrelock(pmap, *pde & PG_FRAME, &pa)) + goto retry; if (!pmap_demote_pde(pmap, pde, va)) panic("pmap_change_wiring: demotion failed"); } else @@ -3495,8 +3638,8 @@ retry: atomic_clear_long(pte, PG_W); } out: - if (are_queues_locked) - vm_page_unlock_queues(); + if (pa) + PA_UNLOCK(pa); PMAP_UNLOCK(pmap); } @@ -3518,6 +3661,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t va_next; + vm_paddr_t pa; if (dst_addr != src_addr) return; @@ -3525,7 +3669,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off if (!pmap_is_current(src_pmap)) return; - vm_page_lock_queues(); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); @@ -3569,20 +3712,23 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off continue; if (srcptepaddr & PG_PS) { - dstmpde = pmap_allocpde(dst_pmap, addr, M_NOWAIT); + pa = srcptepaddr & PG_PS_FRAME; + if (PA_TRYLOCK(pa) == 0) + continue; + dstmpde = pmap_allocpde(dst_pmap, 0, addr, M_NOWAIT); if (dstmpde == NULL) break; pde = (pd_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpde)); pde = &pde[pmap_pde_index(addr)]; if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || - pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & - PG_PS_FRAME))) { + pmap_pv_insert_pde(dst_pmap, addr, pa))) { *pde = srcptepaddr & ~PG_W; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } else dstmpde->wire_count--; + PA_UNLOCK(pa); continue; } @@ -3601,7 +3747,13 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { - dstmpte = pmap_allocpte(dst_pmap, addr, + vm_page_t p; + + pa = ptetemp & 
PG_FRAME; + if (PA_TRYLOCK(pa) == 0) + break; + p = PHYS_TO_VM_PAGE(pa); + dstmpte = pmap_allocpte(dst_pmap, 0, addr, M_NOWAIT); if (dstmpte == NULL) break; @@ -3610,7 +3762,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off dst_pte = &dst_pte[pmap_pte_index(addr)]; if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, - PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { + p)) { /* * Clear the wired, modified, and * accessed (referenced) bits @@ -3628,6 +3780,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off pmap_free_zero_pages(free); } } + PA_UNLOCK(pa); if (dstmpte->wire_count >= srcmpte->wire_count) break; } @@ -3635,7 +3788,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off src_pte++; } } - vm_page_unlock_queues(); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } @@ -3715,7 +3867,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) if (m->flags & PG_FICTITIOUS) return FALSE; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { return TRUE; @@ -3748,6 +3900,7 @@ pmap_page_wired_mappings(vm_page_t m) { int count; + vm_page_lock_assert(m, MA_OWNED); count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); @@ -3767,7 +3920,6 @@ pmap_pvh_wired_mappings(struct md_page *pvh, int c pt_entry_t *pte; pv_entry_t pv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -3790,7 +3942,7 @@ pmap_page_is_mapped(vm_page_t m) if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); return (!TAILQ_EMPTY(&pvh->pv_list)); @@ -3816,6 +3968,7 @@ pmap_remove_pages(pmap_t pmap) pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; + vm_paddr_t pa; int field, idx; int64_t bit; uint64_t inuse, bitmask; @@ -3825,8 +3978,9 @@ 
pmap_remove_pages(pmap_t pmap) printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +restart: TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; for (field = 0; field < _NPCM; field++) { @@ -3862,7 +4016,10 @@ pmap_remove_pages(pmap_t pmap) continue; } - m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); + if (pa_tryrelock(pmap, tpte & PG_FRAME, &pa)) + goto restart; + + m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, @@ -3923,18 +4080,21 @@ pmap_remove_pages(pmap_t pmap) } } if (allfree) { + mtx_lock(&pv_lock); PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); - vm_page_unwire(m, 0); + m->wire_count--; + mtx_unlock(&pv_lock); vm_page_free(m); } } + if (pa) + PA_UNLOCK(pa); pmap_invalidate_all(pmap); - vm_page_unlock_queues(); PMAP_UNLOCK(pmap); pmap_free_zero_pages(free); } @@ -3949,6 +4109,7 @@ boolean_t pmap_is_modified(vm_page_t m) { + vm_page_lock_assert(m, MA_OWNED); if (m->flags & PG_FICTITIOUS) return (FALSE); if (pmap_is_modified_pvh(&m->md)) @@ -3969,7 +4130,6 @@ pmap_is_modified_pvh(struct md_page *pvh) pmap_t pmap; boolean_t rv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); @@ -4023,7 +4183,7 @@ pmap_remove_write(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0 || (m->flags & PG_WRITEABLE) == 0) return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { va = pv->pv_va; @@ -4081,7 +4241,7 @@ pmap_ts_referenced(vm_page_t m) if (m->flags & PG_FICTITIOUS) return (rtval); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + 
vm_page_lock_assert(m, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { va = pv->pv_va; @@ -4153,7 +4313,7 @@ pmap_clear_modify(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { va = pv->pv_va; @@ -4218,7 +4378,7 @@ pmap_clear_reference(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { va = pv->pv_va; @@ -4702,10 +4862,10 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) /* * Modified by someone else */ - vm_page_lock_queues(); + vm_page_lock(m); if (m->dirty || pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; - vm_page_unlock_queues(); + vm_page_unlock(m); } /* * Referenced by us @@ -4716,13 +4876,13 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) /* * Referenced by someone else */ - vm_page_lock_queues(); + vm_page_lock(m); if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } - vm_page_unlock_queues(); + vm_page_unlock(m); } } return val;