Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys:r197750,198463,198721,198855,199490,199819,199869-199870,206823,206885,207155,207161,207163,207205,207210,207213,207262,207308,207373-207374,207410,207412,207419,207437-207438,207448,207450-207452,207460,207519,207530-207531,207534-207535,207539-207541,207544,207548,207551-207552,207571,207573-207574,207576-207577,207584,207601,207617,207644,207649,207669,207694,207700,207702,207706,207708,207728,207738-207740,207746-207747,207752,207759,207796,207798,207805-207806,207822-207823,207846,207905,208175,208264,208278,208504,208524,208574,208609,208645-208646,208651,208657,208665,208667,208686-208688,208745,208764,208772,208791,208810,208846,208990,209048,209173,209211,209226,209320-209321,209610,209647,209651,212573,216333,216516,216555,216899,217171,217177,217478-217479,218113,218773,218950 Index: arm/arm/pmap.c =================================================================== --- arm/arm/pmap.c (revision 218945) +++ arm/arm/pmap.c (working copy) @@ -1470,7 +1470,7 @@ u_int oflags; int count = 0; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); if (maskbits & PVF_WRITE) maskbits |= PVF_MOD; @@ -1480,6 +1480,7 @@ pg->md.pvh_attrs &= ~(maskbits & (PVF_MOD | PVF_REF)); if (TAILQ_EMPTY(&pg->md.pv_list)) { + vm_page_unlock_queues(); return (0); } @@ -1615,6 +1616,7 @@ if (maskbits & PVF_WRITE) vm_page_flag_clear(pg, PG_WRITEABLE); + vm_page_unlock_queues(); return (count); } @@ -3163,18 +3165,11 @@ pmap_t curpm; int flags = 0; -#if defined(PMAP_DEBUG) - /* - * XXX This makes pmap_remove_all() illegal for non-managed pages! - */ - if (m->flags & PG_FICTITIOUS) { - panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); - } -#endif - + KASSERT((m->flags & PG_FICTITIOUS) == 0, + ("pmap_remove_all: page %p is fictitious", m)); if (TAILQ_EMPTY(&m->md.pv_list)) return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); pmap_remove_write(m); curpm = vmspace_pmap(curproc->p_vmspace); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { @@ -3225,6 +3220,7 @@ pmap_tlb_flushD(curpm); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } @@ -3374,8 +3370,12 @@ if (va == vector_page) { pa = systempage.pv_pa; m = NULL; - } else + } else { + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0 || (flags & M_NOWAIT) != 0, + ("pmap_enter_locked: page %p is not busy", m)); pa = VM_PAGE_TO_PHYS(m); + } nflags = 0; if (prot & VM_PROT_WRITE) nflags |= PVF_WRITE; @@ -3460,7 +3460,8 @@ if (prot & VM_PROT_WRITE) { npte |= L2_S_PROT_W; - if (m != NULL) + if (m != NULL && + (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) vm_page_flag_set(m, PG_WRITEABLE); } npte |= pte_l2_s_cache_mode; @@ -3638,12 +3639,14 @@ psize = atop(end - start); m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { pmap_enter_locked(pmap, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE, M_NOWAIT); m = TAILQ_NEXT(m, listq); } + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -3660,9 +3663,11 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { + vm_page_lock_queues(); PMAP_LOCK(pmap); pmap_enter_locked(pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE, M_NOWAIT); + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -3785,13 +3790,14 @@ struct l2_dtable *l2; pd_entry_t l1pd; 
pt_entry_t *ptep, pte; - vm_paddr_t pa; + vm_paddr_t pa, paddr; vm_page_t m = NULL; u_int l1idx; l1idx = L1_IDX(va); + paddr = 0; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* @@ -3803,6 +3809,8 @@ pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); + if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) + goto retry; if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); @@ -3819,7 +3827,6 @@ if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); return (NULL); } @@ -3828,7 +3835,6 @@ if (pte == 0) { PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); return (NULL); } if (pte & L2_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { @@ -3841,13 +3847,15 @@ pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); break; } + if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) + goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); + PA_UNLOCK_COND(paddr); return (m); } @@ -4455,24 +4463,23 @@ { pv_entry_t pv; int loops = 0; + boolean_t rv; - if (m->flags & PG_FICTITIOUS) - return (FALSE); - - /* - * Not found, check current mappings returning immediately - */ - for (pv = TAILQ_FIRST(&m->md.pv_list); - pv; - pv = TAILQ_NEXT(pv, pv_list)) { + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pv->pv_pmap == pmap) { - return (TRUE); + rv = TRUE; + break; } loops++; if (loops >= 16) break; } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -4490,10 +4497,11 @@ count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) if ((pv->pv_flags & PVF_WIRED) != 0) count++; + vm_page_unlock_queues(); return (count); } @@ -4506,8 +4514,8 @@ pmap_ts_referenced(vm_page_t m) { - if (m->flags & PG_FICTITIOUS) - return (0); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); return (pmap_clearbit(m, PVF_REF)); } @@ -4516,6 +4524,8 @@ pmap_is_modified(vm_page_t m) { + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); if (m->md.pvh_attrs & PVF_MOD) return (TRUE); @@ -4530,12 +4540,40 @@ pmap_clear_modify(vm_page_t m) { + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no mappings can be modified. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) + return; if (m->md.pvh_attrs & PVF_MOD) pmap_clearbit(m, PVF_MOD); } /* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. 
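+ *
+ *	For illustration only, a hypothetical caller of this new
+ *	interface (the real consumers live in the machine-independent
+ *	VM code):
+ *
+ *		if (pmap_is_referenced(m))
+ *			vm_page_flag_set(m, PG_REFERENCED);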
+ */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + return ((m->md.pvh_attrs & PVF_REF) != 0); +} + +/* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. @@ -4544,6 +4582,8 @@ pmap_clear_reference(vm_page_t m) { + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); if (m->md.pvh_attrs & PVF_REF) pmap_clearbit(m, PVF_REF); } @@ -4556,7 +4596,17 @@ pmap_remove_write(vm_page_t m) { - if (m->flags & PG_WRITEABLE) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) != 0 || + (m->flags & PG_WRITEABLE) != 0) pmap_clearbit(m, PVF_WRITE); } @@ -4565,7 +4615,7 @@ * perform the pmap work for mincore */ int -pmap_mincore(pmap_t pmap, vm_offset_t addr) +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { printf("pmap_mincore()\n"); Index: powerpc/booke/pmap.c =================================================================== --- powerpc/booke/pmap.c (revision 218945) +++ powerpc/booke/pmap.c (working copy) @@ -288,10 +288,12 @@ static void mmu_booke_init(mmu_t); static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); +static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); static boolean_t mmu_booke_ts_referenced(mmu_t, vm_page_t); static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int); -static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t); +static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t, + vm_paddr_t *); static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, vm_object_t, vm_pindex_t, vm_size_t); static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); @@ -342,6 +344,7 @@ MMUMETHOD(mmu_init, mmu_booke_init), MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), + MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), MMUMETHOD(mmu_map, mmu_booke_map), MMUMETHOD(mmu_mincore, mmu_booke_mincore), @@ -1555,6 +1558,9 @@ KASSERT((va <= VM_MAXUSER_ADDRESS), ("mmu_booke_enter_locked: user pmap, non user va")); } + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object), + ("mmu_booke_enter_locked: page %p is not busy", m)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -1591,7 +1597,8 @@ if (!su) flags |= PTE_UW; - vm_page_flag_set(m, PG_WRITEABLE); + if ((flags & PTE_MANAGED) != 0) + vm_page_flag_set(m, PG_WRITEABLE); } else { /* Handle modified pages, sense modify status. 
*/ @@ -1657,7 +1664,8 @@ if (!su) flags |= PTE_UW; - vm_page_flag_set(m, PG_WRITEABLE); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) + vm_page_flag_set(m, PG_WRITEABLE); } if (prot & VM_PROT_EXECUTE) { @@ -1706,12 +1714,14 @@ psize = atop(end - start); m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -1720,9 +1730,11 @@ vm_prot_t prot) { + vm_page_lock_queues(); PMAP_LOCK(pmap); mmu_booke_enter_locked(mmu, pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -1781,8 +1793,7 @@ pv_entry_t pv, pvn; uint8_t hold_flag; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - + vm_page_lock_queues(); for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { pvn = TAILQ_NEXT(pv, pv_link); @@ -1792,6 +1803,7 @@ PMAP_UNLOCK(pv->pv_pmap); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -1913,16 +1925,11 @@ tlb_miss_lock(); /* Handle modified pages. */ - if (PTE_ISMODIFIED(pte)) + if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) vm_page_dirty(m); - /* Referenced pages. */ - if (PTE_ISREFERENCED(pte)) - vm_page_flag_set(m, PG_REFERENCED); - tlb0_flush_entry(va); - pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED | - PTE_REFERENCED); + pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); @@ -1942,11 +1949,19 @@ pv_entry_t pv; pte_t *pte; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("mmu_booke_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; - + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { @@ -1960,13 +1975,8 @@ if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); - /* Referenced pages. */ - if (PTE_ISREFERENCED(pte)) - vm_page_flag_set(m, PG_REFERENCED); - /* Flush mapping from TLB0. 
*/ - pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED | - PTE_REFERENCED); + pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); @@ -1975,6 +1985,7 @@ PMAP_UNLOCK(pv->pv_pmap); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } static void @@ -2032,11 +2043,12 @@ pte_t *pte; vm_page_t m; uint32_t pte_wbit; - + vm_paddr_t pa; + m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); - +retry: pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) { if (pmap == kernel_pmap) @@ -2045,12 +2057,14 @@ pte_wbit = PTE_UW; if ((pte->flags & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(PTE_PA(pte)); vm_page_hold(m); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -2147,26 +2161,35 @@ { pte_t *pte; pv_entry_t pv; + boolean_t rv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (FALSE); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("mmu_booke_is_modified: page %p is not managed", m)); + rv = FALSE; + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no PTEs can be modified. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) + return (rv); + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); - if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { - if (!PTE_ISVALID(pte)) - goto make_sure_to_unlock; - - if (PTE_ISMODIFIED(pte)) { - PMAP_UNLOCK(pv->pv_pmap); - return (TRUE); - } + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { + if (PTE_ISMODIFIED(pte)) + rv = TRUE; } -make_sure_to_unlock: PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -2181,6 +2204,36 @@ } /* + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +static boolean_t +mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("mmu_booke_is_referenced: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { + if (PTE_ISREFERENCED(pte)) + rv = TRUE; + } + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + vm_page_unlock_queues(); + return (rv); +} + +/* * Clear the modify bits on the specified physical page. */ static void @@ -2189,16 +2242,24 @@ pte_t *pte; pv_entry_t pv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("mmu_booke_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("mmu_booke_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can be modified. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 
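+ *
+ *	The test depends on the caller's locking; roughly, a
+ *	hypothetical caller looks like:
+ *
+ *		VM_OBJECT_LOCK(m->object);
+ *		pmap_clear_modify(m);	page must not be VPO_BUSY here
+ *		VM_OBJECT_UNLOCK(m->object);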
+ */ + if ((m->flags & PG_WRITEABLE) == 0) return; - + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); - if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { - if (!PTE_ISVALID(pte)) - goto make_sure_to_unlock; - + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); @@ -2211,9 +2272,9 @@ tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } -make_sure_to_unlock: PMAP_UNLOCK(pv->pv_pmap); } + vm_page_unlock_queues(); } /* @@ -2233,17 +2294,14 @@ pv_entry_t pv; int count; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (0); - + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("mmu_booke_ts_referenced: page %p is not managed", m)); count = 0; + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); - if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { - if (!PTE_ISVALID(pte)) - goto make_sure_to_unlock; - + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { if (PTE_ISREFERENCED(pte)) { mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); @@ -2260,9 +2318,9 @@ } } } -make_sure_to_unlock: PMAP_UNLOCK(pv->pv_pmap); } + vm_page_unlock_queues(); return (count); } @@ -2275,16 +2333,13 @@ pte_t *pte; pv_entry_t pv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return; - + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("mmu_booke_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); - if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { - if (!PTE_ISVALID(pte)) - goto make_sure_to_unlock; - + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { if (PTE_ISREFERENCED(pte)) { mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); @@ -2296,9 +2351,9 @@ mtx_unlock_spin(&tlbivax_mutex); } } -make_sure_to_unlock: PMAP_UNLOCK(pv->pv_pmap); } + vm_page_unlock_queues(); } /* @@ -2337,20 +2392,23 @@ { pv_entry_t pv; int loops; + boolean_t rv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (FALSE); - + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("mmu_booke_page_exists_quick: page %p is not managed", m)); loops = 0; + rv = FALSE; + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { - if (pv->pv_pmap == pmap) - return (TRUE); - + if (pv->pv_pmap == pmap) { + rv = TRUE; + break; + } if (++loops >= 16) break; } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -2366,8 +2424,7 @@ if ((m->flags & PG_FICTITIOUS) != 0) return (count); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) @@ -2375,7 +2432,7 @@ count++; PMAP_UNLOCK(pv->pv_pmap); } - + vm_page_unlock_queues(); return (count); } @@ -2598,7 +2655,8 @@ * Perform the pmap work for mincore. 
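+ *	A sketch of the contract behind the new locked_pa argument (not
+ *	implemented by this stub): when the result describes a managed
+ *	page, the implementation is expected to lock the page by its
+ *	physical address and report that address, e.g.
+ *
+ *		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
+ *			goto retry;
+ *
+ *	so that the mincore(2) path can inspect the page and drop the
+ *	lock afterwards.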
*/ static int -mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr) +mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, + vm_paddr_t *locked_pa) { TODO; Index: powerpc/powerpc/pmap_dispatch.c =================================================================== --- powerpc/powerpc/pmap_dispatch.c (revision 218945) +++ powerpc/powerpc/pmap_dispatch.c (working copy) @@ -195,6 +195,14 @@ } boolean_t +pmap_is_referenced(vm_page_t m) +{ + + CTR2(KTR_PMAP, "%s(%p)", __func__, m); + return (MMU_IS_REFERENCED(mmu_obj, m)); +} + +boolean_t pmap_ts_referenced(vm_page_t m) { @@ -352,11 +360,11 @@ } int -pmap_mincore(pmap_t pmap, vm_offset_t addr) +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { CTR3(KTR_PMAP, "%s(%p, %#x)", __func__, pmap, addr); - return (MMU_MINCORE(mmu_obj, pmap, addr)); + return (MMU_MINCORE(mmu_obj, pmap, addr, locked_pa)); } void Index: powerpc/powerpc/mmu_if.m =================================================================== --- powerpc/powerpc/mmu_if.m (revision 218945) +++ powerpc/powerpc/mmu_if.m (working copy) @@ -90,7 +90,8 @@ return; } - static int mmu_null_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr) + static int mmu_null_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, + vm_paddr_t *locked_pa) { return (0); } @@ -364,6 +365,20 @@ /** + * @brief Return whether or not the specified physical page was referenced + * in any physical maps. + * + * @params _pg physical page + * + * @retval boolean TRUE if page has been referenced + */ +METHOD boolean_t is_referenced { + mmu_t _mmu; + vm_page_t _pg; +}; + + +/** * @brief Return a count of referenced bits for a page, clearing those bits. * Not all referenced bits need to be cleared, but it is necessary that 0 * only be returned when there are none set. @@ -637,12 +652,11 @@ /** - * @brief Extract mincore(2) information from a mapping. This routine is - * optional and is an optimisation: the mincore code will call is_modified - * and ts_referenced if no result is returned. + * @brief Extract mincore(2) information from a mapping. * * @param _pmap physical map * @param _addr page virtual address + * @param _locked_pa page physical address * * @retval 0 no result * @retval non-zero mincore(2) flag values @@ -651,6 +665,7 @@ mmu_t _mmu; pmap_t _pmap; vm_offset_t _addr; + vm_paddr_t *_locked_pa; } DEFAULT mmu_null_mincore; Index: powerpc/include/pmap.h =================================================================== --- powerpc/include/pmap.h (revision 218945) +++ powerpc/include/pmap.h (working copy) @@ -123,7 +123,6 @@ struct mtx pm_mtx; /* pmap mutex */ tlbtid_t pm_tid[MAXCPU]; /* TID to identify this pmap entries in TLB */ cpumask_t pm_active; /* active on cpus */ - int pm_refs; /* ref count */ struct pmap_statistics pm_stats; /* pmap statistics */ /* Page table directory, array of pointers to page tables. 
*/ Index: powerpc/aim/mmu_oea.c =================================================================== --- powerpc/aim/mmu_oea.c (revision 218945) +++ powerpc/aim/mmu_oea.c (working copy) @@ -285,7 +285,7 @@ vm_prot_t, boolean_t); static void moea_syncicache(vm_offset_t, vm_size_t); static boolean_t moea_query_bit(vm_page_t, int); -static u_int moea_clear_bit(vm_page_t, int, int *); +static u_int moea_clear_bit(vm_page_t, int); static void moea_kremove(mmu_t, vm_offset_t); int moea_pte_spill(vm_offset_t); @@ -304,6 +304,7 @@ vm_page_t moea_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea_init(mmu_t); boolean_t moea_is_modified(mmu_t, vm_page_t); +boolean_t moea_is_referenced(mmu_t, vm_page_t); boolean_t moea_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int); boolean_t moea_page_exists_quick(mmu_t, pmap_t, vm_page_t); @@ -346,6 +347,7 @@ MMUMETHOD(mmu_extract_and_hold, moea_extract_and_hold), MMUMETHOD(mmu_init, moea_init), MMUMETHOD(mmu_is_modified, moea_is_modified), + MMUMETHOD(mmu_is_referenced, moea_is_referenced), MMUMETHOD(mmu_ts_referenced, moea_ts_referenced), MMUMETHOD(mmu_map, moea_map), MMUMETHOD(mmu_page_exists_quick,moea_page_exists_quick), @@ -905,6 +907,7 @@ m.phys_addr = translations[i].om_pa + off; m.md.mdpg_cache_attrs = VM_MEMATTR_DEFAULT; + m.oflags = VPO_BUSY; PMAP_LOCK(&ofw_pmap); moea_enter_locked(&ofw_pmap, translations[i].om_va + off, &m, @@ -1130,6 +1133,9 @@ if (pmap_bootstrapped) mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object), + ("moea_enter_locked: page %p is not busy", m)); /* XXX change the pvo head for fake pages */ if ((m->flags & PG_FICTITIOUS) == PG_FICTITIOUS) { @@ -1154,7 +1160,8 @@ if (prot & VM_PROT_WRITE) { pte_lo |= PTE_BW; - if (pmap_bootstrapped) + if (pmap_bootstrapped && + (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) vm_page_flag_set(m, PG_WRITEABLE); } else pte_lo |= PTE_BR; @@ -1211,12 +1218,14 @@ psize = atop(end - start); m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pm); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea_enter_locked(pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } + vm_page_unlock_queues(); PMAP_UNLOCK(pm); } @@ -1225,11 +1234,12 @@ vm_prot_t prot) { + vm_page_lock_queues(); PMAP_LOCK(pm); moea_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); + vm_page_unlock_queues(); PMAP_UNLOCK(pm); - } vm_paddr_t @@ -1258,18 +1268,22 @@ { struct pvo_entry *pvo; vm_page_t m; - + vm_paddr_t pa; + m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1288,12 +1302,30 @@ } boolean_t +moea_is_referenced(mmu_t mmu, vm_page_t m) +{ + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea_is_referenced: page %p is not managed", m)); + return (moea_query_bit(m, PTE_REF)); +} + +boolean_t moea_is_modified(mmu_t mmu, vm_page_t m) { - if ((m->flags & 
(PG_FICTITIOUS |PG_UNMANAGED)) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea_is_modified: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no PTEs can have PTE_CHG set. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return (FALSE); - return (moea_query_bit(m, PTE_CHG)); } @@ -1301,18 +1333,29 @@ moea_clear_reference(mmu_t mmu, vm_page_t m) { - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return; - moea_clear_bit(m, PTE_REF, NULL); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea_clear_reference: page %p is not managed", m)); + moea_clear_bit(m, PTE_REF); } void moea_clear_modify(mmu_t mmu, vm_page_t m) { - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("moea_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can have PTE_CHG + * set. If the object containing the page is locked and the page is + * not VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) return; - moea_clear_bit(m, PTE_CHG, NULL); + moea_clear_bit(m, PTE_CHG); } /* @@ -1326,10 +1369,19 @@ pmap_t pmap; u_int lo; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. 
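+ *
+ *	Illustration of the expected calling environment (hypothetical
+ *	caller; the real callers are in the machine-independent VM
+ *	code):
+ *
+ *		VM_OBJECT_LOCK(object);
+ *		TAILQ_FOREACH(m, &object->memq, listq)
+ *			pmap_remove_write(m);
+ *		VM_OBJECT_UNLOCK(object);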
+ */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); lo = moea_attr_fetch(m); powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { @@ -1355,6 +1407,7 @@ vm_page_dirty(m); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -1372,14 +1425,10 @@ boolean_t moea_ts_referenced(mmu_t mmu, vm_page_t m) { - int count; - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (0); - - count = moea_clear_bit(m, PTE_REF, NULL); - - return (count); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea_ts_referenced: page %p is not managed", m)); + return (moea_clear_bit(m, PTE_REF)); } /* @@ -1533,19 +1582,23 @@ { int loops; struct pvo_entry *pvo; + boolean_t rv; - if (!moea_initialized || (m->flags & PG_FICTITIOUS)) - return FALSE; - + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea_page_exists_quick: page %p is not managed", m)); loops = 0; + rv = FALSE; + vm_page_lock_queues(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { - if (pvo->pvo_pmap == pmap) - return (TRUE); + if (pvo->pvo_pmap == pmap) { + rv = TRUE; + break; + } if (++loops >= 16) break; } - - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -1559,12 +1612,13 @@ int count; count = 0; - if (!moea_initialized || (m->flags & PG_FICTITIOUS) != 0) + if ((m->flags & PG_FICTITIOUS) != 0) return (count); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & PVO_WIRED) != 0) count++; + vm_page_unlock_queues(); return (count); } @@ -1780,8 +1834,7 @@ struct pvo_entry *pvo, *next_pvo; pmap_t pmap; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - + vm_page_lock_queues(); pvo_head = vm_page_to_pvoh(m); for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) { next_pvo = LIST_NEXT(pvo, pvo_vlink); @@ -1797,6 +1850,7 @@ vm_page_dirty(m); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -2273,6 +2327,7 @@ if (moea_attr_fetch(m) & ptebit) return (TRUE); + vm_page_lock_queues(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { MOEA_PVO_CHECK(pvo); /* sanity check */ @@ -2283,6 +2338,7 @@ if (pvo->pvo_pte.pte.pte_lo & ptebit) { moea_attr_save(m, ptebit); MOEA_PVO_CHECK(pvo); /* sanity check */ + vm_page_unlock_queues(); return (TRUE); } } @@ -2308,26 +2364,28 @@ if (pvo->pvo_pte.pte.pte_lo & ptebit) { moea_attr_save(m, ptebit); MOEA_PVO_CHECK(pvo); /* sanity check */ + vm_page_unlock_queues(); return (TRUE); } } } + vm_page_unlock_queues(); return (FALSE); } static u_int -moea_clear_bit(vm_page_t m, int ptebit, int *origbit) +moea_clear_bit(vm_page_t m, int ptebit) { u_int count; struct pvo_entry *pvo; struct pte *pt; - int rv; + vm_page_lock_queues(); + /* * Clear the cached value. 
*/ - rv = moea_attr_fetch(m); moea_attr_clear(m, ptebit); /* @@ -2355,15 +2413,11 @@ } mtx_unlock(&moea_table_mutex); } - rv |= pvo->pvo_pte.pte.pte_lo; pvo->pvo_pte.pte.pte_lo &= ~ptebit; MOEA_PVO_CHECK(pvo); /* sanity check */ } - if (origbit != NULL) { - *origbit = rv; - } - + vm_page_unlock_queues(); return (count); } Index: powerpc/aim/mmu_oea64.c =================================================================== --- powerpc/aim/mmu_oea64.c (revision 218945) +++ powerpc/aim/mmu_oea64.c (working copy) @@ -356,7 +356,7 @@ static void moea64_enter_locked(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); static boolean_t moea64_query_bit(vm_page_t, u_int64_t); -static u_int moea64_clear_bit(vm_page_t, u_int64_t, u_int64_t *); +static u_int moea64_clear_bit(vm_page_t, u_int64_t); static void moea64_kremove(mmu_t, vm_offset_t); static void moea64_syncicache(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz); @@ -377,6 +377,7 @@ vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea64_init(mmu_t); boolean_t moea64_is_modified(mmu_t, vm_page_t); +boolean_t moea64_is_referenced(mmu_t, vm_page_t); boolean_t moea64_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int); boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); @@ -417,6 +418,7 @@ MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), MMUMETHOD(mmu_init, moea64_init), MMUMETHOD(mmu_is_modified, moea64_is_modified), + MMUMETHOD(mmu_is_referenced, moea64_is_referenced), MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), MMUMETHOD(mmu_map, moea64_map), MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), @@ -1235,6 +1237,9 @@ if (pmap_bootstrapped) mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object), + ("moea64_enter_locked: page %p is not busy", m)); /* XXX change the pvo head for fake pages */ if ((m->flags & PG_FICTITIOUS) == PG_FICTITIOUS) { @@ -1247,7 +1252,8 @@ if (prot & VM_PROT_WRITE) { pte_lo |= LPTE_BW; - if (pmap_bootstrapped) + if (pmap_bootstrapped && + (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) vm_page_flag_set(m, PG_WRITEABLE); } else pte_lo |= LPTE_BR; @@ -1327,12 +1333,14 @@ psize = atop(end - start); m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pm); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea64_enter_locked(pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } + vm_page_unlock_queues(); PMAP_UNLOCK(pm); } @@ -1340,11 +1348,13 @@ moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { + + vm_page_lock_queues(); PMAP_LOCK(pm); moea64_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); + vm_page_unlock_queues(); PMAP_UNLOCK(pm); - } vm_paddr_t @@ -1373,18 +1383,23 @@ { struct pvo_entry *pvo; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, + pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); 
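+	/*
+	 * A note on the retry pattern above: vm_page_pa_tryrelock()
+	 * roughly exchanges whatever page lock is already held for the
+	 * one covering the new physical address; if it must drop the
+	 * pmap lock to acquire it, it returns non-zero and the PVO
+	 * lookup is redone under a consistent pmap state.
+	 */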
return (m); } @@ -1464,12 +1479,30 @@ } boolean_t +moea64_is_referenced(mmu_t mmu, vm_page_t m) +{ + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea64_is_referenced: page %p is not managed", m)); + return (moea64_query_bit(m, PTE_REF)); +} + +boolean_t moea64_is_modified(mmu_t mmu, vm_page_t m) { - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea64_is_modified: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no PTEs can have LPTE_CHG set. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return (FALSE); - return (moea64_query_bit(m, LPTE_CHG)); } @@ -1477,18 +1510,29 @@ moea64_clear_reference(mmu_t mmu, vm_page_t m) { - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return; - moea64_clear_bit(m, LPTE_REF, NULL); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea64_clear_reference: page %p is not managed", m)); + moea64_clear_bit(m, LPTE_REF); } void moea64_clear_modify(mmu_t mmu, vm_page_t m) { - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea64_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("moea64_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can have LPTE_CHG + * set. If the object containing the page is locked and the page is + * not VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) return; - moea64_clear_bit(m, LPTE_CHG, NULL); + moea64_clear_bit(m, LPTE_CHG); } /* @@ -1502,10 +1546,19 @@ pmap_t pmap; uint64_t lo; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea64_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. 
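+ *
+ *	PG_WRITEABLE is only ever set on the enter path while the page
+ *	is busy or its object is locked, e.g. (condensed from
+ *	moea64_enter_locked above):
+ *
+ *		if ((prot & VM_PROT_WRITE) != 0 && pmap_bootstrapped &&
+ *		    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
+ *			vm_page_flag_set(m, PG_WRITEABLE);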
+ */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); lo = moea64_attr_fetch(m); SYNC(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { @@ -1532,6 +1585,7 @@ vm_page_dirty(m); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -1549,14 +1603,10 @@ boolean_t moea64_ts_referenced(mmu_t mmu, vm_page_t m) { - int count; - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (0); - - count = moea64_clear_bit(m, LPTE_REF, NULL); - - return (count); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea64_ts_referenced: page %p is not managed", m)); + return (moea64_clear_bit(m, LPTE_REF)); } /* @@ -1705,21 +1755,23 @@ { int loops; struct pvo_entry *pvo; + boolean_t rv; - if (!moea64_initialized || (m->flags & PG_FICTITIOUS)) - return FALSE; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("moea64_page_exists_quick: page %p is not managed", m)); loops = 0; + rv = FALSE; + vm_page_lock_queues(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { - if (pvo->pvo_pmap == pmap) - return (TRUE); + if (pvo->pvo_pmap == pmap) { + rv = TRUE; + break; + } if (++loops >= 16) break; } - - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -1733,12 +1785,13 @@ int count; count = 0; - if (!moea64_initialized || (m->flags & PG_FICTITIOUS) != 0) + if ((m->flags & PG_FICTITIOUS) != 0) return (count); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & PVO_WIRED) != 0) count++; + vm_page_unlock_queues(); return (count); } @@ -1962,8 +2015,7 @@ struct pvo_entry *pvo, *next_pvo; pmap_t pmap; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - + vm_page_lock_queues(); pvo_head = vm_page_to_pvoh(m); for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) { next_pvo = LIST_NEXT(pvo, pvo_vlink); @@ -1979,6 +2031,7 @@ vm_page_dirty(m); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -2386,7 +2439,7 @@ if (moea64_attr_fetch(m) & ptebit) return (TRUE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { MOEA_PVO_CHECK(pvo); /* sanity check */ @@ -2398,6 +2451,7 @@ if (pvo->pvo_pte.lpte.pte_lo & ptebit) { moea64_attr_save(m, ptebit); MOEA_PVO_CHECK(pvo); /* sanity check */ + vm_page_unlock_queues(); return (TRUE); } } @@ -2425,29 +2479,29 @@ moea64_attr_save(m, ptebit); MOEA_PVO_CHECK(pvo); /* sanity check */ + vm_page_unlock_queues(); return (TRUE); } } UNLOCK_TABLE(); } + vm_page_unlock_queues(); return (FALSE); } static u_int -moea64_clear_bit(vm_page_t m, u_int64_t ptebit, u_int64_t *origbit) +moea64_clear_bit(vm_page_t m, u_int64_t ptebit) { u_int count; struct pvo_entry *pvo; struct lpte *pt; - uint64_t rv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); /* * Clear the cached value. 
*/ - rv = moea64_attr_fetch(m); moea64_attr_clear(m, ptebit); /* @@ -2476,16 +2530,12 @@ moea64_pte_clear(pt, pvo->pvo_pmap, PVO_VADDR(pvo), ptebit); } } - rv |= pvo->pvo_pte.lpte.pte_lo; pvo->pvo_pte.lpte.pte_lo &= ~ptebit; MOEA_PVO_CHECK(pvo); /* sanity check */ UNLOCK_TABLE(); } - if (origbit != NULL) { - *origbit = rv; - } - + vm_page_unlock_queues(); return (count); } Index: sparc64/sparc64/pmap.c =================================================================== --- sparc64/sparc64/pmap.c (revision 218945) +++ sparc64/sparc64/pmap.c (working copy) @@ -698,13 +698,17 @@ { struct tte *tp; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; + PMAP_LOCK(pm); +retry: if (pm == kernel_pmap) { if (va >= VM_MIN_DIRECT_ADDRESS) { tp = NULL; m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va)); + (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va), &pa); vm_page_hold(m); } else { tp = tsb_kvtotte(va); @@ -712,17 +716,17 @@ tp = NULL; } } else { - PMAP_LOCK(pm); tp = tsb_tte_lookup(pm, va); } if (tp != NULL && ((tp->tte_data & TD_SW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp)); vm_page_hold(m); } - vm_page_unlock_queues(); - if (pm != kernel_pmap) - PMAP_UNLOCK(pm); + PA_UNLOCK_COND(pa); + PMAP_UNLOCK(pm); return (m); } @@ -1097,7 +1101,7 @@ * Allocate an object for it. */ if (pm->pm_tsb_obj == NULL) - pm->pm_tsb_obj = vm_object_allocate(OBJT_DEFAULT, TSB_PAGES); + pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES); mtx_lock_spin(&sched_lock); for (i = 0; i < MAXCPU; i++) @@ -1161,16 +1165,10 @@ KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1")); while (!TAILQ_EMPTY(&obj->memq)) { m = TAILQ_FIRST(&obj->memq); - vm_page_lock_queues(); - if (vm_page_sleep_if_busy(m, FALSE, "pmaprl")) - continue; - KASSERT(m->hold_count == 0, - ("pmap_release: freeing held tsb page")); m->md.pmap = NULL; m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free_zero(m); - vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(obj); pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES); @@ -1255,7 +1253,7 @@ struct tte *tp; vm_offset_t va; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) { tpn = TAILQ_NEXT(tp, tte_link); if ((tp->tte_data & TD_PV) == 0) @@ -1278,6 +1276,7 @@ PMAP_UNLOCK(pm); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } int @@ -1287,13 +1286,10 @@ u_long data; vm_page_t m; - data = atomic_clear_long(&tp->tte_data, TD_REF | TD_SW | TD_W); - if ((data & TD_PV) != 0) { + data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W); + if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) { m = PHYS_TO_VM_PAGE(TD_PA(data)); - if ((data & TD_REF) != 0) - vm_page_flag_set(m, PG_REFERENCED); - if ((data & TD_W) != 0) - vm_page_dirty(m); + vm_page_dirty(m); } return (1); } @@ -1368,6 +1364,9 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pm, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object), + ("pmap_enter_locked: page %p is not busy", m)); PMAP_STATS_INC(pmap_nenter); pa = VM_PAGE_TO_PHYS(m); @@ -1424,7 +1423,8 @@ tp->tte_data |= TD_SW; if (wired) tp->tte_data |= TD_W; - vm_page_flag_set(m, PG_WRITEABLE); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) + vm_page_flag_set(m, PG_WRITEABLE); } else if ((data & TD_W) != 0) vm_page_dirty(m); @@ -1444,7 +1444,7 @@ } else { /* * If 
there is an existing mapping, but its for a different - * phsyical address, delete the old mapping. + * physical address, delete the old mapping. */ if (tp != NULL) { CTR0(KTR_PMAP, "pmap_enter_locked: replace"); @@ -1464,7 +1464,8 @@ data |= TD_P; if ((prot & VM_PROT_WRITE) != 0) { data |= TD_SW; - vm_page_flag_set(m, PG_WRITEABLE); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) + vm_page_flag_set(m, PG_WRITEABLE); } if (prot & VM_PROT_EXECUTE) { data |= TD_EXEC; @@ -1507,12 +1508,14 @@ psize = atop(end - start); m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pm); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { pmap_enter_locked(pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } + vm_page_unlock_queues(); PMAP_UNLOCK(pm); } @@ -1520,9 +1523,11 @@ pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { + vm_page_lock_queues(); PMAP_LOCK(pm); pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); + vm_page_unlock_queues(); PMAP_UNLOCK(pm); } @@ -1799,20 +1804,25 @@ { struct tte *tp; int loops; + boolean_t rv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (FALSE); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); loops = 0; + rv = FALSE; + vm_page_lock_queues(); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { if ((tp->tte_data & TD_PV) == 0) continue; - if (TTE_GET_PMAP(tp) == pm) - return (TRUE); + if (TTE_GET_PMAP(tp) == pm) { + rv = TRUE; + break; + } if (++loops >= 16) break; } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -1828,10 +1838,11 @@ count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED)) count++; + vm_page_unlock_queues(); return (count); } @@ -1853,14 +1864,19 @@ pmap_page_is_mapped(vm_page_t m) { struct tte *tp; + boolean_t rv; + rv = FALSE; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (FALSE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + return (rv); + vm_page_lock_queues(); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) - if ((tp->tte_data & TD_PV) != 0) - return (TRUE); - return (FALSE); + if ((tp->tte_data & TD_PV) != 0) { + rv = TRUE; + break; + } + vm_page_unlock_queues(); + return (rv); } /* @@ -1882,10 +1898,10 @@ u_long data; int count; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (0); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); count = 0; + vm_page_lock_queues(); if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) { tpf = tp; do { @@ -1899,6 +1915,7 @@ break; } while ((tp = tpn) != NULL && tp != tpf); } + vm_page_unlock_queues(); return (count); } @@ -1906,17 +1923,32 @@ pmap_is_modified(vm_page_t m) { struct tte *tp; + boolean_t rv; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (FALSE); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); + rv = FALSE; + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no TTEs can have TD_W set. 
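+ *
+ *	For illustration, a hypothetical use that mirrors how the VM
+ *	system consults the dirty state:
+ *
+ *		if (pmap_is_modified(m))
+ *			vm_page_dirty(m);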
+ */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) + return (rv); + vm_page_lock_queues(); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { if ((tp->tte_data & TD_PV) == 0) continue; - if ((tp->tte_data & TD_W) != 0) - return (TRUE); + if ((tp->tte_data & TD_W) != 0) { + rv = TRUE; + break; + } } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -1936,15 +1968,52 @@ return (rv); } +/* + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + struct tte *tp; + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); + TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { + if ((tp->tte_data & TD_PV) == 0) + continue; + if ((tp->tte_data & TD_REF) != 0) { + rv = TRUE; + break; + } + } + vm_page_unlock_queues(); + return (rv); +} + void pmap_clear_modify(vm_page_t m) { struct tte *tp; u_long data; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no TTEs can have TD_W set. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { if ((tp->tte_data & TD_PV) == 0) continue; @@ -1952,6 +2021,7 @@ if ((data & TD_W) != 0) tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp)); } + vm_page_unlock_queues(); } void @@ -1960,9 +2030,9 @@ struct tte *tp; u_long data; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return; + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { if ((tp->tte_data & TD_PV) == 0) continue; @@ -1970,6 +2040,7 @@ if ((data & TD_REF) != 0) tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp)); } + vm_page_unlock_queues(); } void @@ -1978,10 +2049,19 @@ struct tte *tp; u_long data; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. 
+ */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { if ((tp->tte_data & TD_PV) == 0) continue; @@ -1992,10 +2072,11 @@ } } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } int -pmap_mincore(pmap_t pm, vm_offset_t addr) +pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa) { /* TODO; */ Index: nfsclient/nfs_bio.c =================================================================== --- nfsclient/nfs_bio.c (revision 218945) +++ nfsclient/nfs_bio.c (working copy) @@ -131,12 +131,13 @@ */ VM_OBJECT_LOCK(object); if (pages[ap->a_reqpage]->valid != 0) { - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (0); } @@ -171,12 +172,13 @@ if (error && (uio.uio_resid == count)) { nfs_printf("nfs_getpages: error %d\n", error); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); } @@ -189,7 +191,6 @@ size = count - uio.uio_resid; VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -232,17 +233,23 @@ * now tell them that it is ok to use. */ if (!error) { - if (m->oflags & VPO_WANTED) + if (m->oflags & VPO_WANTED) { + vm_page_lock(m); vm_page_activate(m); - else + vm_page_unlock(m); + } else { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_wakeup(m); } else { + vm_page_lock(m); vm_page_free(m); + vm_page_unlock(m); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (0); } Index: ufs/ffs/ffs_vnops.c =================================================================== --- ufs/ffs/ffs_vnops.c (revision 218945) +++ ufs/ffs/ffs_vnops.c (working copy) @@ -859,13 +859,13 @@ if (mreq->valid) { if (mreq->valid != VM_PAGE_BITS_ALL) vm_page_zero_invalid(mreq, TRUE); - vm_page_lock_queues(); for (i = 0; i < pcount; i++) { if (i != ap->a_reqpage) { + vm_page_lock(ap->a_m[i]); vm_page_free(ap->a_m[i]); + vm_page_unlock(ap->a_m[i]); } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(mreq->object); return VM_PAGER_OK; } Index: kern/uipc_syscalls.c =================================================================== --- kern/uipc_syscalls.c (revision 218945) +++ kern/uipc_syscalls.c (working copy) @@ -1715,7 +1715,7 @@ m = sf_buf_page(args); sf_buf_free(args); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); /* * Check for the object going away on us. 
This can @@ -1724,7 +1724,7 @@ */ if (m->wire_count == 0 && m->object == NULL) vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); if (addr == NULL) return; sfs = addr; @@ -2108,7 +2108,7 @@ mbstat.sf_iocnt++; } if (error) { - vm_page_lock_queues(); + vm_page_lock(pg); vm_page_unwire(pg, 0); /* * See if anyone else might know about @@ -2117,10 +2117,9 @@ */ if (pg->wire_count == 0 && pg->valid == 0 && pg->busy == 0 && !(pg->oflags & VPO_BUSY) && - pg->hold_count == 0) { + pg->hold_count == 0) vm_page_free(pg); - } - vm_page_unlock_queues(); + vm_page_unlock(pg); VM_OBJECT_UNLOCK(obj); if (error == EAGAIN) error = 0; /* not a real error */ @@ -2140,14 +2139,11 @@ SFB_CATCH); if (sf == NULL) { mbstat.sf_allocfail++; - vm_page_lock_queues(); + vm_page_lock(pg); vm_page_unwire(pg, 0); - /* - * XXX: Not same check as above!? - */ - if (pg->wire_count == 0 && pg->object == NULL) - vm_page_free(pg); - vm_page_unlock_queues(); + KASSERT(pg->object != NULL, + ("kern_sendfile: object disappeared")); + vm_page_unlock(pg); if (m == NULL) error = (mnw ? EAGAIN : EINTR); break; Index: kern/vfs_bio.c =================================================================== --- kern/vfs_bio.c (revision 218945) +++ kern/vfs_bio.c (working copy) @@ -1360,9 +1360,7 @@ (PAGE_SIZE - poffset) : resid; KASSERT(presid >= 0, ("brelse: extra page")); - vm_page_lock_queues(); vm_page_set_invalid(m, poffset, presid); - vm_page_unlock_queues(); if (had_bogus) printf("avoided corruption bug in bogus_page/brelse code\n"); } @@ -1603,7 +1601,6 @@ vm_page_t m; VM_OBJECT_LOCK(bp->b_bufobj->bo_object); - vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; @@ -1611,16 +1608,15 @@ * In order to keep page LRU ordering consistent, put * everything on the inactive queue. */ + vm_page_lock(m); vm_page_unwire(m, 0); /* * We don't mess with busy pages, it is * the responsibility of the process that * busied the pages to deal with them. */ - if ((m->oflags & VPO_BUSY) || (m->busy != 0)) - continue; - - if (m->wire_count == 0) { + if ((m->oflags & VPO_BUSY) == 0 && m->busy == 0 && + m->wire_count == 0) { /* * Might as well free the page if we can and it has * no valid data. We also free the page if the @@ -1635,8 +1631,8 @@ vm_page_try_to_cache(m); } } + vm_page_unlock(m); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); @@ -2470,7 +2466,6 @@ VM_OBJECT_LOCK(bp->b_bufobj->bo_object); vfs_drain_busy_pages(bp); vfs_setdirty_locked_object(bp); - vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; eoff = noff; @@ -2481,7 +2476,6 @@ /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */ foff = noff; } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); } @@ -2502,7 +2496,6 @@ vm_offset_t boffset; vm_offset_t eoffset; - vm_page_lock_queues(); /* * test the pages to see if they have been modified directly * by users through the VM system. @@ -2528,7 +2521,6 @@ } eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); - vm_page_unlock_queues(); /* * Fit it to the buffer. 
*/ @@ -2996,7 +2988,6 @@ vm_page_t m; VM_OBJECT_LOCK(bp->b_bufobj->bo_object); - vm_page_lock_queues(); for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it @@ -3006,13 +2997,15 @@ m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); - while (vm_page_sleep_if_busy(m, TRUE, "biodep")) - vm_page_lock_queues(); + while (vm_page_sleep_if_busy(m, TRUE, + "biodep")) + continue; bp->b_pages[i] = NULL; + vm_page_lock(m); vm_page_unwire(m, 0); + vm_page_unlock(m); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); pmap_qremove((vm_offset_t) trunc_page((vm_offset_t)bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); @@ -3078,15 +3071,24 @@ * vm_fault->getpages->cluster_read->allocbuf * */ - if (vm_page_sleep_if_busy(m, FALSE, "pgtblk")) + if ((m->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking + * and sleeping so that the page daemon + * is less likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(m, PG_REFERENCED); + vm_page_sleep(m, "pgtblk"); continue; + } /* * We have a good page. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wire(m); - vm_page_unlock_queues(); + vm_page_unlock(m); bp->b_pages[bp->b_npages] = m; ++bp->b_npages; } @@ -3561,7 +3563,6 @@ { vm_ooffset_t soff, eoff; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); /* * Start and end offsets in buffer. eoff - soff may not cross a * page boundry or cross the end of the buffer. The end of the @@ -3643,8 +3644,6 @@ if (bp->b_bufsize != 0) vfs_setdirty_locked_object(bp); bogus = 0; - if (clear_modify) - vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; @@ -3677,8 +3676,6 @@ } foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } - if (clear_modify) - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(obj); if (bogus) pmap_qenter(trunc_page((vm_offset_t)bp->b_data), @@ -3892,12 +3889,12 @@ retry: if (vm_fault_quick(addr >= bp->b_data ? 
addr : bp->b_data, prot) < 0) { - vm_page_lock_queues(); for (i = 0; i < pidx; ++i) { + vm_page_lock(bp->b_pages[i]); vm_page_unhold(bp->b_pages[i]); + vm_page_unlock(bp->b_pages[i]); bp->b_pages[i] = NULL; } - vm_page_unlock_queues(); return(-1); } m = pmap_extract_and_hold(pmap, (vm_offset_t)addr, prot); @@ -3928,11 +3925,12 @@ npages = bp->b_npages; pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); - vm_page_lock_queues(); - for (pidx = 0; pidx < npages; pidx++) + for (pidx = 0; pidx < npages; pidx++) { + vm_page_lock(bp->b_pages[pidx]); vm_page_unhold(bp->b_pages[pidx]); - vm_page_unlock_queues(); - + vm_page_unlock(bp->b_pages[pidx]); + } + bp->b_data = bp->b_saveaddr; } Index: kern/kern_subr.c =================================================================== --- kern/kern_subr.c (revision 218945) +++ kern/kern_subr.c (working copy) @@ -107,9 +107,10 @@ if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) goto retry; - vm_page_lock_queues(); + vm_page_lock(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); + vm_page_unlock(user_pg); } else { /* * Even if a physical page does not exist in the @@ -118,11 +119,9 @@ */ if (uobject->backing_object != NULL) pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE); - vm_page_lock_queues(); } vm_page_insert(kern_pg, uobject, upindex); vm_page_dirty(kern_pg); - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); Index: kern/kern_exec.c =================================================================== --- kern/kern_exec.c (revision 218945) +++ kern/kern_exec.c (working copy) @@ -944,18 +944,18 @@ rv = vm_pager_get_pages(object, ma, initial_pagein, 0); ma[0] = vm_page_lookup(object, 0); if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) { - if (ma[0]) { - vm_page_lock_queues(); + if (ma[0] != NULL) { + vm_page_lock(ma[0]); vm_page_free(ma[0]); - vm_page_unlock_queues(); + vm_page_unlock(ma[0]); } VM_OBJECT_UNLOCK(object); return (EIO); } } - vm_page_lock_queues(); + vm_page_lock(ma[0]); vm_page_hold(ma[0]); - vm_page_unlock_queues(); + vm_page_unlock(ma[0]); vm_page_wakeup(ma[0]); VM_OBJECT_UNLOCK(object); @@ -975,9 +975,9 @@ m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } } Index: kern/uipc_shm.c =================================================================== --- kern/uipc_shm.c (revision 218945) +++ kern/uipc_shm.c (working copy) @@ -304,9 +304,7 @@ */ base = roundup2(base, DEV_BSIZE); - vm_page_lock_queues(); vm_page_clear_dirty(m, base, PAGE_SIZE - base); - vm_page_unlock_queues(); } else if ((length & PAGE_MASK) && __predict_false(object->cache != NULL)) { vm_page_cache_free(object, OFF_TO_IDX(length), Index: kern/sys_pipe.c =================================================================== --- kern/sys_pipe.c (revision 218945) +++ kern/sys_pipe.c (working copy) @@ -761,16 +761,15 @@ return (EFAULT); for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { /* - * vm_fault_quick() can sleep. Consequently, - * vm_page_lock_queue() and vm_page_unlock_queue() - * should not be performed outside of this loop. + * vm_fault_quick() can sleep. 
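
/*
 * Illustrative sketch, not part of the patch: releasing an array of held
 * pages under per-page locks, the pattern that vunmapbuf() and the pipe
 * direct-write teardown above (and the cxgb helpers later in the patch)
 * now share.  The function name is hypothetical; headers as in the first
 * sketch.
 */
static void
example_unhold_pages(vm_page_t *ma, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		vm_page_lock(ma[i]);
		vm_page_unhold(ma[i]);	/* drop the hold taken by pmap_extract_and_hold() */
		vm_page_unlock(ma[i]);
	}
}
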
*/ race: if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { - vm_page_lock_queues(); - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { + vm_page_lock(wpipe->pipe_map.ms[j]); vm_page_unhold(wpipe->pipe_map.ms[j]); - vm_page_unlock_queues(); + vm_page_unlock(wpipe->pipe_map.ms[j]); + } return (EFAULT); } wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, @@ -810,11 +809,11 @@ int i; PIPE_LOCK_ASSERT(wpipe, MA_OWNED); - vm_page_lock_queues(); for (i = 0; i < wpipe->pipe_map.npages; i++) { + vm_page_lock(wpipe->pipe_map.ms[i]); vm_page_unhold(wpipe->pipe_map.ms[i]); + vm_page_unlock(wpipe->pipe_map.ms[i]); } - vm_page_unlock_queues(); wpipe->pipe_map.npages = 0; } Index: kern/uipc_cow.c =================================================================== --- kern/uipc_cow.c (revision 218945) +++ kern/uipc_cow.c (working copy) @@ -80,7 +80,7 @@ pp = sf_buf_page(sf); sf_buf_free(sf); /* remove COW mapping */ - vm_page_lock_queues(); + vm_page_lock(pp); vm_page_cowclear(pp); vm_page_unwire(pp, 0); /* @@ -90,7 +90,7 @@ */ if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); socow_stats.iodone++; } @@ -128,10 +128,10 @@ /* * set up COW */ - vm_page_lock_queues(); + vm_page_lock(pp); if (vm_page_cowsetup(pp) != 0) { vm_page_unhold(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); return (0); } @@ -140,14 +140,13 @@ */ vm_page_wire(pp); vm_page_unhold(pp); - vm_page_unlock_queues(); - + vm_page_unlock(pp); /* * Allocate an sf buf */ sf = sf_buf_alloc(pp, SFB_CATCH); - if (!sf) { - vm_page_lock_queues(); + if (sf == NULL) { + vm_page_lock(pp); vm_page_cowclear(pp); vm_page_unwire(pp, 0); /* @@ -157,7 +156,7 @@ */ if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); socow_stats.fail_sf_buf++; return(0); } Index: kern/sys_process.c =================================================================== --- kern/sys_process.c (revision 218945) +++ kern/sys_process.c (working copy) @@ -59,6 +59,7 @@ #include #include #include +#include #include #ifdef COMPAT_FREEBSD32 @@ -237,10 +238,10 @@ proc_rwmem(struct proc *p, struct uio *uio) { vm_map_t map; - vm_object_t backing_object, object = NULL; - vm_offset_t pageno = 0; /* page number */ + vm_object_t backing_object, object; + vm_offset_t pageno; /* page number */ vm_prot_t reqprot; - int error, fault_flags, writing; + int error, writing; /* * Assert that someone has locked this vmspace. (Should be @@ -256,9 +257,7 @@ map = &p->p_vmspace->vm_map; writing = uio->uio_rw == UIO_WRITE; - reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : - VM_PROT_READ; - fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL; + reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ; /* * Only map in one page at a time. We don't have to, but it @@ -293,7 +292,7 @@ /* * Fault the page on behalf of the process */ - error = vm_fault(map, pageno, reqprot, fault_flags); + error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL); if (error) { if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; @@ -303,8 +302,8 @@ } /* - * Now we need to get the page. out_entry, out_prot, wired, - * and single_use aren't used. One would think the vm code + * Now we need to get the page. out_entry and wired + * aren't used. One would think the vm code * would be a *bit* nicer... We use tmap because * vm_map_lookup() can change the map argument. 
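
/*
 * Illustrative sketch, not part of the patch: the fault request that
 * proc_rwmem() now issues.  A write by the tracing process is requested as
 * a read fault with VM_PROT_COPY, so vm_fault() performs any needed
 * copy-on-write up front; the looked-up page is then dirtied explicitly in
 * the following hunk instead of relying on VM_PROT_OVERRIDE_WRITE and
 * VM_FAULT_DIRTY.  The function name is hypothetical, <vm/vm_map.h> and
 * <vm/vm_extern.h> are additionally assumed, and the return value is the
 * Mach-style KERN_* status from vm_fault().
 */
static int
example_fault_for_ptrace(vm_map_t map, vm_offset_t pageno, int writing)
{
	vm_prot_t reqprot;

	reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
	return (vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL));
}
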
*/ @@ -327,6 +326,10 @@ VM_OBJECT_UNLOCK(object); object = backing_object; } + if (writing && m != NULL) { + vm_page_dirty(m); + vm_pager_page_unswapped(m); + } VM_OBJECT_UNLOCK(object); if (m == NULL) { vm_map_lookup_done(tmap, out_entry); @@ -337,9 +340,9 @@ /* * Hold the page in memory. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_hold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); /* * We're done with tmap now. @@ -358,9 +361,9 @@ /* * Release the page. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); Index: kern/subr_witness.c =================================================================== --- kern/subr_witness.c (revision 218945) +++ kern/subr_witness.c (working copy) @@ -604,6 +604,15 @@ { "cdev", &lock_class_mtx_sleep }, { NULL, NULL }, /* + * VM + * + */ + { "vm object", &lock_class_mtx_sleep }, + { "page lock", &lock_class_mtx_sleep }, + { "vm page queue mutex", &lock_class_mtx_sleep }, + { "pmap", &lock_class_mtx_sleep }, + { NULL, NULL }, + /* * kqueue/VFS interaction */ { "kqueue", &lock_class_mtx_sleep }, Index: ia64/ia64/pmap.c =================================================================== --- ia64/ia64/pmap.c (revision 218945) +++ ia64/ia64/pmap.c (working copy) @@ -1051,18 +1051,22 @@ struct ia64_lpte *pte; pmap_t oldpmap; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); +retry: pte = pmap_find_vhpt(va); if (pte != NULL && pmap_present(pte) && (pmap_prot(pte) & prot) == prot) { m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); + if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa)) + goto retry; vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); return (m); @@ -1411,15 +1415,9 @@ pmap_t oldpmap; pv_entry_t pv; -#if defined(DIAGNOSTIC) - /* - * XXX This makes pmap_remove_all() illegal for non-managed pages! - */ - if (m->flags & PG_FICTITIOUS) { - panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m)); - } -#endif - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & PG_FICTITIOUS) == 0, + ("pmap_remove_all: page %p is fictitious", m)); + vm_page_lock_queues(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { struct ia64_lpte *pte; pmap_t pmap = pv->pv_pmap; @@ -1436,6 +1434,7 @@ PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -1473,19 +1472,13 @@ if (pmap_prot(pte) == prot) continue; - if (pmap_managed(pte)) { - vm_offset_t pa = pmap_ppn(pte); + if ((prot & VM_PROT_WRITE) == 0 && + pmap_managed(pte) && pmap_dirty(pte)) { + vm_paddr_t pa = pmap_ppn(pte); vm_page_t m = PHYS_TO_VM_PAGE(pa); - if (pmap_dirty(pte)) { - vm_page_dirty(m); - pmap_clear_dirty(pte); - } - - if (pmap_accessed(pte)) { - vm_page_flag_set(m, PG_REFERENCED); - pmap_clear_accessed(pte); - } + vm_page_dirty(m); + pmap_clear_dirty(pte); } if (prot & VM_PROT_EXECUTE) @@ -1527,10 +1520,10 @@ oldpmap = pmap_switch(pmap); va &= ~PAGE_MASK; -#ifdef DIAGNOSTIC - if (va > VM_MAX_KERNEL_ADDRESS) - panic("pmap_enter: toobig"); -#endif + KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0, + ("pmap_enter: page %p is not busy", m)); /* * Find (or create) a pte for the given mapping. 
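
/*
 * Illustrative sketch, not part of the patch: the vm_page_pa_tryrelock()
 * retry idiom used by the pmap_extract_and_hold() hunks.  The page table
 * walk is elided; "example_lookup_pa()" is a hypothetical placeholder that
 * returns 0 when no mapping with the requested protection exists.  The
 * machine-dependent pmap header providing PMAP_LOCK()/PMAP_UNLOCK() is
 * assumed.
 */
vm_paddr_t example_lookup_pa(pmap_t pmap, vm_offset_t va, vm_prot_t prot);

static vm_page_t
example_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	vm_page_t m;
	vm_paddr_t locked_pa, pa;

	m = NULL;
	locked_pa = 0;
	PMAP_LOCK(pmap);
retry:
	pa = example_lookup_pa(pmap, va, prot);
	if (pa != 0) {
		/*
		 * vm_page_pa_tryrelock() returns nonzero when it had to
		 * drop the pmap lock to sleep for the page lock, in which
		 * case the translation is stale and must be re-walked.
		 */
		if (vm_page_pa_tryrelock(pmap, pa, &locked_pa))
			goto retry;
		m = PHYS_TO_VM_PAGE(pa);
		vm_page_hold(m);	/* the hold is covered by the page lock */
	}
	PA_UNLOCK_COND(locked_pa);	/* drop the page lock, if one is held */
	PMAP_UNLOCK(pmap);
	return (m);
}
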
@@ -1626,7 +1619,7 @@ if (icache_inval) ia64_sync_icache(va, PAGE_SIZE); - if ((prot & VM_PROT_WRITE) != 0) + if ((prot & VM_PROT_WRITE) != 0 && managed) vm_page_flag_set(m, PG_WRITEABLE); vm_page_unlock_queues(); pmap_switch(oldpmap); @@ -1656,12 +1649,14 @@ VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); psize = atop(end - start); m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot); m = TAILQ_NEXT(m, listq); } + vm_page_unlock_queues(); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } @@ -1680,9 +1675,11 @@ { pmap_t oldpmap; + vm_page_lock_queues(); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); pmap_enter_quick_locked(pmap, va, m, prot); + vm_page_unlock_queues(); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } @@ -1864,23 +1861,23 @@ { pv_entry_t pv; int loops = 0; + boolean_t rv; - if (m->flags & PG_FICTITIOUS) - return FALSE; - - /* - * Not found, check current mappings returning immediately if found. - */ - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pv->pv_pmap == pmap) { - return TRUE; + rv = TRUE; + break; } loops++; if (loops >= 16) break; } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -1900,7 +1897,7 @@ count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = pv->pv_pmap; PMAP_LOCK(pmap); @@ -1912,6 +1909,7 @@ pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } + vm_page_unlock_queues(); return (count); } @@ -1975,9 +1973,9 @@ pv_entry_t pv; int count = 0; - if (m->flags & PG_FICTITIOUS) - return 0; - + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { PMAP_LOCK(pv->pv_pmap); oldpmap = pmap_switch(pv->pv_pmap); @@ -1991,8 +1989,8 @@ pmap_switch(oldpmap); PMAP_UNLOCK(pv->pv_pmap); } - - return count; + vm_page_unlock_queues(); + return (count); } /* @@ -2009,10 +2007,20 @@ pv_entry_t pv; boolean_t rv; + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); rv = FALSE; - if (m->flags & PG_FICTITIOUS) + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no PTEs can be dirty. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return (rv); - + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { PMAP_LOCK(pv->pv_pmap); oldpmap = pmap_switch(pv->pv_pmap); @@ -2024,7 +2032,7 @@ if (rv) break; } - + vm_page_unlock_queues(); return (rv); } @@ -2046,6 +2054,39 @@ } /* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. 
+ */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + struct ia64_lpte *pte; + pmap_t oldpmap; + pv_entry_t pv; + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + PMAP_LOCK(pv->pv_pmap); + oldpmap = pmap_switch(pv->pv_pmap); + pte = pmap_find_vhpt(pv->pv_va); + pmap_switch(oldpmap); + KASSERT(pte != NULL, ("pte")); + rv = pmap_accessed(pte) ? TRUE : FALSE; + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + vm_page_unlock_queues(); + return (rv); +} + +/* * Clear the modify bits on the specified physical page. */ void @@ -2055,9 +2096,20 @@ pmap_t oldpmap; pv_entry_t pv; - if (m->flags & PG_FICTITIOUS) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can be modified. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) return; - + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { PMAP_LOCK(pv->pv_pmap); oldpmap = pmap_switch(pv->pv_pmap); @@ -2070,6 +2122,7 @@ pmap_switch(oldpmap); PMAP_UNLOCK(pv->pv_pmap); } + vm_page_unlock_queues(); } /* @@ -2084,9 +2137,9 @@ pmap_t oldpmap; pv_entry_t pv; - if (m->flags & PG_FICTITIOUS) - return; - + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { PMAP_LOCK(pv->pv_pmap); oldpmap = pmap_switch(pv->pv_pmap); @@ -2099,6 +2152,7 @@ pmap_switch(oldpmap); PMAP_UNLOCK(pv->pv_pmap); } + vm_page_unlock_queues(); } /* @@ -2112,10 +2166,19 @@ pv_entry_t pv; vm_prot_t prot; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & PG_FICTITIOUS) != 0 || + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. 
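
/*
 * Illustrative sketch, not part of the patch: the guard that the
 * pmap_clear_modify()/pmap_remove_write() hunks add before walking the pv
 * list.  With the object locked and the page not VPO_BUSY, PG_WRITEABLE
 * cannot be set concurrently, so a clear PG_WRITEABLE proves that no
 * managed mapping is writable and the walk can be skipped.  The function
 * name is hypothetical and the machine-dependent pv list walk is elided.
 */
static void
example_remove_write(vm_page_t m)
{

	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
	    ("example_remove_write: page %p is not managed", m));
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((m->oflags & VPO_BUSY) == 0 &&
	    (m->flags & PG_WRITEABLE) == 0)
		return;
	vm_page_lock_queues();
	/* ... write-protect every mapping on the page's pv list here ... */
	vm_page_flag_clear(m, PG_WRITEABLE);
	vm_page_unlock_queues();
}
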
+ */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = pv->pv_pmap; PMAP_LOCK(pmap); @@ -2136,6 +2199,7 @@ PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -2165,13 +2229,15 @@ * perform the pmap work for mincore */ int -pmap_mincore(pmap_t pmap, vm_offset_t addr) +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pmap_t oldpmap; struct ia64_lpte *pte, tpte; - int val = 0; + vm_paddr_t pa; + int val; PMAP_LOCK(pmap); +retry: oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(addr); if (pte != NULL) { @@ -2179,55 +2245,27 @@ pte = &tpte; } pmap_switch(oldpmap); + if (pte == NULL || !pmap_present(pte)) { + val = 0; + goto out; + } + val = MINCORE_INCORE; + if (pmap_dirty(pte)) + val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; + if (pmap_accessed(pte)) + val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; + if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != + (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && + pmap_managed(pte)) { + pa = pmap_ppn(pte); + /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ + if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) + goto retry; + } else +out: + PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); - - if (pte == NULL) - return 0; - - if (pmap_present(pte)) { - vm_page_t m; - vm_offset_t pa; - - val = MINCORE_INCORE; - if (!pmap_managed(pte)) - return val; - - pa = pmap_ppn(pte); - - m = PHYS_TO_VM_PAGE(pa); - - /* - * Modified by us - */ - if (pmap_dirty(pte)) - val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; - else { - /* - * Modified by someone - */ - vm_page_lock_queues(); - if (pmap_is_modified(m)) - val |= MINCORE_MODIFIED_OTHER; - vm_page_unlock_queues(); - } - /* - * Referenced by us - */ - if (pmap_accessed(pte)) - val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; - else { - /* - * Referenced by someone - */ - vm_page_lock_queues(); - if (pmap_ts_referenced(m)) { - val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } - vm_page_unlock_queues(); - } - } - return val; + return (val); } void Index: fs/nfsclient/nfs_clbio.c =================================================================== --- fs/nfsclient/nfs_clbio.c (revision 218945) +++ fs/nfsclient/nfs_clbio.c (working copy) @@ -134,12 +134,13 @@ */ VM_OBJECT_LOCK(object); if (pages[ap->a_reqpage]->valid != 0) { - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (0); } @@ -174,12 +175,13 @@ if (error && (uio.uio_resid == count)) { ncl_printf("nfs_getpages: error %d\n", error); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); } @@ -192,7 +194,6 @@ size = count - uio.uio_resid; VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -235,17 +236,23 @@ * now tell them that it is ok to use. 
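
/*
 * Illustrative sketch, not part of the patch: the calling convention behind
 * the new pmap_mincore() signature used above.  When the pmap alone cannot
 * decide the MINCORE_*_OTHER bits, it returns with the page lock for
 * *locked_pa held so that the machine-independent mincore() code can
 * examine the vm_page before releasing the lock.  This consumer is
 * simplified (the real one also takes the page's object lock before
 * looking at it); the function name is hypothetical and <sys/mman.h> is
 * additionally assumed.
 */
static int
example_mincore_query(pmap_t pmap, vm_offset_t addr)
{
	vm_page_t m;
	vm_paddr_t locked_pa;
	int val;

	locked_pa = 0;
	val = pmap_mincore(pmap, addr, &locked_pa);
	if (locked_pa != 0) {
		/* The page lock covering locked_pa is held here. */
		m = PHYS_TO_VM_PAGE(locked_pa);
		if (m->dirty != 0)
			val |= MINCORE_MODIFIED_OTHER;
		if ((m->flags & PG_REFERENCED) != 0 || pmap_is_referenced(m))
			val |= MINCORE_REFERENCED_OTHER;
		vm_page_unlock(m);
	}
	return (val);
}
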
*/ if (!error) { - if (m->oflags & VPO_WANTED) + if (m->oflags & VPO_WANTED) { + vm_page_lock(m); vm_page_activate(m); - else + vm_page_unlock(m); + } else { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_wakeup(m); } else { + vm_page_lock(m); vm_page_free(m); + vm_page_unlock(m); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (0); } Index: fs/tmpfs/tmpfs_vnops.c =================================================================== --- fs/tmpfs/tmpfs_vnops.c (revision 218945) +++ fs/tmpfs/tmpfs_vnops.c (working copy) @@ -460,9 +460,9 @@ error = uiomove_fromphys(&m, offset, tlen, uio); VM_OBJECT_LOCK(tobj); out: - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, TRUE); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_object_pip_subtract(tobj, 1); VM_OBJECT_UNLOCK(tobj); @@ -515,8 +515,16 @@ lookupvpg: if (((m = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(m, offset, tlen)) { - if (vm_page_sleep_if_busy(m, FALSE, "tmfsmr")) + if ((m->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and sleeping so + * that the page daemon is less likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(m, PG_REFERENCED); + vm_page_sleep(m, "tmfsmr"); goto lookupvpg; + } vm_page_busy(m); VM_OBJECT_UNLOCK(vobj); error = uiomove_fromphys(&m, offset, tlen, uio); @@ -527,8 +535,16 @@ } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) { KASSERT(offset == 0, ("unexpected offset in tmpfs_mappedread for sendfile")); - if (vm_page_sleep_if_busy(m, FALSE, "tmfsmr")) + if ((m->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and sleeping so + * that the page daemon is less likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(m, PG_REFERENCED); + vm_page_sleep(m, "tmfsmr"); goto lookupvpg; + } vm_page_busy(m); VM_OBJECT_UNLOCK(vobj); sched_pin(); @@ -630,12 +646,18 @@ lookupvpg: if (((vpg = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(vpg, offset, tlen)) { - if (vm_page_sleep_if_busy(vpg, FALSE, "tmfsmw")) + if ((vpg->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and sleeping so + * that the page daemon is less likely to reclaim it. 
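
/*
 * Illustrative sketch, not part of the patch: the completion idiom that the
 * NFS getpages hunk above (and the SMBFS/NWFS hunks later in the patch)
 * now uses, disposing of each non-requested page under its own page lock
 * rather than the page queues lock.  The function name is hypothetical;
 * the caller holds the object lock and each page is still busied by the
 * pager.
 */
static void
example_getpages_finish(vm_page_t m, int error)
{

	if (error == 0) {
		vm_page_lock(m);
		if (m->oflags & VPO_WANTED)
			vm_page_activate(m);	/* a sleeper wants the page soon */
		else
			vm_page_deactivate(m);	/* read-ahead page, cheap to reclaim */
		vm_page_unlock(m);
		vm_page_wakeup(m);		/* clear VPO_BUSY and wake waiters */
	} else {
		vm_page_lock(m);
		vm_page_free(m);
		vm_page_unlock(m);
	}
}
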
+ */ + vm_page_lock_queues(); + vm_page_flag_set(vpg, PG_REFERENCED); + vm_page_sleep(vpg, "tmfsmw"); goto lookupvpg; + } vm_page_busy(vpg); - vm_page_lock_queues(); vm_page_undirty(vpg); - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(vobj); error = uiomove_fromphys(&vpg, offset, tlen, uio); } else { @@ -668,14 +690,14 @@ out: if (vobj != NULL) VM_OBJECT_LOCK(vobj); - vm_page_lock_queues(); if (error == 0) { KASSERT(tpg->valid == VM_PAGE_BITS_ALL, ("parts of tpg invalid")); vm_page_dirty(tpg); } + vm_page_lock(tpg); vm_page_unwire(tpg, TRUE); - vm_page_unlock_queues(); + vm_page_unlock(tpg); vm_page_wakeup(tpg); if (vpg != NULL) vm_page_wakeup(vpg); Index: fs/smbfs/smbfs_io.c =================================================================== --- fs/smbfs/smbfs_io.c (revision 218945) +++ fs/smbfs/smbfs_io.c (working copy) @@ -456,12 +456,13 @@ VM_OBJECT_LOCK(object); if (m->valid != 0) { - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return 0; } @@ -494,19 +495,19 @@ VM_OBJECT_LOCK(object); if (error && (uio.uio_resid == count)) { printf("smbfs_getpages: error %d\n",error); - vm_page_lock_queues(); for (i = 0; i < npages; i++) { - if (reqpage != i) + if (reqpage != i) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return VM_PAGER_ERROR; } size = count - uio.uio_resid; - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -550,17 +551,23 @@ * now tell them that it is ok to use. */ if (!error) { - if (m->oflags & VPO_WANTED) + if (m->oflags & VPO_WANTED) { + vm_page_lock(m); vm_page_activate(m); - else + vm_page_unlock(m); + } else { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_wakeup(m); } else { + vm_page_lock(m); vm_page_free(m); + vm_page_unlock(m); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return 0; #endif /* SMBFS_RWGENERIC */ @@ -650,12 +657,10 @@ if (!error) { int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; - vm_page_lock_queues(); for (i = 0; i < nwritten; i++) { rtvals[i] = VM_PAGER_OK; vm_page_undirty(pages[i]); } - vm_page_unlock_queues(); } return rtvals[0]; #endif /* SMBFS_RWGENERIC */ Index: fs/nwfs/nwfs_io.c =================================================================== --- fs/nwfs/nwfs_io.c (revision 218945) +++ fs/nwfs/nwfs_io.c (working copy) @@ -443,19 +443,19 @@ VM_OBJECT_LOCK(object); if (error && (uio.uio_resid == count)) { printf("nwfs_getpages: error %d\n",error); - vm_page_lock_queues(); for (i = 0; i < npages; i++) { - if (ap->a_reqpage != i) + if (ap->a_reqpage != i) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return VM_PAGER_ERROR; } size = count - uio.uio_resid; - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -486,17 +486,23 @@ * now tell them that it is ok to use. 
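
/*
 * Illustrative sketch, not part of the patch: the busy-wait idiom that
 * replaces vm_page_sleep_if_busy() in the tmpfs hunks above and in the
 * vfs_bio getblk path earlier in the patch.  The caller holds the object
 * lock; vm_page_sleep() releases the page queues lock, sleeps, and returns
 * with the object lock reacquired, so the page must be looked up again.
 * The function name and the "exbusy" wait message are hypothetical.
 */
static vm_page_t
example_lookup_unbusied(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
retry:
	m = vm_page_lookup(object, pindex);
	if (m != NULL && (m->oflags & VPO_BUSY) != 0) {
		/*
		 * Reference the page before unlocking and sleeping so
		 * that the page daemon is less likely to reclaim it.
		 */
		vm_page_lock_queues();
		vm_page_flag_set(m, PG_REFERENCED);
		vm_page_sleep(m, "exbusy");
		goto retry;
	}
	return (m);
}
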
*/ if (!error) { - if (m->oflags & VPO_WANTED) + if (m->oflags & VPO_WANTED) { + vm_page_lock(m); vm_page_activate(m); - else + vm_page_unlock(m); + } else { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_wakeup(m); } else { + vm_page_lock(m); vm_page_free(m); + vm_page_unlock(m); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return 0; #endif /* NWFS_RWCACHE */ @@ -580,12 +586,10 @@ if (!error) { int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; - vm_page_lock_queues(); for (i = 0; i < nwritten; i++) { rtvals[i] = VM_PAGER_OK; vm_page_undirty(pages[i]); } - vm_page_unlock_queues(); } return rtvals[0]; #endif /* NWFS_RWCACHE */ Index: mips/mips/pmap.c =================================================================== --- mips/mips/pmap.c (revision 218945) +++ mips/mips/pmap.c (working copy) @@ -801,17 +801,22 @@ { pt_entry_t pte; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: pte = *pmap_pte(pmap, va); if (pte != 0 && pte_test(&pte, PTE_V) && (pte_test(&pte, PTE_D) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, TLBLO_PTE_TO_PA(pte), &pa)) + goto retry; + m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(pte)); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -2654,13 +2659,23 @@ vm_offset_t va; pt_entry_t *pte; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & PG_WRITEABLE) == 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return; /* * Loop over all current mappings setting/clearing as appropos. */ + vm_page_lock_queues(); for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) { npv = TAILQ_NEXT(pv, pv_plist); pte = pmap_pte(pv->pv_pmap, pv->pv_va); @@ -2672,6 +2687,7 @@ VM_PROT_READ | VM_PROT_EXECUTE); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -2683,11 +2699,12 @@ pmap_ts_referenced(vm_page_t m) { - if (m->flags & PG_FICTITIOUS) - return (0); - + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); if (m->md.pv_flags & PV_TABLE_REF) { + vm_page_lock_queues(); m->md.pv_flags &= ~PV_TABLE_REF; + vm_page_unlock_queues(); return (1); } return (0); @@ -2702,14 +2719,27 @@ boolean_t pmap_is_modified(vm_page_t m) { + boolean_t rv; - if (m->flags & PG_FICTITIOUS) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no PTEs can have PTE_M set. 
+ */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return (FALSE); - + vm_page_lock_queues(); if (m->md.pv_flags & PV_TABLE_MOD) - return (TRUE); + rv = TRUE; else - return (pmap_testbit(m, PTE_D)); + rv = pmap_testbit(m, PTE_D); + vm_page_unlock_queues(); + return (rv); } /* N/C */ @@ -2744,16 +2774,44 @@ void pmap_clear_modify(vm_page_t m) { - if (m->flags & PG_FICTITIOUS) + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can have PTE_M set. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); if (m->md.pv_flags & PV_TABLE_MOD) { pmap_changebit(m, PTE_D, FALSE); m->md.pv_flags &= ~PV_TABLE_MOD; } + vm_page_unlock_queues(); } /* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + return ((m->md.pv_flags & PV_TABLE_REF) != 0); +} + +/* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. @@ -2761,13 +2819,14 @@ void pmap_clear_reference(vm_page_t m) { - if (m->flags & PG_FICTITIOUS) - return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); if (m->md.pv_flags & PV_TABLE_REF) { m->md.pv_flags &= ~PV_TABLE_REF; } + vm_page_unlock_queues(); } /* @@ -2842,51 +2901,47 @@ * perform the pmap work for mincore */ int -pmap_mincore(pmap_t pmap, vm_offset_t addr) +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pt_entry_t *ptep, pte; + vm_offset_t pa; vm_page_t m; - int val = 0; + int val; + boolean_t managed; PMAP_LOCK(pmap); +retry: ptep = pmap_pte(pmap, addr); pte = (ptep != NULL) ? *ptep : 0; - PMAP_UNLOCK(pmap); - - if (pte_test(&pte, PTE_V)) { - vm_offset_t pa; - - val = MINCORE_INCORE; - pa = TLBLO_PTE_TO_PA(pte); - if (!page_is_managed(pa)) - return (val); - + if (!pte_test(&pte, PTE_V)) { + val = 0; + goto out; + } + val = MINCORE_INCORE; + if (pte_test(&pte, PTE_D)) + val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; + pa = TLBLO_PTE_TO_PA(pte); + managed = page_is_managed(pa); + if (managed) { + /* + * This may falsely report the given address as + * MINCORE_REFERENCED. Unfortunately, due to the lack of + * per-PTE reference information, it is impossible to + * determine if the address is MINCORE_REFERENCED. 
+ */ m = PHYS_TO_VM_PAGE(pa); - - /* - * Modified by us - */ - if (pte_test(&pte, PTE_D)) - val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; - /* - * Modified by someone - */ - else { - vm_page_lock_queues(); - if (m->dirty || pmap_is_modified(m)) - val |= MINCORE_MODIFIED_OTHER; - vm_page_unlock_queues(); - } - /* - * Referenced by us or someone - */ - vm_page_lock_queues(); - if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { + if ((m->flags & PG_REFERENCED) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } - vm_page_unlock_queues(); } + if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != + (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { + /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ + if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) + goto retry; + } else +out: + PA_UNLOCK_COND(*locked_pa); + PMAP_UNLOCK(pmap); return (val); } Index: dev/agp/agp.c =================================================================== --- dev/agp/agp.c (revision 218945) +++ dev/agp/agp.c (working copy) @@ -623,9 +623,9 @@ m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k)); if (k >= i) vm_page_wakeup(m); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(mem->am_obj); @@ -657,9 +657,9 @@ VM_OBJECT_LOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { m = vm_page_lookup(mem->am_obj, atop(i)); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(mem->am_obj); Index: dev/agp/agp_i810.c =================================================================== --- dev/agp/agp_i810.c (revision 218945) +++ dev/agp/agp_i810.c (working copy) @@ -1012,10 +1012,10 @@ VM_OBJECT_LOCK(mem->am_obj); m = vm_page_lookup(mem->am_obj, 0); + vm_page_lock(m); + vm_page_unwire(m, 0); + vm_page_unlock(m); VM_OBJECT_UNLOCK(mem->am_obj); - vm_page_lock_queues(); - vm_page_unwire(m, 0); - vm_page_unlock_queues(); } else { contigfree(sc->argb_cursor, mem->am_size, M_AGP); sc->argb_cursor = NULL; Index: dev/ti/if_ti.c =================================================================== --- dev/ti/if_ti.c (revision 218945) +++ dev/ti/if_ti.c (working copy) @@ -1488,10 +1488,8 @@ } sf[i] = sf_buf_alloc(frame, SFB_NOWAIT); if (sf[i] == NULL) { - vm_page_lock_queues(); vm_page_unwire(frame, 0); vm_page_free(frame); - vm_page_unlock_queues(); device_printf(sc->ti_dev, "buffer allocation " "failed -- packet dropped!\n"); printf(" index %d page %d\n", idx, i); Index: dev/md/md.c =================================================================== --- dev/md/md.c (revision 218945) +++ dev/md/md.c (working copy) @@ -667,14 +667,14 @@ sf_buf_free(sf); sched_unpin(); vm_page_wakeup(m); - vm_page_lock_queues(); + vm_page_lock(m); if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE) vm_page_free(m); else vm_page_activate(m); + vm_page_unlock(m); if (bp->bio_cmd == BIO_WRITE) vm_page_dirty(m); - vm_page_unlock_queues(); /* Actions on further pages start at offset 0 */ p += PAGE_SIZE - offs; Index: dev/cxgb/ulp/tom/cxgb_vm.c =================================================================== --- dev/cxgb/ulp/tom/cxgb_vm.c (revision 218945) +++ dev/cxgb/ulp/tom/cxgb_vm.c (working copy) @@ -90,11 +90,9 @@ * (and R/W if for write) if so just mark pages as held (and * dirty if for write) and return */ - vm_page_lock_queues(); for (pages = mp, faults = 0, va = addr; va < end; va += 
PAGE_SIZE, pages++) { /* - * page queue mutex is recursable so this is OK * it would be really nice if we had an unlocked * version of this so we were only acquiring the * pmap lock 1 time as opposed to potentially @@ -110,11 +108,13 @@ * will never have the modified bit set if * they are only changed via DMA */ - if (prot & VM_PROT_WRITE) + if (prot & VM_PROT_WRITE) { + vm_page_lock_queues(); vm_page_dirty(m); + vm_page_unlock_queues(); + } } - vm_page_unlock_queues(); if (faults == 0) return (0); @@ -142,13 +142,13 @@ error: log(LOG_WARNING, "vm_fault bad return rv=%d va=0x%zx\n", rv, va); - vm_page_lock_queues(); for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++) if (*pages) { + vm_page_lock(*pages); vm_page_unhold(*pages); + vm_page_unlock(*pages); *pages = NULL; } - vm_page_unlock_queues(); return (EFAULT); } @@ -157,10 +157,10 @@ { KASSERT(count >= 0, ("negative count %d", count)); - vm_page_lock_queues(); while (count--) { + vm_page_lock(*mp); vm_page_unhold(*mp); + vm_page_unlock(*mp); mp++; } - vm_page_unlock_queues(); } Property changes on: dev/xen/xenpci ___________________________________________________________________ Added: svn:mergeinfo Merged /head/sys/dev/xen/xenpci:r197750,198463,198721,198855,199490,199819,199869-199870,206823,206885,207155,207161,207163,207205,207210,207213,207262,207305,207308,207373-207374,207410,207412,207419,207437-207438,207448,207450-207452,207460,207519,207530-207531,207534-207535,207539-207541,207544,207548,207551-207552,207571,207573-207574,207576-207577,207584,207601,207617,207644,207649,207669,207694,207700,207702,207706,207708,207728,207738-207740,207746-207747,207752,207759,207796,207798,207805-207806,207822-207823,207846,207905,208175,208264,208278,208340,208504,208524,208532,208574,208609,208616,208645-208646,208651,208657,208659,208665,208667,208686-208688,208745,208764,208772,208791,208810,208846,208866,208990,209048,209173,209211,209226,209320-209321,209407,209610,209647-209648,209650-209651,209686,209702,211217,211958,212573,215471,215574,215610,215796,216333,216516,216555,216799,216899,217171,217177,217478-217479,218113,218773,218950 Index: sun4v/sun4v/pmap.c =================================================================== --- sun4v/sun4v/pmap.c (revision 218945) +++ sun4v/sun4v/pmap.c (working copy) @@ -189,6 +189,9 @@ static void free_pv_entry(pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap); +static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); +static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, + vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); @@ -966,14 +969,33 @@ pmap_clear_modify(vm_page_t m) { KDPRINTF("pmap_clear_modify(0x%lx)\n", VM_PAGE_TO_PHYS(m)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no TTEs can have VTD_W set. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 
+ */ + if ((m->flags & PG_WRITEABLE) == 0) + return; + vm_page_lock_queues(); tte_clear_phys_bit(m, VTD_W); + vm_page_unlock_queues(); } void pmap_clear_reference(vm_page_t m) { KDPRINTF("pmap_clear_reference(0x%lx)\n", VM_PAGE_TO_PHYS(m)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); tte_clear_phys_bit(m, VTD_REF); + vm_page_unlock_queues(); } void @@ -1058,9 +1080,13 @@ { vm_paddr_t pa, opa; uint64_t tte_data, otte_data; + pv_entry_t pv; vm_page_t om; int invlva; + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0, + ("pmap_enter: page %p is not busy", m)); if (pmap->pm_context) DPRINTF("pmap_enter(va=%lx, pa=0x%lx, prot=%x)\n", va, VM_PAGE_TO_PHYS(m), prot); @@ -1082,6 +1108,7 @@ pmap_add_tte(pmap, va, m, &tte_data, wired); } else if (pa != opa) { + pv = NULL; /* * Mapping has changed, handle validating new mapping. * @@ -1091,10 +1118,23 @@ if (otte_data & VTD_MANAGED) { om = PHYS_TO_VM_PAGE(opa); - pmap_remove_entry(pmap, om, va); + pv = pmap_pvh_remove(&om->md, pmap, va); } - pmap_add_tte(pmap, va, m, &tte_data, wired); + if (wired) + pmap->pm_stats.wired_count++; + + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { + if (pv == NULL) + pv = get_pv_entry(pmap); + pv->pv_va = va; + pv->pv_pmap = pmap; + TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count++; + tte_data |= VTD_MANAGED; + } else if (pv != NULL) + free_pv_entry(pv); } else /* (pa == opa) */ { /* @@ -1112,10 +1152,6 @@ else if (!wired && (otte_data & VTD_WIRED)) pmap->pm_stats.wired_count--; - /* - * We might be turning off write access to the page, - * so we go ahead and sense modify status. 
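
/*
 * Illustrative sketch, not part of the patch: the caller-side contract
 * behind the new "page %p is not busy" assertion in pmap_enter().  A
 * managed page must be exclusive-busied (VPO_BUSY) across the call, as
 * vm_fault() arranges; unmanaged and fictitious pages are exempt.  The
 * function name, the read-only protection, and the assumption that the
 * page is already resident and valid are illustrative only.
 */
static void
example_enter_mapping(pmap_t pmap, vm_offset_t va, vm_object_t object,
    vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_LOCK(object);
	m = vm_page_lookup(object, pindex);	/* assumed resident and valid */
	vm_page_busy(m);	/* sets VPO_BUSY; excludes pmap_clear_modify() et al. */
	VM_OBJECT_UNLOCK(object);

	pmap_enter(pmap, va, VM_PROT_READ, m, VM_PROT_READ, FALSE);

	VM_OBJECT_LOCK(object);
	vm_page_wakeup(m);	/* clear VPO_BUSY and wake any waiters */
	VM_OBJECT_UNLOCK(object);
}
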
- */ if (otte_data & VTD_MANAGED) { om = m; tte_data |= VTD_MANAGED; @@ -1127,7 +1163,8 @@ */ if ((prot & VM_PROT_WRITE) != 0) { tte_data |= VTD_SW_W; - vm_page_flag_set(m, PG_WRITEABLE); + if ((tte_data & VTD_MANAGED) != 0) + vm_page_flag_set(m, PG_WRITEABLE); } if ((prot & VM_PROT_EXECUTE) != 0) tte_data |= VTD_X; @@ -1151,6 +1188,9 @@ if ((pa & VTD_SW_W) != 0) invlva = TRUE; } + if ((otte_data & VTD_MANAGED) != 0 && + TAILQ_EMPTY(&om->md.pv_list)) + vm_page_flag_clear(om, PG_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va, TRUE); } @@ -1200,19 +1240,24 @@ VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); psize = atop(end - start); m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot); m = TAILQ_NEXT(m, listq); } + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { + + vm_page_lock_queues(); PMAP_LOCK(pmap); pmap_enter_quick_locked(pmap, va, m, prot); + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -1275,17 +1320,21 @@ { tte_t tte_data; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: tte_data = tte_hash_lookup(pmap->pm_hash, va); if (tte_data != 0 && ((tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, TTE_GET_PA(tte_data), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data)); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); @@ -1580,8 +1629,24 @@ boolean_t pmap_is_modified(vm_page_t m) { + boolean_t rv; - return (tte_get_phys_bit(m, VTD_W)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no TTEs can have VTD_W set. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) + return (FALSE); + vm_page_lock_queues(); + rv = tte_get_phys_bit(m, VTD_W); + vm_page_unlock_queues(); + return (rv); } @@ -1592,6 +1657,23 @@ } /* + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + vm_page_lock_queues(); + rv = tte_get_phys_bit(m, VTD_REF); + vm_page_unlock_queues(); + return (rv); +} + +/* * Extract the physical page address associated with the given kernel virtual * address. 
*/ @@ -1632,7 +1714,7 @@ } int -pmap_mincore(pmap_t pmap, vm_offset_t addr) +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { return (0); } @@ -1657,20 +1739,23 @@ { pv_entry_t pv; int loops = 0; + boolean_t rv; - if (m->flags & PG_FICTITIOUS) - return FALSE; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pv->pv_pmap == pmap) { - return TRUE; + rv = TRUE; + break; } loops++; if (loops >= 16) break; } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -1699,7 +1784,7 @@ count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = pv->pv_pmap; PMAP_LOCK(pmap); @@ -1708,6 +1793,7 @@ count++; PMAP_UNLOCK(pmap); } + vm_page_unlock_queues(); return (count); } @@ -1717,12 +1803,25 @@ void pmap_remove_write(vm_page_t m) { - if ((m->flags & PG_WRITEABLE) == 0) + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); tte_clear_phys_bit(m, VTD_SW_W|VTD_W); vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } + /* * Initialize the pmap associated with process 0. 
*/ @@ -1813,17 +1912,10 @@ if (!anychanged && (otte_data & VTD_W)) anychanged = 1; - if (otte_data & VTD_MANAGED) { - m = NULL; - - if (otte_data & VTD_REF) { - m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data)); - vm_page_flag_set(m, PG_REFERENCED); - } - if (otte_data & VTD_W) { - m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data)); - vm_page_dirty(m); - } + if ((otte_data & (VTD_MANAGED | VTD_W)) == (VTD_MANAGED | + VTD_W)) { + m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data)); + vm_page_dirty(m); } } @@ -1948,7 +2040,7 @@ uint64_t tte_data; DPRINTF("pmap_remove_all 0x%lx\n", VM_PAGE_TO_PHYS(m)); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { PMAP_LOCK(pv->pv_pmap); pv->pv_pmap->pm_stats.resident_count--; @@ -1978,18 +2070,20 @@ free_pv_entry(pv); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } -static void -pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) +static pv_entry_t +pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; if (pmap != kernel_pmap) - DPRINTF("pmap_remove_entry(va=0x%lx, pa=0x%lx)\n", va, VM_PAGE_TO_PHYS(m)); + DPRINTF("pmap_pvh_remove(va=0x%lx, pa=0x%lx)\n", va, + VM_PAGE_TO_PHYS(member2struct(vm_page, md, pvh))); PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (m->md.pv_list_count < pmap->pm_stats.resident_count) { - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pvh->pv_list_count < pmap->pm_stats.resident_count) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { if (pmap == pv->pv_pmap && va == pv->pv_va) break; } @@ -1999,13 +2093,33 @@ break; } } - KASSERT(pv != NULL, ("pmap_remove_entry: pv not found va=0x%lx pa=0x%lx", va, VM_PAGE_TO_PHYS(m))); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - m->md.pv_list_count--; + if (pv != NULL) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + pvh->pv_list_count--; + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + } + return (pv); +} + +static void +pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pvh_free: pv not found va=0x%lx pa=0x%lx", + va, VM_PAGE_TO_PHYS(member2struct(vm_page, md, pvh)))); + free_pv_entry(pv); +} + +static void +pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) +{ + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); - free_pv_entry(pv); } @@ -2199,17 +2313,15 @@ int pmap_ts_referenced(vm_page_t m) { - int rv; pv_entry_t pv, pvf, pvn; pmap_t pmap; tte_t otte_data; + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); rv = 0; - if (m->flags & PG_FICTITIOUS) - return (rv); - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; @@ -2237,6 +2349,7 @@ PMAP_UNLOCK(pmap); } while ((pv = pvn) != NULL && pv != pvf); } + vm_page_unlock_queues(); return (rv); } Index: vm/vm_kern.c =================================================================== --- vm/vm_kern.c (revision 218945) +++ vm/vm_kern.c (working copy) @@ -401,10 +401,8 @@ i -= PAGE_SIZE; m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); - vm_page_lock_queues(); vm_page_unwire(m, 0); vm_page_free(m); - vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(kmem_object); vm_map_delete(map, addr, addr + size); Index: 
vm/vm_pageout.c =================================================================== --- vm/vm_pageout.c (revision 218945) +++ vm/vm_pageout.c (working copy) @@ -215,6 +215,17 @@ #endif static void vm_pageout_page_stats(void); +static void +vm_pageout_init_marker(vm_page_t marker, u_short queue) +{ + + bzero(marker, sizeof(*marker)); + marker->flags = PG_FICTITIOUS | PG_MARKER; + marker->oflags = VPO_BUSY; + marker->queue = queue; + marker->wire_count = 1; +} + /* * vm_pageout_fallback_object_lock: * @@ -237,22 +248,16 @@ u_short queue; vm_object_t object; - /* - * Initialize our marker - */ - bzero(&marker, sizeof(marker)); - marker.flags = PG_FICTITIOUS | PG_MARKER; - marker.oflags = VPO_BUSY; - marker.queue = m->queue; - marker.wire_count = 1; - queue = m->queue; + vm_pageout_init_marker(&marker, queue); object = m->object; TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, m, &marker, pageq); vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_LOCK(object); + vm_page_lock(m); vm_page_lock_queues(); /* Page queue might have changed. */ @@ -266,6 +271,43 @@ } /* + * Lock the page while holding the page queue lock. Use marker page + * to detect page queue changes and maintain notion of next page on + * page queue. Return TRUE if no changes were detected, FALSE + * otherwise. The page is locked on return. The page queue lock might + * be dropped and reacquired. + * + * This function depends on normal struct vm_page being type stable. + */ +boolean_t +vm_pageout_page_lock(vm_page_t m, vm_page_t *next) +{ + struct vm_page marker; + boolean_t unchanged; + u_short queue; + + vm_page_lock_assert(m, MA_NOTOWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + + if (vm_page_trylock(m)) + return (TRUE); + + queue = m->queue; + vm_pageout_init_marker(&marker, queue); + + TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, m, &marker, pageq); + vm_page_unlock_queues(); + vm_page_lock(m); + vm_page_lock_queues(); + + /* Page queue might have changed. */ + *next = TAILQ_NEXT(&marker, pageq); + unchanged = (m->queue == queue && &marker == TAILQ_NEXT(m, pageq)); + TAILQ_REMOVE(&vm_page_queues[queue].pl, &marker, pageq); + return (unchanged); +} + +/* * vm_pageout_clean: * * Clean the page and remove it from the laundry. @@ -275,8 +317,7 @@ * late and we cannot do anything that will mess with the page. */ static int -vm_pageout_clean(m) - vm_page_t m; +vm_pageout_clean(vm_page_t m) { vm_object_t object; vm_page_t mc[2*vm_pageout_page_count], pb, ps; @@ -284,8 +325,9 @@ int ib, is, page_base; vm_pindex_t pindex = m->pindex; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); + object = m->object; + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); /* * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP @@ -299,10 +341,10 @@ /* * Can't clean the page if it's busy or held. */ - if ((m->hold_count != 0) || - ((m->busy != 0) || (m->oflags & VPO_BUSY))) { - return 0; - } + KASSERT(m->busy == 0 && (m->oflags & VPO_BUSY) == 0, + ("vm_pageout_clean: page %p is busy", m)); + KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m)); + vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; @@ -329,7 +371,6 @@ * first and attempt to align our cluster, then do a * forward scan if room remains. 
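
/*
 * Illustrative sketch, not part of the patch: how a page queue scan is
 * expected to use vm_pageout_page_lock().  The scan holds the page queues
 * lock, acquires the per-page lock (through the marker-based slow path if
 * the trylock fails), and treats a FALSE return as "the queue changed
 * while we slept".  The function name is hypothetical; <sys/queue.h> and
 * <vm/vm_pageout.h> are additionally assumed.
 */
static void
example_scan_inactive(void)
{
	vm_page_t m, next;

	vm_page_lock_queues();
	for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl); m != NULL;
	    m = next) {
		next = TAILQ_NEXT(m, pageq);
		if ((m->flags & PG_MARKER) != 0)
			continue;		/* another scan's marker */
		if (!vm_pageout_page_lock(m, &next)) {
			/* The page was requeued or reused; skip it. */
			vm_page_unlock(m);
			continue;
		}
		/* ... examine, deactivate, or launder m here ... */
		vm_page_unlock(m);
	}
	vm_page_unlock_queues();
}
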
*/ - object = m->object; more: while (ib && pageout_count < vm_pageout_page_count) { vm_page_t p; @@ -344,13 +385,16 @@ ib = 0; break; } + vm_page_lock(p); vm_page_test_dirty(p); if (p->dirty == 0 || p->queue != PQ_INACTIVE || p->hold_count != 0) { /* may be undergoing I/O */ + vm_page_unlock(p); ib = 0; break; } + vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; ++ib; @@ -369,12 +413,15 @@ if ((p = vm_page_next(ps)) == NULL || (p->oflags & VPO_BUSY) != 0 || p->busy != 0) break; + vm_page_lock(p); vm_page_test_dirty(p); if (p->dirty == 0 || p->queue != PQ_INACTIVE || p->hold_count != 0) { /* may be undergoing I/O */ + vm_page_unlock(p); break; } + vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; ++is; @@ -414,8 +461,9 @@ int numpagedout = 0; int i, runlen; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); + /* * Initiate I/O. Bump the vm_page_t->busy counter and * mark the pages read-only. @@ -433,13 +481,11 @@ vm_page_io_start(mc[i]); pmap_remove_write(mc[i]); } - vm_page_unlock_queues(); vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); runlen = count - mreq; - vm_page_lock_queues(); for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; @@ -466,7 +512,9 @@ * page so it doesn't clog the inactive list. (We * will try paging out it again later). */ + vm_page_lock(mt); vm_page_activate(mt); + vm_page_unlock(mt); break; case VM_PAGER_AGAIN: if (i >= mreq && i - mreq < runlen) @@ -483,8 +531,11 @@ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); vm_page_io_finish(mt); - if (vm_page_count_severe()) + if (vm_page_count_severe()) { + vm_page_lock(mt); vm_page_try_to_cache(mt); + vm_page_unlock(mt); + } } } if (prunlen != NULL) @@ -496,92 +547,86 @@ /* * vm_pageout_object_deactivate_pages * - * deactivate enough pages to satisfy the inactive target - * requirements or if vm_page_proc_limit is set, then - * deactivate all of the pages in the object and its - * backing_objects. + * Deactivate enough pages to satisfy the inactive target + * requirements. * * The object and map must be locked. */ static void -vm_pageout_object_deactivate_pages(pmap, first_object, desired) - pmap_t pmap; - vm_object_t first_object; - long desired; +vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, + long desired) { vm_object_t backing_object, object; - vm_page_t p, next; - int actcount, rcount, remove_mode; + vm_page_t p; + int actcount, remove_mode; VM_OBJECT_LOCK_ASSERT(first_object, MA_OWNED); if (first_object->type == OBJT_DEVICE || - first_object->type == OBJT_SG || - first_object->type == OBJT_PHYS) + first_object->type == OBJT_SG) return; for (object = first_object;; object = backing_object) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; - if (object->paging_in_progress) + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + if (object->type == OBJT_PHYS || object->paging_in_progress) goto unlock_return; remove_mode = 0; if (object->shadow_count > 1) remove_mode = 1; /* - * scan the objects entire memory queue + * Scan the object's entire memory queue. 
*/ - rcount = object->resident_page_count; - p = TAILQ_FIRST(&object->memq); - vm_page_lock_queues(); - while (p && (rcount-- > 0)) { - if (pmap_resident_count(pmap) <= desired) { - vm_page_unlock_queues(); + TAILQ_FOREACH(p, &object->memq, listq) { + if (pmap_resident_count(pmap) <= desired) goto unlock_return; - } - next = TAILQ_NEXT(p, listq); - cnt.v_pdpages++; - if (p->wire_count != 0 || - p->hold_count != 0 || - p->busy != 0 || - (p->oflags & VPO_BUSY) || - (p->flags & PG_UNMANAGED) || + if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0) + continue; + PCPU_INC(cnt.v_pdpages); + vm_page_lock(p); + if (p->wire_count != 0 || p->hold_count != 0 || !pmap_page_exists_quick(pmap, p)) { - p = next; + vm_page_unlock(p); continue; } actcount = pmap_ts_referenced(p); - if (actcount) { - vm_page_flag_set(p, PG_REFERENCED); - } else if (p->flags & PG_REFERENCED) { - actcount = 1; + if ((p->flags & PG_REFERENCED) != 0) { + if (actcount == 0) + actcount = 1; + vm_page_lock_queues(); + vm_page_flag_clear(p, PG_REFERENCED); + vm_page_unlock_queues(); } - if ((p->queue != PQ_ACTIVE) && - (p->flags & PG_REFERENCED)) { + if (p->queue != PQ_ACTIVE && actcount != 0) { vm_page_activate(p); p->act_count += actcount; - vm_page_flag_clear(p, PG_REFERENCED); } else if (p->queue == PQ_ACTIVE) { - if ((p->flags & PG_REFERENCED) == 0) { - p->act_count -= min(p->act_count, ACT_DECLINE); - if (!remove_mode && (vm_pageout_algorithm || (p->act_count == 0))) { + if (actcount == 0) { + p->act_count -= min(p->act_count, + ACT_DECLINE); + if (!remove_mode && + (vm_pageout_algorithm || + p->act_count == 0)) { pmap_remove_all(p); vm_page_deactivate(p); } else { + vm_page_lock_queues(); vm_page_requeue(p); + vm_page_unlock_queues(); } } else { vm_page_activate(p); - vm_page_flag_clear(p, PG_REFERENCED); - if (p->act_count < (ACT_MAX - ACT_ADVANCE)) + if (p->act_count < ACT_MAX - + ACT_ADVANCE) p->act_count += ACT_ADVANCE; + vm_page_lock_queues(); vm_page_requeue(p); + vm_page_unlock_queues(); } - } else if (p->queue == PQ_INACTIVE) { + } else if (p->queue == PQ_INACTIVE) pmap_remove_all(p); - } - p = next; + vm_page_unlock(p); } - vm_page_unlock_queues(); if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_LOCK(backing_object); @@ -706,14 +751,7 @@ */ page_shortage = vm_paging_target() + addl_page_shortage_init; - /* - * Initialize our marker - */ - bzero(&marker, sizeof(marker)); - marker.flags = PG_FICTITIOUS | PG_MARKER; - marker.oflags = VPO_BUSY; - marker.queue = PQ_INACTIVE; - marker.wire_count = 1; + vm_pageout_init_marker(&marker, PQ_INACTIVE); /* * Start scanning the inactive queue for pages we can move to the @@ -746,12 +784,10 @@ cnt.v_pdpages++; - if (VM_PAGE_GETQUEUE(m) != PQ_INACTIVE) { + if (m->queue != PQ_INACTIVE) goto rescan0; - } next = TAILQ_NEXT(m, pageq); - object = m->object; /* * skip marker pages @@ -760,25 +796,39 @@ continue; /* + * Lock the page. + */ + if (!vm_pageout_page_lock(m, &next)) { + vm_page_unlock(m); + addl_page_shortage++; + continue; + } + + /* * A held page may be undergoing I/O, so skip it. */ if (m->hold_count) { + vm_page_unlock(m); vm_page_requeue(m); addl_page_shortage++; continue; } + /* * Don't mess with busy pages, keep in the front of the * queue, most likely are being paged out. 
*/ + object = m->object; if (!VM_OBJECT_TRYLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || - m->hold_count != 0)) { + m->hold_count != 0)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); addl_page_shortage++; continue; } if (m->busy || (m->oflags & VPO_BUSY)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); addl_page_shortage++; continue; @@ -805,8 +855,9 @@ } else if (((m->flags & PG_REFERENCED) == 0) && (actcount = pmap_ts_referenced(m))) { vm_page_activate(m); + vm_page_unlock(m); + m->act_count += actcount + ACT_ADVANCE; VM_OBJECT_UNLOCK(object); - m->act_count += (actcount + ACT_ADVANCE); continue; } @@ -820,8 +871,9 @@ vm_page_flag_clear(m, PG_REFERENCED); actcount = pmap_ts_referenced(m); vm_page_activate(m); + vm_page_unlock(m); + m->act_count += actcount + ACT_ADVANCE + 1; VM_OBJECT_UNLOCK(object); - m->act_count += (actcount + ACT_ADVANCE + 1); continue; } @@ -907,6 +959,7 @@ * Those objects are in a "rundown" state. */ if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); continue; @@ -946,6 +999,8 @@ * of time. */ if (object->type == OBJT_VNODE) { + vm_page_unlock_queues(); + vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { @@ -953,11 +1008,11 @@ ++pageout_lock_miss; if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; + vm_page_lock_queues(); goto unlock_and_continue; } KASSERT(mp != NULL, ("vp %p with NULL v_mount", vp)); - vm_page_unlock_queues(); vm_object_reference_locked(object); VM_OBJECT_UNLOCK(object); vfslocked = VFS_LOCK_GIANT(vp->v_mount); @@ -972,6 +1027,7 @@ goto unlock_and_continue; } VM_OBJECT_LOCK(object); + vm_page_lock(m); vm_page_lock_queues(); /* * The page might have been moved to another @@ -979,9 +1035,10 @@ * above. The page might have been freed and * reused for another vnode. */ - if (VM_PAGE_GETQUEUE(m) != PQ_INACTIVE || + if (m->queue != PQ_INACTIVE || m->object != object || TAILQ_NEXT(m, pageq) != &marker) { + vm_page_unlock(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; goto unlock_and_continue; @@ -994,6 +1051,7 @@ * statistics are more correct if we don't. */ if (m->busy || (m->oflags & VPO_BUSY)) { + vm_page_unlock(m); goto unlock_and_continue; } @@ -1002,6 +1060,7 @@ * be undergoing I/O, so skip it */ if (m->hold_count) { + vm_page_unlock(m); vm_page_requeue(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; @@ -1019,11 +1078,14 @@ * the (future) cleaned page. Otherwise we could wind * up laundering or cleaning too many pages. 
*/ + vm_page_unlock_queues(); if (vm_pageout_clean(m) != 0) { --page_shortage; --maxlaunder; } + vm_page_lock_queues(); unlock_and_continue: + vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_UNLOCK(object); if (mp != NULL) { vm_page_unlock_queues(); @@ -1037,8 +1099,10 @@ next = TAILQ_NEXT(&marker, pageq); TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq); + vm_page_lock_assert(m, MA_NOTOWNED); continue; } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); } @@ -1057,21 +1121,28 @@ */ pcount = cnt.v_active_count; m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { - KASSERT(VM_PAGE_INQUEUE2(m, PQ_ACTIVE), + KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_scan: page %p isn't active", m)); next = TAILQ_NEXT(m, pageq); - object = m->object; if ((m->flags & PG_MARKER) != 0) { m = next; continue; } + if (!vm_pageout_page_lock(m, &next)) { + vm_page_unlock(m); + m = next; + continue; + } + object = m->object; if (!VM_OBJECT_TRYLOCK(object) && !vm_pageout_fallback_object_lock(m, &next)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); m = next; continue; } @@ -1082,6 +1153,7 @@ if ((m->busy != 0) || (m->oflags & VPO_BUSY) || (m->hold_count != 0)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); m = next; @@ -1128,7 +1200,8 @@ m->act_count == 0) { page_shortage--; if (object->ref_count == 0) { - pmap_remove_all(m); + KASSERT(!pmap_page_is_mapped(m), + ("vm_pageout_scan: page %p is mapped", m)); if (m->dirty == 0) vm_page_cache(m); else @@ -1140,6 +1213,7 @@ vm_page_requeue(m); } } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } @@ -1292,7 +1366,6 @@ static int fullintervalcount = 0; int page_shortage; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); page_shortage = (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) - (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count); @@ -1300,6 +1373,7 @@ if (page_shortage <= 0) return; + vm_page_lock_queues(); pcount = cnt.v_active_count; fullintervalcount += vm_pageout_stats_interval; if (fullintervalcount < vm_pageout_full_stats_interval) { @@ -1315,19 +1389,25 @@ while ((m != NULL) && (pcount-- > 0)) { int actcount; - KASSERT(VM_PAGE_INQUEUE2(m, PQ_ACTIVE), + KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_page_stats: page %p isn't active", m)); next = TAILQ_NEXT(m, pageq); - object = m->object; - if ((m->flags & PG_MARKER) != 0) { m = next; continue; } + vm_page_lock_assert(m, MA_NOTOWNED); + if (!vm_pageout_page_lock(m, &next)) { + vm_page_unlock(m); + m = next; + continue; + } + object = m->object; if (!VM_OBJECT_TRYLOCK(object) && !vm_pageout_fallback_object_lock(m, &next)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); m = next; continue; } @@ -1338,6 +1418,7 @@ if ((m->busy != 0) || (m->oflags & VPO_BUSY) || (m->hold_count != 0)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); m = next; @@ -1374,9 +1455,11 @@ vm_page_requeue(m); } } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } + vm_page_unlock_queues(); } /* @@ -1498,9 +1581,7 @@ if (error && !vm_pages_needed) { mtx_unlock(&vm_page_queue_free_mtx); pass = 0; - vm_page_lock_queues(); vm_pageout_page_stats(); - vm_page_unlock_queues(); continue; } } Index: vm/vm_param.h =================================================================== --- vm/vm_param.h (revision 218945) +++ vm/vm_param.h (working copy) @@ -126,6 +126,14 @@ #define KERN_NOT_RECEIVER 7 #define KERN_NO_ACCESS 8 +#ifndef PA_LOCK_COUNT +#ifdef SMP +#define 
PA_LOCK_COUNT 32 +#else +#define PA_LOCK_COUNT 1 +#endif /* !SMP */ +#endif /* !PA_LOCK_COUNT */ + #ifndef ASSEMBLER #ifdef _KERNEL #define num_pages(x) \ Index: vm/vm_pageout.h =================================================================== --- vm/vm_pageout.h (revision 218945) +++ vm/vm_pageout.h (working copy) @@ -104,6 +104,7 @@ boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); int vm_pageout_flush(vm_page_t *, int, int, int, int *); void vm_pageout_oom(int shortage); +boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); void vm_contig_grow_cache(int, vm_paddr_t, vm_paddr_t); #endif #endif /* _VM_VM_PAGEOUT_H_ */ Index: vm/vm_map.c =================================================================== --- vm/vm_map.c (revision 218945) +++ vm/vm_map.c (working copy) @@ -1773,7 +1773,6 @@ vm_offset_t start; vm_page_t p, p_start; vm_pindex_t psize, tmpidx; - boolean_t are_queues_locked; if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL) return; @@ -1795,7 +1794,6 @@ psize = object->size - pindex; } - are_queues_locked = FALSE; start = 0; p_start = NULL; @@ -1823,25 +1821,14 @@ p_start = p; } } else if (p_start != NULL) { - if (!are_queues_locked) { - are_queues_locked = TRUE; - vm_page_lock_queues(); - } pmap_enter_object(map->pmap, start, addr + ptoa(tmpidx), p_start, prot); p_start = NULL; } } - if (p_start != NULL) { - if (!are_queues_locked) { - are_queues_locked = TRUE; - vm_page_lock_queues(); - } + if (p_start != NULL) pmap_enter_object(map->pmap, start, addr + ptoa(psize), p_start, prot); - } - if (are_queues_locked) - vm_page_unlock_queues(); unlock_return: VM_OBJECT_UNLOCK(object); } @@ -2435,7 +2422,7 @@ vm_map_busy(map); vm_map_unlock(map); rv = vm_fault_wire(map, saved_start, saved_end, - user_wire, fictitious); + fictitious); vm_map_lock(map); vm_map_unbusy(map); if (last_timestamp + 1 != map->timestamp) { @@ -3635,23 +3622,16 @@ /* * Check whether this task is allowed to have this page. - * Note the special case for MAP_ENTRY_COW - * pages with an override. This is to implement a forced - * COW for debuggers. */ - if (fault_type & VM_PROT_OVERRIDE_WRITE) - prot = entry->max_protection; - else - prot = entry->protection; + prot = entry->protection; fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); - if ((fault_type & prot) != fault_type) { + if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) { vm_map_unlock_read(map); return (KERN_PROTECTION_FAILURE); } if ((entry->eflags & MAP_ENTRY_USER_WIRED) && (entry->eflags & MAP_ENTRY_COW) && - (fault_type & VM_PROT_WRITE) && - (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) { + (fault_type & VM_PROT_WRITE)) { vm_map_unlock_read(map); return (KERN_PROTECTION_FAILURE); } @@ -3662,7 +3642,7 @@ */ *wired = (entry->wired_count != 0); if (*wired) - prot = fault_type = entry->protection; + fault_type = entry->protection; size = entry->end - entry->start; /* * If the entry was copy-on-write, we either ... @@ -3675,7 +3655,8 @@ * If we don't need to write the page, we just demote the * permissions allowed. */ - if (fault_type & VM_PROT_WRITE) { + if ((fault_type & VM_PROT_WRITE) != 0 || + (fault_typea & VM_PROT_COPY) != 0) { /* * Make a new object, and place it in the object * chain. Note that no new references have appeared @@ -3798,21 +3779,14 @@ /* * Check whether this task is allowed to have this page. - * Note the special case for MAP_ENTRY_COW - * pages with an override. This is to implement a forced - * COW for debuggers. 
*/ - if (fault_type & VM_PROT_OVERRIDE_WRITE) - prot = entry->max_protection; - else - prot = entry->protection; + prot = entry->protection; fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; if ((fault_type & prot) != fault_type) return (KERN_PROTECTION_FAILURE); if ((entry->eflags & MAP_ENTRY_USER_WIRED) && (entry->eflags & MAP_ENTRY_COW) && - (fault_type & VM_PROT_WRITE) && - (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) + (fault_type & VM_PROT_WRITE)) return (KERN_PROTECTION_FAILURE); /* @@ -3821,7 +3795,7 @@ */ *wired = (entry->wired_count != 0); if (*wired) - prot = fault_type = entry->protection; + fault_type = entry->protection; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { /* Index: vm/vm_map.h =================================================================== --- vm/vm_map.h (revision 218945) +++ vm/vm_map.h (working copy) @@ -325,8 +325,6 @@ */ #define VM_FAULT_NORMAL 0 /* Nothing special */ #define VM_FAULT_CHANGE_WIRING 1 /* Change the wiring as appropriate */ -#define VM_FAULT_USER_WIRE 2 /* Likewise, but for user purposes */ -#define VM_FAULT_WIRE_MASK (VM_FAULT_CHANGE_WIRING|VM_FAULT_USER_WIRE) #define VM_FAULT_DIRTY 8 /* Dirty the page */ /* Index: vm/sg_pager.c =================================================================== --- vm/sg_pager.c (revision 218945) +++ vm/sg_pager.c (working copy) @@ -198,10 +198,11 @@ TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq); /* Free the original pages and insert this fake page into the object. */ - vm_page_lock_queues(); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } vm_page_insert(page, object, offset); m[reqpage] = page; page->valid = VM_PAGE_BITS_ALL; Index: vm/swap_pager.c =================================================================== --- vm/swap_pager.c (revision 218945) +++ vm/swap_pager.c (working copy) @@ -378,8 +378,10 @@ swp_pager_free_nrpage(vm_page_t m) { + vm_page_lock(m); if (m->wire_count == 0) vm_page_free(m); + vm_page_unlock(m); } /* @@ -1133,12 +1135,10 @@ if (0 < i || j < count) { int k; - vm_page_lock_queues(); for (k = 0; k < i; ++k) swp_pager_free_nrpage(m[k]); for (k = j; k < count; ++k) swp_pager_free_nrpage(m[k]); - vm_page_unlock_queues(); } /* @@ -1493,7 +1493,7 @@ object = bp->b_pages[0]->object; VM_OBJECT_LOCK(object); } - vm_page_lock_queues(); + /* * cleanup pages. If an error occurs writing to swap, we are in * very serious trouble. If it happens to be a disk error, though, @@ -1547,7 +1547,9 @@ * then finish the I/O. */ vm_page_dirty(m); + vm_page_lock(m); vm_page_activate(m); + vm_page_unlock(m); vm_page_io_finish(m); } } else if (bp->b_iocmd == BIO_READ) { @@ -1582,11 +1584,12 @@ * left busy. */ if (i != bp->b_pager.pg_reqpage) { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); vm_page_wakeup(m); - } else { + } else vm_page_flash(m); - } } else { /* * For write success, clear the dirty @@ -1598,11 +1601,13 @@ " protected", m)); vm_page_undirty(m); vm_page_io_finish(m); - if (vm_page_count_severe()) + if (vm_page_count_severe()) { + vm_page_lock(m); vm_page_try_to_cache(m); + vm_page_unlock(m); + } } } - vm_page_unlock_queues(); /* * adjust pip. 
NOTE: the original parent may still have its own @@ -1696,10 +1701,10 @@ m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL|VM_ALLOC_RETRY); if (m->valid == VM_PAGE_BITS_ALL) { vm_object_pip_subtract(object, 1); - vm_page_lock_queues(); + vm_page_dirty(m); + vm_page_lock(m); vm_page_activate(m); - vm_page_dirty(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_pager_page_unswapped(m); return; @@ -1708,10 +1713,10 @@ if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_subtract(object, 1); - vm_page_lock_queues(); vm_page_dirty(m); - vm_page_dontneed(m); - vm_page_unlock_queues(); + vm_page_lock(m); + vm_page_deactivate(m); + vm_page_unlock(m); vm_page_wakeup(m); vm_pager_page_unswapped(m); } Index: vm/vm_mmap.c =================================================================== --- vm/vm_mmap.c (revision 218945) +++ vm/vm_mmap.c (working copy) @@ -778,8 +778,13 @@ int vecindex, lastvecindex; vm_map_entry_t current; vm_map_entry_t entry; + vm_object_t object; + vm_paddr_t locked_pa; + vm_page_t m; + vm_pindex_t pindex; int mincoreinfo; unsigned int timestamp; + boolean_t locked; /* * Make sure that the addresses presented are valid for user @@ -853,38 +858,81 @@ * it can provide info as to whether we are the * one referencing or modifying the page. */ - mincoreinfo = pmap_mincore(pmap, addr); - if (!mincoreinfo) { - vm_pindex_t pindex; - vm_ooffset_t offset; - vm_page_t m; + object = NULL; + locked_pa = 0; + retry: + m = NULL; + mincoreinfo = pmap_mincore(pmap, addr, &locked_pa); + if (locked_pa != 0) { /* - * calculate the page index into the object + * The page is mapped by this process but not + * both accessed and modified. It is also + * managed. Acquire the object lock so that + * other mappings might be examined. */ - offset = current->offset + (addr - current->start); - pindex = OFF_TO_IDX(offset); - VM_OBJECT_LOCK(current->object.vm_object); - m = vm_page_lookup(current->object.vm_object, - pindex); + m = PHYS_TO_VM_PAGE(locked_pa); + if (m->object != object) { + if (object != NULL) + VM_OBJECT_UNLOCK(object); + object = m->object; + locked = VM_OBJECT_TRYLOCK(object); + vm_page_unlock(m); + if (!locked) { + VM_OBJECT_LOCK(object); + vm_page_lock(m); + goto retry; + } + } else + vm_page_unlock(m); + KASSERT(m->valid == VM_PAGE_BITS_ALL, + ("mincore: page %p is mapped but invalid", + m)); + } else if (mincoreinfo == 0) { /* - * if the page is resident, then gather information about - * it. + * The page is not mapped by this process. If + * the object implements managed pages, then + * determine if the page is resident so that + * the mappings might be examined. 
*/ - if (m != NULL && m->valid != 0) { - mincoreinfo = MINCORE_INCORE; - vm_page_lock_queues(); - if (m->dirty || - pmap_is_modified(m)) - mincoreinfo |= MINCORE_MODIFIED_OTHER; - if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { - vm_page_flag_set(m, PG_REFERENCED); - mincoreinfo |= MINCORE_REFERENCED_OTHER; - } - vm_page_unlock_queues(); + if (current->object.vm_object != object) { + if (object != NULL) + VM_OBJECT_UNLOCK(object); + object = current->object.vm_object; + VM_OBJECT_LOCK(object); } - VM_OBJECT_UNLOCK(current->object.vm_object); + if (object->type == OBJT_DEFAULT || + object->type == OBJT_SWAP || + object->type == OBJT_VNODE) { + pindex = OFF_TO_IDX(current->offset + + (addr - current->start)); + m = vm_page_lookup(object, pindex); + if (m != NULL && m->valid == 0) + m = NULL; + if (m != NULL) + mincoreinfo = MINCORE_INCORE; + } } + if (m != NULL) { + /* Examine other mappings to the page. */ + if (m->dirty == 0 && pmap_is_modified(m)) + vm_page_dirty(m); + if (m->dirty != 0) + mincoreinfo |= MINCORE_MODIFIED_OTHER; + /* + * The first test for PG_REFERENCED is an + * optimization. The second test is + * required because a concurrent pmap + * operation could clear the last reference + * and set PG_REFERENCED before the call to + * pmap_is_referenced(). + */ + if ((m->flags & PG_REFERENCED) != 0 || + pmap_is_referenced(m) || + (m->flags & PG_REFERENCED) != 0) + mincoreinfo |= MINCORE_REFERENCED_OTHER; + } + if (object != NULL) + VM_OBJECT_UNLOCK(object); /* * subyte may page fault. In case it needs to modify Index: vm/vm_glue.c =================================================================== --- vm/vm_glue.c (revision 218945) +++ vm/vm_glue.c (working copy) @@ -261,16 +261,16 @@ if (m == NULL) goto out; if (rv != VM_PAGER_OK) { - vm_page_lock_queues(); + vm_page_lock(m); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); m = NULL; goto out; } } - vm_page_lock_queues(); + vm_page_lock(m); vm_page_hold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); out: VM_OBJECT_UNLOCK(object); @@ -304,9 +304,9 @@ m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } void @@ -438,10 +438,10 @@ m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(ksobj); vm_object_deallocate(ksobj); @@ -526,10 +526,10 @@ m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_swapout: kstack already missing?"); - vm_page_lock_queues(); vm_page_dirty(m); + vm_page_lock(m); vm_page_unwire(m, 0); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(ksobj); } Index: vm/pmap.h =================================================================== --- vm/pmap.h (revision 218945) +++ vm/pmap.h (working copy) @@ -119,8 +119,11 @@ void pmap_init(void); boolean_t pmap_is_modified(vm_page_t m); boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t va); +boolean_t pmap_is_referenced(vm_page_t m); boolean_t pmap_ts_referenced(vm_page_t m); vm_offset_t pmap_map(vm_offset_t *, vm_paddr_t, vm_paddr_t, int); +int pmap_mincore(pmap_t pmap, vm_offset_t addr, + vm_paddr_t *locked_pa); void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size); boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m); @@ 
-140,7 +143,6 @@ void pmap_zero_page(vm_page_t); void pmap_zero_page_area(vm_page_t, int off, int size); void pmap_zero_page_idle(vm_page_t); -int pmap_mincore(pmap_t pmap, vm_offset_t addr); void pmap_activate(struct thread *td); #define pmap_resident_count(pm) ((pm)->pm_stats.resident_count) Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (revision 218945) +++ vm/vm_object.c (working copy) @@ -96,10 +96,6 @@ #include #include -static int msync_flush_flags = 0; -SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags, CTLFLAG_RW, &msync_flush_flags, 0, - "Does nothing; kept for backward compatibility"); - static int old_msync; SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, "Use old (insecure) msync behavior"); @@ -716,19 +712,18 @@ * removes them from paging queues. Don't free wired pages, just * remove them from the object. */ - vm_page_lock_queues(); while ((p = TAILQ_FIRST(&object->memq)) != NULL) { KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, ("vm_object_terminate: freeing busy page %p " "p->busy = %d, p->oflags %x\n", p, p->busy, p->oflags)); + vm_page_lock(p); if (p->wire_count == 0) { vm_page_free(p); - cnt.v_pfree++; - } else { + PCPU_INC(cnt.v_pfree); + } else vm_page_remove(p); - } + vm_page_unlock(p); } - vm_page_unlock_queues(); #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) @@ -787,6 +782,7 @@ vm_pindex_t pi, tend; int clearobjflags, curgeneration, n, pagerflags; + mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_VNODE, ("Not a vnode object")); if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 || @@ -801,8 +797,6 @@ vm_object_set_flag(object, OBJ_CLEANING); - vm_page_lock_queues(); - /* * Make the page read-only so we can then clear the object flags. * @@ -823,7 +817,6 @@ if (p->valid == 0) continue; if (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { - vm_page_lock_queues(); if (object->generation != curgeneration) goto rescan; np = vm_page_find_least(object, pi); @@ -838,7 +831,6 @@ goto rescan; np = vm_page_find_least(object, pi + n); } - vm_page_unlock_queues(); #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0); #endif @@ -855,7 +847,9 @@ vm_page_t ma[vm_pageout_page_count], p_first, tp; int count, i, mreq, runlen; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); + vm_page_lock_assert(p, MA_NOTOWNED); + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); count = 1; mreq = 0; @@ -1043,20 +1037,25 @@ /* * If the page is not in a normal state, skip it. */ - vm_page_lock_queues(); + vm_page_lock(m); if (m->hold_count != 0 || m->wire_count != 0) { - vm_page_unlock_queues(); + vm_page_unlock(m); goto unlock_tobject; } + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("vm_object_madvise: page %p is not managed", m)); if ((m->oflags & VPO_BUSY) || m->busy) { - if (advise == MADV_WILLNEED) + if (advise == MADV_WILLNEED) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. 
*/ + vm_page_lock_queues(); vm_page_flag_set(m, PG_REFERENCED); - vm_page_unlock_queues(); + vm_page_unlock_queues(); + } + vm_page_unlock(m); if (object != tobject) VM_OBJECT_UNLOCK(object); m->oflags |= VPO_WANTED; @@ -1090,7 +1089,7 @@ m->act_count = 0; vm_page_dontneed(m); } - vm_page_unlock_queues(); + vm_page_unlock(m); if (advise == MADV_FREE && tobject->type == OBJT_SWAP) swap_pager_freespace(tobject, tpindex, 1); unlock_tobject: @@ -1245,7 +1244,6 @@ } retry: m = vm_page_find_least(orig_object, offidxstart); - vm_page_lock_queues(); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); @@ -1258,18 +1256,18 @@ * not be changed by this operation. */ if ((m->oflags & VPO_BUSY) || m->busy) { - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(new_object); m->oflags |= VPO_WANTED; msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0); VM_OBJECT_LOCK(new_object); goto retry; } + vm_page_lock(m); vm_page_rename(m, new_object, idx); + vm_page_unlock(m); /* page automatically made dirty by rename and cache handled */ vm_page_busy(m); } - vm_page_unlock_queues(); if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's @@ -1437,14 +1435,14 @@ * Page is out of the parent object's range, we * can simply destroy it. */ - vm_page_lock_queues(); + vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); - vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -1461,14 +1459,14 @@ * * Leave the parent's page alone */ - vm_page_lock_queues(); + vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); - vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -1488,9 +1486,9 @@ * If the page was mapped to a process, it can remain * mapped through the rename. */ - vm_page_lock_queues(); + vm_page_lock(p); vm_page_rename(p, object, new_pindex); - vm_page_unlock_queues(); + vm_page_unlock(p); /* page automatically made dirty by rename */ } p = next; @@ -1746,7 +1744,7 @@ vm_object_pip_add(object, 1); again: p = vm_page_find_least(object, start); - vm_page_lock_queues(); + /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex @@ -1765,6 +1763,7 @@ * cannot be freed. They can, however, be invalidated * if "clean_only" is FALSE. */ + vm_page_lock(p); if ((wirings = p->wire_count) != 0 && (wirings = pmap_page_wired_mappings(p)) != p->wire_count) { /* Fictitious pages do not have managed mappings. */ @@ -1776,6 +1775,7 @@ p->valid = 0; vm_page_undirty(p); } + vm_page_unlock(p); continue; } if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) @@ -1784,16 +1784,18 @@ ("vm_object_page_remove: page %p is fictitious", p)); if (clean_only && p->valid) { pmap_remove_write(p); - if (p->dirty) + if (p->dirty) { + vm_page_unlock(p); continue; + } } pmap_remove_all(p); /* Account for removal of managed, wired mappings. 
*/ if (wirings != 0) p->wire_count -= wirings; vm_page_free(p); + vm_page_unlock(p); } - vm_page_unlock_queues(); vm_object_pip_wakeup(object); skipmemq: if (__predict_false(object->cache != NULL)) @@ -1828,9 +1830,9 @@ if (m == NULL) break; if (rv != VM_PAGER_OK) { - vm_page_lock_queues(); + vm_page_lock(m); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); break; } } Index: vm/vm_extern.h =================================================================== --- vm/vm_extern.h (revision 218945) +++ vm/vm_extern.h (working copy) @@ -62,7 +62,7 @@ void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t, vm_ooffset_t *); void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); -int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t); +int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int); void vm_waitproc(struct proc *); int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t); Index: vm/vm_fault.c =================================================================== --- vm/vm_fault.c (revision 218945) +++ vm/vm_fault.c (working copy) @@ -138,9 +138,9 @@ { vm_page_wakeup(fs->m); - vm_page_lock_queues(); + vm_page_lock(fs->m); vm_page_deactivate(fs->m); - vm_page_unlock_queues(); + vm_page_unlock(fs->m); fs->m = NULL; } @@ -162,9 +162,9 @@ VM_OBJECT_UNLOCK(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_LOCK(fs->first_object); - vm_page_lock_queues(); + vm_page_lock(fs->first_m); vm_page_free(fs->first_m); - vm_page_unlock_queues(); + vm_page_unlock(fs->first_m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_UNLOCK(fs->first_object); fs->first_m = NULL; @@ -186,7 +186,7 @@ * default objects are zero-fill, there is no real pager. */ #define TRYPAGER (fs.object->type != OBJT_DEFAULT && \ - (((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired)) + ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 || wired)) /* * vm_fault: @@ -212,7 +212,7 @@ { vm_prot_t prot; int is_first_object_locked, result; - boolean_t are_queues_locked, growstack, wired; + boolean_t growstack, wired; int map_generation; vm_object_t next_object; vm_page_t marray[VM_FAULT_READ], mt, mt_prev; @@ -239,42 +239,15 @@ result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); if (result != KERN_SUCCESS) { - if (result != KERN_PROTECTION_FAILURE || - (fault_flags & VM_FAULT_WIRE_MASK) != VM_FAULT_USER_WIRE) { - if (growstack && result == KERN_INVALID_ADDRESS && - map != kernel_map && curproc != NULL) { - result = vm_map_growstack(curproc, vaddr); - if (result != KERN_SUCCESS) - return (KERN_FAILURE); - growstack = FALSE; - goto RetryFault; - } - return (result); + if (growstack && result == KERN_INVALID_ADDRESS && + map != kernel_map) { + result = vm_map_growstack(curproc, vaddr); + if (result != KERN_SUCCESS) + return (KERN_FAILURE); + growstack = FALSE; + goto RetryFault; } - - /* - * If we are user-wiring a r/w segment, and it is COW, then - * we need to do the COW operation. Note that we don't COW - * currently RO sections now, because it is NOT desirable - * to COW .text. We simply keep .text from ever being COW'ed - * and take the heat that one cannot debug wired .text sections. 
- */ - result = vm_map_lookup(&fs.map, vaddr, - VM_PROT_READ|VM_PROT_WRITE|VM_PROT_OVERRIDE_WRITE, - &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); - if (result != KERN_SUCCESS) - return (result); - - /* - * If we don't COW now, on a user wire, the user will never - * be able to write to the mapping. If we don't make this - * restriction, the bookkeeping would be nearly impossible. - * - * XXX The following assignment modifies the map without - * holding a write lock on it. - */ - if ((fs.entry->protection & VM_PROT_WRITE) == 0) - fs.entry->max_protection &= ~VM_PROT_WRITE; + return (result); } map_generation = fs.map->timestamp; @@ -301,7 +274,7 @@ fs.lookup_still_valid = TRUE; if (wired) - fault_type = prot; + fault_type = prot | (fault_type & VM_PROT_COPY); fs.first_m = NULL; @@ -333,12 +306,11 @@ * removes the page from the backing object, * which is not what we want. */ - vm_page_lock_queues(); + vm_page_lock(fs.m); if ((fs.m->cow) && (fault_type & VM_PROT_WRITE) && (fs.object == fs.first_object)) { vm_page_cowfault(fs.m); - vm_page_unlock_queues(); unlock_and_deallocate(&fs); goto RetryFault; } @@ -360,7 +332,15 @@ * to pmap it. */ if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(fs.m, PG_REFERENCED); vm_page_unlock_queues(); + vm_page_unlock(fs.m); if (fs.object != fs.first_object) { if (!VM_OBJECT_TRYLOCK( fs.first_object)) { @@ -368,9 +348,9 @@ VM_OBJECT_LOCK(fs.first_object); VM_OBJECT_LOCK(fs.object); } - vm_page_lock_queues(); + vm_page_lock(fs.first_m); vm_page_free(fs.first_m); - vm_page_unlock_queues(); + vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_UNLOCK(fs.first_object); fs.first_m = NULL; @@ -388,7 +368,7 @@ goto RetryFault; } vm_pageq_remove(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the @@ -499,7 +479,6 @@ ("vm_fault: mt %p not busy", mt)); mt_prev = vm_page_prev(mt); - are_queues_locked = FALSE; /* * note: partially valid pages cannot be * included in the lookahead - NFS piecemeal @@ -512,22 +491,19 @@ if (mt->busy || (mt->oflags & VPO_BUSY)) continue; - if (!are_queues_locked) { - are_queues_locked = TRUE; - vm_page_lock_queues(); - } + vm_page_lock(mt); if (mt->hold_count || - mt->wire_count) + mt->wire_count) { + vm_page_unlock(mt); continue; + } pmap_remove_all(mt); - if (mt->dirty) { + if (mt->dirty != 0) vm_page_deactivate(mt); - } else { + else vm_page_cache(mt); - } + vm_page_unlock(mt); } - if (are_queues_locked) - vm_page_unlock_queues(); ahead += behind; behind = 0; } @@ -656,17 +632,17 @@ */ if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { - vm_page_lock_queues(); + vm_page_lock(fs.m); vm_page_free(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (fs.object != fs.first_object) { - vm_page_lock_queues(); + vm_page_lock(fs.m); vm_page_free(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); fs.m = NULL; /* * XXX - we cannot just fall out at this @@ -744,7 +720,7 @@ /* * We only really need to copy if we want to write it. 
*/ - if (fault_type & VM_PROT_WRITE) { + if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { /* * This allows pages to be virtually copied from a * backing_object into the first_object, where the @@ -779,18 +755,20 @@ * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { - vm_page_lock_queues(); /* * get rid of the unnecessary page */ + vm_page_lock(fs.first_m); vm_page_free(fs.first_m); + vm_page_unlock(fs.first_m); /* * grab the page and put it into the * process'es object. The page is * automatically made dirty. */ + vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; @@ -801,8 +779,16 @@ */ pmap_copy_page(fs.m, fs.first_m); fs.first_m->valid = VM_PAGE_BITS_ALL; - } - if (fs.m) { + if (wired && (fault_flags & + VM_FAULT_CHANGE_WIRING) == 0) { + vm_page_lock(fs.first_m); + vm_page_wire(fs.first_m); + vm_page_unlock(fs.first_m); + + vm_page_lock(fs.m); + vm_page_unwire(fs.m, FALSE); + vm_page_unlock(fs.m); + } /* * We no longer need the old page or object. */ @@ -945,26 +931,23 @@ * won't find it (yet). */ pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired); - if (((fault_flags & VM_FAULT_WIRE_MASK) == 0) && (wired == 0)) { + if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0) vm_fault_prefault(fs.map->pmap, vaddr, fs.entry); - } VM_OBJECT_LOCK(fs.object); - vm_page_lock_queues(); - vm_page_flag_set(fs.m, PG_REFERENCED); + vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ - if (fault_flags & VM_FAULT_WIRE_MASK) { + if (fault_flags & VM_FAULT_CHANGE_WIRING) { if (wired) vm_page_wire(fs.m); else vm_page_unwire(fs.m, 1); - } else { + } else vm_page_activate(fs.m); - } - vm_page_unlock_queues(); + vm_page_unlock(fs.m); vm_page_wakeup(fs.m); /* @@ -1040,11 +1023,8 @@ break; } if (m->valid == VM_PAGE_BITS_ALL && - (m->flags & PG_FICTITIOUS) == 0) { - vm_page_lock_queues(); + (m->flags & PG_FICTITIOUS) == 0) pmap_enter_quick(pmap, addr, m, entry->protection); - vm_page_unlock_queues(); - } VM_OBJECT_UNLOCK(lobject); } } @@ -1074,7 +1054,7 @@ */ int vm_fault_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, - boolean_t user_wire, boolean_t fictitious) + boolean_t fictitious) { vm_offset_t va; int rv; @@ -1085,9 +1065,7 @@ * read-only sections. */ for (va = start; va < end; va += PAGE_SIZE) { - rv = vm_fault(map, va, - user_wire ? VM_PROT_READ : VM_PROT_READ | VM_PROT_WRITE, - user_wire ? VM_FAULT_USER_WIRE : VM_FAULT_CHANGE_WIRING); + rv = vm_fault(map, va, VM_PROT_NONE, VM_FAULT_CHANGE_WIRING); if (rv) { if (va != start) vm_fault_unwire(map, start, va, fictitious); @@ -1108,6 +1086,7 @@ { vm_paddr_t pa; vm_offset_t va; + vm_page_t m; pmap_t pmap; pmap = vm_map_pmap(map); @@ -1121,9 +1100,10 @@ if (pa != 0) { pmap_change_wiring(pmap, va, FALSE); if (!fictitious) { - vm_page_lock_queues(); - vm_page_unwire(PHYS_TO_VM_PAGE(pa), 1); - vm_page_unlock_queues(); + m = PHYS_TO_VM_PAGE(pa); + vm_page_lock(m); + vm_page_unwire(m, TRUE); + vm_page_unlock(m); } } } @@ -1193,7 +1173,7 @@ dst_object->uip = dst_entry->uip; dst_entry->uip = NULL; } - access = prot = dst_entry->max_protection; + access = prot = dst_entry->protection; /* * If not an upgrade, then enter the mappings in the pmap as * read and/or execute accesses. Otherwise, enter them as @@ -1266,13 +1246,20 @@ * Mark it no longer busy, and put it on the active list. 
*/ VM_OBJECT_LOCK(dst_object); - vm_page_lock_queues(); + if (upgrade) { + vm_page_lock(src_m); vm_page_unwire(src_m, 0); + vm_page_unlock(src_m); + + vm_page_lock(dst_m); vm_page_wire(dst_m); - } else + vm_page_unlock(dst_m); + } else { + vm_page_lock(dst_m); vm_page_activate(dst_m); - vm_page_unlock_queues(); + vm_page_unlock(dst_m); + } vm_page_wakeup(dst_m); } VM_OBJECT_UNLOCK(dst_object); Index: vm/device_pager.c =================================================================== --- vm/device_pager.c (revision 218945) +++ vm/device_pager.c (working copy) @@ -263,12 +263,14 @@ VM_OBJECT_LOCK(object); dev_pager_updatefake(page, paddr, memattr); if (count > 1) { - vm_page_lock_queues(); + for (i = 0; i < count; i++) { - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); + vm_page_unlock(m[i]); + } } - vm_page_unlock_queues(); } } else { /* @@ -278,10 +280,11 @@ page = dev_pager_getfake(paddr, memattr); VM_OBJECT_LOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, page, pageq); - vm_page_lock_queues(); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } vm_page_insert(page, object, offset); m[reqpage] = page; } Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (revision 218945) +++ vm/vm_page.c (working copy) @@ -115,6 +115,7 @@ #include #include +#include #include #include #include @@ -135,9 +136,11 @@ */ struct vpgqueues vm_page_queues[PQ_COUNT]; -struct mtx vm_page_queue_mtx; -struct mtx vm_page_queue_free_mtx; +struct vpglocks vm_page_queue_lock; +struct vpglocks vm_page_queue_free_lock; +struct vpglocks pa_lock[PA_LOCK_COUNT]; + vm_page_t vm_page_array = 0; int vm_page_array_size = 0; long first_page = 0; @@ -148,6 +151,12 @@ SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RD, &boot_pages, 0, "number of pages allocated for bootstrapping the VM system"); +static int pa_tryrelock_restart; +SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD, + &pa_tryrelock_restart, 0, "Number of tryrelock restarts"); + +static void vm_page_clear_dirty_mask(vm_page_t m, int pagebits); +static void vm_page_queue_remove(int queue, vm_page_t m); static void vm_page_enqueue(int queue, vm_page_t m); /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ @@ -158,6 +167,34 @@ #endif /* + * Try to acquire a physical address lock while a pmap is locked. If we + * fail to trylock we unlock and lock the pmap directly and cache the + * locked pa in *locked. The caller should then restart their loop in case + * the virtual to physical mapping has changed. + */ +int +vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) +{ + vm_paddr_t lockpa; + + lockpa = *locked; + *locked = pa; + if (lockpa) { + PA_LOCK_ASSERT(lockpa, MA_OWNED); + if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa)) + return (0); + PA_UNLOCK(lockpa); + } + if (PA_TRYLOCK(pa)) + return (0); + PMAP_UNLOCK(pmap); + atomic_add_int(&pa_tryrelock_restart, 1); + PA_LOCK(pa); + PMAP_LOCK(pmap); + return (EAGAIN); +} + +/* * vm_set_page_size: * * Sets the page size, perhaps based upon the memory @@ -271,6 +308,10 @@ mtx_init(&vm_page_queue_free_mtx, "vm page queue free mutex", NULL, MTX_DEF); + /* Setup page locks. */ + for (i = 0; i < PA_LOCK_COUNT; i++) + mtx_init(&pa_lock[i].data, "page lock", NULL, MTX_DEF); + /* * Initialize the queue headers for the hold queue, the active queue, * and the inactive queue. 
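The vm_page_pa_tryrelock() helper added in the hunk above encodes a retry protocol: a zero return means the PA lock named by "pa" is held on return and the pmap lock was never dropped, while EAGAIN means both locks had to be reacquired, so the caller's virtual-to-physical translation may be stale and must be redone. A minimal sketch of a conforming caller follows; example_extract_and_hold() and example_pte_to_pa() are hypothetical placeholder names, and the sketch assumes, as the rest of the patch does, that the page lock for PHYS_TO_VM_PAGE(pa) is the PA lock for pa, so vm_page_hold()'s new locking assertion is satisfied.

/*
 * Sketch only -- not part of the patch.  Demonstrates the retry pattern
 * that vm_page_pa_tryrelock() expects from its callers.
 */
vm_page_t
example_extract_and_hold(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t locked_pa, pa;
	vm_page_t m;

	m = NULL;
	locked_pa = 0;
	PMAP_LOCK(pmap);
retry:
	pa = example_pte_to_pa(pmap, va);	/* placeholder translation */
	if (pa != 0) {
		/*
		 * A non-zero return means the pmap lock was dropped and
		 * reacquired, so the translation above must be redone.
		 */
		if (vm_page_pa_tryrelock(pmap, pa, &locked_pa))
			goto retry;
		m = PHYS_TO_VM_PAGE(pa);
		vm_page_hold(m);
	}
	if (locked_pa != 0)
		PA_UNLOCK(locked_pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

The fast path is a plain trylock, so the pmap lock is only dropped when the PA lock is contended; each such restart is counted by the vm.tryrelock_restart sysctl added above.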
@@ -408,6 +449,13 @@ { mtx_assert(&vm_page_queue_mtx, MA_OWNED); + /* + * The PG_WRITEABLE flag can only be set if the page is managed and + * VPO_BUSY. Currently, this flag is only set by pmap_enter(). + */ + KASSERT((bits & PG_WRITEABLE) == 0 || + ((m->flags & (PG_UNMANAGED | PG_FICTITIOUS)) == 0 && + (m->oflags & VPO_BUSY) != 0), ("PG_WRITEABLE and !VPO_BUSY")); m->flags |= bits; } @@ -416,6 +464,12 @@ { mtx_assert(&vm_page_queue_mtx, MA_OWNED); + /* + * The PG_REFERENCED flag can only be cleared if the object + * containing the page is locked. + */ + KASSERT((bits & PG_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object), + ("PG_REFERENCED and !VM_OBJECT_LOCKED")); m->flags &= ~bits; } @@ -490,7 +544,7 @@ vm_page_hold(vm_page_t mem) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(mem, MA_OWNED); mem->hold_count++; } @@ -498,10 +552,10 @@ vm_page_unhold(vm_page_t mem) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(mem, MA_OWNED); --mem->hold_count; KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!")); - if (mem->hold_count == 0 && VM_PAGE_INQUEUE2(mem, PQ_HOLD)) + if (mem->hold_count == 0 && mem->queue == PQ_HOLD) vm_page_free_toq(mem); } @@ -534,7 +588,7 @@ /* * vm_page_sleep: * - * Sleep and release the page queues lock. + * Sleep and release the page and page queues locks. * * The object containing the given page must be locked. */ @@ -543,10 +597,10 @@ { VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); - if (!mtx_owned(&vm_page_queue_mtx)) - vm_page_lock_queues(); - vm_page_flag_set(m, PG_REFERENCED); - vm_page_unlock_queues(); + if (mtx_owned(&vm_page_queue_mtx)) + vm_page_unlock_queues(); + if (mtx_owned(vm_page_lockptr(m))) + vm_page_unlock(m); /* * It's possible that while we sleep, the page will get @@ -724,6 +778,8 @@ vm_object_t object; vm_page_t root; + if ((m->flags & PG_UNMANAGED) == 0) + vm_page_lock_assert(m, MA_OWNED); if ((object = m->object) == NULL) return; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); @@ -731,7 +787,6 @@ m->oflags &= ~VPO_BUSY; vm_page_flash(m); } - mtx_assert(&vm_page_queue_mtx, MA_OWNED); /* * Now remove from the object's list of backed pages. @@ -1209,7 +1264,8 @@ } /* - * Initialize structure. Only the PG_ZERO flag is inherited. + * Only the PG_ZERO flag is inherited. The PG_CACHED or PG_FREE flag + * must be cleared before the free page queues lock is released. */ flags = 0; if (m->flags & PG_ZERO) { @@ -1220,16 +1276,20 @@ if (object == NULL || object->type == OBJT_PHYS) flags |= PG_UNMANAGED; m->flags = flags; + mtx_unlock(&vm_page_queue_free_mtx); if (req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ)) m->oflags = 0; else m->oflags = VPO_BUSY; if (req & VM_ALLOC_WIRED) { + /* + * The page lock is not required for wiring a page until that + * page is inserted into the object. + */ atomic_add_int(&cnt.v_wire_count, 1); m->wire_count = 1; } m->act_count = 0; - mtx_unlock(&vm_page_queue_free_mtx); if (object != NULL) { /* Ignore device objects; the pager sets "memattr" for them. */ @@ -1399,43 +1459,63 @@ /* * vm_page_requeue: * - * If the given page is contained within a page queue, move it to the tail - * of that queue. + * Move the given page to the tail of its present page queue. * * The page queues must be locked. 
*/ void vm_page_requeue(vm_page_t m) { - int queue = VM_PAGE_GETQUEUE(m); struct vpgqueues *vpq; + int queue; - if (queue != PQ_NONE) { - vpq = &vm_page_queues[queue]; - TAILQ_REMOVE(&vpq->pl, m, pageq); - TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); - } + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + queue = m->queue; + KASSERT(queue != PQ_NONE, + ("vm_page_requeue: page %p is not queued", m)); + vpq = &vm_page_queues[queue]; + TAILQ_REMOVE(&vpq->pl, m, pageq); + TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); } /* + * vm_page_queue_remove: + * + * Remove the given page from the specified queue. + * + * The page and page queues must be locked. + */ +static __inline void +vm_page_queue_remove(int queue, vm_page_t m) +{ + struct vpgqueues *pq; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); + pq = &vm_page_queues[queue]; + TAILQ_REMOVE(&pq->pl, m, pageq); + (*pq->cnt)--; +} + +/* * vm_pageq_remove: * * Remove a page from its queue. * - * The queue containing the given page must be locked. + * The given page must be locked. * This routine may not block. */ void vm_pageq_remove(vm_page_t m) { - int queue = VM_PAGE_GETQUEUE(m); - struct vpgqueues *pq; + int queue; - if (queue != PQ_NONE) { - VM_PAGE_SETQUEUE2(m, PQ_NONE); - pq = &vm_page_queues[queue]; - TAILQ_REMOVE(&pq->pl, m, pageq); - (*pq->cnt)--; + vm_page_lock_assert(m, MA_OWNED); + if ((queue = m->queue) != PQ_NONE) { + vm_page_lock_queues(); + m->queue = PQ_NONE; + vm_page_queue_remove(queue, m); + vm_page_unlock_queues(); } } @@ -1452,7 +1532,7 @@ struct vpgqueues *vpq; vpq = &vm_page_queues[queue]; - VM_PAGE_SETQUEUE2(m, queue); + m->queue = queue; TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); ++*vpq->cnt; } @@ -1464,21 +1544,28 @@ * Ensure that act_count is at least ACT_INIT but do not otherwise * mess with it. * - * The page queues must be locked. + * The page must be locked. * This routine may not block. */ void vm_page_activate(vm_page_t m) { + int queue; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (VM_PAGE_GETKNOWNQUEUE2(m) != PQ_ACTIVE) { - vm_pageq_remove(m); + vm_page_lock_assert(m, MA_OWNED); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((queue = m->queue) != PQ_ACTIVE) { if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; + vm_page_lock_queues(); + if (queue != PQ_NONE) + vm_page_queue_remove(queue, m); vm_page_enqueue(PQ_ACTIVE, m); - } + vm_page_unlock_queues(); + } else + KASSERT(queue == PQ_NONE, + ("vm_page_activate: wired page %p is queued", m)); } else { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; @@ -1534,10 +1621,11 @@ vm_page_free_toq(vm_page_t m) { - if (VM_PAGE_GETQUEUE(m) != PQ_NONE) - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - KASSERT(!pmap_page_is_mapped(m), - ("vm_page_free_toq: freeing mapped page %p", m)); + if ((m->flags & PG_UNMANAGED) == 0) { + vm_page_lock_assert(m, MA_OWNED); + KASSERT(!pmap_page_is_mapped(m), + ("vm_page_free_toq: freeing mapped page %p", m)); + } PCPU_INC(cnt.v_tfree); if (m->busy || VM_PAGE_IS_FREE(m)) { @@ -1557,7 +1645,8 @@ * callback routine until after we've put the page on the * appropriate free queue. */ - vm_pageq_remove(m); + if ((m->flags & PG_UNMANAGED) == 0) + vm_pageq_remove(m); vm_page_remove(m); /* @@ -1580,7 +1669,9 @@ } if (m->hold_count != 0) { m->flags &= ~PG_ZERO; + vm_page_lock_queues(); vm_page_enqueue(PQ_HOLD, m); + vm_page_unlock_queues(); } else { /* * Restore the default memory attribute to the page. 
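A recurring pattern in the hunks above is the new division of labor between the per-page lock and the global page queues mutex: the page lock serializes changes to a page's queue membership (m->queue), while vm_page_queue_remove(), vm_page_requeue(), and vm_page_enqueue() still require the page queues lock, which is now taken only around the brief TAILQ manipulation. The following sketch shows the ordering a caller is expected to follow; example_push_to_tail() is a placeholder name, not a function in this patch.

/*
 * Sketch only -- the lock order assumed by the reworked queue helpers:
 * page lock first, then the page queues lock for the queue update itself.
 */
static void
example_push_to_tail(vm_page_t m)
{

	vm_page_lock(m);		/* stabilizes m->queue */
	if (m->queue != PQ_NONE) {
		vm_page_lock_queues();	/* protects the queue TAILQs */
		vm_page_requeue(m);
		vm_page_unlock_queues();
	}
	vm_page_unlock(m);
}

vm_pageq_remove() above follows the same discipline internally, which is why it now asserts only the page lock and acquires the page queues lock itself around vm_page_queue_remove().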
@@ -1617,7 +1708,9 @@ * another map, removing it from paging queues * as necessary. * - * The page queues must be locked. + * If the page is fictitious, then its wire count must remain one. + * + * The page must be locked. * This routine may not block. */ void @@ -1629,9 +1722,13 @@ * and only unqueue the page if it is on some queue (if it is unmanaged * it is already off the queues). */ - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (m->flags & PG_FICTITIOUS) + vm_page_lock_assert(m, MA_OWNED); + if ((m->flags & PG_FICTITIOUS) != 0) { + KASSERT(m->wire_count == 1, + ("vm_page_wire: fictitious page %p's wire count isn't one", + m)); return; + } if (m->wire_count == 0) { if ((m->flags & PG_UNMANAGED) == 0) vm_pageq_remove(m); @@ -1642,63 +1739,66 @@ } /* - * vm_page_unwire: + * vm_page_unwire: * - * Release one wiring of this page, potentially - * enabling it to be paged again. + * Release one wiring of the specified page, potentially enabling it to be + * paged again. If paging is enabled, then the value of the parameter + * "activate" determines to which queue the page is added. If "activate" is + * non-zero, then the page is added to the active queue. Otherwise, it is + * added to the inactive queue. * - * Many pages placed on the inactive queue should actually go - * into the cache, but it is difficult to figure out which. What - * we do instead, if the inactive target is well met, is to put - * clean pages at the head of the inactive queue instead of the tail. - * This will cause them to be moved to the cache more quickly and - * if not actively re-referenced, freed more quickly. If we just - * stick these pages at the end of the inactive queue, heavy filesystem - * meta-data accesses can cause an unnecessary paging load on memory bound - * processes. This optimization causes one-time-use metadata to be - * reused more quickly. + * However, unless the page belongs to an object, it is not enqueued because + * it cannot be paged out. * - * BUT, if we are in a low-memory situation we have no choice but to - * put clean pages on the cache queue. + * If a page is fictitious, then its wire count must always be one. * - * A number of routines use vm_page_unwire() to guarantee that the page - * will go into either the inactive or active queues, and will NEVER - * be placed in the cache - for example, just after dirtying a page. - * dirty pages in the cache are not allowed. - * - * The page queues must be locked. - * This routine may not block. + * A managed page must be locked. */ void vm_page_unwire(vm_page_t m, int activate) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (m->flags & PG_FICTITIOUS) + if ((m->flags & PG_UNMANAGED) == 0) + vm_page_lock_assert(m, MA_OWNED); + if ((m->flags & PG_FICTITIOUS) != 0) { + KASSERT(m->wire_count == 1, + ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); return; + } if (m->wire_count > 0) { m->wire_count--; if (m->wire_count == 0) { atomic_subtract_int(&cnt.v_wire_count, 1); - if (m->flags & PG_UNMANAGED) { - ; - } else if (activate) + if ((m->flags & PG_UNMANAGED) != 0 || + m->object == NULL) + return; + vm_page_lock_queues(); + if (activate) vm_page_enqueue(PQ_ACTIVE, m); else { vm_page_flag_clear(m, PG_WINATCFLS); vm_page_enqueue(PQ_INACTIVE, m); } + vm_page_unlock_queues(); } - } else { - panic("vm_page_unwire: invalid wire count: %d", m->wire_count); - } + } else + panic("vm_page_unwire: page %p's wire count is zero", m); } - /* - * Move the specified page to the inactive queue. 
If the page has - * any associated swap, the swap is deallocated. + * Move the specified page to the inactive queue. * + * Many pages placed on the inactive queue should actually go + * into the cache, but it is difficult to figure out which. What + * we do instead, if the inactive target is well met, is to put + * clean pages at the head of the inactive queue instead of the tail. + * This will cause them to be moved to the cache more quickly and + * if not actively re-referenced, reclaimed more quickly. If we just + * stick these pages at the end of the inactive queue, heavy filesystem + * meta-data accesses can cause an unnecessary paging load on memory bound + * processes. This optimization causes one-time-use metadata to be + * reused more quickly. + * * Normally athead is 0 resulting in LRU operation. athead is set * to 1 if we want this page to be 'as if it were placed in the cache', * except without unmapping it from the process address space. @@ -1708,30 +1808,42 @@ static inline void _vm_page_deactivate(vm_page_t m, int athead) { + int queue; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); /* * Ignore if already inactive. */ - if (VM_PAGE_INQUEUE2(m, PQ_INACTIVE)) + if ((queue = m->queue) == PQ_INACTIVE) return; if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { + vm_page_lock_queues(); vm_page_flag_clear(m, PG_WINATCFLS); - vm_pageq_remove(m); + if (queue != PQ_NONE) + vm_page_queue_remove(queue, m); if (athead) - TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); + TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, + pageq); else - TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); - VM_PAGE_SETQUEUE2(m, PQ_INACTIVE); + TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, + pageq); + m->queue = PQ_INACTIVE; cnt.v_inactive_count++; + vm_page_unlock_queues(); } } +/* + * Move the specified page to the inactive queue. + * + * The page must be locked. 
+ */ void vm_page_deactivate(vm_page_t m) { - _vm_page_deactivate(m, 0); + + _vm_page_deactivate(m, 0); } /* @@ -1743,12 +1855,11 @@ vm_page_try_to_cache(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (m->dirty || m->hold_count || m->busy || m->wire_count || - (m->oflags & VPO_BUSY) || (m->flags & PG_UNMANAGED)) { + (m->oflags & VPO_BUSY) || (m->flags & PG_UNMANAGED)) return (0); - } pmap_remove_all(m); if (m->dirty) return (0); @@ -1766,13 +1877,12 @@ vm_page_try_to_free(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->object != NULL) VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (m->dirty || m->hold_count || m->busy || m->wire_count || - (m->oflags & VPO_BUSY) || (m->flags & PG_UNMANAGED)) { + (m->oflags & VPO_BUSY) || (m->flags & PG_UNMANAGED)) return (0); - } pmap_remove_all(m); if (m->dirty) return (0); @@ -1793,13 +1903,12 @@ vm_object_t object; vm_page_t root; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if ((m->flags & PG_UNMANAGED) || (m->oflags & VPO_BUSY) || m->busy || - m->hold_count || m->wire_count) { + m->hold_count || m->wire_count) panic("vm_page_cache: attempting to cache busy page"); - } pmap_remove_all(m); if (m->dirty != 0) panic("vm_page_cache: page %p is dirty", m); @@ -1816,7 +1925,7 @@ } KASSERT((m->flags & PG_CACHED) == 0, ("vm_page_cache: page %p is already cached", m)); - cnt.v_tcached++; + PCPU_INC(cnt.v_tcached); /* * Remove the page from the paging queues. @@ -1849,7 +1958,7 @@ * Insert the page into the object's collection of cached pages * and the physical memory allocator's cache/free page queues. */ - vm_page_flag_clear(m, PG_ZERO); + m->flags &= ~PG_ZERO; mtx_lock(&vm_page_queue_free_mtx); m->flags |= PG_CACHED; cnt.v_cache_count++; @@ -1920,18 +2029,18 @@ void vm_page_dontneed(vm_page_t m) { - static int dnweight; int dnw; int head; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - dnw = ++dnweight; + vm_page_lock_assert(m, MA_OWNED); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + dnw = PCPU_GET(dnweight); + PCPU_INC(dnweight); /* - * occassionally leave the page alone + * Occasionally leave the page alone. */ - if ((dnw & 0x01F0) == 0 || - VM_PAGE_INQUEUE2(m, PQ_INACTIVE)) { + if ((dnw & 0x01F0) == 0 || m->queue == PQ_INACTIVE) { if (m->act_count >= ACT_INIT) --m->act_count; return; @@ -1940,9 +2049,18 @@ /* * Clear any references to the page. Otherwise, the page daemon will * immediately reactivate the page. + * + * Perform the pmap_clear_reference() first. Otherwise, a concurrent + * pmap operation, such as pmap_remove(), could clear a reference in + * the pmap and set PG_REFERENCED on the page before the + * pmap_clear_reference() had completed. Consequently, the page would + * appear referenced based upon an old reference that occurred before + * this function ran. 
*/ + pmap_clear_reference(m); + vm_page_lock_queues(); vm_page_flag_clear(m, PG_REFERENCED); - pmap_clear_reference(m); + vm_page_unlock_queues(); if (m->dirty == 0 && pmap_is_modified(m)) vm_page_dirty(m); @@ -1979,15 +2097,25 @@ VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { - if (vm_page_sleep_if_busy(m, TRUE, "pgrbwt")) { + if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) { + if ((allocflags & VM_ALLOC_RETRY) != 0) { + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(m, PG_REFERENCED); + } + vm_page_sleep(m, "pgrbwt"); if ((allocflags & VM_ALLOC_RETRY) == 0) return (NULL); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wire(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } if ((allocflags & VM_ALLOC_NOBUSY) == 0) vm_page_busy(m); @@ -2088,6 +2216,28 @@ } /* + * Clear the given bits from the specified page's dirty field. + */ +static __inline void +vm_page_clear_dirty_mask(vm_page_t m, int pagebits) +{ + + /* + * If the object is locked and the page is neither VPO_BUSY nor + * PG_WRITEABLE, then the page's dirty field cannot possibly be + * modified by a concurrent pmap operation. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) + m->dirty &= ~pagebits; + else { + vm_page_lock_queues(); + m->dirty &= ~pagebits; + vm_page_unlock_queues(); + } +} + +/* * vm_page_set_validclean: * * Sets portions of a page valid and clean. The arguments are expected @@ -2102,11 +2252,9 @@ void vm_page_set_validclean(vm_page_t m, int base, int size) { - int pagebits; - int frag; - int endoff; + u_long oldvalid; + int endoff, frag, pagebits; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (size == 0) /* handle degenerate case */ return; @@ -2142,6 +2290,7 @@ * clear dirty bits for DEV_BSIZE chunks that are fully within * the range. */ + oldvalid = m->valid; pagebits = vm_page_bits(base, size); m->valid |= pagebits; #if 0 /* NOT YET */ @@ -2154,19 +2303,35 @@ } pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); #endif - m->dirty &= ~pagebits; if (base == 0 && size == PAGE_SIZE) { - pmap_clear_modify(m); + /* + * The page can only be modified within the pmap if it is + * mapped, and it can only be mapped if it was previously + * fully valid. + */ + if (oldvalid == VM_PAGE_BITS_ALL) + /* + * Perform the pmap_clear_modify() first. Otherwise, + * a concurrent pmap operation, such as + * pmap_protect(), could clear a modification in the + * pmap and set the dirty field on the page before + * pmap_clear_modify() had begun and after the dirty + * field was cleared here. 
+ */ + pmap_clear_modify(m); + m->dirty = 0; m->oflags &= ~VPO_NOSYNC; - } + } else if (oldvalid != VM_PAGE_BITS_ALL) + m->dirty &= ~pagebits; + else + vm_page_clear_dirty_mask(m, pagebits); } void vm_page_clear_dirty(vm_page_t m, int base, int size) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - m->dirty &= ~vm_page_bits(base, size); + vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); } /* @@ -2183,10 +2348,13 @@ int bits; VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("vm_page_set_invalid: page %p is busy", m)); bits = vm_page_bits(base, size); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (m->valid == VM_PAGE_BITS_ALL && bits != 0) pmap_remove_all(m); + KASSERT(!pmap_page_is_mapped(m), + ("vm_page_set_invalid: page %p is mapped", m)); m->valid &= ~bits; m->dirty &= ~bits; } @@ -2263,9 +2431,10 @@ void vm_page_test_dirty(vm_page_t m) { - if ((m->dirty != VM_PAGE_BITS_ALL) && pmap_is_modified(m)) { + + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) vm_page_dirty(m); - } } int so_zerocp_fullpage = 0; @@ -2285,6 +2454,8 @@ vm_object_t object; vm_pindex_t pindex; + mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); + vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->paging_in_progress != 0, @@ -2298,18 +2469,17 @@ mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); if (mnew == NULL) { vm_page_insert(m, object, pindex); - vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); VM_WAIT; VM_OBJECT_LOCK(object); if (m == vm_page_lookup(object, pindex)) { - vm_page_lock_queues(); + vm_page_lock(m); goto retry_alloc; } else { /* * Page disappeared during the wait. */ - vm_page_lock_queues(); return; } } @@ -2320,7 +2490,10 @@ * waiting to allocate a page. If so, put things back * the way they were */ + vm_page_unlock(m); + vm_page_lock(mnew); vm_page_free(mnew); + vm_page_unlock(mnew); vm_page_insert(m, object, pindex); } else { /* clear COW & copy page */ if (!so_zerocp_fullpage) @@ -2329,6 +2502,7 @@ vm_page_dirty(mnew); mnew->wire_count = m->wire_count - m->cow; m->wire_count = m->cow; + vm_page_unlock(m); } } @@ -2336,7 +2510,7 @@ vm_page_cowclear(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->cow) { m->cow--; /* @@ -2352,11 +2526,13 @@ vm_page_cowsetup(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (m->cow == USHRT_MAX - 1) + vm_page_lock_assert(m, MA_OWNED); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + m->cow == USHRT_MAX - 1 || !VM_OBJECT_TRYLOCK(m->object)) return (EBUSY); m->cow++; pmap_remove_write(m); + VM_OBJECT_UNLOCK(m->object); return (0); } Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (revision 218945) +++ vm/vm_page.h (working copy) @@ -90,14 +90,15 @@ * and sundry status bits. * * Fields in this structure are locked either by the lock on the - * object that the page belongs to (O) or by the lock on the page - * queues (P). + * object that the page belongs to (O), its corresponding page lock (P), + * or by the lock on the page queues (Q). 
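The new locking key above splits the old single annotation into three classes: object lock (O), per-page lock (P), and page queues lock (Q). As a rough illustration (a sketch, not code from the patch), a caller touching fields from different classes takes the corresponding locks:

static void
example_wire_valid_page(vm_page_t m)
{

	/* (O): valid, oflags and friends want the object lock. */
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (m->valid != VM_PAGE_BITS_ALL)
		return;
	/* (P): wire_count, hold_count and cow are covered by the page lock. */
	vm_page_lock(m);
	vm_page_wire(m);
	vm_page_unlock(m);
	/* (Q): pageq linkage still requires the page queues lock. */
}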
+ * */ TAILQ_HEAD(pglist, vm_page); struct vm_page { - TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (P) */ + TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (Q) */ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ struct vm_page *left; /* splay tree link (O) */ struct vm_page *right; /* splay tree link (O) */ @@ -106,31 +107,31 @@ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page */ struct md_page md; /* machine dependant stuff */ - uint8_t queue; /* page queue index */ + uint8_t queue; /* page queue index (P,Q) */ int8_t segind; u_short flags; /* see below */ uint8_t order; /* index of the buddy queue */ uint8_t pool; - u_short cow; /* page cow mapping count */ + u_short cow; /* page cow mapping count (P) */ u_int wire_count; /* wired down maps refs (P) */ - short hold_count; /* page hold count */ + short hold_count; /* page hold count (P) */ u_short oflags; /* page flags (O) */ - u_char act_count; /* page usage count */ + u_char act_count; /* page usage count (O) */ u_char busy; /* page busy count (O) */ /* NOTE that these must support one bit per DEV_BSIZE in a page!!! */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ #if PAGE_SIZE == 4096 u_char valid; /* map of valid DEV_BSIZE chunks (O) */ - u_char dirty; /* map of dirty DEV_BSIZE chunks */ + u_char dirty; /* map of dirty DEV_BSIZE chunks (O) */ #elif PAGE_SIZE == 8192 u_short valid; /* map of valid DEV_BSIZE chunks (O) */ - u_short dirty; /* map of dirty DEV_BSIZE chunks */ + u_short dirty; /* map of dirty DEV_BSIZE chunks (O) */ #elif PAGE_SIZE == 16384 u_int valid; /* map of valid DEV_BSIZE chunks (O) */ - u_int dirty; /* map of dirty DEV_BSIZE chunks */ + u_int dirty; /* map of dirty DEV_BSIZE chunks (O) */ #elif PAGE_SIZE == 32768 u_long valid; /* map of valid DEV_BSIZE chunks (O) */ - u_long dirty; /* map of dirty DEV_BSIZE chunks */ + u_long dirty; /* map of dirty DEV_BSIZE chunks (O) */ #endif }; @@ -151,26 +152,50 @@ #define PQ_HOLD 3 #define PQ_COUNT 4 -/* Returns the real queue a page is on. */ -#define VM_PAGE_GETQUEUE(m) ((m)->queue) - -/* Returns the well known queue a page is on. */ -#define VM_PAGE_GETKNOWNQUEUE2(m) VM_PAGE_GETQUEUE(m) - -/* Returns true if the page is in the named well known queue. */ -#define VM_PAGE_INQUEUE2(m, q) (VM_PAGE_GETKNOWNQUEUE2(m) == (q)) - -/* Sets the queue a page is on. 
*/ -#define VM_PAGE_SETQUEUE2(m, q) (VM_PAGE_GETQUEUE(m) = (q)) - struct vpgqueues { struct pglist pl; int *cnt; }; extern struct vpgqueues vm_page_queues[PQ_COUNT]; -extern struct mtx vm_page_queue_free_mtx; +struct vpglocks { + struct mtx data; + char pad[CACHE_LINE_SIZE - sizeof(struct mtx)]; +} __aligned(CACHE_LINE_SIZE); + +extern struct vpglocks vm_page_queue_free_lock; +extern struct vpglocks pa_lock[]; + +#if defined(__arm__) +#define PDRSHIFT PDR_SHIFT +#elif !defined(PDRSHIFT) +#define PDRSHIFT 21 +#endif + +#define pa_index(pa) ((pa) >> PDRSHIFT) +#define PA_LOCKPTR(pa) &pa_lock[pa_index((pa)) % PA_LOCK_COUNT].data +#define PA_LOCKOBJPTR(pa) ((struct lock_object *)PA_LOCKPTR((pa))) +#define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa)) +#define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa)) +#define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa)) +#define PA_UNLOCK_COND(pa) \ + do { \ + if ((pa) != 0) { \ + PA_UNLOCK((pa)); \ + (pa) = 0; \ + } \ + } while (0) + +#define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a)) + +#define vm_page_lockptr(m) (PA_LOCKPTR(VM_PAGE_TO_PHYS((m)))) +#define vm_page_lock(m) mtx_lock(vm_page_lockptr((m))) +#define vm_page_unlock(m) mtx_unlock(vm_page_lockptr((m))) +#define vm_page_trylock(m) mtx_trylock(vm_page_lockptr((m))) +#define vm_page_lock_assert(m, a) mtx_assert(vm_page_lockptr((m)), (a)) + +#define vm_page_queue_free_mtx vm_page_queue_free_lock.data /* * These are the flags defined for vm_page. * @@ -180,6 +205,12 @@ * via the object/vm_page_t because there is no knowledge of their * pte mappings, nor can they be removed from their objects via * the object, and such pages are also not on any PQ queue. + * + * PG_REFERENCED may be cleared only if the object containing the page is + * locked. + * + * PG_WRITEABLE is set exclusively on managed pages by pmap_enter(). When it + * does so, the page must be VPO_BUSY. 
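The vm_page_lock*() macros introduced above do not embed a mutex in each vm_page; they hash the page's physical address into the pa_lock[] array of cache-line padded struct vpglocks. One consequence, shown as a small sketch (illustrative only): distinct pages can share a lock, which is harmless for correctness but means each lock covers "whichever pages hash to this bucket", not one page exclusively.

static __inline int
example_pages_share_lock(vm_page_t a, vm_page_t b)
{

	/*
	 * True whenever the two physical addresses land in the same
	 * pa_lock[] bucket, i.e. the same pa_index() modulo
	 * PA_LOCK_COUNT.
	 */
	return (vm_page_lockptr(a) == vm_page_lockptr(b));
}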
*/ #define PG_CACHED 0x0001 /* page is cached */ #define PG_FREE 0x0002 /* page is free */ @@ -258,7 +289,9 @@ #endif } -extern struct mtx vm_page_queue_mtx; +extern struct vpglocks vm_page_queue_lock; + +#define vm_page_queue_mtx vm_page_queue_lock.data #define vm_page_lock_queues() mtx_lock(&vm_page_queue_mtx) #define vm_page_unlock_queues() mtx_unlock(&vm_page_queue_mtx) @@ -318,6 +351,7 @@ void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); vm_page_t vm_page_next(vm_page_t m); +int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); vm_page_t vm_page_prev(vm_page_t m); void vm_page_remove (vm_page_t); void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); Index: vm/vm_contig.c =================================================================== --- vm/vm_contig.c (revision 218945) +++ vm/vm_contig.c (working copy) @@ -97,9 +97,11 @@ int vfslocked; mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); object = m->object; if (!VM_OBJECT_TRYLOCK(object) && !vm_pageout_fallback_object_lock(m, next)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); return (EAGAIN); } @@ -111,7 +113,8 @@ vm_page_test_dirty(m); if (m->dirty == 0 && m->hold_count == 0) pmap_remove_all(m); - if (m->dirty) { + if (m->dirty != 0) { + vm_page_unlock(m); if ((object->flags & OBJ_DEAD) != 0) { VM_OBJECT_UNLOCK(object); return (EAGAIN); @@ -135,13 +138,18 @@ return (0); } else if (object->type == OBJT_SWAP || object->type == OBJT_DEFAULT) { + vm_page_unlock_queues(); m_tmp = m; vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0, NULL); VM_OBJECT_UNLOCK(object); + vm_page_lock_queues(); return (0); } - } else if (m->hold_count == 0) - vm_page_cache(m); + } else { + if (m->hold_count == 0) + vm_page_cache(m); + vm_page_unlock(m); + } VM_OBJECT_UNLOCK(object); return (0); } @@ -163,9 +171,14 @@ if (pa < low || pa + PAGE_SIZE > high) continue; - KASSERT(VM_PAGE_INQUEUE2(m, queue), + if (!vm_pageout_page_lock(m, &next)) { + vm_page_unlock(m); + continue; + } + KASSERT(m->queue == queue, ("vm_contig_launder: page %p's queue is not %d", m, queue)); error = vm_contig_launder_page(m, &next); + vm_page_lock_assert(m, MA_NOTOWNED); if (error == 0) return (TRUE); if (error == EBUSY) @@ -260,9 +273,7 @@ i -= PAGE_SIZE; m = vm_page_lookup(object, OFF_TO_IDX(offset + i)); - vm_page_lock_queues(); vm_page_free(m); - vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(object); vm_map_delete(map, addr, addr + size); Index: vm/vm.h =================================================================== --- vm/vm.h (revision 218945) +++ vm/vm.h (working copy) @@ -76,7 +76,7 @@ #define VM_PROT_READ ((vm_prot_t) 0x01) #define VM_PROT_WRITE ((vm_prot_t) 0x02) #define VM_PROT_EXECUTE ((vm_prot_t) 0x04) -#define VM_PROT_OVERRIDE_WRITE ((vm_prot_t) 0x08) /* copy-on-write */ +#define VM_PROT_COPY ((vm_prot_t) 0x08) /* copy-on-read */ #define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) #define VM_PROT_RW (VM_PROT_READ|VM_PROT_WRITE) Index: vm/vnode_pager.c =================================================================== --- vm/vnode_pager.c (revision 218945) +++ vm/vnode_pager.c (working copy) @@ -429,9 +429,7 @@ * bits. This would prevent bogus_page * replacement from working properly. 
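vm_page_pa_tryrelock(), declared above in vm_page.h, is what lets a pmap acquire the page lock keyed by a physical address while already holding its own PMAP lock without a lock-order reversal: if it had to drop and retake the pmap lock it returns nonzero, and the caller must restart its page-table walk because the pte may have changed while the lock was dropped. A condensed sketch of the idiom that the i386 pmap_extract_and_hold() and pmap_mincore() hunks later in this patch follow (example_pte_walk() is a hypothetical stand-in for the machine-dependent walk, not a real function):

static vm_page_t
example_hold_mapping(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t locked, pa;
	vm_page_t m;

	m = NULL;
	locked = 0;
	PMAP_LOCK(pmap);
retry:
	pa = example_pte_walk(pmap, va);	/* hypothetical pte walk */
	if (pa != 0 && vm_page_pa_tryrelock(pmap, pa, &locked))
		goto retry;		/* pmap lock was dropped; pte is stale */
	if (pa != 0) {
		m = PHYS_TO_VM_PAGE(pa);
		vm_page_hold(m);	/* page lock for pa is held here */
	}
	PA_UNLOCK_COND(locked);
	PMAP_UNLOCK(pmap);
	return (m);
}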
*/ - vm_page_lock_queues(); vm_page_clear_dirty(m, base, PAGE_SIZE - base); - vm_page_unlock_queues(); } else if ((nsize & PAGE_MASK) && __predict_false(object->cache != NULL)) { vm_page_cache_free(object, OFF_TO_IDX(nsize), @@ -719,11 +717,13 @@ error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL); if (error == EOPNOTSUPP) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); + for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } PCPU_INC(cnt.v_vnodein); PCPU_INC(cnt.v_vnodepgsin); error = vnode_pager_input_old(object, m[reqpage]); @@ -731,11 +731,12 @@ return (error); } else if (error != 0) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); @@ -747,11 +748,12 @@ } else if ((PAGE_SIZE / bsize) > 1 && (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); PCPU_INC(cnt.v_vnodein); PCPU_INC(cnt.v_vnodepgsin); @@ -765,11 +767,12 @@ */ VM_OBJECT_LOCK(object); if (m[reqpage]->valid == VM_PAGE_BITS_ALL) { - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return VM_PAGER_OK; } else if (reqblock == -1) { @@ -777,11 +780,12 @@ KASSERT(m[reqpage]->dirty == 0, ("vnode_pager_generic_getpages: page %p is dirty", m)); m[reqpage]->valid = VM_PAGE_BITS_ALL; - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_OK); } @@ -800,11 +804,12 @@ if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr, &runpg) != 0) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); } @@ -818,9 +823,9 @@ (object->un_pager.vnp.vnp_size >> 32), (uintmax_t)object->un_pager.vnp.vnp_size); } - vm_page_lock_queues(); + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); VM_OBJECT_UNLOCK(object); runend = i + 1; first = runend; @@ -829,18 +834,20 @@ runend = i + runpg; if (runend <= reqpage) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); - for (j = i; j < runend; j++) + for (j = i; j < runend; j++) { + vm_page_lock(m[j]); vm_page_free(m[j]); - vm_page_unlock_queues(); + vm_page_unlock(m[j]); + } VM_OBJECT_UNLOCK(object); } else { if (runpg < (count - first)) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); - for (i = first + runpg; i < count; i++) + for (i = first + runpg; i < count; i++) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); count = first + runpg; } @@ -931,7 +938,6 @@ relpbuf(bp, &vnode_pbuf_freecnt); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0, tfoff = foff; i < 
count; i++, tfoff = nextoff) { vm_page_t mt; @@ -980,17 +986,23 @@ * now tell them that it is ok to use */ if (!error) { - if (mt->oflags & VPO_WANTED) + if (mt->oflags & VPO_WANTED) { + vm_page_lock(mt); vm_page_activate(mt); - else + vm_page_unlock(mt); + } else { + vm_page_lock(mt); vm_page_deactivate(mt); + vm_page_unlock(mt); + } vm_page_wakeup(mt); } else { + vm_page_lock(mt); vm_page_free(mt); + vm_page_unlock(mt); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); if (error) { printf("vnode_pager_getpages: I/O read error\n"); @@ -1055,15 +1067,12 @@ * then delayed. */ int -vnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals) - struct vnode *vp; - vm_page_t *m; - int bytecount; - int flags; - int *rtvals; +vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, + int flags, int *rtvals) { int i; vm_object_t object; + vm_page_t m; int count; int maxsize, ncount; @@ -1082,9 +1091,9 @@ for (i = 0; i < count; i++) rtvals[i] = VM_PAGER_AGAIN; - if ((int64_t)m[0]->pindex < 0) { + if ((int64_t)ma[0]->pindex < 0) { printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%lx)\n", - (long)m[0]->pindex, (u_long)m[0]->dirty); + (long)ma[0]->pindex, (u_long)ma[0]->dirty); rtvals[0] = VM_PAGER_BAD; return VM_PAGER_BAD; } @@ -1092,7 +1101,7 @@ maxsize = count * PAGE_SIZE; ncount = count; - poffset = IDX_TO_OFF(m[0]->pindex); + poffset = IDX_TO_OFF(ma[0]->pindex); /* * If the page-aligned write is larger then the actual file we @@ -1106,6 +1115,7 @@ * We do not under any circumstances truncate the valid bits, as * this will screw up bogus page replacement. */ + VM_OBJECT_LOCK(object); if (maxsize + poffset > object->un_pager.vnp.vnp_size) { if (object->un_pager.vnp.vnp_size > poffset) { int pgoff; @@ -1113,10 +1123,19 @@ maxsize = object->un_pager.vnp.vnp_size - poffset; ncount = btoc(maxsize); if ((pgoff = (int)maxsize & PAGE_MASK) != 0) { - vm_page_lock_queues(); - vm_page_clear_dirty(m[ncount - 1], pgoff, - PAGE_SIZE - pgoff); - vm_page_unlock_queues(); + /* + * If the object is locked and the following + * conditions hold, then the page's dirty + * field cannot be concurrently changed by a + * pmap operation. 
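The comment above states the rule this patch leans on throughout vnode_pager.c: with the object locked, a page that the pager has busied and that has no writeable mappings cannot have its dirty field changed underneath us by a pmap operation. Expressed as a predicate (a sketch only; the real code simply asserts the two conditions, as the assertions that follow show):

static __inline int
example_dirty_is_stable(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	return (m->busy > 0 && (m->flags & PG_WRITEABLE) == 0);
}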
+ */ + m = ma[ncount - 1]; + KASSERT(m->busy > 0, + ("vnode_pager_generic_putpages: page %p is not busy", m)); + KASSERT((m->flags & PG_WRITEABLE) == 0, + ("vnode_pager_generic_putpages: page %p is not read-only", m)); + vm_page_clear_dirty(m, pgoff, PAGE_SIZE - + pgoff); } } else { maxsize = 0; @@ -1128,6 +1147,7 @@ } } } + VM_OBJECT_UNLOCK(object); /* * pageouts are already clustered, use IO_ASYNC t o force a bawrite() @@ -1164,7 +1184,7 @@ if (auio.uio_resid) { if (ppscheck || ppsratecheck(&lastfail, &curfail, 1)) printf("vnode_pager_putpages: residual I/O %zd at %lu\n", - auio.uio_resid, (u_long)m[0]->pindex); + auio.uio_resid, (u_long)ma[0]->pindex); } for (i = 0; i < ncount; i++) { rtvals[i] = VM_PAGER_OK; Index: vm/uma_core.c =================================================================== --- vm/uma_core.c (revision 218945) +++ vm/uma_core.c (working copy) @@ -1037,10 +1037,8 @@ while (pages != startpages) { pages--; p = TAILQ_LAST(&object->memq, pglist); - vm_page_lock_queues(); vm_page_unwire(p, 0); vm_page_free(p); - vm_page_unlock_queues(); } retkva = 0; goto done; @@ -2952,13 +2950,11 @@ if (kva == 0) return (0); - if (obj == NULL) { - obj = vm_object_allocate(OBJT_DEFAULT, - pages); - } else { + if (obj == NULL) + obj = vm_object_allocate(OBJT_PHYS, pages); + else { VM_OBJECT_LOCK_INIT(obj, "uma object"); - _vm_object_allocate(OBJT_DEFAULT, - pages, obj); + _vm_object_allocate(OBJT_PHYS, pages, obj); } ZONE_LOCK(zone); keg->uk_kva = kva; Index: net/bpf_zerocopy.c =================================================================== --- net/bpf_zerocopy.c (revision 218945) +++ net/bpf_zerocopy.c (working copy) @@ -112,11 +112,11 @@ zbuf_page_free(vm_page_t pp) { - vm_page_lock_queues(); + vm_page_lock(pp); vm_page_unwire(pp, 0); if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); } /* @@ -168,10 +168,10 @@ VM_PROT_WRITE); if (pp == NULL) return (NULL); - vm_page_lock_queues(); + vm_page_lock(pp); vm_page_wire(pp); vm_page_unhold(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); sf = sf_buf_alloc(pp, SFB_NOWAIT); if (sf == NULL) { zbuf_page_free(pp); Property changes on: geom/sched ___________________________________________________________________ Added: svn:mergeinfo Merged /head/sys/geom/sched:r197750,198463,198721,198855,199490,199819,199869-199870,206823,206885,207155,207161,207163,207205,207210,207213,207262,207305,207308,207373-207374,207410,207412,207419,207437-207438,207448,207450-207452,207460,207519,207530-207531,207534-207535,207539-207541,207544,207548,207551-207552,207571,207573-207574,207576-207577,207584,207601,207617,207644,207649,207669,207694,207700,207702,207706,207708,207728,207738-207740,207746-207747,207752,207759,207796,207798,207805-207806,207822-207823,207846,207905,208175,208264,208278,208340,208504,208524,208532,208574,208609,208616,208645-208646,208651,208657,208659,208665,208667,208686-208688,208745,208764,208772,208791,208810,208846,208866,208990,209048,209173,209211,209226,209320-209321,209407,209610,209647-209648,209650-209651,209686,209702,211217,211958,212573,215471,215574,215610,215796,216333,216516,216555,216799,216899,217171,217177,217478-217479,218113,218773,218950 Index: i386/i386/pmap.c =================================================================== --- i386/i386/pmap.c (revision 218945) +++ i386/i386/pmap.c (working copy) @@ -297,6 +297,7 @@ static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t 
newpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); +static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); @@ -1228,7 +1229,7 @@ } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } - return (0); + return (NULL); } /* @@ -1337,32 +1338,39 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; - pt_entry_t pte; + pt_entry_t pte, *ptep; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { - sched_pin(); - pte = *pmap_pte_quick(pmap, va); + ptep = pmap_pte(pmap, va); + pte = *ptep; + pmap_pte_release(ptep); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } - sched_unpin(); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1607,9 +1615,9 @@ --m->wire_count; if (m->wire_count == 0) - return _pmap_unwire_pte_hold(pmap, m, free); + return (_pmap_unwire_pte_hold(pmap, m, free)); else - return 0; + return (0); } static int @@ -1643,7 +1651,7 @@ */ pmap_add_delayed_free_list(m, free, TRUE); - return 1; + return (1); } /* @@ -1657,10 +1665,10 @@ vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) - return 0; + return (0); ptepde = *pmap_pde(pmap, va); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); - return pmap_unwire_pte_hold(pmap, mpte, free); + return (pmap_unwire_pte_hold(pmap, mpte, free)); } /* @@ -1816,7 +1824,7 @@ pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); - return m; + return (m); } static vm_page_t @@ -2014,7 +2022,7 @@ { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - return sysctl_handle_long(oidp, &ksize, 0, req); + return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); @@ -2024,7 +2032,7 @@ { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - return sysctl_handle_long(oidp, &kfree, 0, req); + return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); @@ -2100,7 +2108,7 @@ pv_to_chunk(pv_entry_t pv) { - return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); + return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) @@ -2157,7 +2165,6 @@ static void pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) { - struct md_page *pvh; pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; @@ -2194,15 +2201,13 @@ pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - if (TAILQ_EMPTY(&m->md.pv_list)) { - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - if (TAILQ_EMPTY(&pvh->pv_list)) - vm_page_flag_clear(m, PG_WRITEABLE); - } free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } + if (TAILQ_EMPTY(&m->md.pv_list) && + 
TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); } sched_unpin(); } @@ -2910,7 +2915,8 @@ KASSERT((m->flags & PG_FICTITIOUS) == 0, ("pmap_remove_all: page %p is fictitious", m)); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + free = NULL; + vm_page_lock_queues(); sched_pin(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { @@ -2940,16 +2946,16 @@ */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - free = NULL; pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); - pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); sched_unpin(); + vm_page_unlock_queues(); + pmap_free_zero_pages(free); } /* @@ -2972,18 +2978,9 @@ if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); - va < eva; va += PAGE_SIZE, m++) { - /* - * In contrast to the analogous operation on a 4KB page - * mapping, the mapping's PG_A flag is not cleared and - * the page's PG_REFERENCED flag is not set. The - * reason is that pmap_demote_pde() expects that a 2/4MB - * page mapping with a stored page table page has PG_A - * set. - */ + va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - } } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); @@ -3091,22 +3088,15 @@ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; - if (pbits & PG_MANAGED) { - m = NULL; - if (pbits & PG_A) { + + if ((prot & VM_PROT_WRITE) == 0) { + if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == + (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); - vm_page_flag_set(m, PG_REFERENCED); - pbits &= ~PG_A; - } - if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } + pbits &= ~(PG_RW | PG_M); } - - if ((prot & VM_PROT_WRITE) == 0) - pbits &= ~(PG_RW | PG_M); #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; @@ -3282,18 +3272,22 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, vm_prot_t prot, boolean_t wired) { - vm_paddr_t pa; pd_entry_t *pde; pt_entry_t *pte; - vm_paddr_t opa; - pt_entry_t origpte, newpte; + pt_entry_t newpte, origpte; + pv_entry_t pv; + vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva; va = trunc_page(va); KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, - ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); + ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", + va)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0, + ("pmap_enter: page %p is not busy", m)); mpte = NULL; @@ -3348,16 +3342,15 @@ if (mpte) mpte->wire_count--; - /* - * We might be turning off write access to the page, - * so we go ahead and sense modify status. - */ if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } + + pv = NULL; + /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. 
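The KASSERT added to pmap_enter() above tightens its interface: a managed page must be VPO_BUSY when it is entered, which is the invariant that lets pmap_is_modified() and friends trust PG_WRITEABLE while holding only the object lock. A minimal sketch of a conforming caller (illustrative; assumes the caller already holds the object lock, as vm_page_busy() requires):

static void
example_enter_managed(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	vm_page_busy(m);			/* satisfy the new KASSERT */
	pmap_enter(pmap, va, prot, m, prot, FALSE);
	vm_page_wakeup(m);			/* clear VPO_BUSY again */
}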
@@ -3367,7 +3360,7 @@ pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); - pmap_remove_entry(pmap, om, va); + pv = pmap_pvh_remove(&om->md, pmap, va); } if (mpte != NULL) { mpte->wire_count--; @@ -3384,9 +3377,13 @@ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); - pmap_insert_entry(pmap, va, m); + if (pv == NULL) + pv = get_pv_entry(pmap, FALSE); + pv->pv_va = va; + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pa |= PG_MANAGED; - } + } else if (pv != NULL) + free_pv_entry(pmap, pv); /* * Increment counters @@ -3401,7 +3398,8 @@ newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; - vm_page_flag_set(m, PG_WRITEABLE); + if ((newpte & PG_MANAGED) != 0) + vm_page_flag_set(m, PG_WRITEABLE); } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) @@ -3442,6 +3440,10 @@ if ((prot & VM_PROT_WRITE) == 0) invlva = TRUE; } + if ((origpte & PG_MANAGED) != 0 && + TAILQ_EMPTY(&om->md.pv_list) && + TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)) + vm_page_flag_clear(om, PG_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else @@ -3541,6 +3543,7 @@ psize = atop(end - start); mpte = NULL; m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); @@ -3554,6 +3557,7 @@ mpte); m = TAILQ_NEXT(m, listq); } + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -3570,8 +3574,10 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { + vm_page_lock_queues(); PMAP_LOCK(pmap); - (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL); + (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -3679,7 +3685,7 @@ pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); - return mpte; + return (mpte); } /* @@ -4074,30 +4080,35 @@ struct md_page *pvh; pv_entry_t pv; int loops = 0; + boolean_t rv; - if (m->flags & PG_FICTITIOUS) - return FALSE; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { - return TRUE; + rv = TRUE; + break; } loops++; if (loops >= 16) break; } - if (loops < 16) { + if (!rv && loops < 16) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { - if (PV_PMAP(pv) == pmap) - return (TRUE); + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } loops++; if (loops >= 16) break; } } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -4114,8 +4125,11 @@ count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); + vm_page_lock_queues(); count = pmap_pvh_wired_mappings(&m->md, count); - return (pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count)); + count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); + vm_page_unlock_queues(); + return (count); } /* @@ -4151,16 +4165,15 @@ boolean_t pmap_page_is_mapped(vm_page_t m) { - struct md_page *pvh; + boolean_t rv; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (TAILQ_EMPTY(&m->md.pv_list)) { - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - return (!TAILQ_EMPTY(&pvh->pv_list)); - } else - return (TRUE); + 
vm_page_lock_queues(); + rv = !TAILQ_EMPTY(&m->md.pv_list) || + !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); + vm_page_unlock_queues(); + return (rv); } /* @@ -4312,12 +4325,25 @@ boolean_t pmap_is_modified(vm_page_t m) { + boolean_t rv; - if (m->flags & PG_FICTITIOUS) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no PTEs can have PG_M set. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return (FALSE); - if (pmap_is_modified_pvh(&m->md)) - return (TRUE); - return (pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); + vm_page_lock_queues(); + rv = pmap_is_modified_pvh(&m->md) || + pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))); + vm_page_unlock_queues(); + return (rv); } /* @@ -4374,6 +4400,54 @@ } /* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + vm_page_lock_queues(); + rv = pmap_is_referenced_pvh(&m->md) || + pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))); + vm_page_unlock_queues(); + return (rv); +} + +/* + * Returns TRUE if any of the given mappings were referenced and FALSE + * otherwise. Both page and 4mpage mappings are supported. + */ +static boolean_t +pmap_is_referenced_pvh(struct md_page *pvh) +{ + pv_entry_t pv; + pt_entry_t *pte; + pmap_t pmap; + boolean_t rv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + rv = FALSE; + sched_pin(); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte = pmap_pte_quick(pmap, pv->pv_va); + rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); + PMAP_UNLOCK(pmap); + if (rv) + break; + } + sched_unpin(); + return (rv); +} + +/* * Clear the write and modified bits in each of the given page's mappings. */ void @@ -4386,10 +4460,19 @@ pt_entry_t oldpte, *pte; vm_offset_t va; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & PG_FICTITIOUS) != 0 || + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. 
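pmap_is_modified() and pmap_remove_write() above now short-circuit on the same invariant: PG_WRITEABLE is only set while the page is VPO_BUSY (per the vm_page.h comment earlier), so with the object locked a page that is neither busy nor writeable can neither have nor gain writeable ptes. As a sketch (not patch code), the up-front test both functions perform amounts to:

static __inline boolean_t
example_may_have_writeable_ptes(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	return ((m->oflags & VPO_BUSY) != 0 ||
	    (m->flags & PG_WRITEABLE) != 0);
}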
+ */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); sched_pin(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { @@ -4427,6 +4510,7 @@ } vm_page_flag_clear(m, PG_WRITEABLE); sched_unpin(); + vm_page_unlock_queues(); } /* @@ -4452,11 +4536,11 @@ vm_offset_t va; int rtval = 0; - if (m->flags & PG_FICTITIOUS) - return (rtval); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + vm_page_lock_queues(); sched_pin(); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { va = pv->pv_va; pmap = PV_PMAP(pv); @@ -4511,6 +4595,7 @@ } out: sched_unpin(); + vm_page_unlock_queues(); return (rtval); } @@ -4527,9 +4612,20 @@ pt_entry_t oldpte, *pte; vm_offset_t va; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & PG_FICTITIOUS) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); sched_pin(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { @@ -4587,6 +4683,7 @@ PMAP_UNLOCK(pmap); } sched_unpin(); + vm_page_unlock_queues(); } /* @@ -4604,9 +4701,9 @@ pt_entry_t *pte; vm_offset_t va; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & PG_FICTITIOUS) != 0) - return; + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); sched_pin(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { @@ -4650,6 +4747,7 @@ PMAP_UNLOCK(pmap); } sched_unpin(); + vm_page_unlock_queues(); } /* @@ -4919,75 +5017,52 @@ * perform the pmap work for mincore */ int -pmap_mincore(pmap_t pmap, vm_offset_t addr) +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t *ptep, pte; vm_paddr_t pa; - vm_page_t m; - int val = 0; - + int val; + PMAP_LOCK(pmap); +retry: pdep = pmap_pde(pmap, addr); if (*pdep != 0) { if (*pdep & PG_PS) { pte = *pdep; - val = MINCORE_SUPER; /* Compute the physical address of the 4KB page. 
*/ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; + val = MINCORE_SUPER; } else { ptep = pmap_pte(pmap, addr); pte = *ptep; pmap_pte_release(ptep); pa = pte & PG_FRAME; + val = 0; } } else { pte = 0; pa = 0; + val = 0; } - PMAP_UNLOCK(pmap); - - if (pte != 0) { + if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; - if ((pte & PG_MANAGED) == 0) - return val; - - m = PHYS_TO_VM_PAGE(pa); - - /* - * Modified by us - */ if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) - val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; - else { - /* - * Modified by someone else - */ - vm_page_lock_queues(); - if (m->dirty || pmap_is_modified(m)) - val |= MINCORE_MODIFIED_OTHER; - vm_page_unlock_queues(); - } - /* - * Referenced by us - */ - if (pte & PG_A) - val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; - else { - /* - * Referenced by someone else - */ - vm_page_lock_queues(); - if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { - val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } - vm_page_unlock_queues(); - } - } - return val; + val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; + if ((pte & PG_A) != 0) + val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; + } + if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != + (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && + (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { + /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ + if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) + goto retry; + } else + PA_UNLOCK_COND(*locked_pa); + PMAP_UNLOCK(pmap); + return (val); } void @@ -5082,7 +5157,7 @@ printf("\n"); } sx_sunlock(&allproc_lock); - return npte; + return (npte); } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { @@ -5107,7 +5182,7 @@ } } sx_sunlock(&allproc_lock); - return npte; + return (npte); } #endif Index: i386/xen/pmap.c =================================================================== --- i386/xen/pmap.c (revision 218945) +++ i386/xen/pmap.c (working copy) @@ -103,8 +103,6 @@ * and to when physical maps must be made correct. 
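The i386 pmap_mincore() rewrite above replaces the old vm_page_lock_queues() dance with a locked_pa cookie: when the pmap wants the caller to examine the vm_page (to resolve the *_OTHER bits), it returns with the page lock for that physical address held and recorded in *locked_pa. A simplified sketch of the caller side (the real consumer is the mincore() system call; the inspection step is elided here):

static int
example_mincore_one(pmap_t pmap, vm_offset_t addr)
{
	vm_paddr_t locked_pa;
	int val;

	locked_pa = 0;
	val = pmap_mincore(pmap, addr, &locked_pa);
	/*
	 * If locked_pa is nonzero its page lock is held and
	 * PHYS_TO_VM_PAGE(locked_pa) may be examined here;
	 * PA_UNLOCK_COND() drops the lock, or does nothing if no
	 * lock was handed back.
	 */
	PA_UNLOCK_COND(locked_pa);
	return (val);
}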
*/ -#define PMAP_DIAGNOSTIC - #include "opt_cpu.h" #include "opt_pmap.h" #include "opt_msgbuf.h" @@ -168,11 +166,9 @@ #define PMAP_SHPGPERPROC 200 #endif -#if defined(DIAGNOSTIC) -#define PMAP_DIAGNOSTIC -#endif +#define DIAGNOSTIC -#if !defined(PMAP_DIAGNOSTIC) +#if !defined(DIAGNOSTIC) #define PMAP_INLINE __gnu89_inline #else #define PMAP_INLINE @@ -291,9 +287,18 @@ "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); +SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, + "2/4MB page mapping counters"); +static u_long pmap_pde_mappings; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_pde_mappings, 0, "2/4MB page mappings"); + static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); +static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); +static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, + vm_offset_t va); static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); @@ -303,7 +308,6 @@ vm_page_t *free); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); -static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); @@ -1217,14 +1221,19 @@ pd_entry_t pde; pt_entry_t pte; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pde = PT_GET(pmap_pde(pmap, va)); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); @@ -1236,13 +1245,15 @@ PT_SET_MA(PADDR1, 0); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } sched_unpin(); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -2086,23 +2097,19 @@ ("pmap_collect: wired pte %#jx", (uintmax_t)tpte)); if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); - if (tpte & PG_M) { - KASSERT((tpte & PG_RW), - ("pmap_collect: modified page not writable: va: %#x, pte: %#jx", - va, (uintmax_t)tpte)); + if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - } free = NULL; pmap_unuse_pt(pmap, va, &free); pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_flag_clear(m, PG_WRITEABLE); free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); } sched_unpin(); } @@ -2242,38 +2249,39 @@ return (pv); } -static void -pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) +static __inline pv_entry_t +pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; - PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pmap == PV_PMAP(pv) && va == pv->pv_va) + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + if (pmap == PV_PMAP(pv) && va == pv->pv_va) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); break; + } } - KASSERT(pv != NULL, ("pmap_remove_entry: pv 
not found")); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_flag_clear(m, PG_WRITEABLE); - free_pv_entry(pmap, pv); + return (pv); } -/* - * Create a pv entry for page at pa for - * (pmap, va). - */ static void -pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) +pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; - PMAP_LOCK_ASSERT(pmap, MA_OWNED); + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); + free_pv_entry(pmap, pv); +} + +static void +pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) +{ + mtx_assert(&vm_page_queue_mtx, MA_OWNED); - pv = get_pv_entry(pmap, FALSE); - pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + pmap_pvh_free(&m->md, pmap, va); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); } /* @@ -2322,12 +2330,8 @@ pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME); - if (oldpte & PG_M) { - KASSERT((oldpte & PG_RW), - ("pmap_remove_pte: modified page not writable: va: %#x, pte: %#jx", - va, (uintmax_t)oldpte)); + if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); pmap_remove_entry(pmap, m, va); @@ -2485,16 +2489,10 @@ pt_entry_t *pte, tpte; vm_page_t free; -#if defined(PMAP_DIAGNOSTIC) - /* - * XXX This makes pmap_remove_all() illegal for non-managed pages! - */ - if (m->flags & PG_FICTITIOUS) { - panic("pmap_remove_all: illegal for unmanaged page, va: 0x%jx", - VM_PAGE_TO_PHYS(m) & 0xffffffff); - } -#endif - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & PG_FICTITIOUS) == 0, + ("pmap_remove_all: page %p is fictitious", m)); + free = NULL; + vm_page_lock_queues(); sched_pin(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); @@ -2512,16 +2510,10 @@ /* * Update the vm_page_t clean and reference bits. 
*/ - if (tpte & PG_M) { - KASSERT((tpte & PG_RW), - ("pmap_remove_all: modified page not writable: va: %#x, pte: %#jx", - pv->pv_va, (uintmax_t)tpte)); + if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - } - free = NULL; pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); - pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); @@ -2531,6 +2523,8 @@ if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); + vm_page_unlock_queues(); + pmap_free_zero_pages(free); } /* @@ -2613,22 +2607,16 @@ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; - if (pbits & PG_MANAGED) { - m = NULL; - if (pbits & PG_A) { - m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & PG_FRAME); - vm_page_flag_set(m, PG_REFERENCED); - pbits &= ~PG_A; - } - if ((pbits & PG_M) != 0) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & PG_FRAME); + + if ((prot & VM_PROT_WRITE) == 0) { + if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == + (PG_MANAGED | PG_M | PG_RW)) { + m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & + PG_FRAME); vm_page_dirty(m); } + pbits &= ~(PG_RW | PG_M); } - - if ((prot & VM_PROT_WRITE) == 0) - pbits &= ~(PG_RW | PG_M); #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; @@ -2683,23 +2671,24 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, vm_prot_t prot, boolean_t wired) { - vm_paddr_t pa; pd_entry_t *pde; pt_entry_t *pte; - vm_paddr_t opa; - pt_entry_t origpte, newpte; + pt_entry_t newpte, origpte; + pv_entry_t pv; + vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva; CTR6(KTR_PMAP, "pmap_enter: pmap=%08p va=0x%08x access=0x%x ma=0x%08x prot=0x%x wired=%d", pmap, va, access, xpmap_ptom(VM_PAGE_TO_PHYS(m)), prot, wired); va = trunc_page(va); -#ifdef PMAP_DIAGNOSTIC - if (va > VM_MAX_KERNEL_ADDRESS) - panic("pmap_enter: toobig"); - if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) - panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); -#endif + KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); + KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, + ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", + va)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0, + ("pmap_enter: page %p is not busy", m)); mpte = NULL; @@ -2714,16 +2703,6 @@ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va, M_WAITOK); } -#if 0 && defined(PMAP_DIAGNOSTIC) - else { - pd_entry_t *pdeaddr = pmap_pde(pmap, va); - origpte = *pdeaddr; - if ((origpte & PG_V) == 0) { - panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", - pmap->pm_pdir[PTDPTDI], origpte, va); - } - } -#endif pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) @@ -2734,7 +2713,7 @@ * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { - panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", + panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va); } @@ -2772,16 +2751,15 @@ if (mpte) mpte->wire_count--; - /* - * We might be turning off write access to the page, - * so we go ahead and sense modify status. - */ if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } + + pv = NULL; + /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. 
@@ -2791,7 +2769,7 @@ pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); - pmap_remove_entry(pmap, om, va); + pv = pmap_pvh_remove(&om->md, pmap, va); } else if (va < VM_MAXUSER_ADDRESS) printf("va=0x%x is unmanaged :-( \n", va); @@ -2810,9 +2788,13 @@ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); - pmap_insert_entry(pmap, va, m); + if (pv == NULL) + pv = get_pv_entry(pmap, FALSE); + pv->pv_va = va; + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pa |= PG_MANAGED; - } + } else if (pv != NULL) + free_pv_entry(pmap, pv); /* * Increment counters @@ -2827,7 +2809,8 @@ newpte = (pt_entry_t)(pa | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; - vm_page_flag_set(m, PG_WRITEABLE); + if ((newpte & PG_MANAGED) != 0) + vm_page_flag_set(m, PG_WRITEABLE); } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) @@ -2861,15 +2844,15 @@ invlva = TRUE; #endif } - if (origpte & PG_M) { - KASSERT((origpte & PG_RW), - ("pmap_enter: modified page not writable: va: %#x, pte: %#jx", - va, (uintmax_t)origpte)); + if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(om); if ((prot & VM_PROT_WRITE) == 0) invlva = TRUE; } + if ((origpte & PG_MANAGED) != 0 && + TAILQ_EMPTY(&om->md.pv_list)) + vm_page_flag_clear(om, PG_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else{ @@ -2913,6 +2896,7 @@ mpte = NULL; m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m, @@ -2929,7 +2913,7 @@ error = HYPERVISOR_multicall(mcl, count); KASSERT(error == 0, ("bad multicall %d", error)); } - + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -2952,10 +2936,12 @@ CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x", pmap, va, m, prot); + vm_page_lock_queues(); PMAP_LOCK(pmap); - (void) pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL); + (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL); if (count) HYPERVISOR_multicall(&mcl, count); + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -3142,64 +3128,59 @@ vm_object_t object, vm_pindex_t pindex, vm_size_t size) { + pd_entry_t *pde; + vm_paddr_t pa, ptepa; vm_page_t p; + int pat_mode; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if (pseflag && - ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { - int i; - vm_page_t m[1]; - unsigned int ptepindex; - int npdes; - pd_entry_t ptepa; - - PMAP_LOCK(pmap); - if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) - goto out; - PMAP_UNLOCK(pmap); -retry: + (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { + if (!vm_object_populate(object, pindex, pindex + atop(size))) + return; p = vm_page_lookup(object, pindex); - if (p != NULL) { - if (vm_page_sleep_if_busy(p, FALSE, "init4p")) - goto retry; - } else { - p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); - if (p == NULL) - return; - m[0] = p; - - if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { - vm_page_lock_queues(); - vm_page_free(p); - vm_page_unlock_queues(); - return; - } - - p = vm_page_lookup(object, pindex); - vm_page_wakeup(p); - } - + KASSERT(p->valid == VM_PAGE_BITS_ALL, + ("pmap_object_init_pt: invalid page %p", p)); + pat_mode = p->md.pat_mode; + /* + * 
Abort the mapping if the first page is not physically + * aligned to a 2/4MB page boundary. + */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; - - p->valid = VM_PAGE_BITS_ALL; - + /* + * Skip the first page. Abort the mapping if the rest of + * the pages are not physically contiguous or have differing + * memory attributes. + */ + p = TAILQ_NEXT(p, listq); + for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; + pa += PAGE_SIZE) { + KASSERT(p->valid == VM_PAGE_BITS_ALL, + ("pmap_object_init_pt: invalid page %p", p)); + if (pa != VM_PAGE_TO_PHYS(p) || + pat_mode != p->md.pat_mode) + return; + p = TAILQ_NEXT(p, listq); + } + /* Map using 2/4MB pages. */ PMAP_LOCK(pmap); - pmap->pm_stats.resident_count += size >> PAGE_SHIFT; - npdes = size >> PDRSHIFT; - critical_enter(); - for(i = 0; i < npdes; i++) { - PD_SET_VA(pmap, ptepindex, - ptepa | PG_U | PG_M | PG_RW | PG_V | PG_PS, FALSE); - ptepa += NBPDR; - ptepindex += 1; + for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + + size; pa += NBPDR) { + pde = pmap_pde(pmap, addr); + if (*pde == 0) { + pde_store(pde, pa | PG_PS | PG_M | PG_A | + PG_U | PG_RW | PG_V); + pmap->pm_stats.resident_count += NBPDR / + PAGE_SIZE; + pmap_pde_mappings++; + } + /* Else continue on if the PDE is already valid. */ + addr += NBPDR; } - pmap_invalidate_all(pmap); - critical_exit(); -out: PMAP_UNLOCK(pmap); } } @@ -3288,8 +3269,8 @@ pd_entry_t srcptepaddr; unsigned ptepindex; - if (addr >= UPT_MIN_ADDRESS) - panic("pmap_copy: invalid to pmap_copy page tables"); + KASSERT(addr < UPT_MIN_ADDRESS, + ("pmap_copy: invalid to pmap_copy page tables")); pdnxt = (addr + NBPDR) & ~PDRMASK; ptepindex = addr >> PDRSHIFT; @@ -3308,8 +3289,8 @@ } srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); - if (srcmpte->wire_count == 0) - panic("pmap_copy: source page table page is unused"); + KASSERT(srcmpte->wire_count > 0, + ("pmap_copy: source page table page is unused")); if (pdnxt > end_addr) pdnxt = end_addr; @@ -3490,20 +3471,23 @@ { pv_entry_t pv; int loops = 0; + boolean_t rv; - if (m->flags & PG_FICTITIOUS) - return (FALSE); - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { - return TRUE; + rv = TRUE; + break; } loops++; if (loops >= 16) break; } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -3523,7 +3507,7 @@ count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); @@ -3534,6 +3518,7 @@ PMAP_UNLOCK(pmap); } sched_unpin(); + vm_page_unlock_queues(); return (count); } @@ -3544,16 +3529,15 @@ boolean_t pmap_page_is_mapped(vm_page_t m) { - struct md_page *pvh; + boolean_t rv; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (TAILQ_EMPTY(&m->md.pv_list)) { - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - return (!TAILQ_EMPTY(&pvh->pv_list)); - } else - return (TRUE); + vm_page_lock_queues(); + rv = !TAILQ_EMPTY(&m->md.pv_list) || + !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); + vm_page_unlock_queues(); + return (rv); } /* @@ -3686,12 +3670,21 @@ pmap_t pmap; boolean_t rv; + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); rv = FALSE; - if 
(m->flags & PG_FICTITIOUS) + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no PTEs can have PG_M set. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return (rv); - + vm_page_lock_queues(); sched_pin(); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -3704,6 +3697,7 @@ if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); + vm_page_unlock_queues(); return (rv); } @@ -3739,6 +3733,35 @@ return (rv); } +boolean_t +pmap_is_referenced(vm_page_t m) +{ + pv_entry_t pv; + pt_entry_t *pte; + pmap_t pmap; + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); + sched_pin(); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte = pmap_pte_quick(pmap, pv->pv_va); + rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); + PMAP_UNLOCK(pmap); + if (rv) + break; + } + if (*PMAP1) + PT_SET_MA(PADDR1, 0); + sched_unpin(); + vm_page_unlock_queues(); + return (rv); +} + void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) { @@ -3779,10 +3802,19 @@ pmap_t pmap; pt_entry_t oldpte, *pte; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & PG_FICTITIOUS) != 0 || + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); @@ -3813,6 +3845,7 @@ if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); + vm_page_unlock_queues(); } /* @@ -3835,10 +3868,10 @@ pt_entry_t *pte; int rtval = 0; - if (m->flags & PG_FICTITIOUS) - return (rtval); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); + vm_page_lock_queues(); sched_pin(); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { @@ -3863,6 +3896,7 @@ PT_SET_MA(PADDR1, 0); sched_unpin(); + vm_page_unlock_queues(); return (rtval); } @@ -3876,9 +3910,20 @@ pmap_t pmap; pt_entry_t *pte; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & PG_FICTITIOUS) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 
+ */ + if ((m->flags & PG_WRITEABLE) == 0) return; + vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); @@ -3896,6 +3941,7 @@ PMAP_UNLOCK(pmap); } sched_unpin(); + vm_page_unlock_queues(); } /* @@ -3910,9 +3956,9 @@ pmap_t pmap; pt_entry_t *pte; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((m->flags & PG_FICTITIOUS) != 0) - return; + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); @@ -3930,6 +3976,7 @@ PMAP_UNLOCK(pmap); } sched_unpin(); + vm_page_unlock_queues(); } /* @@ -4122,62 +4169,36 @@ * perform the pmap work for mincore */ int -pmap_mincore(pmap_t pmap, vm_offset_t addr) +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pt_entry_t *ptep, pte; - vm_page_t m; - int val = 0; + vm_paddr_t pa; + int val; PMAP_LOCK(pmap); +retry: ptep = pmap_pte(pmap, addr); pte = (ptep != NULL) ? PT_GET(ptep) : 0; pmap_pte_release(ptep); + val = 0; + if ((pte & PG_V) != 0) { + val |= MINCORE_INCORE; + if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) + val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; + if ((pte & PG_A) != 0) + val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; + } + if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != + (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && + (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { + pa = pte & PG_FRAME; + /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ + if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) + goto retry; + } else + PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); - - if (pte != 0) { - vm_paddr_t pa; - - val = MINCORE_INCORE; - if ((pte & PG_MANAGED) == 0) - return val; - - pa = pte & PG_FRAME; - - m = PHYS_TO_VM_PAGE(pa); - - /* - * Modified by us - */ - if (pte & PG_M) - val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; - else { - /* - * Modified by someone else - */ - vm_page_lock_queues(); - if (m->dirty || pmap_is_modified(m)) - val |= MINCORE_MODIFIED_OTHER; - vm_page_unlock_queues(); - } - /* - * Referenced by us - */ - if (pte & PG_A) - val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; - else { - /* - * Referenced by someone else - */ - vm_page_lock_queues(); - if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { - val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } - vm_page_unlock_queues(); - } - } - return val; + return (val); } void Property changes on: contrib/pf ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/pf:r197750,198463,198721,198855,199490,199819,199869-199870,206823,206885,207155,207161,207163,207205,207210,207213,207262,207308,207373-207374,207410,207412,207419,207437-207438,207448,207450-207452,207460,207519,207530-207531,207534-207535,207539-207541,207544,207548,207551-207552,207571,207573-207574,207576-207577,207584,207601,207617,207644,207649,207669,207694,207700,207702,207706,207708,207728,207738-207740,207746-207747,207752,207759,207796,207798,207805-207806,207822-207823,207846,207905,208175,208264,208278,208504,208524,208574,208609,208645-208646,208651,208657,208665,208667,208686-208688,208745,208764,208772,208791,208810,208846,208990,209048,209173,209211,209226,209320-209321,209610,209647,209651,212573,216333,216516,216555,216899,217171,217177,217478-217479,218113,218773,218950 Property changes on: contrib/dev/acpica 
___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica:r197750,198463,198721,198855,199490,199819,199869-199870,206823,206885,207155,207161,207163,207205,207210,207213,207262,207308,207373-207374,207410,207412,207419,207437-207438,207448,207450-207452,207460,207519,207530-207531,207534-207535,207539-207541,207544,207548,207551-207552,207571,207573-207574,207576-207577,207584,207601,207617,207644,207649,207669,207694,207700,207702,207706,207708,207728,207738-207740,207746-207747,207752,207759,207796,207798,207805-207806,207822-207823,207846,207905,208175,208264,208278,208504,208524,208574,208609,208645-208646,208651,208657,208665,208667,208686-208688,208745,208764,208772,208791,208810,208846,208990,209048,209173,209211,209226,209320-209321,209610,209647,209651,212573,216333,216516,216555,216899,217171,217177,217478-217479,218113,218773,218950 Property changes on: cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/cddl/contrib/opensolaris:r197750,198463,198721,198855,199490,199819,199869-199870,206823,206885,207155,207161,207163,207205,207210,207213,207262,207308,207373-207374,207410,207412,207419,207437-207438,207448,207450-207452,207460,207519,207530-207531,207534-207535,207539-207541,207544,207548,207551-207552,207571,207573-207574,207576-207577,207584,207601,207617,207644,207649,207669,207694,207700,207702,207706,207708,207728,207738-207740,207746-207747,207752,207759,207796,207798,207805-207806,207822-207823,207846,207905,208175,208264,208278,208504,208524,208574,208609,208645-208646,208651,208657,208665,208667,208686-208688,208745,208764,208772,208791,208810,208846,208990,209048,209173,209211,209226,209320-209321,209610,209647,209651,212573,216333,216516,216555,216899,217171,217177,217478-217479,218113,218773,218950 Index: cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (revision 218945) +++ cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (working copy) @@ -327,9 +327,7 @@ if (vm_page_sleep_if_busy(pp, FALSE, "zfsmwb")) continue; vm_page_busy(pp); - vm_page_lock_queues(); vm_page_undirty(pp); - vm_page_unlock_queues(); } else { if (__predict_false(obj->cache != NULL)) { vm_page_cache_free(obj, OFF_TO_IDX(start), @@ -505,13 +503,13 @@ } VM_OBJECT_LOCK(obj); vm_page_io_finish(m); - vm_page_lock_queues(); + vm_page_lock(m); if (error == 0) { m->valid = VM_PAGE_BITS_ALL; vm_page_activate(m); } else vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); if (error == 0) { uio->uio_resid -= bytes; @@ -4232,13 +4230,14 @@ KASSERT(vp->v_object == object, ("mismatching object")); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); + for (i = 0; i < pcount; i++) { if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); + vm_page_unlock(m[i]); } } - vm_page_unlock_queues(); if (mreq->valid) { if (mreq->valid != VM_PAGE_BITS_ALL) Index: amd64/include/vmparam.h =================================================================== --- amd64/include/vmparam.h (revision 218945) +++ amd64/include/vmparam.h (working copy) @@ -145,6 +145,10 @@ #define VM_LEVEL_0_ORDER 9 #endif +#ifdef SMP +#define PA_LOCK_COUNT 256 +#endif + /* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. 
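The zfs_vnops.c hunks above replace the global page-queue lock with per-page locks around the page-state transitions, and the vmparam.h hunk defines PA_LOCK_COUNT as 256 under SMP, which sizes the pool of physical-address page locks on amd64. Below is a minimal sketch of the per-page idiom as it is used in the ZFS hunk, assuming the stable/8 vm_page API; the function name is invented and the fragment is illustrative only, not code from the patch.

/*
 * Illustrative sketch (not part of the patch): finish I/O on a page and
 * either activate or free it under its own page lock, the way the
 * zfs_vnops.c hunk above does, instead of taking the global
 * vm_page_queue_mtx.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

static void
zfs_getpage_done_sketch(vm_object_t obj, vm_page_t m, int error)
{

	VM_OBJECT_LOCK(obj);
	vm_page_io_finish(m);		/* busy count is covered by the object lock */
	vm_page_lock(m);		/* per-page lock, not vm_page_lock_queues() */
	if (error == 0) {
		m->valid = VM_PAGE_BITS_ALL;
		vm_page_activate(m);	/* queue change keyed off the page lock */
	} else
		vm_page_free(m);	/* needs both the page and object locks */
	vm_page_unlock(m);
	VM_OBJECT_UNLOCK(obj);
}

The earlier hunk in the same file drops the queue lock around vm_page_undirty() altogether; the page is busied under the object lock at that point, which appears to be what serializes the dirty field in this scheme.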
Property changes on: amd64/include/xen ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/amd64/include/xen:r197750,198463,198721,198855,199490,199819,199869-199870,206823,206885,207155,207161,207163,207205,207210,207213,207262,207308,207373-207374,207410,207412,207419,207437-207438,207448,207450-207452,207460,207519,207530-207531,207534-207535,207539-207541,207544,207548,207551-207552,207571,207573-207574,207576-207577,207584,207601,207617,207644,207649,207669,207694,207700,207702,207706,207708,207728,207738-207740,207746-207747,207752,207759,207796,207798,207805-207806,207822-207823,207846,207905,208175,208264,208278,208504,208524,208574,208609,208645-208646,208651,208657,208665,208667,208686-208688,208745,208764,208772,208791,208810,208846,208990,209048,209173,209211,209226,209320-209321,209610,209647,209651,212573,216333,216516,216555,216899,217171,217177,217478-217479,218113,218773,218950 Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (revision 218945) +++ amd64/amd64/pmap.c (working copy) @@ -240,6 +240,7 @@ static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); +static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); @@ -287,7 +288,7 @@ vm_offset_t newaddr = addr; newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); - return newaddr; + return (newaddr); } /********************/ @@ -298,7 +299,7 @@ static __inline vm_pindex_t pmap_pde_pindex(vm_offset_t va) { - return va >> PDRSHIFT; + return (va >> PDRSHIFT); } @@ -357,7 +358,7 @@ pml4e = pmap_pml4e(pmap, va); if ((*pml4e & PG_V) == 0) - return NULL; + return (NULL); return (pmap_pml4e_to_pdpe(pml4e, va)); } @@ -379,7 +380,7 @@ pdpe = pmap_pdpe(pmap, va); if (pdpe == NULL || (*pdpe & PG_V) == 0) - return NULL; + return (NULL); return (pmap_pdpe_to_pde(pdpe, va)); } @@ -401,13 +402,28 @@ pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) - return NULL; + return (NULL); if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ return ((pt_entry_t *)pde); return (pmap_pde_to_pte(pde, va)); } +static __inline void +pmap_resident_count_inc(pmap_t pmap, int count) +{ + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + pmap->pm_stats.resident_count += count; +} + +static __inline void +pmap_resident_count_dec(pmap_t pmap, int count) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + pmap->pm_stats.resident_count -= count; +} + PMAP_INLINE pt_entry_t * vtopte(vm_offset_t va) { @@ -812,7 +828,6 @@ SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pdpe_demotions, 0, "1GB page demotions"); - /*************************************************** * Low level helper routines..... 
***************************************************/ @@ -1177,15 +1192,20 @@ { pd_entry_t pde, *pdep; pt_entry_t pte; + vm_paddr_t pa; vm_page_t m; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); @@ -1194,12 +1214,14 @@ pte = *pmap_pde_to_pte(pdep, va); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1229,7 +1251,7 @@ pa = (pa & PG_FRAME) | (va & PAGE_MASK); } } - return pa; + return (pa); } /*************************************************** @@ -1462,9 +1484,9 @@ --m->wire_count; if (m->wire_count == 0) - return _pmap_unwire_pte_hold(pmap, va, m, free); + return (_pmap_unwire_pte_hold(pmap, va, m, free)); else - return 0; + return (0); } static int @@ -1472,6 +1494,7 @@ vm_page_t *free) { + PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ @@ -1491,7 +1514,7 @@ pd = pmap_pde(pmap, va); *pd = 0; } - --pmap->pm_stats.resident_count; + pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; @@ -1520,7 +1543,7 @@ */ pmap_add_delayed_free_list(m, free, TRUE); - return 1; + return (1); } /* @@ -1533,10 +1556,10 @@ vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) - return 0; + return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); - return pmap_unwire_pte_hold(pmap, va, mpte, free); + return (pmap_unwire_pte_hold(pmap, va, mpte, free)); } void @@ -1609,6 +1632,7 @@ (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. 
*/ @@ -1726,9 +1750,9 @@ *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } - pmap->pm_stats.resident_count++; + pmap_resident_count_inc(pmap, 1); - return m; + return (m); } static vm_page_t @@ -2014,7 +2038,6 @@ static void pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) { - struct md_page *pvh; pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; @@ -2033,7 +2056,7 @@ PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) continue; - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, va); KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found" " a 2mpage in page %p's pv list", m)); @@ -2050,15 +2073,13 @@ pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - if (TAILQ_EMPTY(&m->md.pv_list)) { - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - if (TAILQ_EMPTY(&pvh->pv_list)) - vm_page_flag_clear(m, PG_WRITEABLE); - } free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } + if (TAILQ_EMPTY(&m->md.pv_list) && + TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); } } @@ -2434,7 +2455,7 @@ return (FALSE); } if (va < VM_MAXUSER_ADDRESS) - pmap->pm_stats.resident_count++; + pmap_resident_count_inc(pmap, 1); } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); @@ -2526,7 +2547,7 @@ */ if (oldpde & PG_G) pmap_invalidate_page(kernel_pmap, sva); - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); @@ -2549,7 +2570,7 @@ mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; @@ -2574,7 +2595,7 @@ oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; - pmap->pm_stats.resident_count -= 1; + pmap_resident_count_dec(pmap, 1); if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) @@ -2770,12 +2791,13 @@ KASSERT((m->flags & PG_FICTITIOUS) == 0, ("pmap_remove_all: page %p is fictitious", m)); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + free = NULL; + vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); @@ -2783,7 +2805,7 @@ while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 2mpage in page %p's pv list", m)); @@ -2799,15 +2821,15 @@ */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - free = NULL; pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); pmap_invalidate_page(pmap, pv->pv_va); - pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); + pmap_free_zero_pages(free); } /* @@ -2830,18 +2852,9 @@ if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); - va < eva; va += 
PAGE_SIZE, m++) { - /* - * In contrast to the analogous operation on a 4KB page - * mapping, the mapping's PG_A flag is not cleared and - * the page's PG_REFERENCED flag is not set. The - * reason is that pmap_demote_pde() expects that a 2MB - * page mapping with a stored page table page has PG_A - * set. - */ + va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - } } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); @@ -2950,23 +2963,15 @@ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; - if (pbits & PG_MANAGED) { - m = NULL; - if (pbits & PG_A) { + + if ((prot & VM_PROT_WRITE) == 0) { + if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == + (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); - vm_page_flag_set(m, PG_REFERENCED); - pbits &= ~PG_A; - } - if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pbits & - PG_FRAME); vm_page_dirty(m); } + pbits &= ~(PG_RW | PG_M); } - - if ((prot & VM_PROT_WRITE) == 0) - pbits &= ~(PG_RW | PG_M); if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; @@ -3118,18 +3123,22 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, vm_prot_t prot, boolean_t wired) { - vm_paddr_t pa; pd_entry_t *pde; pt_entry_t *pte; - vm_paddr_t opa; - pt_entry_t origpte, newpte; + pt_entry_t newpte, origpte; + pv_entry_t pv; + vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva; va = trunc_page(va); KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, - ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); + ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", + va)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0, + ("pmap_enter: page %p is not busy", m)); mpte = NULL; @@ -3140,9 +3149,8 @@ * In the case that a page table page is not * resident, we are creating it here. */ - if (va < VM_MAXUSER_ADDRESS) { + if (va < VM_MAXUSER_ADDRESS) mpte = pmap_allocpte(pmap, va, M_WAITOK); - } pde = pmap_pde(pmap, va); if (pde != NULL && (*pde & PG_V) != 0) { @@ -3178,16 +3186,15 @@ if (mpte) mpte->wire_count--; - /* - * We might be turning off write access to the page, - * so we go ahead and sense modify status. - */ if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } + + pv = NULL; + /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. @@ -3197,7 +3204,7 @@ pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); - pmap_remove_entry(pmap, om, va); + pv = pmap_pvh_remove(&om->md, pmap, va); } if (mpte != NULL) { mpte->wire_count--; @@ -3206,7 +3213,7 @@ " va: 0x%lx", va)); } } else - pmap->pm_stats.resident_count++; + pmap_resident_count_inc(pmap, 1); /* * Enter on the PV list if part of our managed memory. 
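The pmap_enter() hunks on either side of this point change how the pv entry of a replaced managed mapping is handled: instead of pmap_remove_entry() freeing it and get_pv_entry() allocating a new one, pmap_pvh_remove() detaches the old entry so it can be reattached to the new page, and it is freed only if the new mapping turns out to be unmanaged. The demo below is a self-contained userland restatement of that recycle-or-allocate pattern; the types and helpers are invented for illustration and are not the kernel's pv structures.

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_pv {
	unsigned long		pv_va;
	TAILQ_ENTRY(demo_pv)	pv_list;
};
TAILQ_HEAD(demo_pvh, demo_pv);

/* Detach and return the entry for "va", as pmap_pvh_remove() does. */
static struct demo_pv *
demo_pvh_remove(struct demo_pvh *pvh, unsigned long va)
{
	struct demo_pv *pv;

	TAILQ_FOREACH(pv, pvh, pv_list) {
		if (pv->pv_va == va) {
			TAILQ_REMOVE(pvh, pv, pv_list);
			return (pv);		/* caller reuses or frees it */
		}
	}
	return (NULL);
}

/* Re-point the mapping for "va" at a new page, recycling the pv entry. */
static void
demo_replace_mapping(struct demo_pvh *oldpvh, struct demo_pvh *newpvh,
    unsigned long va)
{
	struct demo_pv *pv;

	pv = demo_pvh_remove(oldpvh, va);	/* recycle if we had one */
	if (pv == NULL) {
		pv = malloc(sizeof(*pv));	/* get_pv_entry() stand-in */
		if (pv == NULL)
			abort();
	}
	pv->pv_va = va;
	TAILQ_INSERT_TAIL(newpvh, pv, pv_list);
}

int
main(void)
{
	struct demo_pvh a = TAILQ_HEAD_INITIALIZER(a);
	struct demo_pvh b = TAILQ_HEAD_INITIALIZER(b);

	demo_replace_mapping(&a, &b, 0x1000);	/* nothing to recycle: allocates */
	demo_replace_mapping(&b, &a, 0x1000);	/* moves the same entry back */
	printf("a %s, b %s\n",
	    TAILQ_EMPTY(&a) ? "empty" : "non-empty",
	    TAILQ_EMPTY(&b) ? "empty" : "non-empty");
	return (0);
}

This avoids a round trip through the pv allocator on the common remap path, which matters once pmap_enter() runs without the global queue lock.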
@@ -3214,9 +3221,13 @@ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); - pmap_insert_entry(pmap, va, m); + if (pv == NULL) + pv = get_pv_entry(pmap, FALSE); + pv->pv_va = va; + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pa |= PG_MANAGED; - } + } else if (pv != NULL) + free_pv_entry(pmap, pv); /* * Increment counters @@ -3231,7 +3242,8 @@ newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; - vm_page_flag_set(m, PG_WRITEABLE); + if ((newpte & PG_MANAGED) != 0) + vm_page_flag_set(m, PG_WRITEABLE); } if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; @@ -3266,6 +3278,10 @@ if ((newpte & PG_RW) == 0) invlva = TRUE; } + if ((origpte & PG_MANAGED) != 0 && + TAILQ_EMPTY(&om->md.pv_list) && + TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)) + vm_page_flag_clear(om, PG_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else @@ -3340,7 +3356,7 @@ /* * Increment counters. */ - pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; + pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); /* * Map the superpage. @@ -3377,6 +3393,7 @@ psize = atop(end - start); mpte = NULL; m = m_start; + vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); @@ -3390,7 +3407,8 @@ mpte); m = TAILQ_NEXT(m, listq); } - PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); } /* @@ -3406,8 +3424,10 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { + vm_page_lock_queues(); PMAP_LOCK(pmap); - (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL); + (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -3494,7 +3514,7 @@ /* * Increment counters */ - pmap->pm_stats.resident_count++; + pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); if ((prot & VM_PROT_EXECUTE) == 0) @@ -3507,7 +3527,7 @@ pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); - return mpte; + return (mpte); } /* @@ -3599,8 +3619,7 @@ if ((*pde & PG_V) == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); - pmap->pm_stats.resident_count += NBPDR / - PAGE_SIZE; + pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); pmap_pde_mappings++; } else { /* Continue on if the PDE is already valid. 
*/ @@ -3667,8 +3686,6 @@ PMAP_UNLOCK(pmap); } - - /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len @@ -3743,8 +3760,7 @@ pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME))) { *pde = srcptepaddr & ~PG_W; - dst_pmap->pm_stats.resident_count += - NBPDR / PAGE_SIZE; + pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE); } else dstmpde->wire_count--; continue; @@ -3787,7 +3803,7 @@ */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); - dst_pmap->pm_stats.resident_count++; + pmap_resident_count_inc(dst_pmap, 1); } else { free = NULL; if (pmap_unwire_pte_hold(dst_pmap, @@ -3882,30 +3898,35 @@ struct md_page *pvh; pv_entry_t pv; int loops = 0; + boolean_t rv; - if (m->flags & PG_FICTITIOUS) - return FALSE; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { - return TRUE; + rv = TRUE; + break; } loops++; if (loops >= 16) break; } - if (loops < 16) { + if (!rv && loops < 16) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { - if (PV_PMAP(pv) == pmap) - return (TRUE); + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } loops++; if (loops >= 16) break; } } - return (FALSE); + vm_page_unlock_queues(); + return (rv); } /* @@ -3922,8 +3943,11 @@ count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); + vm_page_lock_queues(); count = pmap_pvh_wired_mappings(&m->md, count); - return (pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count)); + count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); + vm_page_unlock_queues(); + return (count); } /* @@ -3957,16 +3981,15 @@ boolean_t pmap_page_is_mapped(vm_page_t m) { - struct md_page *pvh; + boolean_t rv; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (TAILQ_EMPTY(&m->md.pv_list)) { - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - return (!TAILQ_EMPTY(&pvh->pv_list)); - } else - return (TRUE); + vm_page_lock_queues(); + rv = !TAILQ_EMPTY(&m->md.pv_list) || + !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); + vm_page_unlock_queues(); + return (rv); } /* @@ -4060,7 +4083,7 @@ pv_entry_count--; pc->pc_map[field] |= bitmask; if ((tpte & PG_PS) != 0) { - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); if (TAILQ_EMPTY(&pvh->pv_list)) { @@ -4071,7 +4094,7 @@ mpte = pmap_lookup_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; @@ -4079,7 +4102,7 @@ atomic_subtract_int(&cnt.v_wire_count, 1); } } else { - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -4116,12 +4139,25 @@ boolean_t pmap_is_modified(vm_page_t m) { + boolean_t rv; - if (m->flags & PG_FICTITIOUS) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. 
Thus, if PG_WRITEABLE + * is clear, no PTEs can have PG_M set. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) return (FALSE); - if (pmap_is_modified_pvh(&m->md)) - return (TRUE); - return (pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); + vm_page_lock_queues(); + rv = pmap_is_modified_pvh(&m->md) || + pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))); + vm_page_unlock_queues(); + return (rv); } /* @@ -4176,6 +4212,52 @@ } /* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + vm_page_lock_queues(); + rv = pmap_is_referenced_pvh(&m->md) || + pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))); + vm_page_unlock_queues(); + return (rv); +} + +/* + * Returns TRUE if any of the given mappings were referenced and FALSE + * otherwise. Both page and 2mpage mappings are supported. + */ +static boolean_t +pmap_is_referenced_pvh(struct md_page *pvh) +{ + pv_entry_t pv; + pt_entry_t *pte; + pmap_t pmap; + boolean_t rv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + rv = FALSE; + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, pv->pv_va); + rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); + PMAP_UNLOCK(pmap); + if (rv) + break; + } + return (rv); +} + +/* * Clear the write and modified bits in each of the given page's mappings. */ void @@ -4188,15 +4270,24 @@ pt_entry_t oldpte, *pte; vm_offset_t va; - if ((m->flags & PG_FICTITIOUS) != 0 || + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. 
+ */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde(pmap, pde, va); @@ -4222,6 +4313,7 @@ PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); } /* @@ -4247,14 +4339,14 @@ vm_offset_t va; int rtval = 0; - if (m->flags & PG_FICTITIOUS) - return (rtval); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + vm_page_lock_queues(); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_A) != 0) { @@ -4274,7 +4366,7 @@ rtval++; if (rtval > 4) { PMAP_UNLOCK(pmap); - return (rtval); + goto out; } } } @@ -4303,6 +4395,8 @@ PMAP_UNLOCK(pmap); } while ((pv = pvn) != NULL && pv != pvf); } +out: + vm_page_unlock_queues(); return (rtval); } @@ -4319,14 +4413,25 @@ pt_entry_t oldpte, *pte; vm_offset_t va; - if ((m->flags & PG_FICTITIOUS) != 0) + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { @@ -4367,6 +4472,7 @@ } PMAP_UNLOCK(pmap); } + vm_page_unlock_queues(); } /* @@ -4384,14 +4490,14 @@ pt_entry_t *pte; vm_offset_t va; - if ((m->flags & PG_FICTITIOUS) != 0) - return; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_A) != 0) { @@ -4423,6 +4529,7 @@ } PMAP_UNLOCK(pmap); } + vm_page_unlock_queues(); } /* @@ -4832,73 +4939,50 @@ * perform the pmap work for mincore */ int -pmap_mincore(pmap_t pmap, vm_offset_t addr) +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t pte; vm_paddr_t pa; - vm_page_t m; - int val = 0; - + int val; + PMAP_LOCK(pmap); +retry: pdep = pmap_pde(pmap, addr); if (pdep != NULL && (*pdep & PG_V)) { if (*pdep & PG_PS) { pte = *pdep; - val = MINCORE_SUPER; /* Compute the physical address of the 4KB page. 
*/ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; + val = MINCORE_SUPER; } else { pte = *pmap_pde_to_pte(pdep, addr); pa = pte & PG_FRAME; + val = 0; } } else { pte = 0; pa = 0; + val = 0; } - PMAP_UNLOCK(pmap); - - if (pte != 0) { + if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; - if ((pte & PG_MANAGED) == 0) - return val; - - m = PHYS_TO_VM_PAGE(pa); - - /* - * Modified by us - */ if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) - val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; - else { - /* - * Modified by someone else - */ - vm_page_lock_queues(); - if (m->dirty || pmap_is_modified(m)) - val |= MINCORE_MODIFIED_OTHER; - vm_page_unlock_queues(); - } - /* - * Referenced by us - */ - if (pte & PG_A) - val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; - else { - /* - * Referenced by someone else - */ - vm_page_lock_queues(); - if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { - val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } - vm_page_unlock_queues(); - } - } - return val; + val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; + if ((pte & PG_A) != 0) + val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; + } + if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != + (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && + (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { + /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ + if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) + goto retry; + } else + PA_UNLOCK_COND(*locked_pa); + PMAP_UNLOCK(pmap); + return (val); } void Index: sys/pcpu.h =================================================================== --- sys/pcpu.h (revision 218945) +++ sys/pcpu.h (working copy) @@ -164,6 +164,7 @@ long pc_cp_time[CPUSTATES]; /* statclock ticks */ struct device *pc_device; void *pc_netisr; /* netisr SWI cookie */ + int pc_dnweight; /* vm_page_dontneed() */ /* * Stuff for read mostly lock Index: sys/vmmeter.h =================================================================== --- sys/vmmeter.h (revision 218945) +++ sys/vmmeter.h (working copy) @@ -72,9 +72,9 @@ u_int v_pdwakeups; /* (f) times daemon has awaken from sleep */ u_int v_pdpages; /* (q) pages analyzed by daemon */ - u_int v_tcached; /* (q) total pages cached */ + u_int v_tcached; /* (p) total pages cached */ u_int v_dfree; /* (q) pages freed by daemon */ - u_int v_pfree; /* (q) pages freed by exiting processes */ + u_int v_pfree; /* (p) pages freed by exiting processes */ u_int v_tfree; /* (p) total pages freed */ /* * Distribution of page usages.
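Several functions in this patch (pmap_extract_and_hold() on amd64 and i386/xen, and now pmap_mincore()) share the same shape: compute the physical address under the pmap lock, take the matching page lock with vm_page_pa_tryrelock(), restart from a retry label whenever that could not be done without dropping the pmap lock, and release the page lock at the end with PA_UNLOCK_COND(). The sketch below condenses that shape; it is not compilable on its own, and lookup_pte() is a made-up placeholder for the per-architecture PTE walk, while the remaining interfaces are the ones the hunks above actually use.

/*
 * Schematic of the vm_page_pa_tryrelock() retry idiom; locking shape only.
 */
vm_page_t
extract_and_hold_sketch(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t pte;
	vm_paddr_t locked_pa;
	vm_page_t m;

	locked_pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte = lookup_pte(pmap, va);		/* hypothetical helper */
	if ((pte & PG_V) != 0) {
		/*
		 * Switch to the page lock for this physical address.  A
		 * non-zero return means the pmap lock was dropped while
		 * doing so, so the PTE must be re-read.
		 */
		if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &locked_pa))
			goto retry;
		m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
		vm_page_hold(m);		/* page lock is held */
	}
	PA_UNLOCK_COND(locked_pa);		/* drop the page lock, if taken */
	PMAP_UNLOCK(pmap);
	return (m);
}

In pmap_mincore() the same idiom is what lets the function report MINCORE_MODIFIED_OTHER and MINCORE_REFERENCED_OTHER without ever taking the page-queue lock: the caller-supplied locked_pa keeps the page's identity stable while the upper layer inspects the vm_page.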