commit 31d272ce5b9ceae76bde38dfafd1f0a209d868a6
Author: User
Date:   Sat Jan 27 15:47:53 2007 -0600

    Introduce a per-page mutex to protect the pv entries.

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 5197608..d24812b 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -214,9 +214,9 @@ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
 static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde);
 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
-static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
+static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, pt_entry_t *pte);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
-    vm_page_t m);
+    vm_page_t m, pt_entry_t *pte);
 static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -229,6 +229,10 @@ static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
 
+#define	PV_LOCK(m)		mtx_lock(&(m)->md.pv_lock)
+#define	PV_UNLOCK(m)		mtx_unlock(&(m)->md.pv_lock)
+#define	PV_ASSERT_LOCKED(m)	mtx_assert(&(m)->md.pv_lock, MA_OWNED)
+
 /*
  * Move the kernel virtual free pointer to the next
  * 2MB.  This is used to help improve performance
@@ -606,6 +610,7 @@ pmap_page_init(vm_page_t m)
 
 	TAILQ_INIT(&m->md.pv_list);
 	m->md.pv_list_count = 0;
+	mtx_init(&m->md.pv_lock, NULL, "pv lock", MTX_DEF);
 }
 
 /*
@@ -1590,8 +1595,8 @@ pmap_growkernel(vm_offset_t addr)
 ***************************************************/
 
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
-CTASSERT(_NPCM == 3);
-CTASSERT(_NPCPV == 168);
+CTASSERT(_NPCM == 2);
+CTASSERT(_NPCPV == 126);
 
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
@@ -1606,7 +1611,7 @@ pv_to_chunk(pv_entry_t pv)
 #define	PC_FREE1	0xfffffffffffffffful
 #define	PC_FREE2	0x000000fffffffffful
 
-static uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
+static uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1 };
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 	"Current number of pv entries");
@@ -1660,10 +1665,12 @@ pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 	TAILQ_FOREACH(m, &vpq->pl, pageq) {
 		if (m->hold_count || m->busy)
 			continue;
+		PV_LOCK(m);
 		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
 			va = pv->pv_va;
 			pmap = PV_PMAP(pv);
 			/* Avoid deadlock and lock recursion. */
+			/* XXX LOR with pv lock */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
 			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
@@ -1691,6 +1698,7 @@ pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 			if (pmap != locked_pmap)
 				PMAP_UNLOCK(pmap);
 		}
+		PV_UNLOCK(m);
 	}
 }
 
@@ -1826,6 +1834,7 @@ pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		if (pmap == PV_PMAP(pv) && va == pv->pv_va)
 			break;
@@ -1835,6 +1844,7 @@ pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 	m->md.pv_list_count--;
 	if (TAILQ_EMPTY(&m->md.pv_list))
 		vm_page_flag_clear(m, PG_WRITEABLE);
+	PV_UNLOCK(m);
 	free_pv_entry(pmap, pv);
 }
 
@@ -1843,23 +1853,26 @@
  * (pmap, va).
  */
 static void
-pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
+pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, pt_entry_t *pte)
 {
 	pv_entry_t pv;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	pv = get_pv_entry(pmap, FALSE);
 	pv->pv_va = va;
+	pv->pv_pte = pte;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 	m->md.pv_list_count++;
+	PV_UNLOCK(m);
 }
 
 /*
  * Conditionally create a pv entry.
  */
 static boolean_t
-pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
+pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, pt_entry_t *pte)
 {
 	pv_entry_t pv;
 
@@ -1867,9 +1880,12 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	if (pv_entry_count < pv_entry_high_water &&
 	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
+		PV_LOCK(m);
 		pv->pv_va = va;
+		pv->pv_pte = pte;
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 		m->md.pv_list_count++;
+		PV_UNLOCK(m);
 		return (TRUE);
 	} else
 		return (FALSE);
@@ -2059,7 +2075,7 @@ pmap_remove_all(vm_page_t m)
 	pv_entry_t pv;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
-	pd_entry_t ptepde;
+	pt_entry_t ptepde;
 
 #if defined(PMAP_DIAGNOSTIC)
 	/*
@@ -2071,14 +2087,14 @@ pmap_remove_all(vm_page_t m)
 	}
 #endif
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
-		PMAP_LOCK(pmap);
-		pmap->pm_stats.resident_count--;
-		pte = pmap_pte_pde(pmap, pv->pv_va, &ptepde);
+		atomic_subtract_long(&pmap->pm_stats.resident_count, 1);
+		pte = pv->pv_pte;
 		tpte = pte_load_clear(pte);
 		if (tpte & PG_W)
-			pmap->pm_stats.wired_count--;
+			atomic_subtract_long(&pmap->pm_stats.wired_count, 1);
 		if (tpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
@@ -2094,11 +2110,27 @@
 		pmap_invalidate_page(pmap, pv->pv_va);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		m->md.pv_list_count--;
+		/*
+		 * We can safely unlock the pv lock to acquire the pmap lock
+		 * here, since we don't need to guarantee that all the
+		 * removals are done atomically with respect to each other.
+		 * XXX Or do we?
+		 *
+		 * XXX We might be able to avoid locking pmap by:
+		 * - Only locking it when the page table page is actually
+		 *   free (wire_count == 0).
+		 * - Having a pv chunk lock, separate from the pmap lock.
+		 */
+		PV_UNLOCK(m);
+		PMAP_LOCK(pmap);
+		pmap_pte_pde(pmap, pv->pv_va, &ptepde);
 		pmap_unuse_pt(pmap, pv->pv_va, ptepde);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
+		PV_LOCK(m);
 	}
 	vm_page_flag_clear(m, PG_WRITEABLE);
+	PV_UNLOCK(m);
 }
 
 /*
@@ -2344,7 +2376,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
 		    ("pmap_enter: managed mapping within the clean submap"));
-		pmap_insert_entry(pmap, va, m);
+		pmap_insert_entry(pmap, va, m, pte);
 		pa |= PG_MANAGED;
 	}
 
@@ -2523,17 +2555,20 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 		return (mpte);
 	}
 
+	PV_LOCK(m);
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
-	    !pmap_try_insert_pv_entry(pmap, va, m)) {
+	    !pmap_try_insert_pv_entry(pmap, va, m, pte)) {
+		PV_UNLOCK(m);
 		if (mpte != NULL) {
 			pmap_unwire_pte_hold(pmap, va, mpte);
 			mpte = NULL;
 		}
 		return (mpte);
 	}
+	PV_UNLOCK(m);
 
 	/*
 	 * Increment counters
@@ -2788,9 +2823,11 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 			dst_pte = (pt_entry_t *)
 			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
 			dst_pte = &dst_pte[pmap_pte_index(addr)];
+			PV_LOCK(PHYS_TO_VM_PAGE(ptetemp & PG_FRAME));
 			if (*dst_pte == 0 &&
 			    pmap_try_insert_pv_entry(dst_pmap, addr,
-			    PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
+			    PHYS_TO_VM_PAGE(ptetemp & PG_FRAME), dst_pte)) {
+				PV_UNLOCK(PHYS_TO_VM_PAGE(ptetemp & PG_FRAME));
 				/*
 				 * Clear the wired, modified, and
 				 * accessed (referenced) bits
@@ -2802,6 +2839,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 			} else
 				pmap_unwire_pte_hold(dst_pmap, addr,
 				    dstmpte);
+			PV_UNLOCK(PHYS_TO_VM_PAGE(ptetemp & PG_FRAME));
 			if (dstmpte->wire_count >= srcmpte->wire_count)
 				break;
 		}
@@ -2889,14 +2927,17 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 		return FALSE;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		if (PV_PMAP(pv) == pmap) {
+			PV_UNLOCK(m);
 			return TRUE;
 		}
 		loops++;
 		if (loops >= 16)
 			break;
 	}
+	PV_UNLOCK(m);
 	return (FALSE);
 }
@@ -2981,9 +3022,11 @@ pmap_remove_pages(pmap_t pmap)
 				pv_entry_count--;
 				pc->pc_map[field] |= bitmask;
 				m->md.pv_list_count--;
+				PV_LOCK(m);
 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 				if (TAILQ_EMPTY(&m->md.pv_list))
 					vm_page_flag_clear(m, PG_WRITEABLE);
+				PV_UNLOCK(m);
 				pmap_unuse_pt(pmap, pv->pv_va,
 				    *vtopde(pv->pv_va));
 			}
@@ -3022,15 +3065,15 @@ pmap_is_modified(vm_page_t m)
 		return (rv);
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
-		PMAP_LOCK(pmap);
-		pte = pmap_pte(pmap, pv->pv_va);
+		pte = pv->pv_pte;
 		rv = (*pte & PG_M) != 0;
-		PMAP_UNLOCK(pmap);
 		if (rv)
 			break;
 	}
+	PV_UNLOCK(m);
 	return (rv);
 }
@@ -3072,10 +3115,10 @@ pmap_remove_write(vm_page_t m)
 	    (m->flags & PG_WRITEABLE) == 0)
 		return;
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
-		PMAP_LOCK(pmap);
-		pte = pmap_pte(pmap, pv->pv_va);
+		pte = pv->pv_pte;
retry:
 		oldpte = *pte;
 		if (oldpte & PG_RW) {
@@ -3086,9 +3129,9 @@ retry:
 				vm_page_dirty(m);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
-		PMAP_UNLOCK(pmap);
 	}
 	vm_page_flag_clear(m, PG_WRITEABLE);
+	PV_UNLOCK(m);
 }
 
 /*
@@ -3108,12 +3151,13 @@ pmap_ts_referenced(vm_page_t m)
 {
 	pv_entry_t pv, pvf, pvn;
 	pmap_t pmap;
-	pt_entry_t *pte;
+	pt_entry_t oldpte, *pte;
 	int rtval = 0;
 
 	if (m->flags & PG_FICTITIOUS)
 		return (rtval);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pvf = pv;
 		do {
@@ -3121,18 +3165,21 @@
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 			pmap = PV_PMAP(pv);
-			PMAP_LOCK(pmap);
-			pte = pmap_pte(pmap, pv->pv_va);
-			if ((*pte & PG_A) != 0) {
-				atomic_clear_long(pte, PG_A);
+			pte = pv->pv_pte;
+retry:
+			oldpte = *pte;
+			if ((oldpte & PG_A) != 0) {
+				if (!atomic_cmpset_long(pte, oldpte, oldpte &
+				    ~PG_A))
+					goto retry;
 				pmap_invalidate_page(pmap, pv->pv_va);
 				rtval++;
 				if (rtval > 4)
 					pvn = NULL;
 			}
-			PMAP_UNLOCK(pmap);
 		} while ((pv = pvn) != NULL && pv != pvf);
 	}
+	PV_UNLOCK(m);
 	return (rtval);
 }
@@ -3144,21 +3191,24 @@
 pmap_clear_modify(vm_page_t m)
 {
 	pv_entry_t pv;
 	pmap_t pmap;
-	pt_entry_t *pte;
+	pt_entry_t oldpte, *pte;
 
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return;
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
-		PMAP_LOCK(pmap);
-		pte = pmap_pte(pmap, pv->pv_va);
-		if (*pte & PG_M) {
-			atomic_clear_long(pte, PG_M);
+		pte = pv->pv_pte;
retry:
+		oldpte = *pte;
+		if (oldpte & PG_M) {
+			if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_M))
+				goto retry;
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
-		PMAP_UNLOCK(pmap);
 	}
+	PV_UNLOCK(m);
 }
 
 /*
@@ -3171,21 +3221,24 @@ pmap_clear_reference(vm_page_t m)
 {
 	pv_entry_t pv;
 	pmap_t pmap;
-	pt_entry_t *pte;
+	pt_entry_t oldpte, *pte;
 
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return;
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PV_LOCK(m);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
-		PMAP_LOCK(pmap);
-		pte = pmap_pte(pmap, pv->pv_va);
-		if (*pte & PG_A) {
-			atomic_clear_long(pte, PG_A);
+		pte = pv->pv_pte;
retry:
+		oldpte = *pte;
+		if (oldpte & PG_A) {
+			if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_A))
+				goto retry;
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
-		PMAP_UNLOCK(pmap);
 	}
+	PV_UNLOCK(m);
 }
 
 /*
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index e3b652b..43dfbef 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -233,6 +233,8 @@ struct pv_chunk;
 struct md_page {
 	int pv_list_count;
 	TAILQ_HEAD(,pv_entry)	pv_list;
+	pt_entry_t		pte;
+	struct mtx		pv_lock;
 };
 
 struct pmap {
@@ -268,6 +270,7 @@ extern struct pmap kernel_pmap_store;
  */
 typedef struct pv_entry {
 	vm_offset_t	pv_va;		/* virtual address for mapping */
+	pt_entry_t	*pv_pte;
 	TAILQ_ENTRY(pv_entry)	pv_list;
 } *pv_entry_t;
 
@@ -275,13 +278,13 @@ typedef struct pv_entry {
  * pv_entries are allocated in chunks per-process.  This avoids the
  * need to track per-pmap assignments.
  */
-#define	_NPCM	3
-#define	_NPCPV	168
+#define	_NPCM	2
+#define	_NPCPV	126
 
 struct pv_chunk {
 	pmap_t			pc_pmap;
 	TAILQ_ENTRY(pv_chunk)	pc_list;
 	uint64_t		pc_map[_NPCM];	/* bitmap; 1 = free */
-	uint64_t		pc_spare[2];
+	uint64_t		pc_spare[3];
 	struct pv_entry		pc_pventry[_NPCPV];
 };
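Note (reviewer addition, not part of the patch): the _NPCPV/_NPCM changes follow directly from the new pv_pte field. Each pv_entry grows from 24 to 32 bytes on amd64, so a page-sized pv_chunk holds 126 entries instead of 168; two 64-bit freemask words now cover all 126 bits (hence _NPCM == 2), and pc_spare grows from 2 to 3 words so the chunk header stays 64 bytes and sizeof(struct pv_chunk) remains exactly PAGE_SIZE. The standalone sketch below just reruns that arithmetic; it copies the field order from the patched pmap.h but substitutes plain pointers for the TAILQ_ENTRY() macros and assumes LP64 sizes, so it is an illustration rather than kernel code.

    /*
     * Illustrative layout check (userland, LP64 assumed): verify that the
     * resized pv_chunk still fills exactly one 4 KB page.
     */
    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SIZE	4096
    #define _NPCM	2		/* two 64-bit words cover 126 free bits */
    #define _NPCPV	126

    struct pv_entry {			/* 32 bytes with the new pv_pte */
    	uint64_t	pv_va;
    	uint64_t	*pv_pte;
    	struct pv_entry	*pv_next;	/* stand-in for TAILQ_ENTRY() */
    	struct pv_entry	**pv_prev;
    };

    struct pv_chunk {			/* 64-byte header + entries */
    	void		*pc_pmap;
    	struct pv_chunk	*pc_next;	/* stand-in for TAILQ_ENTRY() */
    	struct pv_chunk	**pc_prev;
    	uint64_t	pc_map[_NPCM];	/* bitmap; 1 = free */
    	uint64_t	pc_spare[3];	/* pads the header back to 64 bytes */
    	struct pv_entry	pc_pventry[_NPCPV];
    };

    int
    main(void)
    {
    	/* Old layout: 64-byte header + 168 * 24 == 4096. */
    	assert(64 + 168 * 24 == PAGE_SIZE);
    	/* New layout: 64-byte header + 126 * 32 == 4096. */
    	assert(sizeof(struct pv_entry) == 32);
    	assert(sizeof(struct pv_chunk) == PAGE_SIZE);
    	/* 126 entries still need only two 64-bit freemask words. */
    	assert((_NPCPV + 63) / 64 == _NPCM);
    	return (0);
    }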