diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index f2ebee2ca550..053870ef5c24 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -5806,7 +5806,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, ("pmap_enter: no PV entry for %#lx", va)); if ((newpte & PG_MANAGED) == 0) free_pv_entry(pmap, pv); - if ((om->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) @@ -6989,7 +6989,7 @@ pmap_remove_pages(pmap_t pmap) pvh->pv_gen++; if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) - if ((mt->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(mt) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } @@ -7007,7 +7007,7 @@ pmap_remove_pages(pmap_t pmap) pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; - if ((m->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(m) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -7138,7 +7138,7 @@ pmap_is_modified(vm_page_t m) * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } @@ -7207,7 +7207,7 @@ pmap_remove_write(vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : @@ -7690,7 +7690,7 @@ pmap_clear_modify(vm_page_t m) * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index b0a15a1bab86..ac8ed1c88063 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -423,7 +423,8 @@ extern int pmap_pcid_enabled; extern int invpcid_works; #define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) -#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) +#define pmap_page_is_write_mapped(m) \ + (((m)->astate.flags & PGA_WRITEABLE) != 0) #define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz)) struct thread; diff --git a/sys/arm/arm/pmap-v4.c b/sys/arm/arm/pmap-v4.c index e1f411ccc832..e746d66f9bf2 100644 --- a/sys/arm/arm/pmap-v4.c +++ b/sys/arm/arm/pmap-v4.c @@ -4104,7 +4104,7 @@ pmap_clear_modify(vm_page_t m) * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; if (m->md.pvh_attrs & PVF_MOD) pmap_clearbit(m, PVF_MOD); @@ -4143,7 +4143,7 @@ pmap_remove_write(vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. 
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) + if (vm_page_xbusied(m) || (vm_page_aflags(m) & PGA_WRITEABLE) != 0) pmap_clearbit(m, PVF_WRITE); } diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 1d82ebf48cb2..2ad04723a7c7 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -5197,7 +5197,7 @@ pmap_is_modified(vm_page_t m) * is clear, no PTE2s can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || @@ -5540,7 +5540,7 @@ pmap_remove_write(vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) return; rw_wlock(&pvh_global_lock); sched_pin(); diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 919537e86b84..c5063828d6a1 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -3333,7 +3333,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, pv = pmap_pvh_remove(&om->md, pmap, va); if ((m->oflags & VPO_UNMANAGED) != 0) free_pv_entry(pmap, pv); - if ((om->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) @@ -4372,7 +4372,7 @@ pmap_remove_pages(pmap_t pmap) pvh->pv_gen++; if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) - if ((mt->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(mt) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } @@ -4394,7 +4394,7 @@ pmap_remove_pages(pmap_t pmap) TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; - if ((m->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(m) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh( @@ -4534,7 +4534,7 @@ pmap_is_modified(vm_page_t m) * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } @@ -4600,7 +4600,7 @@ pmap_remove_write(vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : @@ -4977,7 +4977,7 @@ pmap_clear_modify(vm_page_t m) * set. If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; pvh = (m->flags & PG_FICTITIOUS) != 0 ?
&pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c index 59147515097f..9afc7db022b2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1718,12 +1718,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, bcopy((char *)db->db_data + bufoff, va, PAGESIZE); zfs_unmap_page(sf); m->valid = VM_PAGE_BITS_ALL; - vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); - vm_page_unlock(m); } *rbehind = i; @@ -1838,12 +1836,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, } zfs_unmap_page(sf); m->valid = VM_PAGE_BITS_ALL; - vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); - vm_page_unlock(m); } *rahead = i; zfs_vmobject_wunlock(vmobj); diff --git a/sys/dev/virtio/balloon/virtio_balloon.c b/sys/dev/virtio/balloon/virtio_balloon.c index 060d6d68afc7..32b9b41b8d94 100644 --- a/sys/dev/virtio/balloon/virtio_balloon.c +++ b/sys/dev/virtio/balloon/virtio_balloon.c @@ -332,8 +332,6 @@ vtballoon_inflate(struct vtballoon_softc *sc, int npages) sc->vtballoon_page_frames[i] = VM_PAGE_TO_PHYS(m) >> VIRTIO_BALLOON_PFN_SHIFT; - KASSERT(m->queue == PQ_NONE, - ("%s: allocated page %p on queue", __func__, m)); TAILQ_INSERT_TAIL(&sc->vtballoon_pages, m, plinks.q); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index f07f500e8977..3e2748ad1c88 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -3752,7 +3752,7 @@ __CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m, ("pmap_enter: no PV entry for %#x", va)); if ((newpte & PG_MANAGED) == 0) free_pv_entry(pmap, pv); - if ((om->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) @@ -4848,7 +4848,7 @@ __CONCAT(PMTYPE, is_modified)(vm_page_t m) * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || @@ -4979,7 +4979,7 @@ __CONCAT(PMTYPE, remove_write)(vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); @@ -5291,7 +5291,7 @@ __CONCAT(PMTYPE, clear_modify)(vm_page_t m) * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); diff --git a/sys/kern/subr_pidctrl.c b/sys/kern/subr_pidctrl.c index 9d83c2d6b77d..232ec19726b3 100644 --- a/sys/kern/subr_pidctrl.c +++ b/sys/kern/subr_pidctrl.c @@ -141,8 +141,7 @@ pidctrl_daemon(struct pidctrl *pc, int input) /* Compute P (proportional error), I (integral), D (derivative). 
*/ pc->pc_error += error; - pc->pc_integral = - MAX(MIN(pc->pc_integral + error, pc->pc_bound), 0); + pc->pc_integral = MAX(MIN(pc->pc_integral + error, pc->pc_bound), 0); pc->pc_derivative = pc->pc_error - pc->pc_olderror; /* Divide by inverse gain values to produce output. */ diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 072618f793a0..571fe83397bd 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -2164,7 +2164,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, pv = pmap_pvh_remove(&om->md, pmap, va); if (!pte_test(&newpte, PTE_MANAGED)) free_pv_entry(pmap, pv); - if ((om->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list)) vm_page_aflag_clear(om, PGA_WRITEABLE); } @@ -2934,7 +2934,7 @@ pmap_remove_write(vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { @@ -2999,7 +2999,7 @@ pmap_is_modified(vm_page_t m) * is clear, no PTEs can have PTE_D set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_testbit(m, PTE_D); @@ -3143,7 +3143,7 @@ pmap_clear_modify(vm_page_t m) * If the object containing the page is locked and the page is not * write busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { @@ -3270,7 +3270,7 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) * determine if the address is MINCORE_REFERENCED. */ m = PHYS_TO_VM_PAGE(pa); - if ((m->aflags & PGA_REFERENCED) != 0) + if ((vm_page_aflags(m) & PGA_REFERENCED) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 9eacac27707b..5ef269db98cc 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1319,7 +1319,7 @@ moea_is_modified(mmu_t mmu, vm_page_t m) * is clear, no PTEs can have PTE_CHG set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = moea_query_bit(m, PTE_CHG); @@ -1355,7 +1355,7 @@ moea_clear_modify(mmu_t mmu, vm_page_t m) * set. If the object containing the page is locked and the page is * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); moea_clear_bit(m, PTE_CHG); @@ -1382,7 +1382,7 @@ moea_remove_write(mmu_t mmu, vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating.
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); lo = moea_attr_fetch(m); @@ -1915,7 +1915,8 @@ moea_remove_all(mmu_t mmu, vm_page_t m) moea_pvo_remove(pvo, -1); PMAP_UNLOCK(pmap); } - if ((m->aflags & PGA_WRITEABLE) && moea_query_bit(m, PTE_CHG)) { + if ((vm_page_aflags(m) & PGA_WRITEABLE) != 0 && + moea_query_bit(m, PTE_CHG)) { moea_attr_clear(m, PTE_CHG); vm_page_dirty(m); } diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 7ad86d5f1896..6361938e0dc9 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1467,7 +1467,7 @@ moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, * Flush the page from the instruction cache if this page is * mapped executable and cacheable. */ - if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) && + if (pmap != kernel_pmap && (vm_page_aflags(m) & PGA_EXECUTABLE) == 0 && (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { vm_page_aflag_set(m, PGA_EXECUTABLE); moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); @@ -1688,7 +1688,7 @@ moea64_is_modified(mmu_t mmu, vm_page_t m) * is clear, no PTEs can have LPTE_CHG set. */ VM_OBJECT_ASSERT_LOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (FALSE); return (moea64_query_bit(mmu, m, LPTE_CHG)); } @@ -1722,7 +1722,7 @@ moea64_clear_modify(mmu_t mmu, vm_page_t m) * set. If the object containing the page is locked and the page is * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; moea64_clear_bit(mmu, m, LPTE_CHG); } @@ -1746,7 +1746,7 @@ moea64_remove_write(mmu_t mmu, vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; powerpc_sync(); PV_PAGE_LOCK(m); @@ -2240,7 +2240,8 @@ moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) if (refchg < 0) refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0; - if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) && + if (pm != kernel_pmap && pg != NULL && + (vm_page_aflags(pg) & PGA_EXECUTABLE) == 0 && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { if ((pg->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(pg, PGA_EXECUTABLE); @@ -2454,7 +2455,8 @@ moea64_remove_all(mmu_t mmu, vm_page_t m) } KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings")); - KASSERT(!(m->aflags & PGA_WRITEABLE), ("Page still writable")); + KASSERT((vm_page_aflags(m) & PGA_WRITEABLE) == 0, + ("Page still writable")); PV_PAGE_UNLOCK(m); /* Clean up UMA allocations */ diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 140b1367325f..2374d1a9ad91 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -2694,7 +2694,7 @@ mmu_booke_remove_write(mmu_t mmu, vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating.
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { @@ -3040,7 +3040,7 @@ mmu_booke_is_modified(mmu_t mmu, vm_page_t m) * is clear, no PTEs can be modified. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (rv); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { @@ -3119,7 +3119,7 @@ mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) * If the object containing the page is locked and the page is not * exclusive busied, then PG_AWRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index bbda832ff885..2196a6c153af 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -2825,7 +2825,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, ("pmap_enter: no PV entry for %#lx", va)); if ((new_l3 & PTE_SW_MANAGED) == 0) free_pv_entry(pmap, pv); - if ((om->aflags & PGA_WRITEABLE) != 0 && + if ((vm_page_aflags(om) & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list)) vm_page_aflag_clear(om, PGA_WRITEABLE); } @@ -3556,7 +3556,7 @@ pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv, if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[Ln_ENTRIES]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list) && - (mt->aflags & PGA_WRITEABLE) != 0) + (vm_page_aflags(mt) & PGA_WRITEABLE) != 0) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_remove_pt_page(pmap, pv->pv_va); @@ -3574,7 +3574,7 @@ pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv, TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (TAILQ_EMPTY(&m->md.pv_list) && - (m->aflags & PGA_WRITEABLE) != 0) { + (vm_page_aflags(m) & PGA_WRITEABLE) != 0) { pvh = pa_to_pvh(m->phys_addr); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); @@ -3789,7 +3789,7 @@ pmap_is_modified(vm_page_t m) * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } @@ -3855,7 +3855,7 @@ pmap_remove_write(vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : @@ -4115,7 +4115,7 @@ pmap_clear_modify(vm_page_t m) * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; pvh = (m->flags & PG_FICTITIOUS) != 0 ?
&pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index 436c15623a6e..a038845e359f 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -2121,7 +2121,7 @@ pmap_is_modified(vm_page_t m) * is clear, no TTEs can have TD_W set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return (rv); rw_wlock(&tte_list_global_lock); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { @@ -2204,7 +2204,7 @@ pmap_clear_modify(vm_page_t m) * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ - if ((m->aflags & PGA_WRITEABLE) == 0) + if ((vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&tte_list_global_lock); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { @@ -2232,7 +2232,7 @@ pmap_remove_write(vm_page_t m) * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + if (!vm_page_xbusied(m) && (vm_page_aflags(m) & PGA_WRITEABLE) == 0) return; rw_wlock(&tte_list_global_lock); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 4ea49c7aa4a2..249d158ca6d3 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1648,12 +1648,6 @@ swp_pager_force_dirty(vm_page_t m) { vm_page_dirty(m); -#ifdef INVARIANTS - vm_page_lock(m); - if (!vm_page_wired(m) && m->queue == PQ_NONE) - panic("page %p is neither wired nor queued", m); - vm_page_unlock(m); -#endif vm_page_xunbusy(m); swap_pager_unswapped(m); } diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 35742af74bde..9f465b06624c 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -152,9 +152,7 @@ release_page(struct faultstate *fs) { vm_page_xunbusy(fs->m); - vm_page_lock(fs->m); vm_page_deactivate(fs->m); - vm_page_unlock(fs->m); fs->m = NULL; } @@ -375,9 +373,7 @@ vm_fault_populate_cleanup(vm_object_t object, vm_pindex_t first, for (pidx = first, m = vm_page_lookup(object, pidx); pidx <= last; pidx++, m = vm_page_next(m)) { vm_fault_populate_check_page(m); - vm_page_lock(m); vm_page_deactivate(m); - vm_page_unlock(m); vm_page_xunbusy(m); } } @@ -1324,9 +1320,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(fs.m); } else { - vm_page_lock(fs.m); vm_page_activate(fs.m); - vm_page_unlock(fs.m); } if (m_hold != NULL) { *m_hold = fs.m; diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 23bdde163cec..882a77e0de30 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -935,9 +935,9 @@ kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec) * and set PGA_REFERENCED before the call to * pmap_is_referenced(). */ - if ((m->aflags & PGA_REFERENCED) != 0 || + if ((vm_page_aflags(m) & PGA_REFERENCED) != 0 || pmap_is_referenced(m) || - (m->aflags & PGA_REFERENCED) != 0) + (vm_page_aflags(m) & PGA_REFERENCED) != 0) mincoreinfo |= MINCORE_REFERENCED_OTHER; } if (object != NULL) diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index b65a74d136c6..7c6bc87ff332 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2312,9 +2312,9 @@ sysctl_vm_object_list(SYSCTL_HANDLER_ARGS) * sysctl is only meant to give an * approximation of the system anyway. 
*/ - if (m->queue == PQ_ACTIVE) + if (m->astate.queue == PQ_ACTIVE) kvo->kvo_active++; - else if (m->queue == PQ_INACTIVE) + else if (m->astate.queue == PQ_INACTIVE) kvo->kvo_inactive++; } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 99c3abe1f9e7..7343210f6e41 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -73,11 +73,12 @@ __FBSDID("$FreeBSD$"); #include #include -#include +#include #include #include #include #include +#include #include #include #include @@ -130,6 +131,34 @@ static int vm_min_waiters; static int vm_severe_waiters; static int vm_pageproc_waiters; +static SYSCTL_NODE(_vm_stats, OID_AUTO, page, CTLFLAG_RD, 0, + "VM page stats"); + +static counter_u64_t pqstate_commit_aborts = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, commit_aborts, CTLFLAG_RD, + &pqstate_commit_aborts, + "Failed page queue state updates"); + +static counter_u64_t queue_ops = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_ops, CTLFLAG_RD, + &queue_ops, + "Batched queue operations"); + +static counter_u64_t null_queue_ops = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, null_queue_ops, CTLFLAG_RD, + &null_queue_ops, + "Batched queue operations with no effect"); + +static void +counter_startup(void) +{ + + pqstate_commit_aborts = counter_u64_alloc(M_WAITOK); + queue_ops = counter_u64_alloc(M_WAITOK); + null_queue_ops = counter_u64_alloc(M_WAITOK); +} +SYSINIT(page_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL); + /* * bogus page -- for I/O to/from partially complete buffers, * or for paging into sparsely invalid regions. @@ -158,16 +187,17 @@ static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); -static void vm_page_dequeue_complete(vm_page_t m); static void vm_page_enqueue(vm_page_t m, uint8_t queue); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); -static void vm_page_mvqueue(vm_page_t m, uint8_t queue); +static void vm_page_mvqueue(vm_page_t m, const uint8_t queue, + const uint16_t nflag); static int vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_paddr_t high); +static bool vm_page_release_toq(vm_page_t m, uint8_t queue, bool noreuse); static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req); static int vm_page_zone_import(void *arg, void **store, int cnt, int domain, @@ -440,10 +470,10 @@ vm_page_init_marker(vm_page_t marker, int queue, uint8_t aflags) { bzero(marker, sizeof(*marker)); - marker->flags = PG_MARKER; - marker->aflags = aflags; marker->busy_lock = VPB_SINGLE_EXCLUSIVER; - marker->queue = queue; + marker->astate.flags = aflags; + marker->astate.queue = queue; + marker->flags = PG_MARKER; } static void @@ -513,9 +543,10 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind) m->object = NULL; m->ref_count = 0; m->busy_lock = VPB_UNBUSIED; - m->flags = m->aflags = 0; + m->flags = 0; m->phys_addr = pa; - m->queue = PQ_NONE; + m->astate.flags = 0; + m->astate.queue = PQ_NONE; m->psind = 0; m->segind = segind; m->order = VM_NFREEORDER; @@ -1152,7 +1183,7 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) goto memattr; } m->phys_addr = paddr; - m->queue = PQ_NONE; + m->astate.queue = PQ_NONE; /* Fictitious pages don't use "segind". 
*/ m->flags = PG_FICTITIOUS; /* Fictitious pages don't use "order" or "pool". */ @@ -1239,12 +1270,10 @@ vm_page_readahead_finish(vm_page_t m) * have shown that deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ - vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); - vm_page_unlock(m); vm_page_xunbusy(m); } @@ -1607,7 +1636,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) mnew->pindex = pindex; atomic_set_int(&mnew->ref_count, VPRC_OBJREF); mold = vm_radix_replace(&object->rtree, mnew); - KASSERT(mold->queue == PQ_NONE, + KASSERT(mold->astate.queue == PQ_NONE, ("vm_page_replace: old page %p is on a paging queue", mold)); /* Keep the resident page list in sorted order. */ @@ -1883,7 +1912,7 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; m->flags = flags; - m->aflags = 0; + m->astate.flags = 0; m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0; m->busy_lock = VPB_UNBUSIED; @@ -1899,7 +1928,7 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, vm_wire_add(1); m->ref_count = 1; } - m->act_count = 0; + m->astate.act_count = 0; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { @@ -2093,12 +2122,12 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain, memattr = object->memattr; } for (m = m_ret; m < &m_ret[npages]; m++) { - m->aflags = 0; + m->astate.flags = 0; m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = busy_lock; if ((req & VM_ALLOC_WIRED) != 0) m->ref_count = 1; - m->act_count = 0; + m->astate.act_count = 0; m->oflags = oflags; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { @@ -2141,9 +2170,10 @@ vm_page_alloc_check(vm_page_t m) { KASSERT(m->object == NULL, ("page %p has object", m)); - KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0, + KASSERT(m->astate.queue == PQ_NONE && + (m->astate.flags & PGA_QUEUE_STATE_MASK) == 0, ("page %p has unexpected queue %d, flags %#x", - m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK))); + m, m->astate.queue, (m->astate.flags & PGA_QUEUE_STATE_MASK))); KASSERT(m->ref_count == 0, ("page %p has references", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); @@ -2217,7 +2247,7 @@ vm_page_alloc_freelist_domain(int domain, int freelist, int req) /* * Initialize the page. Only the PG_ZERO flag is inherited. */ - m->aflags = 0; + m->astate.flags = 0; flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; @@ -2396,8 +2426,7 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && - vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) && - !vm_page_wired(m)) { + !vm_page_busied(m) && !vm_page_wired(m)) { /* * The page is allocated but eligible for * relocation. 
Extend the current run by one @@ -2545,8 +2574,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, error = EINVAL; else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; - else if (vm_page_queue(m) != PQ_NONE && - !vm_page_busied(m) && !vm_page_wired(m)) { + else if (!vm_page_busied(m) && !vm_page_wired(m)) { KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); @@ -2607,7 +2635,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, error = EBUSY; goto unlock; } - m_new->aflags = m->aflags & + m_new->astate.flags = m->astate.flags & ~PGA_QUEUE_STATE_MASK; KASSERT(m_new->oflags == VPO_UNMANAGED, ("page %p is managed", m_new)); @@ -3075,65 +3103,141 @@ vm_waitpfault(struct domainset *dset, int timo) mtx_unlock(&vm_domainset_lock); } -static struct vm_pagequeue * -vm_page_pagequeue(vm_page_t m) +bool +vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) { + vm_page_t next; + struct vm_pagequeue *pq; + int mask; - uint8_t queue; + if (old->queue != PQ_NONE && old->queue != new.queue) { + new.flags &= ~PGA_ENQUEUED; - if ((queue = atomic_load_8(&m->queue)) == PQ_NONE) - return (NULL); - return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue]); + pq = _vm_page_pagequeue(m, old->queue); + + /* + * The physical queue state might change at any point before the + * page queue lock is acquired, so we must verify that the lock + * is correct before proceeding. Once the page's queue index is + * changed, the page queue lock we hold will no longer + * synchronize the physical queue state of the page, so we must + * awkwardly remove the page from the queue and put it back if + * the commit fails. + */ + vm_pagequeue_lock(pq); + if (__predict_false(m->astate.queue != old->queue)) { + vm_pagequeue_unlock(pq); + *old = vm_page_astate_load(m); + return (false); + } + if (__predict_true((m->astate.flags & PGA_ENQUEUED) != 0)) { + next = TAILQ_NEXT(m, plinks.q); + TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); + } + if (__predict_false(!vm_page_astate_fcmpset(m, old, new))) { + if ((old->flags & PGA_ENQUEUED) != 0) { + if (next == NULL) + TAILQ_INSERT_TAIL(&pq->pq_pl, m, + plinks.q); + else + TAILQ_INSERT_BEFORE(next, m, plinks.q); + } + vm_pagequeue_unlock(pq); + counter_u64_add(pqstate_commit_aborts, 1); + return (false); + } + if ((old->flags & PGA_ENQUEUED) != 0) + vm_pagequeue_cnt_dec(pq); + vm_pagequeue_unlock(pq); + } else if (__predict_false(!vm_page_astate_fcmpset(m, old, new))) { + counter_u64_add(pqstate_commit_aborts, 1); + return (false); + } + + if (new.queue != PQ_NONE) { + mask = new.flags & PGA_QUEUE_OP_MASK; + if (mask != 0 && (old->flags & mask) != mask) + vm_page_pqbatch_submit(m, new.queue); + } + + return (true); } static inline void -vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m) +vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m, uint8_t queue) { + vm_page_t next; struct vm_domain *vmd; - uint8_t qflags; + vm_page_astate_t old, new; CRITICAL_ASSERT(curthread); vm_pagequeue_assert_locked(pq); + old = vm_page_astate_load(m); +retry: + if (__predict_false(old.queue != queue)) + return; + KASSERT(pq == _vm_page_pagequeue(m, queue), + ("page %p does not belong to queue %p", m, pq)); + KASSERT(old.queue != PQ_NONE || (old.flags & PGA_QUEUE_STATE_MASK) == 0, + ("page %p has unexpected queue state", m)); + /* - * The page daemon is allowed to set m->queue = PQ_NONE without - * the page queue lock held. 
In this case it is about to free the page, - * which must not have any queue state. + * Update the page's queue state before modifying the page queues + * themselves, to avoid having to roll back updates when a queue state + * update fails and requires a retry. */ - qflags = atomic_load_8(&m->aflags); - KASSERT(pq == vm_page_pagequeue(m) || - (qflags & PGA_QUEUE_STATE_MASK) == 0, - ("page %p doesn't belong to queue %p but has aflags %#x", - m, pq, qflags)); - - if ((qflags & PGA_DEQUEUE) != 0) { - if (__predict_true((qflags & PGA_ENQUEUED) != 0)) - vm_pagequeue_remove(pq, m); - vm_page_dequeue_complete(m); - } else if ((qflags & (PGA_REQUEUE | PGA_REQUEUE_HEAD)) != 0) { - if ((qflags & PGA_ENQUEUED) != 0) + new = old; + if ((old.flags & PGA_DEQUEUE) != 0) { + new.queue = PQ_NONE; + new.flags &= ~PGA_QUEUE_STATE_MASK; + if (__predict_true((old.flags & PGA_ENQUEUED) != 0)) { + next = TAILQ_NEXT(m, plinks.q); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); - else { - vm_pagequeue_cnt_inc(pq); - vm_page_aflag_set(m, PGA_ENQUEUED); } + if (__predict_false(!vm_page_astate_fcmpset(m, &old, new))) { + if ((old.flags & PGA_ENQUEUED) != 0) { + if (next == NULL) + TAILQ_INSERT_TAIL(&pq->pq_pl, m, + plinks.q); + else + TAILQ_INSERT_BEFORE(next, m, plinks.q); + } + counter_u64_add(pqstate_commit_aborts, 1); + goto retry; + } + if ((old.flags & PGA_ENQUEUED) != 0) + vm_pagequeue_cnt_dec(pq); + counter_u64_add(queue_ops, 1); + } else if ((old.flags & (PGA_REQUEUE | PGA_REQUEUE_HEAD)) != 0) { + new.flags |= PGA_ENQUEUED; + new.flags &= ~(PGA_REQUEUE | PGA_REQUEUE_HEAD); + if (__predict_false(!vm_page_astate_fcmpset(m, &old, new))) { + counter_u64_add(pqstate_commit_aborts, 1); + goto retry; + } + + if ((old.flags & PGA_ENQUEUED) != 0) + TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); + else + vm_pagequeue_cnt_inc(pq); /* - * Give PGA_REQUEUE_HEAD precedence over PGA_REQUEUE. - * In particular, if both flags are set in close succession, - * only PGA_REQUEUE_HEAD will be applied, even if it was set - * first. + * Give PGA_REQUEUE_HEAD precedence over PGA_REQUEUE. In + * particular, if both flags are set in close succession, only + * PGA_REQUEUE_HEAD will be applied, even if it was set first. */ - if ((qflags & PGA_REQUEUE_HEAD) != 0) { - KASSERT(m->queue == PQ_INACTIVE, + if ((old.flags & PGA_REQUEUE_HEAD) != 0) { + KASSERT(old.queue == PQ_INACTIVE, ("head enqueue not supported for page %p", m)); vmd = vm_pagequeue_domain(m); TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q); - } else + } else { TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); - - vm_page_aflag_clear(m, qflags & (PGA_REQUEUE | - PGA_REQUEUE_HEAD)); + } + counter_u64_add(queue_ops, 1); + } else { + counter_u64_add(null_queue_ops, 1); } } @@ -3141,15 +3245,10 @@ static void vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq, uint8_t queue) { - vm_page_t m; int i; - for (i = 0; i < bq->bq_cnt; i++) { - m = bq->bq_pa[i]; - if (__predict_false(m->queue != queue)) - continue; - vm_pqbatch_process_page(pq, m); - } + for (i = 0; i < bq->bq_cnt; i++) + vm_pqbatch_process_page(pq, bq->bq_pa[i], queue); vm_batchqueue_init(bq); } @@ -3157,8 +3256,6 @@ vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq, * vm_page_pqbatch_submit: [ internal use only ] * * Enqueue a page in the specified page queue's batched work queue. - * The caller must have encoded the requested operation in the page - * structure's aflags field. 
*/ void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue) @@ -3169,8 +3266,6 @@ vm_page_pqbatch_submit(vm_page_t m, uint8_t queue) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); - KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL, - ("missing synchronization for page %p", m)); KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue)); domain = vm_phys_domain(m); @@ -3189,21 +3284,7 @@ vm_page_pqbatch_submit(vm_page_t m, uint8_t queue) bq = DPCPU_PTR(pqbatch[domain][queue]); } vm_pqbatch_process(pq, bq, queue); - - /* - * The page may have been logically dequeued before we acquired the - * page queue lock. In this case, since we either hold the page lock - * or the page is being freed, a different thread cannot be concurrently - * enqueuing the page. - */ - if (__predict_true(m->queue == queue)) - vm_pqbatch_process_page(pq, m); - else { - KASSERT(m->queue == PQ_NONE, - ("invalid queue transition for page %p", m)); - KASSERT((m->aflags & PGA_ENQUEUED) == 0, - ("page %p is enqueued with invalid queue index", m)); - } + vm_pqbatch_process_page(pq, m, queue); vm_pagequeue_unlock(pq); critical_exit(); } @@ -3247,131 +3328,54 @@ vm_page_pqbatch_drain(void) thread_unlock(td); } -/* - * Complete the logical removal of a page from a page queue. We must be - * careful to synchronize with the page daemon, which may be concurrently - * examining the page with only the page lock held. The page must not be - * in a state where it appears to be logically enqueued. - */ -static void -vm_page_dequeue_complete(vm_page_t m) -{ - - m->queue = PQ_NONE; - atomic_thread_fence_rel(); - vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK); -} - -/* - * vm_page_dequeue_deferred: [ internal use only ] - * - * Request removal of the given page from its current page - * queue. Physical removal from the queue may be deferred - * indefinitely. - * - * The page must be locked. - */ -void -vm_page_dequeue_deferred(vm_page_t m) -{ - uint8_t queue; - - vm_page_assert_locked(m); - - if ((queue = vm_page_queue(m)) == PQ_NONE) - return; - - /* - * Set PGA_DEQUEUE if it is not already set to handle a concurrent call - * to vm_page_dequeue_deferred_free(). In particular, avoid modifying - * the page's queue state once vm_page_dequeue_deferred_free() has been - * called. In the event of a race, two batch queue entries for the page - * will be created, but the second will have no effect. - */ - if (vm_page_pqstate_cmpset(m, queue, queue, PGA_DEQUEUE, PGA_DEQUEUE)) - vm_page_pqbatch_submit(m, queue); -} - -/* - * A variant of vm_page_dequeue_deferred() that does not assert the page - * lock and is only to be called from vm_page_free_prep(). Because the - * page is being freed, we can assume that nothing other than the page - * daemon is scheduling queue operations on this page, so we get for - * free the mutual exclusion that is otherwise provided by the page lock. - * To handle races, the page daemon must take care to atomically check - * for PGA_DEQUEUE when updating queue state. 
- */ +/* XXX comment */ static void -vm_page_dequeue_deferred_free(vm_page_t m) +vm_page_dequeue_free(vm_page_t m) { - uint8_t queue; + vm_page_astate_t old, new; - KASSERT(m->ref_count == 0, ("page %p has references", m)); - - if ((m->aflags & PGA_DEQUEUE) != 0) - return; - atomic_thread_fence_acq(); - if ((queue = m->queue) == PQ_NONE) - return; - vm_page_aflag_set(m, PGA_DEQUEUE); - vm_page_pqbatch_submit(m, queue); + for (old = vm_page_astate_load(m);;) { + if (old.queue == PQ_NONE) { + KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0, + ("page %p has unexpected queue state flags %#x", + m, old.flags)); + break; + } + if ((old.flags & PGA_DEQUEUE) != 0) { + vm_page_pqbatch_submit(m, old.queue); + break; + } + new = old; + new.flags |= PGA_DEQUEUE; + if (vm_page_pqstate_commit(m, &old, new)) + break; + } } /* * vm_page_dequeue: * * Remove the page from whichever page queue it's in, if any. - * The page must either be locked or unallocated. This constraint - * ensures that the queue state of the page will remain consistent - * after this function returns. + * XXX */ void vm_page_dequeue(vm_page_t m) { - struct vm_pagequeue *pq, *pq1; - uint8_t aflags; + vm_page_astate_t old, new; - KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL, - ("page %p is allocated and unlocked", m)); - - for (pq = vm_page_pagequeue(m);; pq = pq1) { - if (pq == NULL) { - /* - * A thread may be concurrently executing - * vm_page_dequeue_complete(). Ensure that all queue - * state is cleared before we return. - */ - aflags = atomic_load_8(&m->aflags); - if ((aflags & PGA_QUEUE_STATE_MASK) == 0) - return; - KASSERT((aflags & PGA_DEQUEUE) != 0, + for (old = vm_page_astate_load(m);;) { + if (old.queue == PQ_NONE) { + KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0, ("page %p has unexpected queue state flags %#x", - m, aflags)); - - /* - * Busy wait until the thread updating queue state is - * finished. Such a thread must be executing in a - * critical section. - */ - cpu_spinwait(); - pq1 = vm_page_pagequeue(m); - continue; + m, old.flags)); + break; } - vm_pagequeue_lock(pq); - if ((pq1 = vm_page_pagequeue(m)) == pq) + new = old; + new.queue = PQ_NONE; + new.flags &= ~PGA_QUEUE_STATE_MASK; + if (vm_page_pqstate_commit(m, &old, new)) break; - vm_pagequeue_unlock(pq); } - KASSERT(pq == vm_page_pagequeue(m), - ("%s: page %p migrated directly between queues", __func__, m)); - KASSERT((m->aflags & PGA_DEQUEUE) != 0 || - mtx_owned(vm_page_lockptr(m)), - ("%s: queued unlocked page %p", __func__, m)); - - if ((m->aflags & PGA_ENQUEUED) != 0) - vm_pagequeue_remove(pq, m); - vm_page_dequeue_complete(m); - vm_pagequeue_unlock(pq); } /* @@ -3383,71 +3387,16 @@ vm_page_enqueue(vm_page_t m, uint8_t queue) { vm_page_assert_locked(m); - KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0, + KASSERT(m->astate.queue == PQ_NONE && + (m->astate.flags & PGA_QUEUE_STATE_MASK) == 0, ("%s: page %p is already enqueued", __func__, m)); - m->queue = queue; - if ((m->aflags & PGA_REQUEUE) == 0) + m->astate.queue = queue; + if ((m->astate.flags & PGA_REQUEUE) == 0) vm_page_aflag_set(m, PGA_REQUEUE); vm_page_pqbatch_submit(m, queue); } -/* - * vm_page_requeue: [ internal use only ] - * - * Schedule a requeue of the given page. - * - * The page must be locked. 
- */ -void -vm_page_requeue(vm_page_t m) -{ - - vm_page_assert_locked(m); - KASSERT(vm_page_queue(m) != PQ_NONE, - ("%s: page %p is not logically enqueued", __func__, m)); - - if ((m->aflags & PGA_REQUEUE) == 0) - vm_page_aflag_set(m, PGA_REQUEUE); - vm_page_pqbatch_submit(m, atomic_load_8(&m->queue)); -} - -/* - * vm_page_swapqueue: [ internal use only ] - * - * Move the page from one queue to another, or to the tail of its - * current queue, in the face of a possible concurrent call to - * vm_page_dequeue_deferred_free(). - */ -void -vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq) -{ - struct vm_pagequeue *pq; - - KASSERT(oldq < PQ_COUNT && newq < PQ_COUNT && oldq != newq, - ("vm_page_swapqueue: invalid queues (%d, %d)", oldq, newq)); - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("vm_page_swapqueue: page %p is unmanaged", m)); - vm_page_assert_locked(m); - - /* - * Atomically update the queue field and set PGA_REQUEUE while - * ensuring that PGA_DEQUEUE has not been set. - */ - pq = &vm_pagequeue_domain(m)->vmd_pagequeues[oldq]; - vm_pagequeue_lock(pq); - if (!vm_page_pqstate_cmpset(m, oldq, newq, PGA_DEQUEUE, PGA_REQUEUE)) { - vm_pagequeue_unlock(pq); - return; - } - if ((m->aflags & PGA_ENQUEUED) != 0) { - vm_pagequeue_remove(pq, m); - vm_page_aflag_clear(m, PGA_ENQUEUED); - } - vm_pagequeue_unlock(pq); - vm_page_pqbatch_submit(m, newq); -} - /* * vm_page_free_prep: * @@ -3479,10 +3428,11 @@ vm_page_free_prep(vm_page_t m) } #endif if ((m->oflags & VPO_UNMANAGED) == 0) - KASSERT(!pmap_page_is_mapped(m), + KASSERT(!pmap_page_is_mapped(m) && (vm_page_aflags(m) & + (PGA_EXECUTABLE | PGA_WRITEABLE)) == 0, ("vm_page_free_prep: freeing mapped page %p", m)); else - KASSERT(m->queue == PQ_NONE, + KASSERT(m->astate.queue == PQ_NONE, ("vm_page_free_prep: unmanaged page %p is queued", m)); VM_CNT_INC(v_tfree); @@ -3511,7 +3461,7 @@ vm_page_free_prep(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->ref_count == 1, ("fictitious page %p is referenced", m)); - KASSERT(m->queue == PQ_NONE, + KASSERT(m->astate.queue == PQ_NONE, ("fictitious page %p is queued", m)); return (false); } @@ -3522,7 +3472,7 @@ vm_page_free_prep(vm_page_t m) * dequeue. */ if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_dequeue_deferred_free(m); + vm_page_dequeue_free(m); m->valid = 0; vm_page_undirty(m); @@ -3629,6 +3579,8 @@ vm_page_wire(vm_page_t m) old = atomic_fetchadd_int(&m->ref_count, 1); KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX, ("vm_page_wire: counter overflow for page %p", m)); + if ((m->oflags & VPO_UNMANAGED) == 0) + vm_page_aflag_set(m, PGA_DEQUEUE); if (VPRC_WIRE_COUNT(old) == 0) vm_wire_add(1); } @@ -3650,11 +3602,45 @@ vm_page_wire_mapped(vm_page_t m) return (false); } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1)); + if ((m->oflags & VPO_UNMANAGED) == 0) + vm_page_aflag_set(m, PGA_DEQUEUE); if (VPRC_WIRE_COUNT(old) == 0) vm_wire_add(1); return (true); } +/* XXX comment */ +static void +vm_page_unwire_managed(vm_page_t m, uint8_t queue, bool noreuse) +{ + u_int old; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_unwire_managed: page %p is unmanaged", m)); + + /* + * Update LRU state before releasing the wiring reference. + * Use a release store when updating the reference count to + * synchronize with vm_page_free_prep(). 
+ */ + old = m->ref_count; + do { + KASSERT(VPRC_WIRE_COUNT(old) > 0, + ("vm_page_unwire: wire count underflow for page %p", m)); + if (VPRC_WIRE_COUNT(old) == 1 && + !vm_page_release_toq(m, queue, noreuse)) { + old = atomic_load_int(&m->ref_count); + continue; + } + } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); + + if (VPRC_WIRE_COUNT(old) == 1) { + vm_wire_sub(1); + if (old == 1) + vm_page_free(m); + } +} + /* * Release one wiring of the specified page, potentially allowing it to be * paged out. @@ -3669,8 +3655,6 @@ vm_page_wire_mapped(vm_page_t m) void vm_page_unwire(vm_page_t m, uint8_t queue) { - u_int old; - bool locked; KASSERT(queue < PQ_COUNT, ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); @@ -3678,42 +3662,8 @@ vm_page_unwire(vm_page_t m, uint8_t queue) if ((m->oflags & VPO_UNMANAGED) != 0) { if (vm_page_unwire_noq(m) && m->ref_count == 0) vm_page_free(m); - return; - } - - /* - * Update LRU state before releasing the wiring reference. - * We only need to do this once since we hold the page lock. - * Use a release store when updating the reference count to - * synchronize with vm_page_free_prep(). - */ - old = m->ref_count; - locked = false; - do { - KASSERT(VPRC_WIRE_COUNT(old) > 0, - ("vm_page_unwire: wire count underflow for page %p", m)); - if (!locked && VPRC_WIRE_COUNT(old) == 1) { - vm_page_lock(m); - locked = true; - if (queue == PQ_ACTIVE && vm_page_queue(m) == PQ_ACTIVE) - vm_page_reference(m); - else - vm_page_mvqueue(m, queue); - } - } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); - - /* - * Release the lock only after the wiring is released, to ensure that - * the page daemon does not encounter and dequeue the page while it is - * still wired. - */ - if (locked) - vm_page_unlock(m); - - if (VPRC_WIRE_COUNT(old) == 1) { - vm_wire_sub(1); - if (old == 1) - vm_page_free(m); + } else { + vm_page_unwire_managed(m, queue, false); } } @@ -3750,25 +3700,45 @@ vm_page_unwire_noq(vm_page_t m) * before releasing the page lock, otherwise the page daemon may immediately * dequeue the page. * + * In many cases this function's parameters are known at compile-time, so + * it is inlined into its callers so as to allow constant folding to remove + * branches. + * * A managed page must be locked. 
*/ static __always_inline void -vm_page_mvqueue(vm_page_t m, const uint8_t nqueue) +vm_page_mvqueue(vm_page_t m, const uint8_t nqueue, const uint16_t nflag) { + vm_page_astate_t old, new; - vm_page_assert_locked(m); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_page_mvqueue: page %p is unmanaged", m)); - - if (vm_page_queue(m) != nqueue) { - vm_page_dequeue(m); - vm_page_enqueue(m, nqueue); - } else if (nqueue != PQ_ACTIVE) { - vm_page_requeue(m); + KASSERT(m->ref_count > 0, + ("vm_page_mvqueue: page %p is missing refs", m)); + KASSERT(nflag == PGA_REQUEUE || nflag == PGA_REQUEUE_HEAD, + ("vm_page_mvqueue: unexpected queue state flag")); + KASSERT(nflag != PGA_REQUEUE_HEAD || nqueue == PQ_INACTIVE, + ("vm_page_mvqueue: wrong queue %d for PGA_REQUEUE_HEAD", nqueue)); + + for (old = vm_page_astate_load(m);;) { + if ((old.flags & PGA_DEQUEUE) != 0) + break; + new = old; + if (nqueue == PQ_ACTIVE) + new.act_count = max(old.act_count, ACT_INIT); + + if (old.queue == nqueue) { + if (nqueue != PQ_ACTIVE) + new.flags |= nflag; + if (new._bits == old._bits) + break; + } else { + new.flags |= nflag; + new.queue = nqueue; + } + if (vm_page_pqstate_commit(m, &old, new)) + break; } - - if (nqueue == PQ_ACTIVE && m->act_count < ACT_INIT) - m->act_count = ACT_INIT; } /* @@ -3778,9 +3748,9 @@ void vm_page_activate(vm_page_t m) { - if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) + if ((m->oflags & VPO_UNMANAGED) != 0) return; - vm_page_mvqueue(m, PQ_ACTIVE); + vm_page_mvqueue(m, PQ_ACTIVE, PGA_REQUEUE); } /* @@ -3791,30 +3761,9 @@ void vm_page_deactivate(vm_page_t m) { - if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) + if ((m->oflags & VPO_UNMANAGED) != 0) return; - vm_page_mvqueue(m, PQ_INACTIVE); -} - -/* - * Move the specified page close to the head of the inactive queue, - * bypassing LRU. A marker page is used to maintain FIFO ordering. - * As with regular enqueues, we use a per-CPU batch queue to reduce - * contention on the page queue lock. 
- */ -static void -_vm_page_deactivate_noreuse(vm_page_t m) -{ - - vm_page_assert_locked(m); - - if (!vm_page_inactive(m)) { - vm_page_dequeue(m); - m->queue = PQ_INACTIVE; - } - if ((m->aflags & PGA_REQUEUE_HEAD) == 0) - vm_page_aflag_set(m, PGA_REQUEUE_HEAD); - vm_page_pqbatch_submit(m, PQ_INACTIVE); + vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE); } void @@ -3824,8 +3773,9 @@ vm_page_deactivate_noreuse(vm_page_t m) KASSERT(m->object != NULL, ("vm_page_deactivate_noreuse: page %p has no object", m)); - if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_wired(m)) - _vm_page_deactivate_noreuse(m); + if ((m->oflags & VPO_UNMANAGED) != 0) + return; + vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE_HEAD); } /* @@ -3837,7 +3787,7 @@ vm_page_launder(vm_page_t m) if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; - vm_page_mvqueue(m, PQ_LAUNDRY); + vm_page_mvqueue(m, PQ_LAUNDRY, PGA_REQUEUE); } /* @@ -3855,11 +3805,17 @@ vm_page_unswappable(vm_page_t m) vm_page_enqueue(m, PQ_UNSWAPPABLE); } -static void -vm_page_release_toq(vm_page_t m, int flags) +/* XXX comment */ +static bool +vm_page_release_toq(vm_page_t m, uint8_t nqueue, bool noreuse) { + vm_page_astate_t old, new; + uint16_t nflag; - vm_page_assert_locked(m); + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_release_toq: page %p is unmanaged", m)); + KASSERT(m->ref_count > 0, + ("vm_page_release_toq: page %p is missing refs", m)); /* * Use a check of the valid bits to determine whether we should @@ -3871,12 +3827,35 @@ vm_page_release_toq(vm_page_t m, int flags) * If we were asked to not cache the page, place it near the head of the * inactive queue so that is reclaimed sooner. */ - if ((flags & (VPR_TRYFREE | VPR_NOREUSE)) != 0 || m->valid == 0) - _vm_page_deactivate_noreuse(m); - else if (vm_page_active(m)) - vm_page_reference(m); - else - vm_page_mvqueue(m, PQ_INACTIVE); + nflag = (noreuse || m->valid == 0) ? PGA_REQUEUE_HEAD : PGA_REQUEUE; + + /* XXX explain */ + vm_page_aflag_clear(m, PGA_DEQUEUE); + + for (old = vm_page_astate_load(m);;) { + new = old; + if ((new.flags & PGA_DEQUEUE) != 0) + return (false); + if (nflag != PGA_REQUEUE_HEAD && old.queue == PQ_ACTIVE) { + new.flags |= PGA_REFERENCED; + } else { + if (nqueue == PQ_ACTIVE) + new.act_count = max(old.act_count, ACT_INIT); + else + new.flags |= nflag; + new.queue = nqueue; + } + + /* + * If the page queue state is not changing, we have nothing + * to do. + */ + if (new._bits == old._bits) + break; + if (vm_page_pqstate_commit(m, &old, new)) + break; + } + return (true); } /* @@ -3886,8 +3865,6 @@ void vm_page_release(vm_page_t m, int flags) { vm_object_t object; - u_int old; - bool locked; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_page_release: page %p is unmanaged", m)); @@ -3913,36 +3890,7 @@ vm_page_release(vm_page_t m, int flags) } } - /* - * Update LRU state before releasing the wiring reference. - * Use a release store when updating the reference count to - * synchronize with vm_page_free_prep(). - */ - old = m->ref_count; - locked = false; - do { - KASSERT(VPRC_WIRE_COUNT(old) > 0, - ("vm_page_unwire: wire count underflow for page %p", m)); - if (!locked && VPRC_WIRE_COUNT(old) == 1) { - vm_page_lock(m); - locked = true; - vm_page_release_toq(m, flags); - } - } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); - - /* - * Release the lock only after the wiring is released, to ensure that - * the page daemon does not encounter and dequeue the page while it is - * still wired. 
- */ - if (locked) - vm_page_unlock(m); - - if (VPRC_WIRE_COUNT(old) == 1) { - vm_wire_sub(1); - if (old == 1) - vm_page_free(m); - } + vm_page_unwire_managed(m, PQ_INACTIVE, flags != 0); } /* See vm_page_release(). */ @@ -3960,9 +3908,7 @@ vm_page_release_locked(vm_page_t m, int flags) m->dirty == 0 && !vm_page_busied(m)) { vm_page_free(m); } else { - vm_page_lock(m); - vm_page_release_toq(m, flags); - vm_page_unlock(m); + (void)vm_page_release_toq(m, PQ_INACTIVE, flags != 0); } } } @@ -4774,6 +4720,22 @@ vm_page_object_lock_assert(vm_page_t m) VM_OBJECT_ASSERT_WLOCKED(m->object); } +void +vm_page_pagequeue_lock_assert(vm_page_t m, uint8_t queue) +{ + + if ((m->flags & PG_MARKER) != 0) + return; + + /* + * The page's page queue index may only change while the + * current queue's lock is held. + */ + KASSERT(queue != PQ_NONE, + ("page %p does not belong to a queue", m)); + vm_pagequeue_assert_locked(_vm_page_pagequeue(m, queue)); +} + void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits) { @@ -4853,7 +4815,7 @@ DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, - m->queue, m->ref_count, m->aflags, m->oflags, - m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); + m->astate.queue, m->ref_count, m->astate.flags, m->oflags, + m->flags, m->astate.act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 0c3f3a9bade2..4d5726c0e39a 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -190,6 +190,15 @@ typedef uint32_t vm_page_bits_t; typedef uint64_t vm_page_bits_t; #endif +typedef union { + struct { + uint16_t flags; + uint8_t queue; + uint8_t act_count; + }; + uint32_t _bits; +} vm_page_astate_t; + struct vm_page { union { TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */ @@ -212,15 +221,13 @@ struct vm_page { u_int ref_count; /* page references */ }; volatile u_int busy_lock; /* busy owners lock */ - uint16_t flags; /* page PG_* flags (P) */ + vm_page_astate_t astate; /* atomically updated state */ + uint8_t flags; /* page PG_* flags (P) */ uint8_t order; /* index of the buddy queue (F) */ uint8_t pool; /* vm_phys freepool index (F) */ - uint8_t aflags; /* access is atomic */ - uint8_t oflags; /* page VPO_* flags (O) */ - uint8_t queue; /* page queue index (Q) */ int8_t psind; /* pagesizes[] index (O) */ int8_t segind; /* vm_phys segment index (C) */ - u_char act_count; /* page usage count (P) */ + uint8_t oflags; /* page VPO_* flags (O) */ /* NOTE that these must support one bit per DEV_BSIZE in a page */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ @@ -399,8 +406,8 @@ extern struct mtx_padalign pa_lock[]; #define PGA_REQUEUE 0x20 /* page is due to be requeued */ #define PGA_REQUEUE_HEAD 0x40 /* page requeue should bypass LRU */ -#define PGA_QUEUE_STATE_MASK (PGA_ENQUEUED | PGA_DEQUEUE | PGA_REQUEUE | \ - PGA_REQUEUE_HEAD) +#define PGA_QUEUE_OP_MASK (PGA_DEQUEUE | PGA_REQUEUE | PGA_REQUEUE_HEAD) +#define PGA_QUEUE_STATE_MASK (PGA_ENQUEUED | PGA_QUEUE_OP_MASK) /* * Page flags. If changed at any other time than page allocation or @@ -410,11 +417,11 @@ extern struct mtx_padalign pa_lock[]; * allocated from a per-CPU cache. It is cleared the next time that the * page is allocated from the physical memory allocator. 
*/ -#define PG_PCPU_CACHE 0x0001 /* was allocated from per-CPU caches */ -#define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */ -#define PG_ZERO 0x0008 /* page is zeroed */ -#define PG_MARKER 0x0010 /* special queue marker page */ -#define PG_NODUMP 0x0080 /* don't include this page in a dump */ +#define PG_PCPU_CACHE 0x01 /* was allocated from per-CPU caches */ +#define PG_FICTITIOUS 0x04 /* physical page doesn't exist */ +#define PG_ZERO 0x08 /* page is zeroed */ +#define PG_MARKER 0x10 /* special queue marker page */ +#define PG_NODUMP 0x80 /* don't include this page in a dump */ /* * Misc constants. @@ -572,7 +579,6 @@ int vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, void vm_page_deactivate(vm_page_t); void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); -void vm_page_dequeue_deferred(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); bool vm_page_free_prep(vm_page_t m); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); @@ -584,6 +590,8 @@ vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); void vm_page_pqbatch_drain(void); void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue); +bool vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old, + vm_page_astate_t new); vm_page_t vm_page_prev(vm_page_t m); bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m); void vm_page_putfake(vm_page_t m); @@ -688,64 +696,52 @@ void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line); #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m); #define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m) +void vm_page_pagequeue_lock_assert(vm_page_t m, uint8_t queue); +#define VM_PAGE_PAGEQUEUE_LOCK_ASSERT(m, q) vm_page_pagequeue_lock_assert(m, q) void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits); #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) \ vm_page_assert_pga_writeable(m, bits) #else #define VM_PAGE_OBJECT_LOCK_ASSERT(m) (void)0 +#define VM_PAGE_PAGEQUEUE_LOCK_ASSERT(m, q) (void)0 #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) (void)0 #endif /* - * We want to use atomic updates for the aflags field, which is 8 bits wide. - * However, not all architectures support atomic operations on 8-bit + * We want to use atomic updates for the aflags field, which is 16 bits wide. + * However, not all architectures support atomic operations on 16-bit * destinations. In order that we can easily use a 32-bit operation, we * require that the aflags field be 32-bit aligned. */ -_Static_assert(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0, +_Static_assert(offsetof(struct vm_page, astate.flags) % sizeof(uint32_t) == 0, "aflags field is not 32-bit aligned"); +#define VM_PAGE_AFLAG_SHIFT __offsetof(vm_page_astate_t, flags) + /* - * We want to be able to update the aflags and queue fields atomically in - * the same operation. + * Return the atomic flag set for the page. 
*/ -_Static_assert(offsetof(struct vm_page, aflags) / sizeof(uint32_t) == - offsetof(struct vm_page, queue) / sizeof(uint32_t), - "aflags and queue fields do not belong to the same 32-bit word"); -_Static_assert(offsetof(struct vm_page, queue) % sizeof(uint32_t) == 2, - "queue field is at an unexpected offset"); -_Static_assert(sizeof(((struct vm_page *)NULL)->queue) == 1, - "queue field has an unexpected size"); - -#if BYTE_ORDER == LITTLE_ENDIAN -#define VM_PAGE_AFLAG_SHIFT 0 -#define VM_PAGE_QUEUE_SHIFT 16 -#else -#define VM_PAGE_AFLAG_SHIFT 24 -#define VM_PAGE_QUEUE_SHIFT 8 -#endif -#define VM_PAGE_QUEUE_MASK (0xff << VM_PAGE_QUEUE_SHIFT) +static inline int +vm_page_aflags(vm_page_t m) +{ + + return (m->astate.flags); +} /* * Clear the given bits in the specified page. */ static inline void -vm_page_aflag_clear(vm_page_t m, uint8_t bits) +vm_page_aflag_clear(vm_page_t m, uint16_t bits) { uint32_t *addr, val; - /* - * The PGA_REFERENCED flag can only be cleared if the page is locked. - */ - if ((bits & PGA_REFERENCED) != 0) - vm_page_assert_locked(m); - /* * Access the whole 32-bit word containing the aflags field with an * atomic update. Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ - addr = (void *)&m->aflags; + addr = (void *)&m->astate; val = bits << VM_PAGE_AFLAG_SHIFT; atomic_clear_32(addr, val); } @@ -754,7 +750,7 @@ vm_page_aflag_clear(vm_page_t m, uint8_t bits) * Set the given bits in the specified page. */ static inline void -vm_page_aflag_set(vm_page_t m, uint8_t bits) +vm_page_aflag_set(vm_page_t m, uint16_t bits) { uint32_t *addr, val; @@ -765,44 +761,43 @@ vm_page_aflag_set(vm_page_t m, uint8_t bits) * atomic update. Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ - addr = (void *)&m->aflags; + addr = (void *)&m->astate; val = bits << VM_PAGE_AFLAG_SHIFT; atomic_set_32(addr, val); } -/* - * Atomically update the queue state of the page. The operation fails if - * any of the queue flags in "fflags" are set or if the "queue" field of - * the page does not match the expected value; if the operation is - * successful, the flags in "nflags" are set and all other queue state - * flags are cleared. 
- */ +static inline vm_page_astate_t +vm_page_astate_load(vm_page_t m) +{ + vm_page_astate_t astate; + + astate._bits = atomic_load_32(&m->astate._bits); + return (astate); +} + static inline bool -vm_page_pqstate_cmpset(vm_page_t m, uint32_t oldq, uint32_t newq, - uint32_t fflags, uint32_t nflags) +vm_page_astate_fcmpset(vm_page_t m, vm_page_astate_t *old, + vm_page_astate_t new) { - uint32_t *addr, nval, oval, qsmask; - - vm_page_assert_locked(m); - - fflags <<= VM_PAGE_AFLAG_SHIFT; - nflags <<= VM_PAGE_AFLAG_SHIFT; - newq <<= VM_PAGE_QUEUE_SHIFT; - oldq <<= VM_PAGE_QUEUE_SHIFT; - qsmask = ((PGA_DEQUEUE | PGA_REQUEUE | PGA_REQUEUE_HEAD) << - VM_PAGE_AFLAG_SHIFT) | VM_PAGE_QUEUE_MASK; - - addr = (void *)&m->aflags; - oval = atomic_load_32(addr); - do { - if ((oval & fflags) != 0) - return (false); - if ((oval & VM_PAGE_QUEUE_MASK) != oldq) - return (false); - nval = (oval & ~qsmask) | nflags | newq; - } while (!atomic_fcmpset_32(addr, &oval, nval)); - - return (true); + int ret; + + KASSERT(new.queue == PQ_INACTIVE || (new.flags & PGA_REQUEUE_HEAD) == 0, + ("vm_page_astate_fcmpset: unexpected head requeue for page %p", + m)); + KASSERT((new.flags & PGA_ENQUEUED) == 0 || new.queue != PQ_NONE, + ("vm_page_astate_fcmpset: setting PGA_ENQUEUED without a queue")); + KASSERT(new._bits != old->_bits, + ("vm_page_astate_fcmpset: bits are not changing")); + + ret = atomic_fcmpset_32(&m->astate._bits, &old->_bits, new._bits); + if (ret != 0) { + if (old->queue != PQ_NONE && old->queue != new.queue) + VM_PAGE_PAGEQUEUE_LOCK_ASSERT(m, old->queue); + KASSERT((new.flags & PGA_ENQUEUED) == 0 || old->queue == new.queue, + ("vm_page_astate_fcmpset: PGA_ENQUEUED set after queue change for page %p", m)); + } + + return (ret != 0); }
*/ static inline uint8_t vm_page_queue(vm_page_t m) { + vm_page_astate_t as; - vm_page_assert_locked(m); - - if ((m->aflags & PGA_DEQUEUE) != 0) + as = vm_page_astate_load(m); + if ((as.flags & PGA_DEQUEUE) != 0) return (PQ_NONE); - atomic_thread_fence_acq(); - return (m->queue); + return (as.queue); } static inline bool diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index c7f03129d070..848239eea411 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -218,7 +218,7 @@ vm_pageout_init_scan(struct scan_state *ss, struct vm_pagequeue *pq, { vm_pagequeue_assert_locked(pq); - KASSERT((marker->aflags & PGA_ENQUEUED) == 0, + KASSERT((vm_page_aflags(marker) & PGA_ENQUEUED) == 0, ("marker %p already enqueued", marker)); if (after == NULL) @@ -242,7 +242,7 @@ vm_pageout_end_scan(struct scan_state *ss) pq = ss->pq; vm_pagequeue_assert_locked(pq); - KASSERT((ss->marker->aflags & PGA_ENQUEUED) != 0, + KASSERT((vm_page_aflags(ss->marker) & PGA_ENQUEUED) != 0, ("marker %p not enqueued", ss->marker)); TAILQ_REMOVE(&pq->pq_pl, ss->marker, plinks.q); @@ -271,7 +271,7 @@ vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue) marker = ss->marker; pq = ss->pq; - KASSERT((marker->aflags & PGA_ENQUEUED) != 0, + KASSERT((marker->astate.flags & PGA_ENQUEUED) != 0, ("marker %p not enqueued", ss->marker)); vm_pagequeue_lock(pq); @@ -280,7 +280,7 @@ vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue) m = n, ss->scanned++) { n = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) == 0) { - KASSERT((m->aflags & PGA_ENQUEUED) != 0, + KASSERT((m->astate.flags & PGA_ENQUEUED) != 0, ("page %p not enqueued", m)); KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in page queue", m)); @@ -370,13 +370,10 @@ vm_pageout_cluster(vm_page_t m) ib = 0; break; } - vm_page_lock(p); if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { - vm_page_unlock(p); ib = 0; break; } - vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; ++ib; @@ -396,12 +393,8 @@ vm_pageout_cluster(vm_page_t m) vm_page_test_dirty(p); if (p->dirty == 0) break; - vm_page_lock(p); - if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { - vm_page_unlock(p); + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) break; - } - vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; ++is; @@ -458,7 +451,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); - KASSERT((mc[i]->aflags & PGA_WRITEABLE) == 0, + KASSERT((vm_page_aflags(mc[i]) & PGA_WRITEABLE) == 0, ("vm_pageout_flush: writeable page %p", mc[i])); vm_page_sbusy(mc[i]); } @@ -577,7 +570,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout) vm_pindex_t pindex; int error, lockmode; - vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); error = 0; @@ -597,7 +589,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout) * of time. 
*/ if (object->type == OBJT_VNODE) { - vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { @@ -627,7 +618,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout) error = ENOENT; goto unlock_all; } - vm_page_lock(m); /* * While the object and page were unlocked, the page @@ -663,7 +653,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout) error = EBUSY; goto unlock_all; } - vm_page_unlock(m); /* * If a page is dirty, then it is either being washed @@ -699,14 +688,13 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) { struct scan_state ss; struct vm_pagequeue *pq; - struct mtx *mtx; vm_object_t object; vm_page_t m, marker; - int act_delta, error, numpagedout, queue, starting_target; + vm_page_astate_t old, new; + int act_delta, error, numpagedout, queue, refs, starting_target; int vnodes_skipped; bool pageout_ok; - mtx = NULL; object = NULL; starting_target = launder; vnodes_skipped = 0; @@ -734,77 +722,45 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) if (__predict_false((m->flags & PG_MARKER) != 0)) continue; - vm_page_change_lock(m, &mtx); - -recheck: /* - * The page may have been disassociated from the queue - * or even freed while locks were dropped. We thus must be - * careful whenever modifying page state. Once the object lock - * has been acquired, we have a stable reference to the page. + * Perform some quick and racy checks of the page's queue state. + * Bail if things are not as we expect. */ - if (vm_page_queue(m) != queue) + old = vm_page_astate_load(m); + if (old.queue != queue || (old.flags & PGA_ENQUEUED) == 0) continue; - - /* - * A requeue was requested, so this page gets a second - * chance. - */ - if ((m->aflags & PGA_REQUEUE) != 0) { + if ((old.flags & PGA_QUEUE_OP_MASK) != 0) { vm_page_pqbatch_submit(m, queue); continue; } - /* - * Wired pages may not be freed. Complete their removal - * from the queue now to avoid needless revisits during - * future scans. This check is racy and must be reverified once - * we hold the object lock and have verified that the page - * is not busy. - */ - if (vm_page_wired(m)) { - vm_page_dequeue_deferred(m); - continue; - } - if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - - /* - * A page's object pointer may be set to NULL before - * the object lock is acquired. - */ object = (vm_object_t)atomic_load_ptr(&m->object); - if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { - mtx_unlock(mtx); - /* Depends on type-stability. */ - VM_OBJECT_WLOCK(object); - mtx_lock(mtx); - goto recheck; + if (object == NULL) + continue; + VM_OBJECT_WLOCK(object); + if (m->object != object) { + VM_OBJECT_WUNLOCK(object); + object = NULL; + continue; } } - if (__predict_false(m->object == NULL)) - /* - * The page has been removed from its object. - */ - continue; - KASSERT(m->object == object, ("page %p does not belong to %p", - m, object)); if (vm_page_busied(m)) continue; /* - * Re-check for wirings now that we hold the object lock and - * have verified that the page is unbusied. If the page is - * mapped, it may still be wired by pmap lookups. The call to + * Check for wirings now that we hold the object lock and have + * verified that the page is unbusied. If the page is mapped, + * it may still be wired by pmap lookups. The call to * vm_page_try_remove_all() below atomically checks for such * wirings and removes mappings. If the page is unmapped, the * wire count is guaranteed not to increase.
*/ if (__predict_false(vm_page_wired(m))) { - vm_page_dequeue_deferred(m); + vm_page_pqbatch_submit(m, queue); continue; } @@ -824,46 +780,64 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) * that a reference from a concurrently destroyed mapping is * observed here and now. */ - if (object->ref_count != 0) - act_delta = pmap_ts_referenced(m); - else { - KASSERT(!pmap_page_is_mapped(m), - ("page %p is mapped", m)); - act_delta = 0; - } - if ((m->aflags & PGA_REFERENCED) != 0) { - vm_page_aflag_clear(m, PGA_REFERENCED); - act_delta++; - } - if (act_delta != 0) { - if (object->ref_count != 0) { - VM_CNT_INC(v_reactivated); - vm_page_activate(m); + refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0; - /* - * Increase the activation count if the page - * was referenced while in the laundry queue. - * This makes it less likely that the page will - * be returned prematurely to the inactive - * queue. - */ - m->act_count += act_delta + ACT_ADVANCE; + for (old = vm_page_astate_load(m);;) { + if (old.queue != queue || + (old.flags & PGA_ENQUEUED) == 0) + goto next_page; - /* - * If this was a background laundering, count - * activated pages towards our target. The - * purpose of background laundering is to ensure - * that pages are eventually cycled through the - * laundry queue, and an activation is a valid - * way out. - */ - if (!in_shortfall) - launder--; - continue; - } else if ((object->flags & OBJ_DEAD) == 0) { - vm_page_requeue(m); - continue; + if ((old.flags & PGA_QUEUE_OP_MASK) != 0) { + vm_page_pqbatch_submit(m, queue); + goto next_page; + } + + new = old; + act_delta = refs; + if ((old.flags & PGA_REFERENCED) != 0) { + new.flags &= ~PGA_REFERENCED; + act_delta++; + } + if (act_delta != 0) { + if (object->ref_count != 0) { + /* + * Increase the activation count if the + * page was referenced while in the + * laundry queue. This makes it less + * likely that the page will be returned + * prematurely to the inactive queue. + */ + new.act_count += ACT_ADVANCE + + act_delta; + if (new.act_count > ACT_MAX) + new.act_count = ACT_MAX; + + new.flags |= PGA_REQUEUE; + new.queue = PQ_ACTIVE; + if (!vm_page_pqstate_commit(m, &old, + new)) + continue; + + VM_CNT_INC(v_reactivated); + + /* + * If this was a background laundering, + * count activated pages towards our + * target. The purpose of background + * laundering is to ensure that pages + * are eventually cycled through the + * laundry queue, and an activation is a + * valid way out. 
+ */ + if (!in_shortfall) + launder--; + goto next_page; + } else if ((object->flags & OBJ_DEAD) == 0) { + vm_page_launder(m); + goto next_page; + } } + break; } /* @@ -876,7 +850,7 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0 && !vm_page_try_remove_all(m)) { - vm_page_dequeue_deferred(m); + vm_page_pqbatch_submit(m, queue); continue; } } @@ -900,7 +874,7 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) else pageout_ok = true; if (!pageout_ok) { - vm_page_requeue(m); + vm_page_launder(m); continue; } @@ -925,13 +899,9 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) pageout_lock_miss++; vnodes_skipped++; } - mtx = NULL; object = NULL; } - } - if (mtx != NULL) { - mtx_unlock(mtx); - mtx = NULL; +next_page:; } if (object != NULL) { VM_OBJECT_WUNLOCK(object); @@ -1169,12 +1139,13 @@ static void vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) { struct scan_state ss; - struct mtx *mtx; vm_object_t object; vm_page_t m, marker; + vm_page_astate_t old, new; struct vm_pagequeue *pq; long min_scan; - int act_delta, max_scan, scan_tick; + int act_delta, max_scan, ps_delta, refs, scan_tick; + uint8_t nqueue; marker = &vmd->vmd_markers[PQ_ACTIVE]; pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; @@ -1208,7 +1179,6 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) * and scanning resumes. */ max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan; - mtx = NULL; act_scan: vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan); while ((m = vm_pageout_next(&ss, false)) != NULL) { @@ -1227,29 +1197,6 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) if (__predict_false((m->flags & PG_MARKER) != 0)) continue; - vm_page_change_lock(m, &mtx); - - /* - * The page may have been disassociated from the queue - * or even freed while locks were dropped. We thus must be - * careful whenever modifying page state. Once the object lock - * has been acquired, we have a stable reference to the page. - */ - if (vm_page_queue(m) != PQ_ACTIVE) - continue; - - /* - * Wired pages are dequeued lazily. - */ - if (vm_page_wired(m)) { - vm_page_dequeue_deferred(m); - continue; - } - - /* - * A page's object pointer may be set to NULL before - * the object lock is acquired. - */ object = (vm_object_t)atomic_load_ptr(&m->object); if (__predict_false(object == NULL)) /* @@ -1264,80 +1211,104 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) * that a reference from a concurrently destroyed mapping is * observed here and now. * - * Perform an unsynchronized object ref count check. While - * the page lock ensures that the page is not reallocated to - * another object, in particular, one with unmanaged mappings - * that cannot support pmap_ts_referenced(), two races are, + * Perform an unsynchronized object ref count check. While the + * page lock ensures that the page is not reallocated to another + * object, in particular, one with unmanaged mappings that + * cannot support pmap_ts_referenced(), two races are, * nonetheless, possible: + * * 1) The count was transitioning to zero, but we saw a non- - * zero value. pmap_ts_referenced() will return zero - * because the page is not mapped. - * 2) The count was transitioning to one, but we saw zero. - * This race delays the detection of a new reference. At - * worst, we will deactivate and reactivate the page. + * zero value. 
pmap_ts_referenced() will return zero because + * the page is not mapped. + * 2) The count was transitioning to one, but we saw zero. This + * race delays the detection of a new reference. At worst, + * we will deactivate and reactivate the page. */ - if (object->ref_count != 0) - act_delta = pmap_ts_referenced(m); - else - act_delta = 0; - if ((m->aflags & PGA_REFERENCED) != 0) { - vm_page_aflag_clear(m, PGA_REFERENCED); - act_delta++; - } + refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0; - /* - * Advance or decay the act_count based on recent usage. - */ - if (act_delta != 0) { - m->act_count += ACT_ADVANCE + act_delta; - if (m->act_count > ACT_MAX) - m->act_count = ACT_MAX; - } else - m->act_count -= min(m->act_count, ACT_DECLINE); + for (old = vm_page_astate_load(m);;) { + if (old.queue != PQ_ACTIVE || + (old.flags & PGA_ENQUEUED) == 0) + /* + * Something has moved the page out of the + * active queue. Don't touch it. + */ + break; + if ((old.flags & PGA_DEQUEUE) != 0) { + vm_page_pqbatch_submit(m, PQ_ACTIVE); + break; + } + + new = old; + act_delta = refs; + if ((old.flags & PGA_REFERENCED) != 0) { + new.flags &= ~PGA_REFERENCED; + act_delta++; + } - if (m->act_count == 0) { /* - * When not short for inactive pages, let dirty pages go - * through the inactive queue before moving to the - * laundry queues. This gives them some extra time to - * be reactivated, potentially avoiding an expensive - * pageout. However, during a page shortage, the - * inactive queue is necessarily small, and so dirty - * pages would only spend a trivial amount of time in - * the inactive queue. Therefore, we might as well - * place them directly in the laundry queue to reduce - * queuing overhead. + * Advance or decay the act_count based on recent usage. */ - if (page_shortage <= 0) { - vm_page_swapqueue(m, PQ_ACTIVE, PQ_INACTIVE); + if (act_delta != 0) { + new.act_count += ACT_ADVANCE + act_delta; + if (new.act_count > ACT_MAX) + new.act_count = ACT_MAX; + } else { + new.act_count -= min(new.act_count, ACT_DECLINE); + } + + if (new.act_count > 0) { + /* + * Adjust the activation count and keep the page + * in the active queue. The count might be left + * unchanged if it is saturated. + */ + if (new.act_count == old.act_count || + vm_page_astate_fcmpset(m, &old, new)) + break; } else { /* + * When not short for inactive pages, let dirty + * pages go through the inactive queue before + * moving to the laundry queues. This gives + * them some extra time to be reactivated, + * potentially avoiding an expensive pageout. + * However, during a page shortage, the inactive + * queue is necessarily small, and so dirty + * pages would only spend a trivial amount of + * time in the inactive queue. Therefore, we + * might as well place them directly in the + * laundry queue to reduce queuing overhead. + * * Calling vm_page_test_dirty() here would * require acquisition of the object's write * lock. However, during a page shortage, - * directing dirty pages into the laundry - * queue is only an optimization and not a + * directing dirty pages into the laundry queue + * is only an optimization and not a * requirement. Therefore, we simply rely on - * the opportunistic updates to the page's - * dirty field by the pmap. + * the opportunistic updates to the page's dirty + * field by the pmap. 
*/ - if (m->dirty == 0) { - vm_page_swapqueue(m, PQ_ACTIVE, - PQ_INACTIVE); - page_shortage -= - act_scan_laundry_weight; + if (page_shortage <= 0) { + nqueue = PQ_INACTIVE; + ps_delta = 0; + } else if (m->dirty == 0) { + nqueue = PQ_INACTIVE; + ps_delta = act_scan_laundry_weight; } else { - vm_page_swapqueue(m, PQ_ACTIVE, - PQ_LAUNDRY); - page_shortage--; + nqueue = PQ_LAUNDRY; + ps_delta = 1; + } + + new.flags |= PGA_REQUEUE; + new.queue = nqueue; + if (vm_page_pqstate_commit(m, &old, new)) { + page_shortage -= ps_delta; + break; } } } } - if (mtx != NULL) { - mtx_unlock(mtx); - mtx = NULL; - } vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q); TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q); @@ -1349,20 +1320,30 @@ static int vm_pageout_reinsert_inactive_page(struct scan_state *ss, vm_page_t m) { struct vm_domain *vmd; + vm_page_astate_t old, new; - if (m->queue != PQ_INACTIVE || (m->aflags & PGA_ENQUEUED) != 0) - return (0); - vm_page_aflag_set(m, PGA_ENQUEUED); - if ((m->aflags & PGA_REQUEUE_HEAD) != 0) { - vmd = vm_pagequeue_domain(m); - TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q); - vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD); - } else if ((m->aflags & PGA_REQUEUE) != 0) { - TAILQ_INSERT_TAIL(&ss->pq->pq_pl, m, plinks.q); - vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD); - } else - TAILQ_INSERT_BEFORE(ss->marker, m, plinks.q); - return (1); + for (old = vm_page_astate_load(m);;) { + if (old.queue != PQ_INACTIVE || + (old.flags & (PGA_DEQUEUE | PGA_ENQUEUED)) != 0) + break; + + new = old; + new.flags |= PGA_ENQUEUED; + new.flags &= ~(PGA_REQUEUE | PGA_REQUEUE_HEAD); + if (!vm_page_astate_fcmpset(m, &old, new)) + continue; + + if ((old.flags & PGA_REQUEUE_HEAD) != 0) { + vmd = vm_pagequeue_domain(m); + TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q); + } else if ((old.flags & PGA_REQUEUE) != 0) { + TAILQ_INSERT_TAIL(&ss->pq->pq_pl, m, plinks.q); + } else { + TAILQ_INSERT_BEFORE(ss->marker, m, plinks.q); + } + return (1); + } + return (0); } /* @@ -1405,11 +1386,11 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, { struct scan_state ss; struct vm_batchqueue rq; - struct mtx *mtx; vm_page_t m, marker; + vm_page_astate_t old, new; struct vm_pagequeue *pq; vm_object_t object; - int act_delta, addl_page_shortage, deficit, page_shortage; + int act_delta, addl_page_shortage, deficit, page_shortage, refs; int starting_page_shortage; /* @@ -1429,7 +1410,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit); starting_page_shortage = page_shortage = shortage + deficit; - mtx = NULL; object = NULL; vm_batchqueue_init(&rq); @@ -1447,65 +1427,31 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, KASSERT((m->flags & PG_MARKER) == 0, ("marker page %p was dequeued", m)); - vm_page_change_lock(m, &mtx); - -recheck: /* - * The page may have been disassociated from the queue - * or even freed while locks were dropped. We thus must be - * careful whenever modifying page state. Once the object lock - * has been acquired, we have a stable reference to the page. + * Perform some quick and racy checks of the page's queue state. + * Bail if things are not as we expect. 
*/ - if (vm_page_queue(m) != PQ_INACTIVE) { - addl_page_shortage++; + old = vm_page_astate_load(m); + if (old.queue != PQ_INACTIVE || (old.flags & PGA_ENQUEUED) != 0) continue; - } - - /* - * The page was re-enqueued after the page queue lock was - * dropped, or a requeue was requested. This page gets a second - * chance. - */ - if ((m->aflags & (PGA_ENQUEUED | PGA_REQUEUE | - PGA_REQUEUE_HEAD)) != 0) - goto reinsert; - - /* - * Wired pages may not be freed. Complete their removal - * from the queue now to avoid needless revisits during - * future scans. This check is racy and must be reverified once - * we hold the object lock and have verified that the page - * is not busy. - */ - if (vm_page_wired(m)) { - vm_page_dequeue_deferred(m); + if ((old.flags & PGA_QUEUE_OP_MASK) != 0) { + vm_page_pqbatch_submit(m, PQ_INACTIVE); continue; } if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - - /* - * A page's object pointer may be set to NULL before - * the object lock is acquired. - */ object = (vm_object_t)atomic_load_ptr(&m->object); - if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { - mtx_unlock(mtx); - /* Depends on type-stability. */ - VM_OBJECT_WLOCK(object); - mtx_lock(mtx); - goto recheck; + if (object == NULL) + continue; + VM_OBJECT_WLOCK(object); + if (m->object != object) { + VM_OBJECT_WUNLOCK(object); + object = NULL; + goto reinsert; } } - if (__predict_false(m->object == NULL)) - /* - * The page has been removed from its object. - */ - continue; - KASSERT(m->object == object, ("page %p does not belong to %p", - m, object)); if (vm_page_busied(m)) { /* @@ -1521,15 +1467,15 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, } /* - * Re-check for wirings now that we hold the object lock and - * have verified that the page is unbusied. If the page is - * mapped, it may still be wired by pmap lookups. The call to + * Check for wirings now that we hold the object lock and have + * verified that the page is unbusied. If the page is mapped, + * it may still be wired by pmap lookups. The call to * vm_page_try_remove_all() below atomically checks for such * wirings and removes mappings. If the page is unmapped, the * wire count is guaranteed not to increase. */ if (__predict_false(vm_page_wired(m))) { - vm_page_dequeue_deferred(m); + vm_page_pqbatch_submit(m, PQ_INACTIVE); continue; } @@ -1549,35 +1495,52 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, * that a reference from a concurrently destroyed mapping is * observed here and now. */ - if (object->ref_count != 0) - act_delta = pmap_ts_referenced(m); - else { - KASSERT(!pmap_page_is_mapped(m), - ("page %p is mapped", m)); - act_delta = 0; - } - if ((m->aflags & PGA_REFERENCED) != 0) { - vm_page_aflag_clear(m, PGA_REFERENCED); - act_delta++; - } - if (act_delta != 0) { - if (object->ref_count != 0) { - VM_CNT_INC(v_reactivated); - vm_page_activate(m); + refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0; - /* - * Increase the activation count if the page - * was referenced while in the inactive queue. - * This makes it less likely that the page will - * be returned prematurely to the inactive - * queue. 
- */ - m->act_count += act_delta + ACT_ADVANCE; - continue; - } else if ((object->flags & OBJ_DEAD) == 0) { - vm_page_aflag_set(m, PGA_REQUEUE); - goto reinsert; + for (old = vm_page_astate_load(m);;) { + if (old.queue != PQ_INACTIVE || + (old.flags & PGA_ENQUEUED) != 0) + goto next_page; + + if ((old.flags & PGA_QUEUE_OP_MASK) != 0) { + vm_page_pqbatch_submit(m, PQ_INACTIVE); + goto next_page; } + + new = old; + act_delta = refs; + if ((old.flags & PGA_REFERENCED) != 0) { + new.flags &= ~PGA_REFERENCED; + act_delta++; + } + if (act_delta != 0) { + if (object->ref_count != 0) { + /* + * Increase the activation count if the + * page was referenced while in the + * inactive queue. This makes it less + * likely that the page will be returned + * prematurely to the inactive queue. + */ + new.act_count += ACT_ADVANCE + + act_delta; + if (new.act_count > ACT_MAX) + new.act_count = ACT_MAX; + + new.flags |= PGA_REQUEUE; + new.queue = PQ_ACTIVE; + if (!vm_page_pqstate_commit(m, &old, + new)) + continue; + + VM_CNT_INC(v_reactivated); + goto next_page; + } else if ((object->flags & OBJ_DEAD) == 0) { + vm_page_aflag_set(m, PGA_REQUEUE); + goto reinsert; + } + } + break; } /* @@ -1590,7 +1553,7 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0 && !vm_page_try_remove_all(m)) { - vm_page_dequeue_deferred(m); + vm_page_pqbatch_submit(m, PQ_INACTIVE); continue; } } @@ -1604,25 +1567,30 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, */ if (m->dirty == 0) { free_page: + /* Re-check the page's queue state: it is only safe to free the page if it still belongs to PQ_INACTIVE and has no pending queue operations. Otherwise, submit it to the batch queue so that any pending operation is processed there. */ + old = vm_page_astate_load(m); + if (old.queue != PQ_INACTIVE || + (old.flags & PGA_QUEUE_STATE_MASK) != 0) { + vm_page_pqbatch_submit(m, PQ_INACTIVE); + goto next_page; + } + /* * Because we dequeued the page and have already * checked for concurrent dequeue and enqueue * requests, we can safely disassociate the page * from the inactive queue. */ - KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0, - ("page %p has queue state", m)); - m->queue = PQ_NONE; + m->astate.queue = PQ_NONE; vm_page_free(m); page_shortage--; } else if ((object->flags & OBJ_DEAD) == 0) vm_page_launder(m); +next_page: continue; reinsert: vm_pageout_reinsert_inactive(&ss, &rq, m); } - if (mtx != NULL) - mtx_unlock(mtx); if (object != NULL) VM_OBJECT_WUNLOCK(object); vm_pageout_reinsert_inactive(&ss, &rq, NULL); diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h index ba5e77ce6c8d..b3e244755a05 100644 --- a/sys/vm/vm_pagequeue.h +++ b/sys/vm/vm_pagequeue.h @@ -202,6 +202,8 @@ static inline void vm_pagequeue_remove(struct vm_pagequeue *pq, vm_page_t m) { + vm_pagequeue_assert_locked(pq); + TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); } @@ -249,6 +251,22 @@ vm_pagequeue_domain(vm_page_t m) return (VM_DOMAIN(vm_phys_domain(m))); } +static inline struct vm_pagequeue * +_vm_page_pagequeue(vm_page_t m, uint8_t queue) +{ + + if (queue == PQ_NONE) + return (NULL); + return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue]); +} + +static inline struct vm_pagequeue * +vm_page_pagequeue(vm_page_t m) +{ + + return (_vm_page_pagequeue(m, atomic_load_8(&m->astate.queue))); +} + /* * Return the number of pages we need to free-up or cache * A positive number indicates that we do not have enough free pages.
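The vm_page.h and vm_pagequeue.h changes above fold the separate aflags, queue, and act_count fields into a single 32-bit vm_page_astate_t that is read and updated atomically. The following standalone sketch illustrates that load/compare-and-swap retry pattern using C11 atomics; the union layout, the flag and queue constants, and the astate_deactivate() helper are local stand-ins for illustration, not the kernel's own definitions.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Local stand-in for vm_page_astate_t: one 32-bit word of queue state. */
typedef union {
	struct {
		uint16_t flags;
		uint8_t queue;
		uint8_t act_count;
	};
	uint32_t _bits;
} astate_t;

/* Assumed flag and queue values, chosen for illustration only. */
#define PGA_DEQUEUE	0x0008
#define PGA_REQUEUE	0x0020
#define PQ_INACTIVE	1

/*
 * Request that the page described by *statep be moved to the inactive
 * queue unless a dequeue is already pending.  This mirrors the retry
 * loop used by vm_page_release_toq() and the pageout scans.
 */
static bool
astate_deactivate(_Atomic uint32_t *statep)
{
	astate_t old, new;

	old._bits = atomic_load(statep);
	for (;;) {
		if ((old.flags & PGA_DEQUEUE) != 0)
			return (false);		/* a concurrent dequeue wins */
		new = old;
		new.flags |= PGA_REQUEUE;
		new.queue = PQ_INACTIVE;
		if (new._bits == old._bits)
			return (true);		/* nothing to change */
		if (atomic_compare_exchange_weak(statep, &old._bits, new._bits))
			return (true);
		/* On failure, old._bits was reloaded; retry the update. */
	}
}

Because the flags, queue index, and activation count change in one atomic store, a thread that loses the race simply reloads the word and re-applies its decision, which is what lets the patch drop the per-page lock around queue transitions.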
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c index 2557dc6f4e55..9e50b4da0741 100644 --- a/sys/vm/vm_swapout.c +++ b/sys/vm/vm_swapout.c @@ -107,8 +107,9 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include +#include +#include #include #include #include @@ -169,6 +170,58 @@ static void swapout_procs(int action); static void vm_req_vmdaemon(int req); static void vm_thread_swapout(struct thread *td); +static void +vm_swapout_object_deactivate_page(vm_page_t m, int remove_mode) +{ + vm_page_astate_t old, new; + int act_delta, refs; + + refs = pmap_ts_referenced(m); + + for (old = vm_page_astate_load(m);;) { + if ((old.flags & PGA_DEQUEUE) != 0) + break; + + new = old; + act_delta = refs; + if ((old.flags & PGA_REFERENCED) != 0) { + new.flags &= ~PGA_REFERENCED; + act_delta++; + } + + if (old.queue != PQ_ACTIVE && act_delta != 0) { + if (new.act_count == ACT_MAX) + break; + new.act_count += act_delta; + new.flags |= PGA_REQUEUE; + new.queue = PQ_ACTIVE; + if (vm_page_pqstate_commit(m, &old, new)) + break; + } else if (old.queue == PQ_ACTIVE) { + if (act_delta == 0) { + new.act_count -= min(new.act_count, + ACT_DECLINE); + if (!remove_mode && new.act_count == 0) { + (void)vm_page_try_remove_all(m); + + new.flags |= PGA_REQUEUE; + new.queue = PQ_INACTIVE; + } + if (vm_page_pqstate_commit(m, &old, new)) + break; + } else { + if (new.act_count < ACT_MAX - ACT_ADVANCE) + new.act_count += ACT_ADVANCE; + if (vm_page_astate_fcmpset(m, &old, new)) + break; + } + } else { + (void)vm_page_try_remove_all(m); + break; + } + } +} + /* * vm_swapout_object_deactivate_pages * @@ -183,7 +234,7 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, { vm_object_t backing_object, object; vm_page_t p; - int act_delta, remove_mode; + int remove_mode; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) @@ -219,37 +270,8 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, VM_CNT_INC(v_pdpages); if (!pmap_page_exists_quick(pmap, p)) continue; - act_delta = pmap_ts_referenced(p); - vm_page_lock(p); - if ((p->aflags & PGA_REFERENCED) != 0) { - if (act_delta == 0) - act_delta = 1; - vm_page_aflag_clear(p, PGA_REFERENCED); - } - if (!vm_page_active(p) && act_delta != 0) { - vm_page_activate(p); - p->act_count += act_delta; - } else if (vm_page_active(p)) { - /* - * The page daemon does not requeue pages - * after modifying their activation count. - */ - if (act_delta == 0) { - p->act_count -= min(p->act_count, - ACT_DECLINE); - if (!remove_mode && p->act_count == 0) { - (void)vm_page_try_remove_all(p); - vm_page_deactivate(p); - } - } else { - vm_page_activate(p); - if (p->act_count < ACT_MAX - - ACT_ADVANCE) - p->act_count += ACT_ADVANCE; - } - } else if (vm_page_inactive(p)) - (void)vm_page_try_remove_all(p); - vm_page_unlock(p); + + vm_swapout_object_deactivate_page(p, remove_mode); } if ((backing_object = object->backing_object) == NULL) goto unlock_return;
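The new vm_swapout_object_deactivate_page() above and the reworked pageout scans all derive an activation-count update from the references reported by pmap_ts_referenced() together with the PGA_REFERENCED flag. A minimal sketch of that arithmetic follows; the ACT_* values are assumptions for illustration and the helper name is invented here, so the kernel's vm_page.h remains the authority for the real constants.

#include <stdbool.h>
#include <stdint.h>

/* Assumed values for illustration; the kernel defines its own constants. */
#define ACT_DECLINE	1
#define ACT_ADVANCE	3
#define ACT_MAX		64

/*
 * Compute the next activation count for a page, given the number of
 * references observed by pmap_ts_referenced() and whether the
 * PGA_REFERENCED flag was set.
 */
static uint8_t
act_count_next(uint8_t act_count, int refs, bool referenced)
{
	int act_delta, next;

	act_delta = refs + (referenced ? 1 : 0);
	if (act_delta != 0) {
		/* Advance and clamp at the saturation point. */
		next = act_count + ACT_ADVANCE + act_delta;
		return (next > ACT_MAX ? ACT_MAX : next);
	}
	/* No recent references: decay toward zero. */
	return (act_count - (act_count < ACT_DECLINE ? act_count : ACT_DECLINE));
}

A recently referenced page gains ACT_ADVANCE plus the observed reference count, saturating at ACT_MAX, while an idle page decays by ACT_DECLINE until it reaches zero and becomes a candidate for deactivation or laundering.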