diff --git a/UPDATING b/UPDATING index 44438de07cb7..3aecd8801827 100644 --- a/UPDATING +++ b/UPDATING @@ -31,6 +31,14 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW: disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20190513: + User-wired pages now have their own counter, + vm.stats.vm.v_user_wire_count. The vm.max_wired sysctl was renamed + to vm.max_user_wired and changed from an unsigned int to an unsigned + long. bhyve VMs wired with the -S option are now subject to the user + wiring limit; the vm.max_user_wired sysctl may need to be tuned to + avoid running into the limit. + 20190507: The tap(4) driver has been folded into tun(4), and the module has been renamed to tuntap. You should update any kld_load="if_tap" or diff --git a/contrib/netbsd-tests/lib/libc/sys/t_mlock.c b/contrib/netbsd-tests/lib/libc/sys/t_mlock.c index e6c43943181b..61085f35cc21 100644 --- a/contrib/netbsd-tests/lib/libc/sys/t_mlock.c +++ b/contrib/netbsd-tests/lib/libc/sys/t_mlock.c @@ -51,7 +51,7 @@ __RCSID("$NetBSD: t_mlock.c,v 1.6 2016/08/09 12:02:44 kre Exp $"); #define _KMEMUSER #include -void set_vm_max_wired(int); +void set_vm_max_wired(u_long); void restore_vm_max_wired(void); #endif diff --git a/lib/libc/sys/mlock.2 b/lib/libc/sys/mlock.2 index 4a4816615420..040a8f8eaf9e 100644 --- a/lib/libc/sys/mlock.2 +++ b/lib/libc/sys/mlock.2 @@ -28,7 +28,7 @@ .\" @(#)mlock.2 8.2 (Berkeley) 12/11/93 .\" $FreeBSD$ .\" -.Dd March 20, 2018 +.Dd May 13, 2019 .Dt MLOCK 2 .Os .Sh NAME @@ -97,13 +97,13 @@ resource limit and the system-wide .Dq wired pages limit -.Va vm.max_wired . -.Va vm.max_wired +.Va vm.max_user_wired . +.Va vm.max_user_wired applies to the system as a whole, so the amount available to a single process at any given time is the difference between -.Va vm.max_wired +.Va vm.max_user_wired and -.Va vm.stats.vm.v_wire_count . +.Va vm.stats.vm.v_user_wire_count . .Pp If .Va security.bsd.unprivileged_mlock @@ -124,13 +124,11 @@ will fail if: is set to 0 and the caller is not the super-user. .It Bq Er EINVAL The address range given wraps around zero. -.It Bq Er EAGAIN -Locking the indicated range would exceed the system limit for locked memory. .It Bq Er ENOMEM Some portion of the indicated address range is not allocated. There was an error faulting/mapping a page. -Locking the indicated range would exceed the per-process limit for locked -memory. +Locking the indicated range would exceed the per-process or system-wide limits +for locked memory. .El The .Fn munlock @@ -171,11 +169,11 @@ system calls first appeared in Allocating too much wired memory can lead to a memory-allocation deadlock which requires a reboot to recover from. .Pp -The per-process resource limit is a limit on the amount of virtual -memory locked, while the system-wide limit is for the number of locked -physical pages. -Hence a process with two distinct locked mappings of the same physical page -counts as 2 pages against the per-process limit and as only a single page -in the system limit. +The per-process and system-wide resource limits on locked memory apply +to the amount of virtual memory locked, not the number of locked physical +pages. +Hence two distinct locked mappings of the same physical page count as +2 pages against the system limit, and also against the per-process limit +if both mappings belong to the same physical map. .Pp The per-process resource limit is not currently supported.
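For readers adjusting to the sysctl rename, here is a minimal userland sketch (not part of the patch) that queries the new knobs; it assumes only sysctlbyname(3) and the two OIDs introduced above, and computes the headroom described in the mlock.2 text as the difference between vm.max_user_wired and vm.stats.vm.v_user_wire_count. Note that both values are now unsigned longs.

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	u_long max_wired, user_wired;
	size_t len;

	/* Both sysctls are unsigned longs after this change. */
	len = sizeof(max_wired);
	if (sysctlbyname("vm.max_user_wired", &max_wired, &len, NULL, 0) != 0)
		err(1, "vm.max_user_wired");
	len = sizeof(user_wired);
	if (sysctlbyname("vm.stats.vm.v_user_wire_count", &user_wired, &len,
	    NULL, 0) != 0)
		err(1, "vm.stats.vm.v_user_wire_count");

	printf("max user-wired pages:       %lu\n", max_wired);
	printf("currently user-wired pages: %lu\n", user_wired);
	printf("remaining pages:            %lu\n",
	    user_wired < max_wired ? max_wired - user_wired : 0);
	return (0);
}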
diff --git a/lib/libc/sys/mlockall.2 b/lib/libc/sys/mlockall.2 index 23c644a8493f..341099ee758f 100644 --- a/lib/libc/sys/mlockall.2 +++ b/lib/libc/sys/mlockall.2 @@ -30,7 +30,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 25, 2012 +.Dd May 13, 2019 .Dt MLOCKALL 2 .Os .Sh NAME @@ -69,7 +69,7 @@ limited in how much they can lock down. A single process can lock the minimum of a system-wide .Dq wired pages limit -.Va vm.max_wired +.Va vm.max_user_wired and the per-process .Dv RLIMIT_MEMLOCK resource limit. @@ -138,9 +138,9 @@ and functions first appeared in .Fx 5.1 . .Sh BUGS -The per-process resource limit is a limit on the amount of virtual -memory locked, while the system-wide limit is for the number of locked -physical pages. -Hence a process with two distinct locked mappings of the same physical page -counts as 2 pages against the per-process limit and as only a single page -in the system limit. +The per-process and system-wide resource limits on locked memory apply +to the amount of virtual memory locked, not the number of locked physical +pages. +Hence two distinct locked mappings of the same physical page count as +2 pages against the system limit, and also against the per-process limit +if both mappings belong to the same physical map. diff --git a/lib/libc/tests/sys/mlock_helper.c b/lib/libc/tests/sys/mlock_helper.c index 86bbf9ad4d5a..a483207aa5d6 100644 --- a/lib/libc/tests/sys/mlock_helper.c +++ b/lib/libc/tests/sys/mlock_helper.c @@ -39,16 +39,16 @@ __FBSDID("$FreeBSD$"); #include #include -#define VM_MAX_WIRED "vm.max_wired" +#define VM_MAX_WIRED "vm.max_user_wired" static void -vm_max_wired_sysctl(int *old_value, int *new_value) +vm_max_wired_sysctl(u_long *old_value, u_long *new_value) { size_t old_len; - size_t new_len = (new_value == NULL ? 0 : sizeof(int)); + size_t new_len = (new_value == NULL ?
0 : sizeof(*new_value)); if (old_value == NULL) - printf("Setting the new value to %d\n", *new_value); + printf("Setting the new value to %lu\n", *new_value); else { ATF_REQUIRE_MSG(sysctlbyname(VM_MAX_WIRED, NULL, &old_len, new_value, new_len) == 0, @@ -60,14 +60,14 @@ vm_max_wired_sysctl(u_long *old_value, u_long *new_value) "sysctlbyname(%s) failed: %s", VM_MAX_WIRED, strerror(errno)); if (old_value != NULL) - printf("Saved the old value (%d)\n", *old_value); + printf("Saved the old value (%lu)\n", *old_value); } void -set_vm_max_wired(int new_value) +set_vm_max_wired(u_long new_value) { FILE *fp; - int old_value; + u_long old_value; fp = fopen(VM_MAX_WIRED, "w"); if (fp == NULL) { @@ -78,7 +78,7 @@ set_vm_max_wired(u_long new_value) vm_max_wired_sysctl(&old_value, NULL); - ATF_REQUIRE_MSG(fprintf(fp, "%d", old_value) > 0, + ATF_REQUIRE_MSG(fprintf(fp, "%lu", old_value) > 0, "saving %s failed", VM_MAX_WIRED); fclose(fp); @@ -90,7 +90,7 @@ void restore_vm_max_wired(void) { FILE *fp; - int saved_max_wired; + u_long saved_max_wired; fp = fopen(VM_MAX_WIRED, "r"); if (fp == NULL) { @@ -98,14 +98,14 @@ restore_vm_max_wired(void) return; } - if (fscanf(fp, "%d", &saved_max_wired) != 1) { + if (fscanf(fp, "%lu", &saved_max_wired) != 1) { perror("fscanf failed\n"); fclose(fp); return; } fclose(fp); - printf("old value in %s: %d\n", VM_MAX_WIRED, saved_max_wired); + printf("old value in %s: %lu\n", VM_MAX_WIRED, saved_max_wired); if (saved_max_wired == 0) /* This will cripple the test host */ return; diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index 85c23e5cd2f2..7d2f8fa14a94 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -2199,7 +2199,9 @@ MLINKS+=vm_map_lookup.9 vm_map_lookup_done.9 MLINKS+=vm_map_max.9 vm_map_min.9 \ vm_map_max.9 vm_map_pmap.9 MLINKS+=vm_map_stack.9 vm_map_growstack.9 -MLINKS+=vm_map_wire.9 vm_map_unwire.9 +MLINKS+=vm_map_wire.9 vm_map_unwire.9 \ + vm_page_wire.9 vm_page_unwire.9 \ + vm_page_wire.9 vm_page_unwire_noq.9 MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \ vm_page_bits.9 vm_page_dirty.9 \ vm_page_bits.9 vm_page_is_valid.9 \ diff --git a/share/man/man9/vm_page_wire.9 b/share/man/man9/vm_page_wire.9 index 4722f71bdc6a..18f3a4194a19 100644 --- a/share/man/man9/vm_page_wire.9 +++ b/share/man/man9/vm_page_wire.9 @@ -26,12 +26,13 @@ .\" .\" $FreeBSD$ .\" -.Dd July 13, 2001 +.Dd June 3, 2019 .Dt VM_PAGE_WIRE 9 .Os .Sh NAME .Nm vm_page_wire , -.Nm vm_page_unwire +.Nm vm_page_unwire , +.Nm vm_page_unwire_noq .Nd "wire and unwire pages" .Sh SYNOPSIS .In sys/param.h @@ -39,29 +40,44 @@ .In vm/vm_page.h .Ft void .Fn vm_page_wire "vm_page_t m" +.Ft bool +.Fn vm_page_wire_mapped "vm_page_t m" .Ft void -.Fn vm_page_unwire "vm_page_t m" "int activate" +.Fn vm_page_unwire "vm_page_t m" "int queue" +.Ft bool +.Fn vm_page_unwire_noq "vm_page_t m" .Sh DESCRIPTION The .Fn vm_page_wire -function increments the wire count on a page, and removes it from -whatever queue it is on. +and +.Fn vm_page_wire_mapped +functions wire the page, preventing it from being reclaimed by the page +daemon or when its containing object is destroyed. +Both functions require that the page belong to an object. +The +.Fn vm_page_wire_mapped +function is for use by the +.Xr pmap 9 +layer following a lookup. +This function may fail if mappings of the page are concurrently +being destroyed, in which case it will return false. .Pp The .Fn vm_page_unwire -function releases one of the wirings on the page.
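As a side note before the per-architecture conversions below, the following condensed sketch (not taken from the patch) illustrates the lookup idiom the text above describes: instead of the old vm_page_pa_tryrelock()/vm_page_hold() retry dance, a pmap-layer lookup now wires the page under the pmap lock with vm_page_wire_mapped() and treats a failure, which can occur while the page's mappings are being destroyed, as a miss. The example_pte_* helpers are placeholders for the machine-dependent page-table walk; everything else is the real KPI.

vm_page_t
example_extract_and_wire(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t pte;
	vm_page_t m;

	m = NULL;
	PMAP_LOCK(pmap);
	/* example_pte_lookup() stands in for the MD page-table walk. */
	pte = example_pte_lookup(pmap, va);
	if (example_pte_valid(pte) &&
	    (example_pte_writeable(pte) || (prot & VM_PROT_WRITE) == 0)) {
		m = PHYS_TO_VM_PAGE(example_pte_pa(pte));
		/*
		 * vm_page_wire_mapped() fails if the page's mappings are
		 * concurrently being destroyed; treat that as a miss.
		 */
		if (!vm_page_wire_mapped(m))
			m = NULL;
	}
	PMAP_UNLOCK(pmap);
	return (m);
}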
-When -.Va write_count -reaches zero the page is placed back onto either the active queue -(if -.Fa activate -is non-zero) or onto the inactive queue (if -.Fa activate -is zero). -If the page is unmanaged -.Dv ( PG_UNMANAGED -is set) then the page is left on -.Dv PQ_NONE . +and +.Fn vm_page_unwire_noq +functions release a wiring of a page. +The +.Fn vm_page_unwire +function takes a queue index and will insert the page into the +corresponding page queue upon releasing its last wiring. +If the page does not belong to an object and no other references +to the page exist, +.Fn vm_page_unwire +will free the page. +.Fn vm_page_unwire_noq +releases the wiring and returns true if it was the last wiring +of the page. .Sh AUTHORS This manual page was written by .An Chad David Aq Mt davidc@acns.ab.ca . diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 6a51696d8b41..bb5ea3f48c2b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -2546,31 +2546,23 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) m = NULL; PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); + PMAP_LOCK(pmap); -retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & - PG_PS_FRAME) | (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pde & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0) + m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | + (va & PDRMASK)); } else { pte = *pmap_pde_to_pte(pdep, va); - if ((pte & PG_V) && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, - &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pte & PG_V) != 0 && + ((pte & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0)) + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); } - if (m != NULL) - vm_page_hold(m); + if (m != NULL && !vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -3690,7 +3682,7 @@ free_pv_chunk(struct pv_chunk *pc) /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } diff --git a/sys/amd64/sgx/sgx.c b/sys/amd64/sgx/sgx.c index d47d4a3596a5..3d45b60de3ef 100644 --- a/sys/amd64/sgx/sgx.c +++ b/sys/amd64/sgx/sgx.c @@ -358,7 +358,7 @@ sgx_page_remove(struct sgx_softc *sc, vm_page_t p) uint64_t offs; vm_page_lock(p); - vm_page_remove(p); + (void)vm_page_remove(p); vm_page_unlock(p); dprintf("%s: p->pidx %ld\n", __func__, p->pindex); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index c56b862fb1c1..5f845609009e 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -754,7 +754,8 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); - return (EFAULT); + return (error == KERN_RESOURCE_SHORTAGE ? 
ENOMEM : + EFAULT); } } @@ -1002,7 +1003,7 @@ vm_gpa_release(void *cookie) vm_page_t m = cookie; vm_page_lock(m); - vm_page_unhold(m); + vm_page_unwire(m, PQ_ACTIVE); vm_page_unlock(m); } diff --git a/sys/arm/arm/pmap-v4.c b/sys/arm/arm/pmap-v4.c index a1cfa1215fab..e1f411ccc832 100644 --- a/sys/arm/arm/pmap-v4.c +++ b/sys/arm/arm/pmap-v4.c @@ -3415,14 +3415,14 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; - vm_paddr_t pa, paddr; - vm_page_t m = NULL; + vm_paddr_t pa; + vm_page_t m; u_int l1idx; + l1idx = L1_IDX(va); - paddr = 0; + m = NULL; PMAP_LOCK(pmap); -retry: l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* @@ -3434,13 +3434,11 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) - goto retry; if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(pa); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - } else { /* * Note that we can't rely on the validity of the L1 @@ -3467,15 +3465,12 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); else pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PMAP_UNLOCK(pmap); - PA_UNLOCK_COND(paddr); return (m); } diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 8a2c4c167f3e..1d82ebf48cb2 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -1986,23 +1986,20 @@ pmap_extract(pmap_t pmap, vm_offset_t va) vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { - vm_paddr_t pa, lockpa; + vm_paddr_t pa; pt1_entry_t pte1; pt2_entry_t pte2, *pte2p; vm_page_t m; - lockpa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) { if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, va); @@ -2011,13 +2008,11 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) if (pte2_is_valid(pte2) && (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) { pa = pte2_pa(pte2); - if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(lockpa); PMAP_UNLOCK(pmap); return (m); } @@ -2973,7 +2968,7 @@ free_pv_chunk(struct pv_chunk *pc) /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); } @@ -6710,10 +6705,9 @@ pmap_pid_dump(int pid) pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); - printf("va: 0x%x, pa: 0x%x, h: %d, w:" - " %d, f: 0x%x", va, pa, - m->hold_count, m->wire_count, - m->flags); + printf("va: 0x%x, pa: 0x%x, w: %d, " + "f: 0x%x", va, pa, + m->wire_count, m->flags); npte2++; index++; if (index >= 2) { @@ -6823,8 +6817,8 @@ dump_link(pmap_t pmap, uint32_t pte1_idx, boolean_t invalid_ok) 
printf(" 0x%08X: 0x%08X, TEX%d, s:%d, g:%d, m:%p", va , pte2, pte2_class(pte2), !!(pte2 & PTE2_S), !(pte2 & PTE2_NG), m); if (m != NULL) { - printf(" v:%d h:%d w:%d f:0x%04X\n", m->valid, - m->hold_count, m->wire_count, m->flags); + printf(" v:%d w:%d f:0x%04X\n", m->valid, + m->wire_count, m->flags); } else { printf("\n"); } @@ -6933,8 +6927,8 @@ dump_pt2tab(pmap_t pmap) printf(" 0x%08X: 0x%08X, TEX%d, s:%d, m:%p", va, pte2, pte2_class(pte2), !!(pte2 & PTE2_S), m); if (m != NULL) - printf(" , h: %d, w: %d, f: 0x%04X pidx: %lld", - m->hold_count, m->wire_count, m->flags, m->pindex); + printf(" , w: %d, f: 0x%04X pidx: %lld", + m->wire_count, m->flags, m->pindex); printf("\n"); } } diff --git a/sys/arm/nvidia/drm2/tegra_bo.c b/sys/arm/nvidia/drm2/tegra_bo.c index 278d9758ee2e..1e721aacf36e 100644 --- a/sys/arm/nvidia/drm2/tegra_bo.c +++ b/sys/arm/nvidia/drm2/tegra_bo.c @@ -67,7 +67,7 @@ tegra_bo_destruct(struct tegra_bo *bo) cdev_pager_free_page(bo->cdev_pager, m); vm_page_lock(m); m->flags &= ~PG_FICTITIOUS; - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); vm_page_unlock(m); } diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 6b41dfd59f60..82351b140254 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1064,14 +1064,11 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *pte, tpte; vm_offset_t off; - vm_paddr_t pa; vm_page_t m; int lvl; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); @@ -1096,14 +1093,11 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) default: off = 0; } - if (vm_page_pa_tryrelock(pmap, - (tpte & ~ATTR_MASK) | off, &pa)) - goto retry; m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 7de3c8d1f98f..0a20d30b5a6d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -455,7 +455,7 @@ page_unbusy(vm_page_t pp) } static vm_page_t -page_hold(vnode_t *vp, int64_t start) +page_wire(vnode_t *vp, int64_t start) { vm_object_t obj; vm_page_t pp; @@ -481,10 +481,7 @@ page_hold(vnode_t *vp, int64_t start) } ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_page_lock(pp); - vm_page_hold(pp); - vm_page_unlock(pp); - + vm_page_wire(pp); } else pp = NULL; break; @@ -493,11 +490,11 @@ page_hold(vnode_t *vp, int64_t start) } static void -page_unhold(vm_page_t pp) +page_unwire(vm_page_t pp) { vm_page_lock(pp); - vm_page_unhold(pp); + vm_page_unwire(pp, PQ_ACTIVE); vm_page_unlock(pp); } @@ -647,7 +644,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) vm_page_t pp; uint64_t bytes = MIN(PAGESIZE - off, len); - if (pp = page_hold(vp, start)) { + if (pp = page_wire(vp, start)) { struct sf_buf *sf; caddr_t va; @@ -660,7 +657,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) #endif zfs_unmap_page(sf); zfs_vmobject_wlock(obj); - page_unhold(pp); + page_unwire(pp); } else { zfs_vmobject_wunlock(obj); error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), diff --git a/sys/compat/linuxkpi/common/include/linux/gfp.h b/sys/compat/linuxkpi/common/include/linux/gfp.h index a20e3606ca05..49fefb4df610 100644 --- a/sys/compat/linuxkpi/common/include/linux/gfp.h +++ 
b/sys/compat/linuxkpi/common/include/linux/gfp.h @@ -52,12 +52,15 @@ #define __GFP_RETRY_MAYFAIL 0 #define __GFP_MOVABLE 0 #define __GFP_COMP 0 -#define __GFP_KSWAPD_RECLAIM 0 +#define __GFP_KSWAPD_RECLAIM 0 #define __GFP_IO 0 #define __GFP_NO_KSWAPD 0 #define __GFP_WAIT M_WAITOK #define __GFP_DMA32 (1U << 24) /* LinuxKPI only */ +#if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION == 50000 +#define __GFP_NOTWIRED (1U << 25) +#endif #define __GFP_BITS_SHIFT 25 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) #define __GFP_NOFAIL M_WAITOK @@ -74,7 +77,7 @@ #define GFP_TEMPORARY M_NOWAIT #define GFP_NATIVE_MASK (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_ZERO) #define GFP_TRANSHUGE 0 -#define GFP_TRANSHUGE_LIGHT 0 +#define GFP_TRANSHUGE_LIGHT 0 CTASSERT((__GFP_DMA32 & GFP_NATIVE_MASK) == 0); CTASSERT((__GFP_BITS_MASK & GFP_NATIVE_MASK) == GFP_NATIVE_MASK); @@ -98,6 +101,9 @@ static inline struct page * alloc_page(gfp_t flags) { +#ifdef __GFP_NOTWIRED + flags |= __GFP_NOTWIRED; +#endif return (linux_alloc_pages(flags, 0)); } @@ -105,6 +111,9 @@ static inline struct page * alloc_pages(gfp_t flags, unsigned int order) { +#ifdef __GFP_NOTWIRED + flags |= __GFP_NOTWIRED; +#endif return (linux_alloc_pages(flags, order)); } @@ -112,6 +121,9 @@ static inline struct page * alloc_pages_node(int node_id, gfp_t flags, unsigned int order) { +#ifdef __GFP_NOTWIRED + flags |= __GFP_NOTWIRED; +#endif return (linux_alloc_pages(flags, order)); } diff --git a/sys/compat/linuxkpi/common/include/linux/mm.h b/sys/compat/linuxkpi/common/include/linux/mm.h index a9168416b286..a194929d670c 100644 --- a/sys/compat/linuxkpi/common/include/linux/mm.h +++ b/sys/compat/linuxkpi/common/include/linux/mm.h @@ -227,9 +227,7 @@ mark_page_accessed(struct vm_page *page) static inline void get_page(struct vm_page *page) { - vm_page_lock(page); vm_page_wire(page); - vm_page_unlock(page); } extern long @@ -251,8 +249,7 @@ static inline void put_page(struct vm_page *page) { vm_page_lock(page); - if (vm_page_unwire(page, PQ_ACTIVE) && page->object == NULL) - vm_page_free(page); + vm_page_unwire(page, PQ_ACTIVE); vm_page_unlock(page); } diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index 59ed47c175b4..d5070caf7006 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -91,9 +91,14 @@ linux_alloc_pages(gfp_t flags, unsigned int order) if (PMAP_HAS_DMAP) { unsigned long npages = 1UL << order; - int req = (flags & M_ZERO) ? (VM_ALLOC_ZERO | VM_ALLOC_NOOBJ | - VM_ALLOC_NORMAL) : (VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL); - + int req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_NORMAL; + +#ifdef __GFP_NOTWIRED + if ((flags & __GFP_NOTWIRED) != 0) + req &= ~VM_ALLOC_WIRED; +#endif + if ((flags & M_ZERO) != 0) + req |= VM_ALLOC_ZERO; if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc(NULL, 0, req); if (page == NULL) @@ -153,9 +158,8 @@ linux_free_pages(vm_page_t page, unsigned int order) for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; - vm_page_lock(pgo); - vm_page_free(pgo); - vm_page_unlock(pgo); + if (vm_page_unwire_noq(pgo)) + vm_page_free(pgo); } } else { vm_offset_t vaddr; @@ -196,23 +200,11 @@ linux_get_user_pages_internal(vm_map_t map, unsigned long start, int nr_pages, vm_prot_t prot; size_t len; int count; - int i; prot = write ? 
(VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; len = ((size_t)nr_pages) << PAGE_SHIFT; count = vm_fault_quick_hold_pages(map, start, len, prot, pages, nr_pages); - if (count == -1) - return (-EFAULT); - - for (i = 0; i != nr_pages; i++) { - struct page *pg = pages[i]; - - vm_page_lock(pg); - vm_page_wire(pg); - vm_page_unhold(pg); - vm_page_unlock(pg); - } - return (nr_pages); + return (count == -1 ? -EFAULT : nr_pages); } int @@ -242,11 +234,6 @@ __get_user_pages_fast(unsigned long start, int nr_pages, int write, if (*mp == NULL) break; - vm_page_lock(*mp); - vm_page_wire(*mp); - vm_page_unhold(*mp); - vm_page_unlock(*mp); - if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* @@ -311,7 +298,7 @@ linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp) rv = vm_pager_get_pages(obj, &page, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(page); - vm_page_unwire(page, PQ_NONE); + vm_page_unwire_noq(page); vm_page_free(page); vm_page_unlock(page); VM_OBJECT_WUNLOCK(obj); diff --git a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c index 86e63d58aabe..fa46832ead2a 100644 --- a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c +++ b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c @@ -378,8 +378,7 @@ static void pagelist_page_free(vm_page_t pp) { vm_page_lock(pp); - if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) - vm_page_free(pp); + vm_page_unwire(pp, PQ_INACTIVE); vm_page_unlock(pp); } @@ -474,13 +473,6 @@ create_pagelist(char __user *buf, size_t count, unsigned short type, return (-ENOMEM); } - for (i = 0; i < actual_pages; i++) { - vm_page_lock(pages[i]); - vm_page_wire(pages[i]); - vm_page_unhold(pages[i]); - vm_page_unlock(pages[i]); - } - pagelist->length = count; pagelist->type = type; pagelist->offset = offset; diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index 9660ae83c806..6dd67c240567 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -112,15 +112,12 @@ free_pageset(struct tom_data *td, struct pageset *ps) if (ps->prsv.prsv_nppods > 0) t4_free_page_pods(&ps->prsv); - if (ps->flags & PS_WIRED) { - for (i = 0; i < ps->npages; i++) { - p = ps->pages[i]; - vm_page_lock(p); - vm_page_unwire(p, PQ_INACTIVE); - vm_page_unlock(p); - } - } else - vm_page_unhold_pages(ps->pages, ps->npages); + for (i = 0; i < ps->npages; i++) { + p = ps->pages[i]; + vm_page_lock(p); + vm_page_unwire(p, PQ_INACTIVE); + vm_page_unlock(p); + } mtx_lock(&ddp_orphan_pagesets_lock); TAILQ_INSERT_TAIL(&ddp_orphan_pagesets, ps, link); taskqueue_enqueue(taskqueue_thread, &ddp_orphan_task); @@ -150,7 +147,7 @@ recycle_pageset(struct toepcb *toep, struct pageset *ps) { DDP_ASSERT_LOCKED(toep); - if (!(toep->ddp.flags & DDP_DEAD) && ps->flags & PS_WIRED) { + if (!(toep->ddp.flags & DDP_DEAD)) { KASSERT(toep->ddp.cached_count + toep->ddp.active_count < nitems(toep->ddp.db), ("too many wired pagesets")); TAILQ_INSERT_HEAD(&toep->ddp.cached_pagesets, ps, link); @@ -1190,35 +1187,14 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid, return (0); } -static void -wire_pageset(struct pageset *ps) -{ - vm_page_t p; - int i; - - KASSERT(!(ps->flags & PS_WIRED), ("pageset already wired")); - - for (i = 0; i < ps->npages; i++) { - p = ps->pages[i]; - vm_page_lock(p); - vm_page_wire(p); - vm_page_unhold(p); - vm_page_unlock(p); - } - ps->flags |= PS_WIRED; -} - /* - * Prepare a pageset for DDP. 
This wires the pageset and sets up page - * pods. + * Prepare a pageset for DDP. This sets up page pods. */ static int prep_pageset(struct adapter *sc, struct toepcb *toep, struct pageset *ps) { struct tom_data *td = sc->tom_softc; - if (!(ps->flags & PS_WIRED)) - wire_pageset(ps); if (ps->prsv.prsv_nppods == 0 && !t4_alloc_page_pods_for_ps(&td->pr, ps)) { return (0); diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h index f32091755b95..44cb3b699cae 100644 --- a/sys/dev/cxgbe/tom/t4_tom.h +++ b/sys/dev/cxgbe/tom/t4_tom.h @@ -124,8 +124,7 @@ struct pageset { TAILQ_HEAD(pagesetq, pageset); -#define PS_WIRED 0x0001 /* Pages wired rather than held. */ -#define PS_PPODS_WRITTEN 0x0002 /* Page pods written to the card. */ +#define PS_PPODS_WRITTEN 0x0001 /* Page pods written to the card. */ #define EXT_FLAG_AIOTX EXT_FLAG_VENDOR1 diff --git a/sys/dev/drm2/ttm/ttm_page_alloc.c b/sys/dev/drm2/ttm/ttm_page_alloc.c index 4a5b9ed37d97..8895338797db 100644 --- a/sys/dev/drm2/ttm/ttm_page_alloc.c +++ b/sys/dev/drm2/ttm/ttm_page_alloc.c @@ -136,7 +136,7 @@ ttm_vm_page_free(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("ttm got unmanaged %p", m)); m->flags &= ~PG_FICTITIOUS; m->oflags |= VPO_UNMANAGED; - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } diff --git a/sys/dev/ti/if_ti.c b/sys/dev/ti/if_ti.c index 1aa9dd3ecbe1..b25eb64d2479 100644 --- a/sys/dev/ti/if_ti.c +++ b/sys/dev/ti/if_ti.c @@ -1623,7 +1623,7 @@ ti_newbuf_jumbo(struct ti_softc *sc, int idx, struct mbuf *m_old) } sf[i] = sf_buf_alloc(frame, SFB_NOWAIT); if (sf[i] == NULL) { - vm_page_unwire(frame, PQ_NONE); + vm_page_unwire_noq(frame); vm_page_free(frame); device_printf(sc->ti_dev, "buffer allocation " "failed -- packet dropped!\n"); diff --git a/sys/dev/xen/gntdev/gntdev.c b/sys/dev/xen/gntdev/gntdev.c index bf6685cf6ca5..ed42e177b860 100644 --- a/sys/dev/xen/gntdev/gntdev.c +++ b/sys/dev/xen/gntdev/gntdev.c @@ -278,7 +278,7 @@ gref_list_dtor(struct cleanup_data_struct *cleanup_data) continue; gnttab_free_grant_reference(gref->gref_id); } - vm_page_unwire(gref->page, PQ_NONE); + vm_page_unwire_noq(gref->page); vm_page_free(gref->page); gref->page = NULL; } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 2aee12d7f265..61a97e6019fb 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -1685,35 +1685,24 @@ __CONCAT(PMTYPE, extract_and_hold)(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pd_entry_t pde; pt_entry_t pte; vm_page_t m; - vm_paddr_t pa; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & - PG_PS_FRAME) | (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) + m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | + (va & PDRMASK)); } else { pte = pmap_pte_ufast(pmap, va, pde); if (pte != 0 && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, - &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); } - if (m != NULL) - vm_page_hold(m); + if (m != NULL && !vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -2465,7 +2454,7 @@ free_pv_chunk(struct pv_chunk *pc) /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); 
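The driver conversions above all follow the same caller-side pattern, summarized in this hypothetical sketch (not from the patch): vm_fault_quick_hold_pages() now returns wired pages, so the wire-then-unhold loop that the patch deletes from the linuxkpi, vchiq, and cxgbe code is no longer needed, and the pages are later released with vm_page_unwire(). The example_* names are illustrative only; the usual vm/vm_extern.h and vm/vm_page.h environment is assumed.

static int
example_hold_user_buf(vm_offset_t uva, size_t len, vm_page_t *pages, int npages)
{
	int count;

	/* The returned pages are wired; no separate vm_page_wire() step. */
	count = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
	    uva, len, VM_PROT_READ | VM_PROT_WRITE, pages, npages);
	return (count == -1 ? EFAULT : 0);
}

static void
example_release_user_buf(vm_page_t *pages, int npages)
{
	int i;

	for (i = 0; i < npages; i++) {
		/* The page lock is still required by vm_page_unwire() here. */
		vm_page_lock(pages[i]);
		vm_page_unwire(pages[i], PQ_ACTIVE);
		vm_page_unlock(pages[i]);
	}
}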
- vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 61ec0507e855..5ee14e255efc 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -978,9 +978,7 @@ exec_map_first_page(struct image_params *imgp) if (ma[0]->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(ma[0]); if (!vm_pager_has_page(object, 0, NULL, &after)) { - vm_page_lock(ma[0]); vm_page_free(ma[0]); - vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); return (EIO); } @@ -1004,11 +1002,8 @@ exec_map_first_page(struct image_params *imgp) initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { - for (i = 0; i < initial_pagein; i++) { - vm_page_lock(ma[i]); + for (i = 0; i < initial_pagein; i++) vm_page_free(ma[i]); - vm_page_unlock(ma[i]); - } VM_OBJECT_WUNLOCK(object); return (EIO); } @@ -1016,10 +1011,7 @@ exec_map_first_page(struct image_params *imgp) for (i = 1; i < initial_pagein; i++) vm_page_readahead_finish(ma[i]); } - vm_page_lock(ma[0]); - vm_page_hold(ma[0]); - vm_page_activate(ma[0]); - vm_page_unlock(ma[0]); + vm_page_wire(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); @@ -1038,7 +1030,7 @@ exec_unmap_first_page(struct image_params *imgp) sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_lock(m); - vm_page_unhold(m); + vm_page_unwire(m, PQ_ACTIVE); vm_page_unlock(m); } } diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index 50404ce57458..85fa7fc9ff63 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -119,76 +119,20 @@ sfstat_sysctl(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); -/* - * Detach mapped page and release resources back to the system. Called - * by mbuf(9) code when last reference to a page is freed. - */ -static void -sendfile_free_page(vm_page_t pg, bool nocache) -{ - bool freed; - - vm_page_lock(pg); - /* - * In either case check for the object going away on us. This can - * happen since we don't hold a reference to it. If so, we're - * responsible for freeing the page. In 'noncache' case try to free - * the page, but only if it is cheap to. - */ - if (vm_page_unwire_noq(pg)) { - vm_object_t obj; - - if ((obj = pg->object) == NULL) - vm_page_free(pg); - else { - freed = false; - if (nocache && !vm_page_xbusied(pg) && - VM_OBJECT_TRYWLOCK(obj)) { - /* Only free unmapped pages. */ - if (obj->ref_count == 0 || - !pmap_page_is_mapped(pg)) - /* - * The busy test before the object is - * locked cannot be relied upon. - */ - freed = vm_page_try_to_free(pg); - VM_OBJECT_WUNLOCK(obj); - } - if (!freed) { - /* - * If we were asked to not cache the page, place - * it near the head of the inactive queue so - * that it is reclaimed sooner. Otherwise, - * maintain LRU. 
- */ - if (nocache) - vm_page_deactivate_noreuse(pg); - else if (vm_page_active(pg)) - vm_page_reference(pg); - else - vm_page_deactivate(pg); - } - } - } - vm_page_unlock(pg); -} - static void sendfile_free_mext(struct mbuf *m) { struct sf_buf *sf; vm_page_t pg; - bool nocache; KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF, ("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m)); sf = m->m_ext.ext_arg1; pg = sf_buf_page(sf); - nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE; sf_buf_free(sf); - sendfile_free_page(pg, nocache); + vm_page_release(pg, (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0); if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { struct sendfile_sync *sfs = m->m_ext.ext_arg2; diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 4ea64fa0ec07..a1f81618119b 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -306,7 +306,7 @@ proc_rwmem(struct proc *p, struct uio *uio) * Release the page. */ vm_page_lock(m); - vm_page_unhold(m); + vm_page_unwire(m, PQ_ACTIVE); vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index fa169a52a2e2..9596b2b3e240 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -201,9 +201,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) printf( "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", obj, idx, m->valid, rv); - vm_page_lock(m); vm_page_free(m); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); return (EIO); } @@ -211,13 +209,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) vm_page_zero_invalid(m, TRUE); vm_page_xunbusy(m); } - vm_page_lock(m); - vm_page_hold(m); - if (vm_page_active(m)) - vm_page_reference(m); - else - vm_page_activate(m); - vm_page_unlock(m); + vm_page_wire(m); VM_OBJECT_WUNLOCK(obj); error = uiomove_fromphys(&m, offset, tlen, uio); if (uio->uio_rw == UIO_WRITE && error == 0) { @@ -227,7 +219,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) VM_OBJECT_WUNLOCK(obj); } vm_page_lock(m); - vm_page_unhold(m); + vm_page_unwire(m, PQ_ACTIVE); vm_page_unlock(m); return (error); diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 3f5c72919f27..2530973a8f86 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -2893,47 +2893,6 @@ vfs_vmio_iodone(struct buf *bp) } } -/* - * Unwire a page held by a buf and either free it or update the page queues to - * reflect its recent use. - */ -static void -vfs_vmio_unwire(struct buf *bp, vm_page_t m) -{ - bool freed; - - vm_page_lock(m); - if (vm_page_unwire_noq(m)) { - if ((bp->b_flags & B_DIRECT) != 0) - freed = vm_page_try_to_free(m); - else - freed = false; - if (!freed) { - /* - * Use a racy check of the valid bits to determine - * whether we can accelerate reclamation of the page. - * The valid bits will be stable unless the page is - * being mapped or is referenced by multiple buffers, - * and in those cases we expect races to be rare. At - * worst we will either accelerate reclamation of a - * valid page and violate LRU, or unnecessarily defer - * reclamation of an invalid page. - * - * The B_NOREUSE flag marks data that is not expected to - * be reused, so accelerate reclamation in that case - * too. Otherwise, maintain LRU. - */ - if (m->valid == 0 || (bp->b_flags & B_NOREUSE) != 0) - vm_page_deactivate_noreuse(m); - else if (vm_page_active(m)) - vm_page_reference(m); - else - vm_page_deactivate(m); - } - } - vm_page_unlock(m); -} - /* * Perform page invalidation when a buffer is released. 
The fully invalid * pages will be reclaimed later in vfs_vmio_truncate(). @@ -2983,7 +2942,8 @@ vfs_vmio_invalidate(struct buf *bp) } if (pmap_page_wired_mappings(m) == 0) vm_page_set_invalid(m, poffset, presid); - vfs_vmio_unwire(bp, m); + vm_page_release_locked(m, + (bp->b_flags & (B_NOREUSE | B_DIRECT)) != 0); resid -= presid; poffset = 0; } @@ -3021,7 +2981,10 @@ vfs_vmio_truncate(struct buf *bp, int desiredpages) m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); bp->b_pages[i] = NULL; - vfs_vmio_unwire(bp, m); + if (obj != NULL) + vm_page_release_locked(m, true); + else + vm_page_release(m, (bp->b_flags & B_NOREUSE) != 0); } if (obj != NULL) VM_OBJECT_WUNLOCK(obj); diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index a0de635ff396..235a18488400 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -795,26 +795,22 @@ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t pte, *ptep; - vm_paddr_t pa, pte_pa; + vm_paddr_t pa; vm_page_t m; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: ptep = pmap_pte(pmap, va); if (ptep != NULL) { pte = *ptep; if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) || (prot & VM_PROT_WRITE) == 0)) { - pte_pa = TLBLO_PTE_TO_PA(pte); - if (vm_page_pa_tryrelock(pmap, pte_pa, &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pte_pa); - vm_page_hold(m); + pa = TLBLO_PTE_TO_PA(pte); + m = PHYS_TO_VM_PAGE(pa); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1553,7 +1549,7 @@ free_pv_chunk(struct pv_chunk *pc) /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } diff --git a/sys/net/bpf_zerocopy.c b/sys/net/bpf_zerocopy.c index 7fb7f636eb93..ba852218561f 100644 --- a/sys/net/bpf_zerocopy.c +++ b/sys/net/bpf_zerocopy.c @@ -116,8 +116,7 @@ zbuf_page_free(vm_page_t pp) { vm_page_lock(pp); - if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) - vm_page_free(pp); + vm_page_unwire(pp, PQ_INACTIVE); vm_page_unlock(pp); } @@ -166,10 +165,6 @@ zbuf_sfbuf_get(struct vm_map *map, vm_offset_t uaddr) if (vm_fault_quick_hold_pages(map, uaddr, PAGE_SIZE, VM_PROT_READ | VM_PROT_WRITE, &pp, 1) < 0) return (NULL); - vm_page_lock(pp); - vm_page_wire(pp); - vm_page_unhold(pp); - vm_page_unlock(pp); sf = sf_buf_alloc(pp, SFB_NOWAIT); if (sf == NULL) { zbuf_page_free(pp); diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index d4c88eb8941f..186bdccbb069 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1262,22 +1262,17 @@ moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index bcf9893d9e99..33fb38b8a5db 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1553,21 +1553,15 @@ 
moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { - if (vm_page_pa_tryrelock(pmap, - pvo->pvo_pte.pa & LPTE_RPGN, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 009dff7d5af6..1172950cf540 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -2934,12 +2934,9 @@ mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, pte_t *pte; vm_page_t m; uint32_t pte_wbit; - vm_paddr_t pa; - + m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) { if (pmap == kernel_pmap) @@ -2948,14 +2945,11 @@ mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, pte_wbit = PTE_UW; if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) - goto retry; m = PHYS_TO_VM_PAGE(PTE_PA(pte)); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index eeeb48bb4972..1d9998137b5a 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -867,24 +867,19 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *l3p, l3; vm_paddr_t phys; - vm_paddr_t pa; vm_page_t m; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: l3p = pmap_l3(pmap, va); if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) { phys = PTE_TO_PHYS(l3); - if (vm_page_pa_tryrelock(pmap, phys, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(phys); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1646,7 +1641,7 @@ free_pv_chunk(struct pv_chunk *pc) /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index a469d6f26c07..40b5806f6c0e 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -847,19 +847,15 @@ pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot) { struct tte *tp; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pm); -retry: if (pm == kernel_pmap) { if (va >= VM_MIN_DIRECT_ADDRESS) { tp = NULL; m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va)); - (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va), - &pa); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } else { tp = tsb_kvtotte(va); if ((tp->tte_data & TD_V) == 0) @@ -869,12 +865,10 @@ pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot) tp = tsb_tte_lookup(pm, va); if (tp != NULL && ((tp->tte_data & TD_SW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa)) - goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp)); - vm_page_hold(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pm); return (m); } diff --git a/sys/sys/param.h b/sys/sys/param.h index fe51b414ca66..04d10e00c722 100644 --- a/sys/sys/param.h +++ 
b/sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1300024 /* Master, propagated to newvers */ +#define __FreeBSD_version 1300025 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h index 579d16756e99..3714a06983bf 100644 --- a/sys/sys/vmmeter.h +++ b/sys/sys/vmmeter.h @@ -153,6 +153,8 @@ extern domainset_t vm_severe_domains; #define VM_CNT_INC(var) VM_CNT_ADD(var, 1) #define VM_CNT_FETCH(var) counter_u64_fetch(vm_cnt.var) +extern u_long vm_user_wire_count; + static inline void vm_wire_add(int cnt) { diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index c27d60869def..df74fec3b72b 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -235,9 +235,7 @@ cdev_pager_free_page(vm_object_t object, vm_page_t m) if (object->type == OBJT_MGTDEVICE) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); pmap_remove_all(m); - vm_page_lock(m); - vm_page_remove(m); - vm_page_unlock(m); + (void)vm_page_remove(m); } else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); } diff --git a/sys/vm/memguard.c b/sys/vm/memguard.c index be926a3dc842..b1c17fb161e8 100644 --- a/sys/vm/memguard.c +++ b/sys/vm/memguard.c @@ -259,7 +259,7 @@ v2sizep(vm_offset_t va) if (pa == 0) panic("MemGuard detected double-free of %p", (void *)va); p = PHYS_TO_VM_PAGE(pa); - KASSERT(p->wire_count != 0 && p->queue == PQ_NONE, + KASSERT(vm_page_wired(p) && p->queue == PQ_NONE, ("MEMGUARD: Expected wired page %p in vtomgfifo!", p)); return (&p->plinks.memguard.p); } @@ -274,7 +274,7 @@ v2sizev(vm_offset_t va) if (pa == 0) panic("MemGuard detected double-free of %p", (void *)va); p = PHYS_TO_VM_PAGE(pa); - KASSERT(p->wire_count != 0 && p->queue == PQ_NONE, + KASSERT(vm_page_wired(p) && p->queue == PQ_NONE, ("MEMGUARD: Expected wired page %p in vtomgfifo!", p)); return (&p->plinks.memguard.v); } diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 851398010ea7..c6b514fe0fde 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1685,7 +1685,7 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) vm_page_dirty(m); #ifdef INVARIANTS vm_page_lock(m); - if (m->wire_count == 0 && m->queue == PQ_NONE) + if (!vm_page_wired(m) && m->queue == PQ_NONE) panic("page %p is neither wired nor queued", m); vm_page_unlock(m); #endif diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index f72f8f0cf7a9..1954e5c86e9a 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1274,9 +1274,9 @@ pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, zkva += PAGE_SIZE; } return ((void*)addr); - fail: +fail: TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { - vm_page_unwire(p, PQ_NONE); + vm_page_unwire_noq(p); vm_page_free(p); } return (NULL); @@ -1326,7 +1326,7 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, * exit. 
*/ TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { - vm_page_unwire(p, PQ_NONE); + vm_page_unwire_noq(p); vm_page_free(p); } return (NULL); @@ -1387,7 +1387,7 @@ pcpu_page_free(void *mem, vm_size_t size, uint8_t flags) for (curva = sva; curva < sva + size; curva += PAGE_SIZE) { paddr = pmap_kextract(curva); m = PHYS_TO_VM_PAGE(paddr); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } pmap_qremove(sva, size >> PAGE_SHIFT); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index eb5cb060c9c5..9effe66d50c4 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -250,18 +251,6 @@ vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot, vm_pager_page_unswapped(m); } -static void -vm_fault_fill_hold(vm_page_t *m_hold, vm_page_t m) -{ - - if (m_hold != NULL) { - *m_hold = m; - vm_page_lock(m); - vm_page_hold(m); - vm_page_unlock(m); - } -} - /* * Unlocks fs.first_object and fs.map on success. */ @@ -322,7 +311,10 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind); if (rv != KERN_SUCCESS) return (rv); - vm_fault_fill_hold(m_hold, m); + if (m_hold != NULL) { + *m_hold = m; + vm_page_wire(m); + } vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false); if (psind == 0 && !wired) vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true); @@ -498,14 +490,15 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type, VM_OBJECT_WLOCK(fs->first_object); m_mtx = NULL; for (i = 0; i < npages; i++) { - vm_page_change_lock(&m[i], &m_mtx); - if ((fault_flags & VM_FAULT_WIRE) != 0) + if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(&m[i]); - else + } else { + vm_page_change_lock(&m[i], &m_mtx); vm_page_activate(&m[i]); + } if (m_hold != NULL && m[i].pindex == fs->first_pindex) { *m_hold = &m[i]; - vm_page_hold(&m[i]); + vm_page_wire(&m[i]); } vm_page_xunbusy_maybelocked(&m[i]); } @@ -563,6 +556,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, struct faultstate fs; struct vnode *vp; struct domainset *dset; + struct mtx *mtx; vm_object_t next_object, retry_object; vm_offset_t e_end, e_start; vm_pindex_t retry_pindex; @@ -1004,7 +998,7 @@ RetryFault:; */ if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { vm_page_lock(fs.m); - if (fs.m->wire_count == 0) + if (!vm_page_wired(fs.m)) vm_page_free(fs.m); else vm_page_xunbusy_maybelocked(fs.m); @@ -1027,7 +1021,7 @@ RetryFault:; */ if (fs.object != fs.first_object) { vm_page_lock(fs.m); - if (fs.m->wire_count == 0) + if (!vm_page_wired(fs.m)) vm_page_free(fs.m); else vm_page_xunbusy_maybelocked(fs.m); @@ -1142,15 +1136,25 @@ RetryFault:; * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { - vm_page_lock(fs.m); - vm_page_dequeue(fs.m); - vm_page_remove(fs.m); - vm_page_unlock(fs.m); - vm_page_lock(fs.first_m); + + /* + * Keep the page wired to ensure that it is not + * freed by another thread, such as the page + * daemon, while it is disassociated from an + * object. 
+ */ + vm_page_wire(fs.m); + + mtx = NULL; + vm_page_change_lock(fs.m, &mtx); + (void)vm_page_remove(fs.m); + vm_page_change_lock(fs.first_m, &mtx); vm_page_replace_checked(fs.m, fs.first_object, fs.first_pindex, fs.first_m); vm_page_free(fs.first_m); - vm_page_unlock(fs.first_m); + vm_page_change_lock(fs.m, &mtx); + vm_page_unwire(fs.m, PQ_ACTIVE); + mtx_unlock(mtx); vm_page_dirty(fs.m); #if VM_NRESERVLEVEL > 0 /* @@ -1176,10 +1180,8 @@ RetryFault:; fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { - vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); - vm_page_unlock(fs.first_m); - + vm_page_lock(fs.m); vm_page_unwire(fs.m, PQ_INACTIVE); vm_page_unlock(fs.m); @@ -1315,21 +1317,22 @@ RetryFault:; faultcount > 0 ? behind : PFBAK, faultcount > 0 ? ahead : PFFOR, false); VM_OBJECT_WLOCK(fs.object); - vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ - if ((fault_flags & VM_FAULT_WIRE) != 0) + if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(fs.m); - else + } else { + vm_page_lock(fs.m); vm_page_activate(fs.m); + vm_page_unlock(fs.m); + } if (m_hold != NULL) { *m_hold = fs.m; - vm_page_hold(fs.m); + vm_page_wire(fs.m); } - vm_page_unlock(fs.m); vm_page_xunbusy(fs.m); /* @@ -1600,7 +1603,7 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, for (mp = ma; mp < ma + count; mp++) if (*mp != NULL) { vm_page_lock(*mp); - vm_page_unhold(*mp); + vm_page_unwire(*mp, PQ_INACTIVE); vm_page_unlock(*mp); } return (-1); @@ -1801,11 +1804,9 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, vm_page_lock(src_m); vm_page_unwire(src_m, PQ_INACTIVE); vm_page_unlock(src_m); - vm_page_lock(dst_m); vm_page_wire(dst_m); - vm_page_unlock(dst_m); } else { - KASSERT(dst_m->wire_count > 0, + KASSERT(vm_page_wired(dst_m), ("dst_m %p is not wired", dst_m)); } } else { diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index fcffcd3bc34d..3b57ce842c33 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -181,21 +181,8 @@ vslock(void *addr, size_t len) if (last < (vm_offset_t)addr || end < (vm_offset_t)addr) return (EINVAL); npages = atop(end - start); - if (npages > vm_page_max_wired) + if (npages > vm_page_max_user_wired) return (ENOMEM); -#if 0 - /* - * XXX - not yet - * - * The limit for transient usage of wired pages should be - * larger than for "permanent" wired pages (mlock()). - * - * Also, the sysctl code, which is the only present user - * of vslock(), does a hard loop on EAGAIN. 
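Relating the vslock() change above to its callers, here is a small hypothetical sketch (not part of the patch) of the usual pattern of wiring a user buffer around a copyout(); with this change a request that by itself exceeds vm.max_user_wired fails up front with ENOMEM rather than EAGAIN. The example_copyout_wired() name is invented for illustration.

static int
example_copyout_wired(const void *kbuf, void *ubuf, size_t len)
{
	int error;

	error = vslock(ubuf, len);
	if (error != 0)
		return (error);	/* ENOMEM if the request exceeds the limit */
	error = copyout(kbuf, ubuf, len);
	vsunlock(ubuf, len);
	return (error);
}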
- */ - if (npages + vm_wire_count() > vm_page_max_wired) - return (EAGAIN); -#endif error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end, VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); if (error == KERN_SUCCESS) { @@ -236,12 +223,14 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) VM_OBJECT_WLOCK(object); pindex = OFF_TO_IDX(offset); - m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); + m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | + VM_ALLOC_WIRED); if (m->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(m); rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); + vm_page_unwire_noq(m); vm_page_free(m); vm_page_unlock(m); m = NULL; @@ -249,10 +238,6 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) } vm_page_xunbusy(m); } - vm_page_lock(m); - vm_page_hold(m); - vm_page_activate(m); - vm_page_unlock(m); out: VM_OBJECT_WUNLOCK(object); return (m); @@ -286,7 +271,7 @@ vm_imgact_unmap_page(struct sf_buf *sf) sf_buf_free(sf); sched_unpin(); vm_page_lock(m); - vm_page_unhold(m); + vm_page_unwire(m, PQ_ACTIVE); vm_page_unlock(m); } @@ -420,10 +405,8 @@ vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages) m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); - vm_page_lock(m); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(ksobj); vm_object_deallocate(ksobj); diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index ac87a40a3b81..c382428e63df 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -586,7 +586,7 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) #endif for (; offset < end; offset += PAGE_SIZE, m = next) { next = vm_page_next(m); - vm_page_unwire(m, PQ_NONE); + vm_page_unwire_noq(m); vm_page_free(m); } VM_OBJECT_WUNLOCK(object); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 5371ed969c60..c77429f41554 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -2917,12 +2918,12 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, if (rv == KERN_SUCCESS && (!user_unwire || (entry->eflags & MAP_ENTRY_USER_WIRED))) { - if (user_unwire) - entry->eflags &= ~MAP_ENTRY_USER_WIRED; if (entry->wired_count == 1) vm_map_entry_unwire(map, entry); else entry->wired_count--; + if (user_unwire) + entry->eflags &= ~MAP_ENTRY_USER_WIRED; } KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, ("vm_map_unwire: in-transition flag missing %p", entry)); @@ -2942,6 +2943,28 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, return (rv); } +static void +vm_map_wire_user_count_sub(u_long npages) +{ + + atomic_subtract_long(&vm_user_wire_count, npages); +} + +static bool +vm_map_wire_user_count_add(u_long npages) +{ + u_long wired; + + wired = vm_user_wire_count; + do { + if (npages + wired > vm_page_max_user_wired) + return (false); + } while (!atomic_fcmpset_long(&vm_user_wire_count, &wired, + npages + wired)); + + return (true); +} + /* * vm_map_wire_entry_failure: * @@ -2978,37 +3001,49 @@ vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, entry->wired_count = -1; } +int +vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) +{ + int rv; + + vm_map_lock(map); + rv = vm_map_wire_locked(map, start, end, flags); + vm_map_unlock(map); + return (rv); +} + + /* - * vm_map_wire: + * 
vm_map_wire_locked: * - * Implements both kernel and user wiring. + * Implements both kernel and user wiring. Returns with the map locked, + * the map lock may be dropped. */ int -vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, - int flags) +vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) { vm_map_entry_t entry, first_entry, tmp_entry; vm_offset_t faddr, saved_end, saved_start; - unsigned int last_timestamp; + u_long npages; + u_int last_timestamp; int rv; boolean_t need_wakeup, result, user_wire; vm_prot_t prot; + VM_MAP_ASSERT_LOCKED(map); + if (start == end) return (KERN_SUCCESS); prot = 0; if (flags & VM_MAP_WIRE_WRITE) prot |= VM_PROT_WRITE; user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; - vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) first_entry = first_entry->next; - else { - vm_map_unlock(map); + else return (KERN_INVALID_ADDRESS); - } } last_timestamp = map->timestamp; entry = first_entry; @@ -3042,7 +3077,6 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, /* * first_entry has been deleted. */ - vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } end = saved_start; @@ -3082,13 +3116,22 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, } if (entry->wired_count == 0) { entry->wired_count++; - saved_start = entry->start; - saved_end = entry->end; + + npages = atop(entry->end - entry->start); + if (user_wire && !vm_map_wire_user_count_add(npages)) { + vm_map_wire_entry_failure(map, entry, + entry->start); + end = entry->end; + rv = KERN_RESOURCE_SHORTAGE; + goto done; + } /* * Release the map lock, relying on the in-transition * mark. Mark the map busy for fork. */ + saved_start = entry->start; + saved_end = entry->end; vm_map_busy(map); vm_map_unlock(map); @@ -3136,6 +3179,8 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, last_timestamp = map->timestamp; if (rv != KERN_SUCCESS) { vm_map_wire_entry_failure(map, entry, faddr); + if (user_wire) + vm_map_wire_user_count_sub(npages); end = entry->end; goto done; } @@ -3201,9 +3246,12 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, * Undo the wiring. Wiring succeeded on this entry * but failed on a later entry. */ - if (entry->wired_count == 1) + if (entry->wired_count == 1) { vm_map_entry_unwire(map, entry); - else + if (user_wire) + vm_map_wire_user_count_sub( + atop(entry->end - entry->start)); + } else entry->wired_count--; } next_entry_done: @@ -3220,7 +3268,6 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, } vm_map_simplify_entry(map, entry); } - vm_map_unlock(map); if (need_wakeup) vm_map_wakeup(map); return (rv); @@ -3338,13 +3385,18 @@ vm_map_sync( static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) { + vm_size_t size; VM_MAP_ASSERT_LOCKED(map); KASSERT(entry->wired_count > 0, ("vm_map_entry_unwire: entry %p isn't wired", entry)); + + size = entry->end - entry->start; + if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) + vm_map_wire_user_count_sub(atop(size)); pmap_unwire(map->pmap, entry->start, entry->end); - vm_object_unwire(entry->object.vm_object, entry->offset, entry->end - - entry->start, PQ_ACTIVE); + vm_object_unwire(entry->object.vm_object, entry->offset, size, + PQ_ACTIVE); entry->wired_count = 0; } @@ -4311,12 +4363,11 @@ vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry) * Heed the MAP_WIREFUTURE flag if it was set for this process. 
*/ if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) { - vm_map_unlock(map); - vm_map_wire(map, grow_start, grow_start + grow_amount, + rv = vm_map_wire_locked(map, grow_start, + grow_start + grow_amount, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); - vm_map_lock_read(map); - } else - vm_map_lock_downgrade(map); + } + vm_map_lock_downgrade(map); out: #ifdef RACCT diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 8e3033b7fc40..fffda1394d71 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -422,7 +422,8 @@ int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int); int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int); int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags); -int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, +int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags); +int vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags); long vmspace_swap_count(struct vmspace *vmspace); void vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add); diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index dfd500814d25..a4d3b5307073 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -97,6 +97,8 @@ struct vmmeter __read_mostly vm_cnt = { .v_wire_count = EARLY_COUNTER, }; +u_long __exclusive_cache_line vm_user_wire_count; + static void vmcounter_startup(void) { @@ -394,6 +396,8 @@ sysctl_handle_vmstat_proc(SYSCTL_HANDLER_ARGS) #define VM_STATS_UINT(var, descr) \ SYSCTL_UINT(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr) +#define VM_STATS_ULONG(var, descr) \ + SYSCTL_ULONG(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr) VM_STATS_UINT(v_page_size, "Page size in bytes"); VM_STATS_UINT(v_page_count, "Total number of pages in system"); @@ -411,6 +415,9 @@ VM_STATS_UINT(v_pageout_free_min, "Min pages reserved for kernel"); VM_STATS_UINT(v_interrupt_free_min, "Reserved pages for interrupt code"); VM_STATS_UINT(v_free_severe, "Severe page depletion point"); +SYSCTL_ULONG(_vm_stats_vm, OID_AUTO, v_user_wire_count, CTLFLAG_RD, + &vm_user_wire_count, 0, "User-wired virtual memory"); + #ifdef COMPAT_FREEBSD11 /* * Provide compatibility sysctls for the benefit of old utilities which exit diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 3e3de54a7924..597ffe29d95d 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1003,7 +1003,7 @@ kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len) if (last < addr || end < addr) return (EINVAL); npages = atop(end - start); - if (npages > vm_page_max_wired) + if (npages > vm_page_max_user_wired) return (ENOMEM); map = &proc->p_vmspace->vm_map; PROC_LOCK(proc); @@ -1013,8 +1013,6 @@ kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len) return (ENOMEM); } PROC_UNLOCK(proc); - if (npages + vm_wire_count() > vm_page_max_wired) - return (EAGAIN); #ifdef RACCT if (racct_enable) { PROC_LOCK(proc); @@ -1091,7 +1089,12 @@ sys_mlockall(struct thread *td, struct mlockall_args *uap) */ error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); - error = (error == KERN_SUCCESS ? 
0 : EAGAIN); + if (error == KERN_SUCCESS) + error = 0; + else if (error == KERN_RESOURCE_SHORTAGE) + error = ENOMEM; + else + error = EAGAIN; } #ifdef RACCT if (racct_enable && error != KERN_SUCCESS) { @@ -1558,10 +1561,14 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, * If the process has requested that all future mappings * be wired, then heed this. */ - if (map->flags & MAP_WIREFUTURE) { - vm_map_wire(map, *addr, *addr + size, - VM_MAP_WIRE_USER | ((flags & MAP_STACK) ? - VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES)); + if ((map->flags & MAP_WIREFUTURE) != 0) { + vm_map_lock(map); + if ((map->flags & MAP_WIREFUTURE) != 0) + (void)vm_map_wire_locked(map, *addr, + *addr + size, VM_MAP_WIRE_USER | + ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK : + VM_MAP_WIRE_NOHOLES)); + vm_map_unlock(map); } } return (vm_mmap_to_errno(rv)); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 22251c723c35..4fc0143f662d 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -699,12 +699,9 @@ static void vm_object_terminate_pages(vm_object_t object) { vm_page_t p, p_next; - struct mtx *mtx; VM_OBJECT_ASSERT_WLOCKED(object); - mtx = NULL; - /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them @@ -713,20 +710,15 @@ vm_object_terminate_pages(vm_object_t object) */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); - if ((object->flags & OBJ_UNMANAGED) == 0) - /* - * vm_page_free_prep() only needs the page - * lock for managed pages. - */ - vm_page_change_lock(p, &mtx); + KASSERT(p->object == object && p->ref_count > 0, + ("vm_object_terminate_pages: page %p is inconsistent", p)); + p->object = NULL; - if (p->wire_count != 0) - continue; - VM_CNT_INC(v_pfree); - vm_page_free(p); + if (vm_page_drop(p, -VPRC_OBJREF) == VPRC_OBJREF) { + VM_CNT_INC(v_pfree); + vm_page_free(p); + } } - if (mtx != NULL) - mtx_unlock(mtx); /* * If the object contained any pages, then reset it to an empty state. @@ -1212,7 +1204,7 @@ vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end, if (tm->valid != VM_PAGE_BITS_ALL) goto next_pindex; vm_page_lock(tm); - if (vm_page_held(tm)) { + if (vm_page_wired(tm)) { vm_page_unlock(tm); goto next_pindex; } @@ -1588,18 +1580,10 @@ vm_object_collapse_scan(vm_object_t object, int op) swap_pager_freespace(backing_object, p->pindex, 1); - /* - * Page is out of the parent object's range, we can - * simply destroy it. 
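
For illustration only (nothing here is added by the patch): after this change a wiring failure from mlock(2) or mlockall(2) due to either the per-process or the system-wide limit reports ENOMEM, and the limit and the current count are exported as the vm.max_user_wired and vm.stats.vm.v_user_wire_count sysctls. The following FreeBSD userspace sketch reads both values and then tries to lock a buffer; it assumes only the standard sysctlbyname(3), mlock(2) and err(3) interfaces.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/mman.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	u_long maxwired, wired;
	size_t len, bufsz = 1024 * 1024;
	char *buf;

	len = sizeof(maxwired);
	if (sysctlbyname("vm.max_user_wired", &maxwired, &len, NULL, 0) != 0)
		err(1, "vm.max_user_wired");
	len = sizeof(wired);
	if (sysctlbyname("vm.stats.vm.v_user_wire_count", &wired, &len,
	    NULL, 0) != 0)
		err(1, "vm.stats.vm.v_user_wire_count");
	printf("user-wired pages: %lu of %lu allowed\n", wired, maxwired);

	if ((buf = malloc(bufsz)) == NULL)
		err(1, "malloc");
	if (mlock(buf, bufsz) != 0) {
		/*
		 * ENOMEM now covers both the per-process and the
		 * system-wide (vm.max_user_wired) limits.
		 */
		warn("mlock");
	} else
		munlock(buf, bufsz);
	free(buf);
	return (0);
}
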
- */ - vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (p->wire_count == 0) + if (vm_page_remove(p)) vm_page_free(p); - else - vm_page_remove(p); - vm_page_unlock(p); continue; } @@ -1636,14 +1620,10 @@ vm_object_collapse_scan(vm_object_t object, int op) if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); - vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (p->wire_count == 0) + if (vm_page_remove(p)) vm_page_free(p); - else - vm_page_remove(p); - vm_page_unlock(p); continue; } @@ -1944,7 +1924,8 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, VM_OBJECT_WLOCK(object); goto again; } - if (p->wire_count != 0) { +wired: + if (vm_page_wired(p)) { if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) pmap_remove_all(p); @@ -1964,14 +1945,17 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { if ((options & OBJPR_NOTMAPPED) == 0 && - object->ref_count != 0) - pmap_remove_write(p); + object->ref_count != 0 && + !vm_page_try_remove_write(p)) + goto wired; if (p->dirty != 0) continue; } - if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) - pmap_remove_all(p); - vm_page_free(p); + if ((options & OBJPR_NOTMAPPED) == 0 && + object->ref_count != 0 && !vm_page_try_remove_all(p)) + goto wired; + if (vm_page_remove(p)) + vm_page_free(p); } if (mtx != NULL) mtx_unlock(mtx); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index a90a6f805b74..bc95878aeb80 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -62,23 +62,6 @@ * rights to redistribute these changes. */ -/* - * GENERAL RULES ON VM_PAGE MANIPULATION - * - * - A page queue lock is required when adding or removing a page from a - * page queue regardless of other locks or the busy state of a page. - * - * * In general, no thread besides the page daemon can acquire or - * hold more than one page queue lock at a time. - * - * * The page daemon can acquire and hold any pair of page queue - * locks in any order. - * - * - The object lock is required when inserting or removing - * pages from an object (vm_page_insert() or vm_page_remove()). - * - */ - /* * Resident memory management module. 
*/ @@ -178,16 +161,17 @@ static void vm_page_dequeue_complete(vm_page_t m); static void vm_page_enqueue(vm_page_t m, uint8_t queue); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, - vm_pindex_t pindex, vm_page_t mpred); + vm_pindex_t pindex, vm_page_t mpred, const bool alloc); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); +static void vm_page_mvqueue(vm_page_t m, int queue); static int vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_paddr_t high); static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req); -static int vm_page_import(void *arg, void **store, int cnt, int domain, +static int vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags); -static void vm_page_release(void *arg, void **store, int cnt); +static void vm_page_zone_release(void *arg, void **store, int cnt); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); @@ -221,7 +205,7 @@ vm_page_init_cache_zones(void *dummy __unused) continue; vmd->vmd_pgcache = uma_zcache_create("vm pgcache", sizeof(struct vm_page), NULL, NULL, NULL, NULL, - vm_page_import, vm_page_release, vmd, + vm_page_zone_import, vm_page_zone_release, vmd, UMA_ZONE_MAXBUCKET | UMA_ZONE_VM); (void )uma_zone_set_maxcache(vmd->vmd_pgcache, 0); } @@ -441,8 +425,7 @@ sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS) /* * Initialize a dummy page for use in scans of the specified paging queue. * In principle, this function only needs to set the flag PG_MARKER. - * Nonetheless, it write busies and initializes the hold count to one as - * safety precautions. + * Nonetheless, it write busies the page as a safety precaution. */ static void vm_page_init_marker(vm_page_t marker, int queue, uint8_t aflags) @@ -453,7 +436,6 @@ vm_page_init_marker(vm_page_t marker, int queue, uint8_t aflags) marker->aflags = aflags; marker->busy_lock = VPB_SINGLE_EXCLUSIVER; marker->queue = queue; - marker->hold_count = 1; } static void @@ -521,9 +503,8 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind) { m->object = NULL; - m->wire_count = 0; + m->ref_count = 0; m->busy_lock = VPB_UNBUSIED; - m->hold_count = 0; m->flags = m->aflags = 0; m->phys_addr = pa; m->queue = PQ_NONE; @@ -1105,31 +1086,6 @@ vm_page_change_lock(vm_page_t m, struct mtx **mtx) mtx_lock(mtx1); } -/* - * Keep page from being freed by the page daemon - * much of the same effect as wiring, except much lower - * overhead and should be used only for *very* temporary - * holding ("wiring"). - */ -void -vm_page_hold(vm_page_t mem) -{ - - vm_page_lock_assert(mem, MA_OWNED); - mem->hold_count++; -} - -void -vm_page_unhold(vm_page_t mem) -{ - - vm_page_lock_assert(mem, MA_OWNED); - KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!")); - --mem->hold_count; - if (mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0) - vm_page_free_toq(mem); -} - /* * vm_page_unhold_pages: * @@ -1143,7 +1099,7 @@ vm_page_unhold_pages(vm_page_t *ma, int count) mtx = NULL; for (; count != 0; count--) { vm_page_change_lock(*ma, &mtx); - vm_page_unhold(*ma); + vm_page_unwire(*ma, PQ_ACTIVE); ma++; } if (mtx != NULL) @@ -1210,7 +1166,8 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_SINGLE_EXCLUSIVER; - m->wire_count = 1; + /* Fictitious pages are unevictable. 
*/ + m->ref_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); @@ -1370,7 +1327,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) VM_OBJECT_ASSERT_WLOCKED(object); mpred = vm_radix_lookup_le(&object->rtree, pindex); - return (vm_page_insert_after(m, object, pindex, mpred)); + return (vm_page_insert_after(m, object, pindex, mpred, false)); } /* @@ -1381,11 +1338,14 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) * The page "mpred" must immediately precede the offset "pindex" within * the specified object. * + * "alloc" should be true if the page is being allocated and false + * otherwise. + * * The object must be locked. */ static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, - vm_page_t mpred) + vm_page_t mpred, const bool alloc) { vm_page_t msucc; @@ -1405,10 +1365,14 @@ vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, ("vm_page_insert_after: msucc doesn't succeed pindex")); /* - * Record the object/offset pair in this page + * Record the object/offset pair in this page. */ m->object = object; m->pindex = pindex; + if (alloc) + m->ref_count |= VPRC_OBJREF; + else + atomic_set_int(&m->ref_count, VPRC_OBJREF); /* * Now link into the object's ordered list of backed pages. @@ -1416,6 +1380,10 @@ vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, if (vm_radix_insert(&object->rtree, m)) { m->object = NULL; m->pindex = 0; + if (alloc) + m->ref_count &= ~VPRC_OBJREF; + else + atomic_clear_int(&m->ref_count, VPRC_OBJREF); return (1); } vm_page_insert_radixdone(m, object, mpred); @@ -1440,11 +1408,13 @@ vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); + KASSERT((m->ref_count & VPRC_OBJREF) != 0, + ("vm_page_insert_radixdone: page %p is missing object ref", m)); if (mpred != NULL) { KASSERT(mpred->object == object, - ("vm_page_insert_after: object doesn't contain mpred")); + ("vm_page_insert_radixdone: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, - ("vm_page_insert_after: mpred doesn't precede pindex")); + ("vm_page_insert_radixdone: mpred doesn't precede pindex")); } if (mpred != NULL) @@ -1475,21 +1445,21 @@ vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) * vm_page_remove: * * Removes the specified page from its containing object, but does not - * invalidate any backing storage. + * invalidate any backing storage. Returns true if the object's reference + * was the last reference to the page, and false otherwise. * - * The object must be locked. The page must be locked if it is managed. + * The object must be locked. */ -void +bool vm_page_remove(vm_page_t m) { vm_object_t object; vm_page_t mrem; - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); - if ((object = m->object) == NULL) - return; + object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT((m->ref_count & VPRC_OBJREF) != 0, + ("page %p is missing its object ref", m)); if (vm_page_xbusied(m)) vm_page_xunbusy_maybelocked(m); mrem = vm_radix_remove(&object->rtree, m->pindex); @@ -1511,7 +1481,12 @@ vm_page_remove(vm_page_t m) if (object->resident_page_count == 0 && object->type == OBJT_VNODE) vdrop(object->handle); + /* + * Release the object reference. The caller may free the page + * after this point. 
+ */ m->object = NULL; + return (vm_page_drop(m, -VPRC_OBJREF) == VPRC_OBJREF); } /* @@ -1592,8 +1567,6 @@ vm_page_prev(vm_page_t m) /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. - * - * The existing page must not be on a paging queue. */ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) @@ -1603,8 +1576,6 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(mnew->object == NULL, ("vm_page_replace: page %p already in object", mnew)); - KASSERT(mnew->queue == PQ_NONE, - ("vm_page_replace: new page %p is on a paging queue", mnew)); /* * This function mostly follows vm_page_insert() and @@ -1614,6 +1585,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) mnew->object = object; mnew->pindex = pindex; + atomic_set_int(&mnew->ref_count, VPRC_OBJREF); mold = vm_radix_replace(&object->rtree, mnew); KASSERT(mold->queue == PQ_NONE, ("vm_page_replace: old page %p is on a paging queue", mold)); @@ -1623,6 +1595,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; + atomic_clear_int(&mold->ref_count, VPRC_OBJREF); vm_page_xunbusy_maybelocked(mold); /* @@ -1660,6 +1633,7 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) VM_OBJECT_ASSERT_WLOCKED(new_object); + KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m)); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); @@ -1682,11 +1656,13 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) */ m->pindex = opidx; vm_page_lock(m); - vm_page_remove(m); + (void)vm_page_remove(m); /* Return back to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; m->object = new_object; + atomic_set_int(&m->ref_count, VPRC_OBJREF); + vm_page_unlock(m); vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); @@ -1905,15 +1881,15 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, * page is inserted into the object. */ vm_wire_add(1); - m->wire_count = 1; + m->ref_count = 1; } m->act_count = 0; if (object != NULL) { - if (vm_page_insert_after(m, object, pindex, mpred)) { + if (vm_page_insert_after(m, object, pindex, mpred, true)) { if (req & VM_ALLOC_WIRED) { vm_wire_sub(1); - m->wire_count = 0; + m->ref_count = 0; } KASSERT(m->object == NULL, ("page %p has object", m)); m->oflags = VPO_UNMANAGED; @@ -2107,11 +2083,12 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain, m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = busy_lock; if ((req & VM_ALLOC_WIRED) != 0) - m->wire_count = 1; + m->ref_count = 1; m->act_count = 0; m->oflags = oflags; if (object != NULL) { - if (vm_page_insert_after(m, object, pindex, mpred)) { + if (vm_page_insert_after(m, object, pindex, mpred, + true)) { if ((req & VM_ALLOC_WIRED) != 0) vm_wire_sub(npages); KASSERT(m->object == NULL, @@ -2120,7 +2097,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain, for (m = m_ret; m < &m_ret[npages]; m++) { if (m <= mpred && (req & VM_ALLOC_WIRED) != 0) - m->wire_count = 0; + m->ref_count = 0; m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. 
*/ @@ -2154,7 +2131,7 @@ vm_page_alloc_check(vm_page_t m) KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0, ("page %p has unexpected queue %d, flags %#x", m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK))); - KASSERT(!vm_page_held(m), ("page %p is held", m)); + KASSERT(m->ref_count == 0, ("page %p has references", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, @@ -2238,7 +2215,7 @@ vm_page_alloc_freelist_domain(int domain, int freelist, int req) * not belong to an object. */ vm_wire_add(1); - m->wire_count = 1; + m->ref_count = 1; } /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; @@ -2246,7 +2223,7 @@ vm_page_alloc_freelist_domain(int domain, int freelist, int req) } static int -vm_page_import(void *arg, void **store, int cnt, int domain, int flags) +vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags) { struct vm_domain *vmd; int i; @@ -2267,7 +2244,7 @@ vm_page_import(void *arg, void **store, int cnt, int domain, int flags) } static void -vm_page_release(void *arg, void **store, int cnt) +vm_page_zone_release(void *arg, void **store, int cnt) { struct vm_domain *vmd; vm_page_t m; @@ -2327,8 +2304,8 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, for (m = m_start; m < m_end && run_len < npages; m += m_inc) { KASSERT((m->flags & PG_MARKER) == 0, ("page %p is PG_MARKER", m)); - KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->wire_count == 1, - ("fictitious page %p has invalid wire count", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1, + ("fictitious page %p has invalid ref count", m)); /* * If the current page would be the start of a run, check its @@ -2357,7 +2334,7 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, vm_page_change_lock(m, &m_mtx); m_inc = 1; retry: - if (vm_page_held(m)) + if (vm_page_wired(m)) run_ext = 0; #if VM_NRESERVLEVEL > 0 else if ((level = vm_reserv_level(m)) >= 0 && @@ -2385,13 +2362,8 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, */ VM_OBJECT_RUNLOCK(object); goto retry; - } else if (vm_page_held(m)) { - run_ext = 0; - goto unlock; } } - KASSERT((m->flags & PG_UNHOLDFREE) == 0, - ("page %p is PG_UNHOLDFREE", m)); /* Don't care: PG_NODUMP, PG_ZERO. */ if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && @@ -2407,7 +2379,8 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && - vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) { + vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) && + !vm_page_wired(m)) { /* * The page is allocated but eligible for * relocation. 
Extend the current run by one @@ -2423,7 +2396,6 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, run_ext = 1; } else run_ext = 0; -unlock: VM_OBJECT_RUNLOCK(object); #if VM_NRESERVLEVEL > 0 } else if (level >= 0) { @@ -2527,7 +2499,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, */ vm_page_change_lock(m, &m_mtx); retry: - if (vm_page_held(m)) + if (vm_page_wired(m)) error = EBUSY; else if ((object = m->object) != NULL) { /* @@ -2544,13 +2516,8 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, */ VM_OBJECT_WUNLOCK(object); goto retry; - } else if (vm_page_held(m)) { - error = EBUSY; - goto unlock; } } - KASSERT((m->flags & PG_UNHOLDFREE) == 0, - ("page %p is PG_UNHOLDFREE", m)); /* Don't care: PG_NODUMP, PG_ZERO. */ if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && @@ -2559,7 +2526,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; else if (vm_page_queue(m) != PQ_NONE && - !vm_page_busied(m)) { + !vm_page_busied(m) && !vm_page_wired(m)) { KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); @@ -2608,8 +2575,6 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, error = ENOMEM; goto unlock; } - KASSERT(m_new->wire_count == 0, - ("page %p is wired", m_new)); /* * Replace "m" with the new page. For @@ -2617,8 +2582,11 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, * and dequeued. Finally, change "m" * as if vm_page_free() was called. */ - if (object->ref_count != 0) - pmap_remove_all(m); + if (object->ref_count != 0 && + !vm_page_try_remove_all(m)) { + error = EBUSY; + goto unlock; + } m_new->aflags = m->aflags & ~PGA_QUEUE_STATE_MASK; KASSERT(m_new->oflags == VPO_UNMANAGED, @@ -2645,7 +2613,6 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, } else { m->flags &= ~PG_ZERO; vm_page_dequeue(m); - vm_page_remove(m); if (vm_page_free_prep(m)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); @@ -3179,8 +3146,7 @@ vm_pqbatch_submit_page(vm_page_t m, uint8_t queue) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); - KASSERT(mtx_owned(vm_page_lockptr(m)) || - (m->object == NULL && (m->aflags & PGA_DEQUEUE) != 0), + KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL, ("missing synchronization for page %p", m)); KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue)); @@ -3309,7 +3275,7 @@ vm_page_dequeue_deferred_free(vm_page_t m) { uint8_t queue; - KASSERT(m->object == NULL, ("page %p has an object reference", m)); + KASSERT(m->ref_count == 0, ("page %p has references", m)); if ((m->aflags & PGA_DEQUEUE) != 0) return; @@ -3420,35 +3386,6 @@ vm_page_requeue(vm_page_t m) vm_pqbatch_submit_page(m, atomic_load_8(&m->queue)); } -/* - * vm_page_activate: - * - * Put the specified page on the active list (if appropriate). - * Ensure that act_count is at least ACT_INIT but do not otherwise - * mess with it. - * - * The page must be locked. 
- */ -void -vm_page_activate(vm_page_t m) -{ - - vm_page_assert_locked(m); - - if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0) - return; - if (vm_page_queue(m) == PQ_ACTIVE) { - if (m->act_count < ACT_INIT) - m->act_count = ACT_INIT; - return; - } - - vm_page_dequeue(m); - if (m->act_count < ACT_INIT) - m->act_count = ACT_INIT; - vm_page_enqueue(m, PQ_ACTIVE); -} - /* * vm_page_free_prep: * @@ -3463,6 +3400,12 @@ bool vm_page_free_prep(vm_page_t m) { + /* + * Synchronize with vm_page_drop(): ensure that all page modifications + * are visible before proceeding. + */ + atomic_thread_fence_acq(); + #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) { uint64_t *p; @@ -3473,11 +3416,10 @@ vm_page_free_prep(vm_page_t m) m, i, (uintmax_t)*p)); } #endif - if ((m->oflags & VPO_UNMANAGED) == 0) { - vm_page_lock_assert(m, MA_OWNED); + if ((m->oflags & VPO_UNMANAGED) == 0) KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_prep: freeing mapped page %p", m)); - } else + else KASSERT(m->queue == PQ_NONE, ("vm_page_free_prep: unmanaged page %p is queued", m)); VM_CNT_INC(v_tfree); @@ -3485,15 +3427,16 @@ vm_page_free_prep(vm_page_t m) if (vm_page_sbusied(m)) panic("vm_page_free_prep: freeing busy page %p", m); - vm_page_remove(m); + if (m->object != NULL) + (void)vm_page_remove(m); /* * If fictitious remove object association and * return. */ if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("fictitious page %p is not wired", m)); + KASSERT(m->ref_count == 1, + ("fictitious page %p is referenced", m)); KASSERT(m->queue == PQ_NONE, ("fictitious page %p is queued", m)); return (false); @@ -3510,15 +3453,8 @@ vm_page_free_prep(vm_page_t m) m->valid = 0; vm_page_undirty(m); - if (m->wire_count != 0) - panic("vm_page_free_prep: freeing wired page %p", m); - if (m->hold_count != 0) { - m->flags &= ~PG_ZERO; - KASSERT((m->flags & PG_UNHOLDFREE) == 0, - ("vm_page_free_prep: freeing PG_UNHOLDFREE page %p", m)); - m->flags |= PG_UNHOLDFREE; - return (false); - } + if (m->ref_count != 0) + panic("vm_page_free_prep: page %p has references", m); /* * Restore the default memory attribute to the page. @@ -3593,132 +3529,187 @@ vm_page_free_pages_toq(struct spglist *free, bool update_wire_count) } /* - * vm_page_wire: - * - * Mark this page as wired down. If the page is fictitious, then - * its wire count must remain one. - * - * The page must be locked. + * Mark this page as wired down, preventing reclamation by the page daemon + * or when the containing object is destroyed. 
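
A hedged aside, not something the patch itself adds: the acquire fence introduced in vm_page_free_prep() above pairs with the release fence issued by vm_page_drop() (defined later in the patch, in vm_page.h), so stores made to the page structure by the thread that dropped the last reference are visible to the thread that actually frees it. The C11 sketch below shows the call shape with hypothetical page_drop()/page_free_prep() stand-ins; main() exercises them single-threaded purely to keep the example runnable.

#include <stdatomic.h>
#include <stdio.h>

struct page {
	_Atomic unsigned int ref_count;
	int dirty;		/* plain field, protected by holding a reference */
};

/* Release one reference; prior stores become visible to the freeing thread. */
static unsigned int
page_drop(struct page *p)
{
	atomic_thread_fence(memory_order_release);
	return (atomic_fetch_sub_explicit(&p->ref_count, 1,
	    memory_order_relaxed));
}

/* Called by whichever thread observed the count reach zero. */
static void
page_free_prep(struct page *p)
{
	atomic_thread_fence(memory_order_acquire);
	printf("freeing page, dirty=%d\n", p->dirty);	/* sees the last owner's stores */
}

int
main(void)
{
	struct page pg;

	atomic_init(&pg.ref_count, 2);
	pg.dirty = 0;
	pg.dirty = 1;			/* last owner's update before dropping */
	(void)page_drop(&pg);
	if (page_drop(&pg) == 1)	/* this drop released the last reference */
		page_free_prep(&pg);
	return (0);
}
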
*/ void vm_page_wire(vm_page_t m) { + u_int old; - vm_page_assert_locked(m); - if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("vm_page_wire: fictitious page %p's wire count isn't one", - m)); - return; - } - if (m->wire_count == 0) { - KASSERT((m->oflags & VPO_UNMANAGED) == 0 || - m->queue == PQ_NONE, - ("vm_page_wire: unmanaged page %p is queued", m)); + KASSERT(m->object != NULL, + ("vm_page_wire: page %p does not belong to an object", m)); + if (!vm_page_busied(m)) + VM_OBJECT_ASSERT_LOCKED(m->object); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1, + ("vm_page_wire: fictitious page %p has zero refs", m)); + + old = atomic_fetchadd_int(&m->ref_count, 1); + KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX, + ("vm_page_wire: counter overflow for page %p", m)); + if (VPRC_WIRE_COUNT(old) == 0) vm_wire_add(1); - } - m->wire_count++; - KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); } /* - * vm_page_unwire: - * + * Attempt to wire a mapped page following a pmap lookup of that page. + * This may fail if a thread is concurrently tearing down mappings of the page. + */ +bool +vm_page_wire_mapped(vm_page_t m) +{ + u_int old; + + KASSERT(m->object != NULL, + ("vm_page_try_wire: page %p does not belong to an object", m)); + + old = m->ref_count; + do { + KASSERT(old > 0, + ("vm_page_try_wire: wiring unreferenced page %p", m)); + if ((old & VPRC_BLOCKED) != 0) + return (false); + } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1)); + + if (VPRC_WIRE_COUNT(old) == 0) + vm_wire_add(1); + return (true); +} + +/* * Release one wiring of the specified page, potentially allowing it to be - * paged out. Returns TRUE if the number of wirings transitions to zero and - * FALSE otherwise. + * paged out. * * Only managed pages belonging to an object can be paged out. If the number * of wirings transitions to zero and the page is eligible for page out, then - * the page is added to the specified paging queue (unless PQ_NONE is - * specified, in which case the page is dequeued if it belongs to a paging - * queue). - * - * If a page is fictitious, then its wire count must always be one. + * the page is added to the specified paging queue. If the released wiring + * represented the last reference to the page, the page is freed. * * A managed page must be locked. */ -bool +void vm_page_unwire(vm_page_t m, uint8_t queue) { - bool unwired; + u_int old; + bool queued; - KASSERT(queue < PQ_COUNT || queue == PQ_NONE, - ("vm_page_unwire: invalid queue %u request for page %p", - queue, m)); - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); + KASSERT(queue < PQ_COUNT, + ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); - unwired = vm_page_unwire_noq(m); - if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL) - return (unwired); + if ((m->oflags & VPO_UNMANAGED) != 0) { + if (vm_page_unwire_noq(m) && m->ref_count == 0) + vm_page_free(m); + return; + } - if (vm_page_queue(m) == queue) { - if (queue == PQ_ACTIVE) - vm_page_reference(m); - else if (queue != PQ_NONE) - vm_page_requeue(m); - } else { - vm_page_dequeue(m); - if (queue != PQ_NONE) { - vm_page_enqueue(m, queue); - if (queue == PQ_ACTIVE) - /* Initialize act_count. */ - vm_page_activate(m); + vm_page_assert_locked(m); + + /* + * Update LRU state before releasing the wiring reference. + * We only need to do this once since we hold the page lock. 
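
As an illustrative sketch rather than patch content: vm_page_wire_mapped() above takes a wiring with no lock held by looping on a compare-and-swap and backing off whenever VPRC_BLOCKED has been set by a thread that is tearing down mappings. A userspace version of that conditional-reference idiom follows, with BLOCKED and ref as hypothetical names.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BLOCKED	0x20000000u	/* stand-in for VPRC_BLOCKED */

static _Atomic unsigned int ref = 1;	/* the page already has one reference */

/* Try to take a reference; fail if new references are being blocked. */
static bool
ref_acquire_unless_blocked(void)
{
	unsigned int old = atomic_load(&ref);

	do {
		if ((old & BLOCKED) != 0)
			return (false);
	} while (!atomic_compare_exchange_weak(&ref, &old, old + 1));
	return (true);
}

int
main(void)
{
	printf("unblocked: %d\n", ref_acquire_unless_blocked());	/* 1 */
	atomic_fetch_or(&ref, BLOCKED);		/* e.g. a pmap_remove_all() in progress */
	printf("blocked:   %d\n", ref_acquire_unless_blocked());	/* 0 */
	atomic_fetch_and(&ref, ~BLOCKED);
	return (0);
}
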
+ * Use a release store when updating the reference count to + * synchronize with vm_page_free_prep(). + */ + old = m->ref_count; + queued = false; + do { + KASSERT(VPRC_WIRE_COUNT(old) > 0, + ("vm_page_unwire: wire count underflow for page %p", m)); + if (!queued && VPRC_WIRE_COUNT(old) == 1) { + if (queue == PQ_ACTIVE && vm_page_queue(m) == PQ_ACTIVE) + vm_page_reference(m); + else + vm_page_mvqueue(m, queue); + queued = true; } + } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); + + if (VPRC_WIRE_COUNT(old) == 1) { + vm_wire_sub(1); + if (old == 1) + vm_page_free(m); } - return (unwired); } /* - * - * vm_page_unwire_noq: - * * Unwire a page without (re-)inserting it into a page queue. It is up * to the caller to enqueue, requeue, or free the page as appropriate. - * In most cases, vm_page_unwire() should be used instead. + * In most cases involving managed pages, vm_page_unwire() should be used + * instead. */ bool vm_page_unwire_noq(vm_page_t m) { + u_int old; - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); - if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); + old = vm_page_drop(m, -1); + KASSERT(VPRC_WIRE_COUNT(old) != 0, + ("vm_page_unref: counter underflow for page %p", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1, + ("vm_page_unref: missing ref on fictitious page %p", m)); + + if (VPRC_WIRE_COUNT(old) > 1) return (false); + vm_wire_sub(1); + return (true); +} + +/* + * Ensure that the page is in the specified page queue. If the page is + * active or being moved to the active queue, ensure that its act_count is + * at least ACT_INIT but do not otherwise mess with it. Otherwise, ensure that + * the page is at the tail of its page queue. + * + * The page may be wired. The caller should release any wiring references + * before releasing the page lock, otherwise the page daemon may immediately + * dequeue the page. + */ +static __always_inline void +vm_page_mvqueue(vm_page_t m, const int nqueue) +{ + + vm_page_assert_locked(m); + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_mvqueue: page %p is unmanaged", m)); + + if (vm_page_queue(m) != nqueue) { + vm_page_dequeue(m); + vm_page_enqueue(m, nqueue); + } else if (nqueue != PQ_ACTIVE) { + vm_page_requeue(m); } - if (m->wire_count == 0) - panic("vm_page_unwire: page %p's wire count is zero", m); - m->wire_count--; - if (m->wire_count == 0) { - vm_wire_sub(1); - return (true); - } else - return (false); + + if (nqueue == PQ_ACTIVE && m->act_count < ACT_INIT) + m->act_count = ACT_INIT; +} + +/* + * Put the specified page on the active list (if appropriate). + * + * A managed page must be locked. + */ +void +vm_page_activate(vm_page_t m) +{ + + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) + return; + vm_page_mvqueue(m, PQ_ACTIVE); } /* * Move the specified page to the tail of the inactive queue, or requeue * the page if it is already in the inactive queue. * - * The page must be locked. + * A managed page must be locked. */ void vm_page_deactivate(vm_page_t m) { - vm_page_assert_locked(m); - - if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; - - if (!vm_page_inactive(m)) { - vm_page_dequeue(m); - vm_page_enqueue(m, PQ_INACTIVE); - } else - vm_page_requeue(m); + vm_page_mvqueue(m, PQ_INACTIVE); } /* @@ -3726,18 +3717,13 @@ vm_page_deactivate(vm_page_t m) * bypassing LRU. 
A marker page is used to maintain FIFO ordering. * As with regular enqueues, we use a per-CPU batch queue to reduce * contention on the page queue lock. - * - * The page must be locked. */ -void -vm_page_deactivate_noreuse(vm_page_t m) +static void +_vm_page_deactivate_noreuse(vm_page_t m) { vm_page_assert_locked(m); - if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0) - return; - if (!vm_page_inactive(m)) { vm_page_dequeue(m); m->queue = PQ_INACTIVE; @@ -3747,38 +3733,40 @@ vm_page_deactivate_noreuse(vm_page_t m) vm_pqbatch_submit_page(m, PQ_INACTIVE); } +void +vm_page_deactivate_noreuse(vm_page_t m) +{ + + KASSERT(m->object != NULL, + ("vm_page_deactivate_noreuse: page %p has no object", m)); + + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_wired(m)) + _vm_page_deactivate_noreuse(m); +} + /* - * vm_page_launder + * Put a page in the laundry, or requeue it if it is already there. * - * Put a page in the laundry, or requeue it if it is already there. + * The page must be locked. */ void vm_page_launder(vm_page_t m) { - vm_page_assert_locked(m); - if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; - - if (vm_page_in_laundry(m)) - vm_page_requeue(m); - else { - vm_page_dequeue(m); - vm_page_enqueue(m, PQ_LAUNDRY); - } + vm_page_mvqueue(m, PQ_LAUNDRY); } /* - * vm_page_unswappable - * - * Put a page in the PQ_UNSWAPPABLE holding queue. + * Put a page in the PQ_UNSWAPPABLE holding queue. */ void vm_page_unswappable(vm_page_t m) { vm_page_assert_locked(m); - KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0, + KASSERT(!vm_page_wired(m) && (m->oflags & VPO_UNMANAGED) == 0, ("page %p already unswappable", m)); vm_page_dequeue(m); @@ -3786,30 +3774,158 @@ vm_page_unswappable(vm_page_t m) } /* - * Attempt to free the page. If it cannot be freed, do nothing. Returns true - * if the page is freed and false otherwise. - * - * The page must be managed. The page and its containing object must be - * locked. + * Release a wired page to the page cache, and optionally attempt to free it. + * The page's object must be locked. See the comment above vm_page_release(). */ -bool -vm_page_try_to_free(vm_page_t m) +void +vm_page_release_locked(vm_page_t m, bool nocache) { + vm_object_t object; + + object = m->object; + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_release_locked: page %p is unmanaged", m)); + + if (!vm_page_unwire_noq(m)) + return; + if (m->valid == 0 || nocache) { + if ((object->ref_count == 0 || !pmap_page_is_mapped(m)) && + m->dirty == 0 && !vm_page_busied(m) && !vm_page_wired(m)) { + vm_page_free(m); + } else { + vm_page_lock(m); + vm_page_deactivate_noreuse(m); + vm_page_unlock(m); + } + } else { + vm_page_lock(m); + if (vm_page_active(m)) + vm_page_reference(m); + else + vm_page_deactivate(m); + vm_page_unlock(m); + } +} + +/* + * Release a wired page to the page cache, and optionally attempt to free it. + * If the caller wishes to attempt to free the page, and the page is mapped, + * dirty, busy or wired, we do not free it but instead place it near the head of + * the inactive queue to accelerate reclamation. + */ +void +vm_page_release(vm_page_t m, bool nocache) +{ + vm_object_t object; + u_int old; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_release: page %p is unmanaged", m)); + + if (nocache) { + /* + * Attempt to free the page. 
The page may be renamed between + * objects so we must verify the page's object pointer after + * acquiring the lock and retry if they do not match. + */ + while ((object = m->object) != NULL) { + if (!VM_OBJECT_TRYWLOCK(object)) { + object = NULL; + break; + } + if (m->object == object) + break; + VM_OBJECT_WUNLOCK(object); + } + if (object != NULL) { + vm_page_release_locked(m, nocache); + VM_OBJECT_WUNLOCK(object); + return; + } + } + + /* + * Update LRU state before releasing the wiring reference. + * Use a release store when updating the reference count to + * synchronize with vm_page_free_prep(). + */ + old = m->ref_count; + do { + if (VPRC_WIRE_COUNT(old) == 1) { + vm_page_lock(m); + + /* + * Use a racy check of the valid bits to determine + * whether we can accelerate reclamation of the page. + * The valid bits will be stable unless the page is + * being mapped or is referenced by multiple buffers, + * and in those cases we expect races to be rare. At + * worst we will either accelerate reclamation of a + * valid page and violate LRU, or unnecessarily defer + * reclamation of an invalid page. + */ + if (m->valid == 0 || nocache) + _vm_page_deactivate_noreuse(m); + else if (vm_page_active(m)) + vm_page_reference(m); + else + vm_page_mvqueue(m, PQ_INACTIVE); + vm_page_unlock(m); + } + } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); + + if (VPRC_WIRE_COUNT(old) == 1) { + vm_wire_sub(1); + if (old == 1) + vm_page_free(m); + } +} + +/* + * Attempt to invoke the requested operation while blocking new wirings of the + * page. + */ +static bool +vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t)) +{ + u_int old; vm_page_assert_locked(m); - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); - if (m->dirty != 0 || vm_page_held(m) || vm_page_busied(m)) - return (false); - if (m->object->ref_count != 0) { - pmap_remove_all(m); - if (m->dirty != 0) + KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_try_blocked_op: page %p has no object", m)); + KASSERT(!vm_page_busied(m), + ("vm_page_try_blocked_op: page %p is busy", m)); + VM_OBJECT_ASSERT_LOCKED(m->object); + + old = m->ref_count; + do { + KASSERT(old != 0, + ("vm_page_try_blocked_op: page %p has no references", m)); + if (VPRC_WIRE_COUNT(old) != 0) return (false); - } - vm_page_free(m); + } while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED)); + + (op)(m); + + atomic_clear_int(&m->ref_count, VPRC_BLOCKED); return (true); } +bool +vm_page_try_remove_all(vm_page_t m) +{ + + return (vm_page_try_blocked_op(m, pmap_remove_all)); +} + +bool +vm_page_try_remove_write(vm_page_t m) +{ + + return (vm_page_try_blocked_op(m, pmap_remove_write)); +} + /* * vm_page_advise * @@ -3904,11 +4020,8 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) VM_OBJECT_WLOCK(object); goto retrylookup; } else { - if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock(m); + if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - vm_page_unlock(m); - } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); @@ -4006,11 +4119,8 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, VM_OBJECT_WLOCK(object); goto retrylookup; } - if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock(m); + if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - vm_page_unlock(m); - } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); @@ -4539,10 +4649,10 @@ 
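
Illustration only, not part of the patch: vm_page_try_blocked_op() above is the other half of the VPRC_BLOCKED protocol. It sets the blocked bit only while the wire count is zero, runs the pmap operation, and then clears the bit, which is why vm_page_try_remove_all() and vm_page_try_remove_write() can double as wiring checks. A self-contained sketch of that idea, using hypothetical names that mirror the patch's bit layout:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BLOCKED		0x20000000u	/* stand-in for VPRC_BLOCKED */
#define OBJREF		0x40000000u	/* stand-in for VPRC_OBJREF */
#define WIRE_COUNT(c)	((c) & ~(BLOCKED | OBJREF))

static _Atomic unsigned int ref = OBJREF;	/* owned by its object, not wired */

/* Run "op" while blocking new wirings; fail if the page is already wired. */
static bool
try_blocked_op(void (*op)(void))
{
	unsigned int old = atomic_load(&ref);

	do {
		if (WIRE_COUNT(old) != 0)
			return (false);
	} while (!atomic_compare_exchange_weak(&ref, &old, old | BLOCKED));
	(*op)();				/* e.g. pmap_remove_all() */
	atomic_fetch_and(&ref, ~BLOCKED);
	return (true);
}

static void
op(void)
{
	printf("removing all mappings\n");
}

int
main(void)
{
	printf("unwired page: %d\n", try_blocked_op(op));	/* 1 */
	atomic_fetch_add(&ref, 1);				/* wire the page */
	printf("wired page:   %d\n", try_blocked_op(op));	/* 0 */
	return (0);
}
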
DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) else m = (vm_page_t)addr; db_printf( - "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" + "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, - m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, + m->queue, m->ref_count, m->aflags, m->oflags, m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 401d3498f786..2a6fa6ed5375 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -115,24 +115,19 @@ * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). * - * The page structure contains two counters which prevent page reuse. - * Both counters are protected by the page lock (P). The hold - * counter counts transient references obtained via a pmap lookup, and - * is also used to prevent page reclamation in situations where it is - * undesirable to block other accesses to the page. The wire counter - * is used to implement mlock(2) and is non-zero for pages containing - * kernel memory. Pages that are wired or held will not be reclaimed - * or laundered by the page daemon, but are treated differently during - * a page queue scan: held pages remain at their position in the queue, - * while wired pages are removed from the queue and must later be - * re-enqueued appropriately by the unwiring thread. It is legal to - * call vm_page_free() on a held page; doing so causes it to be removed - * from its object and page queue, and the page is released to the - * allocator once the last hold reference is dropped. In contrast, - * wired pages may not be freed. - * - * In some pmap implementations, the wire count of a page table page is - * used to track the number of populated entries. + * The ref_count field tracks references to the page. References that + * prevent the page from being reclaimable are called wirings and are + * counted in the low bits of ref_count. Upper bits are reserved for + * special references that do not prevent reclamation of the page. + * Specifically, the containing object, if any, holds such a reference, + * and the page daemon takes a transient reference when it is scanning + * a page. Updates to ref_count are atomic unless the page is + * unallocated. To wire a page after it has been allocated, the object + * lock must be held, or the page must be busy, or the wiring thread + * must atomically take a reference and verify that the VPRC_BLOCKED + * bit is not set. No locks are required to unwire a page, but care + * must be taken to free the page if that wiring represented the last + * reference to the page. * * The busy lock is an embedded reader-writer lock which protects the * page's contents and identity (i.e., its tuple) and @@ -155,7 +150,11 @@ * be held. It is invalid for a page's queue field to transition * between two distinct page queue indices. That is, when updating * the queue field, either the new value or the old value must be - * PQ_NONE. + * PQ_NONE. There is one exception to this rule: the page daemon may + * transition the queue field from PQ_INACTIVE to PQ_NONE immediately + * prior to freeing a page during an inactive queue scan. At that + * point the page will have already been physically dequeued, and it + * is known that no other references to that vm_page structure exist. 
* * To avoid contention on page queue locks, page queue operations * (enqueue, dequeue, requeue) are batched using per-CPU queues. @@ -168,7 +167,9 @@ * may be freed before its pending batch queue entries have been * processed. The page lock (P) must be held to schedule a batched * queue operation, and the page queue lock must be held in order to - * process batch queue entries for the page queue. + * process batch queue entries for the page queue. When the page is + * being freed, the thread freeing the page is permitted to schedule + * a dequeue of the page without the page lock held. */ #if PAGE_SIZE == 4096 @@ -198,21 +199,23 @@ struct vm_page { } memguard; } plinks; TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - vm_object_t object; /* which object am I in (O,P) */ + vm_object_t object; /* which object am I in (O) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page (C) */ struct md_page md; /* machine dependent stuff */ - u_int wire_count; /* wired down maps refs (P) */ + union { + u_int wire_count; + u_int ref_count; /* page references */ + }; volatile u_int busy_lock; /* busy owners lock */ - uint16_t hold_count; /* page hold count (P) */ uint16_t flags; /* page PG_* flags (P) */ + uint8_t order; /* index of the buddy queue (F) */ + uint8_t pool; /* vm_phys freepool index (F) */ uint8_t aflags; /* access is atomic */ uint8_t oflags; /* page VPO_* flags (O) */ uint8_t queue; /* page queue index (Q) */ int8_t psind; /* pagesizes[] index (O) */ int8_t segind; /* vm_phys segment index (C) */ - uint8_t order; /* index of the buddy queue (F) */ - uint8_t pool; /* vm_phys freepool index (F) */ u_char act_count; /* page usage count (P) */ /* NOTE that these must support one bit per DEV_BSIZE in a page */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ @@ -220,6 +223,34 @@ struct vm_page { vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ }; +/* + * Special bits used in the ref_count field. + * + * ref_count is normally used to count wirings that prevent the page from being + * reclaimed, but also supports several special types of references that do not + * prevent reclamation. Accesses to the ref_count field must be atomic unless + * the page is unallocated. + * + * VPRC_PDREF is a transient reference acquired by the page daemon when + * scanning. Pages may be dequeued without the page lock held when they are + * being freed, and this reference ensures that the page daemon is not + * simultaneously manipulating the queue state of the page. The page lock must + * be held to set or clear this bit. + * + * VPRC_OBJREF is the reference held by the containing object. It can set or + * cleared only when the corresponding object's write lock is held. + * + * VPRC_BLOCKED is used to atomically block wirings via pmap lookups while + * attempting to tear down all mappings of a given page. The page lock and + * object write lock must both be held in order to set or clear this bit. 
+ */ +#define VPRC_BLOCKED 0x20000000u /* mappings are being removed */ +#define VPRC_OBJREF 0x40000000u /* object reference, cleared with (O) */ +#define VPRC_PDREF 0x80000000u /* page daemon reference for scanning */ +#define _VPRC_REFMASK (VPRC_BLOCKED | VPRC_OBJREF | VPRC_PDREF) +#define VPRC_WIRE_COUNT(c) ((c) & ~_VPRC_REFMASK) +#define VPRC_WIRE_COUNT_MAX (~_VPRC_REFMASK) + /* * Page flags stored in oflags: * @@ -383,7 +414,6 @@ extern struct mtx_padalign pa_lock[]; #define PG_ZERO 0x0008 /* page is zeroed */ #define PG_MARKER 0x0010 /* special queue marker page */ #define PG_NODUMP 0x0080 /* don't include this page in a dump */ -#define PG_UNHOLDFREE 0x0100 /* delayed free of a held page */ /* * Misc constants. @@ -511,8 +541,6 @@ malloc2vm_flags(int malloc_flags) void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared); void vm_page_flash(vm_page_t m); -void vm_page_hold(vm_page_t mem); -void vm_page_unhold(vm_page_t mem); void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); @@ -561,8 +589,10 @@ bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); void vm_page_reference(vm_page_t m); -void vm_page_remove (vm_page_t); -int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); +void vm_page_release(vm_page_t m, bool nocache); +void vm_page_release_locked(vm_page_t m, bool nocache); +bool vm_page_remove(vm_page_t); +int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex); void vm_page_requeue(vm_page_t m); @@ -573,14 +603,16 @@ void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); -bool vm_page_try_to_free(vm_page_t m); +bool vm_page_try_remove_all(vm_page_t m); +bool vm_page_try_remove_write(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unswappable(vm_page_t m); -bool vm_page_unwire(vm_page_t m, uint8_t queue); +void vm_page_unwire(vm_page_t m, uint8_t queue); bool vm_page_unwire_noq(vm_page_t m); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); -void vm_page_wire (vm_page_t); +void vm_page_wire(vm_page_t); +bool vm_page_wire_mapped(vm_page_t m); void vm_page_xunbusy_hard(vm_page_t m); void vm_page_xunbusy_maybelocked(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); @@ -811,15 +843,32 @@ vm_page_in_laundry(vm_page_t m) } /* - * vm_page_held: + * vm_page_drop: + * + * Release a reference to a page and return the old reference count. + */ +static inline u_int +vm_page_drop(vm_page_t m, u_int val) +{ + + /* + * Synchronize with vm_page_free_prep(): ensure that all updates to the + * page structure are visible before it is freed. + */ + atomic_thread_fence_rel(); + return (atomic_fetchadd_int(&m->ref_count, val)); +} + +/* + * vm_page_wired: * * Return true if a reference prevents the page from being reclaimable. 
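
A small worked example, not added by the patch: with the layout defined above, wirings occupy the low bits of ref_count and the three special references occupy the top bits, so VPRC_WIRE_COUNT() and the new vm_page_wired() reduce to masking a single word. The constants below are copied from the patch; the scenario is made up.

#include <assert.h>
#include <stdio.h>

#define VPRC_BLOCKED		0x20000000u
#define VPRC_OBJREF		0x40000000u
#define VPRC_PDREF		0x80000000u
#define _VPRC_REFMASK		(VPRC_BLOCKED | VPRC_OBJREF | VPRC_PDREF)
#define VPRC_WIRE_COUNT(c)	((c) & ~_VPRC_REFMASK)
#define VPRC_WIRE_COUNT_MAX	(~_VPRC_REFMASK)

int
main(void)
{
	/* A page owned by its object, wired twice, being scanned by the daemon. */
	unsigned int ref_count = VPRC_OBJREF | VPRC_PDREF | 2;

	assert(VPRC_WIRE_COUNT(ref_count) == 2);	/* only wirings block reclamation */
	assert(VPRC_WIRE_COUNT(ref_count) <= VPRC_WIRE_COUNT_MAX);
	printf("wired: %s, raw ref_count: 0x%x\n",
	    VPRC_WIRE_COUNT(ref_count) > 0 ? "yes" : "no", ref_count);
	return (0);
}
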
*/ static inline bool -vm_page_held(vm_page_t m) +vm_page_wired(vm_page_t m) { - return (m->hold_count > 0 || m->wire_count > 0); + return (VPRC_WIRE_COUNT(m->ref_count) > 0); } #endif /* _KERNEL */ diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 923ba703b9ab..d519bf64f563 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -194,9 +194,10 @@ SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RWTUN, int vm_pageout_page_count = 32; -int vm_page_max_wired; /* XXX max # of wired pages system-wide */ -SYSCTL_INT(_vm, OID_AUTO, max_wired, - CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); +u_long vm_page_max_user_wired; +SYSCTL_ULONG(_vm, OID_AUTO, max_user_wired, CTLFLAG_RW, + &vm_page_max_user_wired, 0, + "system-wide limit to user-wired page count"); static u_int isqrt(u_int num); static int vm_pageout_launder(struct vm_domain *vmd, int launder, @@ -313,6 +314,54 @@ vm_pageout_next(struct scan_state *ss, const bool dequeue) return (vm_batchqueue_pop(&ss->bq)); } +/* + * Lock a page and set a reference bit to ensure that it does not get freed out + * from under us. + */ +static bool +vm_pageout_lock_and_hold_page(vm_page_t m, struct mtx **mtx) +{ + u_int ref_count; + + vm_page_change_lock(m, mtx); + + ref_count = m->ref_count; + do { + if (ref_count == 0) + return (false); + } while (!atomic_fcmpset_int(&m->ref_count, &ref_count, ref_count | + VPRC_PDREF)); + return (true); +} + +/* + * Drop the page daemon's transient page reference and determine whether we need + * to free the page. + */ +static bool +vm_pageout_drop_page(vm_page_t m) +{ + + KASSERT((m->ref_count & VPRC_PDREF) != 0, + ("vm_pageout_drop_page: page %p missing pagedaemon ref", m)); + return (vm_page_drop(m, -VPRC_PDREF) == VPRC_PDREF); +} + +/* + * Drop the page daemon's transient reference once we know that the page's + * identity is stable. 
+ */ +static void +vm_pageout_drop_page_quick(vm_page_t m) +{ + + VM_OBJECT_ASSERT_LOCKED(m->object); + KASSERT((m->ref_count & (VPRC_OBJREF | VPRC_PDREF)) == + (VPRC_OBJREF | VPRC_PDREF), + ("vm_pageout_drop_page_quick: page %p missing refs", m)); + atomic_clear_int(&m->ref_count, VPRC_PDREF); +} + /* * Scan for pages at adjacent offsets within the given page's object that are * eligible for laundering, form a cluster of these pages and the given page, @@ -326,16 +375,11 @@ vm_pageout_cluster(vm_page_t m) vm_pindex_t pindex; int ib, is, page_base, pageout_count; - vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); pindex = m->pindex; vm_page_assert_unbusied(m); - KASSERT(!vm_page_held(m), ("page %p is held", m)); - - pmap_remove_write(m); - vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; @@ -361,7 +405,8 @@ vm_pageout_cluster(vm_page_t m) ib = 0; break; } - if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { + if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p) || + vm_page_wired(p)) { ib = 0; break; } @@ -371,12 +416,11 @@ vm_pageout_cluster(vm_page_t m) break; } vm_page_lock(p); - if (vm_page_held(p) || !vm_page_in_laundry(p)) { + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { vm_page_unlock(p); ib = 0; break; } - pmap_remove_write(p); vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; @@ -391,17 +435,17 @@ vm_pageout_cluster(vm_page_t m) } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { - if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) + if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p) || + vm_page_wired(p)) break; vm_page_test_dirty(p); if (p->dirty == 0) break; vm_page_lock(p); - if (vm_page_held(p) || !vm_page_in_laundry(p)) { + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { vm_page_unlock(p); break; } - pmap_remove_write(p); vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; @@ -649,13 +693,20 @@ vm_pageout_clean(vm_page_t m, int *numpagedout) * The page may have been busied or referenced while the object * and page locks were released. */ - if (vm_page_busied(m) || vm_page_held(m)) { + if (vm_page_busied(m) || vm_page_wired(m)) { vm_page_unlock(m); error = EBUSY; goto unlock_all; } } + if (!vm_page_try_remove_write(m)) { + vm_page_unlock(m); + error = EBUSY; + goto unlock_all; + } + vm_page_unlock(m); + /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the @@ -725,7 +776,8 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) if (__predict_false((m->flags & PG_MARKER) != 0)) continue; - vm_page_change_lock(m, &mtx); + if (!vm_pageout_lock_and_hold_page(m, &mtx)) + continue; recheck: /* @@ -733,7 +785,7 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) * while locks were dropped. */ if (vm_page_queue(m) != queue) - continue; + goto drop; /* * A requeue was requested, so this page gets a second @@ -741,21 +793,19 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) */ if ((m->aflags & PGA_REQUEUE) != 0) { vm_page_requeue(m); - continue; + goto drop; } /* - * Held pages are essentially stuck in the queue. - * * Wired pages may not be freed. Complete their removal * from the queue now to avoid needless revisits during - * future scans. + * future scans. This check is racy and must be reverified once + * we hold the object lock and have verified that the page + * is not busy. 
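
Another hedged sketch, not part of the patch: the pageout helpers above pin a page with a transient VPRC_PDREF reference that is taken only if the page still has any reference at all, and the scanner frees the page itself when dropping that reference leaves nothing behind. The userspace analogue below shows that acquire-if-live / free-if-last pattern, with scan_hold()/scan_drop() as hypothetical names.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define PDREF	0x80000000u	/* stand-in for VPRC_PDREF */

struct page {
	_Atomic unsigned int ref_count;
};

/* Take the scan reference only if the page still has some reference. */
static bool
scan_hold(struct page *p)
{
	unsigned int old = atomic_load(&p->ref_count);

	do {
		if (old == 0)
			return (false);		/* already being freed */
	} while (!atomic_compare_exchange_weak(&p->ref_count, &old,
	    old | PDREF));
	return (true);
}

/* Drop the scan reference; return true if the scanner must free the page. */
static bool
scan_drop(struct page *p)
{
	return (atomic_fetch_sub(&p->ref_count, PDREF) == PDREF);
}

int
main(void)
{
	struct page *p = calloc(1, sizeof(*p));

	if (p == NULL)
		return (1);
	atomic_store(&p->ref_count, 1);			/* one wiring */
	if (scan_hold(p)) {
		atomic_fetch_sub(&p->ref_count, 1);	/* last wiring dropped meanwhile */
		if (scan_drop(p)) {
			puts("scanner frees the page");
			free(p);
		}
	}
	return (0);
}
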
*/ - if (m->hold_count != 0) - continue; - if (m->wire_count != 0) { + if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); - continue; + goto drop; } if (object != m->object) { @@ -770,10 +820,33 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) goto recheck; } } + if (__predict_false(object == NULL)) + /* + * The page has been removed from its object. + * Drop our reference and move on. + */ + goto drop; + + /* + * We can drop our transient reference now that we hold + * the object lock. + */ + vm_pageout_drop_page_quick(m); if (vm_page_busied(m)) continue; + /* + * Re-check for wirings now that we hold the object lock. If + * the page is mapped, it may still be wired by pmap lookups. + * The call to vm_page_try_remove_all() below atomically checks + * for such wirings and removes mappings. + */ + if (__predict_false(vm_page_wired(m))) { + vm_page_dequeue_deferred(m); + continue; + } + /* * Invalid pages can be easily freed. They cannot be * mapped; vm_page_free() asserts this. @@ -841,8 +914,10 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) */ if (object->ref_count != 0) { vm_page_test_dirty(m); - if (m->dirty == 0) - pmap_remove_all(m); + if (m->dirty == 0 && !vm_page_try_remove_all(m)) { + vm_page_dequeue_deferred(m); + continue; + } } /* @@ -892,6 +967,11 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) mtx = NULL; object = NULL; } + + continue; +drop: + if (vm_pageout_drop_page(m)) + goto free_page; } if (mtx != NULL) { mtx_unlock(mtx); @@ -1134,6 +1214,7 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) { struct scan_state ss; struct mtx *mtx; + vm_object_t object; vm_page_t m, marker; struct vm_pagequeue *pq; long min_scan; @@ -1190,23 +1271,31 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) if (__predict_false((m->flags & PG_MARKER) != 0)) continue; - vm_page_change_lock(m, &mtx); + if (!vm_pageout_lock_and_hold_page(m, &mtx)) + continue; /* * The page may have been disassociated from the queue * while locks were dropped. */ if (vm_page_queue(m) != PQ_ACTIVE) - continue; + goto drop; /* * Wired pages are dequeued lazily. */ - if (m->wire_count != 0) { + if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); - continue; + goto drop; } + if (__predict_false((object = m->object) == NULL)) + /* + * The page has been removed from its object. + * Drop our reference and move on. + */ + goto drop; + /* * Check to see "how much" the page has been used. * @@ -1226,7 +1315,7 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) * This race delays the detection of a new reference. At * worst, we will deactivate and reactivate the page. 
 		 */
-		if (m->object->ref_count != 0)
+		if (object->ref_count != 0)
 			act_delta = pmap_ts_referenced(m);
 		else
 			act_delta = 0;
@@ -1281,6 +1370,9 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage)
 				}
 			}
 		}
+drop:
+		if (vm_pageout_drop_page(m))
+			vm_page_free(m);
 	}
 	if (mtx != NULL) {
 		mtx_unlock(mtx);
@@ -1395,7 +1487,8 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
 		KASSERT((m->flags & PG_MARKER) == 0,
 		    ("marker page %p was dequeued", m));
 
-		vm_page_change_lock(m, &mtx);
+		if (!vm_pageout_lock_and_hold_page(m, &mtx))
+			continue;
 
 recheck:
 		/*
@@ -1404,7 +1497,7 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
 		 */
 		if (vm_page_queue(m) != PQ_INACTIVE) {
 			addl_page_shortage++;
-			continue;
+			goto drop;
 		}
 
 		/*
@@ -1413,32 +1506,28 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
 		 * chance.
 		 */
 		if ((m->aflags & (PGA_ENQUEUED | PGA_REQUEUE |
-		    PGA_REQUEUE_HEAD)) != 0)
-			goto reinsert;
+		    PGA_REQUEUE_HEAD)) != 0) {
+			vm_pageout_reinsert_inactive(&ss, &rq, m);
+			goto drop;
+		}
 
 		/*
-		 * Held pages are essentially stuck in the queue.  So,
-		 * they ought to be discounted from the inactive count.
-		 * See the description of addl_page_shortage above.
-		 *
 		 * Wired pages may not be freed.  Complete their removal
 		 * from the queue now to avoid needless revisits during
-		 * future scans.
+		 * future scans.  This check is racy and must be reverified once
+		 * we hold the object lock and have verified that the page
+		 * is not busy.
 		 */
-		if (m->hold_count != 0) {
-			addl_page_shortage++;
-			goto reinsert;
-		}
-		if (m->wire_count != 0) {
+		if (vm_page_wired(m)) {
 			vm_page_dequeue_deferred(m);
-			continue;
+			goto drop;
 		}
 
 		if (object != m->object) {
 			if (object != NULL)
 				VM_OBJECT_WUNLOCK(object);
 			object = m->object;
-			if (!VM_OBJECT_TRYWLOCK(object)) {
+			if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
 				mtx_unlock(mtx);
 				/* Depends on type-stability. */
 				VM_OBJECT_WLOCK(object);
@@ -1446,6 +1535,18 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
 				goto recheck;
 			}
 		}
+		if (__predict_false(object == NULL))
+			/*
+			 * The page has been removed from its object.
+			 * Drop our reference and move on.
+			 */
+			goto drop;
+
+		/*
+		 * We can drop our transient reference now that we hold
+		 * the object lock.
+		 */
+		vm_pageout_drop_page_quick(m);
 
 		if (vm_page_busied(m)) {
 			/*
@@ -1457,7 +1558,19 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
 			 * inactive count.
 			 */
 			addl_page_shortage++;
-			goto reinsert;
+			vm_pageout_reinsert_inactive(&ss, &rq, m);
+			continue;
+		}
+
+		/*
+		 * Re-check for wirings now that we hold the object lock.  If
+		 * the page is mapped, it may still be wired by pmap lookups.
+		 * The call to vm_page_try_remove_all() below atomically checks
+		 * for such wirings and removes mappings.
+		 */
+		if (__predict_false(vm_page_wired(m))) {
+			vm_page_dequeue_deferred(m);
+			continue;
 		}
 
 		/*
@@ -1503,7 +1616,8 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
 				continue;
 			} else if ((object->flags & OBJ_DEAD) == 0) {
 				vm_page_aflag_set(m, PGA_REQUEUE);
-				goto reinsert;
+				vm_pageout_reinsert_inactive(&ss, &rq, m);
+				continue;
 			}
 		}
 
@@ -1516,8 +1630,10 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
 		 */
 		if (object->ref_count != 0) {
 			vm_page_test_dirty(m);
-			if (m->dirty == 0)
-				pmap_remove_all(m);
+			if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
+				vm_page_dequeue_deferred(m);
+				continue;
+			}
 		}
 
 		/*
@@ -1543,8 +1659,13 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage,
 		} else if ((object->flags & OBJ_DEAD) == 0)
 			vm_page_launder(m);
 		continue;
-reinsert:
-		vm_pageout_reinsert_inactive(&ss, &rq, m);
+
+drop:
+		/*
+		 * Drop our transient reference.
+		 */
+		if (vm_pageout_drop_page(m))
+			goto free_page;
 	}
 	if (mtx != NULL)
 		mtx_unlock(mtx);
@@ -2041,8 +2162,8 @@ vm_pageout_init(void)
 	if (vm_pageout_update_period == 0)
 		vm_pageout_update_period = 600;
 
-	if (vm_page_max_wired == 0)
-		vm_page_max_wired = freecount / 3;
+	if (vm_page_max_user_wired == 0)
+		vm_page_max_user_wired = freecount / 3;
 }
 
 /*
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index bae7e937f274..57402801f580 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -75,7 +75,7 @@
  * Exported data structures.
  */
 
-extern int vm_page_max_wired;
+extern u_long vm_page_max_user_wired;
 extern int vm_pageout_page_count;
 
 #define VM_OOM_MEM	1
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index 140dabaf2fa3..20135c1d7f20 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -208,12 +208,12 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
 				goto unlock_return;
 			if (should_yield())
 				goto unlock_return;
-			if (vm_page_busied(p))
+
+			if (vm_page_busied(p) || vm_page_wired(p))
 				continue;
 			VM_CNT_INC(v_pdpages);
 			vm_page_lock(p);
-			if (vm_page_held(p) ||
-			    !pmap_page_exists_quick(pmap, p)) {
+			if (!pmap_page_exists_quick(pmap, p)) {
 				vm_page_unlock(p);
 				continue;
 			}
@@ -231,8 +231,8 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
 				p->act_count -= min(p->act_count,
 				    ACT_DECLINE);
 				if (!remove_mode && p->act_count == 0) {
-					pmap_remove_all(p);
-					vm_page_deactivate(p);
+					if (vm_page_try_remove_all(p))
+						vm_page_deactivate(p);
 				} else
 					vm_page_requeue(p);
 			} else {
@@ -243,7 +243,7 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
 					vm_page_requeue(p);
 				}
 			} else if (vm_page_inactive(p))
-				pmap_remove_all(p);
+				(void)vm_page_try_remove_all(p);
 			vm_page_unlock(p);
 		}
 		if ((backing_object = object->backing_object) == NULL)
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index 58e14b16c93f..ff12c3008444 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -95,13 +95,11 @@ kern_break(struct thread *td, uintptr_t *addr)
 	rlim_t datalim, lmemlim, vmemlim;
 	int prot, rv;
 	int error = 0;
-	boolean_t do_map_wirefuture;
 
 	datalim = lim_cur(td, RLIMIT_DATA);
 	lmemlim = lim_cur(td, RLIMIT_MEMLOCK);
 	vmemlim = lim_cur(td, RLIMIT_VMEM);
 
-	do_map_wirefuture = FALSE;
 	new = round_page(*addr);
 
 	vm_map_lock(map);
@@ -184,7 +182,14 @@ kern_break(struct thread *td, uintptr_t *addr)
 		if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32))
 			prot |= VM_PROT_EXECUTE;
 #endif
-		rv = vm_map_insert(map, NULL, 0, old, new, prot, VM_PROT_ALL, 0);
+		rv = vm_map_insert(map, NULL, 0, old, new, prot, VM_PROT_ALL,
+		    0);
+		if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
+			rv = vm_map_wire_locked(map, old, new,
+			    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+			if (rv != KERN_SUCCESS)
+				vm_map_delete(map, old, new);
+		}
 		if (rv != KERN_SUCCESS) {
 #ifdef RACCT
 			if (racct_enable) {
@@ -205,17 +210,6 @@ kern_break(struct thread *td, uintptr_t *addr)
 			goto done;
 		}
 		vm->vm_dsize += btoc(new - old);
-		/*
-		 * Handle the MAP_WIREFUTURE case for legacy applications,
-		 * by marking the newly mapped range of pages as wired.
-		 * We are not required to perform a corresponding
-		 * vm_map_unwire() before vm_map_delete() below, as
-		 * it will forcibly unwire the pages in the range.
-		 *
-		 * XXX If the pages cannot be wired, no error is returned.
-		 */
-		if ((map->flags & MAP_WIREFUTURE) == MAP_WIREFUTURE)
-			do_map_wirefuture = TRUE;
 	} else if (new < old) {
 		rv = vm_map_delete(map, new, old);
 		if (rv != KERN_SUCCESS) {
@@ -239,10 +233,6 @@ kern_break(struct thread *td, uintptr_t *addr)
 done:
 	vm_map_unlock(map);
 
-	if (do_map_wirefuture)
-		(void) vm_map_wire(map, old, new,
-		    VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);
-
 	if (error == 0)
 		*addr = new;
 
diff --git a/usr.bin/vmstat/vmstat.c b/usr.bin/vmstat/vmstat.c
index 01b258bf668e..e8cec07fd46b 100644
--- a/usr.bin/vmstat/vmstat.c
+++ b/usr.bin/vmstat/vmstat.c
@@ -156,6 +156,7 @@ static struct __vmmeter {
 	u_int v_free_min;
 	u_int v_free_count;
 	u_int v_wire_count;
+	u_long v_user_wire_count;
 	u_int v_active_count;
 	u_int v_inactive_target;
 	u_int v_inactive_count;
@@ -566,6 +567,7 @@ fill_vmmeter(struct __vmmeter *vmmp)
 	GET_VM_STATS(vm, v_free_min);
 	GET_VM_STATS(vm, v_free_count);
 	GET_VM_STATS(vm, v_wire_count);
+	GET_VM_STATS(vm, v_user_wire_count);
 	GET_VM_STATS(vm, v_active_count);
 	GET_VM_STATS(vm, v_inactive_target);
 	GET_VM_STATS(vm, v_inactive_count);
@@ -1057,6 +1059,8 @@ dosum(void)
 	    sum.v_laundry_count);
 	xo_emit("{:wired-pages/%9u} {N:pages wired down}\n",
 	    sum.v_wire_count);
+	xo_emit("{:virtual-user-wired-pages/%9lu} {N:virtual user pages wired "
+	    "down}\n", sum.v_user_wire_count);
 	xo_emit("{:free-pages/%9u} {N:pages free}\n", sum.v_free_count);
 	xo_emit("{:bytes-per-page/%9u} {N:bytes per page}\n",
 	    sum.v_page_size);
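
Not part of the patch itself: a minimal userspace sketch of how the new counter and limit touched above can be inspected on a system running a kernel with this change.  Only the sysctl names vm.stats.vm.v_user_wire_count and vm.max_user_wired and their u_long width are taken from the hunks above; the program itself is illustrative only.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

/*
 * Illustrative only: read the user-wired page count and the user wiring
 * limit via sysctlbyname(3).  Both OIDs are u_long after this change.
 */
int
main(void)
{
	u_long user_wired, max_user_wired;
	size_t len;

	len = sizeof(user_wired);
	if (sysctlbyname("vm.stats.vm.v_user_wire_count", &user_wired,
	    &len, NULL, 0) != 0)
		err(1, "v_user_wire_count");
	len = sizeof(max_user_wired);
	if (sysctlbyname("vm.max_user_wired", &max_user_wired,
	    &len, NULL, 0) != 0)
		err(1, "max_user_wired");
	printf("%lu of %lu user-wired pages in use\n", user_wired,
	    max_user_wired);
	return (0);
}

The same information is available from the command line with
"sysctl vm.max_user_wired vm.stats.vm.v_user_wire_count".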