diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 96fe61b..20dd38f 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -356,7 +356,7 @@ retry: vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(kmem_object); - vm_map_delete(map, addr, addr + size); + vm_map_delete(map, addr, addr + size, FALSE); vm_map_unlock(map); return (0); } @@ -380,11 +380,6 @@ retry: panic("kmem_malloc: entry not found or misaligned"); entry->wired_count = 1; - /* - * At this point, the kmem_object must be unlocked because - * vm_map_simplify_entry() calls vm_object_deallocate(), which - * locks the kmem_object. - */ vm_map_simplify_entry(map, entry); /* @@ -458,7 +453,8 @@ kmem_free_wakeup(map, addr, size) { vm_map_lock(map); - (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); + (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size), + FALSE); if (map->needs_wakeup) { map->needs_wakeup = FALSE; vm_map_wakeup(map); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 32fdfde..2c5821c 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -454,26 +454,50 @@ _vm_map_lock(vm_map_t map, const char *file, int line) } void -_vm_map_unlock(vm_map_t map, const char *file, int line) +_vm_map_unlock(vm_map_t map, boolean_t process_freelist, const char *file, + int line) { vm_map_entry_t free_entry, entry; vm_object_t object; + struct vnode *vp; + struct mount *mp; - free_entry = map->deferred_freelist; - map->deferred_freelist = NULL; + free_entry = NULL; /* to please gcc */ + if (process_freelist) { + free_entry = map->deferred_freelist; + map->deferred_freelist = NULL; + } if (map->system_map) _mtx_unlock_flags(&map->system_mtx, 0, file, line); else _sx_xunlock(&map->lock, file, line); + if (!process_freelist) + return; + while (free_entry != NULL) { entry = free_entry; free_entry = free_entry->next; + KASSERT((entry->eflags & MAP_ENTRY_LINKED) == 0 && + (entry->eflags & MAP_ENTRY_UNLINKED) != 0 && + (entry->eflags & MAP_ENTRY_ON_FREELIST) != 0, + ("vm_map_unlock: eflags %p", entry)); + if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { object = entry->object.vm_object; + mp = NULL; + if (entry->eflags & MAP_ENTRY_VN_WRITECNT) { + vp = object->handle; + vn_start_write(vp, &mp, V_WAIT); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vp->v_writecount--; + VOP_UNLOCK(vp, 0); + } vm_object_deallocate(object); + if (mp != NULL) + vn_finished_write(mp); } vm_map_entry_dispose(map, entry); @@ -630,7 +654,7 @@ vm_map_unlock_and_wait(vm_map_t map, int timo) { mtx_lock(&map_sleep_mtx); - vm_map_unlock(map); + _vm_map_unlock(map, FALSE, LOCK_FILE, LOCK_LINE); return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", timo)); } @@ -692,6 +716,9 @@ _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) { map->header.next = map->header.prev = &map->header; +#ifdef INVARIANTS + map->header.eflags = MAP_ENTRY_LINKED; +#endif map->needs_wakeup = FALSE; map->system_map = 0; map->min_offset = min; @@ -742,6 +769,135 @@ vm_map_entry_create(vm_map_t map) } /* + * vm_map_entry_inc_vn_wcnt: [ internal use only ] + * + * Increments v_writecount for the vnode that backs a writeable + * mapping described by the entry. To satisfy the lock order + * between the map lock and the vnode lock, the map lock might + * be dropped. In this case, the entry is marked as in + * transition to prevent its removal. + * + * The increment is postponed for a map entry that is already in + * transition when the transition was not set by the caller.
+ * In this case, the MAP_ENTRY_VN_WRITECNT flag is transferred to + * MAP_ENTRY_DEFER_WRITECNT, and the owner of the transition + * state should call this function before lifting the + * MAP_ENTRY_IN_TRANSITION flag. + * + * When called from vm_map_clip_start or vm_map_clip_end and + * about to drop the map lock, protect both the entry and the + * second part of the clipped region, which is passed in the + * neighbour parameter. + */ +static void +vm_map_entry_inc_vn_wcnt(vm_map_t map, vm_map_entry_t entry, + vm_map_entry_t neighbour, boolean_t transition_owner) +{ + struct vnode *vp; + vm_offset_t start, end, n_start, n_end; + boolean_t clear_transition, need_wakeup; + int last_timestamp; + + VM_MAP_ASSERT_LOCKED(map); + KASSERT((entry->eflags & MAP_ENTRY_LINKED) != 0 && + (entry->eflags & MAP_ENTRY_UNLINKED) == 0 && + (entry->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_entry_inc_vn_wcnt: eflags1 %p", entry)); + KASSERT(neighbour == NULL || entry->eflags == neighbour->eflags, + ("vm_map_entry_inc_vn_wcnt: wrong neighbour")); + + if ((entry->eflags & (MAP_ENTRY_VN_WRITECNT | + MAP_ENTRY_DEFER_WRITECNT)) == 0) + return; + KASSERT((entry->eflags & MAP_ENTRY_VN_WRITECNT) == 0 || + (entry->eflags & MAP_ENTRY_DEFER_WRITECNT) == 0, + ("vm_map_entry_inc_vn_wcnt: both %p", entry)); + KASSERT(!transition_owner || + (entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, + ("transition_owner but no transition %p", entry)); + if (!transition_owner && + (entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) { + if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) { + entry->eflags &= ~MAP_ENTRY_VN_WRITECNT; + entry->eflags |= MAP_ENTRY_DEFER_WRITECNT; + } + return; + } + if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) { + KASSERT(transition_owner, ("not transition owner %p", entry)); + clear_transition = FALSE; + } else { + clear_transition = TRUE; + entry->eflags |= MAP_ENTRY_IN_TRANSITION; + if (neighbour != NULL) + neighbour->eflags |= MAP_ENTRY_IN_TRANSITION; + } + start = entry->start; + end = entry->end; + if (neighbour != NULL) { + n_start = neighbour->start; + n_end = neighbour->end; + } else + n_start = n_end = 0; + vp = entry->object.vm_object->handle; + vhold(vp); + if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) + goto incr; + last_timestamp = map->timestamp; + vm_map_unlock(map); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vm_map_lock(map); + if (last_timestamp + 1 != map->timestamp) { + if (!vm_map_lookup_entry(map, start, &entry)) + panic("vm_map_entry_inc_vn_wcnt: lost entry"); + if (clear_transition && neighbour != NULL && + !vm_map_lookup_entry(map, n_start, &neighbour)) + panic("vm_map_entry_inc_vn_wcnt: lost neighbour entry"); + } +incr: + for (need_wakeup = FALSE; entry != &map->header && entry->start < end; + entry = entry->next) { + /* + * Only clipping is allowed for an entry marked as in + * transition. Make a single pass to increment + * v_writecount for all clip splinters and fix their + * flags accordingly.
+ */ + KASSERT(entry->object.vm_object != NULL && + entry->object.vm_object->type == OBJT_VNODE && + entry->object.vm_object->handle == (void *)vp, + ("vm_map_entry_inc_vn_wcnt: not a clip")); + KASSERT((entry->eflags & MAP_ENTRY_LINKED) != 0 && + (entry->eflags & MAP_ENTRY_UNLINKED) == 0 && + (entry->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_entry_inc_vn_wcnt: eflags2 %p", entry)); + vp->v_writecount += 1; + entry->eflags &= ~MAP_ENTRY_DEFER_WRITECNT; + entry->eflags |= MAP_ENTRY_VN_WRITECNT; + if (clear_transition) { + if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { + entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; + need_wakeup = TRUE; + } + entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; + } + } + VOP_UNLOCK(vp, 0); + vdrop(vp); + if (clear_transition && neighbour != NULL) { + for (; neighbour != &map->header && neighbour->start < n_end; + neighbour = neighbour->next) { + if (neighbour->eflags & MAP_ENTRY_NEEDS_WAKEUP) { + neighbour->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; + need_wakeup = TRUE; + } + neighbour->eflags &= ~MAP_ENTRY_IN_TRANSITION; + } + } + if (need_wakeup) + vm_map_wakeup(map); +} + +/* * vm_map_entry_set_behavior: * * Set the expected access behavior, either normal, random, or @@ -919,6 +1075,9 @@ vm_map_entry_link(vm_map_t map, entry->next->start) - entry->end; vm_map_entry_set_max_free(entry); map->root = entry; +#ifdef INVARIANTS + entry->eflags |= MAP_ENTRY_LINKED; +#endif } static void @@ -928,6 +1087,11 @@ vm_map_entry_unlink(vm_map_t map, vm_map_entry_t next, prev, root; VM_MAP_ASSERT_LOCKED(map); + KASSERT((entry->eflags & MAP_ENTRY_LINKED) != 0 && + (entry->eflags & MAP_ENTRY_UNLINKED) == 0 && + (entry->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_entry_unlink: eflags %p", entry)); + if (entry != map->root) vm_map_entry_splay(entry->start, map->root); if (entry->left == NULL) @@ -948,6 +1112,10 @@ vm_map_entry_unlink(vm_map_t map, map->nentries--; CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, map->nentries, entry); +#ifdef INVARIANTS + entry->eflags &= ~MAP_ENTRY_LINKED; + entry->eflags |= MAP_ENTRY_UNLINKED; +#endif } /* @@ -1005,6 +1173,10 @@ vm_map_lookup_entry( *entry = &map->header; else if (address >= cur->start && cur->end > address) { *entry = cur; + KASSERT(((*entry)->eflags & MAP_ENTRY_LINKED) != 0 && + ((*entry)->eflags & MAP_ENTRY_UNLINKED) == 0 && + ((*entry)->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_lookup_entry: eflags %p", *entry)); return (TRUE); } else if ((locked = vm_map_locked(map)) || sx_try_upgrade(&map->lock)) { @@ -1025,6 +1197,10 @@ vm_map_lookup_entry( */ if (address >= cur->start) { *entry = cur; + KASSERT(((*entry)->eflags & MAP_ENTRY_LINKED) != 0 && + ((*entry)->eflags & MAP_ENTRY_UNLINKED) == 0 && + ((*entry)->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_lookup_entry: eflags %p", *entry)); if (cur->end > address) return (TRUE); } else @@ -1043,6 +1219,10 @@ vm_map_lookup_entry( cur = cur->left; } else if (cur->end > address) { *entry = cur; + KASSERT(((*entry)->eflags & MAP_ENTRY_LINKED) != 0 && + ((*entry)->eflags & MAP_ENTRY_UNLINKED) == 0 && + ((*entry)->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_lookup_entry: eflags %p", *entry)); return (TRUE); } else { if (cur->right == NULL) { @@ -1052,6 +1232,10 @@ vm_map_lookup_entry( cur = cur->right; } } + KASSERT(((*entry)->eflags & MAP_ENTRY_LINKED) != 0 && + ((*entry)->eflags & MAP_ENTRY_UNLINKED) == 0 && + ((*entry)->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_lookup_entry: eflags %p", *entry)); return (FALSE); } @@ -1076,6 +1260,7 @@ 
vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_map_entry_t prev_entry; vm_map_entry_t temp_entry; vm_eflags_t protoeflags; + struct vnode *vp; VM_MAP_ASSERT_LOCKED(map); @@ -1195,6 +1380,14 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, */ vm_map_entry_link(map, prev_entry, new_entry); map->size += new_entry->end - new_entry->start; + if (object != NULL && object->type == OBJT_VNODE && + (new_entry->max_protection & VM_PROT_WRITE) != 0 && + (cow & MAP_COPY_ON_WRITE) == 0) { + vp = object->handle; + ASSERT_VOP_ELOCKED(vp, "vm_map_insert inc v_writecount"); + vp->v_writecount++; + new_entry->eflags |= MAP_ENTRY_VN_WRITECNT; + } #if 0 /* @@ -1321,7 +1514,7 @@ vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, end = start + length; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); - (void) vm_map_delete(map, start, end); + (void) vm_map_delete(map, start, end, TRUE); result = vm_map_insert(map, object, offset, start, end, prot, max, cow); vm_map_unlock(map); @@ -1384,6 +1577,11 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) vm_map_entry_t next, prev; vm_size_t prevsize, esize; + KASSERT((entry->eflags & MAP_ENTRY_LINKED) != 0 && + (entry->eflags & MAP_ENTRY_UNLINKED) == 0 && + (entry->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_simplify_entry: eflags %p", entry)); + if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) return; @@ -1405,18 +1603,11 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) if (entry->prev != &map->header) vm_map_entry_resize_free(map, entry->prev); - /* - * If the backing object is a vnode object, - * vm_object_deallocate() calls vrele(). - * However, vrele() does not lock the vnode - * because the vnode has additional - * references. Thus, the map lock can be kept - * without causing a lock-order reversal with - * the vnode lock. - */ - if (prev->object.vm_object) - vm_object_deallocate(prev->object.vm_object); - vm_map_entry_dispose(map, prev); + prev->next = map->deferred_freelist; + map->deferred_freelist = prev; +#ifdef INVARIANTS + prev->eflags |= MAP_ENTRY_ON_FREELIST; +#endif } } @@ -1436,12 +1627,11 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) entry->end = next->end; vm_map_entry_resize_free(map, entry); - /* - * See comment above. - */ - if (next->object.vm_object) - vm_object_deallocate(next->object.vm_object); - vm_map_entry_dispose(map, next); + next->next = map->deferred_freelist; + map->deferred_freelist = next; +#ifdef INVARIANTS + next->eflags |= MAP_ENTRY_ON_FREELIST; +#endif } } } @@ -1452,10 +1642,10 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) * the specified address; if necessary, * it splits the entry into two. */ -#define vm_map_clip_start(map, entry, startaddr) \ +#define vm_map_clip_start(map, entry, startaddr) \ { \ - if (startaddr > entry->start) \ - _vm_map_clip_start(map, entry, startaddr); \ + if (startaddr > (*entry)->start) \ + _vm_map_clip_start(map, entry, startaddr); \ } /* @@ -1463,10 +1653,14 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) * the entry must be split. 
*/ static void -_vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) +_vm_map_clip_start(vm_map_t map, vm_map_entry_t *pentry, vm_offset_t start) { - vm_map_entry_t new_entry; + vm_map_entry_t entry, new_entry; + int last_timestamp; + entry = *pentry; + KASSERT((entry->eflags & MAP_ENTRY_IS_HOLDER) == 0, + ("Holder %p in vm_map_clip_start", entry)); VM_MAP_ASSERT_LOCKED(map); /* @@ -1486,7 +1680,7 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) if (entry->object.vm_object == NULL && !map->system_map) { vm_object_t object; object = vm_object_allocate(OBJT_DEFAULT, - atop(entry->end - entry->start)); + atop(entry->end - entry->start)); entry->object.vm_object = object; entry->offset = 0; } @@ -1502,6 +1696,13 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); + last_timestamp = map->timestamp; + vm_map_entry_inc_vn_wcnt(map, new_entry, entry, FALSE); + if (last_timestamp != map->timestamp && + last_timestamp + 1 != map->timestamp) { + if (!vm_map_lookup_entry(map, start, pentry)) + panic("vm_map_clip_start: lost entry"); + } } } @@ -1514,7 +1715,7 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) */ #define vm_map_clip_end(map, entry, endaddr) \ { \ - if ((endaddr) < (entry->end)) \ + if ((endaddr) < ((*entry)->end)) \ _vm_map_clip_end((map), (entry), (endaddr)); \ } @@ -1523,9 +1724,20 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) * the entry must be split. */ static void -_vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) +_vm_map_clip_end(vm_map_t map, vm_map_entry_t *pentry, vm_offset_t end) { - vm_map_entry_t new_entry; + vm_map_entry_t entry, new_entry; + int last_timestamp; + vm_offset_t entry_start; + + entry = *pentry; + KASSERT((entry->eflags & MAP_ENTRY_IS_HOLDER) == 0, + ("Holder %p in vm_map_clip_end", entry)); + KASSERT((entry->eflags & MAP_ENTRY_LINKED) != 0 && + (entry->eflags & MAP_ENTRY_UNLINKED) == 0 && + (entry->eflags & MAP_ENTRY_ON_FREELIST) == 0, + ("vm_map_clip_end: eflags %p", entry)); + VM_MAP_ASSERT_LOCKED(map); @@ -1557,6 +1769,14 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); + entry_start = entry->start; + last_timestamp = map->timestamp; + vm_map_entry_inc_vn_wcnt(map, new_entry, entry, FALSE); + if (last_timestamp != map->timestamp && + last_timestamp + 1 != map->timestamp) { + if (!vm_map_lookup_entry(map, entry_start, pentry)) + panic("vm_map_clip_end: lost entry"); + } } } @@ -1593,11 +1813,11 @@ vm_map_submap( VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, &entry, start); } else entry = entry->next; - vm_map_clip_end(map, entry, end); + vm_map_clip_end(map, &entry, end); if ((entry->start == start) && (entry->end == end) && ((entry->eflags & MAP_ENTRY_COW) == 0) && @@ -1732,7 +1952,10 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { - vm_map_clip_start(map, entry, start); + if (entry->eflags & MAP_ENTRY_IS_HOLDER) + entry = entry->next; + else + vm_map_clip_start(map, &entry, start); } else { entry = entry->next; } @@ -1746,7 +1969,8 @@ vm_map_protect(vm_map_t map, vm_offset_t 
start, vm_offset_t end, vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } - if ((new_prot & current->max_protection) != new_prot) { + if (!(current->eflags & MAP_ENTRY_IS_HOLDER) && + (new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); return (KERN_PROTECTION_FAILURE); } @@ -1761,7 +1985,12 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, while ((current != &map->header) && (current->start < end)) { vm_prot_t old_prot; - vm_map_clip_end(map, current, end); + if (current->eflags & MAP_ENTRY_IS_HOLDER) { + current = current->next; + continue; + } + + vm_map_clip_end(map, &current, end); old_prot = current->protection; if (set_max) @@ -1840,8 +2069,12 @@ vm_map_madvise( VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { - if (modify_map) - vm_map_clip_start(map, entry, start); + if (modify_map) { + if (entry->eflags & MAP_ENTRY_IS_HOLDER) + entry = entry->next; + else + vm_map_clip_start(map, &entry, start); + } } else { entry = entry->next; } @@ -1857,10 +2090,11 @@ vm_map_madvise( (current != &map->header) && (current->start < end); current = current->next ) { - if (current->eflags & MAP_ENTRY_IS_SUB_MAP) + if ((current->eflags & (MAP_ENTRY_IS_SUB_MAP | + MAP_ENTRY_IS_HOLDER))) continue; - vm_map_clip_end(map, current, end); + vm_map_clip_end(map, &current, end); switch (behav) { case MADV_NORMAL: @@ -1907,7 +2141,8 @@ ) { vm_offset_t useStart; - if (current->eflags & MAP_ENTRY_IS_SUB_MAP) + if ((current->eflags & (MAP_ENTRY_IS_SUB_MAP | + MAP_ENTRY_IS_HOLDER))) continue; pindex = OFF_TO_IDX(current->offset); @@ -1969,15 +2204,18 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, } vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); - if (vm_map_lookup_entry(map, start, &temp_entry)) { + if (vm_map_lookup_entry(map, start, &temp_entry) && + (temp_entry->eflags & MAP_ENTRY_IS_HOLDER) == 0) { entry = temp_entry; - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, &entry, start); } else entry = temp_entry->next; while ((entry != &map->header) && (entry->start < end)) { - vm_map_clip_end(map, entry, end); - entry->inheritance = new_inheritance; - vm_map_simplify_entry(map, entry); + if (!(entry->eflags & MAP_ENTRY_IS_HOLDER)) { + vm_map_clip_end(map, &entry, end); + entry->inheritance = new_inheritance; + vm_map_simplify_entry(map, entry); + } entry = entry->next; } vm_map_unlock(map); @@ -2059,8 +2297,8 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, last_timestamp = map->timestamp; continue; } - vm_map_clip_start(map, entry, start); - vm_map_clip_end(map, entry, end); + vm_map_clip_start(map, &entry, start); + vm_map_clip_end(map, &entry, end); /* * Mark the entry in case the map lock is released. (See * above.
@@ -2100,6 +2338,10 @@ done: } entry = first_entry; while (entry != &map->header && entry->start < end) { + if ((entry->eflags & MAP_ENTRY_IS_HOLDER) != 0) { + entry = entry->next; + continue; + } if (rv == KERN_SUCCESS && (!user_unwire || (entry->eflags & MAP_ENTRY_USER_WIRED))) { if (user_unwire) @@ -2116,6 +2358,8 @@ done: } KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION, ("vm_map_unwire: in-transition flag missing")); + if (entry->eflags & MAP_ENTRY_DEFER_WRITECNT) + vm_map_entry_inc_vn_wcnt(map, entry, NULL, TRUE); entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; @@ -2205,8 +2449,8 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, last_timestamp = map->timestamp; continue; } - vm_map_clip_start(map, entry, start); - vm_map_clip_end(map, entry, end); + vm_map_clip_start(map, &entry, start); + vm_map_clip_end(map, &entry, end); /* * Mark the entry in case the map lock is released. (See * above. @@ -2305,6 +2549,10 @@ done: } entry = first_entry; while (entry != &map->header && entry->start < end) { + if ((entry->eflags & MAP_ENTRY_IS_HOLDER) != 0) { + entry = entry->next; + continue; + } if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) goto next_entry_done; if (rv == KERN_SUCCESS) { @@ -2332,6 +2580,8 @@ done: next_entry_done: KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION, ("vm_map_wire: in-transition flag missing")); + if (entry->eflags & MAP_ENTRY_DEFER_WRITECNT) + vm_map_entry_inc_vn_wcnt(map, entry, NULL, TRUE); entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION|MAP_ENTRY_WIRE_SKIPPED); if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; @@ -2506,21 +2756,30 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry) * map. */ int -vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) +vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end, + boolean_t fixed) { - vm_map_entry_t entry; - vm_map_entry_t first_entry; + vm_map_entry_t entry, first_entry, tmp_entry, holder_entry; + unsigned int last_timestamp; + vm_offset_t saved_start; VM_MAP_ASSERT_LOCKED(map); /* * Find the start of the region, and clip it */ +reclip_start: if (!vm_map_lookup_entry(map, start, &first_entry)) entry = first_entry->next; else { entry = first_entry; - vm_map_clip_start(map, entry, start); + if (entry->eflags & MAP_ENTRY_IS_HOLDER) { + entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; + (void) vm_map_unlock_and_wait(map, 0); + vm_map_lock(map); + goto reclip_start; + } + vm_map_clip_start(map, &entry, start); } /* @@ -2529,6 +2788,7 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) while ((entry != &map->header) && (entry->start < end)) { vm_map_entry_t next; + saved_start = entry->start; /* * Wait for wiring or unwiring of an entry to complete. * Also wait for any system wirings to disappear on @@ -2537,15 +2797,51 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 || (vm_map_pmap(map) != kernel_pmap && vm_map_entry_system_wired_count(entry) != 0)) { - unsigned int last_timestamp; - vm_offset_t saved_start; - vm_map_entry_t tmp_entry; - - saved_start = entry->start; entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; last_timestamp = map->timestamp; + + if (start < saved_start && fixed) { + /* + * Create the transient entry that + * holds the already freed address space.
+ */ + holder_entry = vm_map_entry_create(map); + holder_entry->start = start; + holder_entry->end = saved_start; + holder_entry->object.vm_object = NULL; + holder_entry->offset = 0; + holder_entry->eflags = MAP_ENTRY_IN_TRANSITION | + MAP_ENTRY_IS_HOLDER; + holder_entry->protection = VM_PROT_NONE; + holder_entry->max_protection = VM_PROT_NONE; + holder_entry->inheritance = VM_INHERIT_NONE; + holder_entry->wired_count = 0; + vm_map_entry_link(map, entry->prev, holder_entry); + } (void) vm_map_unlock_and_wait(map, 0); vm_map_lock(map); + + if (start < saved_start && fixed) { + + /* + * Holder entry may be clipped, but + * not removed. + */ + if (!vm_map_lookup_entry(map, start, &holder_entry)) + panic("vm_map_delete: lost holder"); + while (holder_entry->start < saved_start) { + tmp_entry = holder_entry->next; + KASSERT(holder_entry->eflags & MAP_ENTRY_IS_HOLDER, + ("vm_map_delete: not holder %p", holder_entry)); + if (holder_entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) + vm_map_wakeup(map); + if (holder_entry->object.vm_object != NULL) + vm_object_deallocate(holder_entry->object.vm_object); + vm_map_entry_unlink(map, holder_entry); + vm_map_entry_dispose(map, holder_entry); + holder_entry = tmp_entry; + } + } if (last_timestamp + 1 != map->timestamp) { /* * Look again for the entry because the map was @@ -2558,13 +2854,13 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) entry = tmp_entry->next; else { entry = tmp_entry; - vm_map_clip_start(map, entry, + vm_map_clip_start(map, &entry, saved_start); } } continue; } - vm_map_clip_end(map, entry, end); + vm_map_clip_end(map, &entry, end); next = entry->next; @@ -2584,10 +2880,21 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) * page frames may be reallocated, and any modify bits * will be set in the wrong object!) */ + last_timestamp = map->timestamp; vm_map_entry_delete(map, entry); entry->next = map->deferred_freelist; map->deferred_freelist = entry; - entry = next; +#ifdef INVARIANTS + entry->eflags |= MAP_ENTRY_ON_FREELIST; +#endif + if (last_timestamp + 1 == map->timestamp) + entry = next; + else if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) + entry = tmp_entry->next; + else { + entry = tmp_entry; + vm_map_clip_start(map, &entry, saved_start); + } } return (KERN_SUCCESS); } @@ -2605,7 +2912,7 @@ vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); - result = vm_map_delete(map, start, end); + result = vm_map_delete(map, start, end, FALSE); vm_map_unlock(map); return (result); } @@ -2884,11 +3191,22 @@ vmspace_fork(struct vmspace *vm1) } old_entry = old_entry->next; } + unlock_and_return: vm_map_unlock(old_map); - if (vm2 != NULL) + if (vm2 != NULL) { + for (new_entry = new_map->header.next; + new_entry != &new_map->header; new_entry = new_entry->next) { + if ((new_entry->eflags & MAP_ENTRY_DEFER_WRITECNT) != 0) { + new_entry->eflags &= ~MAP_ENTRY_DEFER_WRITECNT; + new_entry->eflags |= MAP_ENTRY_VN_WRITECNT; + } + if ((new_entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) + vm_map_entry_inc_vn_wcnt(new_map, new_entry, + NULL, FALSE); + } vm_map_unlock(new_map); - + } return (vm2); } @@ -2973,8 +3291,13 @@ vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, /* Now set the avail_ssize amount. 
*/ if (rv == KERN_SUCCESS) { - if (prev_entry != &map->header) - vm_map_clip_end(map, prev_entry, bot); + if (prev_entry != &map->header) { + if (prev_entry->eflags & MAP_ENTRY_IS_HOLDER) { + vm_map_unlock(map); + return (KERN_NO_SPACE); + } + vm_map_clip_end(map, &prev_entry, bot); + } new_entry = prev_entry->next; if (new_entry->end != top || new_entry->start != bot) panic("Bad entry start/end for new stack entry"); @@ -3144,8 +3467,16 @@ Retry: /* Adjust the available stack space by the amount we grew. */ if (rv == KERN_SUCCESS) { - if (prev_entry != &map->header) - vm_map_clip_end(map, prev_entry, addr); + if (prev_entry != &map->header) { + if ((prev_entry->eflags & MAP_ENTRY_IS_HOLDER) && + (addr < prev_entry->end)) { + vm_map_delete(map, addr, + stack_entry->start, FALSE); + vm_map_unlock(map); + return (KERN_NO_SPACE); + } + vm_map_clip_end(map, &prev_entry, addr); + } new_entry = prev_entry->next; KASSERT(new_entry == stack_entry->prev, ("foo")); KASSERT(new_entry->end == stack_entry->start, ("foo")); @@ -3179,6 +3510,13 @@ Retry: stack_entry->offset, (vm_size_t)(stack_entry->end - stack_entry->start), (vm_size_t)grow_amount)) { + if (next_entry != &map->header && + (next_entry->eflags & MAP_ENTRY_IS_HOLDER) && + (addr > next_entry->start)) { + vm_map_unlock(map); + return (KERN_NO_SPACE); + } + map->size += (addr - stack_entry->end); /* Update the current entry. */ stack_entry->end = addr; @@ -3187,7 +3525,7 @@ Retry: rv = KERN_SUCCESS; if (next_entry != &map->header) - vm_map_clip_start(map, next_entry, addr); + vm_map_clip_start(map, &next_entry, addr); } else rv = KERN_FAILURE; } diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 70c3a0b..f2c4fd3 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -133,11 +133,22 @@ struct vm_map_entry { #define MAP_ENTRY_IN_TRANSITION 0x0100 /* entry being changed */ #define MAP_ENTRY_NEEDS_WAKEUP 0x0200 /* waiters in transition */ #define MAP_ENTRY_NOCOREDUMP 0x0400 /* don't include in a core */ +#define MAP_ENTRY_IS_HOLDER 0x0800 /* holds space for vm_map_fixed */ #define MAP_ENTRY_GROWS_DOWN 0x1000 /* Top-down stacks */ #define MAP_ENTRY_GROWS_UP 0x2000 /* Bottom-up stacks */ -#define MAP_ENTRY_WIRE_SKIPPED 0x4000 +#define MAP_ENTRY_VN_WRITECNT 0x4000 /* writeable mapping for a vnode */ +#define MAP_ENTRY_DEFER_WRITECNT 0x8000 /* increment writecount when + dropping the transitioning state */ +#ifdef INVARIANTS +#define MAP_ENTRY_ON_FREELIST 0x10000 +#define MAP_ENTRY_LINKED 0x20000 +#define MAP_ENTRY_UNLINKED 0x40000 +#endif + +#define MAP_ENTRY_WIRE_SKIPPED 0x80000 + #ifdef _KERNEL static __inline u_char @@ -265,7 +276,8 @@ vmspace_pmap(struct vmspace *vmspace) */ void _vm_map_lock(vm_map_t map, const char *file, int line); -void _vm_map_unlock(vm_map_t map, const char *file, int line); +void _vm_map_unlock(vm_map_t map, boolean_t process_freelist, const char *file, + int line); void _vm_map_lock_read(vm_map_t map, const char *file, int line); void _vm_map_unlock_read(vm_map_t map, const char *file, int line); int _vm_map_trylock(vm_map_t map, const char *file, int line); @@ -277,7 +289,7 @@ int vm_map_unlock_and_wait(vm_map_t map, int timo); void vm_map_wakeup(vm_map_t map); #define vm_map_lock(map) _vm_map_lock(map, LOCK_FILE, LOCK_LINE) -#define vm_map_unlock(map) _vm_map_unlock(map, LOCK_FILE, LOCK_LINE) +#define vm_map_unlock(map) _vm_map_unlock(map, TRUE, LOCK_FILE, LOCK_LINE) #define vm_map_lock_read(map) _vm_map_lock_read(map, LOCK_FILE, LOCK_LINE) #define vm_map_unlock_read(map) _vm_map_unlock_read(map, LOCK_FILE, LOCK_LINE) 
#define vm_map_trylock(map) _vm_map_trylock(map, LOCK_FILE, LOCK_LINE) @@ -339,7 +351,7 @@ long vmspace_wired_count(struct vmspace *vmspace); #ifdef _KERNEL boolean_t vm_map_check_protection (vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t); vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t); -int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t); +int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); int vm_map_find(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, int, vm_prot_t, vm_prot_t, int); int vm_map_fixed(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_size_t, @@ -362,7 +374,7 @@ void vm_map_startup (void); int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t, vm_map_t); int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t); int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int); -void vm_map_simplify_entry (vm_map_t, vm_map_entry_t); +void vm_map_simplify_entry(vm_map_t, vm_map_entry_t); void vm_init2 (void); int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int); int vm_map_growstack (struct proc *p, vm_offset_t addr); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 6cc0acc..e5ab46e 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -117,7 +117,7 @@ vmmapentry_rsrc_init(dummy) } static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, - int *, struct vnode *, vm_ooffset_t, vm_object_t *); + int *, struct vnode *, vm_ooffset_t, vm_object_t *, struct vnode **, int *); static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct cdev *, vm_ooffset_t, vm_object_t *); static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, @@ -593,7 +593,7 @@ munmap(td, uap) } #endif /* returns nothing but KERN_SUCCESS anyway */ - vm_map_delete(map, addr, addr + size); + vm_map_delete(map, addr, addr + size, FALSE); vm_map_unlock(map); return (0); } @@ -1142,7 +1142,8 @@ munlock(td, uap) int vm_mmap_vnode(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, - struct vnode *vp, vm_ooffset_t foff, vm_object_t *objp) + struct vnode *vp, vm_ooffset_t foff, vm_object_t *objp, + struct vnode **rvp, int *vfslocked) { struct vattr va; void *handle; @@ -1150,14 +1151,22 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize, struct mount *mp; struct cdevsw *dsw; struct ucred *cred; - int error, flags, type; - int vfslocked; + int error, flags, type, locktype; mp = vp->v_mount; cred = td->td_ucred; - vfslocked = VFS_LOCK_GIANT(mp); - if ((error = vget(vp, LK_SHARED, td)) != 0) { - VFS_UNLOCK_GIANT(vfslocked); + + /* + * Need to bump v_writecount for a shared writable mapping in + * vm_map_insert.
+ */ + if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED)) + locktype = LK_EXCLUSIVE; + else + locktype = LK_SHARED; + *vfslocked = VFS_LOCK_GIANT(mp); + if ((error = vget(vp, locktype, td)) != 0) { + VFS_UNLOCK_GIANT(*vfslocked); return (error); } flags = *flagsp; @@ -1173,13 +1182,15 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize, if (obj->handle != vp) { vput(vp); vp = (struct vnode*)obj->handle; - vget(vp, LK_SHARED, td); + vget(vp, locktype, td); } type = OBJT_VNODE; handle = vp; + *rvp = vp; } else if (vp->v_type == VCHR) { type = OBJT_DEVICE; handle = vp->v_rdev; + *rvp = vp; dsw = dev_refthread(handle); if (dsw == NULL) { @@ -1250,8 +1261,10 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize, vfs_mark_atime(vp, cred); done: - vput(vp); - VFS_UNLOCK_GIANT(vfslocked); + if (error != 0) { + vput(vp); + VFS_UNLOCK_GIANT(*vfslocked); + } return (error); } @@ -1355,8 +1368,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, boolean_t fitit; vm_object_t object = NULL; int rv = KERN_SUCCESS; - int docow, error; + int docow, error, vfslocked; struct thread *td = curthread; + struct vnode *vp; if (size == 0) return (0); @@ -1390,6 +1404,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, return (EINVAL); fitit = FALSE; } + vfslocked = 0; + vp = NULL; + /* * Lookup/allocate object. */ @@ -1400,7 +1417,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, break; case OBJT_VNODE: error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, - handle, foff, &object); + handle, foff, &object, &vp, &vfslocked); break; case OBJT_SWAP: error = vm_mmap_shm(td, size, prot, &maxprot, &flags, @@ -1448,6 +1465,10 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, rv = vm_map_fixed(map, object, foff, *addr, size, prot, maxprot, docow); + if (vp != NULL) { + vput(vp); + VFS_UNLOCK_GIANT(vfslocked); + } if (rv != KERN_SUCCESS) { /* * Lose the object reference. Will destroy the diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index 6bb9917..3bd2ddd 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -138,7 +138,7 @@ obreak(td, uap) do_map_wirefuture = TRUE; } } else if (new < old) { - rv = vm_map_delete(&vm->vm_map, new, old); + rv = vm_map_delete(&vm->vm_map, new, old, FALSE); if (rv != KERN_SUCCESS) { error = ENOMEM; goto done;