diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 8f30c5b93828..8d1e4b59512f 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1052,6 +1052,8 @@ vm_fault_next(struct faultstate *fs) { vm_object_t next_object; + VM_OBJECT_ASSERT_WLOCKED(fs->object); + /* * The requested page does not exist at this object/ * offset. Remove the invalid page from the object, @@ -1072,13 +1074,11 @@ vm_fault_next(struct faultstate *fs) - * Move on to the next object. Lock the next object before - * unlocking the current one. + * Move on to the next object. It is deliberately left unlocked + * so that the caller can try a lockless page lookup on it first. */ - VM_OBJECT_ASSERT_WLOCKED(fs->object); next_object = fs->object->backing_object; if (next_object == NULL) return (false); MPASS(fs->first_m != NULL); KASSERT(fs->object != next_object, ("object loop %p", next_object)); - VM_OBJECT_WLOCK(next_object); vm_object_pip_add(next_object, 1); if (fs->object != fs->first_object) vm_object_pip_wakeup(fs->object); @@ -1348,7 +1348,7 @@ vm_fault_getpages(struct faultstate *fs, int *behindp, int *aheadp) * page except, perhaps, to pmap it. 
*/ static void -vm_fault_busy_sleep(struct faultstate *fs) +vm_fault_busy_sleep_impl(struct faultstate *fs, bool objlocked) { /* * Reference the page before unlocking and @@ -1362,13 +1362,32 @@ vm_fault_busy_sleep(struct faultstate *fs) } vm_object_pip_wakeup(fs->object); unlock_map(fs); - if (fs->m != vm_page_lookup(fs->object, fs->pindex) || - !vm_page_busy_sleep(fs->m, "vmpfw", 0)) - VM_OBJECT_WUNLOCK(fs->object); + if (objlocked) { + if (fs->m != vm_page_lookup(fs->object, fs->pindex) || + !vm_page_busy_sleep(fs->m, "vmpfw", 0)) + VM_OBJECT_WUNLOCK(fs->object); + } else { + vm_page_busy_sleep_unlocked(fs->object, fs->m, fs->pindex, + "vmpfwu", 0); + } VM_CNT_INC(v_intrans); vm_object_deallocate(fs->first_object); } +static void +vm_fault_busy_sleep(struct faultstate *fs) +{ + + vm_fault_busy_sleep_impl(fs, true); +} + +static void +vm_fault_busy_sleep_unlocked(struct faultstate *fs) +{ + + vm_fault_busy_sleep_impl(fs, false); +} + /* * Handle page lookup, populate, allocate, page-in for the current * object. @@ -1383,6 +1402,8 @@ vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) enum fault_status res; bool dead; + VM_OBJECT_ASSERT_WLOCKED(fs->object); + /* * If the object is marked for imminent termination, we retry * here, since the collapse pass has raced with us. Otherwise, @@ -1417,7 +1438,6 @@ vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) return (FAULT_SOFT); } } - VM_OBJECT_ASSERT_WLOCKED(fs->object); /* * Page is not resident. 
If the pager might contain the page @@ -1458,6 +1478,34 @@ vm_fault_object(struct faultstate *fs, int *behindp, int *aheadp) return (res); } +static enum fault_status +vm_fault_object_unlocked(struct faultstate *fs) +{ + + VM_OBJECT_ASSERT_UNLOCKED(fs->object); + + fs->m = vm_page_lookup_unlocked(fs->object, fs->pindex); + if (fs->m == NULL || !vm_page_all_valid(fs->m)) + goto out_continue; + + if (vm_page_trybusy_unlocked(fs->m, fs->object, fs->pindex, 0) != + BUSY_UNLOCKED_SUCCESS) { + vm_fault_busy_sleep_unlocked(fs); + return (FAULT_RESTART); + } + + if (vm_page_all_valid(fs->m) && + (fs->object->flags & OBJ_DEAD) == 0) + return (FAULT_SOFT); + + vm_page_xunbusy(fs->m); + +out_continue: + fs->m = NULL; + VM_OBJECT_WLOCK(fs->object); + return (FAULT_CONTINUE); +} + int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) @@ -1582,27 +1630,47 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, case FAULT_CONTINUE: break; default: - panic("vm_fault: Unhandled status %d", res); + panic("vm_fault: Unhandled vm_fault_object status %d", res); } /* - * The page was not found in the current object. Try to - * traverse into a backing object or zero fill if none is - * found. + * The page was not found in the current object. + * Traverse into a backing object if there is one. */ - if (vm_fault_next(&fs)) - continue; - if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) { - if (fs.first_object == fs.object) - fault_page_free(&fs.first_m); - unlock_and_deallocate(&fs); - return (KERN_OUT_OF_BOUNDS); + if (!vm_fault_next(&fs)) { + /* + * No backing object, zero fill if requested. + */ + if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) { + if (fs.first_object == fs.object) + fault_page_free(&fs.first_m); + unlock_and_deallocate(&fs); + return (KERN_OUT_OF_BOUNDS); + } + VM_OBJECT_WUNLOCK(fs.object); + vm_fault_zerofill(&fs); + /* + * Don't try to prefault neighboring pages. 
+ */ + faultcount = 1; + break; + } + + /* + * Got a backing object. Try a lockless lookup and fall back + * to a locked variant if we fail. + */ + res = vm_fault_object_unlocked(&fs); + switch (res) { + case FAULT_SOFT: + goto found; + case FAULT_RESTART: + goto RetryFault; + case FAULT_CONTINUE: + break; + default: + panic("vm_fault: Unhandled vm_fault_object_unlocked status %d", res); } - VM_OBJECT_WUNLOCK(fs.object); - vm_fault_zerofill(&fs); - /* Don't try to prefault neighboring pages. */ - faultcount = 1; - break; } found: diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index e7500e9d3e71..710de357a174 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -169,6 +169,7 @@ static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked); +static void vm_page_busy_release(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(vm_page_t m, uint8_t queue); static bool vm_page_free_prep(vm_page_t m); @@ -841,6 +842,29 @@ vm_page_trybusy(vm_page_t m, int allocflags) return (vm_page_tryxbusy(m)); } +/* + * vm_page_trybusy_unlocked + * + * Helper routine for busying a page without holding the object lock. + * + * See enum busy_unlocked_status for error codes. 
+ */ +enum busy_unlocked_status +vm_page_trybusy_unlocked(vm_page_t m, vm_object_t object, vm_pindex_t pindex, + int allocflags) +{ + + if (!vm_page_trybusy(m, allocflags)) + return (BUSY_UNLOCKED_ISBUSY); + + if (m->object != object || m->pindex != pindex) { + vm_page_busy_release(m); + return (BUSY_UNLOCKED_WRONGIDENT); + } + + return (BUSY_UNLOCKED_SUCCESS); +} + /* * vm_page_tryacquire * @@ -4559,6 +4583,7 @@ static bool vm_page_acquire_unlocked(vm_object_t object, vm_pindex_t pindex, vm_page_t prev, vm_page_t *mp, int allocflags) { + enum busy_unlocked_status us; vm_page_t m; vm_page_grab_check(allocflags); @@ -4583,14 +4608,15 @@ vm_page_acquire_unlocked(vm_object_t object, vm_pindex_t pindex, } if (m == NULL) return (true); - if (vm_page_trybusy(m, allocflags)) { - if (m->object == object && m->pindex == pindex) - break; + us = vm_page_trybusy_unlocked(m, object, pindex, allocflags); + if (us == BUSY_UNLOCKED_SUCCESS) + break; + else if (us == BUSY_UNLOCKED_WRONGIDENT) { /* relookup. */ - vm_page_busy_release(m); cpu_spinwait(); continue; } + MPASS(us == BUSY_UNLOCKED_ISBUSY); if (!vm_page_grab_sleep(object, m, pindex, "pgnslp", allocflags, false)) return (false); diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index d2f6a471e8b0..9983a5ef2b93 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -595,6 +595,14 @@ malloc2vm_flags(int malloc_flags) #define PS_ALL_VALID 0x2 #define PS_NONE_BUSY 0x4 +enum busy_unlocked_status { + BUSY_UNLOCKED_SUCCESS, + BUSY_UNLOCKED_WRONGIDENT, + BUSY_UNLOCKED_ISBUSY, +}; + +enum busy_unlocked_status vm_page_trybusy_unlocked(vm_page_t m, vm_object_t object, + vm_pindex_t pindex, int allocflags); bool vm_page_busy_acquire(vm_page_t m, int allocflags); void vm_page_busy_downgrade(vm_page_t m); int vm_page_busy_tryupgrade(vm_page_t m);