--- ./dev/pci/pcireg.h-9.1 2013-01-02 19:29:58.000000000 -0500 +++ ./dev/pci/pcireg.h 2013-03-02 14:21:44.000000000 -0500 @@ -634,6 +634,7 @@ #define PCIR_EXPRESS_DEVICE_CAP 0x4 #define PCIM_EXP_CAP_MAX_PAYLOAD 0x0007 #define PCIR_EXPRESS_DEVICE_CTL 0x8 +#define PCIER_DEVICE_CTL 0x8 #define PCIM_EXP_CTL_NFER_ENABLE 0x0002 #define PCIM_EXP_CTL_FER_ENABLE 0x0004 #define PCIM_EXP_CTL_URR_ENABLE 0x0008 @@ -641,6 +642,7 @@ #define PCIM_EXP_CTL_MAX_PAYLOAD 0x00e0 #define PCIM_EXP_CTL_NOSNOOP_ENABLE 0x0800 #define PCIM_EXP_CTL_MAX_READ_REQUEST 0x7000 +#define PCIEM_CTL_MAX_READ_REQUEST 0x7000 #define PCIR_EXPRESS_DEVICE_STA 0xa #define PCIM_EXP_STA_CORRECTABLE_ERROR 0x0001 #define PCIM_EXP_STA_NON_FATAL_ERROR 0x0002 @@ -649,6 +651,8 @@ #define PCIM_EXP_STA_AUX_POWER 0x0010 #define PCIM_EXP_STA_TRANSACTION_PND 0x0020 #define PCIR_EXPRESS_LINK_CAP 0xc +#define PCIER_LINK_CAP 0xc +#define PCIEM_LINK_CAP_MAX_SPEED 0x0000000f #define PCIM_LINK_CAP_MAX_SPEED 0x0000000f #define PCIM_LINK_CAP_MAX_WIDTH 0x000003f0 #define PCIM_LINK_CAP_ASPM 0x00000c00 @@ -667,6 +671,7 @@ #define PCIR_EXPRESS_SLOT_STA 0x1a #define PCIR_EXPRESS_ROOT_CTL 0x1c #define PCIR_EXPRESS_ROOT_STA 0x20 +#define PCIER_LINK_CAP2 0x2c /* MSI-X definitions */ #define PCIR_MSIX_CTRL 0x2 --- ./dev/iicbus/iicbb.c-9.1 2013-01-02 19:30:23.000000000 -0500 +++ ./dev/iicbus/iicbb.c 2013-03-08 19:05:03.000000000 -0500 @@ -75,6 +75,7 @@ static int iicbb_write(device_t, const char *, int, int *, int); static int iicbb_read(device_t, char *, int, int *, int, int); static int iicbb_reset(device_t, u_char, u_char, u_char *); +static int iicbb_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs); static device_method_t iicbb_methods[] = { /* device interface */ @@ -94,7 +95,7 @@ DEVMETHOD(iicbus_write, iicbb_write), DEVMETHOD(iicbus_read, iicbb_read), DEVMETHOD(iicbus_reset, iicbb_reset), - DEVMETHOD(iicbus_transfer, iicbus_transfer_gen), + DEVMETHOD(iicbus_transfer, iicbb_transfer), { 0, 0 } }; @@ -413,6 +414,22 @@ return (0); } +static int +iicbb_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs) +{ + int error; + + error = IICBB_PRE_XFER(device_get_parent(dev)); + if (error) + return (error); + + error = iicbus_transfer_gen(dev, msgs, nmsgs); + + IICBB_POST_XFER(device_get_parent(dev)); + return (error); +} + + DRIVER_MODULE(iicbus, iicbb, iicbus_driver, iicbus_devclass, 0, 0); MODULE_DEPEND(iicbb, iicbus, IICBUS_MINVER, IICBUS_PREFVER, IICBUS_MAXVER); --- ./dev/iicbus/iicbb_if.m-9.1 2013-01-02 19:30:23.000000000 -0500 +++ ./dev/iicbus/iicbb_if.m 2013-03-08 19:04:05.000000000 -0500 @@ -31,15 +31,51 @@ INTERFACE iicbb; # +# Default implementation of optional methods +# +CODE { + static int + null_pre_xfer(device_t dev) + { + return 0; + } + + static void + null_post_xfer(device_t dev) + { + } + + static int + null_callback(device_t dev, int index, caddr_t data) + { + return 0; + } +}; + +# # iicbus callback # METHOD int callback { device_t dev; int index; caddr_t data; -}; +} DEFAULT null_callback;; + +# +# Prepare device for I2C transfer +# +METHOD int pre_xfer { + device_t dev; +} DEFAULT null_pre_xfer; # +# Cleanup device after I2C transfer +# +METHOD void post_xfer { + device_t dev; +} DEFAULT null_post_xfer; + +# # Set I2C bus data line # METHOD void setsda { --- ./vm/vm_contig.c-9.1 2013-01-02 19:30:47.000000000 -0500 +++ ./vm/vm_contig.c 2013-03-28 14:32:24.000000000 -0400 @@ -83,7 +83,6 @@ #include #include #include -#include #include static int @@ -187,22 +186,6 @@ } /* - * Frees the given physically contiguous pages. 
- * - * N.B.: Any pages with PG_ZERO set must, in fact, be zero filled. - */ -static void -vm_page_release_contig(vm_page_t m, vm_pindex_t count) -{ - - while (count--) { - /* Leave PG_ZERO unchanged. */ - vm_page_free_toq(m); - m++; - } -} - -/* * Increase the number of cached pages. */ void @@ -240,9 +223,10 @@ vm_paddr_t high, vm_memattr_t memattr) { vm_object_t object = kernel_object; - vm_offset_t addr, i, offset; + vm_offset_t addr; + vm_ooffset_t end_offset, offset; vm_page_t m; - int tries; + int pflags, tries; size = round_page(size); vm_map_lock(map); @@ -254,12 +238,21 @@ vm_object_reference(object); vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); + if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) + pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; + else + pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; + if (flags & M_ZERO) + pflags |= VM_ALLOC_ZERO; VM_OBJECT_LOCK(object); - for (i = 0; i < size; i += PAGE_SIZE) { + end_offset = offset + size; + for (; offset < end_offset; offset += PAGE_SIZE) { tries = 0; retry: - m = vm_phys_alloc_contig(1, low, high, PAGE_SIZE, 0); + m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1, + low, high, PAGE_SIZE, 0, memattr); if (m == NULL) { + VM_OBJECT_UNLOCK(object); if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { VM_OBJECT_UNLOCK(object); vm_map_unlock(map); @@ -269,20 +262,16 @@ tries++; goto retry; } - while (i != 0) { - i -= PAGE_SIZE; - m = vm_page_lookup(object, OFF_TO_IDX(offset + - i)); - vm_page_free(m); - } - VM_OBJECT_UNLOCK(object); + /* + * Since the pages that were allocated by any previous + * iterations of this loop are not busy, they can be + * freed by vm_object_page_remove(), which is called + * by vm_map_delete(). + */ vm_map_delete(map, addr, addr + size); vm_map_unlock(map); return (0); } - if (memattr != VM_MEMATTR_DEFAULT) - pmap_page_set_memattr(m, memattr); - vm_page_insert(m, object, OFF_TO_IDX(offset + i)); if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; @@ -302,36 +291,62 @@ * specified through the given flags, then the pages are zeroed * before they are mapped. 
*/ -static vm_offset_t -contigmapping(vm_map_t map, vm_size_t size, vm_page_t m, vm_memattr_t memattr, - int flags) +vm_offset_t +kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, + vm_paddr_t high, u_long alignment, vm_paddr_t boundary, + vm_memattr_t memattr) { vm_object_t object = kernel_object; - vm_offset_t addr, tmp_addr; + vm_offset_t addr; + vm_ooffset_t offset; + vm_page_t end_m, m; + int pflags, tries; + size = round_page(size); vm_map_lock(map); if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { vm_map_unlock(map); return (0); } + offset = addr - VM_MIN_KERNEL_ADDRESS; vm_object_reference(object); - vm_map_insert(map, object, addr - VM_MIN_KERNEL_ADDRESS, - addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); - vm_map_unlock(map); + vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, + VM_PROT_ALL, 0); + if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) + pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; + else + pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; + if (flags & M_ZERO) + pflags |= VM_ALLOC_ZERO; VM_OBJECT_LOCK(object); - for (tmp_addr = addr; tmp_addr < addr + size; tmp_addr += PAGE_SIZE) { - if (memattr != VM_MEMATTR_DEFAULT) - pmap_page_set_memattr(m, memattr); - vm_page_insert(m, object, - OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); + tries = 0; +retry: + m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, + atop(size), low, high, alignment, boundary, memattr); + if (m == NULL) { + VM_OBJECT_UNLOCK(object); + if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { + vm_map_unlock(map); + vm_contig_grow_cache(tries, low, high); + vm_map_lock(map); + VM_OBJECT_LOCK(object); + tries++; + goto retry; + } + vm_map_delete(map, addr, addr + size); + vm_map_unlock(map); + return(0); + } + end_m = m + atop(size); + for (; m < end_m; m++) { if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; - m++; } VM_OBJECT_UNLOCK(object); - vm_map_wire(map, addr, addr + size, - VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); + vm_map_unlock(map); + vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | + VM_MAP_WIRE_NOHOLES); return (addr); } @@ -354,36 +369,6 @@ return (ret); } -vm_offset_t -kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, - vm_paddr_t high, unsigned long alignment, unsigned long boundary, - vm_memattr_t memattr) -{ - vm_offset_t ret; - vm_page_t pages; - unsigned long npgs; - int tries; - - size = round_page(size); - npgs = size >> PAGE_SHIFT; - tries = 0; -retry: - pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary); - if (pages == NULL) { - if (tries < ((flags & M_NOWAIT) != 0 ? 
1 : 3)) { - vm_contig_grow_cache(tries, low, high); - tries++; - goto retry; - } - ret = 0; - } else { - ret = contigmapping(map, size, pages, memattr, flags); - if (ret == 0) - vm_page_release_contig(pages, npgs); - } - return (ret); -} - void contigfree(void *addr, unsigned long size, struct malloc_type *type) { --- ./vm/vm_page.c-9.1 2013-01-02 19:30:47.000000000 -0500 +++ ./vm/vm_page.c 2013-03-28 16:22:41.000000000 -0400 @@ -137,6 +137,7 @@ static uma_zone_t fakepg_zone; +static struct vnode *vm_page_alloc_init(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_queue_remove(int queue, vm_page_t m); static void vm_page_enqueue(int queue, vm_page_t m); @@ -1547,12 +1548,166 @@ } /* + * vm_page_alloc_contig: + * + * Allocate a contiguous set of physical pages of the given size "npages" + * from the free lists. All of the physical pages must be at or above + * the given physical address "low" and below the given physical address + * "high". The given value "alignment" determines the alignment of the + * first physical page in the set. If the given value "boundary" is + * non-zero, then the set of physical pages cannot cross any physical + * address boundary that is a multiple of that value. Both "alignment" + * and "boundary" must be a power of two. + * + * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT, + * then the memory attribute setting for the physical pages is configured + * to the object's memory attribute setting. Otherwise, the memory + * attribute setting for the physical pages is configured to "memattr", + * overriding the object's memory attribute setting. However, if the + * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the + * memory attribute setting for the physical pages cannot be configured + * to VM_MEMATTR_DEFAULT. + * + * The caller must always specify an allocation class. + * + * allocation classes: + * VM_ALLOC_NORMAL normal process request + * VM_ALLOC_SYSTEM system *really* needs a page + * VM_ALLOC_INTERRUPT interrupt time request + * + * optional allocation flags: + * VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page + * VM_ALLOC_NOOBJ page is not associated with an object and + * should not have the flag VPO_BUSY set + * VM_ALLOC_WIRED wire the allocated page + * VM_ALLOC_ZERO prefer a zeroed page + * + * This routine may not sleep. + */ +vm_page_t +vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, + u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary, vm_memattr_t memattr) +{ + struct vnode *drop; + vm_page_t deferred_vdrop_list, m, m_ret; + u_int flags, oflags; + int req_class; + + KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0), + ("vm_page_alloc_contig: inconsistent object/req")); + if (object != NULL) { + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + KASSERT(object->type == OBJT_PHYS, + ("vm_page_alloc_contig: object %p isn't OBJT_PHYS", + object)); + } + KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); + req_class = req & VM_ALLOC_CLASS_MASK; + + /* + * The page daemon is allowed to dig deeper into the free page list. 
+ */ + if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) + req_class = VM_ALLOC_SYSTEM; + + deferred_vdrop_list = NULL; + mtx_lock(&vm_page_queue_free_mtx); + if (cnt.v_free_count + cnt.v_cache_count >= npages + + cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && + cnt.v_free_count + cnt.v_cache_count >= npages + + cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && + cnt.v_free_count + cnt.v_cache_count >= npages)) { +#if VM_NRESERVLEVEL > 0 +retry: + if (object == NULL || (object->flags & OBJ_COLORED) == 0 || + (m_ret = vm_reserv_alloc_contig(object, pindex, npages, + low, high, alignment, boundary)) == NULL) +#endif + m_ret = vm_phys_alloc_contig(npages, low, high, + alignment, boundary); + } else { + mtx_unlock(&vm_page_queue_free_mtx); + atomic_add_int(&vm_pageout_deficit, npages); + pagedaemon_wakeup(); + return (NULL); + } + if (m_ret != NULL) + for (m = m_ret; m < &m_ret[npages]; m++) { + drop = vm_page_alloc_init(m); + if (drop != NULL) { + /* + * Enqueue the vnode for deferred vdrop(). + * + * Once the pages are removed from the free + * page list, "pageq" can be safely abused to + * construct a short-lived list of vnodes. + */ + m->pageq.tqe_prev = (void *)drop; + m->pageq.tqe_next = deferred_vdrop_list; + deferred_vdrop_list = m; + } + } + else { +#if VM_NRESERVLEVEL > 0 + if (vm_reserv_reclaim_contig(npages, low, high, alignment, + boundary)) + goto retry; +#endif + } + mtx_unlock(&vm_page_queue_free_mtx); + if (m_ret == NULL) + return (NULL); + + /* + * Initialize the pages. Only the PG_ZERO flag is inherited. + */ + flags = 0; + if ((req & VM_ALLOC_ZERO) != 0) + flags = PG_ZERO; + if ((req & VM_ALLOC_NODUMP) != 0) + flags |= PG_NODUMP; + if ((req & VM_ALLOC_WIRED) != 0) + atomic_add_int(&cnt.v_wire_count, npages); + oflags = VPO_UNMANAGED; + if (object != NULL) { + if ((req & VM_ALLOC_NOBUSY) == 0) + oflags |= VPO_BUSY; + if (object->memattr != VM_MEMATTR_DEFAULT && + memattr == VM_MEMATTR_DEFAULT) + memattr = object->memattr; + } + for (m = m_ret; m < &m_ret[npages]; m++) { + m->aflags = 0; + m->flags = (m->flags | PG_NODUMP) & flags; + if ((req & VM_ALLOC_WIRED) != 0) + m->wire_count = 1; + /* Unmanaged pages don't use "act_count". */ + m->oflags = oflags; + if (memattr != VM_MEMATTR_DEFAULT) + pmap_page_set_memattr(m, memattr); + if (object != NULL) + vm_page_insert(m, object, pindex); + else + m->pindex = pindex; + pindex++; + } + while (deferred_vdrop_list != NULL) { + vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev); + deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next; + } + if (vm_paging_needed()) + pagedaemon_wakeup(); + return (m_ret); +} + +/* * Initialize a page that has been freshly dequeued from a freelist. * The caller has to drop the vnode returned, if it is not NULL. * * To be called with vm_page_queue_free_mtx held. */ -struct vnode * +static struct vnode * vm_page_alloc_init(vm_page_t m) { struct vnode *drop; @@ -1592,9 +1747,6 @@ vm_page_zero_count--; /* Don't clear the PG_ZERO flag; we'll need it later. */ m->flags &= PG_ZERO; - m->aflags = 0; - m->oflags = VPO_UNMANAGED; - /* Unmanaged pages don't use "act_count". */ return (drop); } @@ -1610,29 +1762,58 @@ { struct vnode *drop; vm_page_t m; - int page_req; + u_int flags; + int req_class; + + req_class = req & VM_ALLOC_CLASS_MASK; - m = NULL; - page_req = req & VM_ALLOC_CLASS_MASK; - mtx_lock(&vm_page_queue_free_mtx); /* * Do not allocate reserved pages unless the req has asked for it. 
*/ + mtx_lock(&vm_page_queue_free_mtx); if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved || - (page_req == VM_ALLOC_SYSTEM && + (req_class == VM_ALLOC_SYSTEM && cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) || - (page_req == VM_ALLOC_INTERRUPT && + (req_class == VM_ALLOC_INTERRUPT && cnt.v_free_count + cnt.v_cache_count > 0)) { m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); } + else { + mtx_unlock(&vm_page_queue_free_mtx); + atomic_add_int(&vm_pageout_deficit, + max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); + pagedaemon_wakeup(); + return (NULL); + } if (m == NULL) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); } drop = vm_page_alloc_init(m); mtx_unlock(&vm_page_queue_free_mtx); - if (drop) + + /* + * Initialize the page. Only the PG_ZERO flag is inherited. + */ + m->aflags = 0; + flags = 0; + if ((req & VM_ALLOC_ZERO) != 0) + flags = PG_ZERO; + m->flags &= flags; + if ((req & VM_ALLOC_WIRED) != 0) { + /* + * The page lock is not required for wiring a page that does + * not belong to an object. + */ + atomic_add_int(&cnt.v_wire_count, 1); + m->wire_count = 1; + } + /* Unmanaged pages don't use "act_count". */ + m->oflags = VPO_UNMANAGED; + if (drop != NULL) vdrop(drop); + if (vm_paging_needed()) + pagedaemon_wakeup(); return (m); } --- ./vm/vm_page.h-9.1 2013-01-02 19:30:47.000000000 -0500 +++ ./vm/vm_page.h 2013-03-28 13:01:34.000000000 -0400 @@ -365,8 +365,10 @@ void vm_page_activate (vm_page_t); vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int); +vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, + u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary, vm_memattr_t memattr); vm_page_t vm_page_alloc_freelist(int, int); -struct vnode *vm_page_alloc_init(vm_page_t); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); void vm_page_cache(vm_page_t); void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t); --- ./vm/vm_phys.c-9.1 2013-01-02 19:30:47.000000000 -0500 +++ ./vm/vm_phys.c 2013-03-28 14:41:41.000000000 -0400 @@ -29,11 +29,17 @@ * POSSIBILITY OF SUCH DAMAGE. */ + /* + * Physical memory system implementation + * + * Any external functions defined by this module are only to be used by the + * virtual memory system. + */ + #include __FBSDID("$FreeBSD: release/9.1.0/sys/vm/vm_phys.c 236924 2012-06-11 21:19:59Z kib $"); #include "opt_ddb.h" -#include "opt_vm.h" #include #include @@ -45,7 +51,6 @@ #include #include #include -#include #include @@ -55,7 +60,6 @@ #include #include #include -#include /* * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each @@ -710,6 +714,47 @@ } /* + * Free a contiguous, arbitrarily sized set of physical pages. + * + * The free page queues must be locked. + */ +void +vm_phys_free_contig(vm_page_t m, u_long npages) +{ + u_int n; + int order; + + /* + * Avoid unnecessary coalescing by freeing the pages in the largest + * possible power-of-two-sized subsets. + */ + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + for (;; npages -= n) { + /* + * Unsigned "min" is used here so that "order" is assigned + * "VM_NFREEORDER - 1" when "m"'s physical address is zero + * or the low-order bits of its physical address are zero + * because the size of a physical address exceeds the size of + * a long. 
+ */ + order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1, + VM_NFREEORDER - 1); + n = 1 << order; + if (npages < n) + break; + vm_phys_free_pages(m, order); + m += n; + } + /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */ + for (; npages > 0; npages -= n) { + order = flsl(npages) - 1; + n = 1 << order; + vm_phys_free_pages(m, order); + m += n; + } +} + +/* * Set the pool for a contiguous, power of two-sized set of physical pages. */ void @@ -850,11 +895,12 @@ { struct vm_freelist *fl; struct vm_phys_seg *seg; - struct vnode *vp; vm_paddr_t pa, pa_last, size; - vm_page_t deferred_vdrop_list, m, m_ret; - int domain, flind, i, oind, order, pind; + vm_page_t m, m_ret; + u_long npages_end; + int domain, flind, oind, order, pind; + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); #if VM_NDOMAIN > 1 domain = PCPU_GET(domain); #else @@ -867,13 +913,8 @@ ("vm_phys_alloc_contig: alignment must be a power of 2")); KASSERT((boundary & (boundary - 1)) == 0, ("vm_phys_alloc_contig: boundary must be a power of 2")); - deferred_vdrop_list = NULL; /* Compute the queue that is the best fit for npages. */ for (order = 0; (1 << order) < npages; order++); - mtx_lock(&vm_page_queue_free_mtx); -#if VM_NRESERVLEVEL > 0 -retry: -#endif for (flind = 0; flind < vm_nfreelists; flind++) { for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { for (pind = 0; pind < VM_NFREEPOOL; pind++) { @@ -932,11 +973,6 @@ } } } -#if VM_NRESERVLEVEL > 0 - if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary)) - goto retry; -#endif - mtx_unlock(&vm_page_queue_free_mtx); return (NULL); done: for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { @@ -949,34 +985,10 @@ vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind); fl = (*seg->free_queues)[m_ret->pool]; vm_phys_split_pages(m_ret, oind, fl, order); - for (i = 0; i < npages; i++) { - m = &m_ret[i]; - vp = vm_page_alloc_init(m); - if (vp != NULL) { - /* - * Enqueue the vnode for deferred vdrop(). - * - * Unmanaged pages don't use "pageq", so it - * can be safely abused to construct a short- - * lived queue of vnodes. - */ - m->pageq.tqe_prev = (void *)vp; - m->pageq.tqe_next = deferred_vdrop_list; - deferred_vdrop_list = m; - } - } - for (; i < roundup2(npages, 1 << imin(oind, order)); i++) { - m = &m_ret[i]; - KASSERT(m->order == VM_NFREEORDER, - ("vm_phys_alloc_contig: page %p has unexpected order %d", - m, m->order)); - vm_phys_free_pages(m, 0); - } - mtx_unlock(&vm_page_queue_free_mtx); - while (deferred_vdrop_list != NULL) { - vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev); - deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next; - } + /* Return excess pages to the free lists. */ + npages_end = roundup2(npages, 1 << imin(oind, order)); + if (npages < npages_end) + vm_phys_free_contig(&m_ret[npages], npages_end - npages); return (m_ret); } --- ./vm/vm_phys.h-9.1 2013-01-02 19:30:47.000000000 -0500 +++ ./vm/vm_phys.h 2013-03-28 14:43:51.000000000 -0400 @@ -49,6 +49,9 @@ extern struct mem_affinity *mem_affinity; +/* + * The following functions are only to be used by the virtual memory system. 
+ */ void vm_phys_add_page(vm_paddr_t pa); vm_page_t vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high, @@ -60,6 +63,7 @@ vm_memattr_t memattr); void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end); vm_page_t vm_phys_fictitious_to_vm_page(vm_paddr_t pa); +void vm_phys_free_contig(vm_page_t m, u_long npages); void vm_phys_free_pages(vm_page_t m, int order); void vm_phys_init(void); void vm_phys_set_pool(int pool, vm_page_t m, int order); --- ./vm/vm_reserv.c-9.1 2013-01-02 19:30:47.000000000 -0500 +++ ./vm/vm_reserv.c 2013-03-02 15:44:19.000000000 -0500 @@ -285,6 +285,201 @@ } /* + * Allocates a contiguous set of physical pages of the given size "npages" + * from an existing or newly-created reservation. All of the physical pages + * must be at or above the given physical address "low" and below the given + * physical address "high". The given value "alignment" determines the + * alignment of the first physical page in the set. If the given value + * "boundary" is non-zero, then the set of physical pages cannot cross any + * physical address boundary that is a multiple of that value. Both + * "alignment" and "boundary" must be a power of two. + * + * The object and free page queue must be locked. + */ +vm_page_t +vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages, + vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) +{ + vm_paddr_t pa, size; + vm_page_t m, m_ret, mpred, msucc; + vm_pindex_t first, leftcap, rightcap; + vm_reserv_t rv; + u_long allocpages, maxpages, minpages; + int i, index, n; + + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); + + /* + * Is a reservation fundamentally impossible? + */ + if (pindex < VM_RESERV_INDEX(object, pindex) || + pindex + npages > object->size) + return (NULL); + + /* + * All reservations of a particular size have the same alignment. + * Assuming that the first page is allocated from a reservation, the + * least significant bits of its physical address can be determined + * from its offset from the beginning of the reservation and the size + * of the reservation. + * + * Could the specified index within a reservation of the smallest + * possible size satisfy the alignment and boundary requirements? + */ + pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT; + if ((pa & (alignment - 1)) != 0) + return (NULL); + size = npages << PAGE_SHIFT; + if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) + return (NULL); + + /* + * Look for an existing reservation. 
+ */ + msucc = NULL; + mpred = object->root; + while (mpred != NULL) { + KASSERT(mpred->pindex != pindex, + ("vm_reserv_alloc_contig: pindex already allocated")); + rv = vm_reserv_from_page(mpred); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + else if (mpred->pindex < pindex) { + if (msucc != NULL || + (msucc = TAILQ_NEXT(mpred, listq)) == NULL) + break; + KASSERT(msucc->pindex != pindex, + ("vm_reserv_alloc_contig: pindex already allocated")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && + vm_reserv_has_pindex(rv, pindex)) + goto found; + else if (pindex < msucc->pindex) + break; + } else if (msucc == NULL) { + msucc = mpred; + mpred = TAILQ_PREV(msucc, pglist, listq); + continue; + } + msucc = NULL; + mpred = object->root = vm_page_splay(pindex, object->root); + } + + /* + * Could at least one reservation fit between the first index to the + * left that can be used and the first index to the right that cannot + * be used? + */ + first = pindex - VM_RESERV_INDEX(object, pindex); + if (mpred != NULL) { + if ((rv = vm_reserv_from_page(mpred))->object != object) + leftcap = mpred->pindex + 1; + else + leftcap = rv->pindex + VM_LEVEL_0_NPAGES; + if (leftcap > first) + return (NULL); + } + minpages = VM_RESERV_INDEX(object, pindex) + npages; + maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES); + allocpages = maxpages; + if (msucc != NULL) { + if ((rv = vm_reserv_from_page(msucc))->object != object) + rightcap = msucc->pindex; + else + rightcap = rv->pindex; + if (first + maxpages > rightcap) { + if (maxpages == VM_LEVEL_0_NPAGES) + return (NULL); + allocpages = minpages; + } + } + + /* + * Would the last new reservation extend past the end of the object? + */ + if (first + maxpages > object->size) { + /* + * Don't allocate the last new reservation if the object is a + * vnode or backed by another object that is a vnode. + */ + if (object->type == OBJT_VNODE || + (object->backing_object != NULL && + object->backing_object->type == OBJT_VNODE)) { + if (maxpages == VM_LEVEL_0_NPAGES) + return (NULL); + allocpages = minpages; + } + /* Speculate that the object may grow. */ + } + + /* + * Allocate and populate the new reservations. The alignment and + * boundary specified for this allocation may be different from the + * alignment and boundary specified for the requested pages. For + * instance, the specified index may not be the first page within the + * first new reservation. + */ + m = vm_phys_alloc_contig(allocpages, low, high, ulmax(alignment, + VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? 
boundary : 0); + if (m == NULL) + return (NULL); + m_ret = NULL; + index = VM_RESERV_INDEX(object, pindex); + do { + rv = vm_reserv_from_page(m); + KASSERT(rv->pages == m, + ("vm_reserv_alloc_contig: reserv %p's pages is corrupted", + rv)); + KASSERT(rv->object == NULL, + ("vm_reserv_alloc_contig: reserv %p isn't free", rv)); + LIST_INSERT_HEAD(&object->rvq, rv, objq); + rv->object = object; + rv->pindex = first; + KASSERT(rv->popcnt == 0, + ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted", + rv)); + KASSERT(!rv->inpartpopq, + ("vm_reserv_alloc_contig: reserv %p's inpartpopq is TRUE", + rv)); + n = ulmin(VM_LEVEL_0_NPAGES - index, npages); + for (i = 0; i < n; i++) + vm_reserv_populate(rv); + npages -= n; + if (m_ret == NULL) { + m_ret = &rv->pages[index]; + index = 0; + } + m += VM_LEVEL_0_NPAGES; + first += VM_LEVEL_0_NPAGES; + allocpages -= VM_LEVEL_0_NPAGES; + } while (allocpages > 0); + return (m_ret); + + /* + * Found a matching reservation. + */ +found: + index = VM_RESERV_INDEX(object, pindex); + /* Does the allocation fit within the reservation? */ + if (index + npages > VM_LEVEL_0_NPAGES) + return (NULL); + m = &rv->pages[index]; + pa = VM_PAGE_TO_PHYS(m); + if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 || + ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) + return (NULL); + /* Handle vm_page_rename(m, new_object, ...). */ + for (i = 0; i < npages; i++) + if ((rv->pages[index + i].flags & (PG_CACHED | PG_FREE)) == 0) + return (NULL); + for (i = 0; i < npages; i++) + vm_reserv_populate(rv); + return (m); +} + +/* * Allocates a page from an existing or newly-created reservation. * * The object and free page queue must be locked. --- ./vm/vm_reserv.h-9.1 2013-01-02 19:30:47.000000000 -0500 +++ ./vm/vm_reserv.h 2013-03-02 15:44:56.000000000 -0500 @@ -42,6 +42,9 @@ #if VM_NRESERVLEVEL > 0 +vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, + u_long npages, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary); vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex); void vm_reserv_break_all(vm_object_t object); boolean_t vm_reserv_free_page(vm_page_t m); --- ./sys/cdefs.h-9.1 2013-01-02 19:32:11.000000000 -0500 +++ ./sys/cdefs.h 2013-03-02 15:07:16.000000000 -0500 @@ -404,6 +404,22 @@ (__offsetof(type, end) - __offsetof(type, start)) /* + * Given the pointer x to the member m of the struct s, return + * a pointer to the containing structure. When using GCC, we first + * assign pointer x to a local variable, to check that its type is + * compatible with member m. + */ +#if __GNUC_PREREQ__(3, 1) +#define __containerof(x, s, m) ({ \ + const volatile __typeof(((s *)0)->m) *__x = (x); \ + __DEQUALIFY(s *, (const volatile char *)__x - __offsetof(s, m));\ +}) +#else +#define __containerof(x, s, m) \ + __DEQUALIFY(s *, (const volatile char *)(x) - __offsetof(s, m)) +#endif + +/* * Compiler-dependent macros to declare that functions take printf-like * or scanf-like arguments. They are null except for versions of gcc * that are known to support the features properly (old versions of gcc-2
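
The pcireg.h hunk adds the newer PCIER_/PCIEM_ spellings (PCIER_DEVICE_CTL, PCIEM_CTL_MAX_READ_REQUEST, PCIER_LINK_CAP, PCIEM_LINK_CAP_MAX_SPEED, PCIER_LINK_CAP2) alongside the existing PCIR_EXPRESS_/PCIM_EXP_ names, so code written against the newer naming builds on 9.1. A minimal sketch of how a driver might use the added names to adjust the PCIe Max Read Request Size; the function and its size_code argument are hypothetical, and it assumes the stock pci_find_cap()/pci_read_config()/pci_write_config() accessors.

/*
 * Sketch only: raise the PCIe Max Read Request Size on a device using the
 * PCIER_/PCIEM_ names added by the pcireg.h hunk.  "dev" is a device_t
 * owned by the (hypothetical) calling driver; size_code is the encoded
 * field value (0 = 128 bytes, 1 = 256, ... 5 = 4096).
 */
#include <sys/param.h>
#include <sys/bus.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

static void
example_set_max_read_req(device_t dev, int size_code)
{
        int cap;
        uint16_t ctl;

        /* Locate the PCI Express capability; bail if the device lacks one. */
        if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
                return;

        /* Read Device Control and replace the Max Read Request Size field. */
        ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
        ctl &= ~PCIEM_CTL_MAX_READ_REQUEST;
        ctl |= (size_code << 12) & PCIEM_CTL_MAX_READ_REQUEST;
        pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl, 2);
}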
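
The iicbb.c and iicbb_if.m hunks add optional pre_xfer/post_xfer methods to the bit-banging I2C interface, give them no-op defaults, and route iicbus_transfer through iicbb_transfer(), which brackets iicbus_transfer_gen() with IICBB_PRE_XFER()/IICBB_POST_XFER() on the parent. A sketch of how a parent bit-bang driver might implement the hooks to serialize access to its shared pins; the foo_* names and softc layout are invented for illustration, and only the two new methods are shown.

/*
 * Sketch only: a hypothetical bit-banging parent driver implementing the
 * new optional IICBB_PRE_XFER()/IICBB_POST_XFER() methods.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include "iicbb_if.h"

struct foo_softc {
        struct mtx      sc_hw_mtx;      /* protects the shared GPIO pins */
};

static int
foo_iicbb_pre_xfer(device_t dev)
{
        struct foo_softc *sc = device_get_softc(dev);

        /* Gate the shared pins before the bit-banged transfer starts. */
        mtx_lock(&sc->sc_hw_mtx);
        return (0);
}

static void
foo_iicbb_post_xfer(device_t dev)
{
        struct foo_softc *sc = device_get_softc(dev);

        mtx_unlock(&sc->sc_hw_mtx);
}

static device_method_t foo_methods[] = {
        /* setsda/setscl/getsda/getscl/reset/callback omitted for brevity. */
        DEVMETHOD(iicbb_pre_xfer,       foo_iicbb_pre_xfer),
        DEVMETHOD(iicbb_post_xfer,      foo_iicbb_post_xfer),
        { 0, 0 }
};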
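
The vm_contig.c, vm_page.c, vm_phys.c and vm_reserv.c hunks replace the old vm_phys_alloc_contig()-based paths with vm_page_alloc_contig(), which honors allocation classes, can draw on superpage reservations, and lets kmem_alloc_attr()/kmem_alloc_contig() retry through vm_contig_grow_cache(). The consumer-facing interface is unchanged; as a reminder, a hedged sketch of a driver requesting contiguous DMA memory through contigmalloc(), which bottoms out in kmem_alloc_contig() and the new allocator. The malloc type, sizes and constraints below are illustrative only.

/*
 * Sketch only: requesting physically contiguous, page-aligned DMA memory
 * below 4 GB that does not cross a 64 KB boundary.
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

static MALLOC_DEFINE(M_FOODMA, "foodma", "example contiguous DMA buffers");

static void *
foo_alloc_ring(unsigned long size)
{

        return (contigmalloc(size, M_FOODMA, M_WAITOK | M_ZERO,
            0,                  /* low: no lower bound */
            0xffffffffUL,       /* high: keep it below 4 GB */
            PAGE_SIZE,          /* alignment */
            65536));            /* must not cross a 64 KB boundary */
}

static void
foo_free_ring(void *ring, unsigned long size)
{

        if (ring != NULL)
                contigfree(ring, size, M_FOODMA);
}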
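
The cdefs.h hunk adds __containerof(), which maps a pointer to a structure member back to a pointer to the enclosing structure, with a member-type check when building with GCC 3.1 or later. A small self-contained sketch of the idiom; the struct node/struct entry types are invented for illustration.

/*
 * Sketch only: recovering the enclosing structure from a pointer to one of
 * its members with the new __containerof().
 */
#include <sys/cdefs.h>
#include <stdio.h>

struct entry {
        int     e_key;
};

struct node {
        char            n_name[16];
        struct entry    n_entry;        /* embedded member */
};

int
main(void)
{
        struct node n = { .n_name = "example" };
        struct entry *ep = &n.n_entry;
        struct node *np;

        /* Map the member pointer back to its containing struct node. */
        np = __containerof(ep, struct node, n_entry);
        printf("%s (%p == %p)\n", np->n_name, (void *)np, (void *)&n);
        return (0);
}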