Index: sys/amd64/include/vmm_dev.h =================================================================== --- sys/amd64/include/vmm_dev.h (revision 246377) +++ sys/amd64/include/vmm_dev.h (working copy) @@ -35,6 +35,7 @@ #endif struct vm_memory_segment { + vm_paddr_t hpa; /* out */ vm_paddr_t gpa; /* in */ size_t len; /* in */ }; Index: sys/amd64/include/vmm.h =================================================================== --- sys/amd64/include/vmm.h (revision 246377) +++ sys/amd64/include/vmm.h (working copy) @@ -90,7 +90,7 @@ struct vm *vm_create(const char *name); void vm_destroy(struct vm *vm); const char *vm_name(struct vm *vm); -int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len); +int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t *ret_hpa); int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len); vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t size); Index: sys/amd64/vmm/vmm.c =================================================================== --- sys/amd64/vmm/vmm.c (revision 246377) +++ sys/amd64/vmm/vmm.c (working copy) @@ -290,44 +290,6 @@ return (vm); } -static void -vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg) -{ - size_t len; - vm_paddr_t hpa; - void *host_domain; - - host_domain = iommu_host_domain(); - - len = 0; - while (len < seg->len) { - hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE); - if (hpa == (vm_paddr_t)-1) { - panic("vm_free_mem_segs: cannot free hpa " - "associated with gpa 0x%016lx", seg->gpa + len); - } - - /* - * Remove the 'gpa' to 'hpa' mapping in VMs domain. - * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'. - */ - iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE); - iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE); - - vmm_mem_free(hpa, PAGE_SIZE); - - len += PAGE_SIZE; - } - - /* - * Invalidate cached translations associated with 'vm->iommu' since - * we have now moved some pages from it. 
- */ - iommu_invalidate_tlb(vm->iommu); - - bzero(seg, sizeof(struct vm_memory_segment)); -} - void vm_destroy(struct vm *vm) { @@ -336,7 +298,7 @@ ppt_unassign_all(vm); for (i = 0; i < vm->num_mem_segs; i++) - vm_free_mem_seg(vm, &vm->mem_segs[i]); + vmm_mem_free(vm->mem_segs[i].hpa, vm->mem_segs[i].len); vm->num_mem_segs = 0; @@ -374,123 +336,50 @@ VM_PROT_NONE, spok)); } -/* - * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise - */ -static boolean_t -vm_gpa_available(struct vm *vm, vm_paddr_t gpa) -{ - int i; - vm_paddr_t gpabase, gpalimit; - - if (gpa & PAGE_MASK) - panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa); - - for (i = 0; i < vm->num_mem_segs; i++) { - gpabase = vm->mem_segs[i].gpa; - gpalimit = gpabase + vm->mem_segs[i].len; - if (gpa >= gpabase && gpa < gpalimit) - return (FALSE); - } - - return (TRUE); -} - int -vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) +vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t *ret_hpa) { - int error, available, allocated; - struct vm_memory_segment *seg; - vm_paddr_t g, hpa; - void *host_domain; + int error; + vm_paddr_t hpa; const boolean_t spok = TRUE; /* superpage mappings are ok */ - - if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) - return (EINVAL); - available = allocated = 0; - g = gpa; - while (g < gpa + len) { - if (vm_gpa_available(vm, g)) - available++; - else - allocated++; - - g += PAGE_SIZE; - } - /* - * If there are some allocated and some available pages in the address - * range then it is an error. + * find the hpa if already it was already vm_malloc'd. */ - if (allocated && available) - return (EINVAL); + hpa = vm_gpa2hpa(vm, gpa, len); + if (hpa != ((vm_paddr_t)-1)) + goto out; - /* - * If the entire address range being requested has already been - * allocated then there isn't anything more to do. - */ - if (allocated && available == 0) - return (0); - if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) return (E2BIG); - host_domain = iommu_host_domain(); + hpa = vmm_mem_alloc(len); + if (hpa == 0) + return (ENOMEM); - seg = &vm->mem_segs[vm->num_mem_segs]; - - error = 0; - seg->gpa = gpa; - seg->len = 0; - while (seg->len < len) { - hpa = vmm_mem_alloc(PAGE_SIZE); - if (hpa == 0) { - error = ENOMEM; - break; - } - - error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE, - VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok); - if (error) - break; - - /* - * Remove the 1:1 mapping for 'hpa' from the 'host_domain'. - * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain. - */ - iommu_remove_mapping(host_domain, hpa, PAGE_SIZE); - iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE); - - seg->len += PAGE_SIZE; - } - + error = VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_WRITE_BACK, + VM_PROT_ALL, spok); if (error) { - vm_free_mem_seg(vm, seg); + vmm_mem_free(hpa, len); return (error); } - /* - * Invalidate cached translations associated with 'host_domain' since - * we have now moved some pages from it. 
- */ - iommu_invalidate_tlb(host_domain); + iommu_create_mapping(vm->iommu, gpa, hpa, len); + vm->mem_segs[vm->num_mem_segs].gpa = gpa; + vm->mem_segs[vm->num_mem_segs].hpa = hpa; + vm->mem_segs[vm->num_mem_segs].len = len; vm->num_mem_segs++; - +out: + *ret_hpa = hpa; return (0); } vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len) { - vm_paddr_t nextpage; - nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE); - if (len > nextpage - gpa) - panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len); - return (VMMMAP_GET(vm->cookie, gpa)); } Index: sys/amd64/vmm/vmm_dev.c =================================================================== --- sys/amd64/vmm/vmm_dev.c (revision 246377) +++ sys/amd64/vmm/vmm_dev.c (working copy) @@ -313,11 +313,11 @@ break; case VM_MAP_MEMORY: seg = (struct vm_memory_segment *)data; - error = vm_malloc(sc->vm, seg->gpa, seg->len); + error = vm_malloc(sc->vm, seg->gpa, seg->len, &seg->hpa); break; case VM_GET_MEMORY_SEG: seg = (struct vm_memory_segment *)data; - seg->len = 0; + seg->hpa = seg->len = 0; (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg); error = 0; break; Index: sys/amd64/vmm/vmm_mem.c =================================================================== --- sys/amd64/vmm/vmm_mem.c (revision 246377) +++ sys/amd64/vmm/vmm_mem.c (working copy) @@ -36,12 +36,9 @@ #include #include #include -#include #include #include -#include -#include #include #include @@ -52,84 +49,349 @@ #include "vmm_util.h" #include "vmm_mem.h" -SYSCTL_DECL(_hw_vmm); +static MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory"); -static u_long pages_allocated; -SYSCTL_ULONG(_hw_vmm, OID_AUTO, pages_allocated, CTLFLAG_RD, - &pages_allocated, 0, "4KB pages allocated"); +#define MB (1024 * 1024) +#define GB (1024 * MB) +#define VMM_MEM_MAXSEGS 64 + +/* protected by vmm_mem_mtx */ +static struct { + vm_paddr_t base; + vm_size_t length; +} vmm_mem_avail[VMM_MEM_MAXSEGS]; + +static int vmm_mem_nsegs; + +static vm_paddr_t maxaddr; + +static struct mtx vmm_mem_mtx; + +/* + * Steal any memory that was deliberately hidden from FreeBSD either by + * the use of MAXMEM kernel config option or the hw.physmem loader tunable. + */ +static int +vmm_mem_steal_memory(void) +{ + int nsegs; + caddr_t kmdp; + uint32_t smapsize; + uint64_t base, length; + struct bios_smap *smapbase, *smap, *smapend; + + /* + * Borrowed from hammer_time() and getmemsize() in machdep.c + */ + kmdp = preload_search_by_type("elf kernel"); + if (kmdp == NULL) + kmdp = preload_search_by_type("elf64 kernel"); + + smapbase = (struct bios_smap *)preload_search_info(kmdp, + MODINFO_METADATA | MODINFOMD_SMAP); + if (smapbase == NULL) + panic("No BIOS smap info from loader!"); + + smapsize = *((uint32_t *)smapbase - 1); + smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); + + nsegs = 0; + for (smap = smapbase; smap < smapend; smap++) { + /* + * XXX + * Assuming non-overlapping, monotonically increasing + * memory segments. + */ + if (smap->type != SMAP_TYPE_MEMORY) + continue; + if (smap->length == 0) + break; + + base = roundup(smap->base, NBPDR); + length = rounddown(smap->length, NBPDR); + + /* Skip this segment if FreeBSD is using all of it. */ + if (base + length <= ptoa(Maxmem)) + continue; + + /* + * If FreeBSD is using part of this segment then adjust + * 'base' and 'length' accordingly. 
+ */ + if (base < ptoa(Maxmem)) { + uint64_t used; + used = roundup(ptoa(Maxmem), NBPDR) - base; + base += used; + length -= used; + } + + if (length == 0) + continue; + + vmm_mem_avail[nsegs].base = base; + vmm_mem_avail[nsegs].length = length; + + if (base + length > maxaddr) + maxaddr = base + length; + + if (bootverbose) { + printf("vmm_mem_populate: index %d, base 0x%0lx, " + "length %ld\n", + nsegs, vmm_mem_avail[nsegs].base, + vmm_mem_avail[nsegs].length); + } + + nsegs++; + if (nsegs >= VMM_MEM_MAXSEGS) { + printf("vmm_mem_populate: maximum number of vmm memory " + "segments reached!\n"); + return (ENOSPC); + } + } + + vmm_mem_nsegs = nsegs; + + return (0); +} + static void -update_pages_allocated(int howmany) +vmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end) { - pages_allocated += howmany; /* XXX locking? */ + vm_paddr_t addr, remaining; + int pdpi, pdi, superpage_size; + pml4_entry_t *pml4p; + pdp_entry_t *pdp; + pd_entry_t *pd; + uint64_t page_attr_bits; + + if (end >= NBPML4) + panic("Cannot map memory beyond %ldGB", NBPML4 / GB); + + /* XXX FreeBSD 8.1 does not use 1G superpages in the direct map */ + if (0 && vmm_supports_1G_pages()) + superpage_size = NBPDP; + else + superpage_size = NBPDR; + + /* + * Get the page directory pointer page that contains the direct + * map address mappings. + */ + pml4p = kernel_pmap->pm_pml4; + pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK); + + page_attr_bits = PG_RW | PG_V | PG_PS | PG_G; + addr = start; + while (addr < end) { + remaining = end - addr; + pdpi = addr / NBPDP; + if (superpage_size == NBPDP && + remaining >= NBPDP && + addr % NBPDP == 0) { + /* + * If there isn't a mapping for this address then + * create one but if there is one already make sure + * it matches what we expect it to be. + */ + if (pdp[pdpi] == 0) { + pdp[pdpi] = addr | page_attr_bits; + if (0 && bootverbose) { + printf("vmm_mem_populate: mapping " + "0x%lx with 1GB page at " + "pdpi %d\n", addr, pdpi); + } + } else { + pdp_entry_t pdpe = pdp[pdpi]; + if ((pdpe & ~PAGE_MASK) != addr || + (pdpe & page_attr_bits) != page_attr_bits) { + panic("An invalid mapping 0x%016lx " + "already exists for 0x%016lx\n", + pdpe, addr); + } + } + addr += NBPDP; + } else { + if (remaining < NBPDR) { + panic("vmm_mem_populate: remaining (%ld) must " + "be greater than NBPDR (%d)\n", + remaining, NBPDR); + } + if (pdp[pdpi] == 0) { + /* + * XXX we lose this memory forever because + * we do not keep track of the virtual address + * that would be required to free this page. + */ + pd = malloc(PAGE_SIZE, M_VMM_MEM, + M_WAITOK | M_ZERO); + if ((uintptr_t)pd & PAGE_MASK) { + panic("vmm_mem_populate: page directory" + "page not aligned on %d " + "boundary\n", PAGE_SIZE); + } + pdp[pdpi] = vtophys(pd); + pdp[pdpi] |= PG_RW | PG_V | PG_U; + if (0 && bootverbose) { + printf("Creating page directory " + "at pdp index %d for 0x%016lx\n", + pdpi, addr); + } + } + pdi = (addr % NBPDP) / NBPDR; + pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK); + + /* + * Create a new mapping if one doesn't already exist + * or validate it if it does. 
+ */ + if (pd[pdi] == 0) { + pd[pdi] = addr | page_attr_bits; + if (0 && bootverbose) { + printf("vmm_mem_populate: mapping " + "0x%lx with 2MB page at " + "pdpi %d, pdi %d\n", + addr, pdpi, pdi); + } + } else { + pd_entry_t pde = pd[pdi]; + if ((pde & ~PAGE_MASK) != addr || + (pde & page_attr_bits) != page_attr_bits) { + panic("An invalid mapping 0x%016lx " + "already exists for 0x%016lx\n", + pde, addr); + } + } + addr += NBPDR; + } + } } +static int +vmm_mem_populate(void) +{ + int seg, error; + vm_paddr_t start, end; + + /* populate the vmm_mem_avail[] array */ + error = vmm_mem_steal_memory(); + if (error) + return (error); + + /* + * Now map the memory that was hidden from FreeBSD in + * the direct map VA space. + */ + for (seg = 0; seg < vmm_mem_nsegs; seg++) { + start = vmm_mem_avail[seg].base; + end = start + vmm_mem_avail[seg].length; + if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) { + panic("start (0x%016lx) and end (0x%016lx) must be " + "aligned on a %dMB boundary\n", + start, end, NBPDR / MB); + } + vmm_mem_direct_map(start, end); + } + + return (0); +} + int vmm_mem_init(void) { + int error; + mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF); + + error = vmm_mem_populate(); + if (error) + return (error); + return (0); } vm_paddr_t vmm_mem_alloc(size_t size) { - int flags; - vm_page_t m; - vm_paddr_t pa; + int i; + vm_paddr_t addr; - if (size != PAGE_SIZE) - panic("vmm_mem_alloc: invalid allocation size %lu", size); + if ((size & PDRMASK) != 0) { + panic("vmm_mem_alloc: size 0x%0lx must be " + "aligned on a 0x%0x boundary\n", size, NBPDR); + } - flags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO; + addr = 0; - while (1) { - /* - * XXX need policy to determine when to back off the allocation - */ - m = vm_page_alloc(NULL, 0, flags); - if (m == NULL) - VM_WAIT; - else + mtx_lock(&vmm_mem_mtx); + for (i = 0; i < vmm_mem_nsegs; i++) { + if (vmm_mem_avail[i].length >= size) { + addr = vmm_mem_avail[i].base; + vmm_mem_avail[i].base += size; + vmm_mem_avail[i].length -= size; + /* remove a zero length segment */ + if (vmm_mem_avail[i].length == 0) { + memmove(&vmm_mem_avail[i], + &vmm_mem_avail[i + 1], + (vmm_mem_nsegs - (i + 1)) * + sizeof(vmm_mem_avail[0])); + vmm_mem_nsegs--; + } break; + } } + mtx_unlock(&vmm_mem_mtx); - pa = VM_PAGE_TO_PHYS(m); - - if ((m->flags & PG_ZERO) == 0) - pagezero((void *)PHYS_TO_DMAP(pa)); - m->valid = VM_PAGE_BITS_ALL; - - update_pages_allocated(1); - - return (pa); + return (addr); } void vmm_mem_free(vm_paddr_t base, size_t length) { - vm_page_t m; + int i; - if (base & PAGE_MASK) { - panic("vmm_mem_free: base 0x%0lx must be aligned on a " - "0x%0x boundary\n", base, PAGE_SIZE); + if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) { + panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be " + "aligned on a 0x%0x boundary\n", base, length, NBPDR); } - if (length != PAGE_SIZE) - panic("vmm_mem_free: invalid length %lu", length); + mtx_lock(&vmm_mem_mtx); - m = PHYS_TO_VM_PAGE(base); - m->wire_count--; - vm_page_free(m); - atomic_subtract_int(&cnt.v_wire_count, 1); + for (i = 0; i < vmm_mem_nsegs; i++) { + if (vmm_mem_avail[i].base > base) + break; + } - update_pages_allocated(-1); + if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS) + panic("vmm_mem_free: cannot free any more segments"); + + /* Create a new segment at index 'i' */ + memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i], + (vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0])); + + vmm_mem_avail[i].base = base; + vmm_mem_avail[i].length = length; + + vmm_mem_nsegs++; 
+ +coalesce_some_more: + for (i = 0; i < vmm_mem_nsegs - 1; i++) { + if (vmm_mem_avail[i].base + vmm_mem_avail[i].length == + vmm_mem_avail[i + 1].base) { + vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length; + memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2], + (vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0])); + vmm_mem_nsegs--; + goto coalesce_some_more; + } + } + + mtx_unlock(&vmm_mem_mtx); } vm_paddr_t vmm_mem_maxaddr(void) { - return (ptoa(Maxmem)); + return (maxaddr); } Index: sys/amd64/vmm/io/iommu.c =================================================================== --- sys/amd64/vmm/io/iommu.c (revision 246377) +++ sys/amd64/vmm/io/iommu.c (working copy) @@ -165,7 +165,7 @@ /* * Create a domain for the devices owned by the host */ - maxaddr = vmm_mem_maxaddr(); + maxaddr = ptoa(Maxmem); host_domain = IOMMU_CREATE_DOMAIN(maxaddr); if (host_domain == NULL) panic("iommu_init: unable to create a host domain");
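
For reference (not part of the diff): with this change, VM_MAP_MEMORY both allocates the guest memory segment and reports the host physical address backing it through the new 'hpa' out field of struct vm_memory_segment. Below is a minimal userland sketch of the updated ioctl usage. The /dev/vmm/<name> path follows the usual vmm device naming and the 256MB size is an illustrative assumption; neither is taken from the patch.

	/*
	 * Hypothetical userland sketch: map 256MB of guest memory at GPA 0
	 * and read back the host physical address that vm_malloc() now
	 * returns through seg.hpa.  Header prerequisites may differ
	 * slightly depending on the tree.
	 */
	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>

	#include <machine/vmm.h>
	#include <machine/vmm_dev.h>

	int
	main(void)
	{
		struct vm_memory_segment seg;
		int fd;

		fd = open("/dev/vmm/testvm", O_RDWR);
		if (fd < 0) {
			perror("open");
			exit(1);
		}

		seg.gpa = 0;
		seg.len = 256 * 1024 * 1024;	/* must be 2MB-aligned, see below */
		if (ioctl(fd, VM_MAP_MEMORY, &seg) != 0) {
			perror("VM_MAP_MEMORY");
			exit(1);
		}

		printf("gpa 0x%lx -> hpa 0x%lx (len 0x%lx)\n",
		    (unsigned long)seg.gpa, (unsigned long)seg.hpa,
		    (unsigned long)seg.len);
		return (0);
	}

Note that vm_malloc() no longer validates alignment itself; vmm_mem_alloc() panics unless the length is a multiple of NBPDR (2MB), so callers must pass superpage-aligned sizes.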
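The core of the vmm_mem.c rewrite is the replacement of per-page vm_page_alloc() with a first-fit allocator over the memory segments stolen from FreeBSD, with vmm_mem_free() re-inserting freed ranges in sorted order and coalescing neighbors. The following stand-alone model of that free-list logic (an illustrative simplification: no vmm_mem_mtx, no alignment checks, made-up addresses) can be compiled in userland to observe the carve/insert/coalesce behavior.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <inttypes.h>

	#define	MODEL_MAXSEGS	8

	/* Model of vmm_mem_avail[]: free ranges sorted by base address. */
	static struct {
		uint64_t base;
		uint64_t length;
	} avail[MODEL_MAXSEGS];
	static int nsegs;

	static uint64_t
	model_alloc(uint64_t size)
	{
		uint64_t addr = 0;
		int i;

		/* First fit: carve 'size' bytes off the front of a segment. */
		for (i = 0; i < nsegs; i++) {
			if (avail[i].length >= size) {
				addr = avail[i].base;
				avail[i].base += size;
				avail[i].length -= size;
				if (avail[i].length == 0) {
					memmove(&avail[i], &avail[i + 1],
					    (nsegs - (i + 1)) * sizeof(avail[0]));
					nsegs--;
				}
				break;
			}
		}
		return (addr);
	}

	static void
	model_free(uint64_t base, uint64_t length)
	{
		int i;

		if (nsegs >= MODEL_MAXSEGS)
			return;		/* the kernel version panics here */

		/* Insert a new segment, keeping the array sorted by base. */
		for (i = 0; i < nsegs; i++)
			if (avail[i].base > base)
				break;
		memmove(&avail[i + 1], &avail[i], (nsegs - i) * sizeof(avail[0]));
		avail[i].base = base;
		avail[i].length = length;
		nsegs++;

		/* Coalesce adjacent segments, restarting after each merge. */
		for (i = 0; i < nsegs - 1; ) {
			if (avail[i].base + avail[i].length == avail[i + 1].base) {
				avail[i].length += avail[i + 1].length;
				memmove(&avail[i + 1], &avail[i + 2],
				    (nsegs - (i + 2)) * sizeof(avail[0]));
				nsegs--;
				i = 0;
			} else
				i++;
		}
	}

	int
	main(void)
	{
		uint64_t a, b;

		avail[0].base = 0x100000000ULL;	/* pretend 1GB above 4GB was stolen */
		avail[0].length = 1ULL << 30;
		nsegs = 1;

		a = model_alloc(1ULL << 28);	/* 256MB */
		b = model_alloc(1ULL << 28);
		printf("alloc a=0x%" PRIx64 " b=0x%" PRIx64 ", segs=%d\n", a, b, nsegs);
		model_free(a, 1ULL << 28);	/* freeing 'a' leaves a hole ... */
		model_free(b, 1ULL << 28);	/* ... freeing 'b' coalesces it back */
		printf("after free: base=0x%" PRIx64 " len=0x%" PRIx64 ", segs=%d\n",
		    avail[0].base, avail[0].length, nsegs);
		return (0);
	}

Restarting the coalesce scan after each merge, as the patch does with the coalesce_some_more label, keeps the logic simple at the cost of a rescan; with VMM_MEM_MAXSEGS capped at 64 that cost is negligible.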