--- //depot/vendor/freebsd/src/lib/libvmmapi/vmmapi_freebsd.c +++ //depot/user/jhb/bhyve/lib/libvmmapi/vmmapi_freebsd.c @@ -35,8 +35,17 @@ #include #include +#include +#include + #include "vmmapi.h" +#define I386_TSS_SIZE 104 + +#define DESC_PRESENT 0x00000080 +#define DESC_LONGMODE 0x00002000 +#define DESC_DEF32 0x00004000 +#define DESC_GRAN 0x00008000 #define DESC_UNUSABLE 0x00010000 #define GUEST_NULL_SEL 0 @@ -52,6 +61,159 @@ gdtr[GUEST_DATA_SEL] = 0x0000900000000000; } +static struct segment_descriptor i386_gdt[] = { + {}, /* NULL */ + { .sd_lolimit = 0xffff, .sd_type = SDT_MEMER, /* CODE */ + .sd_p = 1, .sd_hilimit = 0xf, .sd_def32 = 1, .sd_gran = 1 }, + { .sd_lolimit = 0xffff, .sd_type = SDT_MEMRW, /* DATA */ + .sd_p = 1, .sd_hilimit = 0xf, .sd_def32 = 1, .sd_gran = 1 }, + { .sd_lolimit = I386_TSS_SIZE - 1, /* TSS */ + .sd_type = SDT_SYS386TSS, .sd_p = 1 } +}; + +static int +vm_setup_freebsd_i386(struct vmctx *vmctx, int vcpu, uint64_t rip, + uint64_t rsp) +{ + uint64_t cr0, rflags, desc_base, gdtbase, tssbase; + uint32_t desc_access, desc_limit; + uint16_t gsel; + struct segment_descriptor *gdt; + int error, tmp; + + /* A 32-bit guest requires unrestricted mode. */ + error = vm_get_capability(vmctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, &tmp); + if (error) + goto done; + error = vm_set_capability(vmctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); + if (error) + goto done; + + cr0 = CR0_PE | CR0_NE; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, 0)) != 0) + goto done; + + /* + * Forcing EFER to 0 causes bhyve to clear the "IA-32e guest + * mode" entry control. + */ + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_EFER, 0))) + goto done; + + /* + * userboot places the bootinfo struct at 0x2000 and the stack + * at 0x1000 - 0x2000. Place the GDT at 0x3000. + */ + gdtbase = 0x3000; + gdt = vm_map_gpa(vmctx, gdtbase, 0x1000); + if (gdt == NULL) + return (EFAULT); + memcpy(gdt, i386_gdt, sizeof(i386_gdt)); + desc_base = gdtbase; + desc_limit = sizeof(i386_gdt) - 1; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, + desc_base, desc_limit, 0); + if (error != 0) + goto done; + + /* Place the TSS one page above the GDT. */ + tssbase = gdtbase + 0x1000; + gdt[3].sd_lobase = tssbase; + + rflags = 0x2; + error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); + if (error) + goto done; + + desc_base = 0; + desc_limit = 0xffffffff; + desc_access = DESC_GRAN | DESC_DEF32 | DESC_PRESENT | SDT_MEMERA; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, + desc_base, desc_limit, desc_access); + + desc_access = DESC_GRAN | DESC_DEF32 | DESC_PRESENT | SDT_MEMRWA; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + desc_base = tssbase; + desc_limit = I386_TSS_SIZE - 1; + desc_access = DESC_PRESENT | SDT_SYS386BSY; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, 0, 0, + DESC_UNUSABLE); + if (error) + goto done; + + gsel = GSEL(GUEST_CODE_SEL, SEL_KPL); + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, gsel)) != 0) + goto done; + + gsel = GSEL(GUEST_DATA_SEL, SEL_KPL); + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, gsel)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, gsel)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, gsel)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, gsel)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, gsel)) != 0) + goto done; + + gsel = GSEL(3, SEL_KPL); + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, gsel)) != 0) + goto done; + + /* LDTR is pointing to the null selector */ + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) + goto done; + + /* entry point */ + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, rsp)) != 0) + goto done; + + error = 0; +done: + return (error); +} + /* * Setup the 'vcpu' register set such that it will begin execution at * 'rip' in long mode. @@ -66,6 +228,8 @@ uint32_t desc_access, desc_limit; uint16_t gsel; + if (cr3 == 0) + return (vm_setup_freebsd_i386(vmctx, vcpu, rip, rsp)); cr0 = CR0_PE | CR0_PG | CR0_NE; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) goto done; --- //depot/vendor/freebsd/src/sys/amd64/vmm/vmm_instruction_emul.c +++ //depot/user/jhb/bhyve/sys/amd64/vmm/vmm_instruction_emul.c @@ -54,6 +54,14 @@ CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ }; +enum paging_mode { + PAGING_MODE_FLAT, + PAGING_MODE_32, + PAGING_MODE_PAE, + PAGING_MODE_64, + PAGING_MODE_INVALID, +}; + /* struct vie_op.op_type */ enum { VIE_OP_TYPE_NONE = 0, @@ -474,18 +482,101 @@ vie->index_register = VM_REG_LAST; } +static enum paging_mode +cpu_paging_mode(struct vm *vm, int cpuid) +{ + uint64_t reg; + + if (vm_get_register(vm, cpuid, VM_REG_GUEST_CR0, ®) != 0) + return (PAGING_MODE_INVALID); + if (!(reg & CR0_PG)) + return (PAGING_MODE_FLAT); + if (vm_get_register(vm, cpuid, VM_REG_GUEST_CR4, ®) != 0) + return (PAGING_MODE_INVALID); + if (!(reg & CR4_PAE)) + return (PAGING_MODE_32); + if (vm_get_register(vm, cpuid, VM_REG_GUEST_EFER, ®) != 0) + return (PAGING_MODE_INVALID); + if (reg & EFER_LME) + return (PAGING_MODE_64); + else + return (PAGING_MODE_PAE); +} + static int gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys, - uint64_t *gpa, uint64_t *gpaend) + uint64_t *gpa, uint64_t *gpaend, enum paging_mode paging_mode) { int nlevels, ptpshift, ptpindex; uint64_t *ptpbase, pte, pgsize; + uint32_t *ptpbase32, pte32; void *cookie; - /* - * XXX assumes 64-bit guest with 4 page walk levels - */ - nlevels = 4; + if (paging_mode == PAGING_MODE_FLAT) { + *gpa = gla; + *gpaend = 0xffffffff; + return (0); + } + + if (paging_mode == PAGING_MODE_32) { + nlevels = 2; + while (--nlevels >= 0) { + /* Zero out the lower 12 bits. */ + ptpphys &= ~0xfff; + + ptpbase32 = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, + VM_PROT_READ, &cookie); + + if (ptpbase32 == NULL) + goto error; + + ptpshift = PAGE_SHIFT + nlevels * 10; + ptpindex = (gla >> ptpshift) & 0x3FF; + pgsize = 1UL << ptpshift; + + pte32 = ptpbase32[ptpindex]; + + vm_gpa_release(cookie); + + if ((pte32 & PG_V) == 0) + goto error; + + if (pte32 & PG_PS) + break; + + ptpphys = pte32; + } + + /* Zero out the lower 'ptpshift' bits */ + pte32 >>= ptpshift; pte32 <<= ptpshift; + *gpa = pte32 | (gla & (pgsize - 1)); + *gpaend = pte32 + pgsize; + return (0); + } + + if (paging_mode == PAGING_MODE_PAE) { + /* Zero out the lower 4 bits and the upper 12 bits */ + ptpphys >>= 4; ptpphys <<= 16; ptpphys >>= 12; + + ptpbase = vm_gpa_hold(vm, ptpphys, sizeof(*ptpbase) * 4, + VM_PROT_READ, &cookie); + if (ptpbase == NULL) + goto error; + + ptpindex = (gla >> 30) & 0x3; + + pte = ptpbase[ptpindex]; + + vm_gpa_release(cookie); + + if ((pte & PG_V) == 0) + goto error; + + ptpphys = pte; + + nlevels = 2; + } else + nlevels = 4; while (--nlevels >= 0) { /* Zero out the lower 12 bits and the upper 12 bits */ ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; @@ -533,18 +624,22 @@ int n, err, prot; uint64_t gpa, gpaend, off; void *hpa, *cookie; + enum paging_mode paging_mode; /* * XXX cache previously fetched instructions using 'rip' as the tag */ + paging_mode = cpu_paging_mode(vm, cpuid); + if (paging_mode == PAGING_MODE_INVALID) + return (-1); prot = VM_PROT_READ | VM_PROT_EXECUTE; if (inst_length > VIE_INST_SIZE) panic("vmm_fetch_instruction: invalid length %d", inst_length); /* Copy the instruction into 'vie' */ while (vie->num_valid < inst_length) { - err = gla2gpa(vm, rip, cr3, &gpa, &gpaend); + err = gla2gpa(vm, rip, cr3, &gpa, &gpaend, paging_mode); if (err) break; @@ -626,15 +721,9 @@ } static int -decode_modrm(struct vie *vie) +decode_modrm(struct vie *vie, enum cpu_mode cpu_mode) { uint8_t x; - enum cpu_mode cpu_mode; - - /* - * XXX assuming that guest is in IA-32E 64-bit mode - */ - cpu_mode = CPU_MODE_64BIT; if (vie_peek(vie, &x)) return (-1); @@ -913,14 +1002,26 @@ int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie) { + uint64_t efer; + enum cpu_mode cpu_mode; - if (decode_rex(vie)) + if (vm_get_register(vm, cpuid, VM_REG_GUEST_EFER, &efer) != 0) return (-1); + if (efer & EFER_LMA) + cpu_mode = CPU_MODE_64BIT; + else + cpu_mode = CPU_MODE_COMPATIBILITY; + + if (cpu_mode == CPU_MODE_64BIT) { + if (decode_rex(vie)) + return (-1); + } + if (decode_opcode(vie)) return (-1); - if (decode_modrm(vie)) + if (decode_modrm(vie, cpu_mode)) return (-1); if (decode_sib(vie)) --- //depot/vendor/freebsd/src/sys/boot/common/load_elf32.c +++ //depot/user/jhb/bhyve/sys/boot/common/load_elf32.c @@ -2,5 +2,6 @@ __FBSDID("$FreeBSD: head/sys/boot/common/load_elf32.c 119483 2003-08-25 23:30:41Z obrien $"); #define __ELF_WORD_SIZE 32 +#define _MACHINE_ELF_WANT_32BIT #include "load_elf.c" --- //depot/vendor/freebsd/src/sys/boot/common/load_elf32_obj.c +++ //depot/user/jhb/bhyve/sys/boot/common/load_elf32_obj.c @@ -2,5 +2,6 @@ __FBSDID("$FreeBSD: head/sys/boot/common/load_elf32_obj.c 134459 2004-08-29 00:48:42Z iedowse $"); #define __ELF_WORD_SIZE 32 +#define _MACHINE_ELF_WANT_32BIT #include "load_elf_obj.c" --- //depot/vendor/freebsd/src/sys/boot/userboot/userboot/Makefile +++ //depot/user/jhb/bhyve/sys/boot/userboot/userboot/Makefile @@ -11,6 +11,7 @@ LIBDIR= /boot SRCS= autoload.c +SRCS+= biossmap.c SRCS+= bootinfo.c SRCS+= bootinfo32.c SRCS+= bootinfo64.c --- //depot/vendor/freebsd/src/sys/boot/userboot/userboot/bootinfo32.c +++ //depot/user/jhb/bhyve/sys/boot/userboot/userboot/bootinfo32.c @@ -66,7 +66,7 @@ COPY32(strlen(s) + 1, a, c); \ if (c) \ CALLBACK(copyin, s, a, strlen(s) + 1); \ - a += roundup(strlen(s) + 1, sizeof(u_long));\ + a += roundup(strlen(s) + 1, sizeof(uint32_t));\ } #define MOD_NAME(a, s, c) MOD_STR(MODINFO_NAME, a, s, c) @@ -78,7 +78,7 @@ COPY32(sizeof(s), a, c); \ if (c) \ CALLBACK(copyin, &s, a, sizeof(s)); \ - a += roundup(sizeof(s), sizeof(u_long)); \ + a += roundup(sizeof(s), sizeof(uint32_t)); \ } #define MOD_ADDR(a, s, c) MOD_VAR(MODINFO_ADDR, a, s, c) @@ -89,7 +89,7 @@ COPY32(mm->md_size, a, c); \ if (c) \ CALLBACK(copyin, mm->md_data, a, mm->md_size); \ - a += roundup(mm->md_size, sizeof(u_long));\ + a += roundup(mm->md_size, sizeof(uint32_t));\ } #define MOD_END(a, c) { \ @@ -146,6 +146,7 @@ int bootdevnr, howto; char *kernelname; const char *kernelpath; + uint64_t lowmem, highmem; howto = bi_getboothowto(args); @@ -198,9 +199,7 @@ file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto); file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp); file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend); -#if 0 bios_addsmapdata(kfp); -#endif /* Figure out the size and location of the metadata */ *modulep = addr; @@ -237,11 +236,10 @@ bi.bi_bios_geom[i] = bd_getbigeom(i); #endif bi.bi_size = sizeof(bi); + CALLBACK(getmem, &lowmem, &highmem); bi.bi_memsizes_valid = 1; -#if 0 - bi.bi_basemem = bios_basemem / 1024; - bi.bi_extmem = bios_extmem / 1024; -#endif + bi.bi_basemem = 640; + bi.bi_extmem = (lowmem - 0x100000) / 1024; bi.bi_envp = envp; bi.bi_modulep = *modulep; bi.bi_kernend = kernend; --- //depot/vendor/freebsd/src/sys/boot/userboot/userboot/bootinfo64.c +++ //depot/user/jhb/bhyve/sys/boot/userboot/userboot/bootinfo64.c @@ -169,53 +169,6 @@ #endif } -struct smap { - uint64_t base; - uint64_t length; - uint32_t type; -} __packed; - -/* From FreeBSD */ -#define SMAP_TYPE_MEMORY 1 - -#define GB (1024UL * 1024 * 1024) - -#define MODINFOMD_SMAP 0x1001 - -static void -bios_addsmapdata(struct preloaded_file *kfp) -{ - uint64_t lowmem, highmem; - int smapnum, len; - struct smap smap[3], *sm; - - CALLBACK(getmem, &lowmem, &highmem); - - sm = &smap[0]; - - sm->base = 0; /* base memory */ - sm->length = 640 * 1024; - sm->type = SMAP_TYPE_MEMORY; - sm++; - - sm->base = 0x100000; /* extended memory */ - sm->length = lowmem - 0x100000; - sm->type = SMAP_TYPE_MEMORY; - sm++; - - smapnum = 2; - - if (highmem != 0) { - sm->base = 4 * GB; - sm->length = highmem; - sm->type = SMAP_TYPE_MEMORY; - smapnum++; - } - - len = smapnum * sizeof (struct smap); - file_addmetadata(kfp, MODINFOMD_SMAP, len, &smap[0]); -} - /* * Load the information expected by an amd64 kernel. * --- //depot/vendor/freebsd/src/sys/boot/userboot/userboot/elf32_freebsd.c +++ //depot/user/jhb/bhyve/sys/boot/userboot/userboot/elf32_freebsd.c @@ -57,7 +57,7 @@ Elf_Ehdr *ehdr; vm_offset_t entry, bootinfop, modulep, kernend; int boothowto, err, bootdev; - uint32_t stack[1024]; + uint32_t stack[1024], *sp; if ((md = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL) @@ -78,16 +78,24 @@ /* * Build a scratch stack at physical 0x1000 */ - stack[0] = boothowto; - stack[1] = bootdev; - stack[2] = 0; - stack[3] = 0; - stack[4] = 0; - stack[5] = bootinfop; - stack[6] = modulep; - stack[7] = kernend; + memset(stack, 0, sizeof(stack)); + sp = (uint32_t *)((char *)stack + sizeof(stack)); + *--sp = kernend; + *--sp = modulep; + *--sp = bootinfop; + *--sp = 0; + *--sp = 0; + *--sp = 0; + *--sp = bootdev; + *--sp = boothowto; + + /* + * Fake return address to mimic "new" boot blocks. For more + * details see recover_bootinfo in locore.S. + */ + *--sp = 0xbeefface; CALLBACK(copyin, stack, 0x1000, sizeof(stack)); - CALLBACK(setreg, 4, 0x1000); + CALLBACK(setreg, 4, (char *)sp - (char *)stack + 0x1000); CALLBACK(exec, entry); panic("exec returned"); --- //depot/vendor/freebsd/src/sys/boot/userboot/userboot/libuserboot.h +++ //depot/user/jhb/bhyve/sys/boot/userboot/userboot/libuserboot.h @@ -65,3 +65,4 @@ int bi_load32(char *args, int *howtop, int *bootdevp, vm_offset_t *bip, vm_offset_t *modulep, vm_offset_t *kernend); int bi_load64(char *args, vm_offset_t *modulep, vm_offset_t *kernend); +void bios_addsmapdata(struct preloaded_file *kfp);