Index: sys/amd64/conf/GENERIC
===================================================================
--- sys/amd64/conf/GENERIC	(revision 268883)
+++ sys/amd64/conf/GENERIC	(working copy)
@@ -356,3 +356,7 @@
 
 # VMware support
 device		vmx			# VMware VMXNET3 Ethernet
+
+options 	KTR
+options 	KTR_MASK=(KTR_GEN)
+options 	KTR_ENTRIES=(4*1024*1024)
Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h	(revision 268889)
+++ sys/amd64/include/vmm.h	(working copy)
@@ -317,6 +317,8 @@
 
 void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
 void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
+void vm_inject_ac(struct vm *vm, int vcpuid, int errcode); /* #AC */
+void vm_inject_ss(struct vm *vm, int vcpuid, int errcode); /* #SS */
 void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2);
 
 enum vm_reg_name vm_segment_name(int seg_encoding);
Index: sys/amd64/include/vmm_instruction_emul.h
===================================================================
--- sys/amd64/include/vmm_instruction_emul.h	(revision 268883)
+++ sys/amd64/include/vmm_instruction_emul.h	(working copy)
@@ -52,8 +52,8 @@
  * s
  */
 int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
-    mem_region_read_t mrr, mem_region_write_t mrw,
-    void *mrarg);
+    struct vm_guest_paging *paging, mem_region_read_t mrr,
+    mem_region_write_t mrw, void *mrarg);
 
 int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
     uint64_t val, int size);
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c	(revision 268889)
+++ sys/amd64/vmm/vmm.c	(working copy)
@@ -1235,8 +1235,8 @@
 		return (0);
 	}
 
-	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
-	    retu);
+	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
+	    mread, mwrite, retu);
 
 	return (error);
 }
@@ -1751,6 +1751,30 @@
 	vm_inject_fault(vm, vcpuid, &udf);
 }
 
+void
+vm_inject_ac(struct vm *vm, int vcpuid, int error_code)
+{
+	struct vm_exception acf = {
+		.vector = IDT_AC,
+		.error_code_valid = 1,
+		.error_code = error_code
+	};
+
+	vm_inject_fault(vm, vcpuid, &acf);
+}
+
+void
+vm_inject_ss(struct vm *vm, int vcpuid, int error_code)
+{
+	struct vm_exception ssf = {
+		.vector = IDT_SS,
+		.error_code_valid = 1,
+		.error_code = error_code
+	};
+
+	vm_inject_fault(vm, vcpuid, &ssf);
+}
+
 static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
 
 int
Index: sys/amd64/vmm/vmm_instruction_emul.c
===================================================================
--- sys/amd64/vmm/vmm_instruction_emul.c	(revision 268883)
+++ sys/amd64/vmm/vmm_instruction_emul.c	(working copy)
@@ -65,6 +65,7 @@
 	VIE_OP_TYPE_AND,
 	VIE_OP_TYPE_OR,
 	VIE_OP_TYPE_TWO_BYTE,
+	VIE_OP_TYPE_PUSH,
 	VIE_OP_TYPE_LAST
 };
 
@@ -72,6 +73,7 @@
 #define	VIE_OP_F_IMM		(1 << 0)  /* 16/32-bit immediate operand */
 #define	VIE_OP_F_IMM8		(1 << 1)  /* 8-bit immediate operand */
 #define	VIE_OP_F_MOFFSET	(1 << 2)  /* 16/32/64-bit immediate moffset */
+#define	VIE_OP_F_NO_MODRM	(1 << 3)
 
 static const struct vie_op two_byte_opcodes[256] = {
 	[0xB6] = {
@@ -105,6 +107,16 @@
 		.op_byte = 0x8B,
 		.op_type = VIE_OP_TYPE_MOV,
 	},
+	[0xA1] = {
+		.op_byte = 0xA1,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
+	},
+	[0xA3] = {
+		.op_byte = 0xA3,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
+	},
 	[0xC6] = {
 		/* XXX Group 11 extended opcode - not just MOV */
 		.op_byte = 0xC6,
@@ -132,6 +144,11 @@
 		.op_type = VIE_OP_TYPE_OR,
 		.op_flags = VIE_OP_F_IMM8,
 	},
+	[0xFF] = {
+		/* XXX Group 5 extended opcode - not just PUSH */
+		.op_byte = 0xFF,
+		.op_type = VIE_OP_TYPE_PUSH,
+	}
 };
 
 /* struct vie.mod */
@@ -346,6 +363,32 @@
 			error = vie_update_register(vm, vcpuid, reg, val, size);
 		}
 		break;
+	case 0xA1:
+		/*
+		 * MOV from seg:moffset to AX/EAX/RAX
+		 * A1:		mov AX, moffs16
+		 * A1:		mov EAX, moffs32
+		 * REX.W + A1:	mov RAX, moffs64
+		 */
+		error = memread(vm, vcpuid, gpa, &val, size, arg);
+		if (error == 0) {
+			reg = VM_REG_GUEST_RAX;
+			error = vie_update_register(vm, vcpuid, reg, val, size);
+		}
+		break;
+	case 0xA3:
+		/*
+		 * MOV from AX/EAX/RAX to seg:moffset
+		 * A3:		mov moffs16, AX
+		 * A3:		mov moffs32, EAX
+		 * REX.W + A3:	mov moffs64, RAX
+		 */
+		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+		if (error == 0) {
+			val &= size2mask[size];
+			error = memwrite(vm, vcpuid, gpa, val, size, arg);
+		}
+		break;
 	case 0xC6:
 		/*
 		 * MOV from imm8 to mem (ModRM:r/m)
@@ -553,10 +596,105 @@
 	return (error);
 }
 
+static int
+emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+#ifdef _KERNEL
+	struct seg_desc ss_desc;
+	uint64_t cr0, off, rflags, rsp, stack_gla, stack_gpa, val;
+	void *cookie, *hpa;
+	int copied, error, n, size, stackaddrsize;
+
+	size = vie->opsize;
+	/*
+	 * From "Address-Size Attributes for Stack Accesses", Intel SDM, Vol 1
+	 */
+	if (paging->cpu_mode == CPU_MODE_REAL)
+		stackaddrsize = 2;
+	else if (paging->cpu_mode == CPU_MODE_64BIT)
+		stackaddrsize = 8;	/* always fixed in 64-bit mode */
+	else {
+		/*
+		 * In protected or compatibility mode the 'B' flag in the
+		 * stack-segment descriptor determines the size of the
+		 * stack pointer.
+		 */
+		error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc);
+		KASSERT(error == 0, ("%s: error %d getting SS descriptor",
+		    __func__, error));
+		if (SEG_DESC_DEF32(ss_desc.access))
+			stackaddrsize = 4;
+		else
+			stackaddrsize = 2;
+	}
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
+	KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
+
+	rsp -= size;
+	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
+	    rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) {
+		vm_inject_ss(vm, vcpuid, 0);
+		return (0);
+	}
+
+	if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
+		vm_inject_ss(vm, vcpuid, 0);
+		return (0);
+	}
+
+	if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
+		vm_inject_ac(vm, vcpuid, 0);
+		return (0);
+	}
+
+	/* XXX MMIO reads with side-effects should be done last */
+	error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+	if (error)
+		return (error);
+
+	copied = 0;
+	while (copied < size) {
+		error = vmm_gla2gpa(vm, vcpuid, paging, stack_gla, PROT_WRITE,
+		    &stack_gpa);
+		if (error == 1)
+			return (0);	/* resume guest to handle page fault */
+		else if (error == -1)
+			return (EFAULT);
+
+		off = stack_gpa & PAGE_MASK;
+		n = min(size - copied, PAGE_SIZE - off);
+		hpa = vm_gpa_hold(vm, stack_gpa, n, PROT_WRITE, &cookie);
+		if (hpa == NULL)
+			return (EFAULT);
+		bcopy((char *)&val + copied, hpa, n);
+		vm_gpa_release(cookie);
+
+		stack_gla += n;
+		copied += n;
+	}
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
+	    stackaddrsize);
+	KASSERT(error == 0, ("%s: error %d updating rsp", __func__, error));
+	return (0);
+#else
+	return (EINVAL);	/* XXX cannot be executed in userspace context */
+#endif	/* _KERNEL */
+}
+
 int
 vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
-    mem_region_read_t memread, mem_region_write_t memwrite,
-    void *memarg)
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *memarg)
 {
 	int error;
 
@@ -564,6 +702,10 @@
 		return (EINVAL);
 
 	switch (vie->op.op_type) {
+	case VIE_OP_TYPE_PUSH:
+		error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg);
+		break;
 	case VIE_OP_TYPE_MOV:
 		error = emulate_mov(vm, vcpuid, gpa, vie, memread,
 		    memwrite, memarg);
@@ -1138,6 +1280,9 @@
 	if (cpu_mode == CPU_MODE_REAL)
 		return (-1);
 
+	if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
+		return (0);
+
 	if (vie_peek(vie, &x))
 		return (-1);
 
@@ -1314,25 +1459,15 @@
 	int i, n;
 	uint8_t x;
 	union {
-		char	buf[8];
+		char	buf[4];
 		int8_t	signed8;
 		int16_t	signed16;
 		int32_t	signed32;
-		int64_t	signed64;
 	} u;
 
 	/* Figure out immediate operand size (if any) */
-	if (vie->op.op_flags & VIE_OP_F_MOFFSET) {
+	if (vie->op.op_flags & VIE_OP_F_IMM) {
 		/*
-		 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
-		 * The memory offset size follows the address-size of the
-		 * instruction. Although this is treated as an immediate
-		 * value during instruction decoding it is interpreted as
-		 * a segment offset by the instruction emulation.
-		 */
-		vie->imm_bytes = vie->addrsize;
-	} else if (vie->op.op_flags & VIE_OP_F_IMM) {
-		/*
 		 * Section 2.2.1.5 "Immediates", Intel SDM:
 		 * In 64-bit mode the typical size of immediate operands
 		 * remains 32-bits. When the operand size if 64-bits, the
@@ -1350,7 +1485,7 @@
 	if ((n = vie->imm_bytes) == 0)
 		return (0);
 
-	KASSERT(n == 1 || n == 2 || n == 4 || n == 8,
+	KASSERT(n == 1 || n == 2 || n == 4,
 	    ("%s: invalid number of immediate bytes: %d", __func__, n));
 
 	for (i = 0; i < n; i++) {
@@ -1366,20 +1501,41 @@
 		vie->immediate = u.signed8;
 	else if (n == 2)
 		vie->immediate = u.signed16;
-	else if (n == 4)
+	else
 		vie->immediate = u.signed32;
-	else
-		vie->immediate = u.signed64;
+	return (0);
+}
 
-	if (vie->op.op_flags & VIE_OP_F_MOFFSET) {
-		/*
-		 * If the immediate value is going to be interpreted as a
-		 * segment offset then undo the sign-extension above.
-		 */
-		vie->immediate &= size2mask[n];
+static int
+decode_moffset(struct vie *vie)
+{
+	int i, n;
+	uint8_t x;
+	union {
+		char	buf[8];
+		uint64_t u64;
+	} u;
+
+	if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0)
+		return (0);
+
+	/*
+	 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
+	 * The memory offset size follows the address-size of the instruction.
+	 */
+	n = vie->addrsize;
+	KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));
+
+	u.u64 = 0;
+	for (i = 0; i < n; i++) {
+		if (vie_peek(vie, &x))
+			return (-1);
+
+		u.buf[i] = x;
+		vie_advance(vie);
 	}
-
+	vie->displacement = u.u64;
 	return (0);
 }
@@ -1470,10 +1626,13 @@
 	if (decode_displacement(vie))
 		return (-1);
-	
+
 	if (decode_immediate(vie))
 		return (-1);
 
+	if (decode_moffset(vie))
+		return (-1);
+
 	if (verify_inst_length(vie))
 		return (-1);
 
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c	(revision 268889)
+++ usr.sbin/bhyve/bhyverun.c	(working copy)
@@ -484,7 +484,7 @@
 	stats.vmexit_inst_emul++;
 
 	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
-	    &vmexit->u.inst_emul.vie);
+	    &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging);
 
 	if (err) {
 		if (err == EINVAL) {
Index: usr.sbin/bhyve/mem.c
===================================================================
--- usr.sbin/bhyve/mem.c	(revision 268883)
+++ usr.sbin/bhyve/mem.c	(working copy)
@@ -157,7 +157,9 @@
 }
 
 int
-emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie)
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
+    struct vm_guest_paging *paging)
+
 {
 	struct mmio_rb_range *entry;
 	int err;
@@ -184,7 +186,7 @@
 	}
 
 	assert(entry != NULL);
-	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie,
+	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
 	    mem_read, mem_write, &entry->mr_param);
 
 	pthread_rwlock_unlock(&mmio_rwlock);
Index: usr.sbin/bhyve/mem.h
===================================================================
--- usr.sbin/bhyve/mem.h	(revision 268883)
+++ usr.sbin/bhyve/mem.h	(working copy)
@@ -50,7 +50,8 @@
 #define	MEM_F_RW	0x3
 
 void	init_mem(void);
-int	emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie);
+int	emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
+	    struct vm_guest_paging *paging);
 
 int	register_mem(struct mem_range *memp);
 int	register_mem_fallback(struct mem_range *memp);
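
Note (not part of the patch): the sketch below shows, from the guest's point of view, the instruction form the new VIE_OP_TYPE_PUSH path is meant to handle, namely a PUSH whose memory source operand (opcode 0xFF /6) falls inside an emulated MMIO region. The MMIO address, the variable mmio_reg and the helper push_from_mmio() are invented for illustration only; stack red-zone caveats of inline assembly are ignored here.

#include <stdint.h>

/* Hypothetical MMIO register inside an emulated BAR; the address is invented. */
static volatile uint64_t *mmio_reg = (volatile uint64_t *)0xc0002000UL;

static inline uint64_t
push_from_mmio(void)
{
	uint64_t val;

	/*
	 * "pushq mem" assembles to the 0xFF /6 (Group 5) encoding that this
	 * patch teaches the decoder to recognize.  Inside the hypervisor the
	 * read of the MMIO operand goes through the mem_region_read_t
	 * callback and the pushed value is written to the guest stack via
	 * vmm_gla2gpa()/vm_gpa_hold() in emulate_push().  The popq keeps the
	 * guest stack balanced for this illustration.
	 */
	__asm __volatile("pushq %1\n\tpopq %0" : "=r" (val) : "m" (*mmio_reg));
	return (val);
}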