Index: sys/amd64/conf/GENERIC
===================================================================
--- sys/amd64/conf/GENERIC	(revision 265931)
+++ sys/amd64/conf/GENERIC	(working copy)
@@ -351,3 +351,7 @@
 
 # VMware support
 device		vmx			# VMware VMXNET3 Ethernet
+
+options 	KTR
+options 	KTR_ENTRIES=(1024*1024)
+options 	KTR_MASK=(KTR_GEN)
Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h	(revision 265931)
+++ sys/amd64/include/vmm.h	(working copy)
@@ -330,23 +330,40 @@
 	VM_EXITCODE_RENDEZVOUS,
 	VM_EXITCODE_IOAPIC_EOI,
 	VM_EXITCODE_SUSPENDED,
+	VM_EXITCODE_INOUT_STR,
 	VM_EXITCODE_MAX
 };
 
+struct vm_inout {
+	uint16_t	bytes:3;	/* 1 or 2 or 4 */
+	uint16_t	in:1;
+	uint16_t	string:1;
+	uint16_t	rep:1;
+	uint16_t	port;
+	uint32_t	eax;		/* valid for out */
+};
+
+struct vm_inout_str {
+	struct vm_inout	inout;		/* must be the first element */
+	enum vie_cpu_mode cpu_mode;
+	enum vie_paging_mode paging_mode;
+	uint64_t	rflags;
+	uint64_t	cr0;
+	uint64_t	cr3;
+	uint64_t	gla;
+	uint64_t	gpa;
+	int		addrsize;
+	int		cpl;
+};
+
 struct vm_exit {
 	enum vm_exitcode	exitcode;
 	int			inst_length;	/* 0 means unknown */
 	uint64_t		rip;
 	union {
+		struct vm_inout	inout;
+		struct vm_inout_str inout_str;
 		struct {
-			uint16_t	bytes:3;	/* 1 or 2 or 4 */
-			uint16_t	in:1;		/* out is 0, in is 1 */
-			uint16_t	string:1;
-			uint16_t	rep:1;
-			uint16_t	port;
-			uint32_t	eax;		/* valid for out */
-		} inout;
-		struct {
 			uint64_t	gpa;
 			int		fault_type;
 		} paging;
Index: sys/amd64/include/vmm_instruction_emul.h
===================================================================
--- sys/amd64/include/vmm_instruction_emul.h	(revision 265931)
+++ sys/amd64/include/vmm_instruction_emul.h	(working copy)
@@ -29,6 +29,8 @@
 #ifndef	_VMM_INSTRUCTION_EMUL_H_
 #define	_VMM_INSTRUCTION_EMUL_H_
 
+enum vm_reg_name;
+
 enum vie_cpu_mode {
 	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
 	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
@@ -52,17 +54,31 @@
 	uint16_t	op_flags;
 };
 
+/* struct vie_op.op_type */
+enum {
+	VIE_OP_TYPE_NONE = 0,
+	VIE_OP_TYPE_MOV,
+	VIE_OP_TYPE_MOVSX,
+	VIE_OP_TYPE_MOVZX,
+	VIE_OP_TYPE_AND,
+	VIE_OP_TYPE_OR,
+	VIE_OP_TYPE_OUTS,
+	VIE_OP_TYPE_TWO_BYTE,
+	VIE_OP_TYPE_LAST
+};
+
 #define	VIE_INST_SIZE	15
 struct vie {
 	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
 	uint8_t		num_valid;		/* size of the instruction */
 	uint8_t		num_processed;
 
-	uint8_t		rex_w:1,		/* REX prefix */
+	uint8_t		rex_w:1,		/* prefixes */
			rex_r:1,
			rex_x:1,
			rex_b:1,
-			rex_present:1;
+			rex_present:1,
+			rep_present:1;
 
 	uint8_t		mod:2,			/* ModRM byte */
			reg:4,
@@ -111,6 +127,9 @@
	    mem_region_read_t mrr, mem_region_write_t mrw, void *mrarg);
 
+int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+    uint64_t val, int size);
+
 #ifdef _KERNEL
 /*
  * APIs to fetch and decode the instruction from nested page fault handler.
@@ -123,6 +142,11 @@
 
 void vie_init(struct vie *vie);
 
+uint64_t vie_size2mask(int size);
+
+uint64_t vie_segbase(struct vm *vm, int vcpuid, enum vie_cpu_mode cpu_mode,
+    enum vm_reg_name segment);
+
 /*
  * Decode the instruction fetched into 'vie' so it can be emulated.
  *
@@ -137,6 +161,12 @@
 #define	VIE_INVALID_GLA		(1UL << 63)	/* a non-canonical address */
 int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
     enum vie_cpu_mode cpu_mode, struct vie *vie);
+
+/*
+ * Translate guest linear address 'gla' to physical 'gpa' using the page
+ * tables at '%cr3'.
+ */
+int vie_gla2gpa(struct vm *vm, uint64_t gla, uint64_t cr3, uint64_t *gpa,
+    enum vie_paging_mode paging_mode);
 #endif	/* _KERNEL */
-
 #endif	/* _VMM_INSTRUCTION_EMUL_H_ */
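A note on the new exit payload before the kernel changes: both exit flavors share the vm_inout bits, and vm_inout_str embeds a vm_inout as its first member so the two union members in struct vm_exit alias cleanly. The sketch below mirrors the layout added to vmm.h above; the field names match the header, but it is a standalone illustration rather than the real definition:

/* Standalone mirror of the vm_inout layout added to vmm.h above. */
#include <stdint.h>
#include <stdio.h>

struct vm_inout {
	uint16_t	bytes:3;	/* 1 or 2 or 4 */
	uint16_t	in:1;		/* 0 = out, 1 = in */
	uint16_t	string:1;	/* ins/outs variant */
	uint16_t	rep:1;		/* rep prefix present */
	uint16_t	port;
	uint32_t	eax;		/* valid for out */
};

int
main(void)
{
	/* How a "rep outsw" to the COM1 data port would be reported. */
	struct vm_inout io = {
		.bytes = 2, .in = 0, .string = 1, .rep = 1, .port = 0x3f8
	};

	printf("%s%s%s: %u bytes, port 0x%04x\n",
	    io.rep ? "rep " : "", io.in ? "in" : "out",
	    io.string ? "s" : "", (unsigned)io.bytes, (unsigned)io.port);
	return (0);
}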
Index: sys/amd64/vmm/intel/vmx.c
===================================================================
--- sys/amd64/vmm/intel/vmx.c	(revision 265931)
+++ sys/amd64/vmm/intel/vmx.c	(working copy)
@@ -1492,6 +1492,18 @@
 	return (HANDLED);
 }
 
+/*
+ * From section "Guest Register State" in the Intel SDM: CPL = SS.DPL
+ */
+static int
+vmx_cpl(void)
+{
+	uint32_t ssar;
+
+	ssar = vmcs_read(VMCS_GUEST_SS_ACCESS_RIGHTS);
+	return ((ssar >> 5) & 0x3);
+}
+
 static enum vie_cpu_mode
 vmx_cpu_mode(void)
 {
@@ -1732,6 +1744,7 @@
 	int error, handled;
 	struct vmxctx *vmxctx;
 	struct vlapic *vlapic;
+	struct vm_inout_str *vis;
 	uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, reason;
 	uint64_t qual, gpa;
 	bool retu;
@@ -1894,10 +1907,16 @@
 		vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0;
 		vmexit->u.inout.port = (uint16_t)(qual >> 16);
 		vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
-		error = emulate_ioport(vmx->vm, vcpu, vmexit);
-		if (error == 0) {
-			handled = 1;
-			vmxctx->guest_rax = vmexit->u.inout.eax;
+		if (vmexit->u.inout.string) {
+			vis = &vmexit->u.inout_str;
+			vmexit->exitcode = VM_EXITCODE_INOUT_STR;
+			vis->cpu_mode = vmx_cpu_mode();
+			vis->paging_mode = vmx_paging_mode();
+			vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
+			vis->cr3 = vmcs_guest_cr3();
+			vis->gla = vmcs_gla();
+			vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
+			vis->cpl = vmx_cpl();
 		}
 		break;
 	case EXIT_REASON_CPUID:
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c	(revision 265931)
+++ sys/amd64/vmm/vmm.c	(working copy)
@@ -63,6 +63,7 @@
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 
+#include "vmm_ioport.h"
 #include "vmm_ktr.h"
 #include "vmm_host.h"
 #include "vmm_mem.h"
@@ -1348,6 +1349,10 @@
 		case VM_EXITCODE_INST_EMUL:
 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
 			break;
+		case VM_EXITCODE_INOUT:
+		case VM_EXITCODE_INOUT_STR:
+			error = vm_handle_inout(vm, vcpuid, vme, &retu);
+			break;
 		default:
 			retu = true;	/* handled in userland */
 			break;
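The vmx.c hunk above fills vm_inout from the VM-exit qualification, whose bit layout for I/O-instruction exits is fixed by the Intel SDM ("Exit Qualification for I/O Instructions"): bits 2:0 hold the access size minus one, bit 3 the direction (1 = in), bit 4 the string flag, bit 5 the REP flag, and bits 31:16 the port number. A self-contained sketch of that unpacking, using a made-up qualification value:

/* Decode a sample VMX exit qualification for an I/O instruction. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* rep outsw to port 0x3f8: size field 1 (2 bytes), string + rep. */
	uint64_t qual = (0x3f8ULL << 16) | (1 << 5) | (1 << 4) | 1;
	unsigned bytes, in, string, rep, port;

	bytes  = (qual & 0x7) + 1;		/* bits 2:0 - size minus one */
	in     = (qual & 0x8) ? 1 : 0;		/* bit 3 - direction */
	string = (qual & 0x10) ? 1 : 0;		/* bit 4 - string instruction */
	rep    = (qual & 0x20) ? 1 : 0;		/* bit 5 - REP prefix */
	port   = (uint16_t)(qual >> 16);	/* bits 31:16 - port number */

	printf("%s%s%s: port 0x%04x, %u bytes\n", rep ? "rep " : "",
	    in ? "in" : "out", string ? "s" : "", port, bytes);
	return (0);
}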
Index: sys/amd64/vmm/vmm_instruction_emul.c
===================================================================
--- sys/amd64/vmm/vmm_instruction_emul.c	(revision 265931)
+++ sys/amd64/vmm/vmm_instruction_emul.c	(working copy)
@@ -49,21 +49,10 @@
 #include <assert.h>
 #include <vmmapi.h>
 #endif	/* _KERNEL */
 
-/* struct vie_op.op_type */
-enum {
-	VIE_OP_TYPE_NONE = 0,
-	VIE_OP_TYPE_MOV,
-	VIE_OP_TYPE_MOVSX,
-	VIE_OP_TYPE_MOVZX,
-	VIE_OP_TYPE_AND,
-	VIE_OP_TYPE_OR,
-	VIE_OP_TYPE_TWO_BYTE,
-	VIE_OP_TYPE_LAST
-};
-
 /* struct vie_op.op_flags */
 #define	VIE_OP_F_IMM		(1 << 0)	/* immediate operand present */
 #define	VIE_OP_F_IMM8		(1 << 1)	/* 8-bit immediate operand */
+#define	VIE_OP_F_NO_MODRM	(1 << 2)	/* instruction has no MODRM */
 
 static const struct vie_op two_byte_opcodes[256] = {
 	[0xB6] = {
@@ -118,6 +107,18 @@
		.op_type = VIE_OP_TYPE_OR,
		.op_flags = VIE_OP_F_IMM8,
	},
+	[0x6E] = {
+		/* outsb */
+		.op_byte = 0x6E,
+		.op_type = VIE_OP_TYPE_OUTS,
+		.op_flags = VIE_OP_F_NO_MODRM,
+	},
+	[0x6F] = {
+		/* outsw/outsd */
+		.op_byte = 0x6F,
+		.op_type = VIE_OP_TYPE_OUTS,
+		.op_flags = VIE_OP_F_NO_MODRM,
+	},
 };
 
 /* struct vie.mod */
@@ -205,7 +206,7 @@
 	return (error);
 }
 
-static int
+int
 vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
     uint64_t val, int size)
 {
@@ -571,9 +572,9 @@
 	vie->index_register = VM_REG_LAST;
 }
 
-static int
-gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
-    uint64_t *gpa, enum vie_paging_mode paging_mode)
+int
+vie_gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys, uint64_t *gpa,
+    enum vie_paging_mode paging_mode)
 {
 	int nlevels, ptpshift, ptpindex;
 	uint64_t *ptpbase, pte, pgsize;
@@ -701,7 +702,7 @@
 
 	/* Copy the instruction into 'vie' */
 	while (vie->num_valid < inst_length) {
-		err = gla2gpa(vm, rip, cr3, &gpa, paging_mode);
+		err = vie_gla2gpa(vm, rip, cr3, &gpa, paging_mode);
 		if (err)
 			break;
@@ -744,24 +745,38 @@
 }
 
 static int
-decode_rex(struct vie *vie)
+decode_prefix(struct vie *vie, enum vie_cpu_mode cpu_mode)
 {
+	int found;
 	uint8_t x;
 
-	if (vie_peek(vie, &x))
-		return (-1);
+	while (1) {
+		found = 0;
 
+		if (vie_peek(vie, &x))
+			return (-1);
 
-	if (x >= 0x40 && x <= 0x4F) {
-		vie->rex_present = 1;
+		/* The REX prefix is valid only in 64-bit mode */
+		if (cpu_mode == CPU_MODE_64BIT) {
+			if (x >= 0x40 && x <= 0x4F) {
+				vie->rex_present = 1;
+				vie->rex_w = x & 0x8 ? 1 : 0;
+				vie->rex_r = x & 0x4 ? 1 : 0;
+				vie->rex_x = x & 0x2 ? 1 : 0;
+				vie->rex_b = x & 0x1 ? 1 : 0;
+				found = 1;
+			}
+		}
 
-		vie->rex_w = x & 0x8 ? 1 : 0;
-		vie->rex_r = x & 0x4 ? 1 : 0;
-		vie->rex_x = x & 0x2 ? 1 : 0;
-		vie->rex_b = x & 0x1 ? 1 : 0;
+		if (x == 0xF3) {
+			vie->rep_present = 1;
+			found = 1;
+		}
+
+		if (!found)
+			break;
+		vie_advance(vie);
 	}
-
 	return (0);
 }
@@ -1087,23 +1102,23 @@
     enum vie_cpu_mode cpu_mode, struct vie *vie)
 {
 
-	if (cpu_mode == CPU_MODE_64BIT) {
-		if (decode_rex(vie))
-			return (-1);
-	}
+	if (decode_prefix(vie, cpu_mode))
+		return (-1);
 
 	if (decode_opcode(vie))
 		return (-1);
 
-	if (decode_modrm(vie, cpu_mode))
-		return (-1);
+	if ((vie->op.op_flags & VIE_OP_F_NO_MODRM) == 0) {
+		if (decode_modrm(vie, cpu_mode))
+			return (-1);
 
-	if (decode_sib(vie))
-		return (-1);
+		if (decode_sib(vie))
+			return (-1);
 
-	if (decode_displacement(vie))
-		return (-1);
-
+		if (decode_displacement(vie))
+			return (-1);
+	}
+
 	if (decode_immediate(vie))
 		return (-1);
@@ -1117,4 +1132,56 @@
 
 	return (0);
 }
+
+uint64_t
+vie_size2mask(int size)
+{
+	/* XXX is '1' a valid address size? */
+	if (size == 1 || size == 2 || size == 4 || size == 8)
+		return (size2mask[size]);
+	else
+		panic("%s: invalid size %d", __func__, size);
+}
+
+uint64_t
+vie_segbase(struct vm *vm, int vcpuid, enum vie_cpu_mode cpu_mode,
+    enum vm_reg_name seg)
+{
+	struct seg_desc seg_desc;
+	int basesize, error;
+
+	basesize = 4;	/* segment base address size in bytes */
+
+	switch (seg) {
+	case VM_REG_GUEST_ES:
+	case VM_REG_GUEST_CS:
+	case VM_REG_GUEST_SS:
+	case VM_REG_GUEST_DS:
+		if (cpu_mode == CPU_MODE_64BIT) {
+			/*
+			 * These segments have an implicit base address
+			 * of 0 in 64-bit mode.
+			 */
+			return (0);
+		}
+		break;
+	case VM_REG_GUEST_FS:
+	case VM_REG_GUEST_GS:
+		if (cpu_mode == CPU_MODE_64BIT) {
+			/*
+			 * In 64-bit mode the FS and GS segment base
+			 * addresses are 8 bytes wide.
+			 */
+			basesize = 8;
+		}
+		break;
+	default:
+		panic("%s: invalid segment register %d", __func__, seg);
+	}
+
+	error = vm_get_seg_desc(vm, vcpuid, seg, &seg_desc);
+	KASSERT(error == 0, ("%s: vm_get_seg_desc error %d", __func__, error));
+
+	return (seg_desc.base & size2mask[basesize]);
+}
 #endif	/* _KERNEL */
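decode_prefix() replaces the old single-shot REX check with a loop over prefix bytes: a byte in the 0x40-0x4F range counts as a REX prefix only in 64-bit mode, 0xF3 records the REP prefix, and the loop stops at the first byte that is neither. Below is a standalone mirror of that scan over a sample 'rep outsb' encoding (F3 6E); it illustrates the logic but is not the kernel code itself:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint8_t inst[] = { 0xF3, 0x6E };	/* rep outsb */
	int i = 0, cpu_mode_64bit = 1;
	int rex_present = 0, rep_present = 0;

	for (;;) {
		uint8_t x = inst[i];
		int found = 0;

		/* REX (0x40-0x4F) is only a prefix in 64-bit mode. */
		if (cpu_mode_64bit && x >= 0x40 && x <= 0x4F) {
			rex_present = 1;
			found = 1;
		}
		if (x == 0xF3) {		/* REP/REPE */
			rep_present = 1;
			found = 1;
		}
		if (!found)
			break;
		i++;			/* the vie_advance() equivalent */
	}
	printf("opcode 0x%02X, rep=%d, rex=%d\n", (unsigned)inst[i],
	    rep_present, rex_present);
	return (0);
}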
Index: sys/amd64/vmm/vmm_ioport.c
===================================================================
--- sys/amd64/vmm/vmm_ioport.c	(revision 265931)
+++ sys/amd64/vmm/vmm_ioport.c	(working copy)
@@ -33,11 +33,15 @@
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/kernel.h>
+
 #include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
 
 #include "vatpic.h"
 #include "vatpit.h"
 #include "vmm_ioport.h"
+#include "vmm_ktr.h"
 
 #define	MAX_IOPORTS	1280
@@ -55,32 +59,67 @@
	[IO_ELCR2] = vatpic_elc_handler,
 };
 
-int
-emulate_ioport(struct vm *vm, int vcpuid, struct vm_exit *vmexit)
+#ifdef KTR
+static const char *
+inout_instruction(struct vm_exit *vmexit)
 {
-	ioport_handler_func_t handler;
-	uint32_t mask, val;
-	int error;
+	int index;
 
-	if (vmexit->u.inout.port >= MAX_IOPORTS)
-		return (-1);
+	static const char *iodesc[] = {
+		"outb", "outw", "outl",
+		"inb", "inw", "inl",
+		"outsb", "outsw", "outsd",
+		"insb", "insw", "insd",
+	};
 
-	handler = ioport_handler[vmexit->u.inout.port];
-	if (handler == NULL)
-		return (-1);
+	KASSERT(vmexit->exitcode == VM_EXITCODE_INOUT,
+	    ("%s: invalid exitcode %d", __func__, vmexit->exitcode));
 
 	switch (vmexit->u.inout.bytes) {
 	case 1:
-		mask = 0xff;
+		index = 0;
 		break;
 	case 2:
-		mask = 0xffff;
+		index = 1;
 		break;
 	default:
-		mask = 0xffffffff;
+		index = 2;
 		break;
 	}
 
+	if (vmexit->u.inout.in)
+		index += 3;
+
+	if (vmexit->u.inout.string)
+		index += 6;
+
+	KASSERT(index < nitems(iodesc), ("%s: invalid index %d",
+	    __func__, index));
+
+	return (iodesc[index]);
+}
+#endif	/* KTR */
+
+static int
+emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
+    bool *retu)
+{
+	ioport_handler_func_t handler;
+	uint32_t mask, val;
+	int error;
+
+	error = 0;
+	*retu = true;
+
+	if (vmexit->u.inout.port >= MAX_IOPORTS)
+		goto done;
+
+	handler = ioport_handler[vmexit->u.inout.port];
+	if (handler == NULL)
+		goto done;
+
+	mask = vie_size2mask(vmexit->u.inout.bytes);
+
 	if (!vmexit->u.inout.in) {
 		val = vmexit->u.inout.eax & mask;
 	}
@@ -88,10 +127,177 @@
 	error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
	    vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
 
-	if (!error && vmexit->u.inout.in) {
-		vmexit->u.inout.eax &= ~mask;
-		vmexit->u.inout.eax |= val & mask;
+	if (!error) {
+		*retu = false;
+		if (vmexit->u.inout.in) {
+			vmexit->u.inout.eax &= ~mask;
+			vmexit->u.inout.eax |= val & mask;
+			error = vm_set_register(vm, vcpuid,
+			    VM_REG_GUEST_RAX, vmexit->u.inout.eax);
+			KASSERT(error == 0, ("emulate_inout_port: error %d "
+			    "setting guest rax register", error));
+		}
 	}
+done:
+	VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s",
+	    vmexit->u.inout.rep ? "rep " : "",
+	    inout_instruction(vmexit),
+	    vmexit->u.inout.port,
+	    error ? "error" : (*retu ? "userspace" : "handled"));
 
 	return (error);
 }
+
+static int
+emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
+{
+	struct vm_inout *inout;
+	uint64_t cr3, gla, gpa;
+	uint64_t segbase, index;
+	struct vie vie;
+	enum vm_reg_name seg_reg, idx_reg;
+	enum vie_cpu_mode cpu_mode;
+	enum vie_paging_mode paging_mode;
+	int addrsize, bytes, error, inst_length;
+
+	inout = &vmexit->u.inout;
+
+	/*
+	 * An ins/outs VM exit takes precedence over the following error
+	 * conditions that would ordinarily be checked by the processor:
+	 *
+	 * - #GP(0) due to the segment being unusable.
+	 * - #GP(0) due to a memory operand effective address outside the
+	 *   limit of the segment.
+	 * - #AC(0) if alignment checking is enabled and an unaligned memory
+	 *   reference is made at CPL=3.
+	 */
+
+	/*
+	 * XXX
+	 * inout string emulation is only supported in 64-bit mode and only
+	 * for byte instructions.
+	 *
+	 * The #GP(0) fault conditions described above don't apply in
+	 * 64-bit mode.
+	 *
+	 * The #AC(0) fault condition described above does not apply
+	 * because byte accesses don't have alignment constraints.
+	 */
+	cpu_mode = vmexit->u.inout_str.cpu_mode;
+	if (cpu_mode != CPU_MODE_64BIT) {
+		VCPU_CTR1(vm, vcpuid, "ins/outs not emulated in cpu mode %d",
+		    cpu_mode);
+		return (EINVAL);
+	}
+
+	bytes = inout->bytes;
+	if (bytes != 1) {
+		VCPU_CTR1(vm, vcpuid, "ins/outs operand size %d not supported",
+		    bytes);
+		return (EINVAL);
+	}
+
+	/*
+	 * XXX insb/insw/insd instructions are not emulated at this time.
+	 */
+	if (inout->in) {
+		VCPU_CTR0(vm, vcpuid, "ins emulation not implemented");
+		return (EINVAL);
+	}
+
+	inst_length = vmexit->inst_length;
+	cr3 = vmexit->u.inout_str.cr3;
+	paging_mode = vmexit->u.inout_str.paging_mode;
+
+	vie_init(&vie);
+
+	error = vmm_fetch_instruction(vm, vcpuid, vmexit->rip, inst_length,
+	    cr3, paging_mode, &vie);
+	if (error) {
+		VCPU_CTR2(vm, vcpuid, "%s: error %d fetching instruction",
+		    __func__, error);
+		return (EFAULT);
+	}
+
+	error = vmm_decode_instruction(vm, vcpuid, VIE_INVALID_GLA, cpu_mode,
+	    &vie);
+	if (error) {
+		VCPU_CTR2(vm, vcpuid, "%s: error %d decoding instruction",
+		    __func__, error);
+		return (EINVAL);
+	}
+
+	KASSERT(vie.op.op_byte == 0x6E, ("%s: invalid opcode byte %#x",
+	    __func__, vie.op.op_byte));
+	KASSERT(vie.op.op_type == VIE_OP_TYPE_OUTS,
+	    ("%s: invalid opcode type %d", __func__, vie.op.op_type));
+	KASSERT(vie.rep_present == inout->rep,
+	    ("%s: rep prefix discrepancy for ins/outs instruction: %d/%d",
+	    __func__, vie.rep_present, inout->rep));
+
+	/*
+	 * XXX need to deal with the address size override prefix:
+	 * - select between rsi/esi/si (for outs) or rdi/edi/di (for ins)
+	 * - select between rcx/ecx/cx if the repeat prefix is present
+	 */
+	if (cpu_mode == CPU_MODE_64BIT)
+		addrsize = 8;
+	else
+		addrsize = 4;
+
+	if (vie.op.op_type == VIE_OP_TYPE_OUTS) {
+		/* XXX deal with the segment override prefix */
+		seg_reg = VM_REG_GUEST_DS;
+		idx_reg = VM_REG_GUEST_RSI;
+	} else {
+		seg_reg = VM_REG_GUEST_ES;
+		idx_reg = VM_REG_GUEST_RDI;
+	}
+
+	segbase = vie_segbase(vm, vcpuid, cpu_mode, seg_reg);
+
+	error = vm_get_register(vm, vcpuid, idx_reg, &index);
+	KASSERT(error == 0, ("%s: vm_get_register(%d) error %d", __func__,
+	    idx_reg, error));
+	index &= vie_size2mask(addrsize);
+
+	gla = segbase + index;
+
+	/*
+	 * Verify that the computed linear address matches the one
+	 * provided by hardware.
+	 */
+	if (vmexit->u.inout_str.gla != VIE_INVALID_GLA) {
+		KASSERT(gla == vmexit->u.inout_str.gla, ("%s: gla mismatch "
+		    "%#lx/%#lx", __func__, gla, vmexit->u.inout_str.gla));
+	}
+
+	error = vie_gla2gpa(vm, gla, cr3, &gpa, paging_mode);
+	if (error) {
+		/* XXX should a #PF be injected into the guest instead? */
+		VCPU_CTR3(vm, vcpuid, "%s: error %d translating gla %#lx "
		    "to gpa", __func__, error, gla);
+		return (EFAULT);
+	}
+
+	vmexit->u.inout_str.gpa = gpa;
+	vmexit->u.inout_str.addrsize = addrsize;
+	*retu = true;	/* the string emulation finishes in userland */
+	return (0);
+}
+
+int
+vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
+{
+	int bytes;
+
+	bytes = vmexit->u.inout.bytes;
+	KASSERT(bytes == 1 || bytes == 2 || bytes == 4,
+	    ("vm_handle_inout: invalid operand size %d", bytes));
+
+	if (vmexit->u.inout.string)
+		return (emulate_inout_str(vm, vcpuid, vmexit, retu));
+	else
+		return (emulate_inout_port(vm, vcpuid, vmexit, retu));
+}
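inout_instruction() maps (size, direction, string) to a mnemonic by table index: 0-2 select the operand size, +3 for the 'in' direction, and +6 for the string variant. This is also why the comma after "outsd" in iodesc[] matters; without it the compiler concatenates "outsd" and "insb" into a single string, leaving the array one entry short and the string-instruction entries misaligned. A standalone mirror of the computation:

#include <stdio.h>

static const char *iodesc[] = {
	"outb", "outw", "outl",
	"inb", "inw", "inl",
	"outsb", "outsw", "outsd",
	"insb", "insw", "insd",
};

static const char *
mnemonic(int bytes, int in, int string)
{
	int index;

	index = (bytes == 1) ? 0 : (bytes == 2) ? 1 : 2;
	if (in)
		index += 3;
	if (string)
		index += 6;
	return (iodesc[index]);
}

int
main(void)
{
	printf("%s\n", mnemonic(1, 0, 1));	/* outsb */
	printf("%s\n", mnemonic(2, 1, 0));	/* inw */
	printf("%s\n", mnemonic(4, 1, 1));	/* insd */
	return (0);
}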
Index: sys/amd64/vmm/vmm_ioport.h
===================================================================
--- sys/amd64/vmm/vmm_ioport.h	(revision 265931)
+++ sys/amd64/vmm/vmm_ioport.h	(working copy)
@@ -32,6 +32,6 @@
 typedef int (*ioport_handler_func_t)(void *vm, int vcpuid,
     bool in, int port, int bytes, uint32_t *val);
 
-int emulate_ioport(struct vm *vm, int vcpuid, struct vm_exit *vmexit);
+int vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu);
 
 #endif	/* _VMM_IOPORT_H_ */
Index: sys/amd64/vmm/vmm_ktr.h
===================================================================
--- sys/amd64/vmm/vmm_ktr.h	(revision 265931)
+++ sys/amd64/vmm/vmm_ktr.h	(working copy)
@@ -48,6 +48,10 @@
 #define	VCPU_CTR3(vm, vcpuid, format, p1, p2, p3)			\
 CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3))
 
+#define	VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4)			\
+CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid),		\
+    (p1), (p2), (p3), (p4))
+
 #define	VM_CTR0(vm, format)						\
 CTR1(KTR_VMM, "vm %s: " format, vm_name((vm)))
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c	(revision 265931)
+++ usr.sbin/bhyve/bhyverun.c	(working copy)
@@ -293,33 +293,34 @@
 vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	int error;
-	int bytes, port, in, out;
-	uint32_t eax;
+	int bytes, port, in, out, string;
 	int vcpu;
 
 	vcpu = *pvcpu;
 
 	port = vme->u.inout.port;
 	bytes = vme->u.inout.bytes;
-	eax = vme->u.inout.eax;
+	string = vme->u.inout.string;
 	in = vme->u.inout.in;
 	out = !in;
 
-	/* We don't deal with these */
-	if (vme->u.inout.string || vme->u.inout.rep)
-		return (VMEXIT_ABORT);
-
 	/* Extra-special case of host notifications */
-	if (out && port == GUEST_NIO_PORT)
-		return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
+	if (out && port == GUEST_NIO_PORT) {
+		error = vmexit_handle_notify(ctx, vme, pvcpu,
+		    vme->u.inout.eax);
+		return (error);
+	}
 
-	error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio);
-	if (error == INOUT_OK && in)
-		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
+	error = emulate_inout(ctx, vcpu, vme, strictio);
+	if (error == INOUT_OK && in && !string) {
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
+		    vme->u.inout.eax);
+	}
 
 	switch (error) {
 	case INOUT_OK:
 		return (VMEXIT_CONTINUE);
+	case INOUT_RESTART:
+		return (VMEXIT_RESTART);
 	case INOUT_RESET:
 		stats.io_reset++;
 		return (VMEXIT_RESET);
@@ -519,6 +520,7 @@
 
 static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
	[VM_EXITCODE_INOUT]  = vmexit_inout,
+	[VM_EXITCODE_INOUT_STR] = vmexit_inout,
	[VM_EXITCODE_VMX]    = vmexit_vmx,
	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
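The new INOUT_RESTART return value is what makes incremental emulation of 'rep outsb' work: when emulate_inout() stops early (for example at a page boundary), bhyverun maps INOUT_RESTART to VMEXIT_RESTART and the vcpu re-enters the guest at the same %rip, so the instruction runs again with the already-updated index and count registers until %rcx reaches zero. A toy model of that control flow, with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t rip = 0x1000, rsi = 0x2000, rcx = 5;
	uint64_t per_pass = 2;	/* pretend a page boundary stops each pass */

	while (rcx != 0) {
		uint64_t n = rcx < per_pass ? rcx : per_pass;

		rsi += n;	/* index advances (direction flag clear) */
		rcx -= n;
		if (rcx != 0)
			printf("restart at rip 0x%jx: rsi 0x%jx, rcx %ju\n",
			    (uintmax_t)rip, (uintmax_t)rsi, (uintmax_t)rcx);
	}
	printf("all iterations done; rip now advances past the instruction\n");
	return (0);
}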
Index: usr.sbin/bhyve/inout.c
===================================================================
--- usr.sbin/bhyve/inout.c	(revision 265931)
+++ usr.sbin/bhyve/inout.c	(working copy)
@@ -32,10 +32,16 @@
 #include <sys/param.h>
 #include <sys/linker_set.h>
 
+#include <x86/psl.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
 
+#include "bhyverun.h"
 #include "inout.h"
 
 SET_DECLARE(inout_port_set, struct inout_port);
@@ -91,52 +97,130 @@
 }
 
 int
-emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
-    uint32_t *eax, int strict)
+emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
 {
-	int flags;
-	uint32_t mask, val;
+	int addrsize, bytes, flags, in, port, rep;
+	uint64_t gpa, gpaend;
+	uint32_t val;
 	inout_func_t handler;
 	void *arg;
-	int error;
+	char *gva;
+	int error, retval;
+	enum vm_reg_name idxreg;
+	uint64_t index, count;
+	static uint64_t size2mask[] = {
+		[1] = 0xff,
+		[2] = 0xffff,
+		[4] = 0xffffffff,
+		[8] = 0xffffffffffffffff,
+	};
 
+	bytes = vmexit->u.inout.bytes;
+	in = vmexit->u.inout.in;
+	port = vmexit->u.inout.port;
+
+	assert(port < MAX_IOPORTS);
+	assert(bytes == 1 || bytes == 2 || bytes == 4);
+
 	handler = inout_handlers[port].handler;
 
 	if (strict && handler == default_inout)
 		return (-1);
 
-	switch (bytes) {
-	case 1:
-		mask = 0xff;
-		break;
-	case 2:
-		mask = 0xffff;
-		break;
-	default:
-		mask = 0xffffffff;
-		break;
-	}
+	flags = inout_handlers[port].flags;
+	arg = inout_handlers[port].arg;
 
-	if (!in) {
-		val = *eax & mask;
+	if (in) {
+		if (!(flags & IOPORT_F_IN))
+			return (-1);
+	} else {
+		if (!(flags & IOPORT_F_OUT))
+			return (-1);
 	}
 
-	flags = inout_handlers[port].flags;
-	arg = inout_handlers[port].arg;
+	retval = 0;
+	if (vmexit->u.inout.string) {
+		rep = vmexit->u.inout.rep;
+		addrsize = vmexit->u.inout_str.addrsize;
+		assert(addrsize == 2 || addrsize == 4 || addrsize == 8);
 
-	if ((in && (flags & IOPORT_F_IN)) || (!in && (flags & IOPORT_F_OUT)))
-		error = (*handler)(ctx, vcpu, in, port, bytes, &val, arg);
-	else
-		error = -1;
+		/* Index register */
+		idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
+		error = vm_get_register(ctx, vcpu, idxreg, &index);
+		assert(error == 0);
+		index &= size2mask[addrsize];
 
-	if (!error && in) {
-		*eax &= ~mask;
-		*eax |= val & mask;
+		/* Count register */
+		if (rep) {
+			error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX,
+			    &count);
+			assert(error == 0);
+			count &= size2mask[addrsize];
+		} else
+			count = 1;
+
+		gpa = vmexit->u.inout_str.gpa;
+		gpaend = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
+		gva = paddr_guest2host(ctx, gpa, gpaend - gpa);
+
+		while (count != 0 && gpa < gpaend) {
+			/*
+			 * XXX this may not work for unaligned accesses
+			 * because the last access on the page may spill over
+			 * into the adjacent page in the linear address space.
+			 * This is a problem because we don't have a gla2gpa()
+			 * mapping of this adjacent page.
+			 */
+			assert(gpaend - gpa >= bytes);
+
+			val = 0;
+			if (!in)
+				bcopy(gva, &val, bytes);
+
+			retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
+			if (retval != 0)
+				break;
+
+			if (in)
+				bcopy(&val, gva, bytes);
+
+			/* Update the index */
+			if (vmexit->u.inout_str.rflags & PSL_D)
+				index -= bytes;
+			else
+				index += bytes;
+
+			count--;
+			gva += bytes;
+			gpa += bytes;
+		}
+
+		/* Write the updated index back to the guest register */
+		error = vie_update_register(ctx, vcpu, idxreg, index,
+		    addrsize);
+		assert(error == 0);
+
+		if (rep) {
+			/* Write the updated count back as well */
+			error = vie_update_register(ctx, vcpu,
+			    VM_REG_GUEST_RCX, count, addrsize);
+			assert(error == 0);
+		}
+
+		/* Restart the instruction if more iterations remain */
+		if (retval == INOUT_OK && count != 0)
+			retval = INOUT_RESTART;
+	} else {
+		if (!in) {
+			val = vmexit->u.inout.eax & size2mask[bytes];
+		}
+		retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
+		if (retval == 0 && in) {
+			vmexit->u.inout.eax &= ~size2mask[bytes];
+			vmexit->u.inout.eax |= val & size2mask[bytes];
+		}
 	}
-
-	return (error);
+
+	return (retval);
 }
 
 void
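The string loop in emulate_inout() above only walks to the end of the 4KB page that contains the starting guest physical address: gpaend is the next page boundary, and anything left over is picked up after an INOUT_RESTART. The page-clamping arithmetic as a standalone check; PAGE_SIZE and rounddown() are written out here, while in the patch they come from the system headers:

#include <stdint.h>
#include <stdio.h>

#define	PAGE_SIZE	4096UL
#define	rounddown(x, y)	(((x) / (y)) * (y))

int
main(void)
{
	uint64_t gpa = 0x12345ff0;
	uint64_t gpaend = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);

	/* gpaend is 0x12346000: only 16 bytes remain on this page. */
	printf("gpa 0x%jx, gpaend 0x%jx, room %ju bytes\n",
	    (uintmax_t)gpa, (uintmax_t)gpaend, (uintmax_t)(gpaend - gpa));
	return (0);
}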
Index: usr.sbin/bhyve/inout.h
===================================================================
--- usr.sbin/bhyve/inout.h	(revision 265931)
+++ usr.sbin/bhyve/inout.h	(working copy)
@@ -32,12 +32,14 @@
 #include <sys/linker_set.h>
 
 struct vmctx;
+struct vm_exit;
 
 /* Handler return values. */
 #define	INOUT_ERROR	-1
 #define	INOUT_OK	0
-#define	INOUT_RESET	1
-#define	INOUT_POWEROFF	2
+#define	INOUT_RESTART	1
+#define	INOUT_RESET	2
+#define	INOUT_POWEROFF	3
 
 typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
     int bytes, uint32_t *eax, void *arg);
@@ -72,8 +74,8 @@
	DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
 
 void	init_inout(void);
-int	emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes,
-    uint32_t *eax, int strict);
+int	emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit,
+	    int strict);
 int	register_inout(struct inout_port *iop);
 int	unregister_inout(struct inout_port *iop);
 void	init_bvmcons(void);
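Finally, for context on the interface the patch leaves unchanged: I/O handlers still implement the inout_func_t signature from inout.h, exchanging data through *eax one element at a time (the string loop simply invokes the same handler once per element). A minimal sketch of such a handler follows; the port number and behavior are invented for illustration, and real handlers are registered with the INOUT_PORT()/DATA_SET machinery in the header:

#include <stdint.h>
#include <stdio.h>

#define	INOUT_OK	0		/* mirrors inout.h above */

struct vmctx;				/* opaque, as in inout.h */

/*
 * A handler with the inout_func_t signature. The behavior (reads float
 * high, writes are ignored) stands in for a real device model.
 */
static int
dummy_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	(void)ctx; (void)vcpu; (void)port; (void)bytes; (void)arg;

	if (in)
		*eax = 0xff;		/* open bus reads as all-ones */
	return (INOUT_OK);
}

int
main(void)
{
	uint32_t eax = 0;

	dummy_handler(NULL, 0, 1, 0x100, 1, &eax, NULL);
	printf("inb(0x100) -> 0x%02x\n", eax);
	return (0);
}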