Index: lib/libvmmapi/vmmapi.c
===================================================================
--- lib/libvmmapi/vmmapi.c	(revision 259205)
+++ lib/libvmmapi/vmmapi.c	(working copy)
@@ -397,6 +397,18 @@
 }
 
 int
+vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
+{
+	struct vm_lapic_msi vmmsi;
+
+	bzero(&vmmsi, sizeof(vmmsi));
+	vmmsi.addr = addr;
+	vmmsi.msg = msg;
+
+	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
+}
+
+int
 vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
@@ -552,7 +564,7 @@
 
 int
 vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-    int destcpu, int vector, int numvec)
+    uint64_t addr, uint64_t msg, int numvec)
 {
 	struct vm_pptdev_msi pptmsi;
 
@@ -561,8 +573,8 @@
 	pptmsi.bus = bus;
 	pptmsi.slot = slot;
 	pptmsi.func = func;
-	pptmsi.destcpu = destcpu;
-	pptmsi.vector = vector;
+	pptmsi.msg = msg;
+	pptmsi.addr = addr;
 	pptmsi.numvec = numvec;
 
 	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
@@ -570,7 +582,7 @@
 
 int
 vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-    int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct vm_pptdev_msix pptmsix;
 
Index: lib/libvmmapi/vmmapi.h
===================================================================
--- lib/libvmmapi/vmmapi.h	(revision 259205)
+++ lib/libvmmapi/vmmapi.h	(working copy)
@@ -67,6 +67,7 @@
 int	vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
 	    int vector, int error_code);
 int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
+int	vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
 int	vm_ioapic_assert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_deassert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pulse_irq(struct vmctx *ctx, int irq);
@@ -82,9 +83,9 @@
 int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 	    vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-	    int dest, int vector, int numvec);
+	    uint64_t addr, uint64_t msg, int numvec);
 int	vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-	    int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+	    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control);
 
 /*
  * Return a pointer to the statistics buffer. Note that this is not MT-safe.
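For context, here is a minimal userland sketch (not part of the patch) of the reworked interface: the caller now encodes the interrupt destination in an x86 MSI address/data pair instead of a (vcpu, vector) tuple. The helper name below is hypothetical; the encoding follows the Intel SDM Vol. 3 layout assumed throughout this change (0xfee00000 base, destination APIC ID in address bits 19:12, vector in data bits 7:0).

#include <stdint.h>
#include <vmmapi.h>

/* Hypothetical wrapper: fixed delivery, edge trigger, physical destination. */
static int
inject_msi_sketch(struct vmctx *ctx, uint32_t apicid, uint8_t vector)
{
	uint64_t addr, msg;

	addr = 0xfee00000 | ((apicid & 0xff) << 12);	/* MSI address */
	msg = vector;				/* MSI data: delmode 000b = fixed */

	return (vm_lapic_msi(ctx, addr, msg));
}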
Index: sys/amd64/include/vmm_dev.h
===================================================================
--- sys/amd64/include/vmm_dev.h	(revision 259205)
+++ sys/amd64/include/vmm_dev.h	(working copy)
@@ -66,6 +66,11 @@
 	int		error_code_valid;
 };
 
+struct vm_lapic_msi {
+	uint64_t	msg;
+	uint64_t	addr;
+};
+
 struct vm_lapic_irq {
 	int		cpuid;
 	int		vector;
@@ -103,8 +108,8 @@
 	int		slot;
 	int		func;
 	int		numvec;		/* 0 means disabled */
-	int		vector;
-	int		destcpu;
+	uint64_t	msg;
+	uint64_t	addr;
 };
 
 struct vm_pptdev_msix {
@@ -113,7 +118,7 @@
 	int		slot;
 	int		func;
 	int		idx;
-	uint32_t	msg;
+	uint64_t	msg;
 	uint32_t	vector_control;
 	uint64_t	addr;
 };
@@ -175,6 +180,7 @@
 	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
 	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
 	IOCNUM_IOAPIC_PULSE_IRQ = 35,
+	IOCNUM_LAPIC_MSI = 36,
 
 	/* PCI pass-thru */
 	IOCNUM_BIND_PPTDEV = 40,
@@ -211,6 +217,8 @@
 	_IOW('v', IOCNUM_INJECT_EVENT, struct vm_event)
#define	VM_LAPIC_IRQ \
 	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_MSI \
+	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
 #define	VM_IOAPIC_ASSERT_IRQ \
 	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_DEASSERT_IRQ \
Index: sys/amd64/vmm/io/ppt.c
===================================================================
--- sys/amd64/vmm/io/ppt.c	(revision 259205)
+++ sys/amd64/vmm/io/ppt.c	(working copy)
@@ -72,8 +72,8 @@
 
 struct pptintr_arg {				/* pptintr(pptintr_arg) */
 	struct pptdev	*pptdev;
-	int		vec;
-	int		vcpu;
+	uint64_t	addr;
+	uint64_t	msg_data;
 };
 
 static struct pptdev {
@@ -412,16 +412,14 @@
 static int
 pptintr(void *arg)
 {
-	int vec;
 	struct pptdev *ppt;
 	struct pptintr_arg *pptarg;
 
 	pptarg = arg;
 	ppt = pptarg->pptdev;
-	vec = pptarg->vec;
 
 	if (ppt->vm != NULL)
-		lapic_intr_edge(ppt->vm, pptarg->vcpu, vec);
+		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
 	else {
 		/*
 		 * XXX
@@ -441,15 +439,13 @@
 
 int
 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
-	      int destcpu, int vector, int numvec)
+	      uint64_t addr, uint64_t msg, int numvec)
 {
 	int i, rid, flags;
 	int msi_count, startrid, error, tmp;
 	struct pptdev *ppt;
 
-	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
-	    (vector < 0 || vector > 255) ||
-	    (numvec < 0 || numvec > MAX_MSIMSGS))
+	if (numvec < 0 || numvec > MAX_MSIMSGS)
 		return (EINVAL);
 
 	ppt = ppt_find(bus, slot, func);
@@ -513,8 +509,8 @@
 			break;
 
 		ppt->msi.arg[i].pptdev = ppt;
-		ppt->msi.arg[i].vec = vector + i;
-		ppt->msi.arg[i].vcpu = destcpu;
+		ppt->msi.arg[i].addr = addr;
+		ppt->msi.arg[i].msg_data = msg + i;
 
 		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
			    INTR_TYPE_NET | INTR_MPSAFE,
@@ -534,7 +530,7 @@
 
 int
 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
-	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct pptdev *ppt;
 	struct pci_devinfo *dinfo;
@@ -605,8 +601,8 @@
 			return (ENXIO);
 
 		ppt->msix.arg[idx].pptdev = ppt;
-		ppt->msix.arg[idx].vec = msg & 0xFF;
-		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
+		ppt->msix.arg[idx].addr = addr;
+		ppt->msix.arg[idx].msg_data = msg;
 
 		/* Setup the MSI-X interrupt */
 		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
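A note on 'msg + i' above: with multi-message MSI the device is granted a power-of-two block of contiguous vectors and selects message i by modifying the low bits of the MSI data, so each host interrupt handler is registered with its own per-message data value. A standalone illustration (the function name is hypothetical; PCI requires the base data value to be aligned so that its low bits are zero):

#include <stdint.h>

static uint64_t
msi_msg_data(uint64_t msg, int i)
{
	return (msg + i);	/* message 'i' raises vector base + i */
}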
Index: sys/amd64/vmm/io/ppt.h
===================================================================
--- sys/amd64/vmm/io/ppt.h	(revision 259205)
+++ sys/amd64/vmm/io/ppt.h	(working copy)
@@ -33,9 +33,9 @@
 int	ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
 		     vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
-		      int destcpu, int vector, int numvec);
+		      uint64_t addr, uint64_t msg, int numvec);
 int	ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
-		       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+		       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control);
 int	ppt_num_devices(struct vm *vm);
 boolean_t ppt_is_mmio(struct vm *vm, vm_paddr_t gpa);
 
Index: sys/amd64/vmm/io/vhpet.c
===================================================================
--- sys/amd64/vmm/io/vhpet.c	(revision 259205)
+++ sys/amd64/vmm/io/vhpet.c	(working copy)
@@ -240,8 +240,7 @@
 static void
 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
 {
-	int apicid, vector, vcpuid, pin;
-	cpuset_t dmask;
+	int pin;
 
 	/* If interrupts are not enabled for this timer then just return. */
 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
@@ -256,26 +255,8 @@
 	}
 
 	if (vhpet_timer_msi_enabled(vhpet, n)) {
-		/*
-		 * XXX should have an API 'vlapic_deliver_msi(vm, addr, data)'
-		 * - assuming physical delivery mode
-		 * - no need to interpret contents of 'msireg' here
-		 */
-		vector = vhpet->timer[n].msireg & 0xff;
-		apicid = (vhpet->timer[n].msireg >> (32 + 12)) & 0xff;
-		if (apicid != 0xff) {
-			/* unicast */
-			vcpuid = vm_apicid2vcpuid(vhpet->vm, apicid);
-			lapic_intr_edge(vhpet->vm, vcpuid, vector);
-		} else {
-			/* broadcast */
-			dmask = vm_active_cpus(vhpet->vm);
-			while ((vcpuid = CPU_FFS(&dmask)) != 0) {
-				vcpuid--;
-				CPU_CLR(vcpuid, &dmask);
-				lapic_intr_edge(vhpet->vm, vcpuid, vector);
-			}
-		}
+		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
+		    vhpet->timer[n].msireg & 0xffffffff);
 		return;
 	}
 
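The simplification above relies on the HPET FSB interrupt route register layout: the upper 32 bits hold the MSI address and the lower 32 bits the MSI data, so the register can now be handed to lapic_intr_msi() without being interpreted locally. A standalone sketch of that split (helper name is illustrative):

#include <stdint.h>

static void
split_fsb_route(uint64_t msireg, uint64_t *addr, uint64_t *data)
{
	*addr = msireg >> 32;		/* FSB interrupt address field */
	*data = msireg & 0xffffffff;	/* FSB interrupt value field */
}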
Index: sys/amd64/vmm/io/vioapic.c
===================================================================
--- sys/amd64/vmm/io/vioapic.c	(revision 259205)
+++ sys/amd64/vmm/io/vioapic.c	(working copy)
@@ -44,6 +44,7 @@
 
 #include "vmm_ktr.h"
 #include "vmm_lapic.h"
+#include "vlapic.h"
 #include "vioapic.h"
 
 #define	IOREGSEL	0x00
@@ -91,25 +92,14 @@
 	else
 		return ("deasserted");
 }
-
-static const char *
-trigger_str(bool level)
-{
-
-	if (level)
-		return ("level");
-	else
-		return ("edge");
-}
 #endif
 
 static void
 vioapic_send_intr(struct vioapic *vioapic, int pin)
 {
-	int vector, apicid, vcpuid;
-	uint32_t low, high;
-	cpuset_t dmask;
-	bool level;
+	int vector, delmode;
+	uint32_t low, high, dest;
+	bool level, phys;
 
 	KASSERT(pin >= 0 && pin < REDIR_ENTRIES,
 	    ("vioapic_set_pinstate: invalid pin number %d", pin));
@@ -120,52 +110,20 @@
 	low = vioapic->rtbl[pin].reg;
 	high = vioapic->rtbl[pin].reg >> 32;
 
-	/*
-	 * XXX We only deal with:
-	 * - physical destination
-	 * - fixed delivery mode
-	 */
-	if ((low & IOART_DESTMOD) != IOART_DESTPHY) {
-		VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported dest mode "
-		    "0x%08x", pin, low);
-		return;
-	}
-
-	if ((low & IOART_DELMOD) != IOART_DELFIXED) {
-		VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported delivery mode "
-		    "0x%08x", pin, low);
-		return;
-	}
-
 	if ((low & IOART_INTMASK) == IOART_INTMSET) {
 		VIOAPIC_CTR1(vioapic, "ioapic pin%d: masked", pin);
 		return;
 	}
 
+	phys = ((low & IOART_DESTMOD) == IOART_DESTPHY);
+	delmode = low & IOART_DELMOD;
 	level = low & IOART_TRGRLVL ? true : false;
 	if (level)
 		vioapic->rtbl[pin].reg |= IOART_REM_IRR;
 
 	vector = low & IOART_INTVEC;
-	apicid = high >> APIC_ID_SHIFT;
-	if (apicid != 0xff) {
-		/* unicast */
-		vcpuid = vm_apicid2vcpuid(vioapic->vm, apicid);
-		VIOAPIC_CTR4(vioapic, "ioapic pin%d: %s triggered intr "
-		    "vector %d on vcpuid %d", pin, trigger_str(level),
-		    vector, vcpuid);
-		lapic_set_intr(vioapic->vm, vcpuid, vector, level);
-	} else {
-		/* broadcast */
-		VIOAPIC_CTR3(vioapic, "ioapic pin%d: %s triggered intr "
-		    "vector %d on all vcpus", pin, trigger_str(level), vector);
-		dmask = vm_active_cpus(vioapic->vm);
-		while ((vcpuid = CPU_FFS(&dmask)) != 0) {
-			vcpuid--;
-			CPU_CLR(vcpuid, &dmask);
-			lapic_set_intr(vioapic->vm, vcpuid, vector, level);
-		}
-	}
+	dest = high >> APIC_ID_SHIFT;
+	vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
 }
 
 static void
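To make the new hand-off concrete, the following self-contained program decodes a sample redirection entry the same way vioapic_send_intr() now does, using local mirrors of the IOART_* masks (the constants and the sample entry are illustrative, not taken from the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define	RTE_DESTMOD	0x00000800	/* 0 = physical, 1 = logical */
#define	RTE_DELMOD	0x00000700	/* delivery mode field */
#define	RTE_TRGRLVL	0x00008000	/* trigger mode: 1 = level */
#define	RTE_INTVEC	0x000000ff	/* vector field */

int
main(void)
{
	uint64_t rte = 0x0300000000008065ULL;	/* sample entry */
	uint32_t low = rte, high = rte >> 32;
	bool phys = (low & RTE_DESTMOD) == 0;
	bool level = (low & RTE_TRGRLVL) != 0;
	uint32_t delmode = low & RTE_DELMOD;
	uint32_t vector = low & RTE_INTVEC;
	uint32_t dest = high >> 24;		/* APIC_ID_SHIFT */

	/* prints: dest=0x3 phys=1 delmode=0 level=1 vector=0x65 */
	printf("dest=%#x phys=%d delmode=%#x level=%d vector=%#x\n",
	    dest, phys, delmode, level, vector);
	return (0);
}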
Index: sys/amd64/vmm/io/vlapic.c
===================================================================
--- sys/amd64/vmm/io/vlapic.c	(revision 259205)
+++ sys/amd64/vmm/io/vlapic.c	(working copy)
@@ -145,6 +145,84 @@
 
 #define	VLAPIC_BUS_FREQ	tsc_freq
 
+static __inline uint32_t
+vlapic_get_id(struct vlapic *vlapic)
+{
+
+	if (x2apic(vlapic))
+		return (vlapic->vcpuid);
+	else
+		return (vlapic->vcpuid << 24);
+}
+
+static __inline uint32_t
+vlapic_get_ldr(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+	int apicid;
+	uint32_t ldr;
+
+	lapic = &vlapic->apic;
+	if (x2apic(vlapic)) {
+		apicid = vlapic_get_id(vlapic);
+		ldr = 1 << (apicid & 0xf);
+		ldr |= (apicid & 0xffff0) << 12;
+		return (ldr);
+	} else
+		return (lapic->ldr);
+}
+
+static __inline uint32_t
+vlapic_get_dfr(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+
+	lapic = &vlapic->apic;
+	if (x2apic(vlapic))
+		return (0);
+	else
+		return (lapic->dfr);
+}
+
+static void
+vlapic_set_dfr(struct vlapic *vlapic, uint32_t data)
+{
+	uint32_t dfr;
+	struct LAPIC *lapic;
+
+	if (x2apic(vlapic)) {
+		VM_CTR1(vlapic->vm, "write to DFR in x2apic mode: %#x", data);
+		return;
+	}
+
+	lapic = &vlapic->apic;
+	dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK);
+	if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
+	else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
+	else
+		VLAPIC_CTR1(vlapic, "vlapic DFR in Unknown Model %#x", dfr);
+
+	lapic->dfr = dfr;
+}
+
+static void
+vlapic_set_ldr(struct vlapic *vlapic, uint32_t data)
+{
+	struct LAPIC *lapic;
+
+	/* LDR is read-only in x2apic mode */
+	if (x2apic(vlapic)) {
+		VLAPIC_CTR1(vlapic, "write to LDR in x2apic mode: %#x", data);
+		return;
+	}
+
+	lapic = &vlapic->apic;
+	lapic->ldr = data & ~APIC_LDR_RESERVED;
+	VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
+}
+
 static int
 vlapic_timer_divisor(uint32_t dcr)
 {
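The x2APIC logical destination register synthesized by vlapic_get_ldr() above is a pure function of the APIC ID, so it can be checked in isolation: for APIC ID 0x23 the cluster is 0x2 (bits 31:16) and the intra-cluster bit is 1 << 3, giving 0x20008. A standalone illustration (hypothetical helper name):

#include <stdint.h>
#include <stdio.h>

static uint32_t
x2apic_ldr(uint32_t apicid)
{
	uint32_t ldr;

	ldr = 1 << (apicid & 0xf);		/* position within the cluster */
	ldr |= (apicid & 0xffff0) << 12;	/* cluster ID into bits 31:16 */
	return (ldr);
}

int
main(void)
{
	printf("%#x\n", x2apic_ldr(0x23));	/* prints 0x20008 */
	return (0);
}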
@@ -610,12 +688,115 @@
 	VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
+/*
+ * This function populates 'dmask' with the set of vcpus that match the
+ * addressing specified by the (dest, phys, lowprio) tuple.
+ *
+ * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
+ * or xAPIC (8-bit) destination field.
+ */
+static void
+vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
+    bool lowprio, bool x2apic_dest)
+{
+	struct vlapic *vlapic;
+	uint32_t dfr, ldr, ldest, cluster;
+	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
+	cpuset_t amask;
+	int vcpuid;
+
+	if ((x2apic_dest && dest == 0xffffffff) ||
+	    (!x2apic_dest && dest == 0xff)) {
+		/*
+		 * Broadcast in both logical and physical modes.
+		 */
+		*dmask = vm_active_cpus(vm);
+		return;
+	}
+
+	if (phys) {
+		/*
+		 * Physical mode: destination is APIC ID.
+		 */
+		CPU_ZERO(dmask);
+		vcpuid = vm_apicid2vcpuid(vm, dest);
+		if (vcpuid < VM_MAXCPU)
+			CPU_SET(vcpuid, dmask);
+	} else {
+		/*
+		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
+		 * bitmask. This model is only available in the xAPIC mode.
+		 */
+		mda_flat_ldest = dest & 0xff;
+
+		/*
+		 * In the "Cluster Model" the MDA is used to identify a
+		 * specific cluster and a set of APICs in that cluster.
+		 */
+		if (x2apic_dest) {
+			mda_cluster_id = dest >> 16;
+			mda_cluster_ldest = dest & 0xffff;
+		} else {
+			mda_cluster_id = (dest >> 4) & 0xf;
+			mda_cluster_ldest = dest & 0xf;
+		}
+
+		/*
+		 * Logical mode: match each APIC that has a bit set
+		 * in its LDR that matches a bit in the ldest.
+		 */
+		CPU_ZERO(dmask);
+		amask = vm_active_cpus(vm);
+		while ((vcpuid = CPU_FFS(&amask)) != 0) {
+			vcpuid--;
+			CPU_CLR(vcpuid, &amask);
+
+			vlapic = vm_lapic(vm, vcpuid);
+			dfr = vlapic_get_dfr(vlapic);
+			ldr = vlapic_get_ldr(vlapic);
+
+			if ((dfr & APIC_DFR_MODEL_MASK) ==
+			    APIC_DFR_MODEL_FLAT) {
+				ldest = ldr >> 24;
+				mda_ldest = mda_flat_ldest;
+			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
+			    APIC_DFR_MODEL_CLUSTER) {
+				if (x2apic(vlapic)) {
+					cluster = ldr >> 16;
+					ldest = ldr & 0xffff;
+				} else {
+					cluster = ldr >> 28;
+					ldest = (ldr >> 24) & 0xf;
+				}
+				if (cluster != mda_cluster_id)
+					continue;
+				mda_ldest = mda_cluster_ldest;
+			} else {
+				/*
+				 * Guest has configured a bad logical
+				 * model for this vcpu - skip it.
+				 */
+				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
+				    "model %x - cannot deliver interrupt", dfr);
+				continue;
+			}
+
+			if ((mda_ldest & ldest) != 0) {
+				CPU_SET(vcpuid, dmask);
+				if (lowprio)
+					break;
+			}
+		}
+	}
+}
+
 static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
 
 static int
 lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu)
 {
 	int i;
+	bool phys;
 	cpuset_t dmask;
 	uint32_t dest, vec, mode;
 	struct vlapic *vlapic2;
@@ -631,7 +812,9 @@
 	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
 		switch (icrval & APIC_DEST_MASK) {
 		case APIC_DEST_DESTFLD:
-			CPU_SETOF(dest, &dmask);
+			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
+			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
+			    x2apic(vlapic));
 			break;
 		case APIC_DEST_SELF:
 			CPU_SETOF(vlapic->vcpuid, &dmask);
 			break;
@@ -820,10 +1003,7 @@
 
 	switch(offset) {
 	case APIC_OFFSET_ID:
-		if (x2apic(vlapic))
-			*data = vlapic->vcpuid;
-		else
-			*data = vlapic->vcpuid << 24;
+		*data = vlapic_get_id(vlapic);
 		break;
 	case APIC_OFFSET_VER:
 		*data = lapic->version;
@@ -841,10 +1021,10 @@
 		*data = lapic->eoi;
 		break;
 	case APIC_OFFSET_LDR:
-		*data = lapic->ldr;
+		*data = vlapic_get_ldr(vlapic);
 		break;
 	case APIC_OFFSET_DFR:
-		*data = lapic->dfr;
+		*data = vlapic_get_dfr(vlapic);
 		break;
 	case APIC_OFFSET_SVR:
 		*data = lapic->svr;
@@ -921,8 +1101,10 @@
 			vlapic_process_eoi(vlapic);
 			break;
 		case APIC_OFFSET_LDR:
+			vlapic_set_ldr(vlapic, data);
 			break;
 		case APIC_OFFSET_DFR:
+			vlapic_set_dfr(vlapic, data);
 			break;
 		case APIC_OFFSET_SVR:
 			lapic_set_svr(vlapic, data);
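As a worked example of the flat model handled by vlapic_calcdest() above: an xAPIC MDA of 0x03 selects every vcpu whose LDR has logical ID bit 0 or bit 1 set, because delivery reduces to a bitwise intersection. The pure-function version (illustrative only):

#include <stdbool.h>
#include <stdint.h>

static bool
flat_model_match(uint32_t ldr, uint32_t mda)
{
	uint32_t ldest = ldr >> 24;	/* flat logical ID lives in LDR[31:24] */

	return ((mda & 0xff & ldest) != 0);
}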
@@ -1041,6 +1223,34 @@
 	vlapic->msr_apicbase &= ~APICBASE_X2APIC;
 }
 
+void
+vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
+    int delmode, int vec)
+{
+	bool lowprio;
+	int vcpuid;
+	cpuset_t dmask;
+
+	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
+		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
+		return;
+	}
+	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
+
+	/*
+	 * We don't provide any virtual interrupt redirection hardware so
+	 * all interrupts originating from the ioapic or MSI specify the
+	 * 'dest' in the legacy xAPIC format.
+	 */
+	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);
+
+	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
+		vcpuid--;
+		CPU_CLR(vcpuid, &dmask);
+		lapic_set_intr(vm, vcpuid, vec, level);
+	}
+}
+
 bool
 vlapic_enabled(struct vlapic *vlapic)
 {
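A minimal sketch of a kernel-side caller of the new helper, assuming the vlapic.h declarations above are in scope (the wrapper name is hypothetical): a physical unicast with fixed delivery and edge trigger.

static void
deliver_fixed_edge(struct vm *vm, uint32_t apicid, int vector)
{
	vlapic_deliver_intr(vm, false /* edge */, apicid, true /* phys */,
	    APIC_DELMODE_FIXED, vector);
}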
Index: sys/amd64/vmm/io/vlapic.h
===================================================================
--- sys/amd64/vmm/io/vlapic.h	(revision 259205)
+++ sys/amd64/vmm/io/vlapic.h	(working copy)
@@ -103,4 +103,6 @@
 void	vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s);
 bool	vlapic_enabled(struct vlapic *vlapic);
 
+void	vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
+	    int delmode, int vec);
 #endif	/* _VLAPIC_H_ */
Index: sys/amd64/vmm/vmm_dev.c
===================================================================
--- sys/amd64/vmm/vmm_dev.c	(revision 259205)
+++ sys/amd64/vmm/vmm_dev.c	(working copy)
@@ -152,6 +152,7 @@
 	struct vm_run *vmrun;
 	struct vm_event *vmevent;
 	struct vm_lapic_irq *vmirq;
+	struct vm_lapic_msi *vmmsi;
 	struct vm_ioapic_irq *ioapic_irq;
 	struct vm_capability *vmcap;
 	struct vm_pptdev *pptdev;
@@ -254,7 +255,7 @@
 		pptmsi = (struct vm_pptdev_msi *)data;
 		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
 			pptmsi->bus, pptmsi->slot, pptmsi->func,
-			pptmsi->destcpu, pptmsi->vector,
+			pptmsi->addr, pptmsi->msg,
 			pptmsi->numvec);
 		break;
 	case VM_PPTDEV_MSIX:
@@ -262,8 +263,8 @@
 		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
 			pptmsix->bus, pptmsix->slot,
 			pptmsix->func, pptmsix->idx,
-			pptmsix->msg, pptmsix->vector_control,
-			pptmsix->addr);
+			pptmsix->addr, pptmsix->msg,
+			pptmsix->vector_control);
 		break;
 	case VM_MAP_PPTDEV_MMIO:
 		pptmmio = (struct vm_pptdev_mmio *)data;
@@ -296,6 +297,10 @@
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
 		break;
+	case VM_LAPIC_MSI:
+		vmmsi = (struct vm_lapic_msi *)data;
+		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
+		break;
 	case VM_IOAPIC_ASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
Index: sys/amd64/vmm/vmm_lapic.c
===================================================================
--- sys/amd64/vmm/vmm_lapic.c	(revision 259205)
+++ sys/amd64/vmm/vmm_lapic.c	(working copy)
@@ -38,9 +38,18 @@
 #include <machine/vmm.h>
 
 #include "vmm_ipi.h"
+#include "vmm_ktr.h"
 #include "vmm_lapic.h"
 #include "vlapic.h"
 
+/*
+ * Some MSI message definitions
+ */
+#define	MSI_X86_ADDR_MASK	0xfff00000
+#define	MSI_X86_ADDR_BASE	0xfee00000
+#define	MSI_X86_ADDR_RH		0x00000008	/* Redirection Hint */
+#define	MSI_X86_ADDR_LOG	0x00000004	/* Destination Mode */
+
 int
 lapic_pending_intr(struct vm *vm, int cpu)
 {
@@ -80,6 +89,44 @@
 	return (0);
 }
 
+int
+lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg)
+{
+	int delmode, vec;
+	uint32_t dest;
+	bool phys;
+
+	VM_CTR2(vm, "lapic MSI addr: %#lx msg: %#lx", addr, msg);
+
+	if ((addr & MSI_X86_ADDR_MASK) != MSI_X86_ADDR_BASE) {
+		VM_CTR1(vm, "lapic MSI invalid addr %#lx", addr);
+		return (-1);
+	}
+
+	/*
+	 * Extract the x86-specific fields from the MSI addr/msg
+	 * params according to the Intel Arch spec, Vol3 Ch 10.
+	 *
+	 * The PCI specification does not support level triggered
+	 * MSI/MSI-X so ignore trigger level in 'msg'.
+	 *
+	 * The 'dest' is interpreted as a logical APIC ID if both
+	 * the Redirection Hint and Destination Mode are '1' and
+	 * physical otherwise.
+	 */
+	dest = (addr >> 12) & 0xff;
+	phys = ((addr & (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)) !=
+	    (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG));
+	delmode = msg & APIC_DELMODE_MASK;
+	vec = msg & 0xff;
+
+	VM_CTR3(vm, "lapic MSI %s dest %#x, vec %d",
+	    phys ? "physical" : "logical", dest, vec);
+
+	vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec);
+	return (0);
+}
+
 static boolean_t
 x2apic_msr(u_int msr)
 {
Index: sys/amd64/vmm/vmm_lapic.h
===================================================================
--- sys/amd64/vmm/vmm_lapic.h	(revision 259205)
+++ sys/amd64/vmm/vmm_lapic.h	(working copy)
@@ -84,4 +84,5 @@
 	return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_EDGE));
 }
 
+int	lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg);
 #endif
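The decode in lapic_intr_msi() can be exercised standalone. With addr 0xfee0300c and msg 0x0041, the Redirection Hint and Destination Mode bits (0x8 | 0x4) are both set, so destination 0x3 is logical, with fixed delivery of vector 0x41 (constants mirrored locally for illustration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define	MSI_ADDR_MASK	0xfff00000
#define	MSI_ADDR_BASE	0xfee00000
#define	MSI_ADDR_RH	0x00000008	/* Redirection Hint */
#define	MSI_ADDR_LOG	0x00000004	/* Destination Mode */

int
main(void)
{
	uint64_t addr = 0xfee0300c, msg = 0x0041;	/* sample values */
	uint32_t dest, delmode, vec;
	bool phys;

	if ((addr & MSI_ADDR_MASK) != MSI_ADDR_BASE)
		return (1);

	dest = (addr >> 12) & 0xff;
	phys = ((addr & (MSI_ADDR_RH | MSI_ADDR_LOG)) !=
	    (MSI_ADDR_RH | MSI_ADDR_LOG));
	delmode = msg & 0x700;		/* delivery mode field */
	vec = msg & 0xff;

	/* prints: dest 0x3 logical delmode 0 vector 0x41 */
	printf("dest %#x %s delmode %#x vector %#x\n",
	    dest, phys ? "physical" : "logical", delmode, vec);
	return (0);
}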
Index: usr.sbin/bhyve/pci_emul.c
===================================================================
--- usr.sbin/bhyve/pci_emul.c	(revision 259205)
+++ usr.sbin/bhyve/pci_emul.c	(working copy)
@@ -850,19 +850,14 @@
 	else
 		msgdata = pci_get_cfgdata16(pi, capoff + 8);
 
-	/*
-	 * XXX check delivery mode, destination mode etc
-	 */
 	mme = msgctrl & PCIM_MSICTRL_MME_MASK;
 	pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
 	if (pi->pi_msi.enabled) {
-		pi->pi_msi.cpu = (addrlo >> 12) & 0xff;
-		pi->pi_msi.vector = msgdata & 0xff;
-		pi->pi_msi.msgnum = 1 << (mme >> 4);
+		pi->pi_msi.addr = addrlo;
+		pi->pi_msi.msg_data = msgdata;
+		pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
 	} else {
-		pi->pi_msi.cpu = 0;
-		pi->pi_msi.vector = 0;
-		pi->pi_msi.msgnum = 0;
+		pi->pi_msi.maxmsgnum = 0;
 	}
 }
 
@@ -1060,10 +1055,10 @@
 }
 
 int
-pci_msi_msgnum(struct pci_devinst *pi)
+pci_msi_maxmsgnum(struct pci_devinst *pi)
 {
 	if (pi->pi_msi.enabled)
-		return (pi->pi_msi.msgnum);
+		return (pi->pi_msi.maxmsgnum);
 	else
 		return (0);
 }
@@ -1092,19 +1087,17 @@
 
 	mte = &pi->pi_msix.table[index];
 	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
 		/* XXX Set PBA bit if interrupt is disabled */
-		vm_lapic_irq(pi->pi_vmctx,
-		    (mte->addr >> 12) & 0xff, mte->msg_data & 0xff);
+		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
 	}
 }
 
 void
-pci_generate_msi(struct pci_devinst *pi, int msg)
+pci_generate_msi(struct pci_devinst *pi, int index)
 {
-	if (pci_msi_enabled(pi) && msg < pci_msi_msgnum(pi)) {
-		vm_lapic_irq(pi->pi_vmctx,
-		    pi->pi_msi.cpu,
-		    pi->pi_msi.vector + msg);
+	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
+		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
+		    pi->pi_msi.msg_data + index);
 	}
 }
 
@@ -1511,10 +1504,10 @@
 	 * Special magic value to generate an interrupt
 	 */
 	if (offset == 4 && size == 4 && pci_msi_enabled(pi))
-		pci_generate_msi(pi, value % pci_msi_msgnum(pi));
+		pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));
 
 	if (value == 0xabcdef) {
-		for (i = 0; i < pci_msi_msgnum(pi); i++)
+		for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
 			pci_generate_msi(pi, i);
 	}
 }
Index: usr.sbin/bhyve/pci_emul.h
===================================================================
--- usr.sbin/bhyve/pci_emul.h	(revision 259205)
+++ usr.sbin/bhyve/pci_emul.h	(working copy)
@@ -109,10 +109,10 @@
 	int	pi_bar_getsize;
 
 	struct {
-		int	enabled;
-		int	cpu;
-		int	vector;
-		int	msgnum;
+		int		enabled;
+		uint64_t	addr;
+		uint64_t	msg_data;
+		int		maxmsgnum;
 	} pi_msi;
 
 	struct {
Index: usr.sbin/bhyve/pci_passthru.c
===================================================================
--- usr.sbin/bhyve/pci_passthru.c	(revision 259205)
+++ usr.sbin/bhyve/pci_passthru.c	(working copy)
@@ -348,9 +348,9 @@
 			error = vm_setup_msix(ctx, vcpu,
 			    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
 			    sc->psc_sel.pc_func,
-			    index, entry->msg_data,
-			    entry->vector_control,
-			    entry->addr);
+			    index, entry->addr,
+			    entry->msg_data,
+			    entry->vector_control);
 		}
 	}
 }
@@ -653,8 +653,9 @@
 		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
 		error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus,
-		    sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu,
-		    pi->pi_msi.vector, pi->pi_msi.msgnum);
+		    sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
+		    pi->pi_msi.addr, pi->pi_msi.msg_data,
+		    pi->pi_msi.maxmsgnum);
 		if (error != 0) {
 			printf("vm_setup_msi returned error %d\r\n", errno);
 			exit(1);
 		}
@@ -667,15 +668,16 @@
 
 	if (pi->pi_msix.enabled) {
 		msix_table_entries = pi->pi_msix.table_count;
 		for (i = 0; i < msix_table_entries; i++) {
-			error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
-			    sc->psc_sel.pc_dev,
-			    sc->psc_sel.pc_func, i,
-			    pi->pi_msix.table[i].msg_data,
-			    pi->pi_msix.table[i].vector_control,
-			    pi->pi_msix.table[i].addr);
+			error = vm_setup_msix(ctx, vcpu,
+			    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
+			    sc->psc_sel.pc_func, i,
+			    pi->pi_msix.table[i].addr,
+			    pi->pi_msix.table[i].msg_data,
+			    pi->pi_msix.table[i].vector_control);
 			if (error) {
-				printf("vm_setup_msix returned error %d\r\n", errno);
+				printf("vm_setup_msix error %d\r\n",
+				    errno);
 				exit(1);
 			}
 		}
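Finally, a device-model usage sketch (hypothetical helper; assumes a configured struct pci_devinst as declared in pci_emul.h above): 'index' selects one of the messages granted by the guest, and the guest-programmed address/data pair flows through pci_generate_msi() and vm_lapic_msi() unmodified.

static void
raise_device_intr(struct pci_devinst *pi, int index)
{
	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi))
		pci_generate_msi(pi, index);	/* lands in vm_lapic_msi() */
}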