Index: usr.sbin/bhyve/acpi.c =================================================================== --- usr.sbin/bhyve/acpi.c (revision 261702) +++ usr.sbin/bhyve/acpi.c (working copy) @@ -713,7 +713,7 @@ pci_write_dsdt(); dsdt_line(""); - dsdt_line(" Scope (_SB.PCI0)"); + dsdt_line(" Scope (_SB.B0)"); dsdt_line(" {"); dsdt_line(" Device (HPET)"); dsdt_line(" {"); Index: usr.sbin/bhyve/mptbl.c =================================================================== --- usr.sbin/bhyve/mptbl.c (revision 261702) +++ usr.sbin/bhyve/mptbl.c (working copy) @@ -196,16 +196,21 @@ static int mpt_count_ioint_entries(void) { + int bus, count; + count = 0; + for (bus = 0; bus <= PCI_BUSMAX; bus++) + count += pci_count_lintr(bus); + /* * Always include entries for the first 16 pins along with a entry * for each active PCI INTx pin. */ - return (16 + pci_count_lintr()); + return (16 + count); } static void -mpt_generate_pci_int(int slot, int pin, int ioapic_irq, void *arg) +mpt_generate_pci_int(int bus, int slot, int pin, int ioapic_irq, void *arg) { int_entry_ptr *mpiep, mpie; @@ -219,7 +224,7 @@ */ mpie->type = MPCT_ENTRY_INT; mpie->int_type = INTENTRY_TYPE_INT; - mpie->src_bus_id = 0; + mpie->src_bus_id = bus; mpie->src_bus_irq = slot << 2 | (pin - 1); mpie->dst_apic_id = mpie[-1].dst_apic_id; mpie->dst_apic_int = ioapic_irq; @@ -230,7 +235,7 @@ static void mpt_build_ioint_entries(int_entry_ptr mpie, int id) { - int pin; + int pin, bus; /* * The following config is taken from kernel mptable.c @@ -277,7 +282,8 @@ } /* Next, generate entries for any PCI INTx interrupts. 
*/ - pci_walk_lintr(mpt_generate_pci_int, &mpie); + for (bus = 0; bus <= PCI_BUSMAX; bus++) + pci_walk_lintr(bus, mpt_generate_pci_int, &mpie); } void Index: usr.sbin/bhyve/pci_emul.c =================================================================== --- usr.sbin/bhyve/pci_emul.c (revision 261702) +++ usr.sbin/bhyve/pci_emul.c (working copy) @@ -69,6 +69,7 @@ } \ } while (0) +#define MAXBUSES (PCI_BUSMAX + 1) #define MAXSLOTS (PCI_SLOTMAX + 1) #define MAXFUNCS (PCI_FUNCMAX + 1) @@ -86,8 +87,17 @@ struct slotinfo { struct intxinfo si_intpins[4]; struct funcinfo si_funcs[MAXFUNCS]; -} pci_slotinfo[MAXSLOTS]; +}; +struct businfo { + uint16_t iobase, iolimit; /* I/O window */ + uint32_t membase32, memlimit32; /* mmio window below 4GB */ + uint64_t membase64, memlimit64; /* mmio window above 4GB */ + struct slotinfo slotinfo[MAXSLOTS]; +}; + +static struct businfo *pci_businfo[MAXBUSES]; + SET_DECLARE(pci_devemu_set, struct pci_devemu); static uint64_t pci_emul_iobase; @@ -97,7 +107,7 @@ #define PCI_EMUL_IOBASE 0x2000 #define PCI_EMUL_IOLIMIT 0x10000 -#define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ +#define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ #define PCI_EMUL_MEMBASE64 0xD000000000UL #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL @@ -105,7 +115,6 @@ static struct pci_devemu *pci_emul_finddev(char *name); static void pci_lintr_update(struct pci_devinst *pi); -static int pci_emul_devices; static struct mem_range pci_mem_hole; /* @@ -115,6 +124,7 @@ /* * Slot options are in the form: * + * <bus>:<slot>:<func>,<emul>[,<config>] * <slot>[:<func>],<emul>[,<config>] * * slot is 0..31 @@ -136,37 +146,53 @@ int pci_parse_slot(char *opt) { - char *slot, *func, *emul, *config; - char *str, *cpy; - int error, snum, fnum; + struct businfo *bi; + struct slotinfo *si; + char *emul, *config, *str, *cp; + int error, bnum, snum, fnum; error = -1; - str = cpy = strdup(opt); + str = strdup(opt); - slot = strsep(&str, ","); - func = NULL; - if (strchr(slot, ':') != NULL) { - func = cpy; - (void) strsep(&func, ":"); - } - - emul = 
strsep(&str, ","); - config = str; - - if (emul == NULL) { + emul = config = NULL; + if ((cp = strchr(str, ',')) != NULL) { + *cp = '\0'; + emul = cp + 1; + if ((cp = strchr(emul, ',')) != NULL) { + *cp = '\0'; + config = cp + 1; + } + } else { pci_parse_slot_usage(opt); goto done; } - snum = atoi(slot); - fnum = func ? atoi(func) : 0; + /* <bus>:<slot>:<func> */ + if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { + bnum = 0; + /* <slot>:<func> */ + if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { + fnum = 0; + /* <slot> */ + if (sscanf(str, "%d", &snum) != 1) { + snum = -1; + } + } + } - if (snum < 0 || snum >= MAXSLOTS || fnum < 0 || fnum >= MAXFUNCS) { + if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || + fnum < 0 || fnum >= MAXFUNCS) { pci_parse_slot_usage(opt); goto done; } - if (pci_slotinfo[snum].si_funcs[fnum].fi_name != NULL) { + if (pci_businfo[bnum] == NULL) + pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); + + bi = pci_businfo[bnum]; + si = &bi->slotinfo[snum]; + + if (si->si_funcs[fnum].fi_name != NULL) { fprintf(stderr, "pci slot %d:%d already occupied!\n", snum, fnum); goto done; @@ -179,12 +205,12 @@ } error = 0; - pci_slotinfo[snum].si_funcs[fnum].fi_name = emul; - pci_slotinfo[snum].si_funcs[fnum].fi_param = config; + si->si_funcs[fnum].fi_name = emul; + si->si_funcs[fnum].fi_param = config; done: if (error) - free(cpy); + free(str); return (error); } @@ -665,8 +691,8 @@ } static int -pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, int func, - char *params) +pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, + int func, struct funcinfo *fi) { struct pci_devinst *pdi; int err; @@ -675,7 +701,7 @@ bzero(pdi, sizeof(*pdi)); pdi->pi_vmctx = ctx; - pdi->pi_bus = 0; + pdi->pi_bus = bus; pdi->pi_slot = slot; pdi->pi_func = func; pthread_mutex_init(&pdi->pi_lintr.lock, NULL); @@ -692,13 +718,11 @@ pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); - err = (*pde->pe_init)(ctx, 
pdi, params); - if (err != 0) { + err = (*pde->pe_init)(ctx, pdi, fi->fi_param); + if (err == 0) + fi->fi_devi = pdi; + else free(pdi); - } else { - pci_emul_devices++; - pci_slotinfo[slot].si_funcs[func].fi_devi = pdi; - } return (err); } @@ -1006,31 +1030,68 @@ return (0); } +#define BUSIO_ROUNDUP 32 +#define BUSMEM_ROUNDUP (1024 * 1024) + int init_pci(struct vmctx *ctx) { struct pci_devemu *pde; + struct businfo *bi; + struct slotinfo *si; struct funcinfo *fi; size_t lowmem; - int slot, func; + int bus, slot, func; int error; pci_emul_iobase = PCI_EMUL_IOBASE; pci_emul_membase32 = vm_get_lowmem_limit(ctx); pci_emul_membase64 = PCI_EMUL_MEMBASE64; - for (slot = 0; slot < MAXSLOTS; slot++) { - for (func = 0; func < MAXFUNCS; func++) { - fi = &pci_slotinfo[slot].si_funcs[func]; - if (fi->fi_name != NULL) { + for (bus = 0; bus < MAXBUSES; bus++) { + if ((bi = pci_businfo[bus]) == NULL) + continue; + /* + * Keep track of the i/o and memory resources allocated to + * this bus. + */ + bi->iobase = pci_emul_iobase; + bi->membase32 = pci_emul_membase32; + bi->membase64 = pci_emul_membase64; + + for (slot = 0; slot < MAXSLOTS; slot++) { + si = &bi->slotinfo[slot]; + for (func = 0; func < MAXFUNCS; func++) { + fi = &si->si_funcs[func]; + if (fi->fi_name == NULL) + continue; pde = pci_emul_finddev(fi->fi_name); assert(pde != NULL); - error = pci_emul_init(ctx, pde, slot, func, - fi->fi_param); + error = pci_emul_init(ctx, pde, bus, slot, + func, fi); if (error) return (error); } } + + /* + * Add some slop to the I/O and memory resources decoded by + * this bus to give a guest some flexibility if it wants to + * reprogram the BARs. 
+ */ + pci_emul_iobase += BUSIO_ROUNDUP; + pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); + bi->iolimit = pci_emul_iobase; + + pci_emul_membase32 += BUSMEM_ROUNDUP; + pci_emul_membase32 = roundup2(pci_emul_membase32, + BUSMEM_ROUNDUP); + bi->memlimit32 = pci_emul_membase32; + + pci_emul_membase64 += BUSMEM_ROUNDUP; + pci_emul_membase64 = roundup2(pci_emul_membase64, + BUSMEM_ROUNDUP); + bi->memlimit64 = pci_emul_membase64; } /* @@ -1060,7 +1121,7 @@ } static void -pci_prt_entry(int slot, int pin, int ioapic_irq, void *arg) +pci_prt_entry(int bus, int slot, int pin, int ioapic_irq, void *arg) { int *count; @@ -1075,96 +1136,158 @@ (*count)--; } -void -pci_write_dsdt(void) +/* + * A bhyve virtual machine has a flat PCI hierarchy with a root port + * corresponding to each PCI bus. + */ +static void +pci_bus_write_dsdt(int bus) { + struct businfo *bi; + struct slotinfo *si; struct pci_devinst *pi; int count, slot, func; + /* + * If there are no devices on this 'bus' then just return. + */ + if ((bi = pci_businfo[bus]) == NULL) { + /* + * Bus 0 is special because it decodes the I/O ports used + * for PCI config space access even if there are no devices + * on it. 
+ */ + if (bus != 0) + return; + } + dsdt_indent(1); dsdt_line("Scope (_SB)"); dsdt_line("{"); - dsdt_line(" Device (PCI0)"); + dsdt_line(" Device (B%d)", bus); dsdt_line(" {"); dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); dsdt_line(" Name (_ADR, Zero)"); + + dsdt_line(" Method (_BBN, 0, NotSerialized)"); + dsdt_line(" {"); + dsdt_line(" Return (0x%08X)", bus); + dsdt_line(" }"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " "MaxFixed, PosDecode,"); dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x0000, // Range Minimum"); - dsdt_line(" 0x00FF, // Range Maximum"); + dsdt_line(" 0x%04X, // Range Minimum", bus); + dsdt_line(" 0x%04X, // Range Maximum", bus); dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0x0100, // Length"); + dsdt_line(" 0x0001, // Length"); dsdt_line(" ,, )"); - dsdt_indent(3); - dsdt_fixed_ioport(0xCF8, 8); - dsdt_unindent(3); + + if (bus == 0) { + dsdt_indent(3); + dsdt_fixed_ioport(0xCF8, 8); + dsdt_unindent(3); + + dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " + "PosDecode, EntireRange,"); + dsdt_line(" 0x0000, // Granularity"); + dsdt_line(" 0x0000, // Range Minimum"); + dsdt_line(" 0x0CF7, // Range Maximum"); + dsdt_line(" 0x0000, // Translation Offset"); + dsdt_line(" 0x0CF8, // Length"); + dsdt_line(" ,, , TypeStatic)"); + + dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " + "PosDecode, EntireRange,"); + dsdt_line(" 0x0000, // Granularity"); + dsdt_line(" 0x0D00, // Range Minimum"); + dsdt_line(" 0x%04X, // Range Maximum", + PCI_EMUL_IOBASE - 1); + dsdt_line(" 0x0000, // Translation Offset"); + dsdt_line(" 0x%04X, // Length", + PCI_EMUL_IOBASE - 0x0D00); + dsdt_line(" ,, , TypeStatic)"); + + if (bi == NULL) { + dsdt_line(" })"); + goto done; + } + } + assert(bi != NULL); + + /* i/o window */ dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " "PosDecode, EntireRange,"); dsdt_line(" 0x0000, // Granularity"); - 
dsdt_line(" 0x0000, // Range Minimum"); - dsdt_line(" 0x0CF7, // Range Maximum"); + dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); + dsdt_line(" 0x%04X, // Range Maximum", + bi->iolimit - 1); dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0x0CF8, // Length"); + dsdt_line(" 0x%04X, // Length", + bi->iolimit - bi->iobase); dsdt_line(" ,, , TypeStatic)"); - dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " - "PosDecode, EntireRange,"); - dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x0D00, // Range Minimum"); - dsdt_line(" 0xFFFF, // Range Maximum"); - dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0xF300, // Length"); - dsdt_line(" ,, , TypeStatic)"); + + /* mmio window (32-bit) */ dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); dsdt_line(" 0x00000000, // Granularity"); - dsdt_line(" 0x%08lX, // Range Minimum\n", - pci_mem_hole.base); + dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); dsdt_line(" 0x%08X, // Range Maximum\n", - PCI_EMUL_MEMLIMIT32 - 1); + bi->memlimit32 - 1); dsdt_line(" 0x00000000, // Translation Offset"); - dsdt_line(" 0x%08lX, // Length\n", - PCI_EMUL_MEMLIMIT32 - pci_mem_hole.base); + dsdt_line(" 0x%08X, // Length\n", + bi->memlimit32 - bi->membase32); dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); + + /* mmio window (64-bit) */ dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); dsdt_line(" 0x0000000000000000, // Granularity"); - dsdt_line(" 0x%016lX, // Range Minimum\n", - PCI_EMUL_MEMBASE64); + dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); dsdt_line(" 0x%016lX, // Range Maximum\n", - PCI_EMUL_MEMLIMIT64 - 1); + bi->memlimit64 - 1); dsdt_line(" 0x0000000000000000, // Translation Offset"); dsdt_line(" 0x%016lX, // Length\n", - PCI_EMUL_MEMLIMIT64 - PCI_EMUL_MEMBASE64); + bi->memlimit64 - bi->membase64); dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); dsdt_line(" 
})"); - count = pci_count_lintr(); + + count = pci_count_lintr(bus); if (count != 0) { dsdt_indent(2); dsdt_line("Name (_PRT, Package (0x%02X)", count); dsdt_line("{"); - pci_walk_lintr(pci_prt_entry, &count); + pci_walk_lintr(bus, pci_prt_entry, &count); dsdt_line("})"); dsdt_unindent(2); } dsdt_indent(2); for (slot = 0; slot < MAXSLOTS; slot++) { + si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { - pi = pci_slotinfo[slot].si_funcs[func].fi_devi; + pi = si->si_funcs[func].fi_devi; if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) pi->pi_d->pe_write_dsdt(pi); } } dsdt_unindent(2); - +done: dsdt_line(" }"); dsdt_line("}"); dsdt_unindent(1); } +void +pci_write_dsdt(void) +{ + int bus; + + for (bus = 0; bus < MAXBUSES; bus++) + pci_bus_write_dsdt(bus); +} + int pci_msi_enabled(struct pci_devinst *pi) { @@ -1231,13 +1354,17 @@ int pci_lintr_request(struct pci_devinst *pi) { + struct businfo *bi; struct slotinfo *si; int bestpin, bestcount, irq, pin; + bi = pci_businfo[pi->pi_bus]; + assert(bi != NULL); + /* * First, allocate a pin from our slot. 
*/ - si = &pci_slotinfo[pi->pi_slot]; + si = &bi->slotinfo[pi->pi_slot]; bestpin = 0; bestcount = si->si_intpins[0].ii_count; for (pin = 1; pin < 4; pin++) { @@ -1318,31 +1445,41 @@ } int -pci_count_lintr(void) +pci_count_lintr(int bus) { int count, slot, pin; + struct slotinfo *slotinfo; count = 0; - for (slot = 0; slot < MAXSLOTS; slot++) { - for (pin = 0; pin < 4; pin++) { - if (pci_slotinfo[slot].si_intpins[pin].ii_count != 0) - count++; + if (pci_businfo[bus] != NULL) { + for (slot = 0; slot < MAXSLOTS; slot++) { + slotinfo = &pci_businfo[bus]->slotinfo[slot]; + for (pin = 0; pin < 4; pin++) { + if (slotinfo->si_intpins[pin].ii_count != 0) + count++; + } } } return (count); } void -pci_walk_lintr(pci_lintr_cb cb, void *arg) +pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) { + struct businfo *bi; + struct slotinfo *si; struct intxinfo *ii; int slot, pin; + if ((bi = pci_businfo[bus]) == NULL) + return; + for (slot = 0; slot < MAXSLOTS; slot++) { + si = &bi->slotinfo[slot]; for (pin = 0; pin < 4; pin++) { - ii = &pci_slotinfo[slot].si_intpins[pin]; + ii = &si->si_intpins[pin]; if (ii->ii_count != 0) - cb(slot, pin + 1, ii->ii_ioapic_irq, arg); + cb(bus, slot, pin + 1, ii->ii_ioapic_irq, arg); } } } @@ -1352,14 +1489,19 @@ * Return 0 otherwise. */ static int -pci_emul_is_mfdev(int slot) +pci_emul_is_mfdev(int bus, int slot) { + struct businfo *bi; + struct slotinfo *si; int f, numfuncs; numfuncs = 0; - for (f = 0; f < MAXFUNCS; f++) { - if (pci_slotinfo[slot].si_funcs[f].fi_devi != NULL) { - numfuncs++; + if ((bi = pci_businfo[bus]) != NULL) { + si = &bi->slotinfo[slot]; + for (f = 0; f < MAXFUNCS; f++) { + if (si->si_funcs[f].fi_devi != NULL) { + numfuncs++; + } } } return (numfuncs > 1); @@ -1370,12 +1512,12 @@ * whether or not is a multi-function being emulated in the pci 'slot'. 
*/ static void -pci_emul_hdrtype_fixup(int slot, int off, int bytes, uint32_t *rv) +pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) { int mfdev; if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { - mfdev = pci_emul_is_mfdev(slot); + mfdev = pci_emul_is_mfdev(bus, slot); switch (bytes) { case 1: case 2: @@ -1492,16 +1634,19 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { + struct businfo *bi; + struct slotinfo *si; struct pci_devinst *pi; struct pci_devemu *pe; int coff, idx, needcfg; uint64_t addr, bar, mask; assert(bytes == 1 || bytes == 2 || bytes == 4); - - if (cfgbus == 0) - pi = pci_slotinfo[cfgslot].si_funcs[cfgfunc].fi_devi; - else + + if ((bi = pci_businfo[cfgbus]) != NULL) { + si = &bi->slotinfo[cfgslot]; + pi = si->si_funcs[cfgfunc].fi_devi; + } else pi = NULL; coff = cfgoff + (port - CONF1_DATA_PORT); @@ -1544,7 +1689,7 @@ *eax = pci_get_cfgdata32(pi, coff); } - pci_emul_hdrtype_fixup(cfgslot, coff, bytes, eax); + pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax); } else { /* Let the device emulation override the default handler */ if (pe->pe_cfgwrite != NULL && @@ -1646,7 +1791,7 @@ /* * Define a dummy test device */ -#define DIOSZ 20 +#define DIOSZ 8 #define DMEMSZ 4096 struct pci_emul_dsoftc { uint8_t ioregs[DIOSZ]; Index: usr.sbin/bhyve/pci_emul.h =================================================================== --- usr.sbin/bhyve/pci_emul.h (revision 261702) +++ usr.sbin/bhyve/pci_emul.h (working copy) @@ -199,7 +199,7 @@ uint16_t slot_status2; } __packed; -typedef void (*pci_lintr_cb)(int slot, int pin, int ioapic_irq, void *arg); +typedef void (*pci_lintr_cb)(int b, int s, int pin, int ioapic_irq, void *arg); int init_pci(struct vmctx *ctx); void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, @@ -229,8 +229,8 @@ int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, uint64_t value); uint64_t 
pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size); -int pci_count_lintr(void); -void pci_walk_lintr(pci_lintr_cb cb, void *arg); +int pci_count_lintr(int bus); +void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg); void pci_write_dsdt(void); static __inline void Index: usr.sbin/bhyve/pci_lpc.c =================================================================== --- usr.sbin/bhyve/pci_lpc.c (revision 261702) +++ usr.sbin/bhyve/pci_lpc.c (working copy) @@ -277,9 +277,21 @@ /* * Do not allow more than one LPC bridge to be configured. */ - if (lpc_bridge != NULL) + if (lpc_bridge != NULL) { + fprintf(stderr, "Only one LPC bridge is allowed.\n"); return (-1); + } + /* + * Enforce that the LPC can only be configured on bus 0. This + * simplifies the ACPI DSDT because it can provide a decode for + * all legacy i/o ports behind bus 0. + */ + if (pi->pi_bus != 0) { + fprintf(stderr, "LPC bridge can be present only on bus 0.\n"); + return (-1); + } + if (lpc_init() != 0) return (-1);