From 72a0807f05911cca1361424f506f4656adb2eedc Mon Sep 17 00:00:00 2001 From: Ryan Stone Date: Tue, 15 Apr 2014 23:37:17 -0400 Subject: [PATCH 09/21] Allocate PCI I/O memory spaces for VFs When creating VFs, we must size each SR-IOV BAR on the PF and allocate a contiguous I/O memory window large enough for every VF. However, the window only needs to be aligned to a boundary equal to the size of the window for a single VF. When a VF attempts to allocate an I/O memory resource, we must intercept the request in the pci driver and pass it off to the SR-IOV code, which will allocate the correct window from the pre-allocated memory space for the PF. Inform the pci driver about the size and address of the BARs on the VF when the VF is created. This is required by pciconf -b and bhyve. --- sys/dev/pci/pci.c | 35 ++++++++ sys/dev/pci/pci_iov.c | 200 +++++++++++++++++++++++++++++++++++++++++- sys/dev/pci/pci_iov_private.h | 13 +++ sys/dev/pci/pci_private.h | 6 ++ sys/dev/pci/pcivar.h | 2 +- 5 files changed, 252 insertions(+), 4 deletions(-) diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index d434c35..f2caf95 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -4660,11 +4660,30 @@ struct resource * pci_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { +#ifdef PCI_IOV + struct pci_devinfo *dinfo; +#endif if (device_get_parent(child) != dev) return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid, start, end, count, flags)); +#ifdef PCI_IOV + dinfo = device_get_ivars(child); + if (dinfo->cfg.flags & PCICFG_VF) { + switch (type) { + /* VFs can't have I/O BARs. */ + case SYS_RES_IOPORT: + return (NULL); + case SYS_RES_MEMORY: + return (pci_vf_alloc_mem_resource(dev, child, rid, + start, end, count, flags)); + } + + /* Fall through for other types of resource allocations. 
 */ + } +#endif + return (pci_alloc_multi_resource(dev, child, type, rid, start, end, count, 1, flags)); } @@ -4683,6 +4702,22 @@ pci_release_resource(device_t dev, device_t child, int type, int rid, dinfo = device_get_ivars(child); cfg = &dinfo->cfg; + +#ifdef PCI_IOV + if (dinfo->cfg.flags & PCICFG_VF) { + switch (type) { + /* VFs can't have I/O BARs. */ + case SYS_RES_IOPORT: + return (EDOOFUS); + case SYS_RES_MEMORY: + return (pci_vf_release_mem_resource(dev, child, rid, + r)); + } + + /* Fall through for other types of resource allocations. */ + } +#endif + #ifdef NEW_PCIB /* * PCI-PCI bridge I/O window resources are not BARs. For diff --git a/sys/dev/pci/pci_iov.c b/sys/dev/pci/pci_iov.c index 3cc38bb..397ff65 100755 --- a/sys/dev/pci/pci_iov.c +++ b/sys/dev/pci/pci_iov.c @@ -106,7 +106,6 @@ pci_iov_attach_method(device_t bus, device_t dev) error = EBUSY; goto cleanup; } - iov->iov_pos = iov_pos; iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev), @@ -162,6 +161,56 @@ pci_iov_detach_method(device_t bus, device_t dev) return (0); } +static int +pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift) +{ + struct resource *res; + struct pcicfg_iov *iov; + device_t dev, bus; + u_long start, end; + pci_addr_t bar_size; + int rid; + + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + bus = device_get_parent(dev); + rid = iov->iov_pos + PCIR_SRIOV_BAR(bar); + bar_size = (pci_addr_t)1 << bar_shift; /* shift in 64 bits */ + + res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 1ul, + ~1ul, 1, iov->iov_num_vfs, RF_ACTIVE); + + if (res == NULL) + return (ENXIO); + + iov->iov_bar[bar].res = res; + iov->iov_bar[bar].bar_size = bar_size; + iov->iov_bar[bar].bar_shift = bar_shift; + + start = rman_get_start(res); + end = start + rman_get_size(res) - 1; + return (rman_manage_region(&iov->rman, start, end)); +} + +static void +pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo) +{ + struct pci_iov_bar *bar; + uint64_t bar_start; + int i; + + for (i = 0; i <= 
PCIR_MAX_BAR_0; i++) { + bar = &iov->iov_bar[i]; + if (bar->res != NULL) { + bar_start = rman_get_start(bar->res) + + dinfo->cfg.vf.index * bar->bar_size; + + pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start, + bar->bar_shift); + } + } +} + /* * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV * capability. This bit is only writeable on the lowest-numbered PF but @@ -235,6 +284,61 @@ pci_iov_config_page_size(struct pci_devinfo *dinfo) return (0); } +static int +pci_iov_init_rman(struct pcicfg_iov *iov) +{ + int error; + + iov->rman.rm_start = 0; + iov->rman.rm_end = ~0ul; + iov->rman.rm_type = RMAN_ARRAY; + iov->rman.rm_descr = "SR-IOV VF I/O memory"; + + error = rman_init(&iov->rman); + if (error != 0) + return (error); + + iov->iov_flags |= IOV_RMAN_INITED; + return (0); +} + +static int +pci_iov_setup_bars(struct pci_devinfo *dinfo) +{ + device_t dev; + struct pcicfg_iov *iov; + pci_addr_t bar_value, testval; + int i, last_64, error; + + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + last_64 = 0; + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + /* + * If a PCI BAR is a 64-bit wide BAR, then it spans two + * consecutive registers. Therefore if the last BAR that + * we looked at was a 64-bit BAR, we need to skip this + * register as it's the second half of the last BAR. 
+ */ + if (!last_64) { + pci_read_bar(dev, + iov->iov_pos + PCIR_SRIOV_BAR(i), + &bar_value, &testval, &last_64); + + if (testval != 0) { + error = pci_iov_alloc_bar(dinfo, i, + pci_mapsize(testval)); + if (error != 0) + return (error); + } + } else + last_64 = 0; + } + + return (0); +} + static void pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver, uint16_t first_rid, uint16_t rid_stride) @@ -261,6 +365,8 @@ pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver, vfinfo->cfg.iov = iov; vfinfo->cfg.vf.index = i; + pci_iov_add_bars(iov, vfinfo); + error = PCI_ADD_VF(dev, i); if (error != 0) { device_printf(dev, "Failed to add VF %d\n", i); @@ -278,7 +384,7 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) const char *driver; struct pci_devinfo *dinfo; struct pcicfg_iov *iov; - int error; + int i, error; uint16_t rid_off, rid_stride; uint16_t first_rid, last_rid; uint16_t iov_ctl; @@ -345,10 +451,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE); IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); + error = pci_iov_init_rman(iov); + if (error != 0) + goto out; + iov->iov_num_vfs = arg->num_vfs; + error = pci_iov_setup_bars(dinfo); + if (error != 0) + goto out; + iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); - iov_ctl |= PCIM_SRIOV_VF_EN; + iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE; IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); /* Per specification, we must wait 100ms before accessing VFs. 
 */ @@ -360,6 +474,22 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) out: if (iov_inited) PCI_UNINIT_IOV(dev); + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + if (iov->iov_bar[i].res != NULL) { + pci_release_resource(bus, dev, SYS_RES_MEMORY, + iov->iov_pos + PCIR_SRIOV_BAR(i), + iov->iov_bar[i].res); + pci_delete_resource(bus, dev, SYS_RES_MEMORY, + iov->iov_pos + PCIR_SRIOV_BAR(i)); + iov->iov_bar[i].res = NULL; + } + } + + if (iov->iov_flags & IOV_RMAN_INITED) { + rman_fini(&iov->rman); + iov->iov_flags &= ~IOV_RMAN_INITED; + } iov->iov_num_vfs = 0; mtx_unlock(&Giant); return (error); @@ -378,3 +508,67 @@ pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, } } +struct resource * +pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid, u_long start, + u_long end, u_long count, u_int flags) +{ + struct pci_devinfo *dinfo; + struct pcicfg_iov *iov; + struct pci_map *map; + struct resource *res; + struct resource_list_entry *rle; + u_long bar_start, bar_end; + pci_addr_t bar_length; + + dinfo = device_get_ivars(child); + iov = dinfo->cfg.iov; + + map = pci_find_bar(child, *rid); + if (map == NULL) + return (NULL); + + bar_length = (pci_addr_t)1 << map->pm_size; /* shift in 64 bits */ + bar_start = map->pm_value; + bar_end = bar_start + bar_length - 1; + + res = rman_reserve_resource(&iov->rman, bar_start, bar_end, + bar_length, flags, child); + + if (res == NULL) + return (NULL); + + rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid, + bar_start, bar_end, 1); + + if (rle == NULL) { + rman_release_resource(res); + return (NULL); + } + + rle->res = res; + rle->flags |= RLE_RESERVED; + + return (resource_list_alloc(&dinfo->resources, dev, child, + SYS_RES_MEMORY, rid, bar_start, bar_end, 1, flags)); +} + +int +pci_vf_release_mem_resource(device_t dev, device_t child, int rid, + struct resource *r) +{ + struct pci_devinfo *dinfo; + struct resource_list_entry *rle; + + dinfo = device_get_ivars(child); + + rle = resource_list_find(&dinfo->resources, 
SYS_RES_MEMORY, rid); + + if (rle != NULL) { + rle->res = NULL; + resource_list_delete(&dinfo->resources, SYS_RES_MEMORY, + rid); + } + + return (rman_release_resource(r)); +} + diff --git a/sys/dev/pci/pci_iov_private.h b/sys/dev/pci/pci_iov_private.h index 1c39dcb..d4b81e4 100755 --- a/sys/dev/pci/pci_iov_private.h +++ b/sys/dev/pci/pci_iov_private.h @@ -29,12 +29,25 @@ #ifndef _PCI_IOV_PRIVATE_H_ #define _PCI_IOV_PRIVATE_H_ +struct pci_iov_bar { + struct resource *res; + + pci_addr_t bar_size; + pci_addr_t bar_shift; +}; + struct pcicfg_iov { struct cdev *iov_cdev; + + struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1]; + struct rman rman; int iov_pos; int iov_num_vfs; + uint32_t iov_flags; }; +#define IOV_RMAN_INITED (1 << 0) + #endif diff --git a/sys/dev/pci/pci_private.h b/sys/dev/pci/pci_private.h index 4824180..89231d3 100644 --- a/sys/dev/pci/pci_private.h +++ b/sys/dev/pci/pci_private.h @@ -146,6 +146,12 @@ struct resource *pci_alloc_multi_resource(device_t dev, device_t child, int pci_iov_attach_method(device_t bus, device_t dev); int pci_iov_detach_method(device_t bus, device_t dev); +struct resource *pci_vf_alloc_mem_resource(device_t dev, device_t child, + int *rid, u_long start, u_long end, u_long count, + u_int flags); +int pci_vf_release_mem_resource(device_t dev, device_t child, + int rid, struct resource *r); + device_t pci_add_iov_child(device_t bus, size_t size, uint16_t rid, uint16_t vid, uint16_t did, const char *driver); diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h index 0c90665..07ef801 100644 --- a/sys/dev/pci/pcivar.h +++ b/sys/dev/pci/pcivar.h @@ -50,7 +50,7 @@ struct pcicfg_pp { struct pci_map { pci_addr_t pm_value; /* Raw BAR value */ pci_addr_t pm_size; - uint8_t pm_reg; + uint16_t pm_reg; STAILQ_ENTRY(pci_map) pm_link; }; -- 1.9.2