sys/conf/files.amd64 | 8 + sys/conf/files.i386 | 8 + sys/dev/acpica/acpi_pci.c | 18 + sys/dev/pci/pci.c | 5 - sys/dev/pci/pci_private.h | 1 + sys/dev/pci/pcivar.h | 8 + sys/kern/subr_bus_dma.c | 21 +- sys/sys/bus_dma.h | 4 + sys/x86/include/busdma_impl.h | 97 ++++ sys/x86/iommu/busdma_dmar.c | 671 ++++++++++++++++++++++++ sys/x86/iommu/busdma_dmar.h | 65 +++ sys/x86/iommu/intel_ctx.c | 524 +++++++++++++++++++ sys/x86/iommu/intel_dmar.h | 344 +++++++++++++ sys/x86/iommu/intel_drv.c | 855 +++++++++++++++++++++++++++++++ sys/x86/iommu/intel_fault.c | 276 ++++++++++ sys/x86/iommu/intel_gas.c | 552 ++++++++++++++++++++ sys/x86/iommu/intel_idpgtbl.c | 796 +++++++++++++++++++++++++++++ sys/x86/iommu/intel_reg.h | 294 +++++++++++ sys/x86/iommu/intel_utils.c | 498 ++++++++++++++++++ sys/x86/x86/busdma_bounce.c | 1102 +++++++++++++++++++++++++++++++++++++++ sys/x86/x86/busdma_machdep.c | 1137 ++++++----------------------------------- 21 files changed, 6274 insertions(+), 1010 deletions(-) diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index cdb43df..3ec8e8f 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -490,6 +490,13 @@ x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq +x86/iommu/busdma_dmar.c optional acpi pci +x86/iommu/intel_ctx.c optional acpi pci +x86/iommu/intel_drv.c optional acpi pci +x86/iommu/intel_fault.c optional acpi pci +x86/iommu/intel_gas.c optional acpi pci +x86/iommu/intel_idpgtbl.c optional acpi pci +x86/iommu/intel_utils.c optional acpi pci x86/isa/atpic.c optional atpic isa x86/isa/atrtc.c standard x86/isa/clock.c standard @@ -500,6 +507,7 @@ x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci +x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 218472b..334f016 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -534,6 +534,13 @@ x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/smist.c optional cpufreq +x86/iommu/busdma_dmar.c optional acpi pci +x86/iommu/intel_ctx.c optional acpi pci +x86/iommu/intel_drv.c optional acpi pci +x86/iommu/intel_fault.c optional acpi pci +x86/iommu/intel_gas.c optional acpi pci +x86/iommu/intel_idpgtbl.c optional acpi pci +x86/iommu/intel_utils.c optional acpi pci x86/isa/atpic.c optional atpic x86/isa/atrtc.c optional native x86/isa/clock.c optional native @@ -544,6 +551,7 @@ x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci +x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt diff --git a/sys/dev/acpica/acpi_pci.c b/sys/dev/acpica/acpi_pci.c index 39fba88..5fdd08c 100644 --- a/sys/dev/acpica/acpi_pci.c +++ b/sys/dev/acpica/acpi_pci.c @@ -80,6 +80,7 @@ static ACPI_STATUS acpi_pci_save_handle(ACPI_HANDLE handle, UINT32 level, static int acpi_pci_set_powerstate_method(device_t dev, device_t child, int state); static void acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child); +static bus_dma_tag_t acpi_pci_get_dma_tag(device_t bus, device_t child); static device_method_t acpi_pci_methods[] = { /* Device interface */ @@ -90,6 +91,7 @@ static device_method_t acpi_pci_methods[] = { 
DEVMETHOD(bus_read_ivar, acpi_pci_read_ivar), DEVMETHOD(bus_write_ivar, acpi_pci_write_ivar), DEVMETHOD(bus_child_location_str, acpi_pci_child_location_str_method), + DEVMETHOD(bus_get_dma_tag, acpi_pci_get_dma_tag), /* PCI interface */ DEVMETHOD(pci_set_powerstate, acpi_pci_set_powerstate_method), @@ -308,3 +310,19 @@ acpi_pci_attach(device_t dev) return (bus_generic_attach(dev)); } + +bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child); +static bus_dma_tag_t +acpi_pci_get_dma_tag(device_t bus, device_t child) +{ + bus_dma_tag_t tag; + + if (device_get_parent(child) == bus) { + /* try dmar and return if it works */ + tag = dmar_get_dma_tag(bus, child); + } else + tag = NULL; + if (tag == NULL) + tag = pci_get_dma_tag(bus, child); + return (tag); +} diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 2851ab7..2936ea5 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -70,10 +70,6 @@ __FBSDID("$FreeBSD$"); #include "pcib_if.h" #include "pci_if.h" -#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF) -#define PCI_DMA_BOUNDARY 0x100000000 -#endif - #define PCIR_IS_BIOS(cfg, reg) \ (((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) || \ ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1)) @@ -99,7 +95,6 @@ static void pci_load_vendor_data(void); static int pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc); static char *pci_describe_device(device_t dev); -static bus_dma_tag_t pci_get_dma_tag(device_t bus, device_t dev); static int pci_modevent(module_t mod, int what, void *arg); static void pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg); diff --git a/sys/dev/pci/pci_private.h b/sys/dev/pci/pci_private.h index b4c0c9e..2181c16 100644 --- a/sys/dev/pci/pci_private.h +++ b/sys/dev/pci/pci_private.h @@ -113,6 +113,7 @@ int pci_child_pnpinfo_str_method(device_t cbdev, device_t child, int pci_assign_interrupt_method(device_t dev, device_t child); int pci_resume(device_t dev); int pci_suspend(device_t dev); +bus_dma_tag_t pci_get_dma_tag(device_t bus, device_t dev); /** Restore the config register state. The state must be previously * saved with pci_cfg_save. 
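/*
 * Illustrative sketch, not part of the patch: a PCI driver that derives its
 * tags from bus_get_dma_tag() transparently inherits the DMAR-backed root
 * tag returned by acpi_pci_get_dma_tag() above, so no driver changes are
 * required.  "mydev_dma_init", "struct mydev_softc" and "sc->dma_tag" are
 * hypothetical names used only for this example.
 */
static int
mydev_dma_init(device_t dev, struct mydev_softc *sc)
{

        return (bus_dma_tag_create(bus_get_dma_tag(dev),  /* parent tag */
            1, 0,                       /* alignment, boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MAXPHYS, 1, MAXPHYS,        /* maxsize, nsegments, maxsegsz */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc/arg: fine for NOWAIT-only loads */
            &sc->dma_tag));
}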
However, the pci bus driver takes care of diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h index db3d8b8..1ee040d 100644 --- a/sys/dev/pci/pcivar.h +++ b/sys/dev/pci/pcivar.h @@ -498,6 +498,14 @@ void pci_restore_state(device_t dev); void pci_save_state(device_t dev); int pci_set_max_read_req(device_t dev, int size); +#ifdef BUS_SPACE_MAXADDR +#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF) +#define PCI_DMA_BOUNDARY 0x100000000 +#else +#define PCI_DMA_BOUNDARY 0 +#endif +#endif + #endif /* _SYS_BUS_H_ */ /* diff --git a/sys/kern/subr_bus_dma.c b/sys/kern/subr_bus_dma.c index 98b801f..ff51a01 100644 --- a/sys/kern/subr_bus_dma.c +++ b/sys/kern/subr_bus_dma.c @@ -126,27 +126,14 @@ static int _bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio, int *nsegs, int flags) { - vm_paddr_t paddr; - bus_size_t len, tlen; - int error, i, ma_offs; + int error; if ((bio->bio_flags & BIO_UNMAPPED) == 0) { error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data, bio->bio_bcount, kernel_pmap, flags, NULL, nsegs); - return (error); - } - - error = 0; - tlen = bio->bio_bcount; - ma_offs = bio->bio_ma_offset; - for (i = 0; tlen > 0; i++, tlen -= len) { - len = min(PAGE_SIZE - ma_offs, tlen); - paddr = VM_PAGE_TO_PHYS(bio->bio_ma[i]) + ma_offs; - error = _bus_dmamap_load_phys(dmat, map, paddr, len, - flags, NULL, nsegs); - if (error != 0) - break; - ma_offs = 0; + } else { + error = _bus_dmamap_load_ma(dmat, map, bio->bio_ma, + bio->bio_bcount, bio->bio_ma_offset, flags, NULL, nsegs); } return (error); } diff --git a/sys/sys/bus_dma.h b/sys/sys/bus_dma.h index c3ac56e..d0d6f6b 100644 --- a/sys/sys/bus_dma.h +++ b/sys/sys/bus_dma.h @@ -324,6 +324,10 @@ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t paddr, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp); +int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, + struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, + bus_dma_segment_t *segs, int *segp); + bus_dma_segment_t *_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, diff --git a/sys/x86/include/busdma_impl.h b/sys/x86/include/busdma_impl.h new file mode 100644 index 0000000..70c9a6f --- /dev/null +++ b/sys/x86/include/busdma_impl.h @@ -0,0 +1,97 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __X86_BUSDMA_IMPL_H +#define __X86_BUSDMA_IMPL_H + +struct bus_dma_tag_common { + struct bus_dma_impl *impl; + struct bus_dma_tag_common *parent; + bus_size_t alignment; + bus_addr_t boundary; + bus_addr_t lowaddr; + bus_addr_t highaddr; + bus_dma_filter_t *filter; + void *filterarg; + bus_size_t maxsize; + u_int nsegments; + bus_size_t maxsegsz; + int flags; + bus_dma_lock_t *lockfunc; + void *lockfuncarg; + int ref_count; +}; + +struct bus_dma_impl { + int (*tag_create)(bus_dma_tag_t parent, + bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, + bus_addr_t highaddr, bus_dma_filter_t *filter, + void *filterarg, bus_size_t maxsize, int nsegments, + bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat); + int (*tag_destroy)(bus_dma_tag_t dmat); + int (*map_create)(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp); + int (*map_destroy)(bus_dma_tag_t dmat, bus_dmamap_t map); + int (*mem_alloc)(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp); + void (*mem_free)(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map); + int (*load_ma)(bus_dma_tag_t dmat, bus_dmamap_t map, + struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, + bus_dma_segment_t *segs, int *segp); + int (*load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_paddr_t buf, bus_size_t buflen, int flags, + bus_dma_segment_t *segs, int *segp); + int (*load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t map, + void *buf, bus_size_t buflen, pmap_t pmap, int flags, + bus_dma_segment_t *segs, int *segp); + void (*map_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map, + struct memdesc *mem, bus_dmamap_callback_t *callback, + void *callback_arg); + bus_dma_segment_t *(*map_complete)(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dma_segment_t *segs, int nsegs, int error); + void (*map_unload)(bus_dma_tag_t dmat, bus_dmamap_t map); + void (*map_sync)(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dmasync_op_t op); +}; + +void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op); +void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op); +int bus_dma_run_filter(struct bus_dma_tag_common *dmat, bus_addr_t paddr); +int common_bus_dma_tag_create(struct bus_dma_tag_common *parent, + bus_size_t alignment, + bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, + bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, + int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, size_t sz, void **dmat); + +extern struct bus_dma_impl bus_dma_bounce_impl; + +#endif diff --git a/sys/x86/iommu/busdma_dmar.c b/sys/x86/iommu/busdma_dmar.c new file mode 100644 index 0000000..39c5888 --- /dev/null +++ b/sys/x86/iommu/busdma_dmar.c @@ -0,0 +1,671 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. 
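/*
 * Illustrative sketch, not part of the patch: every backend tag begins with
 * a struct bus_dma_tag_common, and the machine-dependent wrappers in the
 * rewritten sys/x86/x86/busdma_machdep.c (listed in the diffstat above but
 * not shown in full here) dispatch through the bus_dma_impl method table
 * declared in busdma_impl.h.  "example_dmamap_load_phys" is a hypothetical
 * name standing in for the real wrapper.
 */
static int
example_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
    vm_paddr_t paddr, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
        struct bus_dma_tag_common *tc;

        /* The common header is the first member of every backend tag. */
        tc = (struct bus_dma_tag_common *)dmat;
        return (tc->impl->load_phys(dmat, map, paddr, buflen, flags,
            segs, segp));
}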
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool +dmar_is_dev_disabled(device_t dev, device_t child) +{ + + return (false); +} + +bus_dma_tag_t +dmar_get_dma_tag(device_t dev, device_t child) +{ + struct dmar_unit *dmar; + struct dmar_ctx *ctx; + bool disabled; + + dmar = dmar_find(child); + /* Not in scope of any DMAR ? */ + if (dmar == NULL) + return (NULL); + + disabled = dmar_is_dev_disabled(dev, child); + ctx = dmar_get_ctx(dmar, child, disabled); + if (ctx == NULL) + return (NULL); + ctx->ctx_tag.owner = child; + if (disabled) { + /* XXXKIB: unref ctx */ + return (NULL); + } + return ((bus_dma_tag_t)&ctx->ctx_tag); +} + +static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map"); + +static void dmar_bus_schedule_dmamap(struct dmar_unit *unit, + struct bus_dmamap_dmar *map); + +static int +dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, + bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, + bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, + int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat) +{ + struct bus_dma_tag_dmar *newtag, *oldtag; + int error; + + *dmat = NULL; + error = common_bus_dma_tag_create(parent != NULL ? + &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment, + boundary, lowaddr, highaddr, filter, filterarg, maxsize, + nsegments, maxsegsz, flags, lockfunc, lockfuncarg, + sizeof(struct bus_dma_tag_dmar), (void **)&newtag); + if (error != 0) + return (error); + + oldtag = (struct bus_dma_tag_dmar *)parent; + newtag->common.impl = &bus_dma_dmar_impl; + newtag->ctx = oldtag->ctx; + newtag->owner = oldtag->owner; + error = 0; + + if (error != 0) + free(newtag, M_DEVBUF); + else + *dmat = (bus_dma_tag_t)newtag; + CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", + __func__, newtag, (newtag != NULL ? 
newtag->common.flags : 0), + error); + return (error); +} + +static int +dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1) +{ + struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent; + int error; + + error = 0; + dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1; + + if (dmat != NULL) { + if (dmat->map_count != 0) { + error = EBUSY; + goto out; + } + while (dmat != NULL) { + parent = (struct bus_dma_tag_dmar *)dmat->common.parent; + if (atomic_fetchadd_int(&dmat->common.ref_count, -1) == + 1) { + if (dmat == &dmat->ctx->ctx_tag) + dmar_free_ctx(dmat->ctx); + free(dmat->segments, M_DMAR_DMAMAP); + free(dmat, M_DEVBUF); + dmat = parent; + } else + dmat = NULL; + } + } +out: + CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); + return (error); +} + +static int +dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + + tag = (struct bus_dma_tag_dmar *)dmat; + map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO); + if (map == NULL) { + *mapp = NULL; + return (ENOMEM); + } + if (tag->segments == NULL) { + tag->segments = malloc(sizeof(bus_dma_segment_t) * + tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT); + if (tag->segments == NULL) { + free(map, M_DMAR_DMAMAP); + *mapp = NULL; + return (ENOMEM); + } + } + TAILQ_INIT(&map->map_entries); + map->tag = tag; + map->locked = true; + map->cansleep = false; + tag->map_count++; + *mapp = (bus_dmamap_t)map; + + return (0); +} + +static int +dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + + tag = (struct bus_dma_tag_dmar *)dmat; + map = (struct bus_dmamap_dmar *)map1; + if (map != NULL) { + DMAR_CTX_LOCK(tag->ctx); + if (!TAILQ_EMPTY(&map->map_entries)) { + DMAR_CTX_UNLOCK(tag->ctx); + return (EBUSY); + } + DMAR_CTX_UNLOCK(tag->ctx); + free(map, M_DMAR_DMAMAP); + } + tag->map_count--; + return (0); +} + + +static int +dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + int error, mflags; + vm_memattr_t attr; + + error = dmar_bus_dmamap_create(dmat, flags, mapp); + if (error != 0) + return (error); + + mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK; + mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0; + attr = (flags & BUS_DMA_NOCACHE) != 0 ? 
VM_MEMATTR_UNCACHEABLE : + VM_MEMATTR_DEFAULT; + + tag = (struct bus_dma_tag_dmar *)dmat; + map = (struct bus_dmamap_dmar *)*mapp; + + if (tag->common.maxsize < PAGE_SIZE && + tag->common.alignment <= tag->common.maxsize && + attr == VM_MEMATTR_DEFAULT) { + *vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags); + map->flags |= BUS_DMAMAP_DMAR_MALLOC; + } else { + *vaddr = (void *)kmem_alloc_attr(kernel_map, + tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR, + attr); + map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC; + } + if (*vaddr == NULL) { + dmar_bus_dmamap_destroy(dmat, *mapp); + *mapp = NULL; + return (ENOMEM); + } + return (0); +} + +static void +dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + + tag = (struct bus_dma_tag_dmar *)dmat; + map = (struct bus_dmamap_dmar *)map1; + + if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) { + free(vaddr, M_DEVBUF); + map->flags &= ~BUS_DMAMAP_DMAR_MALLOC; + } else { + KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0, + ("dmar_bus_dmamem_free for non alloced map %p", map)); + kmem_free(kernel_map, (vm_offset_t)vaddr, tag->common.maxsize); + map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC; + } + + dmar_bus_dmamap_destroy(dmat, map1); +} + +static int +dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag, + struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen, + int flags, bus_dma_segment_t *segs, int *segp, + struct dmar_map_entries_tailq *unroll_list) +{ + struct dmar_ctx *ctx; + struct dmar_map_entry *entry; + dmar_gaddr_t size; + bus_size_t buflen1; + int error, idx, gas_flags, seg; + + if (segs == NULL) + segs = tag->segments; + ctx = tag->ctx; + seg = *segp; + idx = 0; + while (buflen > 0) { + seg++; + if (seg >= tag->common.nsegments) { + error = EFBIG; + break; + } + buflen1 = buflen > tag->common.maxsegsz ? + tag->common.maxsegsz : buflen; + buflen -= buflen1; + size = round_page(offset + buflen1); + + /* + * (Too) optimistically allow split if there are more + * then one segments left. + */ + gas_flags = map->cansleep ? 
DMAR_GM_CANWAIT : 0; + if (seg + 1 < tag->common.nsegments) + gas_flags |= DMAR_GM_CANSPLIT; + + error = dmar_gas_map(ctx, &tag->common, size, + DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE, + gas_flags, ma + idx, &entry); + if (error != 0) + break; + if ((gas_flags & DMAR_GM_CANSPLIT) != 0) { + KASSERT(size >= entry->end - entry->start, ("XXX")); + size = entry->end - entry->start; + if (buflen1 > size) + buflen1 = size; + } else { + KASSERT(entry->end - entry->start == size, ("XXX")); + } + + KASSERT(((entry->start + offset) & (tag->common.alignment - 1)) + == 0, + ("alignment failed: ctx %p start 0x%jx offset %x " + "align 0x%jx", ctx, (uintmax_t)entry->start, offset, + (uintmax_t)tag->common.alignment)); + KASSERT(entry->end <= tag->common.lowaddr || + entry->start >= tag->common.highaddr, + ("entry placement failed: ctx %p start 0x%jx end 0x%jx " + "lowaddr 0x%jx highaddr 0x%jx", ctx, + (uintmax_t)entry->start, (uintmax_t)entry->end, + (uintmax_t)tag->common.lowaddr, + (uintmax_t)tag->common.highaddr)); + KASSERT(dmar_test_boundary(entry->start, entry->end - + entry->start, tag->common.boundary), + ("boundary failed: ctx %p start 0x%jx end 0x%jx " + "boundary 0x%jx", ctx, (uintmax_t)entry->start, + (uintmax_t)entry->end, (uintmax_t)tag->common.boundary)); + KASSERT(buflen1 <= tag->common.maxsegsz, + ("segment too large: ctx %p start 0x%jx end 0x%jx " + "maxsegsz 0x%jx", ctx, (uintmax_t)entry->start, + (uintmax_t)entry->end, (uintmax_t)tag->common.maxsegsz)); + + DMAR_CTX_LOCK(ctx); + TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); + entry->flags |= DMAR_MAP_ENTRY_MAP; + DMAR_CTX_UNLOCK(ctx); + TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link); + + segs[seg].ds_addr = entry->start + offset; + segs[seg].ds_len = buflen1; + + idx += OFF_TO_IDX(trunc_page(offset + buflen1)); + offset += buflen1; + offset &= DMAR_PAGE_MASK; + } + if (error == 0) + *segp = seg; + return (error); +} + +static int +dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag, + struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen, + int flags, bus_dma_segment_t *segs, int *segp) +{ + struct dmar_ctx *ctx; + struct dmar_map_entry *entry, *entry1; + struct dmar_map_entries_tailq unroll_list; + int error; + + ctx = tag->ctx; + + TAILQ_INIT(&unroll_list); + error = dmar_bus_dmamap_load_something1(tag, map, ma, offset, + buflen, flags, segs, segp, &unroll_list); + if (error != 0) { + /* + * The busdma interface does not allow us to report + * partial buffer load, so unfortunately we have to + * revert all work done. + */ + DMAR_CTX_LOCK(ctx); + TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link, + entry1) { + /* + * No entries other than what we have created + * during the failed run might have been + * inserted there in between, since we own ctx + * pglock. 
+ */ + TAILQ_REMOVE(&map->map_entries, entry, dmamap_link); + TAILQ_REMOVE(&unroll_list, entry, unroll_link); + TAILQ_INSERT_TAIL(&ctx->unload_entries, entry, + dmamap_link); + } + DMAR_CTX_UNLOCK(ctx); + taskqueue_enqueue(ctx->dmar->delayed_taskqueue, + &ctx->unload_task); + } + + if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 && + !map->cansleep) + error = EINPROGRESS; + if (error == EINPROGRESS) + dmar_bus_schedule_dmamap(ctx->dmar, map); + return (error); +} + +static int +dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1, + struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, + bus_dma_segment_t *segs, int *segp) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + + tag = (struct bus_dma_tag_dmar *)dmat; + map = (struct bus_dmamap_dmar *)map1; + return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen, + flags, segs, segp)); +} + +static int +dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1, + vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + vm_page_t *ma; + vm_paddr_t pstart, pend; + int error, i, ma_cnt, offset; + + tag = (struct bus_dma_tag_dmar *)dmat; + map = (struct bus_dmamap_dmar *)map1; + pstart = trunc_page(buf); + pend = round_page(buf + buflen); + offset = buf & PAGE_MASK; + ma_cnt = OFF_TO_IDX(pend - pstart); + ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ? + M_WAITOK : M_NOWAIT); + if (ma == NULL) + return (ENOMEM); + for (i = 0; i < ma_cnt; i++) + ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE); + error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen, + flags, segs, segp); + free(ma, M_DEVBUF); + return (error); +} + +static int +dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf, + bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + vm_page_t *ma; + vm_paddr_t pstart, pend, paddr; + int error, i, ma_cnt, offset; + + tag = (struct bus_dma_tag_dmar *)dmat; + map = (struct bus_dmamap_dmar *)map1; + pstart = trunc_page((vm_offset_t)buf); + pend = round_page((vm_offset_t)buf + buflen); + offset = (vm_offset_t)buf & PAGE_MASK; + ma_cnt = OFF_TO_IDX(pend - pstart); + ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ? 
+ M_WAITOK : M_NOWAIT); + if (ma == NULL) + return (ENOMEM); + for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) { + if (pmap == kernel_pmap) + paddr = pmap_kextract(pstart); + else + paddr = pmap_extract(pmap, pstart); + ma[i] = PHYS_TO_VM_PAGE(paddr); + } + error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen, + flags, segs, segp); + free(ma, M_DEVBUF); + return (error); +} + +static void +dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1, + struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) +{ + struct bus_dmamap_dmar *map; + + if (map1 == NULL) + return; + map = (struct bus_dmamap_dmar *)map1; + map->mem = *mem; + map->tag = (struct bus_dma_tag_dmar *)dmat; + map->callback = callback; + map->callback_arg = callback_arg; +} + +static bus_dma_segment_t * +dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1, + bus_dma_segment_t *segs, int nsegs, int error) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + + tag = (struct bus_dma_tag_dmar *)dmat; + map = (struct bus_dmamap_dmar *)map1; + + if (!map->locked) { + KASSERT(map->cansleep, + ("map not locked and not sleepable context %p", map)); + + /* + * We are called from the delayed context. Relock the + * driver. + */ + (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK); + map->locked = true; + } + + if (segs == NULL) + segs = tag->segments; + return (segs); +} + +/* + * The limitations of busdma KPI forces the dmar to perform the actual + * unload, consisting of the unmapping of the map entries page tables, + * from the delayed context on i386, since page table page mapping + * might require a sleep to be successfull. The unfortunate + * consequence is that the DMA requests can be served some time after + * the bus_dmamap_unload() call returned. + * + * On amd64, we assume that sf allocation cannot fail. 
+ */ +static void +dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + struct dmar_ctx *ctx; +#if defined(__amd64__) + struct dmar_map_entries_tailq entries; +#endif + + tag = (struct bus_dma_tag_dmar *)dmat; + map = (struct bus_dmamap_dmar *)map1; + ctx = tag->ctx; + +#if defined(__i386__) + DMAR_CTX_LOCK(ctx); + TAILQ_CONCAT(&ctx->unload_entries, &map->map_entries, dmamap_link); + DMAR_CTX_UNLOCK(ctx); + taskqueue_enqueue(ctx->dmar->delayed_taskqueue, &ctx->unload_task); +#else /* defined(__amd64__) */ + TAILQ_INIT(&entries); + DMAR_CTX_LOCK(ctx); + TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link); + DMAR_CTX_UNLOCK(ctx); + THREAD_NO_SLEEPING(); + dmar_ctx_unload(ctx, &entries, false); + THREAD_SLEEPING_OK(); + KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx)); +#endif +} + +static void +dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dmasync_op_t op) +{ +} + +struct bus_dma_impl bus_dma_dmar_impl = { + .tag_create = dmar_bus_dma_tag_create, + .tag_destroy = dmar_bus_dma_tag_destroy, + .map_create = dmar_bus_dmamap_create, + .map_destroy = dmar_bus_dmamap_destroy, + .mem_alloc = dmar_bus_dmamem_alloc, + .mem_free = dmar_bus_dmamem_free, + .load_phys = dmar_bus_dmamap_load_phys, + .load_buffer = dmar_bus_dmamap_load_buffer, + .load_ma = dmar_bus_dmamap_load_ma, + .map_waitok = dmar_bus_dmamap_waitok, + .map_complete = dmar_bus_dmamap_complete, + .map_unload = dmar_bus_dmamap_unload, + .map_sync = dmar_bus_dmamap_sync +}; + +static void +dmar_bus_task_dmamap(void *arg, int pending) +{ + struct bus_dma_tag_dmar *tag; + struct bus_dmamap_dmar *map; + struct dmar_unit *unit; + struct dmar_ctx *ctx; + + unit = arg; + DMAR_LOCK(unit); + while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) { + TAILQ_REMOVE(&unit->delayed_maps, map, delay_link); + DMAR_UNLOCK(unit); + tag = map->tag; + ctx = map->tag->ctx; + map->cansleep = true; + map->locked = false; + bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map, + &map->mem, map->callback, map->callback_arg, + BUS_DMA_WAITOK); + map->cansleep = false; + if (map->locked) { + (tag->common.lockfunc)(tag->common.lockfuncarg, + BUS_DMA_UNLOCK); + } else + map->locked = true; + map->cansleep = false; + DMAR_LOCK(unit); + } + DMAR_UNLOCK(unit); +} + +static void +dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map) +{ + struct dmar_ctx *ctx; + + ctx = map->tag->ctx; + map->locked = false; + DMAR_LOCK(unit); + TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link); + DMAR_UNLOCK(unit); + taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task); +} + +int +dmar_init_busdma(struct dmar_unit *unit) +{ + + TAILQ_INIT(&unit->delayed_maps); + TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit); + unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK, + taskqueue_thread_enqueue, &unit->delayed_taskqueue); + taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK, + "dmar%d busdma taskq", unit->unit); + return (0); +} + +void +dmar_fini_busdma(struct dmar_unit *unit) +{ + + if (unit->delayed_taskqueue == NULL) + return; + + taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task); + taskqueue_free(unit->delayed_taskqueue); + unit->delayed_taskqueue = NULL; +} diff --git a/sys/x86/iommu/busdma_dmar.h b/sys/x86/iommu/busdma_dmar.h new file mode 100644 index 0000000..60ea6bc --- /dev/null +++ b/sys/x86/iommu/busdma_dmar.h @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 2013 The FreeBSD 
Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __X86_IOMMU_BUSDMA_DMAR_H +#define __X86_IOMMU_BUSDMA_DMAR_H + +struct dmar_map_entry; +TAILQ_HEAD(dmar_map_entries_tailq, dmar_map_entry); + +struct bus_dma_tag_dmar { + struct bus_dma_tag_common common; + struct dmar_ctx *ctx; + device_t owner; + int map_count; + bus_dma_segment_t *segments; +}; + +struct bus_dmamap_dmar { + struct bus_dma_tag_dmar *tag; + struct memdesc mem; + bus_dmamap_callback_t *callback; + void *callback_arg; + struct dmar_map_entries_tailq map_entries; + TAILQ_ENTRY(bus_dmamap_dmar) delay_link; + bool locked; + bool cansleep; + int flags; +}; + +#define BUS_DMAMAP_DMAR_MALLOC 0x0001 +#define BUS_DMAMAP_DMAR_KMEM_ALLOC 0x0002 + +extern struct bus_dma_impl bus_dma_dmar_impl; + +bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child); + +#endif diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c new file mode 100644 index 0000000..34965ec --- /dev/null +++ b/sys/x86/iommu/intel_ctx.c @@ -0,0 +1,524 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context"); + +static void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx); +static void dmar_ctx_unload_task(void *arg, int pending); + +static void +dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus) +{ + struct sf_buf *sf; + dmar_root_entry_t *re; + vm_page_t ctxm; + + /* + * Allocated context page must be linked. + */ + ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC); + if (ctxm != NULL) + return; + + /* + * Page not present, allocate and link. Note that other + * thread might execute this sequence in parallel. This + * should be safe, because the context entries written by both + * threads are equal. + */ + TD_PREP_PINNED_ASSERT; + ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO | + DMAR_PGF_WAITOK); + re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf); + re += bus; + dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK & + VM_PAGE_TO_PHYS(ctxm))); + dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar)); + TD_PINNED_ASSERT; +} + +static dmar_ctx_entry_t * +dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp) +{ + dmar_ctx_entry_t *ctxp; + + ctxp = dmar_map_pgtbl(ctx->dmar->ctx_obj, 1 + ctx->bus, + DMAR_PGF_NOALLOC, sfp); + ctxp += ((ctx->slot & 0x1f) << 3) + (ctx->func & 0x3); + return (ctxp); +} + +static void +ctx_tag_init(struct dmar_ctx *ctx) +{ + bus_addr_t maxaddr; + + maxaddr = MIN(ctx->end, BUS_SPACE_MAXADDR); + ctx->ctx_tag.common.ref_count = 1; /* Prevent free */ + ctx->ctx_tag.common.impl = &bus_dma_dmar_impl; + ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY; + ctx->ctx_tag.common.lowaddr = maxaddr; + ctx->ctx_tag.common.highaddr = maxaddr; + ctx->ctx_tag.common.maxsize = maxaddr; + ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED; + ctx->ctx_tag.common.maxsegsz = maxaddr; + ctx->ctx_tag.ctx = ctx; + /* XXXKIB initialize tag further */ +} + +static void +ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp) +{ + struct dmar_unit *unit; + vm_page_t ctx_root; + + unit = ctx->dmar; + KASSERT(ctxp->ctx1 == 0 && ctxp->ctx2 == 0, + ("dmar%d: initialized ctx entry %d:%d:%d", + unit->unit, ctx->bus, ctx->slot, ctx->func)); + ctxp->ctx2 = DMAR_CTX2_DID(ctx->domain); + ctxp->ctx2 |= ctx->awlvl; + if ((ctx->flags & DMAR_CTX_IDMAP) != 0 && + (unit->hw_ecap & DMAR_ECAP_PT) != 0) { + KASSERT(ctx->pgtbl_obj == NULL, + ("ctx %p non-null pgtbl_obj", ctx)); + dmar_pte_store(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P); + } else { + ctx_root = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_NOALLOC); + dmar_pte_store(&ctxp->ctx1, DMAR_CTX1_T_UNTR | + 
(DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) | + DMAR_CTX1_P); + } +} + +static int +ctx_init_rmrr(struct dmar_ctx *ctx, device_t dev) +{ + struct dmar_map_entries_tailq rmrr_entries; + struct dmar_map_entry *entry, *entry1; + vm_page_t *ma; + vm_pindex_t size, i; + int error, error1; + + error = 0; + TAILQ_INIT(&rmrr_entries); + dmar_ctx_parse_rmrr(ctx, dev, &rmrr_entries); + TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) { + size = OFF_TO_IDX(entry->end - entry->start); + ma = malloc(sizeof(vm_page_t) * OFF_TO_IDX(size), M_TEMP, + M_WAITOK); + for (i = 0; i < size; i++) { + ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i, + VM_MEMATTR_DEFAULT); + } + error1 = dmar_gas_map_region(ctx, entry, DMAR_MAP_ENTRY_READ | + DMAR_MAP_ENTRY_WRITE, DMAR_GM_CANWAIT, ma); + /* + * Non-failed RMRR entries are owned by context rb + * tree. Get rid of the failed entry, but do not stop + * the loop. Rest of the parsed RMRR entries are + * loaded and removed on the context destruction. + */ + if (error1 == 0) { + DMAR_LOCK(ctx->dmar); + ctx->flags |= DMAR_CTX_RMRR; + DMAR_UNLOCK(ctx->dmar); + } else { + device_printf(dev, + "dmar%d failed to map RMRR region (%jx, %jx) %d\n", + ctx->dmar->unit, entry->start, entry->end, error1); + error = error1; + TAILQ_REMOVE(&rmrr_entries, entry, unroll_link); + dmar_gas_free_entry(ctx, entry); + } + for (i = 0; i < size; i++) + vm_page_putfake(ma[i]); + free(ma, M_TEMP); + } + return (error); +} + +static struct dmar_ctx * +dmar_get_ctx_alloc(struct dmar_unit *dmar, int bus, int slot, int func) +{ + struct dmar_ctx *ctx; + + ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO); + RB_INIT(&ctx->rb_root); + TAILQ_INIT(&ctx->unload_entries); + TASK_INIT(&ctx->unload_task, 0, dmar_ctx_unload_task, ctx); + mtx_init(&ctx->lock, "dmarctx", NULL, MTX_DEF); + ctx->dmar = dmar; + ctx->bus = bus; + ctx->slot = slot; + ctx->func = func; + return (ctx); +} + +static void +dmar_ctx_dtr(struct dmar_ctx *ctx, bool gas_inited, bool pgtbl_inited) +{ + + if (gas_inited) { + DMAR_CTX_LOCK(ctx); + dmar_gas_fini_ctx(ctx); + DMAR_CTX_UNLOCK(ctx); + } + if (pgtbl_inited) { + if (ctx->pgtbl_obj != NULL) + DMAR_CTX_PGLOCK(ctx); + ctx_free_pgtbl(ctx); + } + mtx_destroy(&ctx->lock); + free(ctx, M_DMAR_CTX); +} + +struct dmar_ctx * +dmar_get_ctx(struct dmar_unit *dmar, device_t dev, bool id_mapped) +{ + struct dmar_ctx *ctx, *ctx1; + dmar_ctx_entry_t *ctxp; + struct sf_buf *sf; + int bus, slot, func, error, mgaw; + bool enable; + + bus = pci_get_bus(dev); + slot = pci_get_slot(dev); + func = pci_get_function(dev); + enable = false; + TD_PREP_PINNED_ASSERT; + DMAR_LOCK(dmar); + ctx = dmar_find_ctx_locked(dmar, bus, slot, func); + error = 0; + if (ctx == NULL) { + /* + * Perform the allocations which require sleep or have + * higher chance to succeed if the sleep is allowed. + */ + DMAR_UNLOCK(dmar); + dmar_ensure_ctx_page(dmar, bus); + ctx1 = dmar_get_ctx_alloc(dmar, bus, slot, func); + + if (id_mapped) { + /* + * For now, use the maximal usable physical + * address of the installed memory to + * calculate the mgaw. It is useful for the + * identity mapping, and less so for the + * virtualized bus address space. 
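/*
 * Illustrative sketch, not the actual dmar_maxaddr2mgaw() (which lives in
 * intel_utils.c and also takes the hardware-reported capabilities into
 * account): pick the smallest address width on the VT-d 30/39/48/57/64-bit
 * ladder that covers maxaddr.  For example, an identity-mapped context on
 * an 8GB machine has ptoa(Maxmem) around 2^33, so a 39-bit (3-level) page
 * table suffices.
 */
static int
example_maxaddr2agaw(dmar_gaddr_t maxaddr)
{
        static const int agaw_ladder[] = { 30, 39, 48, 57, 64 };
        int i;

        for (i = 0; i < nitems(agaw_ladder) - 1; i++) {
                if (maxaddr <= ((dmar_gaddr_t)1 << agaw_ladder[i]))
                        break;
        }
        return (agaw_ladder[i]);
}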
+ */ + ctx1->end = ptoa(Maxmem); + mgaw = dmar_maxaddr2mgaw(dmar, ctx1->end, false); + error = ctx_set_agaw(ctx1, mgaw); + if (error != 0) { + dmar_ctx_dtr(ctx1, false, false); + TD_PINNED_ASSERT; + return (NULL); + } + } else { + ctx1->end = BUS_SPACE_MAXADDR; + mgaw = dmar_maxaddr2mgaw(dmar, ctx1->end, true); + error = ctx_set_agaw(ctx1, mgaw); + if (error != 0) { + dmar_ctx_dtr(ctx1, false, false); + TD_PINNED_ASSERT; + return (NULL); + } + /* Use all supported address space for remapping. */ + ctx1->end = 1ULL << (ctx1->agaw - 1); + } + + + dmar_gas_init_ctx(ctx1); + if (id_mapped) { + if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) { + ctx1->pgtbl_obj = ctx_get_idmap_pgtbl(ctx1, + ctx1->end); + } + ctx1->flags |= DMAR_CTX_IDMAP; + } else { + error = ctx_alloc_pgtbl(ctx1); + if (error != 0) { + dmar_ctx_dtr(ctx1, true, false); + TD_PINNED_ASSERT; + return (NULL); + } + } + error = ctx_init_rmrr(ctx1, dev); + if (error != 0) { + dmar_ctx_dtr(ctx1, true, true); + TD_PINNED_ASSERT; + return (NULL); + } + ctxp = dmar_map_ctx_entry(ctx1, &sf); + DMAR_LOCK(dmar); + + /* + * Recheck the contexts, other thread might have + * already allocated needed one. + */ + ctx = dmar_find_ctx_locked(dmar, bus, slot, func); + if (ctx == NULL) { + ctx = ctx1; + ctx->domain = alloc_unrl(dmar->domids); + if (ctx->domain == -1) { + DMAR_UNLOCK(dmar); + dmar_unmap_pgtbl(sf, true); + dmar_ctx_dtr(ctx, true, true); + TD_PINNED_ASSERT; + return (NULL); + } + ctx_tag_init(ctx); + + /* + * This is the first activated context for the + * DMAR unit. Enable the translation after + * everything is set up. + */ + if (LIST_EMPTY(&dmar->contexts)) + enable = true; + LIST_INSERT_HEAD(&dmar->contexts, ctx, link); + ctx_id_entry_init(ctx, ctxp); + device_printf(dev, + "dmar%d pci%d:%d:%d:%d domain %d mgaw %d agaw %d\n", + dmar->unit, dmar->segment, bus, slot, + func, ctx->domain, ctx->mgaw, ctx->agaw); + } else { + dmar_ctx_dtr(ctx1, true, true); + } + dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar)); + } + ctx->refs++; + if ((ctx->flags & DMAR_CTX_RMRR) != 0) + ctx->refs++; /* XXXKIB */ + + /* + * If dmar declares Caching Mode as Set, follow 11.5 "Caching + * Mode Consideration" and do the (global) invalidation of the + * negative TLB entries. + */ + if ((dmar->hw_cap & DMAR_CAP_CM) != 0 || enable) { + error = dmar_inv_ctx_glob(dmar); + if (error == 0 && + (dmar->hw_ecap & DMAR_ECAP_DI) != 0) + error = dmar_inv_iotlb_glob(dmar); + if (error != 0) { + dmar_free_ctx_locked(dmar, ctx); + return (NULL); + } + } + if (enable) { + error = dmar_enable_translation(dmar); + if (error != 0) { + dmar_free_ctx_locked(dmar, ctx); + return (NULL); + } + } + DMAR_UNLOCK(dmar); + TD_PINNED_ASSERT; + return (ctx); +} + +static void +dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx) +{ + struct sf_buf *sf; + dmar_ctx_entry_t *ctxp; + + DMAR_ASSERT_LOCKED(dmar); + KASSERT(ctx->refs >= 1, + ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs)); + + /* + * If our reference is not last, only the dereference should + * be performed. + */ + if (ctx->refs > 1) { + ctx->refs--; + DMAR_UNLOCK(dmar); + return; + } + + KASSERT((ctx->flags & DMAR_CTX_RMRR) == 0, + ("lost ref on RMRR ctx %p", ctx)); + + /* + * Otherwise, the context entry must be cleared before the + * page table is destroyed. The mapping of the context + * entries page could require sleep, unlock the dmar. 
+ */ + DMAR_UNLOCK(dmar); + TD_PREP_PINNED_ASSERT; + ctxp = dmar_map_ctx_entry(ctx, &sf); + DMAR_LOCK(dmar); + KASSERT(ctx->refs >= 1, + ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs)); + + /* + * Other thread might have referenced the context, in which + * case again only the dereference should be performed. + */ + if (ctx->refs > 1) { + ctx->refs--; + DMAR_UNLOCK(dmar); + dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar)); + TD_PINNED_ASSERT; + return; + } + + KASSERT((ctx->flags & DMAR_CTX_RMRR) == 0, + ("lost ref on RMRR ctx %p", ctx)); + + /* + * Clear the context pointer and flush the caches. + * XXXKIB: cannot do this if any RMRR entries are still present. + */ + dmar_pte_clear(&ctxp->ctx1); + ctxp->ctx2 = 0; + dmar_inv_ctx_glob(dmar); + if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) + dmar_inv_iotlb_glob(dmar); + LIST_REMOVE(ctx, link); + DMAR_UNLOCK(dmar); + + /* + * The rest of the destruction is invisible for other users of + * the dmar unit. + */ + taskqueue_drain(dmar->delayed_taskqueue, &ctx->unload_task); + KASSERT(TAILQ_EMPTY(&ctx->unload_entries), + ("unfinished unloads %p", ctx)); + dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar)); + free_unr(dmar->domids, ctx->domain); + dmar_ctx_dtr(ctx, true, true); + TD_PINNED_ASSERT; +} + +void +dmar_free_ctx(struct dmar_ctx *ctx) +{ + struct dmar_unit *dmar; + + dmar = ctx->dmar; + DMAR_LOCK(dmar); + dmar_free_ctx_locked(dmar, ctx); +} + +struct dmar_ctx * +dmar_find_ctx_locked(struct dmar_unit *dmar, int bus, int slot, int func) +{ + struct dmar_ctx *ctx; + + DMAR_ASSERT_LOCKED(dmar); + + LIST_FOREACH(ctx, &dmar->contexts, link) { + if (ctx->bus == bus && ctx->slot == slot && ctx->func == func) + return (ctx); + } + return (NULL); +} + +void +dmar_ctx_unload(struct dmar_ctx *ctx, struct dmar_map_entries_tailq *entries, + bool cansleep) +{ + struct dmar_map_entry *entry; + int error; + + while ((entry = TAILQ_FIRST(entries)) != NULL) { + KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0, + ("not mapped entry %p %p", ctx, entry)); + TAILQ_REMOVE(entries, entry, dmamap_link); + error = ctx_unmap_buf(ctx, entry->start, entry->end - + entry->start, cansleep ? DMAR_PGF_WAITOK : 0); + KASSERT(error == 0, ("unmap %p error %d", ctx, error)); + DMAR_CTX_LOCK(ctx); + dmar_gas_free_space(ctx, entry); + DMAR_CTX_UNLOCK(ctx); + dmar_gas_free_entry(ctx, entry); + } +} + +static void +dmar_ctx_unload_task(void *arg, int pending) +{ + struct dmar_ctx *ctx; + struct dmar_map_entries_tailq entries; + + ctx = arg; + TAILQ_INIT(&entries); + + for (;;) { + DMAR_CTX_LOCK(ctx); + TAILQ_SWAP(&ctx->unload_entries, &entries, dmar_map_entry, + dmamap_link); + DMAR_CTX_UNLOCK(ctx); + if (TAILQ_EMPTY(&entries)) + break; + dmar_ctx_unload(ctx, &entries, true); + } +} diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h new file mode 100644 index 0000000..63d137d --- /dev/null +++ b/sys/x86/iommu/intel_dmar.h @@ -0,0 +1,344 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __X86_IOMMU_INTEL_DMAR_H +#define __X86_IOMMU_INTEL_DMAR_H + +/* Host or physical memory address, after translation. */ +typedef uint64_t dmar_haddr_t; +/* Guest or bus address, before translation. */ +typedef uint64_t dmar_gaddr_t; + +struct dmar_map_entry { + dmar_gaddr_t start; + dmar_gaddr_t end; + dmar_gaddr_t free_after; /* Free space after the entry */ + dmar_gaddr_t free_down; /* Max free space below the + current R/B tree node */ + u_int flags; + TAILQ_ENTRY(dmar_map_entry) dmamap_link; /* Link for dmamap entries */ + RB_ENTRY(dmar_map_entry) rb_entry; /* Links for ctx entries */ + TAILQ_ENTRY(dmar_map_entry) unroll_link; /* Link for unroll after + dmamap_load failure */ +}; + +RB_HEAD(dmar_gas_entries_tree, dmar_map_entry); +RB_PROTOTYPE(dmar_gas_entries_tree, dmar_map_entry, rb_entry, + dmar_gas_cmp_entries); + +#define DMAR_MAP_ENTRY_PLACE 0x0001 /* Fake entry */ +#define DMAR_MAP_ENTRY_RMRR 0x0002 /* Permanent, not linked by + dmamap_link */ +#define DMAR_MAP_ENTRY_MAP 0x0004 /* Busdma created, linked by + dmamap_link */ +#define DMAR_MAP_ENTRY_READ 0x1000 /* Read permitted */ +#define DMAR_MAP_ENTRY_WRITE 0x2000 /* Write permitted */ +#define DMAR_MAP_ENTRY_SNOOP 0x4000 /* Snoop */ +#define DMAR_MAP_ENTRY_TM 0x8000 /* Transient */ + +struct dmar_ctx { + int bus; /* pci bus/slot/func */ + int slot; + int func; + int domain; /* DID */ + int mgaw; /* Real max address width */ + int agaw; /* Adjusted guest address width */ + int pglvl; /* The pagelevel */ + int awlvl; /* The pagelevel as the bitmask, to set in + context entry */ + dmar_gaddr_t end;/* Highest address + 1 in the guest AS */ + u_int refs; /* References to the context, from tags */ + struct dmar_unit *dmar; + struct bus_dma_tag_dmar ctx_tag; /* Root tag */ + struct mtx lock; + LIST_ENTRY(dmar_ctx) link; /* Member in the dmar list */ + vm_object_t pgtbl_obj; /* Page table pages */ + u_int flags; /* Protected by dmar lock */ + uint64_t last_fault_rec[2]; /* Last fault reported */ + u_int entries_cnt; + struct dmar_gas_entries_tree rb_root; + struct dmar_map_entries_tailq unload_entries; /* Entries to unload */ + struct dmar_map_entry *first_place, *last_place; + struct task unload_task; +}; + +/* struct dmar_ctx flags */ +#define DMAR_CTX_FAULTED 0x0001 /* Fault was reported, + last_fault_rec is valid */ +#define DMAR_CTX_IDMAP 0x0002 /* Context uses identity page table */ +#define DMAR_CTX_RMRR 0x0004 /* Context contains RMRR entry, + cannot be turned off */ + +#define DMAR_CTX_PGLOCK(ctx) 
VM_OBJECT_WLOCK((ctx)->pgtbl_obj) +#define DMAR_CTX_PGTRYLOCK(ctx) VM_OBJECT_TRYWLOCK((ctx)->pgtbl_obj) +#define DMAR_CTX_PGUNLOCK(ctx) VM_OBJECT_WUNLOCK((ctx)->pgtbl_obj) +#define DMAR_CTX_ASSERT_PGLOCKED(ctx) \ + VM_OBJECT_ASSERT_WLOCKED((ctx)->pgtbl_obj) + +#define DMAR_CTX_LOCK(ctx) mtx_lock(&(ctx)->lock) +#define DMAR_CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->lock) +#define DMAR_CTX_ASSERT_LOCKED(ctx) mtx_assert(&(ctx)->lock, MA_OWNED) + +struct dmar_unit { + device_t dev; + int unit; + uint16_t segment; + uint64_t base; + + /* Resources */ + int reg_rid; + struct resource *regs; + int irq; + int irq_rid; + struct resource *irq_res; + void *intr_handle; + + /* Hardware registers cache */ + uint32_t hw_ver; + uint64_t hw_cap; + uint64_t hw_ecap; + uint32_t hw_gcmd; + + /* Data for being a dmar */ + struct mtx lock; + LIST_HEAD(, dmar_ctx) contexts; + struct unrhdr *domids; + vm_object_t ctx_obj; + + /* Fault handler data */ + struct mtx fault_lock; + uint64_t *fault_log; + int fault_log_head; + int fault_log_tail; + int fault_log_size; + struct task fault_task; + struct taskqueue *fault_taskqueue; + + /* Busdma delayed map load */ + struct task dmamap_load_task; + TAILQ_HEAD(, bus_dmamap_dmar) delayed_maps; + struct taskqueue *delayed_taskqueue; +}; + +#define DMAR_LOCK(dmar) mtx_lock(&(dmar)->lock) +#define DMAR_UNLOCK(dmar) mtx_unlock(&(dmar)->lock) +#define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->lock, MA_OWNED) + +#define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock) +#define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock) +#define DMAR_FAULT_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->fault_lock, MA_OWNED) + +#define DMAR_IS_COHERENT(dmar) (((dmar)->hw_ecap & DMAR_ECAP_C) != 0) + +struct dmar_unit *dmar_find(device_t dev); + +u_int dmar_nd2mask(u_int nd); +bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl); +int ctx_set_agaw(struct dmar_ctx *ctx, int mgaw); +int dmar_maxaddr2mgaw(struct dmar_unit* unit, dmar_gaddr_t maxaddr, + bool allow_less); +vm_pindex_t pglvl_max_pages(int pglvl); +int ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl); +dmar_gaddr_t pglvl_page_size(int total_pglvl, int lvl); +dmar_gaddr_t ctx_page_size(struct dmar_ctx *ctx, int lvl); +struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags); +void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags); +void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, + struct sf_buf **sf); +void dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent); +int dmar_load_root_entry_ptr(struct dmar_unit *unit); +int dmar_inv_ctx_glob(struct dmar_unit *unit); +int dmar_inv_iotlb_glob(struct dmar_unit *unit); +int dmar_flush_write_bufs(struct dmar_unit *unit); +int dmar_enable_translation(struct dmar_unit *unit); +int dmar_disable_translation(struct dmar_unit *unit); +void dmar_enable_intr(struct dmar_unit *unit); +void dmar_disable_intr(struct dmar_unit *unit); + +int dmar_intr(void *arg); +int dmar_init_fault_log(struct dmar_unit *unit); +void dmar_fini_fault_log(struct dmar_unit *unit); + +vm_object_t ctx_get_idmap_pgtbl(struct dmar_ctx *ctx, dmar_gaddr_t maxaddr); +void put_idmap_pgtbl(vm_object_t obj); +int ctx_map_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, + vm_page_t *ma, uint64_t pflags, int flags); +int ctx_unmap_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, + int flags); +int ctx_alloc_pgtbl(struct dmar_ctx *ctx); +void ctx_free_pgtbl(struct dmar_ctx *ctx); + +struct dmar_ctx *dmar_get_ctx(struct dmar_unit *dmar, device_t dev, + 
bool id_mapped); +void dmar_free_ctx(struct dmar_ctx *ctx); +struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, int bus, + int slot, int func); +void dmar_ctx_unload(struct dmar_ctx *ctx, + struct dmar_map_entries_tailq *entries, bool cansleep); + +int dmar_init_busdma(struct dmar_unit *unit); +void dmar_fini_busdma(struct dmar_unit *unit); + +void dmar_gas_init_ctx(struct dmar_ctx *ctx); +void dmar_gas_fini_ctx(struct dmar_ctx *ctx); +struct dmar_map_entry *dmar_gas_alloc_entry(struct dmar_ctx *ctx, u_int flags); +void dmar_gas_free_entry(struct dmar_ctx *ctx, struct dmar_map_entry *entry); +void dmar_gas_free_space(struct dmar_ctx *ctx, struct dmar_map_entry *entry); +int dmar_gas_map(struct dmar_ctx *ctx, const struct bus_dma_tag_common *common, + dmar_gaddr_t size, u_int eflags, u_int flags, vm_page_t *ma, + struct dmar_map_entry **res); +int dmar_gas_map_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry, + u_int eflags, u_int flags, vm_page_t *ma); + +void dmar_ctx_parse_rmrr(struct dmar_ctx *ctx, device_t dev, + struct dmar_map_entries_tailq *rmrr_entries); + +#define DMAR_GM_CANWAIT 0x0001 +#define DMAR_GM_CANSPLIT 0x0002 + +#define DMAR_PGF_WAITOK 0x0001 +#define DMAR_PGF_ZERO 0x0002 +#define DMAR_PGF_ALLOC 0x0004 +#define DMAR_PGF_NOALLOC 0x0008 +#define DMAR_PGF_OBJL 0x0010 + +extern dmar_haddr_t dmar_high; +extern int haw; +extern int dmar_tbl_pagecnt; + +static inline uint32_t +dmar_read4(const struct dmar_unit *unit, int reg) +{ + + return (bus_read_4(unit->regs, reg)); +} + +static inline uint64_t +dmar_read8(const struct dmar_unit *unit, int reg) +{ +#ifdef __i386__ + uint32_t high, low; + + low = bus_read_4(unit->regs, reg); + high = bus_read_4(unit->regs, reg + 4); + return (low | ((uint64_t)high << 32)); +#else + return (bus_read_8(unit->regs, reg)); +#endif +} + +static inline void +dmar_write4(const struct dmar_unit *unit, int reg, uint32_t val) +{ + + bus_write_4(unit->regs, reg, val); +} + +static inline void +dmar_write8(const struct dmar_unit *unit, int reg, uint64_t val) +{ +#ifdef __i386__ + uint32_t high, low; + + low = val; + high = val >> 32; + bus_write_4(unit->regs, reg, low); + bus_write_4(unit->regs, reg + 4, high); +#else + bus_write_8(unit->regs, reg, val); +#endif +} + +/* + * dmar_pte_store and dmar_pte_clear ensure that on i386, 32bit writes + * are issued in the correct order. For store, the lower word, + * containing the P or R and W bits, is set only after the high word + * is written. For clear, the P bit is cleared first, then the high + * word is cleared. 
+ */ +static inline void +dmar_pte_store(volatile uint64_t *dst, uint64_t val) +{ + + KASSERT(*dst == 0, ("used pte %p oldval %jx newval %jx", + dst, (uintmax_t)*dst, (uintmax_t)val)); +#ifdef __i386__ + volatile uint32_t *p; + uint32_t hi, lo; + + hi = val >> 32; + lo = val; + p = (volatile uint32_t *)dst; + *(p + 1) = hi; + *p = lo; +#else + *dst = val; +#endif +} + +static inline void +dmar_pte_clear(volatile uint64_t *dst) +{ +#ifdef __i386__ + volatile uint32_t *p; + + p = (volatile uint32_t *)dst; + *p = 0; + *(p + 1) = 0; +#else + *dst = 0; +#endif +} + +static inline bool +dmar_test_boundary(dmar_gaddr_t start, dmar_gaddr_t size, + dmar_gaddr_t boundary) +{ + + if (boundary == 0) + return (true); + return (start + size <= ((start + boundary) & ~(boundary - 1))); +} + +#ifdef INVARIANTS +#define TD_PREP_PINNED_ASSERT \ + int old_td_pinned; \ + old_td_pinned = curthread->td_pinned +#define TD_PINNED_ASSERT \ + KASSERT(curthread->td_pinned == old_td_pinned, \ + ("pin count leak: %d %d %s:%d", curthread->td_pinned, \ + old_td_pinned, __FILE__, __LINE__)) +#else +#define TD_PREP_PINNED_ASSERT +#define TD_PINNED_ASSERT +#endif + +#endif diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c new file mode 100644 index 0000000..b84a467 --- /dev/null +++ b/sys/x86/iommu/intel_drv.c @@ -0,0 +1,855 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_acpi.h" +#if defined(__amd64__) /* || defined(__ia64__) */ +#define DEV_APIC +#else +#include "opt_apic.h" +#endif +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEV_APIC +#include "pcib_if.h" +#endif + +#define DMAR_REG_RID 1 +#define DMAR_IRQ_RID 0 + +static devclass_t dmar_devclass; +static device_t *dmar_devs; +static int dmar_devcnt; + +typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *); + +static void +dmar_iterate_tbl(dmar_iter_t iter, void *arg) +{ + ACPI_TABLE_DMAR *dmartbl; + ACPI_DMAR_HEADER *dmarh; + char *ptr, *ptrend; + ACPI_STATUS status; + + status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl); + if (ACPI_FAILURE(status)) + return; + ptr = (char *)dmartbl + sizeof(*dmartbl); + ptrend = (char *)dmartbl + dmartbl->Header.Length; + for (;;) { + if (ptr >= ptrend) + break; + dmarh = (ACPI_DMAR_HEADER *)ptr; + if (dmarh->Length <= 0) { + printf("dmar_identify: corrupted DMAR table, l %d\n", + dmarh->Length); + break; + } + ptr += dmarh->Length; + if (!iter(dmarh, arg)) + break; + } +} + +struct find_iter_args { + int i; + ACPI_DMAR_HARDWARE_UNIT *res; +}; + +static int +dmar_find_iter(ACPI_DMAR_HEADER *dmarh, void *arg) +{ + struct find_iter_args *fia; + + if (dmarh->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT) + return (1); + + fia = arg; + if (fia->i == 0) { + fia->res = (ACPI_DMAR_HARDWARE_UNIT *)dmarh; + return (0); + } + fia->i--; + return (1); +} + +static ACPI_DMAR_HARDWARE_UNIT * +dmar_find_by_index(int idx) +{ + struct find_iter_args fia; + + fia.i = idx; + fia.res = NULL; + dmar_iterate_tbl(dmar_find_iter, &fia); + return (fia.res); +} + +static int +dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg) +{ + + if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_UNIT) + dmar_devcnt++; + return (1); +} + +static void +dmar_identify(driver_t *driver, device_t parent) +{ + ACPI_TABLE_DMAR *dmartbl; + ACPI_DMAR_HARDWARE_UNIT *dmarh; + ACPI_STATUS status; + int i, error; + + if (acpi_disabled("dmar")) + return; + status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl); + if (ACPI_FAILURE(status)) + return; + haw = dmartbl->Width + 1; + if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR) + dmar_high = BUS_SPACE_MAXADDR; + else + dmar_high = 1ULL << (haw + 1); + if (bootverbose) { + printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width, + (unsigned)dmartbl->Flags, + "\020\001INTR_REMAP\002X2APIC_OPT_OUT"); + } + + dmar_iterate_tbl(dmar_count_iter, NULL); + if (dmar_devcnt == 0) + return; + dmar_devs = malloc(sizeof(device_t) * dmar_devcnt, M_DEVBUF, + M_WAITOK | M_ZERO); + for (i = 0; i < dmar_devcnt; i++) { + dmarh = dmar_find_by_index(i); + if (dmarh == NULL) { + printf("dmar_identify: cannot find HWUNIT %d\n", i); + continue; + } + dmar_devs[i] = BUS_ADD_CHILD(parent, 1, "dmar", i); + if (dmar_devs[i] == NULL) { + printf("dmar_identify: cannot create instance %d\n", i); + continue; + } + error = bus_set_resource(dmar_devs[i], SYS_RES_MEMORY, + DMAR_REG_RID, dmarh->Address, PAGE_SIZE); + if (error != 0) { + printf( + "dmar%d: unable to alloc register window at 0x%08jx: error %d\n", + i, (uintmax_t)dmarh->Address, error); + device_delete_child(parent, dmar_devs[i]); + dmar_devs[i] = NULL; + } + } +} + +static int +dmar_probe(device_t dev) +{ + + 
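+ /*
+ * The dmar devices are created by dmar_identify() from the static
+ * ACPI DMAR table and carry no ACPI handle; a device that does
+ * have a handle was enumerated from the ACPI namespace and is not
+ * one of ours.
+ */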
if (acpi_disabled("dmar")) + return (ENXIO); + if (acpi_get_handle(dev) != NULL) + return (ENXIO); + device_set_desc(dev, "DMA remap"); + return (0); +} + +static void +dmar_release_resources(device_t dev, struct dmar_unit *unit) +{ + + dmar_fini_busdma(unit); + dmar_fini_fault_log(unit); + if (unit->irq != -1) { + bus_teardown_intr(dev, unit->irq_res, unit->intr_handle); + bus_release_resource(dev, SYS_RES_IRQ, unit->irq_rid, + unit->irq_res); + bus_delete_resource(dev, SYS_RES_IRQ, unit->irq_rid); + PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)), + dev, unit->irq); + unit->irq = -1; + } + if (unit->regs != NULL) { + bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid, + unit->regs); + bus_release_resource(dev, SYS_RES_MEMORY, unit->reg_rid, + unit->regs); + unit->regs = NULL; + } + if (unit->domids != NULL) { + delete_unrhdr(unit->domids); + unit->domids = NULL; + } + if (unit->ctx_obj != NULL) { + vm_object_deallocate(unit->ctx_obj); + unit->ctx_obj = NULL; + } +} + +static int +dmar_alloc_irq(device_t dev, struct dmar_unit *unit) +{ + device_t pcib; + uint64_t msi_addr; + uint32_t msi_data; + int error; + + pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */ + error = PCIB_ALLOC_MSIX(pcib, dev, &unit->irq); + if (error != 0) { + device_printf(dev, "cannot allocate fault interrupt, %d\n", + error); + goto err1; + } + unit->irq_rid = DMAR_IRQ_RID; + error = bus_set_resource(dev, SYS_RES_IRQ, unit->irq_rid, unit->irq, + 1); + if (error != 0) { + device_printf(dev, "cannot set interrupt resource, %d\n", + error); + goto err2; + } + unit->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, + &unit->irq_rid, RF_ACTIVE); + if (unit->irq_res == NULL) { + device_printf(dev, "cannot map fault interrupt\n"); + error = ENXIO; + goto err3; + } + error = bus_setup_intr(dev, unit->irq_res, INTR_TYPE_MISC, + dmar_intr, NULL, unit, &unit->intr_handle); + if (error != 0) { + device_printf(dev, "cannot setup fault interrupt, %d\n", error); + goto err4; + } + bus_describe_intr(dev, unit->irq_res, unit->intr_handle, "fault"); + error = PCIB_MAP_MSI(pcib, dev, unit->irq, &msi_addr, &msi_data); + if (error != 0) { + device_printf(dev, "cannot map interrupt, %d\n", error); + goto err5; + } + dmar_write4(unit, DMAR_FEDATA_REG, msi_data); + dmar_write4(unit, DMAR_FEADDR_REG, msi_addr); + /* Only for xAPIC mode */ + dmar_write4(unit, DMAR_FEUADDR_REG, msi_addr >> 32); + return (0); + +err5: + bus_teardown_intr(dev, unit->irq_res, unit->intr_handle); +err4: + bus_release_resource(dev, SYS_RES_IRQ, unit->irq_rid, unit->irq_res); +err3: + bus_delete_resource(dev, SYS_RES_IRQ, unit->irq_rid); +err2: + PCIB_RELEASE_MSIX(pcib, dev, unit->irq); + unit->irq = -1; +err1: + return (error); +} + +#ifdef DEV_APIC +static int +dmar_remap_intr(device_t dev, device_t child, u_int irq) +{ + struct dmar_unit *unit; + uint64_t msi_addr; + uint32_t msi_data; + int error; + + unit = device_get_softc(dev); + if (irq != unit->irq) + return (ENOENT); + error = PCIB_MAP_MSI(device_get_parent(device_get_parent(dev)), dev, + irq, &msi_addr, &msi_data); + if (error != 0) + return (error); + dmar_disable_intr(unit); + dmar_write4(unit, DMAR_FEDATA_REG, msi_data); + dmar_write4(unit, DMAR_FEADDR_REG, msi_addr); + dmar_write4(unit, DMAR_FEUADDR_REG, msi_addr >> 32); + dmar_enable_intr(unit); + return (0); +} +#endif + +static void +dmar_print_caps(device_t dev, struct dmar_unit *unit, + ACPI_DMAR_HARDWARE_UNIT *dmaru) +{ + uint32_t caphi, ecaphi; + + device_printf(dev, "regs@0x%08jx, ver=%d.%d, seg=%d, 
flags=<%b>\n", + (uintmax_t)dmaru->Address, DMAR_MAJOR_VER(unit->hw_ver), + DMAR_MINOR_VER(unit->hw_ver), dmaru->Segment, + dmaru->Flags, "\020\001INCLUDE_ALL_PCI"); + caphi = unit->hw_cap >> 32; + device_printf(dev, "cap=%b,", (u_int)unit->hw_cap, + "\020\004AFL\005WBF\006PLMR\007PHMR\010CM\027ZLR\030ISOCH"); + printf("%b, ", caphi, "\020\010PSI\027DWD\030DRD"); + printf("ndoms=%d, sagaw=%d, mgaw=%d, fro=%d, nfr=%d, superp=%d", + DMAR_CAP_ND(unit->hw_cap), DMAR_CAP_SAGAW(unit->hw_cap), + DMAR_CAP_MGAW(unit->hw_cap), DMAR_CAP_FRO(unit->hw_cap), + DMAR_CAP_NFR(unit->hw_cap), DMAR_CAP_SPS(unit->hw_cap)); + if ((unit->hw_cap & DMAR_CAP_PSI) != 0) + printf(", mamv=%d", DMAR_CAP_MAMV(unit->hw_cap)); + printf("\n"); + ecaphi = unit->hw_ecap >> 32; + device_printf(dev, "ecap=%b,", (u_int)unit->hw_ecap, + "\020\001C\002QI\003DI\004IR\005EIM\007PT\010SC"); + printf("%b, ", ecaphi, "\020"); + printf("mhmw=%d, iro=%d\n", DMAR_ECAP_MHMV(unit->hw_ecap), + DMAR_ECAP_IRO(unit->hw_ecap)); +} + +static int +dmar_attach(device_t dev) +{ + struct dmar_unit *unit; + ACPI_DMAR_HARDWARE_UNIT *dmaru; + int error; + + unit = device_get_softc(dev); + unit->dev = dev; + unit->unit = device_get_unit(dev); + dmaru = dmar_find_by_index(unit->unit); + if (dmaru == NULL) + return (EINVAL); + unit->irq = -1; + unit->segment = dmaru->Segment; + unit->base = dmaru->Address; + unit->reg_rid = DMAR_REG_RID; + unit->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &unit->reg_rid, RF_ACTIVE); + if (unit->regs == NULL) { + device_printf(dev, "cannot allocate register window\n"); + return (ENOMEM); + } + unit->hw_ver = dmar_read4(unit, DMAR_VER_REG); + unit->hw_cap = dmar_read8(unit, DMAR_CAP_REG); + unit->hw_ecap = dmar_read8(unit, DMAR_ECAP_REG); + if (bootverbose) + dmar_print_caps(dev, unit, dmaru); + error = dmar_alloc_irq(dev, unit); + if (error != 0) { + dmar_release_resources(dev, unit); + return (error); + } + mtx_init(&unit->lock, "dmarhw", NULL, MTX_DEF); + unit->domids = new_unrhdr(0, dmar_nd2mask(DMAR_CAP_ND(unit->hw_cap)), + &unit->lock); + + /* + * 9.2 "Context Entry": + * When Caching Mode (CM) field is reported as Set, the + * domain-id value of zero is architecturally + * reserved. Software must not use domain-id value of zero + * when CM is Set. + */ + if ((unit->hw_cap & DMAR_CAP_CM) != 0) + alloc_unr_specific(unit->domids, 0); + + unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 + + DMAR_CTX_CNT), 0, 0, NULL); + + /* + * Allocate and load the root entry table pointer. Enable the + * address translation after the required invalidations are + * done. 
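+ * The helpers called below are expected to wait for the hardware
+ * to acknowledge each command, so a failure at any step aborts
+ * the attach before translation could ever be enabled.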
+ */ + dmar_pgalloc(unit->ctx_obj, 0, DMAR_PGF_WAITOK | DMAR_PGF_ZERO); + DMAR_LOCK(unit); + error = dmar_load_root_entry_ptr(unit); + if (error != 0) { + DMAR_UNLOCK(unit); + dmar_release_resources(dev, unit); + return (error); + } + error = dmar_inv_ctx_glob(unit); + if (error != 0) { + DMAR_UNLOCK(unit); + dmar_release_resources(dev, unit); + return (error); + } + if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) { + error = dmar_inv_iotlb_glob(unit); + if (error != 0) { + DMAR_UNLOCK(unit); + dmar_release_resources(dev, unit); + return (error); + } + } + + DMAR_UNLOCK(unit); + error = dmar_init_fault_log(unit); + if (error != 0) { + dmar_release_resources(dev, unit); + return (error); + } + error = dmar_init_busdma(unit); + if (error != 0) { + dmar_release_resources(dev, unit); + return (error); + } + +#ifdef NOTYET + DMAR_LOCK(unit); + error = dmar_enable_translation(unit); + if (error != 0) { + DMAR_UNLOCK(unit); + dmar_release_resources(dev, unit); + return (error); + } + DMAR_UNLOCK(unit); +#endif + + return (0); +} + +static int +dmar_detach(device_t dev) +{ + + return (EBUSY); +} + +static int +dmar_suspend(device_t dev) +{ + + return (0); +} + +static int +dmar_resume(device_t dev) +{ + + /* XXXKIB */ + return (0); +} + +static device_method_t dmar_methods[] = { + DEVMETHOD(device_identify, dmar_identify), + DEVMETHOD(device_probe, dmar_probe), + DEVMETHOD(device_attach, dmar_attach), + DEVMETHOD(device_detach, dmar_detach), + DEVMETHOD(device_suspend, dmar_suspend), + DEVMETHOD(device_resume, dmar_resume), +#ifdef DEV_APIC + DEVMETHOD(bus_remap_intr, dmar_remap_intr), +#endif + DEVMETHOD_END +}; + +static driver_t dmar_driver = { + "dmar", + dmar_methods, + sizeof(struct dmar_unit), +}; + +DRIVER_MODULE(dmar, acpi, dmar_driver, dmar_devclass, 0, 0); +MODULE_DEPEND(dmar, acpi, 1, 1, 1); + +int dmar_match_verbose = 0; + +static void +dmar_print_path(device_t dev, const char *banner, int busno, int depth, + const ACPI_DMAR_PCI_PATH *path) +{ + int i; + + device_printf(dev, "%s [%d, ", banner, busno); + for (i = 0; i < depth; i++) { + if (i != 0) + printf(", "); + printf("(%d, %d)", path[i].Device, path[i].Function); + } + printf("]\n"); +} + +static int +dmar_dev_depth(device_t child) +{ + devclass_t pci_class; + device_t bus, pcib; + int depth; + + pci_class = devclass_find("pci"); + for (depth = 1; ; depth++) { + bus = device_get_parent(child); + pcib = device_get_parent(bus); + if (device_get_devclass(device_get_parent(pcib)) != + pci_class) + return (depth); + child = pcib; + } +} + +static void +dmar_dev_path(device_t child, int *busno, ACPI_DMAR_PCI_PATH *path, int depth) +{ + devclass_t pci_class; + device_t bus, pcib; + + pci_class = devclass_find("pci"); + for (depth--; depth != -1; depth--) { + path[depth].Device = pci_get_slot(child); + path[depth].Function = pci_get_function(child); + bus = device_get_parent(child); + pcib = device_get_parent(bus); + if (device_get_devclass(device_get_parent(pcib)) != + pci_class) { + /* reached a host bridge */ + *busno = pci_get_bus(bus); + return; + } + child = pcib; + } + panic("wrong depth"); +} + +static int +dmar_match_pathes(int busno1, const ACPI_DMAR_PCI_PATH *path1, int depth1, + int busno2, const ACPI_DMAR_PCI_PATH *path2, int depth2, + enum AcpiDmarScopeType scope_type) +{ + int i, depth; + + if (busno1 != busno2) + return (0); + if (scope_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && depth1 != depth2) + return (0); + depth = depth1; + if (depth2 < depth) + depth = depth2; + for (i = 0; i < depth; i++) { + if (path1[i].Device != 
path2[i].Device || + path1[i].Function != path2[i].Function) + return (0); + } + return (1); +} + +static int +dmar_match_devscope(ACPI_DMAR_DEVICE_SCOPE *devscope, device_t dev, + int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len) +{ + ACPI_DMAR_PCI_PATH *path; + int path_len; + + if (devscope->Length < sizeof(*devscope)) { + printf("dmar_find: corrupted DMAR table, dl %d\n", + devscope->Length); + return (-1); + } + if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT && + devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_BRIDGE) + return (0); + path_len = devscope->Length - sizeof(*devscope); + if (path_len % 2 != 0) { + printf("dmar_find_bsf: corrupted DMAR table, dl %d\n", + devscope->Length); + return (-1); + } + path_len /= 2; + path = (ACPI_DMAR_PCI_PATH *)(devscope + 1); + if (path_len == 0) { + printf("dmar_find: corrupted DMAR table, dl %d\n", + devscope->Length); + return (-1); + } + if (dmar_match_verbose) + dmar_print_path(dev, "DMAR", devscope->Bus, path_len, path); + + return (dmar_match_pathes(devscope->Bus, path, path_len, dev_busno, + dev_path, dev_path_len, devscope->EntryType)); +} + +struct dmar_unit * +dmar_find(device_t dev) +{ + device_t dmar_dev; + ACPI_DMAR_HARDWARE_UNIT *dmarh; + ACPI_DMAR_DEVICE_SCOPE *devscope; + char *ptr, *ptrend; + int i, match, dev_domain, dev_busno, dev_path_len; + + dmar_dev = NULL; + dev_domain = pci_get_domain(dev); + dev_path_len = dmar_dev_depth(dev); + ACPI_DMAR_PCI_PATH dev_path[dev_path_len]; + dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len); + if (dmar_match_verbose) + dmar_print_path(dev, "PCI", dev_busno, dev_path_len, dev_path); + + for (i = 0; i < dmar_devcnt; i++) { + if (dmar_devs[i] == NULL) + continue; + dmarh = dmar_find_by_index(i); + if (dmarh == NULL) + continue; + if (dmarh->Segment != dev_domain) + continue; + if ((dmarh->Flags & ACPI_DMAR_INCLUDE_ALL) != 0) { + dmar_dev = dmar_devs[i]; + goto found; + } + ptr = (char *)dmarh + sizeof(*dmarh); + ptrend = (char *)dmarh + dmarh->Header.Length; + for (;;) { + if (ptr >= ptrend) + break; + devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; + ptr += devscope->Length; + if (dmar_match_verbose) { + device_printf(dev, + "pci%d:%d:%d:%d matching dmar%d\n", + dev_domain, pci_get_bus(dev), + pci_get_slot(dev), + pci_get_function(dev), + ((struct dmar_unit *)device_get_softc( + dmar_devs[i]))->unit); + } + match = dmar_match_devscope(devscope, dev, dev_busno, + dev_path, dev_path_len); + if (dmar_match_verbose) { + if (match == -1) + printf("table error\n"); + else if (match == 0) + printf("not matched\n"); + else + printf("matched\n"); + } + if (match == -1) + return (NULL); + else if (match == 1) { + dmar_dev = dmar_devs[i]; + goto found; + } + } + } + return (NULL); +found: + return (device_get_softc(dmar_dev)); +} + +struct rmrr_iter_args { + struct dmar_ctx *ctx; + device_t dev; + int dev_domain; + int dev_busno; + ACPI_DMAR_PCI_PATH *dev_path; + int dev_path_len; + struct dmar_map_entries_tailq *rmrr_entries; +}; + +static int +dmar_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg) +{ + struct rmrr_iter_args *ria; + ACPI_DMAR_RESERVED_MEMORY *resmem; + ACPI_DMAR_DEVICE_SCOPE *devscope; + struct dmar_map_entry *entry; + char *ptr, *ptrend; + int match; + + if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY) + return (1); + + ria = arg; + resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh; + if (resmem->Segment != ria->dev_domain) + return (1); + + ptr = (char *)resmem + sizeof(*resmem); + ptrend = (char *)resmem + resmem->Header.Length; + for (;;) { + if (ptr >= ptrend) 
+ break; + devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; + ptr += devscope->Length; + match = dmar_match_devscope(devscope, ria->dev, ria->dev_busno, + ria->dev_path, ria->dev_path_len); + if (match == 1) { + entry = dmar_gas_alloc_entry(ria->ctx, DMAR_PGF_WAITOK); + entry->start = resmem->BaseAddress; + /* The RMRR entry end address is inclusive. */ + entry->end = resmem->EndAddress + 1; + TAILQ_INSERT_TAIL(ria->rmrr_entries, entry, + unroll_link); + } + } + + return (1); +} + +void +dmar_ctx_parse_rmrr(struct dmar_ctx *ctx, device_t dev, + struct dmar_map_entries_tailq *rmrr_entries) +{ + struct rmrr_iter_args ria; + + ria.dev_domain = pci_get_domain(dev); + ria.dev_path_len = dmar_dev_depth(dev); + ACPI_DMAR_PCI_PATH dev_path[ria.dev_path_len]; + dmar_dev_path(dev, &ria.dev_busno, dev_path, ria.dev_path_len); + + ria.ctx = ctx; + ria.dev = dev; + ria.dev_path = dev_path; + ria.rmrr_entries = rmrr_entries; + dmar_iterate_tbl(dmar_rmrr_iter, &ria); +} + +#ifdef DDB +#include + +static void +dmar_print_ctx_entry(const struct dmar_map_entry *entry) +{ + + db_printf(" start %jx end %jx free_after %jx flags %x\n", + entry->start, entry->end, entry->free_after, entry->flags); +} + +static void +dmar_print_ctx(struct dmar_ctx *ctx, bool show_mappings) +{ + struct dmar_map_entry *entry; + + db_printf( + " pci%d:%d:%d dom %d mgaw %d agaw %d pglvl %d end %jx\n" + " refs %d flags %x pgobj %p\n", + ctx->bus, ctx->slot, ctx->func, ctx->domain, ctx->mgaw, + ctx->agaw, ctx->pglvl, (uintmax_t)ctx->end, ctx->refs, + ctx->flags, ctx->pgtbl_obj); + if (!show_mappings) + return; + db_printf(" mapped:\n"); + RB_FOREACH(entry, dmar_gas_entries_tree, &ctx->rb_root) { + dmar_print_ctx_entry(entry); + if (db_pager_quit) + break; + } + db_printf(" unloading:\n"); + TAILQ_FOREACH(entry, &ctx->unload_entries, dmamap_link) { + dmar_print_ctx_entry(entry); + if (db_pager_quit) + break; + } +} + +static void +dmar_print_one(int idx, bool show_ctxs, bool show_mappings) +{ + struct dmar_unit *unit; + struct dmar_ctx *ctx; + int i, frir; + + unit = device_get_softc(dmar_devs[idx]); + db_printf("dmar%d at %p, root at 0x%jx, ver 0x%x\n", unit->unit, unit, + dmar_read8(unit, DMAR_RTADDR_REG), dmar_read4(unit, DMAR_VER_REG)); + db_printf("cap 0x%jx ecap 0x%jx gsts 0x%x fsts 0x%x fectl 0x%x\n", + (uintmax_t)dmar_read8(unit, DMAR_CAP_REG), + (uintmax_t)dmar_read8(unit, DMAR_ECAP_REG), + dmar_read4(unit, DMAR_GSTS_REG), + dmar_read4(unit, DMAR_FSTS_REG), + dmar_read4(unit, DMAR_FECTL_REG)); + db_printf("fed 0x%x fea 0x%x feua 0x%x\n", + dmar_read4(unit, DMAR_FEDATA_REG), + dmar_read4(unit, DMAR_FEADDR_REG), + dmar_read4(unit, DMAR_FEUADDR_REG)); + db_printf("primary fault log:\n"); + for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) { + frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16; + db_printf(" %d at 0x%x: %jx %jx\n", i, frir, + (uintmax_t)dmar_read8(unit, frir), + (uintmax_t)dmar_read8(unit, frir + 8)); + } + if (show_ctxs) { + db_printf("contexts:\n"); + LIST_FOREACH(ctx, &unit->contexts, link) { + dmar_print_ctx(ctx, show_mappings); + if (db_pager_quit) + break; + } + } +} + +DB_SHOW_COMMAND(dmar, dmar_print) +{ + int i; + bool show_ctxs, show_mappings; + + show_ctxs = strchr(modif, 'c') != NULL; + show_mappings = strchr(modif, 'm') != NULL; + if (have_addr) { + dmar_print_one((int)addr, show_ctxs, show_mappings); + } else { + for (i = 0; i < dmar_devcnt; i++) { + dmar_print_one(i, show_ctxs, show_mappings); + if (db_pager_quit) + break; + } + } +} +#endif diff --git a/sys/x86/iommu/intel_fault.c b/sys/x86/iommu/intel_fault.c 
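The DB_SHOW_COMMAND defined above accepts modifiers: "show dmar" prints the registers and primary fault log of every unit, "show dmar/c" also lists the contexts attached to each unit, "show dmar/cm" additionally dumps their mapped and unloading entries, and an address argument limits the output to the unit with that index.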
new file mode 100644 index 0000000..f66fd70 --- /dev/null +++ b/sys/x86/iommu/intel_fault.c @@ -0,0 +1,276 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_acpi.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Fault interrupt handling for DMARs. If advanced fault logging is + * not implemented by hardware, the code emulates it. Fast interrupt + * handler flushes the fault registers into circular buffer at + * unit->fault_log, and schedules a task. + * + * The fast handler is used since faults usually come in bursts, and + * number of fault log registers is limited, e.g. down to one for 5400 + * MCH. We are trying to reduce the latency for clearing the fault + * register file. The task is usually long-running, since printf() is + * slow, but this is not problematic because bursts are rare. + * + * For the same reason, each translation unit task is executed in its + * own thread. + * + * XXXKIB It seems there is no hardware available which implements + * advanced fault logging, so the code to handle AFL is not written. 
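+ *
+ * Each logged fault occupies two uint64_t slots in fault_log, so
+ * the head and tail indexes always advance by two.  With the
+ * default fault_log_size of 256 the ring holds 127 records: one
+ * record slot is kept empty so that head == tail unambiguously
+ * means an empty ring, and records arriving while the ring is
+ * full are dropped.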
+ */ + +static int +dmar_fault_next(struct dmar_unit *unit, int faultp) +{ + + faultp += 2; + if (faultp == unit->fault_log_size) + faultp = 0; + return (faultp); +} + +static void +dmar_intr_clear(struct dmar_unit *unit, uint32_t fsts) +{ + uint32_t clear; + + clear = 0; + if ((fsts & DMAR_FSTS_ITE) != 0) { + printf("DMAR%d: Invalidation timed out\n", unit->unit); + clear |= DMAR_FSTS_ITE; + } + if ((fsts & DMAR_FSTS_ICE) != 0) { + printf("DMAR%d: Invalidation completion error\n", + unit->unit); + clear |= DMAR_FSTS_ICE; + } + if ((fsts & DMAR_FSTS_IQE) != 0) { + printf("DMAR%d: Invalidation queue error\n", + unit->unit); + clear |= DMAR_FSTS_IQE; + } + if ((fsts & DMAR_FSTS_APF) != 0) { + printf("DMAR%d: Advanced pending fault\n", unit->unit); + clear |= DMAR_FSTS_APF; + } + if ((fsts & DMAR_FSTS_AFO) != 0) { + printf("DMAR%d: Advanced fault overflow\n", unit->unit); + clear |= DMAR_FSTS_AFO; + } + if (clear != 0) + dmar_write4(unit, DMAR_FSTS_REG, clear); +} + +int +dmar_intr(void *arg) +{ + struct dmar_unit *unit; + uint64_t fault_rec[2]; + uint32_t fsts; + int fri, frir, faultp; + bool enqueue; + + unit = arg; + enqueue = false; + fsts = dmar_read4(unit, DMAR_FSTS_REG); + dmar_intr_clear(unit, fsts); + + if ((fsts & DMAR_FSTS_PPF) == 0) + goto done; + + fri = DMAR_FSTS_FRI(fsts); + for (;;) { + frir = (DMAR_CAP_FRO(unit->hw_cap) + fri) * 16; + fault_rec[1] = dmar_read8(unit, frir + 8); + if ((fault_rec[1] & DMAR_FRCD2_F) == 0) + break; + fault_rec[0] = dmar_read8(unit, frir); + dmar_write4(unit, frir + 12, DMAR_FRCD2_F32); + DMAR_FAULT_LOCK(unit); + faultp = unit->fault_log_head; + if (dmar_fault_next(unit, faultp) == unit->fault_log_tail) { + /* XXXKIB log overflow */ + } else { + unit->fault_log[faultp] = fault_rec[0]; + unit->fault_log[faultp + 1] = fault_rec[1]; + unit->fault_log_head = dmar_fault_next(unit, faultp); + enqueue = true; + } + DMAR_FAULT_UNLOCK(unit); + fri += 1; + if (fri >= DMAR_CAP_NFR(unit->hw_cap)) + fri = 0; + } + if ((fsts & DMAR_FSTS_PFO) != 0) { + printf("DMAR%d: Fault Overflow\n", unit->unit); + dmar_write4(unit, DMAR_FSTS_REG, DMAR_FSTS_PFO); + } + if (enqueue) { + taskqueue_enqueue_fast(unit->fault_taskqueue, + &unit->fault_task); + } + +done: + return (FILTER_HANDLED); +} + +static void +dmar_fault_task(void *arg, int pending __unused) +{ + struct dmar_unit *unit; + struct dmar_ctx *ctx; + uint64_t fault_rec[2]; + int sid, bus, slot, func, faultp; + + unit = arg; + DMAR_FAULT_LOCK(unit); + for (;;) { + faultp = unit->fault_log_tail; + if (faultp == unit->fault_log_head) + break; + + fault_rec[0] = unit->fault_log[faultp]; + fault_rec[1] = unit->fault_log[faultp + 1]; + unit->fault_log_tail = dmar_fault_next(unit, faultp); + DMAR_FAULT_UNLOCK(unit); + + sid = DMAR_FRCD2_SID(fault_rec[1]); + bus = (sid >> 8) & 0xf; + slot = (sid >> 3) & 0x1f; + func = sid & 0x3; + printf("DMAR%d: ", unit->unit); + DMAR_LOCK(unit); + ctx = dmar_find_ctx_locked(unit, bus, slot, func); + if (ctx == NULL) { + printf(":"); + } else { + ctx->flags |= DMAR_CTX_FAULTED; + ctx->last_fault_rec[0] = fault_rec[0]; + ctx->last_fault_rec[1] = fault_rec[1]; + device_printf(ctx->ctx_tag.owner, ""); + } + DMAR_UNLOCK(unit); + printf( + "pci%d:%d:%d fault acc %x adt 0x%x reason 0x%x addr %jx\n", + bus, slot, func, DMAR_FRCD2_T(fault_rec[1]), + DMAR_FRCD2_AT(fault_rec[1]), DMAR_FRCD2_FR(fault_rec[1]), + (uintmax_t)fault_rec[0]); + DMAR_FAULT_LOCK(unit); + } + DMAR_FAULT_UNLOCK(unit); +} + +static void +dmar_clear_faults(struct dmar_unit *unit) +{ + uint32_t frec, frir, fsts; + int i; + + for 
(i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) { + frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16; + frec = dmar_read4(unit, frir + 12); + if ((frec & DMAR_FRCD2_F32) == 0) + continue; + dmar_write4(unit, frir + 12, DMAR_FRCD2_F32); + } + fsts = dmar_read4(unit, DMAR_FSTS_REG); + dmar_write4(unit, DMAR_FSTS_REG, fsts); +} + +int +dmar_init_fault_log(struct dmar_unit *unit) +{ + + mtx_init(&unit->fault_lock, "dmarflt", NULL, MTX_SPIN); + unit->fault_log_size = 256; /* 128 fault log entries */ + TUNABLE_INT_FETCH("hw.dmar.fault_log_size", &unit->fault_log_size); + if (unit->fault_log_size % 2 != 0) + panic("hw.dmar_fault_log_size must be even"); + unit->fault_log = malloc(sizeof(uint64_t) * unit->fault_log_size, + M_DEVBUF, M_WAITOK | M_ZERO); + + TASK_INIT(&unit->fault_task, 0, dmar_fault_task, unit); + unit->fault_taskqueue = taskqueue_create_fast("dmar", M_WAITOK, + taskqueue_thread_enqueue, &unit->fault_taskqueue); + taskqueue_start_threads(&unit->fault_taskqueue, 1, PI_AV, + "dmar%d fault taskq", unit->unit); + + dmar_disable_intr(unit); + dmar_clear_faults(unit); + dmar_enable_intr(unit); + + return (0); +} + +void +dmar_fini_fault_log(struct dmar_unit *unit) +{ + + dmar_disable_intr(unit); + + if (unit->fault_taskqueue == NULL) + return; + + taskqueue_drain(unit->fault_taskqueue, &unit->fault_task); + taskqueue_free(unit->fault_taskqueue); + mtx_destroy(&unit->fault_lock); + + free(unit->fault_log, M_DEVBUF); + unit->fault_log = NULL; + unit->fault_log_head = unit->fault_log_tail = 0; +} diff --git a/sys/x86/iommu/intel_gas.c b/sys/x86/iommu/intel_gas.c new file mode 100644 index 0000000..3a378ff --- /dev/null +++ b/sys/x86/iommu/intel_gas.c @@ -0,0 +1,552 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#define RB_AUGMENT(x) dmar_gas_augment_entry(x) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Guest Address Space management. + */ + +static uma_zone_t dmar_map_entry_zone; + +static void +intel_gas_init(void) +{ + + dmar_map_entry_zone = uma_zcreate("DMAR_MAP_ENTRY", + sizeof(struct dmar_map_entry), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, 0); +} +SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL); + +struct dmar_map_entry * +dmar_gas_alloc_entry(struct dmar_ctx *ctx, u_int flags) +{ + struct dmar_map_entry *res; + + KASSERT((flags & ~(DMAR_PGF_WAITOK)) == 0, + ("unsupported flags %x", flags)); + + res = uma_zalloc(dmar_map_entry_zone, ((flags & DMAR_PGF_WAITOK) != + 0 ? M_WAITOK : M_NOWAIT) | M_ZERO); + if (res != NULL) + atomic_add_int(&ctx->entries_cnt, 1); + return (res); +} + +void +dmar_gas_free_entry(struct dmar_ctx *ctx, struct dmar_map_entry *entry) +{ + + atomic_subtract_int(&ctx->entries_cnt, 1); + uma_zfree(dmar_map_entry_zone, entry); +} + +static int +dmar_gas_cmp_entries(struct dmar_map_entry *a, struct dmar_map_entry *b) +{ + + /* Last entry have zero size, so <= */ + KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)", + a, (uintmax_t)a->start, (uintmax_t)a->end)); + KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)", + b, (uintmax_t)b->start, (uintmax_t)b->end)); +#if 0 + /* Cannot work due to find */ + KASSERT(a->end <= b->start || b->end <= a->start, + ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)", + a, (uintmax_t)a->start, (uintmax_t)a->end, + b, (uintmax_t)b->start, (uintmax_t)b->end)); +#endif + + if (a->end <= b->start) + return (-1); + else if (b->end <= a->start) + return (1); + return (0); +} + +static void +dmar_gas_augment_entry(struct dmar_map_entry *entry) +{ + struct dmar_map_entry *l, *r; + + for (; entry != NULL; entry = RB_PARENT(entry, rb_entry)) { + l = RB_LEFT(entry, rb_entry); + r = RB_RIGHT(entry, rb_entry); + if (l == NULL && r == NULL) { + entry->free_down = entry->free_after; + } else if (l == NULL && r != NULL) { + entry->free_down = MAX(entry->free_after, r->free_down); + } else if (/*l != NULL && */ r == NULL) { + entry->free_down = MAX(entry->free_after, l->free_down); + } else /* if (l != NULL && r != NULL) */ { + entry->free_down = MAX(entry->free_after, l->free_down); + entry->free_down = MAX(entry->free_down, r->free_down); + } + } +} + +RB_GENERATE(dmar_gas_entries_tree, dmar_map_entry, rb_entry, + dmar_gas_cmp_entries); + +void +dmar_gas_init_ctx(struct dmar_ctx *ctx) +{ + struct dmar_map_entry *begin, *end; + + begin = dmar_gas_alloc_entry(ctx, DMAR_PGF_WAITOK); + end = dmar_gas_alloc_entry(ctx, DMAR_PGF_WAITOK); + + DMAR_CTX_LOCK(ctx); + KASSERT(ctx->entries_cnt == 2, ("dirty ctx %p", ctx)); + KASSERT(RB_EMPTY(&ctx->rb_root), ("non-empty entries %p", ctx)); + + begin->start = 0; + begin->end = DMAR_PAGE_SIZE; + begin->free_after = ctx->end - begin->end; + begin->flags = DMAR_MAP_ENTRY_PLACE; + RB_INSERT(dmar_gas_entries_tree, &ctx->rb_root, begin); + + end->start = ctx->end; + end->end = ctx->end; + end->free_after = 0; + end->flags = DMAR_MAP_ENTRY_PLACE; + RB_INSERT(dmar_gas_entries_tree, &ctx->rb_root, end); + + ctx->first_place = begin; + ctx->last_place 
= end; + DMAR_CTX_UNLOCK(ctx); +} + +void +dmar_gas_fini_ctx(struct dmar_ctx *ctx) +{ + struct dmar_map_entry *entry, *entry1; + + DMAR_CTX_ASSERT_LOCKED(ctx); + KASSERT(ctx->entries_cnt == 2, ("ctx still in use %p", ctx)); + + entry = RB_MIN(dmar_gas_entries_tree, &ctx->rb_root); + KASSERT(entry->start == 0, ("start entry start %p", ctx)); + KASSERT(entry->end == DMAR_PAGE_SIZE, ("start entry end %p", ctx)); + KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE, + ("start entry flags %p", ctx)); + RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry); + dmar_gas_free_entry(ctx, entry); + + entry = RB_MAX(dmar_gas_entries_tree, &ctx->rb_root); + KASSERT(entry->start == ctx->end, ("end entry start %p", ctx)); + KASSERT(entry->end == ctx->end, ("end entry end %p", ctx)); + KASSERT(entry->free_after == 0, ("end entry free_after%p", ctx)); + KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE, + ("end entry flags %p", ctx)); + RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry); + dmar_gas_free_entry(ctx, entry); + + RB_FOREACH_SAFE(entry, dmar_gas_entries_tree, &ctx->rb_root, entry1) { + KASSERT((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0, + ("non-RMRR entry left %p", ctx)); + RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry); + dmar_gas_free_entry(ctx, entry); + } +} + +struct dmar_gas_match_args { + struct dmar_ctx *ctx; + dmar_gaddr_t asize; + dmar_gaddr_t size; + const struct bus_dma_tag_common *common; + u_int gas_flags; + struct dmar_map_entry *entry; +}; + +static int +dmar_gas_match(struct dmar_gas_match_args *a, struct dmar_map_entry *prev) +{ + struct dmar_map_entry *found, *l, *next; + dmar_gaddr_t bs; + int ret; + + KASSERT(prev->start <= a->common->lowaddr, + ("corrupted tree %jx %jx", (uintmax_t)prev->start, + (uintmax_t)a->common->lowaddr)); + if (prev->end + a->asize <= a->common->lowaddr) { + /* No boundary crossing. */ + if (dmar_test_boundary(prev->end, a->asize, + a->common->boundary)) { + a->entry->start = roundup2(prev->end + DMAR_PAGE_SIZE, + a->common->alignment); + goto finish; + } + + /* + * The prev->end to prev->end + asize region crosses + * the boundary. Check if there is enough space after + * the next boundary after the prev->end. + */ + bs = (prev->end + a->common->boundary) & + ~(a->common->boundary - 1); + if (bs + a->asize <= prev->end + prev->free_after) { + a->entry->start = roundup2(bs, a->common->alignment); + goto finish; + } + + /* + * Not enough space to align at boundary, but allowed + * to split. + */ + if ((a->gas_flags & DMAR_GM_CANSPLIT) != 0) { + a->entry->start = roundup2(prev->end + DMAR_PAGE_SIZE, + a->common->alignment); + a->size = bs - a->entry->start; + goto finish; + } + } + if (prev->free_down < a->asize) + return (ENOMEM); + l = RB_LEFT(prev, rb_entry); + ret = dmar_gas_match(a, l); + if (ret == 0) + return (ret); + l = RB_RIGHT(prev, rb_entry); + return (dmar_gas_match(a, l)); + +finish: + /* + * The prev->end is always aligned on the page size, which + * causes page alignment for the entry->start too. The size + * is checked to be multiple of the page size. + * + * The page sized gap is created between consequent + * allocations to ensure that out-of-bounds accesses fault. 
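+ *
+ * For example, with prev->end == 0x3000 and a 0x2000-byte request
+ * with page-sized alignment, the new entry occupies [0x4000, 0x6000)
+ * and the page at 0x3000 remains unmapped, so a device overrunning
+ * the previous buffer faults instead of touching the new one.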
+ */ + a->entry->end = a->entry->start + a->size; + + next = RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, prev); + KASSERT(next->start >= a->entry->end && + next->start - a->entry->start >= a->size, + ("dmar_gas_match hole failed %p prev (%jx, %jx) " + "free_after %jx next (%jx, %jx) entry (%jx, %jx)", a->ctx, + (uintmax_t)prev->start, (uintmax_t)prev->end, + (uintmax_t)prev->free_after, + (uintmax_t)next->start, (uintmax_t)next->end, + (uintmax_t)a->entry->start, (uintmax_t)a->entry->end)); + + prev->free_after = a->entry->start - prev->end; + a->entry->free_after = next->start - a->entry->end; + + found = RB_INSERT(dmar_gas_entries_tree, &a->ctx->rb_root, a->entry); + KASSERT(found == NULL, ("found dup %p %p start %jx size %jx", + a->ctx, found, (uintmax_t)a->entry->start, (uintmax_t)a->size)); + a->entry->flags = DMAR_MAP_ENTRY_MAP; + + KASSERT(RB_PREV(dmar_gas_entries_tree, &a->ctx->rb_root, + a->entry) == prev, + ("entry %p prev %p inserted prev %p", a->entry, prev, + RB_PREV(dmar_gas_entries_tree, &a->ctx->rb_root, a->entry))); + KASSERT(RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, + a->entry) == next, + ("entry %p next %p inserted next %p", a->entry, next, + RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, a->entry))); + return (0); +} + +static int +dmar_gas_find_space(struct dmar_ctx *ctx, + const struct bus_dma_tag_common *common, dmar_gaddr_t size, + u_int flags, struct dmar_map_entry *entry) +{ + struct dmar_map_entry *next, *prev, find_entry; + struct dmar_gas_match_args a; + + DMAR_CTX_ASSERT_LOCKED(ctx); + KASSERT(entry->flags == 0, ("dirty entry %p %p", ctx, entry)); + KASSERT((size & DMAR_PAGE_MASK) == 0, ("size %jx", (uintmax_t)size)); + + a.ctx = ctx; + a.size = size; + a.asize = (common->alignment <= DMAR_PAGE_SIZE) ? size + + DMAR_PAGE_SIZE : roundup2(size + DMAR_PAGE_SIZE, common->alignment); + a.common = common; + a.gas_flags = flags; + a.entry = entry; + + /* XXXKIB upper hole */ + find_entry.start = find_entry.end = common->lowaddr; + next = RB_NFIND(dmar_gas_entries_tree, &ctx->rb_root, &find_entry); + prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, next); + KASSERT(next != NULL, ("no next %p %jx", ctx, + (uintmax_t)find_entry.start)); + KASSERT(prev != NULL, ("no prev %p %jx", ctx, + (uintmax_t)find_entry.start)); + return (dmar_gas_match(&a, prev)); +} + +static int +dmar_gas_alloc_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry, + u_int flags) +{ + struct dmar_map_entry *found, *next, *prev; + + DMAR_CTX_ASSERT_LOCKED(ctx); + + if ((entry->start & DMAR_PAGE_MASK) != 0 || + (entry->end & DMAR_PAGE_MASK) != 0) + return (EINVAL); + if (entry->start >= entry->end) + return (EINVAL); + if (entry->end >= ctx->end) + return (EINVAL); + + next = RB_NFIND(dmar_gas_entries_tree, &ctx->rb_root, entry); + KASSERT(next != NULL, ("next must be non-null %p %jx", ctx, + (uintmax_t)entry->start)); + prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, next); + /* prev could be NULL */ + + if ((prev != NULL && prev->end < entry->start && + (prev->flags & DMAR_MAP_ENTRY_PLACE) == 0) || + (next != NULL && next->start < entry->end && + (next->flags & DMAR_MAP_ENTRY_PLACE) == 0)) + return (EBUSY); + + if (prev != NULL) { + if (prev->end < entry->start) { + RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, prev); + prev = NULL; + } else { + prev->free_after = entry->start - prev->end; + } + } + if (next != NULL) { + if (next->start < entry->end) { + RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, next); + next = NULL; + entry->free_after = 0; + } else { + 
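+ /* The following entry is kept; record the free gap up to its start. */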
entry->free_after = next->start - entry->end; + } + } else { + entry->free_after = ctx->end - entry->end; + } + + found = RB_INSERT(dmar_gas_entries_tree, &ctx->rb_root, entry); + KASSERT(found == NULL, ("found RMRR dup %p %p start %jx end %jx", + ctx, found, (uintmax_t)entry->start, (uintmax_t)entry->end)); + entry->flags = DMAR_MAP_ENTRY_RMRR; + +#ifdef INVARIANTS + struct dmar_map_entry *ip, *in; + ip = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry); + in = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry); + KASSERT(prev == NULL || ip == prev, + ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)", + entry, entry->start, entry->end, prev, + prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end, + ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end)); + KASSERT(next == NULL || in == next, + ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)", + entry, entry->start, entry->end, next, + next == NULL ? 0 : next->start, next == NULL ? 0 : next->end, + in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end)); +#endif + + return (0); +} + +void +dmar_gas_free_space(struct dmar_ctx *ctx, struct dmar_map_entry *entry) +{ + struct dmar_map_entry *next, *prev; + + DMAR_CTX_ASSERT_LOCKED(ctx); + KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR | + DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_MAP, + ("permanent entry %p %p", ctx, entry)); + + prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry); + KASSERT(prev != NULL, ("entry %p prev NULL", entry)); + next = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry); + KASSERT(next != NULL, ("entry %p next NULL", entry)); + prev->free_after = next->start - prev->end; + + RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry); + entry->flags &= ~DMAR_MAP_ENTRY_MAP; +} + +static void +dmar_gas_free_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry) +{ + struct dmar_map_entry *next, *prev; + + DMAR_CTX_ASSERT_LOCKED(ctx); + KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR | + DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_RMRR, + ("non-RMRR entry %p %p", ctx, entry)); + + prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry); + next = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry); + RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry); + entry->flags &= ~DMAR_MAP_ENTRY_RMRR; + + if (prev == NULL) { + ctx->first_place->free_after = next->start - + ctx->first_place->end; + RB_INSERT(dmar_gas_entries_tree, &ctx->rb_root, + ctx->first_place); + } else if (next == NULL) { + prev->free_after = ctx->end - prev->end; + RB_INSERT(dmar_gas_entries_tree, &ctx->rb_root, + ctx->last_place); + } +} + +int +dmar_gas_map(struct dmar_ctx *ctx, const struct bus_dma_tag_common *common, + dmar_gaddr_t size, u_int eflags, u_int flags, vm_page_t *ma, + struct dmar_map_entry **res) +{ + struct dmar_map_entry *entry; + int error; + + KASSERT((flags & ~(DMAR_GM_CANWAIT | DMAR_GM_CANSPLIT)) == 0, + ("invalid flags 0x%x", flags)); + + entry = dmar_gas_alloc_entry(ctx, (flags & DMAR_GM_CANWAIT) != 0 ? + DMAR_PGF_WAITOK : 0); + if (entry == NULL) + return (ENOMEM); + DMAR_CTX_LOCK(ctx); + error = dmar_gas_find_space(ctx, common, size, flags, entry); + if (error == ENOMEM) { + DMAR_CTX_UNLOCK(ctx); + dmar_gas_free_entry(ctx, entry); + return (error); + } + KASSERT(error == 0, + ("unexpected error %d from dmar_gas_find_entry", error)); + entry->flags |= eflags; + DMAR_CTX_UNLOCK(ctx); + + error = ctx_map_buf(ctx, entry->start, size, ma, + ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? 
DMAR_PTE_R : 0) | + ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) | + ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) | + ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0), + (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); + if (error == ENOMEM) { + DMAR_CTX_LOCK(ctx); + dmar_gas_free_space(ctx, entry); + DMAR_CTX_UNLOCK(ctx); + dmar_gas_free_entry(ctx, entry); + return (error); + } + KASSERT(error == 0, + ("unexpected error %d from ctx_map_buf", error)); + + *res = entry; + return (0); +} + +int +dmar_gas_map_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry, + u_int eflags, u_int flags, vm_page_t *ma) +{ + int error; + + KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", ctx, + entry, entry->flags)); + KASSERT((flags & ~(DMAR_GM_CANWAIT)) == 0, + ("invalid flags 0x%x", flags)); + + DMAR_CTX_LOCK(ctx); + error = dmar_gas_alloc_region(ctx, entry, flags); + if (error != 0) { + DMAR_CTX_UNLOCK(ctx); + return (error); + } + entry->flags |= eflags; + DMAR_CTX_UNLOCK(ctx); + + error = ctx_map_buf(ctx, entry->start, entry->end - entry->start, ma, + ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) | + ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) | + ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) | + ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0), + (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); + if (error == ENOMEM) { + DMAR_CTX_LOCK(ctx); + dmar_gas_free_region(ctx, entry); + DMAR_CTX_UNLOCK(ctx); + entry->flags = 0; + return (error); + } + KASSERT(error == 0, + ("unexpected error %d from ctx_map_buf", error)); + + return (0); +} diff --git a/sys/x86/iommu/intel_idpgtbl.c b/sys/x86/iommu/intel_idpgtbl.c new file mode 100644 index 0000000..48e9272 --- /dev/null +++ b/sys/x86/iommu/intel_idpgtbl.c @@ -0,0 +1,796 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, + dmar_gaddr_t size, int flags); +static void ctx_flush_iotlb(struct dmar_ctx *ctx, dmar_gaddr_t base, + dmar_gaddr_t size, int flags); + +/* + * The cache of the identity mapping page tables for the DMARs. Using + * the cache saves significant amount of memory for page tables by + * reusing the page tables, since usually DMARs are identical and have + * the same capabilities. Still, cache records the information needed + * to match DMAR capabilities and page table format, to correctly + * handle different DMARs. + */ + +struct idpgtbl { + dmar_gaddr_t maxaddr; /* Page table covers the guest address + range [0..maxaddr) */ + int pglvl; /* Total page table levels ignoring + superpages */ + int leaf; /* The last materialized page table + level, it is non-zero if superpages + are supported */ + vm_object_t pgtbl_obj; /* The page table pages */ + LIST_ENTRY(idpgtbl) link; +}; + +static struct sx idpgtbl_lock; +SX_SYSINIT(idpgtbl, &idpgtbl_lock, "idpgtbl"); +static LIST_HEAD(, idpgtbl) idpgtbls = LIST_HEAD_INITIALIZER(idpgtbls); +static MALLOC_DEFINE(M_DMAR_IDPGTBL, "dmar_idpgtbl", + "Intel DMAR Identity mappings cache elements"); + +/* + * Build the next level of the page tables for the identity mapping. + * - lvl is the level to build; + * - idx is the index of the page table page in the pgtbl_obj, which is + * being allocated filled now; + * - addr is the starting address in the bus address space which is + * mapped by the page table page. + */ +static void +ctx_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx, + dmar_gaddr_t addr) +{ + vm_page_t m, m1; + dmar_pte_t *pte; + struct sf_buf *sf; + dmar_gaddr_t f, pg_sz; + vm_pindex_t base; + int i; + + VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj); + if (addr >= tbl->maxaddr) + return; + m = dmar_pgalloc(tbl->pgtbl_obj, idx, DMAR_PGF_OBJL | DMAR_PGF_WAITOK | + DMAR_PGF_ZERO); + base = idx * DMAR_NPTEPG + 1; /* Index of the first child page of idx */ + pg_sz = pglvl_page_size(tbl->pglvl, lvl); + if (lvl != tbl->leaf) { + for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) + ctx_idmap_nextlvl(tbl, lvl + 1, base + i, f); + } + VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); + pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, DMAR_PGF_WAITOK, &sf); + if (lvl == tbl->leaf) { + for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { + if (f >= tbl->maxaddr) + break; + pte[i].pte = (DMAR_PTE_ADDR_MASK & f) | + DMAR_PTE_R | DMAR_PTE_W; + } + } else { + for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { + if (f >= tbl->maxaddr) + break; + m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i, + DMAR_PGF_NOALLOC); + KASSERT(m1 != NULL, ("lost page table page")); + pte[i].pte = (DMAR_PTE_ADDR_MASK & + VM_PAGE_TO_PHYS(m1)) | DMAR_PTE_R | DMAR_PTE_W; + } + } + /* ctx_get_idmap_pgtbl flushes CPU cache if needed. */ + dmar_unmap_pgtbl(sf, true); + VM_OBJECT_WLOCK(tbl->pgtbl_obj); +} + +/* + * Find a ready and compatible identity-mapping page table in the + * cache. If not found, populate the identity-mapping page table for + * the context, up to the maxaddr. 
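+ * The page table object handed back to the caller carries a
+ * reference of its own, which is expected to be returned later
+ * through put_idmap_pgtbl().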
The maxaddr byte is allowed to be + * not mapped, which is aligned with the definition of Maxmem as the + * highest usable physical address + 1. If superpages are used, the + * maxaddr is typically mapped. + */ +vm_object_t +ctx_get_idmap_pgtbl(struct dmar_ctx *ctx, dmar_gaddr_t maxaddr) +{ + struct dmar_unit *unit; + struct idpgtbl *tbl; + vm_object_t res; + vm_page_t m; + int leaf, i; + + /* + * First, determine where to stop the paging structures. + */ + for (i = 0; i < ctx->pglvl; i++) { + if (i == ctx->pglvl - 1 || ctx_is_sp_lvl(ctx, i)) { + leaf = i; + break; + } + } + + /* + * Search the cache for a compatible page table. Qualified + * page table must map up to maxaddr, its level must be + * supported by the DMAR and leaf should be equal to the + * calculated value. The later restriction could be lifted + * but I believe it is currently impossible to have any + * deviations for existing hardware. + */ + sx_slock(&idpgtbl_lock); + LIST_FOREACH(tbl, &idpgtbls, link) { + if (tbl->maxaddr >= maxaddr && + dmar_pglvl_supported(ctx->dmar, tbl->pglvl) && + tbl->leaf == leaf) { + res = tbl->pgtbl_obj; + vm_object_reference(res); + sx_sunlock(&idpgtbl_lock); + ctx->pglvl = tbl->pglvl; /* XXXKIB ? */ + goto end; + } + } + + /* + * Not found in cache, relock the cache into exclusive mode to + * be able to add element, and recheck cache again after the + * relock. + */ + sx_sunlock(&idpgtbl_lock); + sx_xlock(&idpgtbl_lock); + LIST_FOREACH(tbl, &idpgtbls, link) { + if (tbl->maxaddr >= maxaddr && + dmar_pglvl_supported(ctx->dmar, tbl->pglvl) && + tbl->leaf == leaf) { + res = tbl->pgtbl_obj; + vm_object_reference(res); + sx_xunlock(&idpgtbl_lock); + ctx->pglvl = tbl->pglvl; /* XXXKIB ? */ + return (res); + } + } + + /* + * Still not found, create new page table. + */ + tbl = malloc(sizeof(*tbl), M_DMAR_IDPGTBL, M_WAITOK); + tbl->pglvl = ctx->pglvl; + tbl->leaf = leaf; + tbl->maxaddr = maxaddr; + tbl->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, + IDX_TO_OFF(pglvl_max_pages(tbl->pglvl)), 0, 0, NULL); + VM_OBJECT_WLOCK(tbl->pgtbl_obj); + ctx_idmap_nextlvl(tbl, 0, 0, 0); + VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); + LIST_INSERT_HEAD(&idpgtbls, tbl, link); + res = tbl->pgtbl_obj; + vm_object_reference(res); + sx_xunlock(&idpgtbl_lock); + +end: + /* + * Table was found or created. + * + * If DMAR does not snoop paging structures accesses, flush + * CPU cache to memory. Note that dmar_unmap_pgtbl() coherent + * argument was possibly invalid at the time of the identity + * page table creation, since DMAR which was passed at the + * time of creation could be coherent, while current DMAR is + * not. + * + * If DMAR cannot look into the chipset write buffer, flush it + * as well. + */ + unit = ctx->dmar; + if (!DMAR_IS_COHERENT(unit)) { + VM_OBJECT_WLOCK(res); + for (m = vm_page_lookup(res, 0); m != NULL; + m = vm_page_next(m)) + pmap_invalidate_cache_pages(&m, 1); + VM_OBJECT_WUNLOCK(res); + } + if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { + DMAR_LOCK(unit); + dmar_flush_write_bufs(unit); + DMAR_UNLOCK(unit); + } + + return (res); +} + +/* + * Return a reference to the identity mapping page table to the cache. + */ +void +put_idmap_pgtbl(vm_object_t obj) +{ + struct idpgtbl *tbl, *tbl1; + vm_object_t rmobj; + + sx_slock(&idpgtbl_lock); + KASSERT(obj->ref_count >= 2, ("lost cache reference")); + vm_object_deallocate(obj); + + /* + * Cache always owns one last reference on the page table object. + * If there is an additional reference, object must stay. 
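+ * The check is made under the shared lock; the teardown below
+ * retakes the lock exclusively and frees any cached table whose
+ * only remaining reference is the one owned by the cache.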
+ */ + if (obj->ref_count > 1) { + sx_sunlock(&idpgtbl_lock); + return; + } + + /* + * Cache reference is the last, remove cache element and free + * page table object, returning the page table pages to the + * system. + */ + sx_sunlock(&idpgtbl_lock); + sx_xlock(&idpgtbl_lock); + LIST_FOREACH_SAFE(tbl, &idpgtbls, link, tbl1) { + rmobj = tbl->pgtbl_obj; + if (rmobj->ref_count == 1) { + LIST_REMOVE(tbl, link); + atomic_subtract_int(&dmar_tbl_pagecnt, + rmobj->resident_page_count); + vm_object_deallocate(rmobj); + free(tbl, M_DMAR_IDPGTBL); + } + } + sx_xunlock(&idpgtbl_lock); +} + +/* + * The core routines to map and unmap host pages at the given guest + * address. Support superpages. + */ + +/* + * Index of the pte for the guest address base in the page table at + * the level lvl. + */ +static int +ctx_pgtbl_pte_off(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl) +{ + + base >>= DMAR_PAGE_SHIFT + (ctx->pglvl - lvl - 1) * DMAR_NPTEPGSHIFT; + return (base & DMAR_PTEMASK); +} + +/* + * Returns the page index of the page table page in the page table + * object, which maps the given address base at the page table level + * lvl. + */ +static vm_pindex_t +ctx_pgtbl_get_pindex(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl) +{ + vm_pindex_t idx, pidx; + int i; + + KASSERT(lvl >= 0 && lvl < ctx->pglvl, ("wrong lvl %p %d", ctx, lvl)); + + for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) + idx = ctx_pgtbl_pte_off(ctx, base, i) + pidx * DMAR_NPTEPG + 1; + return (idx); +} + +static dmar_pte_t * +ctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags, + vm_pindex_t *idxp, struct sf_buf **sf) +{ + vm_page_t m; + struct sf_buf *sfp; + dmar_pte_t *pte, *ptep; + vm_pindex_t idx, idx1; + + DMAR_CTX_ASSERT_PGLOCKED(ctx); + KASSERT((flags & DMAR_PGF_OBJL) != 0, ("lost PGF_OBJL")); + + idx = ctx_pgtbl_get_pindex(ctx, base, lvl); + if (*sf != NULL && idx == *idxp) { + pte = (dmar_pte_t *)sf_buf_kva(*sf); + } else { + if (*sf != NULL) + dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar)); + *idxp = idx; +retry: + pte = dmar_map_pgtbl(ctx->pgtbl_obj, idx, flags, sf); + if (pte == NULL) { + KASSERT(lvl > 0, ("lost root page table page %p", ctx)); + /* + * Page table page does not exists, allocate + * it and create pte in the up level. + */ + m = dmar_pgalloc(ctx->pgtbl_obj, idx, flags | + DMAR_PGF_ZERO); + if (m == NULL) + return (NULL); + + /* + * Prevent potential free while pgtbl_obj is + * unlocked in the recursive call to + * ctx_pgtbl_map_pte(), if other thread did + * pte write and clean while the lock if + * dropped. + */ + m->wire_count++; + + sfp = NULL; + ptep = ctx_pgtbl_map_pte(ctx, base, lvl - 1, flags, + &idx1, &sfp); + if (ptep == NULL) { + KASSERT(m->pindex != 0, + ("loosing root page %p", ctx)); + m->wire_count--; + dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags); + return (NULL); + } + dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W | + VM_PAGE_TO_PHYS(m)); + sf_buf_page(sfp)->wire_count += 1; + m->wire_count--; + dmar_unmap_pgtbl(sfp, DMAR_IS_COHERENT(ctx->dmar)); + /* Only executed once. 
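+ * The page table page allocated above is now reachable from the
+ * parent level, so the retried dmar_map_pgtbl() lookup finds it
+ * instead of allocating another page.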
*/ + goto retry; + } + } + pte += ctx_pgtbl_pte_off(ctx, base, lvl); + return (pte); +} + +static int +ctx_map_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, + vm_page_t *ma, uint64_t pflags, int flags) +{ + struct dmar_unit *unit; + dmar_pte_t *pte; + struct sf_buf *sf; + dmar_gaddr_t pg_sz, base1, size1; + vm_pindex_t pi, c, idx, run_sz; + int lvl; + bool superpage; + + DMAR_CTX_ASSERT_PGLOCKED(ctx); + + base1 = base; + size1 = size; + flags |= DMAR_PGF_OBJL; + TD_PREP_PINNED_ASSERT; + + for (sf = NULL, pi = 0; size > 0; base += pg_sz, size -= pg_sz, + pi += run_sz) { + for (lvl = 0, c = 0, superpage = false;; lvl++) { + pg_sz = ctx_page_size(ctx, lvl); + run_sz = pg_sz >> DMAR_PAGE_SHIFT; + if (lvl == ctx->pglvl - 1) + break; + /* + * Check if the current base suitable for the + * superpage mapping. First, verify the level. + */ + if (!ctx_is_sp_lvl(ctx, lvl)) + continue; + /* + * Next, look at the size of the mapping and + * alignment of both guest and host addresses. + */ + if (size < pg_sz || (base & (pg_sz - 1)) != 0 || + (VM_PAGE_TO_PHYS(ma[pi]) & (pg_sz - 1)) != 0) + continue; + /* All passed, check host pages contiguouty. */ + if (c == 0) { + for (c = 1; c < run_sz; c++) { + if (VM_PAGE_TO_PHYS(ma[pi + c]) != + VM_PAGE_TO_PHYS(ma[pi + c - 1]) + + PAGE_SIZE) + break; + } + } + if (c >= run_sz) { + superpage = true; + break; + } + } + KASSERT(size >= pg_sz, + ("mapping loop overflow %p %jx %jx %jx", ctx, + (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); + pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf); + if (pte == NULL) { + KASSERT((flags & DMAR_PGF_WAITOK) == 0, + ("failed waitable pte alloc %p", ctx)); + if (sf != NULL) { + dmar_unmap_pgtbl(sf, + DMAR_IS_COHERENT(ctx->dmar)); + } + ctx_unmap_buf_locked(ctx, base1, base - base1, flags); + TD_PINNED_ASSERT; + return (ENOMEM); + } + dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags | + (superpage ? DMAR_PTE_SP : 0)); + sf_buf_page(sf)->wire_count += 1; + } + if (sf != NULL) + dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar)); + DMAR_CTX_PGUNLOCK(ctx); + unit = ctx->dmar; + if ((unit->hw_cap & DMAR_CAP_CM) != 0) + ctx_flush_iotlb(ctx, base1, size1, flags); + else if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { + /* See 11.1 Write Buffer Flushing. 
*/ + DMAR_LOCK(unit); + dmar_flush_write_bufs(unit); + DMAR_UNLOCK(unit); + } + + TD_PINNED_ASSERT; + return (0); +} + +int +ctx_map_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, + vm_page_t *ma, uint64_t pflags, int flags) +{ + + KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0, + ("modifying idmap pagetable ctx %p", ctx)); + KASSERT((base & DMAR_PAGE_MASK) == 0, + ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base, + (uintmax_t)size)); + KASSERT((size & DMAR_PAGE_MASK) == 0, + ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base, + (uintmax_t)size)); + KASSERT(size > 0, ("zero size %p %jx %jx", ctx, (uintmax_t)base, + (uintmax_t)size)); + KASSERT(base < (1ULL << ctx->agaw), + ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, + (uintmax_t)size, ctx->agaw)); + KASSERT(base + size < (1ULL << ctx->agaw), + ("end too high %p %jx %jx agaw %jx", ctx, (uintmax_t)base, + (uintmax_t)size, ctx->agaw)); + KASSERT(base + size > base, + ("size overflow %p %jx %jx", ctx, (uintmax_t)base, + (uintmax_t)size)); + KASSERT((pflags & (DMAR_PTE_R | DMAR_PTE_W)) != 0, + ("neither read nor write %jx", (uintmax_t)pflags)); + KASSERT((pflags & ~(DMAR_PTE_R | DMAR_PTE_W | DMAR_PTE_SNP | + DMAR_PTE_TM)) == 0, + ("invalid pte flags %jx", (uintmax_t)pflags)); + KASSERT((pflags & DMAR_PTE_SNP) == 0 || + (ctx->dmar->hw_ecap & DMAR_ECAP_SC) != 0, + ("PTE_SNP for dmar without snoop control %p %jx", + ctx, (uintmax_t)pflags)); + KASSERT((pflags & DMAR_PTE_TM) == 0 || + (ctx->dmar->hw_ecap & DMAR_ECAP_DI) != 0, + ("PTE_TM for dmar without DIOTLB %p %jx", + ctx, (uintmax_t)pflags)); + KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); + + DMAR_CTX_PGLOCK(ctx); + return (ctx_map_buf_locked(ctx, base, size, ma, pflags, flags)); +} + +static void ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, + int lvl, int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_fs); + +static void +ctx_free_pgtbl_pde(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags) +{ + struct sf_buf *sf; + dmar_pte_t *pde; + vm_pindex_t idx; + + sf = NULL; + pde = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf); + ctx_unmap_clear_pte(ctx, base, lvl, flags, pde, &sf, true); +} + +static void +ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, + int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf) +{ + vm_page_t m; + + dmar_pte_clear(&pte->pte); + m = sf_buf_page(*sf); + if (free_sf) { + dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar)); + *sf = NULL; + } + m->wire_count--; + if (m->wire_count != 0) + return; + KASSERT(lvl != 0, + ("lost reference (lvl) on root pg ctx %p base %jx lvl %d", + ctx, (uintmax_t)base, lvl)); + KASSERT(m->pindex != 0, + ("lost reference (idx) on root pg ctx %p base %jx lvl %d", + ctx, (uintmax_t)base, lvl)); + dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags); + ctx_free_pgtbl_pde(ctx, base, lvl - 1, flags); +} + +/* + * Assumes that the unmap is never partial. 
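The assertions in ctx_map_buf() above spell out the caller contract: page-aligned base and size that fit below the context's AGAW, at least one of DMAR_PTE_R/DMAR_PTE_W, and at most DMAR_PGF_WAITOK in flags. A hypothetical caller that honours that contract (sketch only; the helper name is invented):

	static int
	example_map_then_unmap(struct dmar_ctx *ctx, dmar_gaddr_t gaddr,
	    vm_page_t *ma, int npages)
	{
		dmar_gaddr_t size;
		int error;

		size = (dmar_gaddr_t)npages * DMAR_PAGE_SIZE;
		error = ctx_map_buf(ctx, gaddr, size, ma,
		    DMAR_PTE_R | DMAR_PTE_W, DMAR_PGF_WAITOK);
		if (error != 0)
			return (error);
		/* ... device DMA targets [gaddr, gaddr + size) ... */
		return (ctx_unmap_buf(ctx, gaddr, size, DMAR_PGF_WAITOK));
	}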
+ */ +static int +ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, + dmar_gaddr_t size, int flags) +{ + dmar_pte_t *pte; + struct sf_buf *sf; + vm_pindex_t idx; + dmar_gaddr_t pg_sz, base1, size1; + int lvl; + + DMAR_CTX_ASSERT_PGLOCKED(ctx); + if (size == 0) + return (0); + + KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0, + ("modifying idmap pagetable ctx %p", ctx)); + KASSERT((base & DMAR_PAGE_MASK) == 0, + ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base, + (uintmax_t)size)); + KASSERT((size & DMAR_PAGE_MASK) == 0, + ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base, + (uintmax_t)size)); + KASSERT(base < (1ULL << ctx->agaw), + ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, + (uintmax_t)size, ctx->agaw)); + KASSERT(base + size < (1ULL << ctx->agaw), + ("end too high %p %jx %jx agaw %jx", ctx, (uintmax_t)base, + (uintmax_t)size, ctx->agaw)); + KASSERT(base + size > base, + ("size overflow %p %jx %jx", ctx, (uintmax_t)base, + (uintmax_t)size)); + KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); + + base1 = base; + size1 = size; + flags |= DMAR_PGF_OBJL; + TD_PREP_PINNED_ASSERT; + + for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) { + for (lvl = 0; lvl < ctx->pglvl; lvl++) { + if (lvl != ctx->pglvl - 1 && !ctx_is_sp_lvl(ctx, lvl)) + continue; + pg_sz = ctx_page_size(ctx, lvl); + if (pg_sz > size) + continue; + pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags, + &idx, &sf); + KASSERT(pte != NULL, + ("sleeping or page missed %p %jx %d 0x%x", + ctx, (uintmax_t)base, lvl, flags)); + if ((pte->pte & DMAR_PTE_SP) != 0 || + lvl == ctx->pglvl - 1) { + ctx_unmap_clear_pte(ctx, base, lvl, flags, + pte, &sf, false); + break; + } + } + KASSERT(size >= pg_sz, + ("unmapping loop overflow %p %jx %jx %jx", ctx, + (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); + } + if (sf != NULL) + dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar)); + DMAR_CTX_PGUNLOCK(ctx); + ctx_flush_iotlb(ctx, base1, size1, flags); + /* + * See 11.1 Write Buffer Flushing for an explanation why RWBF + * can be ignored there. + */ + + TD_PINNED_ASSERT; + return (0); +} + +int +ctx_unmap_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, + int flags) +{ + + DMAR_CTX_PGLOCK(ctx); + return (ctx_unmap_buf_locked(ctx, base, size, flags)); +} + +int +ctx_alloc_pgtbl(struct dmar_ctx *ctx) +{ + vm_page_t m; + + KASSERT(ctx->pgtbl_obj == NULL, ("already initialized %p", ctx)); + + ctx->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, + IDX_TO_OFF(pglvl_max_pages(ctx->pglvl)), 0, 0, NULL); + DMAR_CTX_PGLOCK(ctx); + m = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_WAITOK | + DMAR_PGF_ZERO | DMAR_PGF_OBJL); + /* No implicit free of the top level page table page. 
*/ + m->wire_count = 1; + DMAR_CTX_PGUNLOCK(ctx); + return (0); +} + +void +ctx_free_pgtbl(struct dmar_ctx *ctx) +{ + vm_object_t obj; + vm_page_t m; + + obj = ctx->pgtbl_obj; + if (obj == NULL) { + KASSERT((ctx->dmar->hw_ecap & DMAR_ECAP_PT) != 0 && + (ctx->flags & DMAR_CTX_IDMAP) != 0, + ("lost pagetable object ctx %p", ctx)); + return; + } + DMAR_CTX_ASSERT_PGLOCKED(ctx); + ctx->pgtbl_obj = NULL; + + if ((ctx->flags & DMAR_CTX_IDMAP) != 0) { + put_idmap_pgtbl(obj); + ctx->flags &= ~DMAR_CTX_IDMAP; + return; + } + + /* Obliterate wire_counts */ + VM_OBJECT_ASSERT_WLOCKED(obj); + for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) + m->wire_count = 0; + VM_OBJECT_WUNLOCK(obj); + vm_object_deallocate(obj); +} + +static inline uint64_t +ctx_wait_iotlb_flush(struct dmar_unit *unit, uint64_t wt, int iro) +{ + uint64_t iotlbr; + + dmar_write8(unit, iro + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT | + DMAR_IOTLB_DR | DMAR_IOTLB_DW | wt); + for (;;) { + iotlbr = dmar_read8(unit, iro + DMAR_IOTLB_REG_OFF); + if ((iotlbr & DMAR_IOTLB_IVT) == 0) + break; + cpu_spinwait(); + } + return (iotlbr); +} + +/* + * flags is only intended for PGF_WAITOK, to disallow queued + * invalidation. + */ +static void +ctx_flush_iotlb(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, + int flags) +{ + struct dmar_unit *unit; + dmar_gaddr_t isize; + uint64_t iotlbr; + int am, iro; + + unit = ctx->dmar; +#if 0 + if ((unit->hw_ecap & DMAR_ECAP_QI) != 0 && + (flags & DMAR_PGF_WAITOK) != 0) { + /* + * XXXKIB: There, a queued invalidation interface + * could be used. But since queued and registered + * interfaces cannot be used simultaneously, and we + * must use sleep-less (i.e. register) interface when + * DMAR_PGF_WAITOK is not specified, only register + * interface is suitable. + */ + return; + } +#endif + iro = DMAR_ECAP_IRO(unit->hw_ecap) * 16; + DMAR_LOCK(unit); + if ((unit->hw_cap & DMAR_CAP_PSI) == 0 || size > 2 * 1024 * 1024) { + iotlbr = ctx_wait_iotlb_flush(unit, DMAR_IOTLB_IIRG_DOM | + DMAR_IOTLB_DID(ctx->domain), iro); + KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != + DMAR_IOTLB_IAIG_INVLD, + ("unit %p invalidation failed %jx", (uintmax_t)iotlbr)); + } else { + for (; size > 0; base += isize, size -= isize) { + for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) { + isize = 1ULL << (am + DMAR_PAGE_SHIFT); + if ((base & (isize - 1)) == 0 && size >= isize) + break; + if (am == 0) + break; + } + dmar_write8(unit, iro, base | am); + iotlbr = ctx_wait_iotlb_flush(unit, + DMAR_IOTLB_IIRG_PAGE | DMAR_IOTLB_DID(ctx->domain), + iro); + KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != + DMAR_IOTLB_IAIG_INVLD, + ("unit %p invalidation failed %jx %jx %d", + (uintmax_t)iotlbr, (uintmax_t)base, am)); + /* + * Any non-page granularity covers whole guest + * address space for the domain. + */ + if ((iotlbr & DMAR_IOTLB_IAIG_MASK) != + DMAR_IOTLB_IAIG_PAGE) + break; + } + } + DMAR_UNLOCK(unit); +} diff --git a/sys/x86/iommu/intel_reg.h b/sys/x86/iommu/intel_reg.h new file mode 100644 index 0000000..ba4a233 --- /dev/null +++ b/sys/x86/iommu/intel_reg.h @@ -0,0 +1,294 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __X86_IOMMU_INTEL_REG_H +#define __X86_IOMMU_INTEL_REG_H + +#define DMAR_PAGE_SIZE PAGE_SIZE +#define DMAR_PAGE_MASK (DMAR_PAGE_SIZE - 1) +#define DMAR_PAGE_SHIFT PAGE_SHIFT +#define DMAR_NPTEPG (DMAR_PAGE_SIZE / sizeof(dmar_pte_t)) +#define DMAR_NPTEPGSHIFT 9 +#define DMAR_PTEMASK (DMAR_NPTEPG - 1) + +typedef struct dmar_root_entry { + uint64_t r1; + uint64_t r2; +} dmar_root_entry_t; +#define DMAR_ROOT_R1_P 1 /* Present */ +#define DMAR_ROOT_R1_CTP_MASK 0xfffffffffffff000 /* Mask for Context-Entry + Table Pointer */ + +#define DMAR_CTX_CNT (DMAR_PAGE_SIZE / sizeof(dmar_root_entry_t)) + +typedef struct dmar_ctx_entry { + uint64_t ctx1; + uint64_t ctx2; +} dmar_ctx_entry_t; +#define DMAR_CTX1_P 1 /* Present */ +#define DMAR_CTX1_FPD 2 /* Fault Processing Disable */ + /* Translation Type: */ +#define DMAR_CTX1_T_UNTR 0 /* only Untranslated */ +#define DMAR_CTX1_T_TR 4 /* both Untranslated + and Translated */ +#define DMAR_CTX1_T_PASS 8 /* Pass-Through */ +#define DMAR_CTX1_ASR_MASK 0xfffffffffffff000 /* Mask for the Address + Space Root */ +#define DMAR_CTX2_AW_2LVL 0 /* 2-level page tables */ +#define DMAR_CTX2_AW_3LVL 1 /* 3-level page tables */ +#define DMAR_CTX2_AW_4LVL 2 /* 4-level page tables */ +#define DMAR_CTX2_AW_5LVL 3 /* 5-level page tables */ +#define DMAR_CTX2_AW_6LVL 4 /* 6-level page tables */ +#define DMAR_CTX2_DID(x) ((x) << 8) /* Domain Identifier */ + +typedef struct dmar_pte { + uint64_t pte; +} dmar_pte_t; +#define DMAR_PTE_R 1 /* Read */ +#define DMAR_PTE_W (1 << 1) /* Write */ +#define DMAR_PTE_SP (1 << 7) /* Super Page */ +#define DMAR_PTE_SNP (1 << 11) /* Snoop Behaviour */ +#define DMAR_PTE_ADDR_MASK 0xffffffffff000 /* Address Mask */ +#define DMAR_PTE_TM (1ULL << 62) /* Transient Mapping */ + +/* Version register */ +#define DMAR_VER_REG 0 +#define DMAR_MAJOR_VER(x) (((x) >> 4) & 0xf) +#define DMAR_MINOR_VER(x) ((x) & 0xf) + +/* Capabilities register */ +#define DMAR_CAP_REG 0x8 +#define DMAR_CAP_DRD (1ULL << 55) /* DMA Read Draining */ +#define DMAR_CAP_DWD (1ULL << 54) /* DMA Write Draining */ +#define DMAR_CAP_MAMV(x) ((u_int)(((x) >> 48) & 0x3f)) + /* Maximum Address Mask */ +#define DMAR_CAP_NFR(x) ((u_int)(((x) >> 40) & 0xff) + 1) + /* Num of Fault-recording regs */ +#define DMAR_CAP_PSI (1ULL << 39) /* Page Selective Invalidation */ +#define DMAR_CAP_SPS(x) ((u_int)(((x) >> 34) & 0xf)) /* Super-Page Support */ 
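A short decoding sketch may help readers follow the bit layout; it uses only the accessor macros defined up to this point, and the function itself is illustrative rather than part of the patch:

	static void
	example_print_caps(uint32_t ver, uint64_t cap)
	{

		printf("dmar: version %u.%u\n", DMAR_MAJOR_VER(ver),
		    DMAR_MINOR_VER(ver));
		printf("dmar: %u fault recording registers\n",
		    DMAR_CAP_NFR(cap));
		if ((cap & DMAR_CAP_PSI) != 0)
			printf("dmar: page-selective invalidation, "
			    "max address mask %u\n", DMAR_CAP_MAMV(cap));
		printf("dmar: superpage support bits 0x%x\n",
		    DMAR_CAP_SPS(cap));
	}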
+#define DMAR_CAP_SPS_2M 0x1 +#define DMAR_CAP_SPS_1G 0x2 +#define DMAR_CAP_SPS_512G 0x4 +#define DMAR_CAP_SPS_1T 0x8 +#define DMAR_CAP_FRO(x) ((u_int)(((x) >> 24) & 0x1ff)) + /* Fault-recording reg offset */ +#define DMAR_CAP_ISOCH (1 << 23) /* Isochrony */ +#define DMAR_CAP_ZLR (1 << 22) /* Zero-length reads */ +#define DMAR_CAP_MGAW(x) ((u_int)(((x) >> 16) & 0x3f)) + /* Max Guest Address Width */ +#define DMAR_CAP_SAGAW(x) ((u_int)(((x) >> 8) & 0x1f)) + /* Adjusted Guest Address Width */ +#define DMAR_CAP_SAGAW_2LVL 0x01 +#define DMAR_CAP_SAGAW_3LVL 0x02 +#define DMAR_CAP_SAGAW_4LVL 0x04 +#define DMAR_CAP_SAGAW_5LVL 0x08 +#define DMAR_CAP_SAGAW_6LVL 0x10 +#define DMAR_CAP_CM (1 << 7) /* Caching mode */ +#define DMAR_CAP_PHMR (1 << 6) /* Protected High-mem Region */ +#define DMAR_CAP_PLMR (1 << 5) /* Protected Low-mem Region */ +#define DMAR_CAP_RWBF (1 << 4) /* Required Write-Buffer Flushing */ +#define DMAR_CAP_AFL (1 << 3) /* Advanced Fault Logging */ +#define DMAR_CAP_ND(x) ((u_int)((x) & 0x3)) /* Number of domains */ + +/* Extended Capabilities register */ +#define DMAR_ECAP_REG 0x10 +#define DMAR_ECAP_MHMV(x) ((u_int)(((x) >> 20) & 0xf)) + /* Maximum Handle Mask Value */ +#define DMAR_ECAP_IRO(x) ((u_int)(((x) >> 8) & 0x1f)) + /* IOTLB Register Offset */ +#define DMAR_ECAP_SC (1 << 7) /* Snoop Control */ +#define DMAR_ECAP_PT (1 << 6) /* Pass Through */ +#define DMAR_ECAP_EIM (1 << 4) /* Extended Interrupt Mode */ +#define DMAR_ECAP_IR (1 << 3) /* Interrupt Remapping */ +#define DMAR_ECAP_DI (1 << 2) /* Device IOTLB */ +#define DMAR_ECAP_QI (1 << 1) /* Queued Invalidation */ +#define DMAR_ECAP_C (1 << 0) /* Coherency */ + +/* Global Command register */ +#define DMAR_GCMD_REG 0x18 +#define DMAR_GCMD_TE (1 << 31) /* Translation Enable */ +#define DMAR_GCMD_SRTP (1 << 30) /* Set Root Table Pointer */ +#define DMAR_GCMD_SFL (1 << 29) /* Set Fault Log */ +#define DMAR_GCMD_EAFL (1 << 28) /* Enable Advanced Fault Logging */ +#define DMAR_GCMD_WBF (1 << 27) /* Write Buffer Flush */ +#define DMAR_GCMD_QIE (1 << 26) /* Queued Invalidation Enable */ +#define DMAR_GCMD_IRE (1 << 25) /* Interrupt Remapping Enable */ +#define DMAR_GCMD_SIRTP (1 << 24) /* Set Interrupt Remap Table Pointer */ +#define DMAR_GCMD_CFI (1 << 23) /* Compatibility Format Interrupt */ + +/* Global Status register */ +#define DMAR_GSTS_REG 0x1c +#define DMAR_GSTS_TES (1 << 31) /* Translation Enable Status */ +#define DMAR_GSTS_RTPS (1 << 30) /* Root Table Pointer Status */ +#define DMAR_GSTS_FLS (1 << 29) /* Fault Log Status */ +#define DMAR_GSTS_AFLS (1 << 28) /* Advanced Fault Logging Status */ +#define DMAR_GSTS_WBFS (1 << 27) /* Write Buffer Flush Status */ +#define DMAR_GSTS_QIES (1 << 26) /* Queued Invalidation Enable Status */ +#define DMAR_GSTS_IRES (1 << 25) /* Interrupt Remapping Enable Status */ +#define DMAR_GSTS_IRTPS (1 << 24) /* Interrupt Remapping Table + Pointer Status */ +#define DMAR_GSTS_CFIS (1 << 23) /* Compatibility Format + Interrupt Status */ + +/* Root-Entry Table Address register */ +#define DMAR_RTADDR_REG 0x20 + +/* Context Command register */ +#define DMAR_CCMD_REG 0x28 +#define DMAR_CCMD_ICC (1ULL << 63) /* Invalidate Context-Cache */ +#define DMAR_CCMD_ICC32 (1 << 31) +#define DMAR_CCMD_CIRG_MASK (0x3ULL << 61) /* Context Invalidation + Request Granularity */ +#define DMAR_CCMD_CIRG_GLOB (0x1ULL << 61) /* Global */ +#define DMAR_CCMD_CIRG_DOM (0x2ULL << 61) /* Domain */ +#define DMAR_CCMD_CIRG_DEV (0x3ULL << 61) /* Device */ +#define DMAR_CCMD_CAIG(x) (((x) >> 59) & 0x3) /* Context Actual 
+ Invalidation Granularity */
+#define DMAR_CCMD_CAIG_GLOB 0x1 /* Global */
+#define DMAR_CCMD_CAIG_DOM 0x2 /* Domain */
+#define DMAR_CCMD_CAIG_DEV 0x3 /* Device */
+#define DMAR_CCMD_FM (0x3ULL << 32) /* Function Mask */
+#define DMAR_CCMD_SID(x) (((x) & 0xffff) << 16) /* Source-ID */
+#define DMAR_CCMD_DID(x) ((x) & 0xffff) /* Domain-ID */
+
+/* Invalidate Address register */
+#define DMAR_IVA_REG_OFF 0
+#define DMAR_IVA_IH (1 << 6) /* Invalidation Hint */
+#define DMAR_IVA_AM(x) ((x) & 0x1f) /* Address Mask */
+#define DMAR_IVA_ADDR(x) ((x) & ~0xfffULL) /* Address */
+
+/* IOTLB Invalidate register */
+#define DMAR_IOTLB_REG_OFF 0x8
+#define DMAR_IOTLB_IVT (1ULL << 63) /* Invalidate IOTLB */
+#define DMAR_IOTLB_IVT32 (1 << 31)
+#define DMAR_IOTLB_IIRG_MASK (0x3ULL << 60) /* Invalidation Request
+ Granularity */
+#define DMAR_IOTLB_IIRG_GLB (0x1ULL << 60) /* Global */
+#define DMAR_IOTLB_IIRG_DOM (0x2ULL << 60) /* Domain-selective */
+#define DMAR_IOTLB_IIRG_PAGE (0x3ULL << 60) /* Page-selective */
+#define DMAR_IOTLB_IAIG_MASK (0x3ULL << 57) /* Actual Invalidation
+ Granularity */
+#define DMAR_IOTLB_IAIG_INVLD 0 /* Hw detected error */
+#define DMAR_IOTLB_IAIG_GLB (0x1ULL << 57) /* Global */
+#define DMAR_IOTLB_IAIG_DOM (0x2ULL << 57) /* Domain-selective */
+#define DMAR_IOTLB_IAIG_PAGE (0x3ULL << 57) /* Page-selective */
+#define DMAR_IOTLB_DR (0x1ULL << 49) /* Drain Reads */
+#define DMAR_IOTLB_DW (0x1ULL << 48) /* Drain Writes */
+#define DMAR_IOTLB_DID(x) (((uint64_t)(x) & 0xffff) << 32) /* Domain Id */
+
+/* Fault Status register */
+#define DMAR_FSTS_REG 0x34
+#define DMAR_FSTS_FRI(x) (((x) >> 8) & 0xff) /* Fault Record Index */
+#define DMAR_FSTS_ITE (1 << 6) /* Invalidation Time-out */
+#define DMAR_FSTS_ICE (1 << 5) /* Invalidation Completion */
+#define DMAR_FSTS_IQE (1 << 4) /* Invalidation Queue */
+#define DMAR_FSTS_APF (1 << 3) /* Advanced Pending Fault */
+#define DMAR_FSTS_AFO (1 << 2) /* Advanced Fault Overflow */
+#define DMAR_FSTS_PPF (1 << 1) /* Primary Pending Fault */
+#define DMAR_FSTS_PFO 1 /* Fault Overflow */
+
+/* Fault Event Control register */
+#define DMAR_FECTL_REG 0x38
+#define DMAR_FECTL_IM (1 << 31) /* Interrupt Mask */
+#define DMAR_FECTL_IP (1 << 30) /* Interrupt Pending */
+
+/* Fault Event Data register */
+#define DMAR_FEDATA_REG 0x3c
+
+/* Fault Event Address register */
+#define DMAR_FEADDR_REG 0x40
+
+/* Fault Event Upper Address register */
+#define DMAR_FEUADDR_REG 0x44
+
+/* Advanced Fault Log register */
+#define DMAR_AFLOG_REG 0x58
+
+/* Fault Recording Register, also usable for Advanced Fault Log records */
+#define DMAR_FRCD2_F (1ULL << 63) /* Fault */
+#define DMAR_FRCD2_F32 (1 << 31)
+#define DMAR_FRCD2_T(x) ((int)((x >> 62) & 1)) /* Type */
+#define DMAR_FRCD2_T_W 0 /* Write request */
+#define DMAR_FRCD2_T_R 1 /* Read or AtomicOp */
+#define DMAR_FRCD2_AT(x) ((int)((x >> 60) & 0x3)) /* Address Type */
+#define DMAR_FRCD2_FR(x) ((int)((x >> 32) & 0xff)) /* Fault Reason */
+#define DMAR_FRCD2_SID(x) ((int)(x & 0xffff)) /* Source Identifier */
+#define DMAR_FRCS1_FI_MASK 0xffffffffff000 /* Fault Info, Address Mask */
+
+/* Protected Memory Enable register */
+#define DMAR_PMEN_REG 0x64
+#define DMAR_PMEN_EPM (1 << 31) /* Enable Protected Memory */
+#define DMAR_PMEN_PRS 1 /* Protected Region Status */
+
+/* Protected Low-Memory Base register */
+#define DMAR_PLMBASE_REG 0x68
+
+/* Protected Low-Memory Limit register */
+#define DMAR_PLMLIMIT_REG 0x6c
+
+/* Protected High-Memory Base register */
+#define DMAR_PHMBASE_REG 0x70
+
+/*
Protected High-Memory Limit register */ +#define DMAR_PHMLIMIT_REG 0x78 + +/* Invalidation Queue Head register */ +#define DMAR_IQH_REG 0x80 + +/* Invalidation Queue Tail register */ +#define DMAR_IQT_REG 0x88 + +/* Invalidation Queue Address register */ +#define DMAR_IQA_REG 0x90 + + /* Invalidation Completion Status register */ +#define DMAR_ICS_REG 0x9c +#define DMAR_ICS_IWC 1 /* Invalidation Wait + Descriptor Complete */ + +/* Invalidation Event Control register */ +#define DMAR_IECTL_REG 0xa0 +#define DMAR_IECTL_IM (1 << 31) /* Interrupt Mask */ +#define DMAR_IECTL_IP (1 << 30) /* Interrupt Pending */ + +/* Invalidation Event Data register */ +#define DMAR_IEDATA_REG 0xa4 + +/* Invalidation Event Address register */ +#define DMAR_IEADDR_REG 0xa8 + +/* Invalidation Event Upper Address register */ +#define DMAR_IEUADDR_REG 0xac + +/* Interrupt Remapping Table Address register */ +#define DMAR_IRTA_REG 0xb8 + +#endif diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c new file mode 100644 index 0000000..4e1d157 --- /dev/null +++ b/sys/x86/iommu/intel_utils.c @@ -0,0 +1,498 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +u_int +dmar_nd2mask(u_int nd) +{ + static const u_int masks[] = { + 0x000f, /* nd == 0 */ + 0x002f, /* nd == 1 */ + 0x00ff, /* nd == 2 */ + 0x02ff, /* nd == 3 */ + 0x0fff, /* nd == 4 */ + 0x2fff, /* nd == 5 */ + 0xffff, /* nd == 6 */ + 0x0000, /* nd == 7 reserved */ + }; + + KASSERT(nd <= 6, ("number of domains %d", nd)); + return (masks[nd]); +} + +static const struct sagaw_bits_tag { + int agaw; + int cap; + int awlvl; + int pglvl; +} sagaw_bits[] = { + {.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL, + .pglvl = 2}, + {.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL, + .pglvl = 3}, + {.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL, + .pglvl = 4}, + {.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL, + .pglvl = 5}, + {.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL, + .pglvl = 6} +}; +#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0])) + +bool +dmar_pglvl_supported(struct dmar_unit *unit, int pglvl) +{ + int i; + + for (i = 0; i < SIZEOF_SAGAW_BITS; i++) { + if (sagaw_bits[i].pglvl != pglvl) + continue; + if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0) + return (true); + } + return (false); +} + +int +ctx_set_agaw(struct dmar_ctx *ctx, int mgaw) +{ + int sagaw, i; + + ctx->mgaw = mgaw; + sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap); + for (i = 0; i < SIZEOF_SAGAW_BITS; i++) { + if (sagaw_bits[i].agaw >= mgaw) { + ctx->agaw = sagaw_bits[i].agaw; + ctx->pglvl = sagaw_bits[i].pglvl; + ctx->awlvl = sagaw_bits[i].awlvl; + return (0); + } + } + device_printf(ctx->dmar->dev, + "context request mgaw %d for pci%d:%d:%d:%d, " + "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, ctx->bus, + ctx->slot, ctx->func, sagaw); + return (EINVAL); +} + +/* + * Find a best fit mgaw for the given maxaddr: + * - if allow_less is false, must find sagaw which maps all requested + * addresses (used by identity mappings); + * - if allow_less is true, and no supported sagaw can map all requested + * address space, accept the biggest sagaw, whatever is it. + */ +int +dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less) +{ + int i; + + for (i = 0; i < SIZEOF_SAGAW_BITS; i++) { + if ((1ULL << sagaw_bits[i].agaw) >= maxaddr && + (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0) + break; + } + if (allow_less && i == SIZEOF_SAGAW_BITS) { + do { + i--; + } while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) + == 0); + } + if (i < SIZEOF_SAGAW_BITS) + return (sagaw_bits[i].agaw); + KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d", + (uintmax_t) maxaddr, allow_less)); + return (-1); +} + +/* + * Calculate the total amount of page table pages needed to map the + * whole bus address space on the context with the selected agaw. + */ +vm_pindex_t +pglvl_max_pages(int pglvl) +{ + vm_pindex_t res; + int i; + + for (res = 0, i = pglvl; i > 0; i--) { + res *= DMAR_NPTEPG; + res++; + } + return (res); +} + +/* + * Return true if the page table level lvl supports the superpage for + * the context ctx. 
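As a sanity check on pglvl_max_pages() above: with 512 entries per 4 KB page-table page, a complete pglvl-level tree needs 1 + 512 + 512^2 + ... + 512^(pglvl-1) pages, so pglvl_max_pages(4) == 1 + 512 + 262144 + 134217728 == 134480385. Expressed as a hypothetical assertion:

	KASSERT(pglvl_max_pages(4) ==
	    1 + 512 + 512 * 512 + 512 * 512 * 512,
	    ("pglvl_max_pages(4) miscomputed"));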
+ */ +int +ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl) +{ + int alvl, cap_sps; + static const int sagaw_sp[] = { + DMAR_CAP_SPS_2M, + DMAR_CAP_SPS_1G, + DMAR_CAP_SPS_512G, + DMAR_CAP_SPS_1T + }; + + alvl = ctx->pglvl - lvl - 1; + cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap); + return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) && + (sagaw_sp[alvl] & cap_sps) != 0); +} + +dmar_gaddr_t +pglvl_page_size(int total_pglvl, int lvl) +{ + int rlvl; + static const dmar_gaddr_t pg_sz[] = { + (dmar_gaddr_t)DMAR_PAGE_SIZE, + (dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT, + (dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT), + (dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT), + (dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT), + (dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT) + }; + + KASSERT(lvl >= 0 && lvl < total_pglvl, + ("total %d lvl %d", total_pglvl, lvl)); + rlvl = total_pglvl - lvl - 1; + KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]), + ("sizeof pg_sz lvl %d", lvl)); + return (pg_sz[rlvl]); +} + +dmar_gaddr_t +ctx_page_size(struct dmar_ctx *ctx, int lvl) +{ + + return (pglvl_page_size(ctx->pglvl, lvl)); +} + +dmar_haddr_t dmar_high; +int haw; +int dmar_tbl_pagecnt; + +vm_page_t +dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags) +{ + vm_page_t m; + int zeroed; + + zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0; + for (;;) { + if ((flags & DMAR_PGF_OBJL) == 0) + VM_OBJECT_WLOCK(obj); + m = vm_page_lookup(obj, idx); + if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) { + if ((flags & DMAR_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); + break; + } + m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY | + VM_ALLOC_SYSTEM | zeroed, 1, 0, dmar_high, PAGE_SIZE, + 0, VM_MEMATTR_DEFAULT); + if ((flags & DMAR_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); + if (m != NULL) { + if (zeroed && (m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + atomic_add_int(&dmar_tbl_pagecnt, 1); + break; + } + if ((flags & DMAR_PGF_WAITOK) == 0) + break; + if ((flags & DMAR_PGF_OBJL) != 0) + VM_OBJECT_WUNLOCK(obj); + VM_WAIT; + if ((flags & DMAR_PGF_OBJL) != 0) + VM_OBJECT_WLOCK(obj); + } + return (m); +} + +void +dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags) +{ + vm_page_t m; + + if ((flags & DMAR_PGF_OBJL) == 0) + VM_OBJECT_WLOCK(obj); + m = vm_page_lookup(obj, idx); + if (m != NULL) { + vm_page_free(m); + atomic_subtract_int(&dmar_tbl_pagecnt, 1); + } + if ((flags & DMAR_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); +} + +void * +dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, + struct sf_buf **sf) +{ + vm_page_t m; + bool allocated; + + if ((flags & DMAR_PGF_OBJL) == 0) + VM_OBJECT_WLOCK(obj); + m = vm_page_lookup(obj, idx); + if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) { + m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL); + allocated = true; + } else + allocated = false; + if (m == NULL) { + if ((flags & DMAR_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); + return (NULL); + } + /* Sleepable allocations cannot fail. */ + if ((flags & DMAR_PGF_WAITOK) != 0) + VM_OBJECT_WUNLOCK(obj); + sched_pin(); + *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK) + == 0 ? 
SFB_NOWAIT : 0)); + if (*sf == NULL) { + sched_unpin(); + if (allocated) { + VM_OBJECT_ASSERT_WLOCKED(obj); + dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL); + } + if ((flags & DMAR_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); + return (NULL); + } + if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == + (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) + VM_OBJECT_WLOCK(obj); + else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0) + VM_OBJECT_WUNLOCK(obj); + return ((void *)sf_buf_kva(*sf)); +} + +void +dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent) +{ + vm_page_t m; + + m = sf_buf_page(sf); + sf_buf_free(sf); + sched_unpin(); + + /* + * If DMAR does not snoop paging structures accesses, flush + * CPU cache to memory. + */ + if (!coherent) + pmap_invalidate_cache_pages(&m, 1); +} + +/* + * Load the root entry pointer into the hardware, busily waiting for + * the completion. + */ +int +dmar_load_root_entry_ptr(struct dmar_unit *unit) +{ + vm_page_t root_entry; + + /* + * Access to the GCMD register must be serialized while the + * command is submitted. + */ + DMAR_ASSERT_LOCKED(unit); + + /* VM_OBJECT_RLOCK(unit->ctx_obj); */ + VM_OBJECT_WLOCK(unit->ctx_obj); + root_entry = vm_page_lookup(unit->ctx_obj, 0); + /* VM_OBJECT_RUNLOCK(unit->ctx_obj); */ + VM_OBJECT_WUNLOCK(unit->ctx_obj); + dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry)); + dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP); + /* XXXKIB should have a timeout */ + while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0) + cpu_spinwait(); + return (0); +} + +/* + * Globally invalidate the context entries cache, busily waiting for + * the completion. + */ +int +dmar_inv_ctx_glob(struct dmar_unit *unit) +{ + + /* + * Access to the CCMD register must be serialized while the + * command is submitted. + */ + DMAR_ASSERT_LOCKED(unit); + + /* + * The DMAR_CCMD_ICC bit in the upper dword should be written + * after the low dword write is completed. Amd64 + * dmar_write8() does not have this issue, i386 dmar_write8() + * writes the upper dword last. + */ + dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB); + /* XXXKIB should have a timeout */ + while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0) + cpu_spinwait(); + return (0); +} + +/* + * Globally invalidate the IOTLB, busily waiting for the completion. + */ +int +dmar_inv_iotlb_glob(struct dmar_unit *unit) +{ + int reg; + + DMAR_ASSERT_LOCKED(unit); + + reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap); + /* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */ + dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT | + DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW); + /* XXXKIB should have a timeout */ + while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) & + DMAR_IOTLB_IVT32) != 0) + cpu_spinwait(); + return (0); +} + +/* + * Flush the chipset write buffers. See 11.1 "Write Buffer Flushing" + * in the architecture specification. + */ +int +dmar_flush_write_bufs(struct dmar_unit *unit) +{ + + DMAR_ASSERT_LOCKED(unit); + + /* + * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported. 
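Taken together, the helpers in this file give the hardware bring-up ordering used by the driver: program the root-table pointer, drop stale cached translations, then enable the translation unit, all under the unit lock as the assertions require. A condensed, hypothetical sketch of that sequence (the real driver additionally programs fault reporting):

	static void
	example_dmar_start(struct dmar_unit *unit)
	{

		DMAR_LOCK(unit);
		dmar_load_root_entry_ptr(unit);	/* DMAR_GCMD_SRTP */
		dmar_inv_ctx_glob(unit);	/* drop cached context entries */
		dmar_inv_iotlb_glob(unit);	/* drop cached translations */
		dmar_enable_translation(unit);	/* DMAR_GCMD_TE */
		DMAR_UNLOCK(unit);
	}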
+ */ + KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0, ("")); + + dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF); + /* XXXKIB should have a timeout */ + while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0) + cpu_spinwait(); + return (0); +} + +int +dmar_enable_translation(struct dmar_unit *unit) +{ + + DMAR_ASSERT_LOCKED(unit); + unit->hw_gcmd |= DMAR_GCMD_TE; + dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); + /* XXXKIB should have a timeout */ + while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0) + cpu_spinwait(); + return (0); +} + +int +dmar_disable_translation(struct dmar_unit *unit) +{ + + DMAR_ASSERT_LOCKED(unit); + unit->hw_gcmd &= ~DMAR_GCMD_TE; + dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); + /* XXXKIB should have a timeout */ + while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0) + cpu_spinwait(); + return (0); +} + +void +dmar_enable_intr(struct dmar_unit *unit) +{ + uint32_t fectl; + + fectl = dmar_read4(unit, DMAR_FECTL_REG); + fectl &= ~DMAR_FECTL_IM; + dmar_write4(unit, DMAR_FECTL_REG, fectl); +} + +void +dmar_disable_intr(struct dmar_unit *unit) +{ + uint32_t fectl; + + fectl = dmar_read4(unit, DMAR_FECTL_REG); + dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM); +} + +static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL, + ""); +SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD, &dmar_tbl_pagecnt, 0, + ""); diff --git a/sys/x86/x86/busdma_bounce.c b/sys/x86/x86/busdma_bounce.c new file mode 100644 index 0000000..5f04c01 --- /dev/null +++ b/sys/x86/x86/busdma_bounce.c @@ -0,0 +1,1102 @@ +/*- + * Copyright (c) 1997, 1998 Justin T. Gibbs. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef __i386__ +#define MAX_BPAGES 512 +#else +#define MAX_BPAGES 8192 +#endif +#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 +#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 + +struct bounce_zone; + +struct bus_dma_tag { + struct bus_dma_tag_common common; + int map_count; + bus_dma_segment_t *segments; + struct bounce_zone *bounce_zone; +}; + +struct bounce_page { + vm_offset_t vaddr; /* kva of bounce buffer */ + bus_addr_t busaddr; /* Physical address */ + vm_offset_t datavaddr; /* kva of client data */ + bus_addr_t dataaddr; /* client physical address */ + bus_size_t datacount; /* client data count */ + STAILQ_ENTRY(bounce_page) links; +}; + +int busdma_swi_pending; + +struct bounce_zone { + STAILQ_ENTRY(bounce_zone) links; + STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; + int total_bpages; + int free_bpages; + int reserved_bpages; + int active_bpages; + int total_bounced; + int total_deferred; + int map_count; + bus_size_t alignment; + bus_addr_t lowaddr; + char zoneid[8]; + char lowaddrid[20]; + struct sysctl_ctx_list sysctl_tree; + struct sysctl_oid *sysctl_tree_top; +}; + +static struct mtx bounce_lock; +static int total_bpages; +static int busdma_zonecount; +static STAILQ_HEAD(, bounce_zone) bounce_zone_list; + +static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); +SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, + "Total bounce pages"); + +struct bus_dmamap { + struct bp_list bpages; + int pagesneeded; + int pagesreserved; + bus_dma_tag_t dmat; + struct memdesc mem; + bus_dmamap_callback_t *callback; + void *callback_arg; + STAILQ_ENTRY(bus_dmamap) links; +}; + +static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; +static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; +static struct bus_dmamap nobounce_dmamap, contig_dmamap; + +static void init_bounce_pages(void *dummy); +static int alloc_bounce_zone(bus_dma_tag_t dmat); +static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); +static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + int commit); +static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_offset_t vaddr, bus_addr_t addr, + bus_size_t size); +static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); +int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); +static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + pmap_t pmap, void *buf, bus_size_t buflen, + int flags); +static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_paddr_t buf, bus_size_t buflen, + int flags); +static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + int flags); + +#ifdef XEN +#undef pmap_kextract +#define pmap_kextract pmap_kextract_ma +#endif + +/* + * Allocate a device specific dma_tag. + */ +static int +bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, + bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, + bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, + int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat) +{ + bus_dma_tag_t newtag; + int error; + + *dmat = NULL; + error = common_bus_dma_tag_create(parent != NULL ? 
&parent->common : + NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, + maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, + sizeof (struct bus_dma_tag), (void **)&newtag); + if (error != 0) + return (error); + + newtag->common.impl = &bus_dma_bounce_impl; + newtag->map_count = 0; + newtag->segments = NULL; + + if (parent != NULL && ((newtag->common.filter != NULL) || + ((parent->common.flags & BUS_DMA_COULD_BOUNCE) != 0))) + newtag->common.flags |= BUS_DMA_COULD_BOUNCE; + + if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || + newtag->common.alignment > 1) + newtag->common.flags |= BUS_DMA_COULD_BOUNCE; + + if (((newtag->common.flags & BUS_DMA_COULD_BOUNCE) != 0) && + (flags & BUS_DMA_ALLOCNOW) != 0) { + struct bounce_zone *bz; + + /* Must bounce */ + if ((error = alloc_bounce_zone(newtag)) != 0) { + free(newtag, M_DEVBUF); + return (error); + } + bz = newtag->bounce_zone; + + if (ptoa(bz->total_bpages) < maxsize) { + int pages; + + pages = atop(maxsize) - bz->total_bpages; + + /* Add pages to our bounce pool */ + if (alloc_bounce_pages(newtag, pages) < pages) + error = ENOMEM; + } + /* Performed initial allocation */ + newtag->common.flags |= BUS_DMA_MIN_ALLOC_COMP; + } else + error = 0; + + if (error != 0) + free(newtag, M_DEVBUF); + else + *dmat = newtag; + CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", + __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), + error); + return (error); +} + +static int +bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) +{ + bus_dma_tag_t dmat_copy, parent; + int error; + + error = 0; + dmat_copy = dmat; + + if (dmat != NULL) { + if (dmat->map_count != 0) { + error = EBUSY; + goto out; + } + while (dmat != NULL) { + parent = (bus_dma_tag_t)dmat->common.parent; + atomic_subtract_int(&dmat->common.ref_count, 1); + if (dmat->common.ref_count == 0) { + if (dmat->segments != NULL) + free(dmat->segments, M_DEVBUF); + free(dmat, M_DEVBUF); + /* + * Last reference count, so + * release our reference + * count on our parent. + */ + dmat = parent; + } else + dmat = NULL; + } + } +out: + CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); + return (error); +} + +/* + * Allocate a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +static int +bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + struct bounce_zone *bz; + int error, maxpages, pages; + + error = 0; + + if (dmat->segments == NULL) { + dmat->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->common.nsegments, + M_DEVBUF, M_NOWAIT); + if (dmat->segments == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, ENOMEM); + return (ENOMEM); + } + } + + /* + * Bouncing might be required if the driver asks for an active + * exclusion region, a data alignment that is stricter than 1, and/or + * an active address boundary. + */ + if (dmat->common.flags & BUS_DMA_COULD_BOUNCE) { + /* Must bounce */ + if (dmat->bounce_zone == NULL) { + if ((error = alloc_bounce_zone(dmat)) != 0) + return (error); + } + bz = dmat->bounce_zone; + + *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, + M_NOWAIT | M_ZERO); + if (*mapp == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, ENOMEM); + return (ENOMEM); + } + + /* Initialize the new map */ + STAILQ_INIT(&((*mapp)->bpages)); + + /* + * Attempt to add pages to our pool on a per-instance + * basis up to a sane limit. 
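For context, the BUS_DMA_COULD_BOUNCE path above is what a typical driver triggers when it restricts DMA addresses below the top of RAM, for instance a device limited to 32-bit addresses on a machine with memory above 4 GB. A hypothetical tag creation that lands on this path (standard bus_dma API; the wrapper name is invented):

	static int
	example_create_32bit_tag(device_t dev, bus_dma_tag_t *tagp)
	{

		return (bus_dma_tag_create(bus_get_dma_tag(dev),
		    1, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr: bounce above 4 GB */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    MAXPHYS, 1, MAXPHYS,	/* maxsize, nsegments, maxsegsz */
		    0, NULL, NULL, tagp));
	}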
+ */ + if (dmat->common.alignment > 1) + maxpages = MAX_BPAGES; + else + maxpages = MIN(MAX_BPAGES, Maxmem - + atop(dmat->common.lowaddr)); + if ((dmat->common.flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || + (bz->map_count > 0 && bz->total_bpages < maxpages)) { + pages = MAX(atop(dmat->common.maxsize), 1); + pages = MIN(maxpages - bz->total_bpages, pages); + pages = MAX(pages, 1); + if (alloc_bounce_pages(dmat, pages) < pages) + error = ENOMEM; + if ((dmat->common.flags & BUS_DMA_MIN_ALLOC_COMP) + == 0) { + if (error == 0) { + dmat->common.flags |= + BUS_DMA_MIN_ALLOC_COMP; + } + } else + error = 0; + } + bz->map_count++; + } else { + *mapp = NULL; + } + if (error == 0) + dmat->map_count++; + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->common.flags, error); + return (error); +} + +/* + * Destroy a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +static int +bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + + if (map != NULL && map != &nobounce_dmamap && map != &contig_dmamap) { + if (STAILQ_FIRST(&map->bpages) != NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, EBUSY); + return (EBUSY); + } + if (dmat->bounce_zone) + dmat->bounce_zone->map_count--; + free(map, M_DEVBUF); + } + dmat->map_count--; + CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); + return (0); +} + + +/* + * Allocate a piece of memory that can be efficiently mapped into + * bus device space based on the constraints lited in the dma tag. + * A dmamap to for use with dmamap_load is also allocated. + */ +static int +bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp) +{ + vm_memattr_t attr; + int mflags; + + if (flags & BUS_DMA_NOWAIT) + mflags = M_NOWAIT; + else + mflags = M_WAITOK; + + /* If we succeed, no mapping/bouncing will be required */ + *mapp = NULL; + + if (dmat->segments == NULL) { + dmat->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->common.nsegments, + M_DEVBUF, mflags); + if (dmat->segments == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->common.flags, ENOMEM); + return (ENOMEM); + } + } + if (flags & BUS_DMA_ZERO) + mflags |= M_ZERO; + if (flags & BUS_DMA_NOCACHE) + attr = VM_MEMATTR_UNCACHEABLE; + else + attr = VM_MEMATTR_DEFAULT; + + /* + * XXX: + * (dmat->alignment < dmat->maxsize) is just a quick hack; the exact + * alignment guarantees of malloc need to be nailed down, and the + * code below should be rewritten to take that into account. + * + * In the meantime, we'll warn the user if malloc gets it wrong. + */ + if ((dmat->common.maxsize <= PAGE_SIZE) && + (dmat->common.alignment < dmat->common.maxsize) && + dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && + attr == VM_MEMATTR_DEFAULT) { + *vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags); + } else if (dmat->common.nsegments >= btoc(dmat->common.maxsize) && + dmat->common.alignment <= PAGE_SIZE && + (dmat->common.boundary == 0 || + dmat->common.boundary >= dmat->common.lowaddr)) { + /* Page-based multi-segment allocations allowed */ + *vaddr = (void *)kmem_alloc_attr(kernel_map, + dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, + attr); + *mapp = &contig_dmamap; + } else { + *vaddr = (void *)kmem_alloc_contig(kernel_map, + dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, + dmat->common.alignment != 0 ? 
dmat->common.alignment : 1ul, + dmat->common.boundary, attr); + *mapp = &contig_dmamap; + } + if (*vaddr == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->common.flags, ENOMEM); + return (ENOMEM); + } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { + printf("bus_dmamem_alloc failed to align memory properly.\n"); + } + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->common.flags, 0); + return (0); +} + +/* + * Free a piece of memory and it's allociated dmamap, that was allocated + * via bus_dmamem_alloc. Make the same choice for free/contigfree. + */ +static void +bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) +{ + /* + * dmamem does not need to be bounced, so the map should be + * NULL if malloc() was used and contig_dmamap if + * kmem_alloc_contig() was used. + */ + if (!(map == NULL || map == &contig_dmamap)) + panic("bus_dmamem_free: Invalid map freed\n"); + if (map == NULL) + free(vaddr, M_DEVBUF); + else + kmem_free(kernel_map, (vm_offset_t)vaddr, + dmat->common.maxsize); + CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, + dmat->common.flags); +} + +static void +_bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, + bus_size_t buflen, int flags) +{ + bus_addr_t curaddr; + bus_size_t sgsize; + + if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { + /* + * Count the number of bounce pages + * needed in order to complete this transfer + */ + curaddr = buf; + while (buflen != 0) { + sgsize = MIN(buflen, dmat->common.maxsegsz); + if (bus_dma_run_filter(&dmat->common, curaddr)) { + sgsize = MIN(sgsize, PAGE_SIZE); + map->pagesneeded++; + } + curaddr += sgsize; + buflen -= sgsize; + } + CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); + } +} + +static void +_bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, + void *buf, bus_size_t buflen, int flags) +{ + vm_offset_t vaddr; + vm_offset_t vendaddr; + bus_addr_t paddr; + bus_size_t sg_len; + + if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { + CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " + "alignment= %d", dmat->common.lowaddr, + ptoa((vm_paddr_t)Maxmem), + dmat->common.boundary, dmat->common.alignment); + CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", + map, &nobounce_dmamap, map->pagesneeded); + /* + * Count the number of bounce pages + * needed in order to complete this transfer + */ + vaddr = (vm_offset_t)buf; + vendaddr = (vm_offset_t)buf + buflen; + + while (vaddr < vendaddr) { + sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); + if (pmap == kernel_pmap) + paddr = pmap_kextract(vaddr); + else + paddr = pmap_extract(pmap, vaddr); + if (bus_dma_run_filter(&dmat->common, paddr) != 0) { + sg_len = roundup2(sg_len, + dmat->common.alignment); + map->pagesneeded++; + } + vaddr += sg_len; + } + CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); + } +} + +static int +_bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) +{ + + /* Reserve Necessary Bounce Pages */ + mtx_lock(&bounce_lock); + if (flags & BUS_DMA_NOWAIT) { + if (reserve_bounce_pages(dmat, map, 0) != 0) { + mtx_unlock(&bounce_lock); + return (ENOMEM); + } + } else { + if (reserve_bounce_pages(dmat, map, 1) != 0) { + /* Queue us for resources */ + STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); + mtx_unlock(&bounce_lock); + return (EINPROGRESS); + } + } + mtx_unlock(&bounce_lock); + + return (0); +} + +/* + * Add a single 
contiguous physical range to the segment list. + */ +static int +_bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, + bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) +{ + bus_addr_t baddr, bmask; + int seg; + + /* + * Make sure we don't cross any boundaries. + */ + bmask = ~(dmat->common.boundary - 1); + if (dmat->common.boundary > 0) { + baddr = (curaddr + dmat->common.boundary) & bmask; + if (sgsize > (baddr - curaddr)) + sgsize = (baddr - curaddr); + } + + /* + * Insert chunk into a segment, coalescing with + * previous segment if possible. + */ + seg = *segp; + if (seg == -1) { + seg = 0; + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + } else { + if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && + (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && + (dmat->common.boundary == 0 || + (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + segs[seg].ds_len += sgsize; + else { + if (++seg >= dmat->common.nsegments) + return (0); + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + } + } + *segp = seg; + return (sgsize); +} + +/* + * Utility function to load a physical buffer. segp contains + * the starting segment on entrace, and the ending segment on exit. + */ +static int +bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, + int *segp) +{ + bus_size_t sgsize; + bus_addr_t curaddr; + int error; + + if (map == NULL || map == &contig_dmamap) + map = &nobounce_dmamap; + + if (segs == NULL) + segs = dmat->segments; + + if ((dmat->common.flags & BUS_DMA_COULD_BOUNCE) != 0) { + _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); + if (map->pagesneeded != 0) { + error = _bus_dmamap_reserve_pages(dmat, map, flags); + if (error) + return (error); + } + } + + while (buflen > 0) { + curaddr = buf; + sgsize = MIN(buflen, dmat->common.maxsegsz); + if (((dmat->common.flags & BUS_DMA_COULD_BOUNCE) != 0) && + map->pagesneeded != 0 && + bus_dma_run_filter(&dmat->common, curaddr)) { + sgsize = MIN(sgsize, PAGE_SIZE); + curaddr = add_bounce_page(dmat, map, 0, curaddr, + sgsize); + } + sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, + segp); + if (sgsize == 0) + break; + buf += sgsize; + buflen -= sgsize; + } + + /* + * Did we fit? + */ + return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ +} + +static int +bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, + struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, + bus_dma_segment_t *segs, int *segp) +{ + vm_paddr_t paddr; + bus_size_t len; + int error, i; + + error = 0; + for (i = 0; tlen > 0; i++, tlen -= len) { + len = min(PAGE_SIZE - ma_offs, tlen); + paddr = VM_PAGE_TO_PHYS(ma[i]) + ma_offs; + error = _bus_dmamap_load_phys(dmat, map, paddr, len, + flags, segs, segp); + if (error != 0) + break; + ma_offs = 0; + } + return (error); +} + +/* + * Utility function to load a linear buffer. segp contains + * the starting segment on entrace, and the ending segment on exit. 
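A concrete case of the boundary clipping in _bus_dmamap_addseg() above: with dmat->common.boundary == 0x10000, curaddr == 0xfff0 and sgsize == 0x100, baddr is rounded up to 0x10000, so the chunk is trimmed to 0x10 bytes and stops exactly at the boundary; the remaining 0xf0 bytes are placed into a new segment on the caller's next iteration.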
+ */ +static int +bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, + int *segp) +{ + bus_size_t sgsize, max_sgsize; + bus_addr_t curaddr; + vm_offset_t vaddr; + int error; + + if (map == NULL || map == &contig_dmamap) + map = &nobounce_dmamap; + + if (segs == NULL) + segs = dmat->segments; + + if ((dmat->common.flags & BUS_DMA_COULD_BOUNCE) != 0) { + _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); + if (map->pagesneeded != 0) { + error = _bus_dmamap_reserve_pages(dmat, map, flags); + if (error) + return (error); + } + } + + vaddr = (vm_offset_t)buf; + while (buflen > 0) { + /* + * Get the physical address for this segment. + */ + if (pmap == kernel_pmap) + curaddr = pmap_kextract(vaddr); + else + curaddr = pmap_extract(pmap, vaddr); + + /* + * Compute the segment size, and adjust counts. + */ + max_sgsize = MIN(buflen, dmat->common.maxsegsz); + sgsize = PAGE_SIZE - ((vm_offset_t)curaddr & PAGE_MASK); + if (((dmat->common.flags & BUS_DMA_COULD_BOUNCE) != 0) && + map->pagesneeded != 0 && + bus_dma_run_filter(&dmat->common, curaddr)) { + sgsize = roundup2(sgsize, dmat->common.alignment); + sgsize = MIN(sgsize, max_sgsize); + curaddr = add_bounce_page(dmat, map, vaddr, curaddr, + sgsize); + } else { + sgsize = MIN(sgsize, max_sgsize); + } + sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, + segp); + if (sgsize == 0) + break; + vaddr += sgsize; + buflen -= sgsize; + } + + /* + * Did we fit? + */ + return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ +} + +static void +bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, + struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) +{ + + if (map == NULL) + return; + map->mem = *mem; + map->dmat = dmat; + map->callback = callback; + map->callback_arg = callback_arg; +} + +static bus_dma_segment_t * +bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dma_segment_t *segs, int nsegs, int error) +{ + + if (segs == NULL) + segs = dmat->segments; + return (segs); +} + +/* + * Release the mapping held by map. + */ +static void +bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bounce_page *bpage; + + while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + STAILQ_REMOVE_HEAD(&map->bpages, links); + free_bounce_page(dmat, bpage); + } +} + +static void +bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dmasync_op_t op) +{ + struct bounce_page *bpage; + + if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + /* + * Handle data bouncing. 
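The two branches below implement the usual bounce discipline: BUS_DMASYNC_PREWRITE copies client data into the bounce pages before the device reads them, and BUS_DMASYNC_POSTREAD copies device-written data back to the client afterwards. The matching driver-side calls look like this (usage sketch, not part of the patch):

	/* Before handing the buffer to the device. */
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/* ... start the DMA and wait for the completion interrupt ... */
	/* After the device is done, before the CPU looks at the data. */
	bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);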
We might also + * want to add support for invalidating + * the caches on broken hardware + */ + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " + "performing bounce", __func__, op, dmat, + dmat->common.flags); + + if ((op & BUS_DMASYNC_PREWRITE) != 0) { + while (bpage != NULL) { + if (bpage->datavaddr != 0) { + bcopy((void *)bpage->datavaddr, + (void *)bpage->vaddr, + bpage->datacount); + } else { + physcopyout(bpage->dataaddr, + (void *)bpage->vaddr, + bpage->datacount); + } + bpage = STAILQ_NEXT(bpage, links); + } + dmat->bounce_zone->total_bounced++; + } + + if ((op & BUS_DMASYNC_POSTREAD) != 0) { + while (bpage != NULL) { + if (bpage->datavaddr != 0) { + bcopy((void *)bpage->vaddr, + (void *)bpage->datavaddr, + bpage->datacount); + } else { + physcopyin((void *)bpage->vaddr, + bpage->dataaddr, + bpage->datacount); + } + bpage = STAILQ_NEXT(bpage, links); + } + dmat->bounce_zone->total_bounced++; + } + } +} + +static void +init_bounce_pages(void *dummy __unused) +{ + + total_bpages = 0; + STAILQ_INIT(&bounce_zone_list); + STAILQ_INIT(&bounce_map_waitinglist); + STAILQ_INIT(&bounce_map_callbacklist); + mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); +} +SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); + +static struct sysctl_ctx_list * +busdma_sysctl_tree(struct bounce_zone *bz) +{ + return (&bz->sysctl_tree); +} + +static struct sysctl_oid * +busdma_sysctl_tree_top(struct bounce_zone *bz) +{ + return (bz->sysctl_tree_top); +} + +#if defined(__amd64__) || defined(PAE) +#define SYSCTL_ADD_BUS_SIZE_T SYSCTL_ADD_UQUAD +#else +#define SYSCTL_ADD_BUS_SIZE_T(ctx, parent, nbr, name, flag, ptr, desc) \ + SYSCTL_ADD_UINT(ctx, parent, nbr, name, flag, ptr, 0, desc) +#endif + +static int +alloc_bounce_zone(bus_dma_tag_t dmat) +{ + struct bounce_zone *bz; + + /* Check to see if we already have a suitable zone */ + STAILQ_FOREACH(bz, &bounce_zone_list, links) { + if ((dmat->common.alignment <= bz->alignment) && + (dmat->common.lowaddr >= bz->lowaddr)) { + dmat->bounce_zone = bz; + return (0); + } + } + + if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, + M_NOWAIT | M_ZERO)) == NULL) + return (ENOMEM); + + STAILQ_INIT(&bz->bounce_page_list); + bz->free_bpages = 0; + bz->reserved_bpages = 0; + bz->active_bpages = 0; + bz->lowaddr = dmat->common.lowaddr; + bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); + bz->map_count = 0; + snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); + busdma_zonecount++; + snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); + STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); + dmat->bounce_zone = bz; + + sysctl_ctx_init(&bz->sysctl_tree); + bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, + SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, + CTLFLAG_RD, 0, ""); + if (bz->sysctl_tree_top == NULL) { + sysctl_ctx_free(&bz->sysctl_tree); + return (0); /* XXX error code? 
*/ + } + + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, + "Total bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, + "Free bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, + "Reserved bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, + "Active bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, + "Total bounce requests"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, + "Total bounce requests that were deferred"); + SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); + SYSCTL_ADD_BUS_SIZE_T(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "alignment", CTLFLAG_RD, &bz->alignment, ""); + + return (0); +} + +static int +alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) +{ + struct bounce_zone *bz; + int count; + + bz = dmat->bounce_zone; + count = 0; + while (numpages > 0) { + struct bounce_page *bpage; + + bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, + M_NOWAIT | M_ZERO); + + if (bpage == NULL) + break; + bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, + M_NOWAIT, 0ul, + bz->lowaddr, + PAGE_SIZE, + 0); + if (bpage->vaddr == 0) { + free(bpage, M_DEVBUF); + break; + } + bpage->busaddr = pmap_kextract(bpage->vaddr); + mtx_lock(&bounce_lock); + STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); + total_bpages++; + bz->total_bpages++; + bz->free_bpages++; + mtx_unlock(&bounce_lock); + count++; + numpages--; + } + return (count); +} + +static int +reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) +{ + struct bounce_zone *bz; + int pages; + + mtx_assert(&bounce_lock, MA_OWNED); + bz = dmat->bounce_zone; + pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); + if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) + return (map->pagesneeded - (map->pagesreserved + pages)); + bz->free_bpages -= pages; + bz->reserved_bpages += pages; + map->pagesreserved += pages; + pages = map->pagesneeded - map->pagesreserved; + + return (pages); +} + +static bus_addr_t +add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, + bus_addr_t addr, bus_size_t size) +{ + struct bounce_zone *bz; + struct bounce_page *bpage; + + KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); + KASSERT(map != NULL && map != &nobounce_dmamap && map != &contig_dmamap, + ("add_bounce_page: bad map %p", map)); + + bz = dmat->bounce_zone; + if (map->pagesneeded == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesneeded--; + + if (map->pagesreserved == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesreserved--; + + mtx_lock(&bounce_lock); + bpage = STAILQ_FIRST(&bz->bounce_page_list); + if (bpage == NULL) + panic("add_bounce_page: free page list is empty"); + + 
STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); + bz->reserved_bpages--; + bz->active_bpages++; + mtx_unlock(&bounce_lock); + + if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { + /* Page offset needs to be preserved. */ + bpage->vaddr |= vaddr & PAGE_MASK; + bpage->busaddr |= vaddr & PAGE_MASK; + } + bpage->datavaddr = vaddr; + bpage->dataaddr = addr; + bpage->datacount = size; + STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); + return (bpage->busaddr); +} + +static void +free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) +{ + struct bus_dmamap *map; + struct bounce_zone *bz; + + bz = dmat->bounce_zone; + bpage->datavaddr = 0; + bpage->datacount = 0; + if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { + /* + * Reset the bounce page to start at offset 0. Other uses + * of this bounce page may need to store a full page of + * data and/or assume it starts on a page boundary. + */ + bpage->vaddr &= ~PAGE_MASK; + bpage->busaddr &= ~PAGE_MASK; + } + + mtx_lock(&bounce_lock); + STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); + bz->free_bpages++; + bz->active_bpages--; + if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { + if (reserve_bounce_pages(map->dmat, map, 1) == 0) { + STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); + STAILQ_INSERT_TAIL(&bounce_map_callbacklist, + map, links); + busdma_swi_pending = 1; + bz->total_deferred++; + swi_sched(vm_ih, 0); + } + } + mtx_unlock(&bounce_lock); +} + +void +busdma_swi(void) +{ + bus_dma_tag_t dmat; + struct bus_dmamap *map; + + mtx_lock(&bounce_lock); + while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { + STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); + mtx_unlock(&bounce_lock); + dmat = map->dmat; + (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); + bus_dmamap_load_mem(map->dmat, map, &map->mem, + map->callback, map->callback_arg, BUS_DMA_WAITOK); + (dmat->common.lockfunc)(dmat->common.lockfuncarg, + BUS_DMA_UNLOCK); + mtx_lock(&bounce_lock); + } + mtx_unlock(&bounce_lock); +} + +struct bus_dma_impl bus_dma_bounce_impl = { + .tag_create = bounce_bus_dma_tag_create, + .tag_destroy = bounce_bus_dma_tag_destroy, + .map_create = bounce_bus_dmamap_create, + .map_destroy = bounce_bus_dmamap_destroy, + .mem_alloc = bounce_bus_dmamem_alloc, + .mem_free = bounce_bus_dmamem_free, + .load_phys = bounce_bus_dmamap_load_phys, + .load_buffer = bounce_bus_dmamap_load_buffer, + .load_ma = bounce_bus_dmamap_load_ma, + .map_waitok = bounce_bus_dmamap_waitok, + .map_complete = bounce_bus_dmamap_complete, + .map_unload = bounce_bus_dmamap_unload, + .map_sync = bounce_bus_dmamap_sync +}; diff --git a/sys/x86/x86/busdma_machdep.c b/sys/x86/x86/busdma_machdep.c index f3e94e1..29bd0ff 100644 --- a/sys/x86/x86/busdma_machdep.c +++ b/sys/x86/x86/busdma_machdep.c @@ -31,160 +31,17 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include -#include #include #include -#include #include - #include #include -#include -#include -#include - -#include +#include #include -#include -#include - -#ifdef __i386__ -#define MAX_BPAGES 512 -#else -#define MAX_BPAGES 8192 -#endif -#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 -#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 - -struct bounce_zone; - -struct bus_dma_tag { - bus_dma_tag_t parent; - bus_size_t alignment; - bus_addr_t boundary; - bus_addr_t lowaddr; - bus_addr_t highaddr; - bus_dma_filter_t *filter; - void *filterarg; - bus_size_t maxsize; - u_int nsegments; - bus_size_t maxsegsz; - int flags; - int ref_count; - int map_count; - 
bus_dma_lock_t *lockfunc; - void *lockfuncarg; - bus_dma_segment_t *segments; - struct bounce_zone *bounce_zone; -}; - -struct bounce_page { - vm_offset_t vaddr; /* kva of bounce buffer */ - bus_addr_t busaddr; /* Physical address */ - vm_offset_t datavaddr; /* kva of client data */ - bus_addr_t dataaddr; /* client physical address */ - bus_size_t datacount; /* client data count */ - STAILQ_ENTRY(bounce_page) links; -}; - -int busdma_swi_pending; - -struct bounce_zone { - STAILQ_ENTRY(bounce_zone) links; - STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; - int total_bpages; - int free_bpages; - int reserved_bpages; - int active_bpages; - int total_bounced; - int total_deferred; - int map_count; - bus_size_t alignment; - bus_addr_t lowaddr; - char zoneid[8]; - char lowaddrid[20]; - struct sysctl_ctx_list sysctl_tree; - struct sysctl_oid *sysctl_tree_top; -}; - -static struct mtx bounce_lock; -static int total_bpages; -static int busdma_zonecount; -static STAILQ_HEAD(, bounce_zone) bounce_zone_list; - -static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); -SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, - "Total bounce pages"); - -struct bus_dmamap { - struct bp_list bpages; - int pagesneeded; - int pagesreserved; - bus_dma_tag_t dmat; - struct memdesc mem; - bus_dmamap_callback_t *callback; - void *callback_arg; - STAILQ_ENTRY(bus_dmamap) links; -}; - -static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; -static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; -static struct bus_dmamap nobounce_dmamap, contig_dmamap; - -static void init_bounce_pages(void *dummy); -static int alloc_bounce_zone(bus_dma_tag_t dmat); -static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); -static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, - int commit); -static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, - vm_offset_t vaddr, bus_addr_t addr, - bus_size_t size); -static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); -int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); -static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, - pmap_t pmap, void *buf, bus_size_t buflen, - int flags); -static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, - vm_paddr_t buf, bus_size_t buflen, - int flags); -static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, - int flags); - -#ifdef XEN -#undef pmap_kextract -#define pmap_kextract pmap_kextract_ma -#endif - -/* - * Return true if a match is made. - * - * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. - * - * If paddr is within the bounds of the dma tag then call the filter callback - * to check for a match, if there is no filter callback then assume a match. - */ -int -run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) -{ - int retval; - - retval = 0; - - do { - if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) - || ((paddr & (dmat->alignment - 1)) != 0)) - && (dmat->filter == NULL - || (*dmat->filter)(dmat->filterarg, paddr) != 0)) - retval = 1; - - dmat = dmat->parent; - } while (retval == 0 && dmat != NULL); - return (retval); -} +#include /* * Convenience function for manipulating driver locks from busdma (during @@ -216,169 +73,146 @@ busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. 
*/ -static void -dflt_lock(void *arg, bus_dma_lock_op_t op) +void +bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op) { + panic("driver error: busdma dflt_lock called"); } /* - * Allocate a device specific dma_tag. + * Return true if a match is made. + * + * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. + * + * If paddr is within the bounds of the dma tag then call the filter callback + * to check for a match, if there is no filter callback then assume a match. */ int -bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, - bus_addr_t boundary, bus_addr_t lowaddr, - bus_addr_t highaddr, bus_dma_filter_t *filter, - void *filterarg, bus_size_t maxsize, int nsegments, - bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, - void *lockfuncarg, bus_dma_tag_t *dmat) +bus_dma_run_filter(struct bus_dma_tag_common *tc, bus_addr_t paddr) +{ + int retval; + + retval = 0; + do { + if (((paddr > tc->lowaddr && paddr <= tc->highaddr) || + ((paddr & (tc->alignment - 1)) != 0)) && + (tc->filter == NULL || + (*tc->filter)(tc->filterarg, paddr) != 0)) + retval = 1; + + tc = tc->parent; + } while (retval == 0 && tc != NULL); + return (retval); +} + +int +common_bus_dma_tag_create(struct bus_dma_tag_common *parent, + bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, + bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, + bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, + bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat) { - bus_dma_tag_t newtag; - int error = 0; + void *newtag; + struct bus_dma_tag_common *common; + KASSERT(sz >= sizeof(struct bus_dma_tag_common), ("sz")); /* Basic sanity checking */ if (boundary != 0 && boundary < maxsegsz) maxsegsz = boundary; - - if (maxsegsz == 0) { + if (maxsegsz == 0) return (EINVAL); - } - /* Return a NULL tag on failure */ *dmat = NULL; - newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, - M_ZERO | M_NOWAIT); + newtag = malloc(sz, M_DEVBUF, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, ENOMEM); return (ENOMEM); } - newtag->parent = parent; - newtag->alignment = alignment; - newtag->boundary = boundary; - newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); - newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); - newtag->filter = filter; - newtag->filterarg = filterarg; - newtag->maxsize = maxsize; - newtag->nsegments = nsegments; - newtag->maxsegsz = maxsegsz; - newtag->flags = flags; - newtag->ref_count = 1; /* Count ourself */ - newtag->map_count = 0; + common = newtag; + common->impl = &bus_dma_bounce_impl; + common->parent = parent; + common->alignment = alignment; + common->boundary = boundary; + common->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); + common->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); + common->filter = filter; + common->filterarg = filterarg; + common->maxsize = maxsize; + common->nsegments = nsegments; + common->maxsegsz = maxsegsz; + common->flags = flags; + common->ref_count = 1; /* Count ourself */ if (lockfunc != NULL) { - newtag->lockfunc = lockfunc; - newtag->lockfuncarg = lockfuncarg; + common->lockfunc = lockfunc; + common->lockfuncarg = lockfuncarg; } else { - newtag->lockfunc = dflt_lock; - newtag->lockfuncarg = NULL; + common->lockfunc = bus_dma_dflt_lock; + common->lockfuncarg = NULL; } - newtag->segments = NULL; /* Take into account any restrictions imposed by our parent tag */ 
if (parent != NULL) { - newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); - newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); - if (newtag->boundary == 0) - newtag->boundary = parent->boundary; - else if (parent->boundary != 0) - newtag->boundary = MIN(parent->boundary, - newtag->boundary); - if ((newtag->filter != NULL) || - ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) - newtag->flags |= BUS_DMA_COULD_BOUNCE; - if (newtag->filter == NULL) { + common->impl = parent->impl; + common->lowaddr = MIN(parent->lowaddr, common->lowaddr); + common->highaddr = MAX(parent->highaddr, common->highaddr); + if (common->boundary == 0) + common->boundary = parent->boundary; + else if (parent->boundary != 0) { + common->boundary = MIN(parent->boundary, + common->boundary); + } + if (common->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ - newtag->filter = parent->filter; - newtag->filterarg = parent->filterarg; - newtag->parent = parent->parent; + common->filter = parent->filter; + common->filterarg = parent->filterarg; + common->parent = parent->parent; } - if (newtag->parent != NULL) - atomic_add_int(&parent->ref_count, 1); + atomic_add_int(&parent->ref_count, 1); } + *dmat = common; + return (0); +} - if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) - || newtag->alignment > 1) - newtag->flags |= BUS_DMA_COULD_BOUNCE; - - if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && - (flags & BUS_DMA_ALLOCNOW) != 0) { - struct bounce_zone *bz; - - /* Must bounce */ - - if ((error = alloc_bounce_zone(newtag)) != 0) { - free(newtag, M_DEVBUF); - return (error); - } - bz = newtag->bounce_zone; - - if (ptoa(bz->total_bpages) < maxsize) { - int pages; - - pages = atop(maxsize) - bz->total_bpages; +/* + * Allocate a device specific dma_tag. + */ +int +bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, + bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, + bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, + int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat) +{ + struct bus_dma_tag_common *tc; + int error; - /* Add pages to our bounce pool */ - if (alloc_bounce_pages(newtag, pages) < pages) - error = ENOMEM; - } - /* Performed initial allocation */ - newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; - } - - if (error != 0) { - free(newtag, M_DEVBUF); + if (parent == NULL) { + error = bus_dma_bounce_impl.tag_create(parent, alignment, + boundary, lowaddr, highaddr, filter, filterarg, maxsize, + nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } else { - *dmat = newtag; + tc = (struct bus_dma_tag_common *)parent; + error = tc->impl->tag_create(parent, alignment, + boundary, lowaddr, highaddr, filter, filterarg, maxsize, + nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } - CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", - __func__, newtag, (newtag != NULL ? 
newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { - bus_dma_tag_t dmat_copy; - int error; + struct bus_dma_tag_common *tc; - error = 0; - dmat_copy = dmat; - - if (dmat != NULL) { - - if (dmat->map_count != 0) { - error = EBUSY; - goto out; - } - - while (dmat != NULL) { - bus_dma_tag_t parent; - - parent = dmat->parent; - atomic_subtract_int(&dmat->ref_count, 1); - if (dmat->ref_count == 0) { - if (dmat->segments != NULL) - free(dmat->segments, M_DEVBUF); - free(dmat, M_DEVBUF); - /* - * Last reference count, so - * release our reference - * count on our parent. - */ - dmat = parent; - } else - dmat = NULL; - } - } -out: - CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); - return (error); + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->tag_destroy(dmat)); } /* @@ -388,83 +222,10 @@ out: int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { - int error; - - error = 0; - - if (dmat->segments == NULL) { - dmat->segments = (bus_dma_segment_t *)malloc( - sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - M_NOWAIT); - if (dmat->segments == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); - return (ENOMEM); - } - } - - /* - * Bouncing might be required if the driver asks for an active - * exclusion region, a data alignment that is stricter than 1, and/or - * an active address boundary. - */ - if (dmat->flags & BUS_DMA_COULD_BOUNCE) { - - /* Must bounce */ - struct bounce_zone *bz; - int maxpages; + struct bus_dma_tag_common *tc; - if (dmat->bounce_zone == NULL) { - if ((error = alloc_bounce_zone(dmat)) != 0) - return (error); - } - bz = dmat->bounce_zone; - - *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (*mapp == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); - return (ENOMEM); - } - - /* Initialize the new map */ - STAILQ_INIT(&((*mapp)->bpages)); - - /* - * Attempt to add pages to our pool on a per-instance - * basis up to a sane limit. 
- */ - if (dmat->alignment > 1) - maxpages = MAX_BPAGES; - else - maxpages = MIN(MAX_BPAGES, Maxmem -atop(dmat->lowaddr)); - if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 - || (bz->map_count > 0 && bz->total_bpages < maxpages)) { - int pages; - - pages = MAX(atop(dmat->maxsize), 1); - pages = MIN(maxpages - bz->total_bpages, pages); - pages = MAX(pages, 1); - if (alloc_bounce_pages(dmat, pages) < pages) - error = ENOMEM; - - if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { - if (error == 0) - dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; - } else { - error = 0; - } - } - bz->map_count++; - } else { - *mapp = NULL; - } - if (error == 0) - dmat->map_count++; - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, error); - return (error); + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_create(dmat, flags, mapp)); } /* @@ -474,19 +235,10 @@ bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { - if (map != NULL && map != &nobounce_dmamap && map != &contig_dmamap) { - if (STAILQ_FIRST(&map->bpages) != NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, EBUSY); - return (EBUSY); - } - if (dmat->bounce_zone) - dmat->bounce_zone->map_count--; - free(map, M_DEVBUF); - } - dmat->map_count--; - CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); - return (0); + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_destroy(dmat, map)); } @@ -497,72 +249,12 @@ bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, - bus_dmamap_t *mapp) + bus_dmamap_t *mapp) { - vm_memattr_t attr; - int mflags; + struct bus_dma_tag_common *tc; - if (flags & BUS_DMA_NOWAIT) - mflags = M_NOWAIT; - else - mflags = M_WAITOK; - - /* If we succeed, no mapping/bouncing will be required */ - *mapp = NULL; - - if (dmat->segments == NULL) { - dmat->segments = (bus_dma_segment_t *)malloc( - sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - mflags); - if (dmat->segments == NULL) { - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, ENOMEM); - return (ENOMEM); - } - } - if (flags & BUS_DMA_ZERO) - mflags |= M_ZERO; - if (flags & BUS_DMA_NOCACHE) - attr = VM_MEMATTR_UNCACHEABLE; - else - attr = VM_MEMATTR_DEFAULT; - - /* - * XXX: - * (dmat->alignment < dmat->maxsize) is just a quick hack; the exact - * alignment guarantees of malloc need to be nailed down, and the - * code below should be rewritten to take that into account. - * - * In the meantime, we'll warn the user if malloc gets it wrong. - */ - if ((dmat->maxsize <= PAGE_SIZE) && - (dmat->alignment < dmat->maxsize) && - dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem) && - attr == VM_MEMATTR_DEFAULT) { - *vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags); - } else if (dmat->nsegments >= btoc(dmat->maxsize) && - dmat->alignment <= PAGE_SIZE && - (dmat->boundary == 0 || dmat->boundary >= dmat->lowaddr)) { - /* Page-based multi-segment allocations allowed */ - *vaddr = (void *)kmem_alloc_attr(kernel_map, dmat->maxsize, - mflags, 0ul, dmat->lowaddr, attr); - *mapp = &contig_dmamap; - } else { - *vaddr = (void *)kmem_alloc_contig(kernel_map, dmat->maxsize, - mflags, 0ul, dmat->lowaddr, dmat->alignment ? 
- dmat->alignment : 1ul, dmat->boundary, attr); - *mapp = &contig_dmamap; - } - if (*vaddr == NULL) { - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, ENOMEM); - return (ENOMEM); - } else if (vtophys(*vaddr) & (dmat->alignment - 1)) { - printf("bus_dmamem_alloc failed to align memory properly.\n"); - } - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, 0); - return (0); + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->mem_alloc(dmat, vaddr, flags, mapp)); } /* @@ -572,153 +264,10 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { - /* - * dmamem does not need to be bounced, so the map should be - * NULL if malloc() was used and contig_dmamap if - * kmem_alloc_contig() was used. - */ - if (!(map == NULL || map == &contig_dmamap)) - panic("bus_dmamem_free: Invalid map freed\n"); - if (map == NULL) - free(vaddr, M_DEVBUF); - else - kmem_free(kernel_map, (vm_offset_t)vaddr, dmat->maxsize); - CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); -} + struct bus_dma_tag_common *tc; -static void -_bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, - bus_size_t buflen, int flags) -{ - bus_addr_t curaddr; - bus_size_t sgsize; - - if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { - /* - * Count the number of bounce pages - * needed in order to complete this transfer - */ - curaddr = buf; - while (buflen != 0) { - sgsize = MIN(buflen, dmat->maxsegsz); - if (run_filter(dmat, curaddr)) { - sgsize = MIN(sgsize, PAGE_SIZE); - map->pagesneeded++; - } - curaddr += sgsize; - buflen -= sgsize; - } - CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); - } -} - -static void -_bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, - void *buf, bus_size_t buflen, int flags) -{ - vm_offset_t vaddr; - vm_offset_t vendaddr; - bus_addr_t paddr; - - if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { - CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " - "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), - dmat->boundary, dmat->alignment); - CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", - map, &nobounce_dmamap, map->pagesneeded); - /* - * Count the number of bounce pages - * needed in order to complete this transfer - */ - vaddr = (vm_offset_t)buf; - vendaddr = (vm_offset_t)buf + buflen; - - while (vaddr < vendaddr) { - bus_size_t sg_len; - - sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); - if (pmap == kernel_pmap) - paddr = pmap_kextract(vaddr); - else - paddr = pmap_extract(pmap, vaddr); - if (run_filter(dmat, paddr) != 0) { - sg_len = roundup2(sg_len, dmat->alignment); - map->pagesneeded++; - } - vaddr += sg_len; - } - CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); - } -} - -static int -_bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) -{ - - /* Reserve Necessary Bounce Pages */ - mtx_lock(&bounce_lock); - if (flags & BUS_DMA_NOWAIT) { - if (reserve_bounce_pages(dmat, map, 0) != 0) { - mtx_unlock(&bounce_lock); - return (ENOMEM); - } - } else { - if (reserve_bounce_pages(dmat, map, 1) != 0) { - /* Queue us for resources */ - STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); - mtx_unlock(&bounce_lock); - return (EINPROGRESS); - } - } - mtx_unlock(&bounce_lock); - - return (0); -} - -/* - * Add a single contiguous physical range to the segment list. 
- */ -static int -_bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, - bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) -{ - bus_addr_t baddr, bmask; - int seg; - - /* - * Make sure we don't cross any boundaries. - */ - bmask = ~(dmat->boundary - 1); - if (dmat->boundary > 0) { - baddr = (curaddr + dmat->boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } - - /* - * Insert chunk into a segment, coalescing with - * previous segment if possible. - */ - seg = *segp; - if (seg == -1) { - seg = 0; - segs[seg].ds_addr = curaddr; - segs[seg].ds_len = sgsize; - } else { - if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && - (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && - (dmat->boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) - segs[seg].ds_len += sgsize; - else { - if (++seg >= dmat->nsegments) - return (0); - segs[seg].ds_addr = curaddr; - segs[seg].ds_len = sgsize; - } - } - *segp = seg; - return (sgsize); + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->mem_free(dmat, vaddr, map); } /* @@ -726,53 +275,26 @@ _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, * the starting segment on entrace, and the ending segment on exit. */ int -_bus_dmamap_load_phys(bus_dma_tag_t dmat, - bus_dmamap_t map, - vm_paddr_t buf, bus_size_t buflen, - int flags, - bus_dma_segment_t *segs, - int *segp) +_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, + bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { - bus_size_t sgsize; - bus_addr_t curaddr; - int error; - - if (map == NULL || map == &contig_dmamap) - map = &nobounce_dmamap; + struct bus_dma_tag_common *tc; - if (segs == NULL) - segs = dmat->segments; - - if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { - _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); - if (map->pagesneeded != 0) { - error = _bus_dmamap_reserve_pages(dmat, map, flags); - if (error) - return (error); - } - } + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_phys(dmat, map, buf, buflen, flags, segs, + segp)); +} - while (buflen > 0) { - curaddr = buf; - sgsize = MIN(buflen, dmat->maxsegsz); - if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && - map->pagesneeded != 0 && run_filter(dmat, curaddr)) { - sgsize = MIN(sgsize, PAGE_SIZE); - curaddr = add_bounce_page(dmat, map, 0, curaddr, - sgsize); - } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) - break; - buf += sgsize; - buflen -= sgsize; - } +int +_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, + bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct bus_dma_tag_common *tc; - /* - * Did we fit? - */ - return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_ma(dmat, map, ma, tlen, ma_offs, flags, + segs, segp)); } /* @@ -780,96 +302,35 @@ _bus_dmamap_load_phys(bus_dma_tag_t dmat, * the starting segment on entrace, and the ending segment on exit. 
*/ int -_bus_dmamap_load_buffer(bus_dma_tag_t dmat, - bus_dmamap_t map, - void *buf, bus_size_t buflen, - pmap_t pmap, - int flags, - bus_dma_segment_t *segs, - int *segp) +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, + int *segp) { - bus_size_t sgsize; - bus_addr_t curaddr; - vm_offset_t vaddr; - int error; - - if (map == NULL || map == &contig_dmamap) - map = &nobounce_dmamap; - - if (segs == NULL) - segs = dmat->segments; - - if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { - _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); - if (map->pagesneeded != 0) { - error = _bus_dmamap_reserve_pages(dmat, map, flags); - if (error) - return (error); - } - } - - vaddr = (vm_offset_t)buf; + struct bus_dma_tag_common *tc; - while (buflen > 0) { - bus_size_t max_sgsize; - - /* - * Get the physical address for this segment. - */ - if (pmap == kernel_pmap) - curaddr = pmap_kextract(vaddr); - else - curaddr = pmap_extract(pmap, vaddr); - - /* - * Compute the segment size, and adjust counts. - */ - max_sgsize = MIN(buflen, dmat->maxsegsz); - sgsize = PAGE_SIZE - ((vm_offset_t)curaddr & PAGE_MASK); - if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && - map->pagesneeded != 0 && run_filter(dmat, curaddr)) { - sgsize = roundup2(sgsize, dmat->alignment); - sgsize = MIN(sgsize, max_sgsize); - curaddr = add_bounce_page(dmat, map, vaddr, curaddr, - sgsize); - } else { - sgsize = MIN(sgsize, max_sgsize); - } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, - segp); - if (sgsize == 0) - break; - vaddr += sgsize; - buflen -= sgsize; - } - - /* - * Did we fit? - */ - return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_buffer(dmat, map, buf, buflen, pmap, flags, segs, + segp)); } void __bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, - struct memdesc *mem, bus_dmamap_callback_t *callback, - void *callback_arg) + struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { - if (map != NULL) { - map->mem = *mem; - map->dmat = dmat; - map->callback = callback; - map->callback_arg = callback_arg; - } + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_waitok(dmat, map, mem, callback, callback_arg); } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, - bus_dma_segment_t *segs, int nsegs, int error) + bus_dma_segment_t *segs, int nsegs, int error) { + struct bus_dma_tag_common *tc; - if (segs == NULL) - segs = dmat->segments; - return (segs); + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_complete(dmat, map, segs, nsegs, error)); } /* @@ -878,317 +339,17 @@ _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, void _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { - struct bounce_page *bpage; + struct bus_dma_tag_common *tc; - while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { - STAILQ_REMOVE_HEAD(&map->bpages, links); - free_bounce_page(dmat, bpage); - } + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_unload(dmat, map); } void _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { - struct bounce_page *bpage; - - if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { - /* - * Handle data bouncing. 
We might also - * want to add support for invalidating - * the caches on broken hardware - */ - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " - "performing bounce", __func__, op, dmat, dmat->flags); - - if (op & BUS_DMASYNC_PREWRITE) { - while (bpage != NULL) { - if (bpage->datavaddr != 0) - bcopy((void *)bpage->datavaddr, - (void *)bpage->vaddr, - bpage->datacount); - else - physcopyout(bpage->dataaddr, - (void *)bpage->vaddr, - bpage->datacount); - bpage = STAILQ_NEXT(bpage, links); - } - dmat->bounce_zone->total_bounced++; - } - - if (op & BUS_DMASYNC_POSTREAD) { - while (bpage != NULL) { - if (bpage->datavaddr != 0) - bcopy((void *)bpage->vaddr, - (void *)bpage->datavaddr, - bpage->datacount); - else - physcopyin((void *)bpage->vaddr, - bpage->dataaddr, - bpage->datacount); - bpage = STAILQ_NEXT(bpage, links); - } - dmat->bounce_zone->total_bounced++; - } - } -} + struct bus_dma_tag_common *tc; -static void -init_bounce_pages(void *dummy __unused) -{ - - total_bpages = 0; - STAILQ_INIT(&bounce_zone_list); - STAILQ_INIT(&bounce_map_waitinglist); - STAILQ_INIT(&bounce_map_callbacklist); - mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); -} -SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); - -static struct sysctl_ctx_list * -busdma_sysctl_tree(struct bounce_zone *bz) -{ - return (&bz->sysctl_tree); -} - -static struct sysctl_oid * -busdma_sysctl_tree_top(struct bounce_zone *bz) -{ - return (bz->sysctl_tree_top); -} - -#if defined(__amd64__) || defined(PAE) -#define SYSCTL_ADD_BUS_SIZE_T SYSCTL_ADD_UQUAD -#else -#define SYSCTL_ADD_BUS_SIZE_T(ctx, parent, nbr, name, flag, ptr, desc) \ - SYSCTL_ADD_UINT(ctx, parent, nbr, name, flag, ptr, 0, desc) -#endif - -static int -alloc_bounce_zone(bus_dma_tag_t dmat) -{ - struct bounce_zone *bz; - - /* Check to see if we already have a suitable zone */ - STAILQ_FOREACH(bz, &bounce_zone_list, links) { - if ((dmat->alignment <= bz->alignment) - && (dmat->lowaddr >= bz->lowaddr)) { - dmat->bounce_zone = bz; - return (0); - } - } - - if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, - M_NOWAIT | M_ZERO)) == NULL) - return (ENOMEM); - - STAILQ_INIT(&bz->bounce_page_list); - bz->free_bpages = 0; - bz->reserved_bpages = 0; - bz->active_bpages = 0; - bz->lowaddr = dmat->lowaddr; - bz->alignment = MAX(dmat->alignment, PAGE_SIZE); - bz->map_count = 0; - snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); - busdma_zonecount++; - snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); - STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); - dmat->bounce_zone = bz; - - sysctl_ctx_init(&bz->sysctl_tree); - bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, - SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, - CTLFLAG_RD, 0, ""); - if (bz->sysctl_tree_top == NULL) { - sysctl_ctx_free(&bz->sysctl_tree); - return (0); /* XXX error code? 
*/ - } - - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, - "Total bounce pages"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, - "Free bounce pages"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, - "Reserved bounce pages"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, - "Active bounce pages"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, - "Total bounce requests"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, - "Total bounce requests that were deferred"); - SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); - SYSCTL_ADD_BUS_SIZE_T(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "alignment", CTLFLAG_RD, &bz->alignment, ""); - - return (0); -} - -static int -alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) -{ - struct bounce_zone *bz; - int count; - - bz = dmat->bounce_zone; - count = 0; - while (numpages > 0) { - struct bounce_page *bpage; - - bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, - M_NOWAIT | M_ZERO); - - if (bpage == NULL) - break; - bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, - M_NOWAIT, 0ul, - bz->lowaddr, - PAGE_SIZE, - 0); - if (bpage->vaddr == 0) { - free(bpage, M_DEVBUF); - break; - } - bpage->busaddr = pmap_kextract(bpage->vaddr); - mtx_lock(&bounce_lock); - STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); - total_bpages++; - bz->total_bpages++; - bz->free_bpages++; - mtx_unlock(&bounce_lock); - count++; - numpages--; - } - return (count); -} - -static int -reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) -{ - struct bounce_zone *bz; - int pages; - - mtx_assert(&bounce_lock, MA_OWNED); - bz = dmat->bounce_zone; - pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); - if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) - return (map->pagesneeded - (map->pagesreserved + pages)); - bz->free_bpages -= pages; - bz->reserved_bpages += pages; - map->pagesreserved += pages; - pages = map->pagesneeded - map->pagesreserved; - - return (pages); -} - -static bus_addr_t -add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, - bus_addr_t addr, bus_size_t size) -{ - struct bounce_zone *bz; - struct bounce_page *bpage; - - KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); - KASSERT(map != NULL && map != &nobounce_dmamap && map != &contig_dmamap, - ("add_bounce_page: bad map %p", map)); - - bz = dmat->bounce_zone; - if (map->pagesneeded == 0) - panic("add_bounce_page: map doesn't need any pages"); - map->pagesneeded--; - - if (map->pagesreserved == 0) - panic("add_bounce_page: map doesn't need any pages"); - map->pagesreserved--; - - mtx_lock(&bounce_lock); - bpage = STAILQ_FIRST(&bz->bounce_page_list); - if (bpage == NULL) - panic("add_bounce_page: free page list is empty"); - - 
STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); - bz->reserved_bpages--; - bz->active_bpages++; - mtx_unlock(&bounce_lock); - - if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { - /* Page offset needs to be preserved. */ - bpage->vaddr |= vaddr & PAGE_MASK; - bpage->busaddr |= vaddr & PAGE_MASK; - } - bpage->datavaddr = vaddr; - bpage->dataaddr = addr; - bpage->datacount = size; - STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); - return (bpage->busaddr); -} - -static void -free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) -{ - struct bus_dmamap *map; - struct bounce_zone *bz; - - bz = dmat->bounce_zone; - bpage->datavaddr = 0; - bpage->datacount = 0; - if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { - /* - * Reset the bounce page to start at offset 0. Other uses - * of this bounce page may need to store a full page of - * data and/or assume it starts on a page boundary. - */ - bpage->vaddr &= ~PAGE_MASK; - bpage->busaddr &= ~PAGE_MASK; - } - - mtx_lock(&bounce_lock); - STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); - bz->free_bpages++; - bz->active_bpages--; - if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { - if (reserve_bounce_pages(map->dmat, map, 1) == 0) { - STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); - STAILQ_INSERT_TAIL(&bounce_map_callbacklist, - map, links); - busdma_swi_pending = 1; - bz->total_deferred++; - swi_sched(vm_ih, 0); - } - } - mtx_unlock(&bounce_lock); -} - -void -busdma_swi(void) -{ - bus_dma_tag_t dmat; - struct bus_dmamap *map; - - mtx_lock(&bounce_lock); - while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { - STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); - mtx_unlock(&bounce_lock); - dmat = map->dmat; - (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); - bus_dmamap_load_mem(map->dmat, map, &map->mem, - map->callback, map->callback_arg, - BUS_DMA_WAITOK); - (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); - mtx_lock(&bounce_lock); - } - mtx_unlock(&bounce_lock); + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_sync(dmat, map, op); }
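The segment construction rule shared by the removed inline loaders and the new bounce_bus_dmamap_load_phys()/_buffer() paths is easy to lose in the hunks above: each chunk is first clipped so it never crosses the tag's boundary, then either coalesced into the previous segment (contiguous, same boundary window, still under maxsegsz) or started as a new segment until nsegments is exhausted. Below is a standalone sketch of just that arithmetic, using stand-in types rather than the kernel KPI.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t bus_addr_t;	/* stand-ins, not the kernel types */
typedef uint64_t bus_size_t;

struct seg { bus_addr_t ds_addr; bus_size_t ds_len; };

/*
 * Illustrative stand-in for _bus_dmamap_addseg(): returns the size
 * consumed, or 0 when the segment list is full.
 */
static bus_size_t
addseg(bus_addr_t curaddr, bus_size_t sgsize, bus_addr_t boundary,
    bus_size_t maxsegsz, struct seg *segs, int nsegs, int *segp)
{
	bus_addr_t baddr, bmask;
	int seg;

	/* Clip the chunk so it does not cross a boundary line. */
	bmask = ~(boundary - 1);
	if (boundary > 0) {
		baddr = (curaddr + boundary) & bmask;
		if (sgsize > baddr - curaddr)
			sgsize = baddr - curaddr;
	}

	seg = *segp;
	if (seg == -1) {
		seg = 0;
		segs[seg].ds_addr = curaddr;
		segs[seg].ds_len = sgsize;
	} else if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
	    segs[seg].ds_len + sgsize <= maxsegsz &&
	    (boundary == 0 ||
	    (segs[seg].ds_addr & bmask) == (curaddr & bmask))) {
		/* Coalesce with the previous segment. */
		segs[seg].ds_len += sgsize;
	} else {
		if (++seg >= nsegs)
			return (0);
		segs[seg].ds_addr = curaddr;
		segs[seg].ds_len = sgsize;
	}
	*segp = seg;
	return (sgsize);
}

int
main(void)
{
	struct seg segs[4];
	int segp = -1;
	bus_addr_t addr = 0xfff0;	/* straddles a 64KB boundary */
	bus_size_t left = 0x40;

	while (left > 0) {
		bus_size_t done = addseg(addr, left, 0x10000, 0x1000,
		    segs, 4, &segp);
		if (done == 0)
			break;
		addr += done;
		left -= done;
	}
	for (int i = 0; i <= segp; i++)
		printf("seg %d: addr 0x%jx len 0x%jx\n", i,
		    (uintmax_t)segs[i].ds_addr, (uintmax_t)segs[i].ds_len);
	return (0);
}

Run as written, the buffer at 0xfff0 is split into two segments because the second chunk falls into the next 64KB boundary window and therefore cannot be coalesced.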
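The structural change behind all of the thin wrappers left in busdma_machdep.c is that a tag is now identified by the struct bus_dma_tag_common placed at its head, and every public operation is forwarded through the common header's impl method table (bus_dma_bounce_impl above, the DMAR implementation elsewhere in the series). A minimal userland model of that dispatch shape, with made-up names, shows why existing callers need no knowledge of which backend serves them.

#include <stdio.h>

/*
 * Miniature model of the indirection introduced by the patch: every backend
 * tag starts with a common header carrying a method table, and the public
 * entry points only cast and dispatch.  Names here are illustrative, not
 * the kernel's.
 */
struct tag_common;

struct dma_impl {
	int	(*load)(struct tag_common *tc, const char *what);
};

struct tag_common {
	const struct dma_impl *impl;
	/* ...constraints shared by all backends (lowaddr, boundary, ...) */
};

struct bounce_tag {
	struct tag_common common;	/* must stay first */
	int	bounce_pages;
};

static int
bounce_load(struct tag_common *tc, const char *what)
{
	struct bounce_tag *bt = (struct bounce_tag *)tc;

	printf("bounce backend loads %s (%d pages reserved)\n",
	    what, bt->bounce_pages);
	return (0);
}

static const struct dma_impl bounce_impl = { .load = bounce_load };

/* Public entry point: no knowledge of the backend beyond the header. */
static int
dmamap_load(struct tag_common *tc, const char *what)
{
	return (tc->impl->load(tc, what));
}

int
main(void)
{
	struct bounce_tag bt = {
		.common.impl = &bounce_impl,
		.bounce_pages = 8,
	};

	return (dmamap_load(&bt.common, "mbuf chain"));
}

A second backend only has to embed the same header and supply its own method table; common_bus_dma_tag_create() in the patch is what fills in that header for whichever backend allocates the tag.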
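For driver authors the KPI is unchanged; the same tag/map/load/sync sequence is simply routed to whichever backend the parent bus supplies. The fragment below is a hedged sketch with a hypothetical mydev_softc and a single-segment ring buffer, not code from the patch. The one behavioural point worth keeping in mind is that a load can still return EINPROGRESS when bounce pages are scarce and the request is allowed to wait, in which case the callback runs later from busdma_swi() with the tag's lockfunc held.

/* Hypothetical driver fragment; mydev_softc and the ring are made up. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/bus.h>

struct mydev_softc {
	device_t	dev;
	struct mtx	mtx;
	bus_dma_tag_t	ring_tag;
	bus_dmamap_t	ring_map;
	bus_addr_t	ring_busaddr;
};

static void
mydev_ring_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct mydev_softc *sc = arg;

	if (error != 0)
		return;
	/* Single segment guaranteed by nsegments = 1 below. */
	sc->ring_busaddr = segs[0].ds_addr;
}

static int
mydev_alloc_ring(struct mydev_softc *sc, void *ring, bus_size_t size)
{
	int error;

	/*
	 * The parent tag comes from the bus; with this series it may
	 * already be a DMAR-backed tag, but the call sequence is unchanged.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(sc->dev),
	    PAGE_SIZE, 0,			/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,		/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filter, filterarg */
	    size, 1, size,			/* maxsize, nsegments, maxsegsz */
	    0, busdma_lock_mutex, &sc->mtx,	/* flags, lockfunc, lockfuncarg */
	    &sc->ring_tag);
	if (error != 0)
		return (error);
	error = bus_dmamap_create(sc->ring_tag, 0, &sc->ring_map);
	if (error != 0)
		return (error);
	/*
	 * With BUS_DMA_NOWAIT a shortage of bounce pages fails outright;
	 * with BUS_DMA_WAITOK the load may return EINPROGRESS and the
	 * callback then runs from the busdma software interrupt.
	 */
	error = bus_dmamap_load(sc->ring_tag, sc->ring_map, ring, size,
	    mydev_ring_cb, sc, BUS_DMA_NOWAIT);
	if (error == 0)
		bus_dmamap_sync(sc->ring_tag, sc->ring_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	return (error);
}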