Index: amd64/amd64/local_apic.c =================================================================== RCS file: /usr/cvs/src/sys/amd64/amd64/local_apic.c,v retrieving revision 1.32 diff -u -r1.32 local_apic.c --- amd64/amd64/local_apic.c 10 Oct 2006 23:23:11 -0000 1.32 +++ amd64/amd64/local_apic.c 9 Nov 2006 17:26:15 -0000 @@ -744,6 +744,65 @@ panic("Couldn't find an APIC vector for IRQ %u", irq); } +/* + * Request 'count' free contiguous IDT vectors to be used by 'count' + * IRQs. 'count' must be a power of two and the vectors will be + * aligned on a boundary of 'align'. If the request cannot be + * satisfied, 0 is returned. + */ +u_int +apic_alloc_vectors(u_int *irqs, u_int count, u_int align) +{ + u_int first, run, vector; + + KASSERT(powerof2(count), ("bad count")); + KASSERT(powerof2(align), ("bad align")); + KASSERT(align >= count, ("align < count")); +#ifdef INVARIANTS + for (run = 0; run < count; run++) + KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", + irqs[run], run)); +#endif + + /* + * Search for 'count' free vectors. As with apic_alloc_vector(), + * this just uses a simple first fit algorithm. + */ + run = 0; + first = 0; + mtx_lock_spin(&icu_lock); + for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { + + /* Vector is in use, end run. */ + if (ioint_irqs[vector] != 0) { + run = 0; + first = 0; + continue; + } + + /* Start a new run if run == 0 and vector is aligned. */ + if (run == 0) { + if ((vector & (align - 1)) != 0) + continue; + first = vector; + } + run++; + + /* Keep looping if the run isn't long enough yet. */ + if (run < count) + continue; + + /* Found a run, assign IRQs and return the first vector. 
*/ + for (vector = 0; vector < count; vector++) + ioint_irqs[first + vector] = irqs[vector]; + mtx_unlock_spin(&icu_lock); + return (first + APIC_IO_INTS); + } + mtx_unlock_spin(&icu_lock); + printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); + return (0); +} + void apic_enable_vector(u_int vector) { @@ -1002,6 +1061,9 @@ intr_register_pic(&lapic_pic); if (bootverbose) lapic_dump("BSP"); + + /* Enable the MSI "pic". */ + msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL) Index: amd64/amd64/mptable_pci.c =================================================================== RCS file: /usr/cvs/src/sys/amd64/amd64/mptable_pci.c,v retrieving revision 1.4 diff -u -r1.4 mptable_pci.c --- amd64/amd64/mptable_pci.c 6 Jan 2006 19:22:18 -0000 1.4 +++ amd64/amd64/mptable_pci.c 9 Nov 2006 17:26:15 -0000 @@ -96,6 +96,10 @@ DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; @@ -148,6 +152,10 @@ DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), {0, 0} }; Index: amd64/amd64/msi.c =================================================================== RCS file: amd64/amd64/msi.c diff -N amd64/amd64/msi.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ amd64/amd64/msi.c 9 Nov 2006 17:21:21 -0000 @@ -0,0 +1,538 @@ +/*- + * Copyright (c) 2006 John Baldwin + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Support for PCI Message Signalled Interrupts (MSI). MSI interrupts on + * x86 are basically APIC messages that the northbridge delivers directly + * to the local APICs as if they had come from an I/O APIC. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Fields in address for Intel MSI messages. 
*/ +#define MSI_INTEL_ADDR_DEST 0x000ff000 +#define MSI_INTEL_ADDR_RH 0x00000008 +# define MSI_INTEL_ADDR_RH_ON 0x00000008 +# define MSI_INTEL_ADDR_RH_OFF 0x00000000 +#define MSI_INTEL_ADDR_DM 0x00000004 +# define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000 +# define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004 + +/* Fields in data for Intel MSI messages. */ +#define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */ +# define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG +# define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL +#define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */ +# define MSI_INTEL_DATA_DEASSERT 0x00000000 +# define MSI_INTEL_DATA_ASSERT 0x00004000 +#define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */ +# define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED +# define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI +# define MSI_INTEL_DATA_DELSMI IOART_DELSMI +# define MSI_INTEL_DATA_DELNMI IOART_DELNMI +# define MSI_INTEL_DATA_DELINIT IOART_DELINIT +# define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT +#define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */ + +/* + * Build Intel MSI message and data values from a source. AMD64 systems + * seem to be compatible, so we use the same function for both. + */ +#define INTEL_ADDR(msi) \ + (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \ + MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL) +#define INTEL_DATA(msi) \ + (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector) + +static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI"); + +/* + * MSI sources are bunched into groups. This is because MSI forces + * all of the messages to share the address and data registers and + * thus certain properties (such as the local APIC ID target on x86). + * Each group has a 'first' source that contains information global to + * the group. These fields are marked with (g) below. + * + * Note that local APIC ID is kind of special. 
Each message will be + * assigned an ID by the system; however, a group will use the ID from + * the first message. + * + * For MSI-X, each message is isolated, and msi_index indicates the + * index of this message in the device's MSI-X table. + */ +struct msi_intsrc { + struct intsrc msi_intsrc; + device_t msi_dev; /* Owning device. (g) */ + struct msi_intsrc *msi_first; /* First source in group. */ + u_int msi_irq; /* IRQ cookie. */ + u_int msi_index; /* Index of this message. */ + u_int msi_msix; /* MSI-X message. */ + u_int msi_vector:8; /* IDT vector. */ + u_int msi_cpu:8; /* Local APIC ID. (g) */ + u_int msi_count:8; /* Messages in this group. (g) */ +}; + +static void msi_enable_source(struct intsrc *isrc); +static void msi_disable_source(struct intsrc *isrc, int eoi); +static void msi_eoi_source(struct intsrc *isrc); +static void msi_enable_intr(struct intsrc *isrc); +static int msi_vector(struct intsrc *isrc); +static int msi_source_pending(struct intsrc *isrc); +static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol); +static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id); +static void msix_enable_intr(struct intsrc *isrc); +static int msix_source_pending(struct intsrc *isrc); +static void msix_assign_cpu(struct intsrc *isrc, u_int apic_id); + +struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, + msi_enable_intr, msi_vector, msi_source_pending, + NULL, NULL, msi_config_intr, msi_assign_cpu }; +struct pic msix_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, + msix_enable_intr, msi_vector, msix_source_pending, + NULL, NULL, msi_config_intr, msix_assign_cpu }; + +static int msi_enabled; +static struct sx msi_sx; + +static void +msi_enable_source(struct intsrc *isrc) +{ +} + +static void +msi_disable_source(struct intsrc *isrc, int eoi) +{ + + if (eoi == PIC_EOI) + lapic_eoi(); +} + +static void +msi_eoi_source(struct intsrc *isrc) +{ + + lapic_eoi(); +} + +static 
void +msi_enable_intr(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + /* + * Since we can only enable the entire group at once, go ahead and + * enable the messages when the first message is given a handler. + * Note that we assume all devices will register a handler for the + * first message. + */ + if (msi->msi_index == 0) { + mtx_lock_spin(&icu_lock); + pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); + } + apic_enable_vector(msi->msi_vector); +} + +static int +msi_vector(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + return (msi->msi_irq); +} + +static int +msi_source_pending(struct intsrc *isrc) +{ + + return (0); +} + +static int +msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol) +{ + + return (ENODEV); +} + +static void +msi_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + msi->msi_cpu = apic_id; + if (bootverbose) + printf("msi: Assigning MSI IRQ %d to local APIC %u\n", + msi->msi_irq, msi->msi_cpu); + mtx_lock_spin(&icu_lock); + if (isrc->is_enabled) + pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); +} + +static void +msix_enable_intr(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + mtx_lock_spin(&icu_lock); + pci_enable_msix(msi->msi_dev, msi->msi_index, INTEL_ADDR(msi), + INTEL_DATA(msi)); + pci_unmask_msix(msi->msi_dev, msi->msi_index); + mtx_unlock_spin(&icu_lock); + apic_enable_vector(msi->msi_vector); +} + +static int +msix_source_pending(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + return (pci_pending_msix(msi->msi_dev, msi->msi_index)); +} + +static void +msix_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + msi->msi_cpu = apic_id; + if (bootverbose) + printf("msi: 
Assigning MSI IRQ %d to local APIC %u\n", + msi->msi_irq, msi->msi_cpu); + mtx_lock_spin(&icu_lock); + if (isrc->is_enabled) + pci_enable_msix(msi->msi_dev, msi->msi_index, INTEL_ADDR(msi), + INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); +} + +void +msi_init(void) +{ + + /* Check if we have a supported CPU. */ + if (!(strcmp(cpu_vendor, "GenuineIntel") == 0 || + strcmp(cpu_vendor, "AuthenticAMD") == 0)) + return; + + msi_enabled = 1; + intr_register_pic(&msi_pic); + intr_register_pic(&msix_pic); + sx_init(&msi_sx, "msi"); +} + +/* + * Try to allocate 'count' interrupt sources with contiguous IDT values. If + * we allocate any new sources, then their IRQ values will be at the end of + * the irqs[] array, with *newirq being the index of the first new IRQ value + * and *newcount being the number of new IRQ values added. + */ +int +msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq, + int *newcount) +{ + struct msi_intsrc *msi, *fsrc; + int cnt, i, j, vector; + + *newirq = 0; + *newcount = 0; + if (!msi_enabled) + return (ENXIO); + + sx_xlock(&msi_sx); + + /* DBG */ + device_printf(dev, "msi_alloc(%d, %d)\n", count, maxcount); + + /* Try to find 'count' free IRQs. */ + cnt = 0; + for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(i); + + /* End of allocated sources, so break. */ + if (msi == NULL) + break; + + /* If this is a free one, save its IRQ in the array. */ + if (msi->msi_dev == NULL) { + irqs[cnt] = i; + cnt++; + if (cnt == count) + break; + } + } + + /* Do we need to create some new sources? */ + if (cnt < count) { + /* If we would exceed the max, give up. */ + if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* DBG */ + device_printf(dev, "creating %d MSI sources at IRQ %d\n", + count - cnt, i); + + /* We need count - cnt more sources starting at index 'cnt'. 
*/ + *newirq = cnt; + *newcount = count - cnt; + for (j = 0; j < count - cnt; j++) { + + /* Create a new MSI source. */ + msi = malloc(sizeof(struct msi_intsrc), M_MSI, + M_WAITOK | M_ZERO); + msi->msi_intsrc.is_pic = &msi_pic; + msi->msi_irq = i + j; + intr_register_source(&msi->msi_intsrc); + + /* Add it to our array. */ + irqs[cnt] = i + j; + cnt++; + } + } + + /* Ok, we now have the IRQs allocated. */ + KASSERT(cnt == count, ("count mismatch")); + + /* DBG */ + device_printf(dev, "trying to use IRQs %u", irqs[0]); + for (i = 1; i < count; i++) + printf(", %u", irqs[i]); + printf(" for MSI\n"); + + /* Allocate 'count' IDT vectors. */ + vector = apic_alloc_vectors(irqs, count, maxcount); + if (vector == 0) { + sx_xunlock(&msi_sx); + return (ENOSPC); + } + + /* DBG */ + device_printf(dev, "allocated %d vectors starting at %d for MSI\n", + count, vector); + + /* Assign IDT vectors and make these messages owned by 'dev'. */ + fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]); + for (i = 0; i < count; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); + msi->msi_intsrc.is_pic = &msi_pic; + msi->msi_dev = dev; + msi->msi_vector = vector + i; + msi->msi_index = i; + msi->msi_first = fsrc; + + /* XXX: Somewhat gross. */ + msi->msi_intsrc.is_enabled = 0; + } + fsrc->msi_count = count; + sx_xunlock(&msi_sx); + + return (0); +} + +int +msi_release(int *irqs, int count) +{ + struct msi_intsrc *msi, *first; + int i; + + sx_xlock(&msi_sx); + first = (struct msi_intsrc *)intr_lookup_source(irqs[0]); + if (first == NULL) { + sx_xunlock(&msi_sx); + return (ENOENT); + } + + /* Make sure this isn't an MSI-X message. */ + if (first->msi_msix) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + + /* Make sure this message is allocated to a group. */ + if (first->msi_first == NULL) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* + * Make sure this is the start of a group and that we are releasing + * the entire group. 
+ */ + if (first->msi_first != first || first->msi_count != count) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + KASSERT(first->msi_index == 0, ("index mismatch")); + + KASSERT(first->msi_dev != NULL, ("unowned group")); + + /* Clear all the extra messages in the group. */ + for (i = 1; i < count; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); + KASSERT(msi->msi_first == first, ("message not in group")); + KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch")); + msi->msi_first = NULL; + msi->msi_dev = NULL; + apic_free_vector(msi->msi_vector, msi->msi_irq); + msi->msi_vector = 0; + msi->msi_index = 0; + } + + /* Clear out the first message. */ + first->msi_first = NULL; + first->msi_dev = NULL; + apic_free_vector(first->msi_vector, first->msi_irq); + first->msi_vector = 0; + first->msi_count = 0; + + sx_xunlock(&msi_sx); + return (0); +} + +int +msix_alloc(device_t dev, int index, int *irq, int *new) +{ + struct msi_intsrc *msi; + int i, vector; + + *new = 0; + if (!msi_enabled) + return (ENXIO); + + sx_xlock(&msi_sx); + + /* DBG */ + device_printf(dev, "msix_alloc(%d)\n", index); + + /* Find a free IRQ. */ + for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(i); + + /* End of allocated sources, so break. */ + if (msi == NULL) + break; + + /* Stop at the first free (unowned) source. */ + if (msi->msi_dev == NULL) + break; + } + + /* Do we need a new source, or are all IRQs already in use? */ + if (msi == NULL || msi->msi_dev != NULL) { + /* If we would exceed the max, give up. */ + if (i + 1 > FIRST_MSI_INT + NUM_MSI_INTS) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* DBG */ + device_printf(dev, "creating MSI-X source at IRQ %d\n", i); + + /* Create a new source. */ + *new = 1; + msi = malloc(sizeof(struct msi_intsrc), M_MSI, + M_WAITOK | M_ZERO); + msi->msi_intsrc.is_pic = &msix_pic; + msi->msi_irq = i; + intr_register_source(&msi->msi_intsrc); + } + + /* Allocate an IDT vector. 
*/ + vector = apic_alloc_vector(i); + + /* DBG */ + device_printf(dev, "allocated vector %d for MSI-X irq %d\n", + vector, i); + + /* Setup source. */ + msi->msi_intsrc.is_pic = &msix_pic; + msi->msi_dev = dev; + msi->msi_vector = vector; + msi->msi_index = index; + msi->msi_msix = 1; + + /* XXX: Somewhat gross. */ + msi->msi_intsrc.is_enabled = 0; + sx_xunlock(&msi_sx); + + *irq = i; + return (0); +} + +int +msix_release(int irq) +{ + struct msi_intsrc *msi; + + sx_xlock(&msi_sx); + msi = (struct msi_intsrc *)intr_lookup_source(irq); + if (msi == NULL) { + sx_xunlock(&msi_sx); + return (ENOENT); + } + + /* Make sure this is an MSI-X message. */ + if (!msi->msi_msix) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + + KASSERT(msi->msi_dev != NULL, ("unowned message")); + + /* Clear out the message. */ + msi->msi_dev = NULL; + apic_free_vector(msi->msi_vector, msi->msi_irq); + msi->msi_vector = 0; + msi->msi_index = 0; + msi->msi_msix = 0; + + sx_xunlock(&msi_sx); + return (0); +} Index: amd64/amd64/nexus.c =================================================================== RCS file: /usr/cvs/src/sys/amd64/amd64/nexus.c,v retrieving revision 1.69 diff -u -r1.69 nexus.c --- amd64/amd64/nexus.c 11 Sep 2006 19:31:51 -0000 1.69 +++ amd64/amd64/nexus.c 9 Nov 2006 17:26:15 -0000 @@ -61,6 +61,8 @@ #include +#include "pcib_if.h" + #ifdef DEV_ISA #include #include @@ -100,6 +102,10 @@ static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long); static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *); static void nexus_delete_resource(device_t, device_t, int, int); +static int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs); +static int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs); +static int nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq); +static int nexus_release_msix(device_t pcib, device_t dev, int irq); static device_method_t nexus_methods[] = { /* Device 
interface */ @@ -125,6 +131,12 @@ DEVMETHOD(bus_get_resource, nexus_get_resource), DEVMETHOD(bus_delete_resource, nexus_delete_resource), + /* pcib interface */ + DEVMETHOD(pcib_alloc_msi, nexus_alloc_msi), + DEVMETHOD(pcib_release_msi, nexus_release_msi), + DEVMETHOD(pcib_alloc_msix, nexus_alloc_msix), + DEVMETHOD(pcib_release_msix, nexus_release_msix), + { 0, 0 } }; @@ -504,6 +516,50 @@ resource_list_delete(rl, type, rid); } +static int +nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq) +{ + int error, new; + + error = msix_alloc(dev, index, irq, &new); + if (new) { + /* DBG */ + device_printf(pcib, "adding IRQ %u to rman\n", *irq); + rman_manage_region(&irq_rman, *irq, *irq); + } + return (error); +} + +static int +nexus_release_msix(device_t pcib, device_t dev, int irq) +{ + + return (msix_release(irq)); +} + +static int +nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) +{ + int error, i, newirq, newcount; + + /* First alloc the messages. */ + error = msi_alloc(dev, count, maxcount, irqs, &newirq, &newcount); + + /* Always add any new IRQs to the rman, even on failure. 
*/ + for (i = 0; i < newcount; i++) + rman_manage_region(&irq_rman, irqs[newirq + i], + irqs[newirq + i]); + + return (error); +} + +static int +nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs) +{ + + return (msi_release(irqs, count)); +} + #ifdef DEV_ISA /* * Placeholder which claims PnP 'devices' which describe system Index: amd64/include/apicvar.h =================================================================== RCS file: /usr/cvs/src/sys/amd64/include/apicvar.h,v retrieving revision 1.19 diff -u -r1.19 apicvar.h --- amd64/include/apicvar.h 10 Oct 2006 23:23:11 -0000 1.19 +++ amd64/include/apicvar.h 9 Nov 2006 17:26:15 -0000 @@ -175,6 +175,7 @@ IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint); u_int apic_alloc_vector(u_int irq); +u_int apic_alloc_vectors(u_int *irqs, u_int count, u_int align); void apic_enable_vector(u_int vector); void apic_free_vector(u_int vector, u_int irq); u_int apic_idt_to_irq(u_int vector); Index: amd64/include/intr_machdep.h =================================================================== RCS file: /usr/cvs/src/sys/amd64/include/intr_machdep.h,v retrieving revision 1.11 diff -u -r1.11 intr_machdep.h --- amd64/include/intr_machdep.h 10 Oct 2006 23:23:11 -0000 1.11 +++ amd64/include/intr_machdep.h 9 Nov 2006 17:26:15 -0000 @@ -43,11 +43,18 @@ * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * - * For now we stick with 255 as ISA IRQs and PCI intline IRQs only allow - * for IRQs in the range 0 - 254. When MSI support is added this number - * will likely increase. + * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. + * IRQ values from 256 onward are used by MSI. We leave 255 unused to avoid + * confusion since 255 is used in PCI to indicate an invalid IRQ. 
*/ -#define NUM_IO_INTS 255 +#define NUM_MSI_INTS 128 +#define FIRST_MSI_INT 256 +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) + +/* + * Default base address for MSI messages on x86 platforms. + */ +#define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. @@ -140,6 +147,12 @@ void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); +void msi_init(void); +int msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq, + int *newcount); +int msi_release(int *irqs, int count); +int msix_alloc(device_t dev, int index, int *irq, int *new); +int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ Index: amd64/pci/pci_bus.c =================================================================== RCS file: /usr/cvs/src/sys/amd64/pci/pci_bus.c,v retrieving revision 1.117 diff -u -r1.117 pci_bus.c --- amd64/pci/pci_bus.c 13 Mar 2006 23:58:40 -0000 1.117 +++ amd64/pci/pci_bus.c 9 Nov 2006 17:26:15 -0000 @@ -322,6 +322,10 @@ DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; Index: conf/files.amd64 =================================================================== RCS file: /usr/cvs/src/sys/conf/files.amd64,v retrieving revision 1.98 diff -u -r1.98 files.amd64 --- conf/files.amd64 29 Oct 2006 14:02:39 -0000 1.98 +++ conf/files.amd64 9 Nov 2006 17:26:15 -0000 @@ -114,6 +114,7 @@ amd64/amd64/mpboot.S optional smp amd64/amd64/mptable.c optional mptable amd64/amd64/mptable_pci.c optional mptable pci +amd64/amd64/msi.c optional pci amd64/amd64/nexus.c standard amd64/amd64/pmap.c standard amd64/amd64/prof_machdep.c optional profiling-routine Index: conf/files.i386 
=================================================================== RCS file: /usr/cvs/src/sys/conf/files.i386,v retrieving revision 1.570 diff -u -r1.570 files.i386 --- conf/files.i386 29 Oct 2006 14:02:39 -0000 1.570 +++ conf/files.i386 9 Nov 2006 17:26:15 -0000 @@ -294,6 +294,7 @@ i386/i386/mpboot.s optional smp i386/i386/mptable.c optional apic i386/i386/mptable_pci.c optional apic pci +i386/i386/msi.c optional apic pci i386/i386/nexus.c standard i386/i386/perfmon.c optional perfmon i386/i386/pmap.c standard Index: dev/acpica/acpi_pcib_acpi.c =================================================================== RCS file: /usr/cvs/src/sys/dev/acpica/acpi_pcib_acpi.c,v retrieving revision 1.50 diff -u -r1.50 acpi_pcib_acpi.c --- dev/acpica/acpi_pcib_acpi.c 6 Jan 2006 19:22:18 -0000 1.50 +++ dev/acpica/acpi_pcib_acpi.c 9 Nov 2006 17:26:15 -0000 @@ -103,6 +103,10 @@ DEVMETHOD(pcib_read_config, acpi_pcib_read_config), DEVMETHOD(pcib_write_config, acpi_pcib_write_config), DEVMETHOD(pcib_route_interrupt, acpi_pcib_acpi_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), {0, 0} }; Index: dev/acpica/acpi_pcib_pci.c =================================================================== RCS file: /usr/cvs/src/sys/dev/acpica/acpi_pcib_pci.c,v retrieving revision 1.14 diff -u -r1.14 acpi_pcib_pci.c --- dev/acpica/acpi_pcib_pci.c 6 Jan 2006 19:22:18 -0000 1.14 +++ dev/acpica/acpi_pcib_pci.c 9 Nov 2006 17:26:15 -0000 @@ -93,6 +93,10 @@ DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, acpi_pcib_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), {0, 0} }; Index: dev/bce/if_bce.c 
=================================================================== RCS file: /usr/cvs/src/sys/dev/bce/if_bce.c,v retrieving revision 1.18 diff -u -r1.18 if_bce.c --- dev/bce/if_bce.c 31 Oct 2006 03:28:25 -0000 1.18 +++ dev/bce/if_bce.c 9 Nov 2006 17:26:15 -0000 @@ -485,7 +485,12 @@ sc->bce_vhandle = (vm_offset_t) rman_get_virtual(sc->bce_res); /* Allocate PCI IRQ resources. */ - rid = 0; + rid = 1; + if (pci_alloc_msi(dev, &rid) == 0) { + rid = 1; + sc->bce_flags |= BCE_USING_MSI_FLAG; + } else + rid = 0; sc->bce_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); @@ -2539,9 +2544,12 @@ if (sc->bce_irq != NULL) bus_release_resource(dev, SYS_RES_IRQ, - 0, + sc->bce_flags & BCE_USING_MSI_FLAG ? 1 : 0, sc->bce_irq); + if (sc->bce_flags & BCE_USING_MSI_FLAG) + pci_release_msi(dev); + if (sc->bce_res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, Index: dev/em/if_em.c =================================================================== RCS file: /usr/cvs/src/sys/dev/em/if_em.c,v retrieving revision 1.163 diff -u -r1.163 if_em.c --- dev/em/if_em.c 9 Nov 2006 16:00:18 -0000 1.163 +++ dev/em/if_em.c 9 Nov 2006 17:26:15 -0000 @@ -2200,7 +2200,12 @@ rman_get_bushandle(adapter->flash_mem); } - rid = 0x0; + val = 1; + if (pci_alloc_msi(dev, &val) == 0) { + rid = 1; + adapter->msi = 1; + } else + rid = 0; adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res_interrupt == NULL) { @@ -2279,7 +2284,11 @@ device_t dev = adapter->dev; if (adapter->res_interrupt != NULL) - bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt); + bus_release_resource(dev, SYS_RES_IRQ, adapter->msi ? 
1 : 0, + adapter->res_interrupt); + + if (adapter->msi) + pci_release_msi(dev); if (adapter->res_memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), Index: dev/em/if_em.h =================================================================== RCS file: /usr/cvs/src/sys/dev/em/if_em.h,v retrieving revision 1.55 diff -u -r1.55 if_em.h --- dev/em/if_em.h 9 Nov 2006 16:00:18 -0000 1.55 +++ dev/em/if_em.h 9 Nov 2006 17:26:15 -0000 @@ -288,6 +288,7 @@ struct callout tx_fifo_timer; int watchdog_timer; int io_rid; + int msi; int if_flags; struct mtx mtx; int em_insert_vlan_header; Index: dev/pci/pci.c =================================================================== RCS file: /usr/cvs/src/sys/dev/pci/pci.c,v retrieving revision 1.320 diff -u -r1.320 pci.c --- dev/pci/pci.c 7 Nov 2006 18:55:51 -0000 1.320 +++ dev/pci/pci.c 9 Nov 2006 18:37:28 -0000 @@ -52,6 +52,10 @@ #include #include +#if defined(__i386__) || defined(__amd64__) +#include +#endif + #include #include #include @@ -140,6 +144,8 @@ DEVMETHOD(pci_set_powerstate, pci_set_powerstate_method), DEVMETHOD(pci_assign_interrupt, pci_assign_interrupt_method), DEVMETHOD(pci_find_extcap, pci_find_extcap_method), + DEVMETHOD(pci_alloc_msi, pci_alloc_msi_method), + DEVMETHOD(pci_release_msi, pci_release_msi_method), { 0, 0 } }; @@ -207,6 +213,16 @@ &pci_do_power_resume, 1, "Transition from D3 -> D0 on resume."); +static int pci_do_msi = 1; +TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi); +SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1, + "Enable support for MSI interrupts"); + +static int pci_do_msix = 1; +TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix); +SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1, + "Enable support for MSI-X interrupts"); + /* Find a device_t by bus/slot/function */ device_t @@ -429,6 +445,11 @@ pci_read_extcap(device_t pcib, pcicfgregs *cfg) { #define REG(n, w) PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w) +#define WREG(n, v, 
w) PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w) +#if defined(__i386__) || defined(__amd64__) + uint64_t addr; +#endif + uint32_t val; int ptr, nextptr, ptrptr; switch (cfg->hdrtype & PCIM_HDRTYPE) { @@ -468,15 +489,49 @@ cfg->pp.pp_data = ptr + PCIR_POWER_DATA; } break; +#if defined(__i386__) || defined(__amd64__) + case PCIY_HT: /* HyperTransport */ + /* Determine HT-specific capability type. */ + val = REG(ptr + PCIR_HT_COMMAND, 2); + switch (val & PCIM_HTCMD_CAP_MASK) { + case PCIM_HTCAP_MSI_MAPPING: + /* Sanity check the 64-bit mapping window (HI:LO). */ + addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4); + addr <<= 32; + addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4); + if (addr != MSI_INTEL_ADDR_BASE) + device_printf(pcib, + "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n", + cfg->bus, cfg->slot, cfg->func, + (long long)addr); + + /* Enable MSI -> HT mapping. */ + val |= PCIM_HTCMD_MSI_ENABLE; + WREG(ptr + PCIR_HT_COMMAND, val, 2); + break; + } + break; +#endif case PCIY_MSI: /* PCI MSI */ + cfg->msi.msi_location = ptr; cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2); - if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) - cfg->msi.msi_data = PCIR_MSI_DATA_64BIT; - else - cfg->msi.msi_data = PCIR_MSI_DATA; cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl & PCIM_MSICTRL_MMC_MASK)>>1); break; + case PCIY_MSIX: /* PCI MSI-X */ + cfg->msix.msix_location = ptr; + cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2); + cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl & + PCIM_MSIXCTRL_TABLE_SIZE) + 1; + val = REG(ptr + PCIR_MSIX_TABLE, 4); + cfg->msix.msix_table_bar = PCIR_BAR(val & + PCIM_MSIX_BIR_MASK); + cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK; + val = REG(ptr + PCIR_MSIX_PBA, 4); + cfg->msix.msix_pba_bar = PCIR_BAR(val & + PCIM_MSIX_BIR_MASK); + cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK; + break; case PCIY_VPD: /* PCI Vital Product Data */ cfg->vpd.vpd_reg = ptr; pci_read_vpd(pcib, cfg); @@ -485,7 +540,7 @@ break; } } -/* REG use carry through 
to next functions */ +/* REG and WREG use carry through to next functions */ } /* @@ -494,7 +549,6 @@ static uint32_t pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg) { -#define WREG(n, v, w) PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w) KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned")); @@ -519,7 +573,6 @@ return; } #endif -#undef WREG struct vpd_readstate { device_t pcib; @@ -781,6 +834,7 @@ cfg->vpd.vpd_ros = NULL; } #undef REG +#undef WREG } int @@ -869,6 +923,360 @@ return (ENOENT); } +/* + * Support for MSI-X message interrupts. + */ +void +pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + uint32_t offset; + + KASSERT(cfg->msix.msix_alloc > index, ("bogus index")); + offset = cfg->msix.msix_table_offset + index * 16; + bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff); + bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32); + bus_write_4(cfg->msix.msix_table_res, offset + 8, data); + + /* DBG */ + device_printf(dev, "Enabled MSI %d addr %lx data %x\n", index, + (u_long)address, data); +} + +void +pci_mask_msix(device_t dev, u_int index) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + uint32_t offset, val; + + KASSERT(cfg->msix.msix_msgnum > index, ("bogus index")); + offset = cfg->msix.msix_table_offset + index * 16 + 12; + val = bus_read_4(cfg->msix.msix_table_res, offset); + if (!(val & PCIM_MSIX_VCTRL_MASK)) { + val |= PCIM_MSIX_VCTRL_MASK; + bus_write_4(cfg->msix.msix_table_res, offset, val); + } +} + +void +pci_unmask_msix(device_t dev, u_int index) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + uint32_t offset, val; + + KASSERT(cfg->msix.msix_alloc > index, ("bogus index")); + offset = cfg->msix.msix_table_offset + index * 16 + 12; + val = bus_read_4(cfg->msix.msix_table_res, 
offset); + if (val & PCIM_MSIX_VCTRL_MASK) { + val &= ~PCIM_MSIX_VCTRL_MASK; + bus_write_4(cfg->msix.msix_table_res, offset, val); + } +} + +int +pci_pending_msix(device_t dev, u_int index) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + uint32_t offset, bit; + + KASSERT(cfg->msix.msix_alloc > index, ("bogus index")); + offset = cfg->msix.msix_pba_offset + (index / 32) * 4; + bit = 1 << index % 32; + return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit); +} + +static int +pci_alloc_msix(device_t dev, device_t child, int *count) +{ + struct pci_devinfo *dinfo = device_get_ivars(child); + pcicfgregs *cfg = &dinfo->cfg; + struct resource_list_entry *rle; + int actual, error, i, irq, max; + + /* MSI-X capability present? */ + if (cfg->msix.msix_location == 0 || !pci_do_msix) + return (ENODEV); + + /* Make sure the appropriate BARs are mapped. */ + rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, + cfg->msix.msix_table_bar); + if (rle == NULL || rle->res == NULL || + !(rman_get_flags(rle->res) & RF_ACTIVE)) + return (ENXIO); + cfg->msix.msix_table_res = rle->res; + if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) { + rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, + cfg->msix.msix_pba_bar); + if (rle == NULL || rle->res == NULL || + !(rman_get_flags(rle->res) & RF_ACTIVE)) + return (ENXIO); + } + cfg->msix.msix_pba_res = rle->res; + + /* Already have allocated messages? */ + if (cfg->msix.msix_alloc != 0) + return (ENXIO); + + max = min(*count, cfg->msix.msix_msgnum); + for (i = 0; i < max; i++) { + /* Allocate a message. */ + error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i, + &irq); + /* DBG */ + if (error) + device_printf(child, "allocing MSI-X IRQ failed\n"); + else + device_printf(child, "allocd MSI-X IRQ %d at rid %d\n", + irq, i + 1); + if (error) + break; + resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq, + irq, 1); + } + actual = i; + + /* Mask all vectors. 
*/ + for (i = 0; i < cfg->msix.msix_msgnum; i++) + pci_mask_msix(child, i); + + /* Update control register to enable MSI-X. */ + cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; + pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL, + cfg->msix.msix_ctrl, 2); + + /* Update counts of alloc'd messages. */ + cfg->msix.msix_alloc = actual; + *count = actual; + return (0); +} + +static int +pci_release_msix(device_t dev, device_t child) +{ + struct pci_devinfo *dinfo = device_get_ivars(child); + pcicfgregs *cfg = &dinfo->cfg; + struct resource_list_entry *rle; + int i; + + /* Do we have any messages to release? */ + if (cfg->msix.msix_alloc == 0) + return (ENODEV); + + /* Make sure none of the resources are allocated. */ + for (i = 0; i < cfg->msix.msix_alloc; i++) { + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); + KASSERT(rle != NULL, ("missing MSI resource")); + if (rle->res != NULL) + return (EBUSY); + } + + /* Update control register with to disable MSI-X. */ + cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE; + pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL, + cfg->msix.msix_ctrl, 2); + + /* Release the messages. */ + for (i = 0; i < cfg->msix.msix_alloc; i++) { + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); + PCIB_RELEASE_MSIX(device_get_parent(dev), child, + rle->start); + resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1); + } + + /* Update alloc count. */ + cfg->msix.msix_alloc = 0; + return (0); +} + +/* + * Support for MSI message signalled interrupts. + */ +void +pci_enable_msi(device_t dev, uint64_t address, uint16_t data) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + + /* Write data and address values. 
*/ + cfg->msi.msi_addr = address; + cfg->msi.msi_data = data; + pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR, + address & 0xffffffff, 4); + if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) { + pci_write_config(dev, cfg->msi.msi_location + + PCIR_MSI_ADDR_HIGH, address >> 32, 4); + pci_write_config(dev, cfg->msi.msi_location + + PCIR_MSI_DATA_64BIT, data, 2); + } else + pci_write_config(dev, cfg->msi.msi_location + + PCIR_MSI_DATA, data, 2); + + /* Enable MSI in the control register. */ + cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE; + pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL, + cfg->msi.msi_ctrl, 2); +} + +/* + * Restore MSI registers during resume. If MSI is enabled then + * restore the data and address registers in addition to the control + * register. + */ +static void +pci_resume_msi(device_t dev) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + uint64_t address; + uint16_t data; + + if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) { + address = cfg->msi.msi_addr; + data = cfg->msi.msi_data; + pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR, + address & 0xffffffff, 4); + if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) { + pci_write_config(dev, cfg->msi.msi_location + + PCIR_MSI_ADDR_HIGH, address >> 32, 4); + pci_write_config(dev, cfg->msi.msi_location + + PCIR_MSI_DATA_64BIT, data, 2); + } else + pci_write_config(dev, cfg->msi.msi_location + + PCIR_MSI_DATA, data, 2); + } + pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL, + cfg->msi.msi_ctrl, 2); +} + +/* + * Attempt to allocate *count MSI messages. The actual number allocated is + * returned in *count. After this function returns, each message will be + * available to the driver as SYS_RES_IRQ resources starting at a rid 1. 
+ */ +int +pci_alloc_msi_method(device_t dev, device_t child, int *count) +{ + struct pci_devinfo *dinfo = device_get_ivars(child); + pcicfgregs *cfg = &dinfo->cfg; + struct resource_list_entry *rle; + int actual, error, i, irqs[32]; + uint16_t ctrl; + + /* Don't let count == 0 get us into trouble. */ + if (*count == 0) + return (EINVAL); + + /* If rid 0 is allocated, then fail. */ + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0); + if (rle != NULL && rle->res != NULL) + return (ENXIO); + + /* Try MSI-X first. */ + error = pci_alloc_msix(dev, child, count); + if (error != ENODEV) + return (error); + + /* MSI capability present? */ + if (cfg->msi.msi_location == 0 || !pci_do_msi) + return (ENODEV); + + /* Already have allocated messages? */ + if (cfg->msi.msi_alloc != 0) + return (ENXIO); + + /* Don't ask for more than the device supports. */ + actual = min(*count, cfg->msi.msi_msgnum); + + /* Don't ask for more than 32 messages. */ + actual = min(actual, 32); + + /* MSI requires power of 2 number of messages. */ + if (!powerof2(actual)) + return (EINVAL); + + for (;;) { + /* Try to allocate N messages. */ + error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual, + cfg->msi.msi_msgnum, irqs); + if (error == 0) + break; + if (actual == 1) + return (error); + + /* Try N / 2. */ + actual >>= 1; + } + + /* + * We now have N actual messages mapped onto SYS_RES_IRQ + * resources in the irqs[] array, so add new resources + * starting at rid 1. + */ + for (i = 0; i < actual; i++) + resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, + irqs[i], irqs[i], 1); + + /* Update control register with actual count and enable MSI. */ + ctrl = cfg->msi.msi_ctrl; + ctrl &= ~PCIM_MSICTRL_MME_MASK; + ctrl |= (ffs(actual) - 1) << 4; + cfg->msi.msi_ctrl = ctrl; + pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2); + + /* Update counts of alloc'd messages. 
*/ + cfg->msi.msi_alloc = actual; + *count = actual; + return (0); +} + +/* Release the MSI messages associated with this device. */ +int +pci_release_msi_method(device_t dev, device_t child) +{ + struct pci_devinfo *dinfo = device_get_ivars(child); + pcicfgregs *cfg = &dinfo->cfg; + struct resource_list_entry *rle; + int error, i, irqs[32]; + + /* Try MSI-X first. */ + error = pci_release_msix(dev, child); + if (error != ENODEV) + return (error); + + /* Do we have any messages to release? */ + if (cfg->msi.msi_alloc == 0) + return (ENODEV); + KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages")); + + /* Make sure none of the resources are allocated. */ + for (i = 0; i < cfg->msi.msi_alloc; i++) { + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); + KASSERT(rle != NULL, ("missing MSI resource")); + if (rle->res != NULL) + return (EBUSY); + irqs[i] = rle->start; + } + + /* Update control register with 0 count and disable MSI. */ + cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE); + pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, + cfg->msi.msi_ctrl, 2); + + /* Release the messages. */ + PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc, + irqs); + for (i = 0; i < cfg->msi.msi_alloc; i++) + resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1); + + /* Update alloc count. */ + cfg->msi.msi_alloc = 0; + return (0); +} + /* free pcicfgregs structure and all depending data structures */ int @@ -1174,16 +1582,28 @@ vwp->start + vwp->len, vwp->value); } } - if (cfg->msi.msi_data) { + if (cfg->msi.msi_location) { int ctrl; - ctrl = cfg->msi.msi_ctrl; + ctrl = cfg->msi.msi_ctrl; printf("\tMSI supports %d message%s%s%s\n", cfg->msi.msi_msgnum, (cfg->msi.msi_msgnum == 1) ? "" : "s", (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "", (ctrl & PCIM_MSICTRL_VECTOR) ? 
", vector masks":""); } + if (cfg->msix.msix_location) { + printf("\tMSI-X supports %d message%s ", + cfg->msix.msix_msgnum, + (cfg->msix.msix_msgnum == 1) ? "" : "s"); + if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar) + printf("in map 0x%x\n", + cfg->msix.msix_table_bar); + else + printf("in maps 0x%x and 0x%x\n", + cfg->msix.msix_table_bar, + cfg->msix.msix_pba_bar); + } } } @@ -2252,11 +2672,18 @@ switch (type) { case SYS_RES_IRQ: /* + * Can't alloc legacy interrupt once MSI messages + * have been allocated. + */ + if (*rid == 0 && (cfg->msi.msi_alloc > 0 || + cfg->msix.msix_alloc > 0)) + return (NULL); + /* * If the child device doesn't have an * interrupt routed and is deserving of an * interrupt, try to assign it one. */ - if (!PCI_INTERRUPT_VALID(cfg->intline) && + if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) && (cfg->intpin != 0)) pci_assign_interrupt(dev, child, 0); break; @@ -2467,6 +2894,13 @@ pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1); pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1); pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1); + + /* + * Restore MSI configuration if it is present. If MSI is enabled, + * then restore the data and addr registers. 
+ */ + if (dinfo->cfg.msi.msi_location != 0) + pci_resume_msi(dev); } void Index: dev/pci/pci_if.m =================================================================== RCS file: /usr/cvs/src/sys/dev/pci/pci_if.m,v retrieving revision 1.9 diff -u -r1.9 pci_if.m --- dev/pci/pci_if.m 9 Oct 2006 16:15:55 -0000 1.9 +++ dev/pci/pci_if.m 9 Nov 2006 17:26:15 -0000 @@ -102,3 +102,14 @@ int capability; int *capreg; }; + +METHOD int alloc_msi { + device_t dev; + device_t child; + int *count; +}; + +METHOD int release_msi { + device_t dev; + device_t child; +}; Index: dev/pci/pci_pci.c =================================================================== RCS file: /usr/cvs/src/sys/dev/pci/pci_pci.c,v retrieving revision 1.42 diff -u -r1.42 pci_pci.c --- dev/pci/pci_pci.c 30 Oct 2006 19:18:46 -0000 1.42 +++ dev/pci/pci_pci.c 9 Nov 2006 17:26:15 -0000 @@ -79,6 +79,10 @@ DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcib_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; @@ -533,6 +537,47 @@ return(intnum); } +/* Pass request to alloc MSI messages up to the parent bridge. */ +int +pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) +{ + device_t bus; + + bus = device_get_parent(pcib); + return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount, + irqs)); +} + +/* Pass request to release MSI messages up to the parent bridge. */ +int +pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) +{ + device_t bus; + + bus = device_get_parent(pcib); + return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs)); +} + +/* Pass request to alloc an MSI-X message up to the parent bridge. 
*/ +int +pcib_alloc_msix(device_t pcib, device_t dev, int index, int *irq) +{ + device_t bus; + + bus = device_get_parent(pcib); + return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, index, irq)); +} + +/* Pass request to release an MSI-X message up to the parent bridge. */ +int +pcib_release_msix(device_t pcib, device_t dev, int irq) +{ + device_t bus; + + bus = device_get_parent(pcib); + return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq)); +} + /* * Try to read the bus number of a host-PCI bridge using appropriate config * registers. Index: dev/pci/pci_private.h =================================================================== RCS file: /usr/cvs/src/sys/dev/pci/pci_private.h,v retrieving revision 1.19 diff -u -r1.19 pci_private.h --- dev/pci/pci_private.h 7 Nov 2006 18:55:51 -0000 1.19 +++ dev/pci/pci_private.h 9 Nov 2006 17:26:15 -0000 @@ -66,6 +66,8 @@ int pci_disable_io_method(device_t dev, device_t child, int space); int pci_find_extcap_method(device_t dev, device_t child, int capability, int *capreg); +int pci_alloc_msi_method(device_t dev, device_t child, int *count); +int pci_release_msi_method(device_t dev, device_t child); struct resource *pci_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags); Index: dev/pci/pcib_if.m =================================================================== RCS file: /usr/cvs/src/sys/dev/pci/pcib_if.m,v retrieving revision 1.8 diff -u -r1.8 pcib_if.m --- dev/pci/pcib_if.m 7 Nov 2006 18:55:51 -0000 1.8 +++ dev/pci/pcib_if.m 9 Nov 2006 17:26:15 -0000 @@ -88,3 +88,48 @@ device_t dev; int pin; } DEFAULT null_route_interrupt; + +# +# Allocate 'count' MSI messages mapped onto 'count' IRQs. 'irqs' points +# to an array of at least 'count' ints. 
The max number of messages this +# device supports is included so that the MD code can take that into +# account when assigning resources so that the proper number of low bits +# are clear in the resulting message data value. +# +METHOD int alloc_msi { + device_t pcib; + device_t dev; + int count; + int maxcount; + int *irqs; +}; + +# +# Release 'count' MSI message mapped onto 'count' IRQs stored in the +# array pointed to by 'irq'. +# +METHOD int release_msi { + device_t pcib; + device_t dev; + int count; + int *irqs; +}; + +# +# Allocate a single MSI-X message mapped onto '*irq'. +# +METHOD int alloc_msix { + device_t pcib; + device_t dev; + int index; + int *irq; +}; + +# +# Release a single MSI-X message mapped onto 'irq'. +# +METHOD int release_msix { + device_t pcib; + device_t dev; + int irq; +}; Index: dev/pci/pcib_private.h =================================================================== RCS file: /usr/cvs/src/sys/dev/pci/pcib_private.h,v retrieving revision 1.7 diff -u -r1.7 pcib_private.h --- dev/pci/pcib_private.h 6 Jan 2006 19:22:18 -0000 1.7 +++ dev/pci/pcib_private.h 9 Nov 2006 17:26:15 -0000 @@ -74,5 +74,9 @@ uint32_t pcib_read_config(device_t dev, int b, int s, int f, int reg, int width); void pcib_write_config(device_t dev, int b, int s, int f, int reg, uint32_t val, int width); int pcib_route_interrupt(device_t pcib, device_t dev, int pin); +int pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs); +int pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs); +int pcib_alloc_msix(device_t pcib, device_t dev, int index, int *irq); +int pcib_release_msix(device_t pcib, device_t dev, int irq); #endif Index: dev/pci/pcireg.h =================================================================== RCS file: /usr/cvs/src/sys/dev/pci/pcireg.h,v retrieving revision 1.52 diff -u -r1.52 pcireg.h --- dev/pci/pcireg.h 30 May 2006 21:36:12 -0000 1.52 +++ dev/pci/pcireg.h 9 Nov 2006 17:26:15 -0000 @@ -430,3 +430,40 @@ 
#define PCIXM_STATUS_MAXSPLITS 0x0380 /* Maximum Split Transactions */ #define PCIXM_STATUS_MAXCRDS 0x1C00 /* Maximum Cumulative Read Size */ #define PCIXM_STATUS_RCVDSCEM 0x2000 /* Received a Split Comp w/Error msg */ + +/* HT (HyperTransport) Capability definitions */ +#define PCIR_HT_COMMAND 0x2 +#define PCIM_HTCMD_CAP_MASK 0xf800 /* Capability type. */ +#define PCIM_HTCAP_SLAVE 0x0000 /* 000xx */ +#define PCIM_HTCAP_HOST 0x2000 /* 001xx */ +#define PCIM_HTCAP_SWITCH 0x4000 /* 01000 */ +#define PCIM_HTCAP_INTERRUPT 0x8000 /* 10000 */ +#define PCIM_HTCAP_REVISION_ID 0x8800 /* 10001 */ +#define PCIM_HTCAP_UNITID_CLUMPING 0x9000 /* 10010 */ +#define PCIM_HTCAP_EXT_CONFIG_SPACE 0x9800 /* 10011 */ +#define PCIM_HTCAP_ADDRESS_MAPPING 0xa000 /* 10100 */ +#define PCIM_HTCAP_MSI_MAPPING 0xa800 /* 10101 */ +#define PCIM_HTCAP_DIRECT_ROUTE 0xb000 /* 10110 */ +#define PCIM_HTCAP_VCSET 0xb800 /* 10111 */ +#define PCIM_HTCAP_RETRY_MODE 0xc000 /* 11000 */ + +/* HT MSI Mapping Capability definitions. 
*/ +#define PCIM_HTCMD_MSI_ENABLE 0x0001 +#define PCIR_HTMSI_ADDRESS_LO 0x4 +#define PCIR_HTMSI_ADDRESS_HI 0x8 + +/* MSI-X definitions */ +#define PCIR_MSIX_CTRL 0x2 +#define PCIM_MSIXCTRL_MSIX_ENABLE 0x8000 +#define PCIM_MSIXCTRL_FUNCTION_MASK 0x4000 +#define PCIM_MSIXCTRL_TABLE_SIZE 0x07FF +#define PCIR_MSIX_TABLE 0x4 +#define PCIR_MSIX_PBA 0x8 +#define PCIM_MSIX_BIR_MASK 0x7 +#define PCIM_MSIX_BIR_BAR_10 0 +#define PCIM_MSIX_BIR_BAR_14 1 +#define PCIM_MSIX_BIR_BAR_18 2 +#define PCIM_MSIX_BIR_BAR_1C 3 +#define PCIM_MSIX_BIR_BAR_20 4 +#define PCIM_MSIX_BIR_BAR_24 5 +#define PCIM_MSIX_VCTRL_MASK 0x1 Index: dev/pci/pcivar.h =================================================================== RCS file: /usr/cvs/src/sys/dev/pci/pcivar.h,v retrieving revision 1.72 diff -u -r1.72 pcivar.h --- dev/pci/pcivar.h 30 Oct 2006 19:18:46 -0000 1.72 +++ dev/pci/pcivar.h 9 Nov 2006 17:26:15 -0000 @@ -77,8 +77,25 @@ /* Interesting values for PCI MSI */ struct pcicfg_msi { uint16_t msi_ctrl; /* Message Control */ + uint8_t msi_location; /* Offset of MSI capability registers. */ uint8_t msi_msgnum; /* Number of messages */ - uint16_t msi_data; /* Location of MSI data word */ + int msi_alloc; /* Number of allocated messages. */ + uint64_t msi_addr; /* Contents of address register. */ + uint16_t msi_data; /* Contents of data register. */ +}; + +/* Interesting values for PCI MSI-X */ +struct pcicfg_msix { + uint16_t msix_ctrl; /* Message Control */ + uint8_t msix_location; /* Offset of MSI-X capability registers. */ + uint16_t msix_msgnum; /* Number of messages */ + int msix_alloc; /* Number of allocated messages. */ + uint8_t msix_table_bar; /* BAR containing vector table. */ + uint8_t msix_pba_bar; /* BAR containing PBA. */ + uint32_t msix_table_offset; + uint32_t msix_pba_offset; + struct resource *msix_table_res; /* Resource containing vector table. */ + struct resource *msix_pba_res; /* Resource containing PBA. 
*/ }; /* config header information common to all header types */ @@ -120,6 +137,7 @@ struct pcicfg_pp pp; /* pci power management */ struct pcicfg_vpd vpd; /* pci vital product data */ struct pcicfg_msi msi; /* pci msi */ + struct pcicfg_msix msix; /* pci msi-x */ } pcicfgregs; /* additional type 1 device config header information (PCI to PCI bridge) */ @@ -371,8 +389,29 @@ return PCI_FIND_EXTCAP(device_get_parent(dev), dev, capability, capreg); } +static __inline int +pci_alloc_msi(device_t dev, int *count) +{ + return (PCI_ALLOC_MSI(device_get_parent(dev), dev, count)); +} + +static __inline int +pci_release_msi(device_t dev) +{ + return (PCI_RELEASE_MSI(device_get_parent(dev), dev)); +} + device_t pci_find_bsf(uint8_t, uint8_t, uint8_t); device_t pci_find_device(uint16_t, uint16_t); + +/* Used by MD code to program MSI and MSI-X registers. */ +void pci_enable_msi(device_t dev, uint64_t address, uint16_t data); +void pci_enable_msix(device_t dev, u_int index, uint64_t address, + uint32_t data); +void pci_mask_msix(device_t dev, u_int index); +int pci_pending_msix(device_t dev, u_int index); +void pci_unmask_msix(device_t dev, u_int index); + #endif /* _SYS_BUS_H_ */ /* Index: i386/i386/local_apic.c =================================================================== RCS file: /usr/cvs/src/sys/i386/i386/local_apic.c,v retrieving revision 1.34 diff -u -r1.34 local_apic.c --- i386/i386/local_apic.c 10 Oct 2006 23:23:12 -0000 1.34 +++ i386/i386/local_apic.c 9 Nov 2006 17:26:15 -0000 @@ -746,6 +746,65 @@ panic("Couldn't find an APIC vector for IRQ %u", irq); } +/* + * Request 'count' free contiguous IDT vectors to be used by 'count' + * IRQs. 'count' must be a power of two and the vectors will be + * aligned on a boundary of 'align'. If the request cannot be + * satisfied, 0 is returned. 
+ */ +u_int +apic_alloc_vectors(u_int *irqs, u_int count, u_int align) +{ + u_int first, run, vector; + + KASSERT(powerof2(count), ("bad count")); + KASSERT(powerof2(align), ("bad align")); + KASSERT(align >= count, ("align < count")); +#ifdef INVARIANTS + for (run = 0; run < count; run++) + KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", + irqs[run], run)); +#endif + + /* + * Search for 'count' free vectors. As with apic_alloc_vector(), + * this just uses a simple first fit algorithm. + */ + run = 0; + first = 0; + mtx_lock_spin(&icu_lock); + for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { + + /* Vector is in use, end run. */ + if (ioint_irqs[vector] != 0) { + run = 0; + first = 0; + continue; + } + + /* Start a new run if run == 0 and vector is aligned. */ + if (run == 0) { + if ((vector & (align - 1)) != 0) + continue; + first = vector; + } + run++; + + /* Keep looping if the run isn't long enough yet. */ + if (run < count) + continue; + + /* Found a run, assign IRQs and return the first vector. */ + for (vector = 0; vector < count; vector++) + ioint_irqs[first + vector] = irqs[vector]; + mtx_unlock_spin(&icu_lock); + return (first + APIC_IO_INTS); + } + mtx_unlock_spin(&icu_lock); + printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); + return (0); +} + void apic_enable_vector(u_int vector) { @@ -1005,6 +1064,9 @@ intr_register_pic(&lapic_pic); if (bootverbose) lapic_dump("BSP"); + + /* Enable the MSI "pic". 
*/ + msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL) Index: i386/i386/mptable_pci.c =================================================================== RCS file: /usr/cvs/src/sys/i386/i386/mptable_pci.c,v retrieving revision 1.4 diff -u -r1.4 mptable_pci.c --- i386/i386/mptable_pci.c 6 Jan 2006 19:22:18 -0000 1.4 +++ i386/i386/mptable_pci.c 9 Nov 2006 17:26:15 -0000 @@ -96,6 +96,10 @@ DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; @@ -148,6 +152,10 @@ DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), {0, 0} }; Index: i386/i386/msi.c =================================================================== RCS file: i386/i386/msi.c diff -N i386/i386/msi.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ i386/i386/msi.c 9 Nov 2006 17:21:53 -0000 @@ -0,0 +1,538 @@ +/*- + * Copyright (c) 2006 John Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Support for PCI Message Signalled Interrupts (MSI). MSI interrupts on + * x86 are basically APIC messages that the northbridge delivers directly + * to the local APICs as if they had come from an I/O APIC. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Fields in address for Intel MSI messages. */ +#define MSI_INTEL_ADDR_DEST 0x000ff000 +#define MSI_INTEL_ADDR_RH 0x00000008 +# define MSI_INTEL_ADDR_RH_ON 0x00000008 +# define MSI_INTEL_ADDR_RH_OFF 0x00000000 +#define MSI_INTEL_ADDR_DM 0x00000004 +# define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000 +# define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004 + +/* Fields in data for Intel MSI messages. 
*/ +#define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */ +# define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG +# define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL +#define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */ +# define MSI_INTEL_DATA_DEASSERT 0x00000000 +# define MSI_INTEL_DATA_ASSERT 0x00004000 +#define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */ +# define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED +# define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI +# define MSI_INTEL_DATA_DELSMI IOART_DELSMI +# define MSI_INTEL_DATA_DELNMI IOART_DELNMI +# define MSI_INTEL_DATA_DELINIT IOART_DELINIT +# define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT +#define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */ + +/* + * Build Intel MSI message and data values from a source. AMD64 systems + * seem to be compatible, so we use the same function for both. + */ +#define INTEL_ADDR(msi) \ + (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \ + MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL) +#define INTEL_DATA(msi) \ + (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector) + +static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI"); + +/* + * MSI sources are bunched into groups. This is because MSI forces + * all of the messages to share the address and data registers and + * thus certain properties (such as the local APIC ID target on x86). + * Each group has a 'first' source that contains information global to + * the group. These fields are marked with (g) below. + * + * Note that local APIC ID is kind of special. Each message will be + * assigned an ID by the system; however, a group will use the ID from + * the first message. + * + * For MSI-X, each message is isolated, and msi_index indicates the + * index of this message in the device's MSI-X table. + */ +struct msi_intsrc { + struct intsrc msi_intsrc; + device_t msi_dev; /* Owning device. (g) */ + struct msi_intsrc *msi_first; /* First source in group. */ + u_int msi_irq; /* IRQ cookie. 
*/ + u_int msi_index; /* Index of this message. */ + u_int msi_msix; /* MSI-X message. */ + u_int msi_vector:8; /* IDT vector. */ + u_int msi_cpu:8; /* Local APIC ID. (g) */ + u_int msi_count:8; /* Messages in this group. (g) */ +}; + +static void msi_enable_source(struct intsrc *isrc); +static void msi_disable_source(struct intsrc *isrc, int eoi); +static void msi_eoi_source(struct intsrc *isrc); +static void msi_enable_intr(struct intsrc *isrc); +static int msi_vector(struct intsrc *isrc); +static int msi_source_pending(struct intsrc *isrc); +static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol); +static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id); +static void msix_enable_intr(struct intsrc *isrc); +static int msix_source_pending(struct intsrc *isrc); +static void msix_assign_cpu(struct intsrc *isrc, u_int apic_id); + +struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, + msi_enable_intr, msi_vector, msi_source_pending, + NULL, NULL, msi_config_intr, msi_assign_cpu }; +struct pic msix_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, + msix_enable_intr, msi_vector, msix_source_pending, + NULL, NULL, msi_config_intr, msix_assign_cpu }; + +static int msi_enabled; +static struct sx msi_sx; + +static void +msi_enable_source(struct intsrc *isrc) +{ +} + +static void +msi_disable_source(struct intsrc *isrc, int eoi) +{ + + if (eoi == PIC_EOI) + lapic_eoi(); +} + +static void +msi_eoi_source(struct intsrc *isrc) +{ + + lapic_eoi(); +} + +static void +msi_enable_intr(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + /* + * Since we can only enable the entire group at once, go ahead and + * enable the messages when the first message is given a handler. + * Note that we assume all devices will register a handler for the + * first message. 
+ */ + if (msi->msi_index == 0) { + mtx_lock_spin(&icu_lock); + pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); + } + apic_enable_vector(msi->msi_vector); +} + +static int +msi_vector(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + return (msi->msi_irq); +} + +static int +msi_source_pending(struct intsrc *isrc) +{ + + return (0); +} + +static int +msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol) +{ + + return (ENODEV); +} + +static void +msi_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + msi->msi_cpu = apic_id; + if (bootverbose) + printf("msi: Assigning MSI IRQ %d to local APIC %u\n", + msi->msi_irq, msi->msi_cpu); + mtx_lock_spin(&icu_lock); + if (isrc->is_enabled) + pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); +} + +static void +msix_enable_intr(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + mtx_lock_spin(&icu_lock); + pci_enable_msix(msi->msi_dev, msi->msi_index, INTEL_ADDR(msi), + INTEL_DATA(msi)); + pci_unmask_msix(msi->msi_dev, msi->msi_index); + mtx_unlock_spin(&icu_lock); + apic_enable_vector(msi->msi_vector); +} + +static int +msix_source_pending(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + return (pci_pending_msix(msi->msi_dev, msi->msi_index)); +} + +static void +msix_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + msi->msi_cpu = apic_id; + if (bootverbose) + printf("msi: Assigning MSI IRQ %d to local APIC %u\n", + msi->msi_irq, msi->msi_cpu); + mtx_lock_spin(&icu_lock); + if (isrc->is_enabled) + pci_enable_msix(msi->msi_dev, msi->msi_index, INTEL_ADDR(msi), + INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); +} + +void +msi_init(void) +{ + + /* Check if we have a supported CPU. 
*/ + if (!(strcmp(cpu_vendor, "GenuineIntel") == 0 || + strcmp(cpu_vendor, "AuthenticAMD") == 0)) + return; + + msi_enabled = 1; + intr_register_pic(&msi_pic); + intr_register_pic(&msix_pic); + sx_init(&msi_sx, "msi"); +} + +/* + * Try to allocate 'count' interrupt sources with contiguous IDT values. If + * we allocate any new sources, then their IRQ values will be at the end of + * the irqs[] array, with *newirq being the index of the first new IRQ value + * and *newcount being the number of new IRQ values added. + */ +int +msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq, + int *newcount) +{ + struct msi_intsrc *msi, *fsrc; + int cnt, i, j, vector; + + *newirq = 0; + *newcount = 0; + if (!msi_enabled) + return (ENXIO); + + sx_xlock(&msi_sx); + + /* DBG */ + device_printf(dev, "msi_alloc(%d, %d)\n", count, maxcount); + + /* Try to find 'count' free IRQs. */ + cnt = 0; + for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(i); + + /* End of allocated sources, so break. */ + if (msi == NULL) + break; + + /* If this is a free one, save its IRQ in the array. */ + if (msi->msi_dev == NULL) { + irqs[cnt] = i; + cnt++; + if (cnt == count) + break; + } + } + + /* Do we need to create some new sources? */ + if (cnt < count) { + /* If we would exceed the max, give up. */ + if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* DBG */ + device_printf(dev, "creating %d MSI sources at IRQ %d\n", + count - cnt, i); + + /* We need count - cnt more sources starting at index 'cnt'. */ + *newirq = cnt; + *newcount = count - cnt; + for (j = 0; j < count - cnt; j++) { + + /* Create a new MSI source. */ + msi = malloc(sizeof(struct msi_intsrc), M_MSI, + M_WAITOK | M_ZERO); + msi->msi_intsrc.is_pic = &msi_pic; + msi->msi_irq = i + j; + intr_register_source(&msi->msi_intsrc); + + /* Add it to our array. 
*/ + irqs[cnt] = i + j; + cnt++; + } + } + + /* Ok, we now have the IRQs allocated. */ + KASSERT(cnt == count, ("count mismatch")); + + /* DBG */ + device_printf(dev, "trying to use IRQs %u", irqs[0]); + for (i = 1; i < count; i++) + printf(", %u", irqs[i]); + printf(" for MSI\n"); + + /* Allocate 'count' IDT vectors. */ + vector = apic_alloc_vectors(irqs, count, maxcount); + if (vector == 0) { + sx_xunlock(&msi_sx); + return (ENOSPC); + } + + /* DBG */ + device_printf(dev, "allocated %d vectors starting at %d for MSI\n", + count, vector); + + /* Assign IDT vectors and make these messages owned by 'dev'. */ + fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]); + for (i = 0; i < count; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); + msi->msi_intsrc.is_pic = &msi_pic; + msi->msi_dev = dev; + msi->msi_vector = vector + i; + msi->msi_index = i; + msi->msi_first = fsrc; + + /* XXX: Somewhat gross. */ + msi->msi_intsrc.is_enabled = 0; + } + fsrc->msi_count = count; + sx_xunlock(&msi_sx); + + return (0); +} + +int +msi_release(int *irqs, int count) +{ + struct msi_intsrc *msi, *first; + int i; + + sx_xlock(&msi_sx); + first = (struct msi_intsrc *)intr_lookup_source(irqs[0]); + if (first == NULL) { + sx_xunlock(&msi_sx); + return (ENOENT); + } + + /* Make sure this isn't an MSI-X message. */ + if (first->msi_msix) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + + /* Make sure this message is allocated to a group. */ + if (first->msi_first == NULL) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* + * Make sure this is the start of a group and that we are releasing + * the entire group. + */ + if (first->msi_first != first || first->msi_count != count) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + KASSERT(first->msi_index == 0, ("index mismatch")); + + KASSERT(first->msi_dev != NULL, ("unowned group")); + + /* Clear all the extra messages in the group. 
*/ + for (i = 1; i < count; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); + KASSERT(msi->msi_first == first, ("message not in group")); + KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch")); + msi->msi_first = NULL; + msi->msi_dev = NULL; + apic_free_vector(msi->msi_vector, msi->msi_irq); + msi->msi_vector = 0; + msi->msi_index = 0; + } + + /* Clear out the first message. */ + first->msi_first = NULL; + first->msi_dev = NULL; + apic_free_vector(first->msi_vector, first->msi_irq); + first->msi_vector = 0; + first->msi_count = 0; + + sx_xunlock(&msi_sx); + return (0); +} + +int +msix_alloc(device_t dev, int index, int *irq, int *new) +{ + struct msi_intsrc *msi; + int i, vector; + + *new = 0; + if (!msi_enabled) + return (ENXIO); + + sx_xlock(&msi_sx); + + /* DBG */ + device_printf(dev, "msix_alloc(%d)\n", index); + + /* Find a free IRQ. */ + for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(i); + + /* End of allocated sources, so break. */ + if (msi == NULL) + break; + + /* Stop at the first free source. */ + if (msi->msi_dev == NULL) + break; + } + + /* Do we need to create a new source? */ + if (msi == NULL) { + /* If we would exceed the max, give up. */ + if (i + 1 > FIRST_MSI_INT + NUM_MSI_INTS) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* DBG */ + device_printf(dev, "creating MSI-X source at IRQ %d\n", i); + + /* Create a new source. */ + *new = 1; + msi = malloc(sizeof(struct msi_intsrc), M_MSI, + M_WAITOK | M_ZERO); + msi->msi_intsrc.is_pic = &msix_pic; + msi->msi_irq = i; + intr_register_source(&msi->msi_intsrc); + } + + /* Allocate an IDT vector. */ + vector = apic_alloc_vector(i); + + /* DBG */ + device_printf(dev, "allocated vector %d for MSI-X irq %d\n", + vector, i); + + /* Setup source. 
*/ + msi->msi_intsrc.is_pic = &msix_pic; + msi->msi_dev = dev; + msi->msi_vector = vector; + msi->msi_index = index; + msi->msi_msix = 1; + + /* XXX: Somewhat gross. */ + msi->msi_intsrc.is_enabled = 0; + sx_xunlock(&msi_sx); + + *irq = i; + return (0); +} + +int +msix_release(int irq) +{ + struct msi_intsrc *msi; + + sx_xlock(&msi_sx); + msi = (struct msi_intsrc *)intr_lookup_source(irq); + if (msi == NULL) { + sx_xunlock(&msi_sx); + return (ENOENT); + } + + /* Make sure this is an MSI-X message. */ + if (!msi->msi_msix) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + + KASSERT(msi->msi_dev != NULL, ("unowned message")); + + /* Clear out the message. */ + msi->msi_dev = NULL; + apic_free_vector(msi->msi_vector, msi->msi_irq); + msi->msi_vector = 0; + msi->msi_index = 0; + msi->msi_msix = 0; + + sx_xunlock(&msi_sx); + return (0); +} Index: i386/i386/nexus.c =================================================================== RCS file: /usr/cvs/src/sys/i386/i386/nexus.c,v retrieving revision 1.64 diff -u -r1.64 nexus.c --- i386/i386/nexus.c 11 Sep 2006 19:31:51 -0000 1.64 +++ i386/i386/nexus.c 9 Nov 2006 17:26:15 -0000 @@ -41,6 +41,7 @@ * and I/O memory address space. 
*/ +#include "opt_apic.h" #include "opt_isa.h" #include @@ -61,6 +62,10 @@ #include +#ifdef DEV_APIC +#include "pcib_if.h" +#endif + #ifdef DEV_ISA #include #ifdef PC98 @@ -104,6 +109,12 @@ static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long); static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *); static void nexus_delete_resource(device_t, device_t, int, int); +#ifdef DEV_APIC +static int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs); +static int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs); +static int nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq); +static int nexus_release_msix(device_t pcib, device_t dev, int irq); +#endif static device_method_t nexus_methods[] = { /* Device interface */ @@ -129,6 +140,14 @@ DEVMETHOD(bus_get_resource, nexus_get_resource), DEVMETHOD(bus_delete_resource, nexus_delete_resource), + /* pcib interface */ +#ifdef DEV_APIC + DEVMETHOD(pcib_alloc_msi, nexus_alloc_msi), + DEVMETHOD(pcib_release_msi, nexus_release_msi), + DEVMETHOD(pcib_alloc_msix, nexus_alloc_msix), + DEVMETHOD(pcib_release_msix, nexus_release_msix), +#endif + { 0, 0 } }; @@ -552,6 +571,52 @@ resource_list_delete(rl, type, rid); } +#ifdef DEV_APIC +static int +nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq) +{ + int error, new; + + error = msix_alloc(dev, index, irq, &new); + if (new) { + /* DBG */ + device_printf(pcib, "adding IRQ %u to rman\n", *irq); + rman_manage_region(&irq_rman, *irq, *irq); + } + return (error); +} + +static int +nexus_release_msix(device_t pcib, device_t dev, int irq) +{ + + return (msix_release(irq)); +} + +static int +nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) +{ + int error, i, newirq, newcount; + + /* First alloc the messages. */ + error = msi_alloc(dev, count, maxcount, irqs, &newirq, &newcount); + + /* Always add any new IRQs to the rman, even on failure. 
*/ + for (i = 0; i < newcount; i++) + rman_manage_region(&irq_rman, irqs[newirq + i], + irqs[newirq + i]); + + return (error); +} + +static int +nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs) +{ + + return (msi_release(irqs, count)); +} +#endif + #ifdef DEV_ISA /* * Placeholder which claims PnP 'devices' which describe system Index: i386/include/apicvar.h =================================================================== RCS file: /usr/cvs/src/sys/i386/include/apicvar.h,v retrieving revision 1.19 diff -u -r1.19 apicvar.h --- i386/include/apicvar.h 10 Oct 2006 23:23:12 -0000 1.19 +++ i386/include/apicvar.h 9 Nov 2006 17:26:15 -0000 @@ -174,6 +174,7 @@ IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint); u_int apic_alloc_vector(u_int irq); +u_int apic_alloc_vectors(u_int *irqs, u_int count, u_int align); void apic_enable_vector(u_int vector); void apic_free_vector(u_int vector, u_int irq); u_int apic_idt_to_irq(u_int vector); Index: i386/include/intr_machdep.h =================================================================== RCS file: /usr/cvs/src/sys/i386/include/intr_machdep.h,v retrieving revision 1.13 diff -u -r1.13 intr_machdep.h --- i386/include/intr_machdep.h 10 Oct 2006 23:23:12 -0000 1.13 +++ i386/include/intr_machdep.h 9 Nov 2006 17:26:15 -0000 @@ -43,11 +43,18 @@ * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * - * For now we stick with 255 as ISA IRQs and PCI intline IRQs only allow - * for IRQs in the range 0 - 254. When MSI support is added this number - * will likely increase. + * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. + * IRQ values from 256 onward are used by MSI. We leave 255 unused to avoid + * confusion since 255 is used in PCI to indicate an invalid IRQ. 
*/ -#define NUM_IO_INTS 255 +#define NUM_MSI_INTS 128 +#define FIRST_MSI_INT 256 +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) + +/* + * Default base address for MSI messages on x86 platforms. + */ +#define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. @@ -137,6 +144,12 @@ void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); +void msi_init(void); +int msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq, + int *newcount); +int msi_release(int* irqs, int count); +int msix_alloc(device_t dev, int index, int *irq, int *new); +int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ Index: i386/pci/pci_bus.c =================================================================== RCS file: /usr/cvs/src/sys/i386/pci/pci_bus.c,v retrieving revision 1.123 diff -u -r1.123 pci_bus.c --- i386/pci/pci_bus.c 6 Jan 2006 19:22:19 -0000 1.123 +++ i386/pci/pci_bus.c 9 Nov 2006 17:26:15 -0000 @@ -534,6 +534,10 @@ DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; @@ -623,6 +627,10 @@ DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), {0, 0} };