--- //depot/projects/smpng/sys/arm/conf/CRB +++ //depot/user/jhb/intr/arm/conf/CRB @@ -51,7 +51,6 @@ options PSEUDOFS #Pseudo-filesystem framework options SCSI_DELAY=5000 #Delay (in ms) before probing SCSI options KTRACE #ktrace(1) support -options INTR_FILTER options SYSVSHM #SYSV-style shared memory options SYSVMSG #SYSV-style message queues options SYSVSEM #SYSV-style semaphores --- //depot/projects/smpng/sys/conf/options +++ //depot/user/jhb/intr/conf/options @@ -849,9 +849,6 @@ # XBOX options for FreeBSD/i386, but some files are MI XBOX opt_xbox.h -# Interrupt filtering -INTR_FILTER - # 802.11 support layer IEEE80211_DEBUG opt_wlan.h IEEE80211_DEBUG_REFCNT opt_wlan.h --- //depot/projects/smpng/sys/dev/ae/if_ae.c +++ //depot/user/jhb/intr/dev/ae/if_ae.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -132,8 +133,8 @@ static void ae_stop_rxmac(ae_softc_t *sc); static void ae_stop_txmac(ae_softc_t *sc); static void ae_mac_config(ae_softc_t *sc); -static int ae_intr(void *arg); -static void ae_int_task(void *arg, int pending); +static int ae_filter(void *arg); +static void ae_intr(void *arg); static void ae_tx_intr(ae_softc_t *sc); static int ae_rxeof(ae_softc_t *sc, ae_rxd_t *rxd); static void ae_rx_intr(ae_softc_t *sc); @@ -252,7 +253,6 @@ */ mtx_init(&sc->mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->tick_ch, &sc->mtx, 0); - TASK_INIT(&sc->int_task, 0, ae_int_task, sc); TASK_INIT(&sc->link_task, 0, ae_link_task, sc); pci_enable_busmaster(dev); /* Enable bus mastering. */ @@ -361,37 +361,23 @@ goto fail; } - ether_ifattach(ifp, sc->eaddr); /* Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); /* - * Create and run all helper tasks. - */ - sc->tq = taskqueue_create_fast("ae_taskq", M_WAITOK, - taskqueue_thread_enqueue, &sc->tq); - if (sc->tq == NULL) { - device_printf(dev, "could not create taskqueue.\n"); - ether_ifdetach(ifp); - error = ENXIO; - goto fail; - } - taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", - device_get_nameunit(sc->dev)); - - /* * Configure interrupt handlers. 
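 * Two cooperating handlers share the IRQ below: ae_filter() runs in
 * primary interrupt context, while ae_intr() is registered with
 * INTR_MANUAL and therefore only runs when the filter explicitly
 * queues it via hwi_sched().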
*/ error = bus_setup_intr(dev, sc->irq[0], INTR_TYPE_NET | INTR_MPSAFE, - ae_intr, NULL, sc, &sc->intrhand); + ae_filter, NULL, sc, &sc->filthand); + if (error == 0) + error = bus_setup_intr(dev, sc->irq[0], INTR_TYPE_NET | INTR_MPSAFE | + INTR_MANUAL, NULL, ae_intr, sc, &sc->intrhand); if (error != 0) { device_printf(dev, "could not set up interrupt handler.\n"); - taskqueue_free(sc->tq); - sc->tq = NULL; - ether_ifdetach(ifp); goto fail; } + ether_ifattach(ifp, sc->eaddr); fail: if (error != 0) ae_detach(dev); @@ -761,19 +747,12 @@ KASSERT(sc != NULL, ("[ae: %d]: sc is NULL", __LINE__)); ifp = sc->ifp; if (device_is_attached(dev)) { + ether_ifdetach(ifp); AE_LOCK(sc); - sc->flags |= AE_FLAG_DETACH; ae_stop(sc); AE_UNLOCK(sc); callout_drain(&sc->tick_ch); - taskqueue_drain(sc->tq, &sc->int_task); taskqueue_drain(taskqueue_swi, &sc->link_task); - ether_ifdetach(ifp); - } - if (sc->tq != NULL) { - taskqueue_drain(sc->tq, &sc->int_task); - taskqueue_free(sc->tq); - sc->tq = NULL; } if (sc->miibus != NULL) { device_delete_child(dev, sc->miibus); @@ -785,6 +764,10 @@ bus_teardown_intr(dev, sc->irq[0], sc->intrhand); sc->intrhand = NULL; } + if (sc->filthand != NULL) { + bus_teardown_intr(dev, sc->irq[0], sc->filthand); + sc->filthand = NULL; + } if (ifp != NULL) { if_free(ifp); sc->ifp = NULL; @@ -876,6 +859,7 @@ ae_softc_t *sc; sc = device_get_softc(dev); + /* XXX: Why is this deferred? */ taskqueue_enqueue(taskqueue_swi, &sc->link_task); } @@ -1521,6 +1505,7 @@ ae_softc_t *sc; sc = ifp->if_softc; + KASSERT(sc != NULL, ("[ae, %d]: sc is NULL", __LINE__)); AE_LOCK(sc); ae_start_locked(ifp); AE_UNLOCK(sc); @@ -1535,7 +1520,6 @@ int error; sc = ifp->if_softc; - KASSERT(sc != NULL, ("[ae, %d]: sc is NULL", __LINE__)); AE_LOCK_ASSERT(sc); #ifdef AE_DEBUG @@ -1732,7 +1716,7 @@ } static int -ae_intr(void *arg) +ae_filter(void *arg) { ae_softc_t *sc; uint32_t val; @@ -1747,14 +1731,13 @@ /* Disable interrupts. */ AE_WRITE_4(sc, AE_ISR_REG, AE_ISR_DISABLE); - /* Schedule interrupt processing. */ - taskqueue_enqueue(sc->tq, &sc->int_task); + hwi_sched(sc->intrhand); return (FILTER_HANDLED); } static void -ae_int_task(void *arg, int pending) +ae_intr(void *arg) { ae_softc_t *sc; struct ifnet *ifp; @@ -2127,8 +2110,7 @@ & (IFF_PROMISC | IFF_ALLMULTI)) != 0) ae_rxfilter(sc); } else { - if ((sc->flags & AE_FLAG_DETACH) == 0) - ae_init_locked(sc); + ae_init_locked(sc); } } else { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) --- //depot/projects/smpng/sys/dev/ae/if_aevar.h +++ //depot/user/jhb/intr/dev/ae/if_aevar.h @@ -107,6 +107,7 @@ struct resource_spec *spec_mem; struct resource *irq[1]; struct resource_spec *spec_irq; + void *filthand; void *intrhand; struct mtx mtx; @@ -118,9 +119,7 @@ struct callout tick_ch; /* Tasks. */ - struct task int_task; struct task link_task; - struct taskqueue *tq; /* DMA tags. */ bus_dma_tag_t dma_parent_tag; @@ -161,7 +160,6 @@ #define BUS_ADDR_HI(x) ((uint64_t) (x) >> 32) #define AE_FLAG_LINK 0x01 /* Has link. */ -#define AE_FLAG_DETACH 0x02 /* Is detaching. */ #define AE_FLAG_TXAVAIL 0x04 /* Tx'es available. */ #define AE_FLAG_MSI 0x08 /* Using MSI. */ #define AE_FLAG_PMG 0x10 /* Supports PCI power management. 
*/ --- //depot/projects/smpng/sys/dev/age/if_age.c +++ //depot/user/jhb/intr/dev/age/if_age.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -126,8 +127,8 @@ static void age_mac_config(struct age_softc *); static void age_link_task(void *, int); static void age_stats_update(struct age_softc *); -static int age_intr(void *); -static void age_int_task(void *, int); +static int age_filter(void *); +static void age_intr(void *); static void age_txintr(struct age_softc *, int); static void age_rxeof(struct age_softc *sc, struct rx_rdesc *); static int age_rxintr(struct age_softc *, int, int); @@ -269,6 +270,7 @@ struct age_softc *sc; sc = device_get_softc(dev); + /* XXX: Why is this deferred? */ taskqueue_enqueue(taskqueue_swi, &sc->age_link_task); } @@ -473,7 +475,6 @@ mtx_init(&sc->age_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->age_tick_ch, &sc->age_mtx, 0); - TASK_INIT(&sc->age_int_task, 0, age_int_task, sc); TASK_INIT(&sc->age_link_task, 0, age_link_task, sc); /* Map the device. */ @@ -637,18 +638,6 @@ /* Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); - /* Create local taskq. */ - sc->age_tq = taskqueue_create_fast("age_taskq", M_WAITOK, - taskqueue_thread_enqueue, &sc->age_tq); - if (sc->age_tq == NULL) { - device_printf(dev, "could not create taskqueue.\n"); - ether_ifdetach(ifp); - error = ENXIO; - goto fail; - } - taskqueue_start_threads(&sc->age_tq, 1, PI_NET, "%s taskq", - device_get_nameunit(sc->age_dev)); - if ((sc->age_flags & AGE_FLAG_MSIX) != 0) msic = AGE_MSIX_MESSAGES; else if ((sc->age_flags & AGE_FLAG_MSI) != 0) @@ -657,15 +646,13 @@ msic = 1; for (i = 0; i < msic; i++) { error = bus_setup_intr(dev, sc->age_irq[i], - INTR_TYPE_NET | INTR_MPSAFE, age_intr, NULL, sc, + INTR_TYPE_NET | INTR_MPSAFE, age_filter, age_intr, sc, &sc->age_intrhand[i]); if (error != 0) break; } if (error != 0) { device_printf(dev, "could not set up interrupt handler.\n"); - taskqueue_free(sc->age_tq); - sc->age_tq = NULL; ether_ifdetach(ifp); goto fail; } @@ -688,20 +675,12 @@ ifp = sc->age_ifp; if (device_is_attached(dev)) { + ether_ifdetach(ifp); AGE_LOCK(sc); - sc->age_flags |= AGE_FLAG_DETACH; age_stop(sc); AGE_UNLOCK(sc); callout_drain(&sc->age_tick_ch); - taskqueue_drain(sc->age_tq, &sc->age_int_task); taskqueue_drain(taskqueue_swi, &sc->age_link_task); - ether_ifdetach(ifp); - } - - if (sc->age_tq != NULL) { - taskqueue_drain(sc->age_tq, &sc->age_int_task); - taskqueue_free(sc->age_tq); - sc->age_tq = NULL; } if (sc->age_miibus != NULL) { @@ -1744,7 +1723,6 @@ int enq; sc = ifp->if_softc; - AGE_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != @@ -1851,8 +1829,7 @@ & (IFF_PROMISC | IFF_ALLMULTI)) != 0) age_rxfilter(sc); } else { - if ((sc->age_flags & AGE_FLAG_DETACH) == 0) - age_init_locked(sc); + age_init_locked(sc); } } else { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) @@ -2118,7 +2095,7 @@ } static int -age_intr(void *arg) +age_filter(void *arg) { struct age_softc *sc; uint32_t status; @@ -2130,13 +2107,12 @@ return (FILTER_STRAY); /* Disable interrupts. 
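 * Unlike ae(4) above, this filter never calls hwi_sched() itself;
 * returning FILTER_SCHEDULE_THREAD below makes the interrupt core
 * queue the age_intr() handler that was registered alongside this
 * filter.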
*/ CSR_WRITE_4(sc, AGE_INTR_STATUS, status | INTR_DIS_INT); - taskqueue_enqueue(sc->age_tq, &sc->age_int_task); - return (FILTER_HANDLED); + return (FILTER_SCHEDULE_THREAD); } static void -age_int_task(void *arg, int pending) +age_intr(void *arg) { struct age_softc *sc; struct ifnet *ifp; @@ -2201,7 +2177,7 @@ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); status = le32toh(cmb->intr_status); if (sc->age_morework != 0 || (status & AGE_INTRS) != 0) { - taskqueue_enqueue(sc->age_tq, &sc->age_int_task); + hwi_sched(sc->age_intrhand[0]); AGE_UNLOCK(sc); return; } --- //depot/projects/smpng/sys/dev/age/if_agevar.h +++ //depot/user/jhb/intr/dev/age/if_agevar.h @@ -211,7 +211,6 @@ #define AGE_FLAG_MSI 0x0004 #define AGE_FLAG_MSIX 0x0008 #define AGE_FLAG_PMCAP 0x0010 -#define AGE_FLAG_DETACH 0x4000 #define AGE_FLAG_LINK 0x8000 struct callout age_tick_ch; @@ -227,9 +226,7 @@ int age_rr_prod; int age_tpd_cons; - struct task age_int_task; struct task age_link_task; - struct taskqueue *age_tq; struct mtx age_mtx; }; --- //depot/projects/smpng/sys/dev/asmc/asmc.c +++ //depot/user/jhb/intr/dev/asmc/asmc.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include @@ -55,8 +54,6 @@ #include #include -#include "opt_intr_filter.h" - /* * Device interface. */ @@ -81,11 +78,8 @@ static int asmc_sms_read(device_t, const char *key, int16_t *val); static void asmc_sms_calibrate(device_t dev); static int asmc_sms_intrfast(void *arg); -#ifdef INTR_FILTER static void asmc_sms_handler(void *arg); -#endif static void asmc_sms_printintr(device_t dev, uint8_t); -static void asmc_sms_task(void *arg, int pending); #ifdef DEBUG void asmc_dumpall(device_t); static int asmc_key_dump(device_t, int); @@ -317,6 +311,7 @@ struct sysctl_oid *sysctlnode; struct asmc_model *model; + sc->sc_dev = dev; sc->sc_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &sc->sc_rid_port, RF_ACTIVE); if (sc->sc_ioport == NULL) { @@ -458,24 +453,6 @@ "Sudden Motion Sensor Z value"); /* - * Need a taskqueue to send devctl_notify() events - * when the SMS interrupt us. - * - * PI_REALTIME is used due to the sensitivity of the - * interrupt. An interrupt from the SMS means that the - * disk heads should be turned off as quickly as possible. - * - * We only need to do this for the non INTR_FILTER case. - */ - sc->sc_sms_tq = NULL; -#ifndef INTR_FILTER - TASK_INIT(&sc->sc_sms_task, 0, asmc_sms_task, sc); - sc->sc_sms_tq = taskqueue_create_fast("asmc_taskq", M_WAITOK, - taskqueue_thread_enqueue, &sc->sc_sms_tq); - taskqueue_start_threads(&sc->sc_sms_tq, 1, PI_REALTIME, "%s sms taskq", - device_get_nameunit(dev)); -#endif - - /* - * Allocate an IRQ for the SMS.
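 * The devctl notifications formerly sent from a private PI_REALTIME
 * taskqueue are now sent from the threaded SMS handler registered
 * below, so no local taskqueue is required.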
*/ sc->sc_rid_irq = 0; @@ -487,14 +464,8 @@ goto err2; } - ret = bus_setup_intr(dev, sc->sc_irq, - INTR_TYPE_MISC | INTR_MPSAFE, -#ifdef INTR_FILTER - asmc_sms_intrfast, asmc_sms_handler, -#else - asmc_sms_intrfast, NULL, -#endif - dev, &sc->sc_cookie); + ret = bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_AV | INTR_MPSAFE, + asmc_sms_intrfast, asmc_sms_handler, sc, &sc->sc_cookie); if (ret) { device_printf(dev, "unable to setup SMS IRQ\n"); @@ -508,8 +479,6 @@ bus_release_resource(dev, SYS_RES_IOPORT, sc->sc_rid_port, sc->sc_ioport); mtx_destroy(&sc->sc_mtx); - if (sc->sc_sms_tq) - taskqueue_free(sc->sc_sms_tq); return (ret); } @@ -519,10 +488,6 @@ { struct asmc_softc *sc = device_get_softc(dev); - if (sc->sc_sms_tq) { - taskqueue_drain(sc->sc_sms_tq, &sc->sc_sms_task); - taskqueue_free(sc->sc_sms_tq); - } if (sc->sc_cookie) bus_teardown_intr(dev, sc->sc_irq, sc->sc_cookie); if (sc->sc_irq) @@ -1059,9 +1024,9 @@ static int asmc_sms_intrfast(void *arg) { + struct asmc_softc *sc = arg; uint8_t type; - device_t dev = (device_t) arg; - struct asmc_softc *sc = device_get_softc(dev); + if (!sc->sc_sms_intr_works) return (FILTER_HANDLED); @@ -1070,28 +1035,12 @@ mtx_unlock_spin(&sc->sc_mtx); sc->sc_sms_intrtype = type; - asmc_sms_printintr(dev, type); + asmc_sms_printintr(sc->sc_dev, type); -#ifdef INTR_FILTER - return (FILTER_SCHEDULE_THREAD | FILTER_HANDLED); -#else - taskqueue_enqueue(sc->sc_sms_tq, &sc->sc_sms_task); -#endif - return (FILTER_HANDLED); + return (FILTER_SCHEDULE_THREAD); } -#ifdef INTR_FILTER static void -asmc_sms_handler(void *arg) -{ - struct asmc_softc *sc = device_get_softc(arg); - - asmc_sms_task(sc, 0); -} -#endif - - -static void asmc_sms_printintr(device_t dev, uint8_t type) { @@ -1111,9 +1060,9 @@ } static void -asmc_sms_task(void *arg, int pending) +asmc_sms_handler(void *arg) { - struct asmc_softc *sc = (struct asmc_softc *)arg; + struct asmc_softc *sc = arg; char notify[16]; int type; --- //depot/projects/smpng/sys/dev/asmc/asmcvar.h +++ //depot/user/jhb/intr/dev/asmc/asmcvar.h @@ -47,8 +47,6 @@ struct resource *sc_irq; void *sc_cookie; int sc_sms_intrtype; - struct taskqueue *sc_sms_tq; - struct task sc_sms_task; uint8_t sc_sms_intr_works; }; --- //depot/projects/smpng/sys/dev/e1000/if_igb.c +++ //depot/user/jhb/intr/dev/e1000/if_igb.c @@ -48,6 +48,9 @@ #endif #include #include +#if __FreeBSD_version >= 1000000 +#include +#endif #include #include #include @@ -280,8 +283,13 @@ static int igb_irq_fast(void *); static void igb_msix_que(void *); static void igb_msix_link(void *); +#if __FreeBSD_version < 1000000 static void igb_handle_que(void *context, int pending); static void igb_handle_link(void *context, int pending); +#else +static void igb_handle_que(void *); +static void igb_handle_link(void *); +#endif static void igb_handle_link_locked(struct adapter *); static void igb_set_sysctl_value(struct adapter *, const char *, @@ -1400,9 +1408,13 @@ IGB_CORE_UNLOCK(adapter); } - +#if __FreeBSD_version < 1000000 static void igb_handle_que(void *context, int pending) +#else +static void +igb_handle_que(void *context) +#endif { struct igb_queue *que = context; struct adapter *adapter = que->adapter; @@ -1428,7 +1440,11 @@ IGB_TX_UNLOCK(txr); /* Do we need another? 
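 * (On kernels with the reworked interrupt code this requeues the
 * threaded handler itself via hwi_sched() rather than bouncing
 * through a private taskqueue.)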
*/ if (more) { +#if __FreeBSD_version < 1000000 taskqueue_enqueue(que->tq, &que->que_task); +#else + hwi_sched(que->tag); +#endif return; } } @@ -1445,8 +1461,13 @@ } /* Deal with link in a sleepable context */ +#if __FreeBSD_version < 1000000 static void igb_handle_link(void *context, int pending) +#else +static void +igb_handle_link(void *context) +#endif { struct adapter *adapter = context; @@ -1491,7 +1512,9 @@ igb_irq_fast(void *arg) { struct adapter *adapter = arg; +#if __FreeBSD_version < 1000000 struct igb_queue *que = adapter->queues; +#endif u32 reg_icr; @@ -1514,15 +1537,25 @@ * MSI message reordering errata on certain systems. */ igb_disable_intr(adapter); +#if __FreeBSD_version < 1000000 taskqueue_enqueue(que->tq, &que->que_task); +#endif /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) +#if __FreeBSD_version < 1000000 taskqueue_enqueue(que->tq, &adapter->link_task); +#else + hwi_sched(adapter->link_tag); +#endif if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; +#if __FreeBSD_version < 1000000 return FILTER_HANDLED; +#else + return FILTER_SCHEDULE_THREAD; +#endif } #ifdef DEVICE_POLLING @@ -1675,7 +1708,11 @@ no_calc: /* Schedule a clean task if needed*/ if (more_rx) +#if __FreeBSD_version < 1000000 taskqueue_enqueue(que->tq, &que->que_task); +#else + hwi_sched(que->tag); +#endif else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); @@ -1699,7 +1736,11 @@ icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (!(icr & E1000_ICR_LSC)) goto spurious; +#if __FreeBSD_version < 1000000 igb_handle_link(adapter, 0); +#else + igb_handle_link(adapter); +#endif spurious: /* Rearm */ @@ -2127,8 +2168,15 @@ ++hung; if (txr->queue_status & IGB_QUEUE_DEPLETED) ++busy; +#if 0 + /* + * XXX: This is a bad idea, should never do this + * from a non-interrupt context to avoid out-of-order + * packet processing! + */ if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) taskqueue_enqueue(que->tq, &que->que_task); +#endif } if (hung == adapter->num_queues) goto timeout; @@ -2404,6 +2452,7 @@ TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); #endif +#if __FreeBSD_version < 1000000 /* * Try allocating a fast interrupt and the associated deferred * processing contexts. @@ -2424,7 +2473,29 @@ que->tq = NULL; return (error); } +#else + /* Create a taskqueue for deferred transmit queue starts. 
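+	 * The taskqueue is now used only for deferred transmit starts;
+	 * interrupt processing runs in the handlers registered just
+	 * below.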
*/ + que->tq = taskqueue_create("igb_taskq", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", + device_get_nameunit(adapter->dev)); + error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, + igb_irq_fast, igb_handle_que, adapter, &adapter->tag); + if (error) { + device_printf(dev, "Failed to register que interrupt " + "handler: %d\n", error); + return (error); + } + error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE | + INTR_MANUAL, NULL, igb_handle_link, adapter, &adapter->link_tag); + if (error) { + device_printf(dev, "Failed to register link interrupt " + "handler: %d\n", error); + return (error); + } +#endif + return (0); } @@ -2485,11 +2556,13 @@ igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); } #ifndef IGB_LEGACY_TX + /* Make tasklet for deferred handling */ TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr); #endif - /* Make tasklet for deferred handling */ +#if __FreeBSD_version < 1000000 TASK_INIT(&que->que_task, 0, igb_handle_que, que); +#endif que->tq = taskqueue_create("igb_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", @@ -2702,10 +2775,18 @@ que = adapter->queues; if (adapter->tag != NULL) { +#if __FreeBSD_version < 1000000 taskqueue_drain(que->tq, &adapter->link_task); +#endif bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } +#if __FreeBSD_version >= 1000000 + if (adapter->link_tag != NULL) { + bus_teardown_intr(dev, adapter->res, adapter->link_tag); + adapter->link_tag = NULL; + } +#endif if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); @@ -2714,7 +2795,9 @@ #ifndef IGB_LEGACY_TX taskqueue_drain(que->tq, &que->txr->txq_task); #endif +#if __FreeBSD_version < 1000000 taskqueue_drain(que->tq, &que->que_task); +#endif taskqueue_free(que->tq); } } --- //depot/projects/smpng/sys/dev/e1000/if_igb.h +++ //depot/user/jhb/intr/dev/e1000/if_igb.h @@ -281,7 +281,9 @@ void *tag; struct tx_ring *txr; struct rx_ring *rxr; +#if __FreeBSD_version < 900000 struct task que_task; +#endif struct taskqueue *tq; u64 irqs; }; --- //depot/projects/smpng/sys/dev/pccard/pccard.c +++ //depot/user/jhb/intr/dev/pccard/pccard.c @@ -1230,7 +1230,6 @@ { struct pccard_function *pf = (struct pccard_function*) arg; int reg; - int doisr = 1; /* * MFC cards know if they interrupted, so we have to ack the @@ -1254,14 +1253,11 @@ pccard_ccr_write(pf, PCCARD_CCR_STATUS, reg & ~PCCARD_CCR_STATUS_INTR); else - doisr = 0; + return (FILTER_STRAY); } - if (doisr) { - if (pf->intr_filter != NULL) - return (pf->intr_filter(pf->intr_handler_arg)); - return (FILTER_SCHEDULE_THREAD); - } - return (FILTER_STRAY); + if (pf->intr_filter != NULL) + return (pf->intr_filter(pf->intr_handler_arg)); + return (FILTER_SCHEDULE_THREAD); } static void @@ -1269,7 +1265,7 @@ { struct pccard_function *pf = (struct pccard_function*) arg; - pf->intr_handler(pf->intr_handler_arg); + pf->intr_handler(pf->intr_handler_arg); } static int @@ -1284,8 +1280,9 @@ if (pf->intr_filter != NULL || pf->intr_handler != NULL) panic("Only one interrupt handler per function allowed"); - err = bus_generic_setup_intr(dev, child, irq, flags, pccard_filter, - intr ? pccard_intr : NULL, pf, cookiep); + err = bus_generic_setup_intr(dev, child, irq, flags, + filt != NULL || pccard_mfc(sc) ? pccard_filter : NULL, + intr != NULL ? 
pccard_intr : NULL, pf, cookiep); if (err != 0) return (err); pf->intr_filter = filt; --- //depot/projects/smpng/sys/dev/xl/if_xl.c +++ //depot/user/jhb/intr/dev/xl/if_xl.c @@ -1576,12 +1576,12 @@ /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { + ether_ifdetach(ifp); XL_LOCK(sc); xl_stop(sc); XL_UNLOCK(sc); taskqueue_drain(taskqueue_swi, &sc->xl_task); callout_drain(&sc->xl_tick_callout); - ether_ifdetach(ifp); } if (sc->xl_miibus) device_delete_child(dev, sc->xl_miibus); --- //depot/projects/smpng/sys/kern/kern_intr.c +++ //depot/user/jhb/intr/kern/kern_intr.c @@ -63,13 +63,18 @@ #endif /* - * Describe an interrupt thread. There is one of these per interrupt event. + * Describe an interrupt thread. One or more threads are associated with + * each interrupt queue. + * + * XXX: For now we assume a one:one binding between events and queues. */ struct intr_thread { - struct intr_event *it_event; struct thread *it_thread; /* Kernel thread. */ - int it_flags; /* (j) IT_* flags. */ - int it_need; /* Needs service. */ + struct intr_queue *it_queue; + int it_flags; /* IT_* flags. */ +#ifdef INVARIANTS + struct intr_handler *it_current; /* Current handler for hwi. */ +#endif }; /* Interrupt thread flags kept in it_flags */ @@ -81,6 +86,13 @@ uintptr_t event; }; +/* Return values for intr_handler_execute(). */ +enum { + FINISHED, + DYING, + REQUEUE, +}; + struct intr_event *clk_intr_event; struct intr_event *tty_intr_event; void *vm_ih; @@ -88,37 +100,36 @@ static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads"); +SYSCTL_NODE(_kern, OID_AUTO, intr, CTLFLAG_RD, 0, "Interrupt parameters"); + static int intr_storm_threshold = 1000; -TUNABLE_INT("hw.intr_storm_threshold", &intr_storm_threshold); -SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RW, +TUNABLE_INT("kern.intr.storm_threshold", &intr_storm_threshold); +SYSCTL_INT(_kern_intr, OID_AUTO, storm_threshold, CTLFLAG_RW, &intr_storm_threshold, 0, "Number of consecutive interrupts before storm protection is enabled"); -static TAILQ_HEAD(, intr_event) event_list = - TAILQ_HEAD_INITIALIZER(event_list); + +static TAILQ_HEAD(, intr_hardware) hwi_event_list = + TAILQ_HEAD_INITIALIZER(hwi_event_list); +static TAILQ_HEAD(, intr_software) swi_event_list = + TAILQ_HEAD_INITIALIZER(swi_event_list); static struct mtx event_lock; -MTX_SYSINIT(intr_event_list, &event_lock, "intr event list", MTX_DEF); +static struct callout storm_timer; +MTX_SYSINIT(intr_event_list, &event_lock, "intr event lists", MTX_DEF); +static void intr_event_init(struct intr_event *ie, int flags, + const char *fmt, va_list ap); static void intr_event_update(struct intr_event *ie); -#ifdef INTR_FILTER -static int intr_event_schedule_thread(struct intr_event *ie, - struct intr_thread *ithd); -static int intr_filter_loop(struct intr_event *ie, - struct trapframe *frame, struct intr_thread **ithd); -static struct intr_thread *ithread_create(const char *name, - struct intr_handler *ih); -#else -static int intr_event_schedule_thread(struct intr_event *ie); -static struct intr_thread *ithread_create(const char *name); -#endif -static void ithread_destroy(struct intr_thread *ithread); -static void ithread_execute_handlers(struct proc *p, - struct intr_event *ie); -#ifdef INTR_FILTER -static void priv_ithread_execute_handler(struct proc *p, +static void intr_handler_ack_dying(struct intr_event *ie, struct intr_handler *ih); -#endif -static void ithread_loop(void *); -static void ithread_update(struct intr_thread *ithd); +static int 
intr_handler_execute(struct intr_handler *ih, + struct intr_event *ie, struct thread *td); +static struct intr_hardware *intr_lookup(int irq); +static void intr_loop(void *arg); +static struct intr_queue *intr_queue_create(const char *name); +static void intr_queue_destroy(struct intr_queue *iq); +static void intr_queue_update(struct intr_event *ie); +static struct intr_thread *intr_thread_create(struct intr_queue *iq); +static void intr_thread_destroy(struct intr_thread *it); static void start_softintr(void *); /* Map an interrupt type to an ithread priority. */ @@ -159,18 +170,110 @@ return pri; } +/* Create an interrupt thread for a specific interrupt queue. */ +static struct intr_thread * +intr_thread_create(struct intr_queue *iq) +{ + struct intr_thread *it; + struct thread *td; + int error; + + it = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO); + error = kproc_kthread_add(intr_loop, iq, &intrproc, &td, RFSTOPPED | + RFHIGHPID, 0, "intr", "%s", iq->iq_name); + if (error) + panic("failed to create interrupt thread with %d", error); + thread_lock(td); + sched_class(td, PRI_ITHD); + sched_prio(td, PRI_MAX_ITHD); + TD_SET_IWAIT(td); + td->td_pflags |= TDP_ITHREAD; + td->td_ithread = it; + it->it_thread = td; + it->it_queue = iq; + mtx_lock_spin(&iq->iq_lock); + thread_lock_set(td, &iq->iq_lock); + thread_unlock(td); + CTR2(KTR_INTR, "intr_thread_create: created tid %d(%s)", td->td_tid, + iq->iq_name); + return (it); +} + +static void +intr_thread_destroy(struct intr_thread *it) +{ + struct thread *td; + + td = it->it_thread; + CTR2(KTR_INTR, "intr_thread_destroy: killing tid %d(%s)", td->td_tid, + td->td_name); + mtx_assert(&it->it_queue->iq_lock, MA_OWNED); + thread_lock(td); + it->it_flags |= IT_DEAD; + if (TD_AWAITING_INTR(td)) { + THREAD_LOCKPTR_ASSERT(td, &it->it_queue->iq_lock); + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); + } + thread_unlock(td); +} + +/* Create an interrupt queue with a single dedicated thread. */ +static struct intr_queue * +intr_queue_create(const char *name) +{ + struct intr_queue *iq; + + iq = malloc(sizeof(struct intr_queue), M_ITHREAD, M_WAITOK | M_ZERO); + STAILQ_INIT(&iq->iq_active); + mtx_init(&iq->iq_lock, "intr queue", NULL, MTX_SPIN | MTX_RECURSE); + strlcpy(iq->iq_name, name, sizeof(iq->iq_name)); + iq->iq_thread = intr_thread_create(iq); + return (iq); +} + +/* Schedule a thread to service requests for an interrupt queue. */ +static void +intr_queue_schedule(struct intr_queue *iq) +{ + struct thread *td; + + mtx_assert(&iq->iq_lock, MA_OWNED); + td = iq->iq_thread->it_thread; + thread_lock(td); + if (TD_AWAITING_INTR(td)) { + THREAD_LOCKPTR_ASSERT(td, &iq->iq_lock); + CTR2(KTR_INTR, "intr_queue_schedule: tid %d (%s)", td->td_tid, + td->td_name); + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); + } else + CTR3(KTR_INTR, "intr_queue_schedule: tid %d (%s): state %d", + td->td_tid, td->td_name, td->td_state); + thread_unlock(td); +} + +static void +intr_queue_destroy(struct intr_queue *iq) +{ + + mtx_lock_spin(&iq->iq_lock); + intr_thread_destroy(iq->iq_thread); + mtx_unlock_spin(&iq->iq_lock); +} + /* - * Update an ithread based on the associated intr_event. + * Update the interrupt queue based on the associated intr_event. + * + * XXX: This very much assumes one:one queue:event.
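+ * The queue thread's priority and name are recomputed from the
+ * single attached event's handler list below; supporting several
+ * events per queue would require aggregating across all of them.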
*/ static void -ithread_update(struct intr_thread *ithd) +intr_queue_update(struct intr_event *ie) { - struct intr_event *ie; struct thread *td; u_char pri; - ie = ithd->it_event; - td = ithd->it_thread; + td = ie->ie_queue->iq_thread->it_thread; /* Determine the overall priority of this event. */ if (TAILQ_EMPTY(&ie->ie_handlers)) @@ -242,46 +345,51 @@ * If this event has an ithread, update it's priority and * name. */ - if (ie->ie_thread != NULL) - ithread_update(ie->ie_thread); + if (ie->ie_queue != NULL) + intr_queue_update(ie); CTR2(KTR_INTR, "%s: updated %s", __func__, ie->ie_fullname); } -int -intr_event_create(struct intr_event **event, void *source, int flags, int irq, - void (*pre_ithread)(void *), void (*post_ithread)(void *), - void (*post_filter)(void *), int (*assign_cpu)(void *, u_char), - const char *fmt, ...) +static void +intr_event_init(struct intr_event *ie, int flags, const char *fmt, va_list ap) { - struct intr_event *ie; - va_list ap; - /* The only valid flag during creation is IE_SOFT. */ - if ((flags & ~IE_SOFT) != 0) - return (EINVAL); - ie = malloc(sizeof(struct intr_event), M_ITHREAD, M_WAITOK | M_ZERO); - ie->ie_source = source; - ie->ie_pre_ithread = pre_ithread; - ie->ie_post_ithread = post_ithread; - ie->ie_post_filter = post_filter; - ie->ie_assign_cpu = assign_cpu; ie->ie_flags = flags; - ie->ie_irq = irq; ie->ie_cpu = NOCPU; TAILQ_INIT(&ie->ie_handlers); mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF); + vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap); + strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname)); + CTR1(KTR_INTR, "intr_event_init: created %s", ie->ie_name); +} + +void +hwi_create(struct intr_event **event, void *source, int irq, + void (*pre_ithread)(void *), void (*post_ithread)(void *), + void (*post_filter)(void *), int (*assign_cpu)(void *, u_char), + const char *fmt, ...) +{ + struct intr_hardware *ihw; + va_list ap; + + ihw = malloc(sizeof(struct intr_hardware), M_ITHREAD, + M_WAITOK | M_ZERO); va_start(ap, fmt); - vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap); + intr_event_init(&ihw->ihw_event, 0, fmt, ap); va_end(ap); - strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname)); + ihw->ihw_source = source; + ihw->ihw_pre_ithread = pre_ithread; + ihw->ihw_post_ithread = post_ithread; + ihw->ihw_post_filter = post_filter; + ihw->ihw_assign_cpu = assign_cpu; + ihw->ihw_irq = irq; + TAILQ_INIT(&ihw->ihw_manual); mtx_lock(&event_lock); - TAILQ_INSERT_TAIL(&event_list, ie, ie_list); + TAILQ_INSERT_TAIL(&hwi_event_list, ihw, ihw_list); mtx_unlock(&event_lock); if (event != NULL) - *event = ie; - CTR2(KTR_INTR, "%s: created %s", __func__, ie->ie_name); - return (0); + *event = &ihw->ihw_event; } /* @@ -295,6 +403,7 @@ int intr_event_bind(struct intr_event *ie, u_char cpu) { + struct intr_hardware *ihw; cpuset_t mask; lwpid_t id; int error; @@ -303,46 +412,54 @@ if (cpu != NOCPU && CPU_ABSENT(cpu)) return (EINVAL); - if (ie->ie_assign_cpu == NULL) - return (EOPNOTSUPP); - error = priv_check(curthread, PRIV_SCHED_CPUSET_INTR); if (error) return (error); + if (!(ie->ie_flags & IE_SOFT)) { + ihw = (struct intr_hardware *)ie; + + if (ihw->ihw_assign_cpu == NULL) + return (EOPNOTSUPP); + } + /* * If we have any ithreads try to set their mask first to verify * permissions, etc. 
*/ mtx_lock(&ie->ie_lock); - if (ie->ie_thread != NULL) { + if (ie->ie_queue != NULL) { CPU_ZERO(&mask); if (cpu == NOCPU) CPU_COPY(cpuset_root, &mask); else CPU_SET(cpu, &mask); - id = ie->ie_thread->it_thread->td_tid; + id = ie->ie_queue->iq_thread->it_thread->td_tid; mtx_unlock(&ie->ie_lock); error = cpuset_setthread(id, &mask); if (error) return (error); } else mtx_unlock(&ie->ie_lock); - error = ie->ie_assign_cpu(ie->ie_source, cpu); - if (error) { - mtx_lock(&ie->ie_lock); - if (ie->ie_thread != NULL) { - CPU_ZERO(&mask); - if (ie->ie_cpu == NOCPU) - CPU_COPY(cpuset_root, &mask); - else - CPU_SET(ie->ie_cpu, &mask); - id = ie->ie_thread->it_thread->td_tid; - mtx_unlock(&ie->ie_lock); - (void)cpuset_setthread(id, &mask); - } else - mtx_unlock(&ie->ie_lock); - return (error); + if (!(ie->ie_flags & IE_SOFT)) { + ihw = (struct intr_hardware *)ie; + + error = ihw->ihw_assign_cpu(ihw->ihw_source, cpu); + if (error) { + mtx_lock(&ie->ie_lock); + if (ie->ie_queue != NULL) { + CPU_ZERO(&mask); + if (ie->ie_cpu == NOCPU) + CPU_COPY(cpuset_root, &mask); + else + CPU_SET(ie->ie_cpu, &mask); + id = ie->ie_queue->iq_thread->it_thread->td_tid; + mtx_unlock(&ie->ie_lock); + (void)cpuset_setthread(id, &mask); + } else + mtx_unlock(&ie->ie_lock); + return (error); + } } mtx_lock(&ie->ie_lock); @@ -352,25 +469,24 @@ return (error); } -static struct intr_event * +static struct intr_hardware * intr_lookup(int irq) { - struct intr_event *ie; + struct intr_hardware *ihw; mtx_lock(&event_lock); - TAILQ_FOREACH(ie, &event_list, ie_list) - if (ie->ie_irq == irq && - (ie->ie_flags & IE_SOFT) == 0 && - TAILQ_FIRST(&ie->ie_handlers) != NULL) + TAILQ_FOREACH(ihw, &hwi_event_list, ihw_list) + if (ihw->ihw_irq == irq && + TAILQ_FIRST(&ihw->ihw_event.ie_handlers) != NULL) break; mtx_unlock(&event_lock); - return (ie); + return (ihw); } int intr_setaffinity(int irq, void *m) { - struct intr_event *ie; + struct intr_hardware *ihw; cpuset_t *mask; u_char cpu; int n; @@ -390,226 +506,128 @@ cpu = (u_char)n; } } - ie = intr_lookup(irq); - if (ie == NULL) + ihw = intr_lookup(irq); + if (ihw == NULL) return (ESRCH); - return (intr_event_bind(ie, cpu)); + return (intr_event_bind(&ihw->ihw_event, cpu)); } int intr_getaffinity(int irq, void *m) { - struct intr_event *ie; + struct intr_hardware *ihw; cpuset_t *mask; mask = m; - ie = intr_lookup(irq); - if (ie == NULL) + ihw = intr_lookup(irq); + if (ihw == NULL) return (ESRCH); CPU_ZERO(mask); - mtx_lock(&ie->ie_lock); - if (ie->ie_cpu == NOCPU) + mtx_lock(&ihw->ihw_event.ie_lock); + if (ihw->ihw_event.ie_cpu == NOCPU) CPU_COPY(cpuset_root, mask); else - CPU_SET(ie->ie_cpu, mask); - mtx_unlock(&ie->ie_lock); + CPU_SET(ihw->ihw_event.ie_cpu, mask); + mtx_unlock(&ihw->ihw_event.ie_lock); return (0); } int -intr_event_destroy(struct intr_event *ie) +hwi_destroy(struct intr_event *ie) { + struct intr_hardware *ihw; + if (ie->ie_flags & IE_SOFT) + return (EINVAL); + ihw = (struct intr_hardware *)ie; + mtx_lock(&event_lock); mtx_lock(&ie->ie_lock); - if (!TAILQ_EMPTY(&ie->ie_handlers)) { + if (!TAILQ_EMPTY(&ie->ie_handlers) || !TAILQ_EMPTY(&ihw->ihw_manual)) { mtx_unlock(&ie->ie_lock); mtx_unlock(&event_lock); return (EBUSY); } - TAILQ_REMOVE(&event_list, ie, ie_list); -#ifndef notyet - if (ie->ie_thread != NULL) { - ithread_destroy(ie->ie_thread); - ie->ie_thread = NULL; - } -#endif + TAILQ_REMOVE(&hwi_event_list, ihw, ihw_list); mtx_unlock(&ie->ie_lock); mtx_unlock(&event_lock); - mtx_destroy(&ie->ie_lock); - free(ie, M_ITHREAD); + if (ie->ie_queue != NULL) + 
intr_queue_destroy(ie->ie_queue); return (0); } -#ifndef INTR_FILTER -static struct intr_thread * -ithread_create(const char *name) -{ - struct intr_thread *ithd; - struct thread *td; - int error; - - ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO); - - error = kproc_kthread_add(ithread_loop, ithd, &intrproc, - &td, RFSTOPPED | RFHIGHPID, - 0, "intr", "%s", name); - if (error) - panic("kproc_create() failed with %d", error); - thread_lock(td); - sched_class(td, PRI_ITHD); - TD_SET_IWAIT(td); - thread_unlock(td); - td->td_pflags |= TDP_ITHREAD; - ithd->it_thread = td; - CTR2(KTR_INTR, "%s: created %s", __func__, name); - return (ithd); -} -#else -static struct intr_thread * -ithread_create(const char *name, struct intr_handler *ih) -{ - struct intr_thread *ithd; - struct thread *td; - int error; - - ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO); - - error = kproc_kthread_add(ithread_loop, ih, &intrproc, - &td, RFSTOPPED | RFHIGHPID, - 0, "intr", "%s", name); - if (error) - panic("kproc_create() failed with %d", error); - thread_lock(td); - sched_class(td, PRI_ITHD); - TD_SET_IWAIT(td); - thread_unlock(td); - td->td_pflags |= TDP_ITHREAD; - ithd->it_thread = td; - CTR2(KTR_INTR, "%s: created %s", __func__, name); - return (ithd); -} -#endif - -static void -ithread_destroy(struct intr_thread *ithread) -{ - struct thread *td; - - CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name); - td = ithread->it_thread; - thread_lock(td); - ithread->it_flags |= IT_DEAD; - if (TD_AWAITING_INTR(td)) { - TD_CLR_IWAIT(td); - sched_add(td, SRQ_INTR); - } - thread_unlock(td); -} - -#ifndef INTR_FILTER int intr_event_add_handler(struct intr_event *ie, const char *name, driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri, enum intr_type flags, void **cookiep) { struct intr_handler *ih, *temp_ih; - struct intr_thread *it; + struct intr_hardware *ihw; + struct intr_queue *iq; - if (ie == NULL || name == NULL || (handler == NULL && filter == NULL)) + if (ie == NULL || name == NULL || + (handler == NULL && filter == NULL) || + ((flags & INTR_MANUAL) && filter != NULL) || + ((flags & INTR_MANUAL) && (ie->ie_flags & IE_SOFT)) || + ((ie->ie_flags & IE_SOFT) && filter != NULL)) return (EINVAL); /* Allocate and populate an interrupt handler structure. */ - ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO); + ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | + M_ZERO); ih->ih_filter = filter; ih->ih_handler = handler; ih->ih_argument = arg; strlcpy(ih->ih_name, name, sizeof(ih->ih_name)); ih->ih_event = ie; ih->ih_pri = pri; + if (flags & INTR_MANUAL) + ih->ih_flags |= IH_MANUAL; if (flags & INTR_EXCL) - ih->ih_flags = IH_EXCLUSIVE; + ih->ih_flags |= IH_EXCLUSIVE; if (flags & INTR_MPSAFE) ih->ih_flags |= IH_MPSAFE; if (flags & INTR_ENTROPY) ih->ih_flags |= IH_ENTROPY; - /* We can only have one exclusive handler in a event. */ - mtx_lock(&ie->ie_lock); - if (!TAILQ_EMPTY(&ie->ie_handlers)) { - if ((flags & INTR_EXCL) || - (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) { + /* + * Manually scheduled interrupt handlers are not part of the + * normal list of handlers. However, they require that at + * least one non-manual handler is already active for this + * event. In general they should be handled by an existing + * filter or handler. 
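+ * For example, a driver would pair a filter with a manual handler
+ * roughly as follows (hypothetical sketch; my_filter/my_intr are
+ * placeholders, cf. the ae(4) changes above):
+ *
+ *	bus_setup_intr(dev, irq, INTR_TYPE_NET | INTR_MPSAFE,
+ *	    my_filter, NULL, sc, &sc->filter_cookie);
+ *	bus_setup_intr(dev, irq, INTR_TYPE_NET | INTR_MPSAFE |
+ *	    INTR_MANUAL, NULL, my_intr, sc, &sc->intr_cookie);
+ *
+ * with my_filter() calling hwi_sched(sc->intr_cookie) whenever it
+ * wants my_intr() to run.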
+ */ + if (flags & INTR_MANUAL) { + mtx_lock(&ie->ie_lock); + if (TAILQ_EMPTY(&ie->ie_handlers)) { mtx_unlock(&ie->ie_lock); free(ih, M_ITHREAD); return (EINVAL); } + ihw = (struct intr_hardware *)ie; + TAILQ_INSERT_TAIL(&ihw->ihw_manual, ih, ih_next); + goto finish; } - /* Create a thread if we need one. */ - while (ie->ie_thread == NULL && handler != NULL) { - if (ie->ie_flags & IE_ADDING_THREAD) - msleep(ie, &ie->ie_lock, 0, "ithread", 0); + /* Create a queue if we need one. */ + mtx_lock(&ie->ie_lock); + while (ie->ie_queue == NULL) { + if (ie->ie_flags & IE_ADDING_QUEUE) + mtx_sleep(ie, &ie->ie_lock, 0, "iqueue", 0); else { - ie->ie_flags |= IE_ADDING_THREAD; + ie->ie_flags |= IE_ADDING_QUEUE; mtx_unlock(&ie->ie_lock); - it = ithread_create("intr: newborn"); + iq = intr_queue_create(ie->ie_name); mtx_lock(&ie->ie_lock); - ie->ie_flags &= ~IE_ADDING_THREAD; - ie->ie_thread = it; - it->it_event = ie; - ithread_update(it); + ie->ie_flags &= ~IE_ADDING_QUEUE; + iq->iq_event = ie; + ie->ie_queue = iq; + intr_queue_update(ie); wakeup(ie); } } - /* Add the new handler to the event in priority order. */ - TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { - if (temp_ih->ih_pri > ih->ih_pri) - break; - } - if (temp_ih == NULL) - TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); - else - TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); - intr_event_update(ie); - - CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, - ie->ie_name); - mtx_unlock(&ie->ie_lock); - - if (cookiep != NULL) - *cookiep = ih; - return (0); -} -#else -int -intr_event_add_handler(struct intr_event *ie, const char *name, - driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri, - enum intr_type flags, void **cookiep) -{ - struct intr_handler *ih, *temp_ih; - struct intr_thread *it; - - if (ie == NULL || name == NULL || (handler == NULL && filter == NULL)) - return (EINVAL); - - /* Allocate and populate an interrupt handler structure. */ - ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO); - ih->ih_filter = filter; - ih->ih_handler = handler; - ih->ih_argument = arg; - strlcpy(ih->ih_name, name, sizeof(ih->ih_name)); - ih->ih_event = ie; - ih->ih_pri = pri; - if (flags & INTR_EXCL) - ih->ih_flags = IH_EXCLUSIVE; - if (flags & INTR_MPSAFE) - ih->ih_flags |= IH_MPSAFE; - if (flags & INTR_ENTROPY) - ih->ih_flags |= IH_ENTROPY; - /* We can only have one exclusive handler in a event. */ - mtx_lock(&ie->ie_lock); if (!TAILQ_EMPTY(&ie->ie_handlers)) { if ((flags & INTR_EXCL) || (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) { @@ -619,52 +637,27 @@ } } - /* For filtered handlers, create a private ithread to run on. */ - if (filter != NULL && handler != NULL) { - mtx_unlock(&ie->ie_lock); - it = ithread_create("intr: newborn", ih); - mtx_lock(&ie->ie_lock); - it->it_event = ie; - ih->ih_thread = it; - ithread_update(it); /* XXX - do we really need this?!?!? */ - } else { /* Create the global per-event thread if we need one. */ - while (ie->ie_thread == NULL && handler != NULL) { - if (ie->ie_flags & IE_ADDING_THREAD) - msleep(ie, &ie->ie_lock, 0, "ithread", 0); - else { - ie->ie_flags |= IE_ADDING_THREAD; - mtx_unlock(&ie->ie_lock); - it = ithread_create("intr: newborn", ih); - mtx_lock(&ie->ie_lock); - ie->ie_flags &= ~IE_ADDING_THREAD; - ie->ie_thread = it; - it->it_event = ie; - ithread_update(it); - wakeup(ie); - } - } - } - /* Add the new handler to the event in priority order. 
*/ TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { if (temp_ih->ih_pri > ih->ih_pri) break; } + mtx_lock_spin(&ie->ie_queue->iq_lock); if (temp_ih == NULL) TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); else TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); + mtx_unlock_spin(&ie->ie_queue->iq_lock); intr_event_update(ie); +finish: CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, ie->ie_name); mtx_unlock(&ie->ie_lock); - if (cookiep != NULL) *cookiep = ih; return (0); } -#endif /* * Append a description preceded by a ':' to the name of the specified @@ -674,6 +667,9 @@ intr_event_describe_handler(struct intr_event *ie, void *cookie, const char *descr) { +#ifdef INVARIANTS + struct intr_hardware *ihw; +#endif struct intr_handler *ih; size_t space; char *start; @@ -684,6 +680,13 @@ if (ih == cookie) break; } + if (ih == NULL && !(ie->ie_flags & IE_SOFT)) { + ihw = (struct intr_hardware *)ie; + TAILQ_FOREACH(ih, &ihw->ihw_manual, ih_next) { + if (ih == cookie) + break; + } + } if (ih == NULL) { mtx_unlock(&ie->ie_lock); panic("handler %p not found in interrupt event %p", cookie, ie); @@ -722,12 +725,13 @@ } /* - * Return the ie_source field from the intr_event an intr_handler is - * associated with. + * Return the source cookie for a hardware interrupt that a hardware + * interrupt handler is associated with. */ void * -intr_handler_source(void *cookie) +hwi_handler_source(void *cookie) { + struct intr_hardware *ihw; struct intr_handler *ih; struct intr_event *ie; @@ -738,208 +742,99 @@ KASSERT(ie != NULL, ("interrupt handler \"%s\" has a NULL interrupt event", ih->ih_name)); - return (ie->ie_source); + KASSERT(!(ie->ie_flags & IE_SOFT), + ("intr_handler_source: swi handler")); + ihw = (struct intr_hardware *)ie; + return (ihw->ihw_source); } /* - * Sleep until an ithread finishes executing an interrupt handler. - * - * XXX Doesn't currently handle interrupt filters or fast interrupt - * handlers. This is intended for compatibility with linux drivers - * only. Do not use in BSD code. + * Sleep until an interrupt handler finishes executing. */ void -_intr_drain(int irq) +hwi_drain(void *cookie) { + struct intr_handler *ih; struct intr_event *ie; - struct intr_thread *ithd; - struct thread *td; + int state; - ie = intr_lookup(irq); - if (ie == NULL) + ih = (struct intr_handler *)cookie; + if (ih == NULL) return; - if (ie->ie_thread == NULL) - return; - ithd = ie->ie_thread; - td = ithd->it_thread; - /* - * We set the flag and wait for it to be cleared to avoid - * long delays with potentially busy interrupt handlers - * were we to only sample TD_AWAITING_INTR() every tick. - */ - thread_lock(td); - if (!TD_AWAITING_INTR(td)) { - ithd->it_flags |= IT_WAIT; - while (ithd->it_flags & IT_WAIT) { - thread_unlock(td); - pause("idrain", 1); - thread_lock(td); + ie = ih->ih_event; + KASSERT(ie != NULL, + ("interrupt handler \"%s\" has a NULL interrupt event", + ih->ih_name)); + + for (state = ih->ih_state; state != IS_DEAD; state = ih->ih_state) { + switch (state) { + case IS_IDLE: + case IS_DEAD: + return; + default: + /* Just punt on fancy and do a simple poll. */ + tsleep(ih, 0, "idrain", 1); } } - thread_unlock(td); - return; } - -#ifndef INTR_FILTER -int -intr_event_remove_handler(void *cookie) +/* + * Sleep until all of the handlers associated with an IRQ finish + * executing. This is intended for compatibility with Linux drivers + * only. FreeBSD drivers should use hwi_drain() on individual + * handlers which is cheaper. 
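+ * A typical (hypothetical) use is waiting for a scheduled handler
+ * to finish before freeing state it touches, e.g.
+ * hwi_drain(sc->intr_cookie).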
+ */ +void +_intr_drain(int irq) { - struct intr_handler *handler = (struct intr_handler *)cookie; - struct intr_event *ie; -#ifdef INVARIANTS + struct intr_hardware *ihw; struct intr_handler *ih; -#endif -#ifdef notyet - int dead; -#endif - if (handler == NULL) - return (EINVAL); - ie = handler->ih_event; - KASSERT(ie != NULL, - ("interrupt handler \"%s\" has a NULL interrupt event", - handler->ih_name)); - mtx_lock(&ie->ie_lock); - CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name, - ie->ie_name); -#ifdef INVARIANTS - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) - if (ih == handler) - goto ok; - mtx_unlock(&ie->ie_lock); - panic("interrupt handler \"%s\" not found in interrupt event \"%s\"", - ih->ih_name, ie->ie_name); -ok: -#endif - /* - * If there is no ithread, then just remove the handler and return. - * XXX: Note that an INTR_FAST handler might be running on another - * CPU! - */ - if (ie->ie_thread == NULL) { - TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock(&ie->ie_lock); - free(handler, M_ITHREAD); - return (0); - } + ihw = intr_lookup(irq); + if (ihw == NULL) + return; - /* - * If the interrupt thread is already running, then just mark this - * handler as being dead and let the ithread do the actual removal. - * - * During a cold boot while cold is set, msleep() does not sleep, - * so we have to remove the handler here rather than letting the - * thread do it. - */ - thread_lock(ie->ie_thread->it_thread); - if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) { - handler->ih_flags |= IH_DEAD; - - /* - * Ensure that the thread will process the handler list - * again and remove this handler if it has already passed - * it on the list. - */ - atomic_store_rel_int(&ie->ie_thread->it_need, 1); - } else - TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - thread_unlock(ie->ie_thread->it_thread); - while (handler->ih_flags & IH_DEAD) - msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); - intr_event_update(ie); -#ifdef notyet - /* - * XXX: This could be bad in the case of ppbus(8). Also, I think - * this could lead to races of stale data when servicing an - * interrupt. - */ - dead = 1; - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - if (!(ih->ih_flags & IH_FAST)) { - dead = 0; - break; +restart: + mtx_lock(&ihw->ihw_event.ie_lock); + TAILQ_FOREACH(ih, &ihw->ihw_event.ie_handlers, ih_next) { + switch (ih->ih_state) { + case IS_IDLE: + case IS_DEAD: + continue; + default: + mtx_unlock(&ihw->ihw_event.ie_lock); + tsleep(ih, 0, "idrain", 1); + goto restart; } } - if (dead) { - ithread_destroy(ie->ie_thread); - ie->ie_thread = NULL; - } -#endif - mtx_unlock(&ie->ie_lock); - free(handler, M_ITHREAD); - return (0); + mtx_unlock(&ihw->ihw_event.ie_lock); } -static int -intr_event_schedule_thread(struct intr_event *ie) +/* + * Called from an interrupt thread loop when it encounters a dying + * interrupt handler. This marks the handler as dead and awakens the + * sleeping thread that is removing the handler. + */ +static void +intr_handler_ack_dying(struct intr_event *ie, struct intr_handler *ih) { - struct intr_entropy entropy; - struct intr_thread *it; - struct thread *td; - struct thread *ctd; - struct proc *p; - /* - * If no ithread or no handlers, then we have a stray interrupt. 
- */ - if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || - ie->ie_thread == NULL) - return (EINVAL); - - ctd = curthread; - it = ie->ie_thread; - td = it->it_thread; - p = td->td_proc; - - /* - * If any of the handlers for this ithread claim to be good - * sources of entropy, then gather some. - */ - if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) { - CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__, - p->p_pid, td->td_name); - entropy.event = (uintptr_t)ie; - entropy.td = ctd; - random_harvest(&entropy, sizeof(entropy), 2, - RANDOM_INTERRUPT); - } - - KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name)); - - /* - * Set it_need to tell the thread to keep running if it is already - * running. Then, lock the thread and see if we actually need to - * put it on the runqueue. - */ - atomic_store_rel_int(&it->it_need, 1); - thread_lock(td); - if (TD_AWAITING_INTR(td)) { - CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, - td->td_name); - TD_CLR_IWAIT(td); - sched_add(td, SRQ_INTR); - } else { - CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", - __func__, p->p_pid, td->td_name, it->it_need, td->td_state); - } - thread_unlock(td); + mtx_lock(&ie->ie_lock); + ih->ih_state = IS_DEAD; + wakeup(ih); + mtx_unlock(&ie->ie_lock); +} - return (0); -} -#else int intr_event_remove_handler(void *cookie) { struct intr_handler *handler = (struct intr_handler *)cookie; + struct intr_hardware *ihw; struct intr_event *ie; - struct intr_thread *it; #ifdef INVARIANTS struct intr_handler *ih; #endif -#ifdef notyet - int dead; -#endif + int state; if (handler == NULL) return (EINVAL); @@ -951,6 +846,13 @@ CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name, ie->ie_name); #ifdef INVARIANTS + if (!(ie->ie_flags & IE_SOFT)) { + ihw = (struct intr_hardware *)ie; + TAILQ_FOREACH(ih, &ihw->ihw_manual, ih_next) { + if (ih == handler) + goto ok; + } + } TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) if (ih == handler) goto ok; @@ -959,139 +861,106 @@ ih->ih_name, ie->ie_name); ok: #endif - /* - * If there are no ithreads (per event and per handler), then - * just remove the handler and return. - * XXX: Note that an INTR_FAST handler might be running on another CPU! - */ - if (ie->ie_thread == NULL && handler->ih_thread == NULL) { - TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock(&ie->ie_lock); - free(handler, M_ITHREAD); - return (0); - } - /* Private or global ithread? */ - it = (handler->ih_thread) ? handler->ih_thread : ie->ie_thread; /* - * If the interrupt thread is already running, then just mark this - * handler as being dead and let the ithread do the actual removal. - * - * During a cold boot while cold is set, msleep() does not sleep, - * so we have to remove the handler here rather than letting the - * thread do it. + * Manual interrupt handlers are on a separate list in the + * interrupt event. */ - thread_lock(it->it_thread); - if (!TD_AWAITING_INTR(it->it_thread) && !cold) { - handler->ih_flags |= IH_DEAD; - + if (handler->ih_flags & IH_MANUAL) { + ihw = (struct intr_hardware *)ie; + TAILQ_REMOVE(&ihw->ihw_manual, handler, ih_next); + } else { /* - * Ensure that the thread will process the handler list - * again and remove this handler if it has already passed - * it on the list. + * First, wait for the interrupt event to go idle so + * we can remove the handler from the event's list. 
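+	 * The list is modified under the queue's spin lock so an
+	 * interrupt thread never sees it in a half-updated state; a
+	 * handler that is already queued or running is handled by
+	 * the state machine below.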
*/ - atomic_store_rel_int(&it->it_need, 1); - } else + mtx_lock_spin(&ie->ie_queue->iq_lock); TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - thread_unlock(it->it_thread); - while (handler->ih_flags & IH_DEAD) - msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); - /* - * At this point, the handler has been disconnected from the event, - * so we can kill the private ithread if any. - */ - if (handler->ih_thread) { - ithread_destroy(handler->ih_thread); - handler->ih_thread = NULL; + mtx_unlock_spin(&ie->ie_queue->iq_lock); + intr_event_update(ie); } - intr_event_update(ie); -#ifdef notyet + mtx_unlock(&ie->ie_lock); + /* - * XXX: This could be bad in the case of ppbus(8). Also, I think - * this could lead to races of stale data when servicing an - * interrupt. + * Next, wait for the interrupt handler to go idle. If it is + * already idle, just mark it as dead. If it has been queued + * or is executing, attempt to mark it as dying and then wait + * for an interrupt thread to drain it from a queue. */ - dead = 1; - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - if (handler != NULL) { - dead = 0; + for (state = handler->ih_state; state != IS_DEAD; + state = handler->ih_state) { + switch (state) { + case IS_IDLE: + /* + * If the interrupt handler is idle, try to mark + * it as dead. + */ + atomic_cmpset_int(&handler->ih_state, IS_IDLE, IS_DEAD); + break; + default: + /* + * If the interrupt handler is busy, mark it + * as dying and wait. + */ + if (atomic_cmpset_int(&handler->ih_state, state, + IS_DYING)) { + mtx_lock(&ie->ie_lock); + while (handler->ih_state != IS_DEAD) + mtx_sleep(handler, &ie->ie_lock, 0, + "iev_rmh", 0); + mtx_unlock(&ie->ie_lock); + } break; } } - if (dead) { - ithread_destroy(ie->ie_thread); - ie->ie_thread = NULL; - } -#endif - mtx_unlock(&ie->ie_lock); + + /* The handler is now unreferenced, so can finally free it. */ free(handler, M_ITHREAD); return (0); } -static int -intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it) +/* Create a software interrupt event and thread. */ +int +swi_create(struct intr_event **event, const char *fmt, ...) { - struct intr_entropy entropy; - struct thread *td; - struct thread *ctd; - struct proc *p; + struct intr_software *isw; + va_list ap; - /* - * If no ithread or no handlers, then we have a stray interrupt. - */ - if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || it == NULL) - return (EINVAL); - - ctd = curthread; - td = it->it_thread; - p = td->td_proc; - - /* - * If any of the handlers for this ithread claim to be good - * sources of entropy, then gather some. - */ - if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) { - CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__, - p->p_pid, td->td_name); - entropy.event = (uintptr_t)ie; - entropy.td = ctd; - random_harvest(&entropy, sizeof(entropy), 2, - RANDOM_INTERRUPT); - } - - KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name)); - - /* - * Set it_need to tell the thread to keep running if it is already - * running. Then, lock the thread and see if we actually need to - * put it on the runqueue. 
- */ - atomic_store_rel_int(&it->it_need, 1); - thread_lock(td); - if (TD_AWAITING_INTR(td)) { - CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, - td->td_name); - TD_CLR_IWAIT(td); - sched_add(td, SRQ_INTR); - } else { - CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", - __func__, p->p_pid, td->td_name, it->it_need, td->td_state); - } - thread_unlock(td); - + isw = malloc(sizeof(struct intr_software), M_ITHREAD, + M_WAITOK | M_ZERO); + va_start(ap, fmt); + intr_event_init(&isw->isw_event, IE_SOFT, fmt, ap); + va_end(ap); + mtx_lock(&event_lock); + TAILQ_INSERT_TAIL(&swi_event_list, isw, isw_list); + mtx_unlock(&event_lock); + if (event != NULL) + *event = &isw->isw_event; return (0); } -#endif -/* - * Allow interrupt event binding for software interrupt handlers -- a no-op, - * since interrupts are generated in software rather than being directed by - * a PIC. - */ -static int -swi_assign_cpu(void *arg, u_char cpu) +/* Tear down a software interrupt event and thread. */ +int +swi_destroy(struct intr_event *ie) { + struct intr_software *isw; + + if (!(ie->ie_flags & IE_SOFT)) + return (EINVAL); + isw = (struct intr_software *)ie; + mtx_lock(&event_lock); + mtx_lock(&ie->ie_lock); + if (!TAILQ_EMPTY(&ie->ie_handlers)) { + mtx_unlock(&ie->ie_lock); + mtx_unlock(&event_lock); + return (EBUSY); + } + TAILQ_REMOVE(&swi_event_list, isw, isw_list); + mtx_unlock(&ie->ie_lock); + mtx_unlock(&event_lock); + if (ie->ie_queue != NULL) + intr_queue_destroy(ie->ie_queue); return (0); } @@ -1115,8 +984,7 @@ if (!(ie->ie_flags & IE_SOFT)) return (EINVAL); } else { - error = intr_event_create(&ie, NULL, IE_SOFT, 0, - NULL, NULL, NULL, swi_assign_cpu, "swi%d:", pri); + error = swi_create(&ie, "swi%d:", pri); if (error) return (error); if (eventp != NULL) @@ -1128,18 +996,23 @@ } /* - * Schedule a software interrupt thread. + * Schedule a software interrupt handler. */ void swi_sched(void *cookie, int flags) { - struct intr_handler *ih = (struct intr_handler *)cookie; - struct intr_event *ie = ih->ih_event; struct intr_entropy entropy; - int error; + struct intr_handler *ih; + struct intr_event *ie; + struct intr_queue *iq; + int state; - CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name, - ih->ih_need); + ih = cookie; + ie = ih->ih_event; + iq = ie->ie_queue; + KASSERT(ie->ie_flags & IE_SOFT, ("swi_sched: hardware interrupt event")); + CTR3(KTR_INTR, "swi_sched: %s %s state=%d", ie->ie_name, ih->ih_name, + ih->ih_state); if (harvest.swi) { CTR2(KTR_INTR, "swi_sched: pid %d (%s) gathering entropy", @@ -1149,567 +1022,617 @@ random_harvest(&entropy, sizeof(entropy), 1, RANDOM_SWI); } + for (;;) { + state = ih->ih_state; + switch (state) { + case IS_IDLE: + /* + * Try to change state to queued. If that fails, + * try the loop again. + */ + if (!atomic_cmpset_int(&ih->ih_state, IS_IDLE, + IS_QUEUED)) + break; - /* - * Set ih_need for this handler so that if the ithread is already - * running it will execute this handler on the next pass. Otherwise, - * it will execute it the next time it runs. - */ - atomic_store_rel_int(&ih->ih_need, 1); + /* Queue the handler. */ + mtx_lock_spin(&iq->iq_lock); + STAILQ_INSERT_TAIL(&iq->iq_active, ih, ih_queued); + PCPU_INC(cnt.v_soft); - if (!(flags & SWI_DELAY)) { - PCPU_INC(cnt.v_soft); -#ifdef INTR_FILTER - error = intr_event_schedule_thread(ie, ie->ie_thread); -#else - error = intr_event_schedule_thread(ie); -#endif - KASSERT(error == 0, ("stray software interrupt")); + /* Schedule the thread if needed. 
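+	 * With SWI_DELAY the handler is left queued but the thread
+	 * is not kicked; it will run the next time the queue's
+	 * thread is scheduled for another reason.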
 */
+		if (!(flags & SWI_DELAY))
+			intr_queue_schedule(iq);
+		mtx_unlock_spin(&iq->iq_lock);
+		return;
+	case IS_QUEUED:
+	case IS_REQUEUE:
+		/*
+		 * Do an atomic op to ensure it is in one of the
+		 * queued states.  If so, nothing else to do.
+		 */
+		if (atomic_cmpset_int(&ih->ih_state, state,
+		    state)) {
+			PCPU_INC(cnt.v_soft);
+			return;
+		}
+		break;
+	case IS_RUNNING:
+		/*
+		 * Try to change the state to requeue so that
+		 * the interrupt thread will requeue the
+		 * handler when it is finished executing.
+		 */
+		if (atomic_cmpset_int(&ih->ih_state, IS_RUNNING,
+		    IS_REQUEUE))
+			return;
+		break;
+	case IS_DEAD:
+	case IS_DYING:
+		/*
+		 * If this happens, it is probably a bug in
+		 * the calling code, but just ignore it.
+		 */
+		return;
+	}
+	}
 }

 /*
  * Remove a software interrupt handler.  Currently this code does not
- * remove the associated interrupt event if it becomes empty.  Calling code
- * may do so manually via intr_event_destroy(), but that's not really
- * an optimal interface.
+ * remove the associated interrupt event if it becomes empty.
  */
 int
 swi_remove(void *cookie)
 {
+#ifdef INVARIANTS
+	struct intr_handler *ih;
+
+	ih = cookie;
+	KASSERT(ih->ih_event->ie_flags & IE_SOFT,
+	    ("swi_remove: hardware interrupt event"));
+#endif
 	return (intr_event_remove_handler(cookie));
 }

-#ifdef INTR_FILTER
-static void
-priv_ithread_execute_handler(struct proc *p, struct intr_handler *ih)
+/*
+ * Execute a threaded interrupt handler.  Returns DYING if the handler
+ * has been marked as dying, REQUEUE if the interrupt thread should
+ * requeue it, or FINISHED otherwise.
+ */
+static __inline int
+intr_handler_execute(struct intr_handler *ih, struct intr_event *ie,
+    struct thread *td)
 {
-	struct intr_event *ie;
+	int state;

-	ie = ih->ih_event;
-	/*
-	 * If this handler is marked for death, remove it from
-	 * the list of handlers and wake up the sleeper.
-	 */
-	if (ih->ih_flags & IH_DEAD) {
-		mtx_lock(&ie->ie_lock);
-		TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
-		ih->ih_flags &= ~IH_DEAD;
-		wakeup(ih);
-		mtx_unlock(&ie->ie_lock);
-		return;
+	/* Transition state from queued to running. */
+	for (;;) {
+		state = ih->ih_state;
+		switch (state) {
+		case IS_DYING:
+			return (DYING);
+		case IS_QUEUED:
+			/* Mark the handler as running. */
+			if (atomic_cmpset_int(&ih->ih_state, IS_QUEUED,
+			    IS_RUNNING)) {
+				/* XXXTEST */
+				CTR1(KTR_INTR, "%s: IS_QUEUED -> IS_RUNNING",
+				    ih->ih_name);
+				goto run;
+			}
+			break;
+#ifdef INVARIANTS
+		default:
+			panic("bad pre-exec intr handler state %d", state);
+#endif
+		}
 	}
-
-	/* Execute this handler. */
-	CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
-	    __func__, p->p_pid, (void *)ih->ih_handler, ih->ih_argument,
-	    ih->ih_name, ih->ih_flags);
-
+
+run:
+	CTR5(KTR_INTR, "intr_exec: tid %d exec %p(%p) for %s flg=%x",
+	    td->td_tid, (void *)ih->ih_handler, ih->ih_argument, ih->ih_name,
+	    ih->ih_flags);
 	if (!(ih->ih_flags & IH_MPSAFE))
 		mtx_lock(&Giant);
 	ih->ih_handler(ih->ih_argument);
 	if (!(ih->ih_flags & IH_MPSAFE))
 		mtx_unlock(&Giant);
+
+	/* Transition state from running back to idle. */
+	for (;;) {
+		state = ih->ih_state;
+		switch (state) {
+		case IS_DYING:
+			return (DYING);
+		case IS_REQUEUE:
+			/*
+			 * Try to set the state to queued.  If that
+			 * succeeds, requeue the handler.  The return
+			 * value tells the interrupt thread to requeue
+			 * the handler.  For hardware interrupts,
+			 * there is no need to schedule a thread as
+			 * this thread will reclaim the current CPU if
+			 * there is not another associated thread
+			 * already.
+			 */
+			if (atomic_cmpset_int(&ih->ih_state, IS_REQUEUE,
+			    IS_QUEUED)) {
+				/* XXXTEST */
+				CTR1(KTR_INTR, "%s: IS_REQUEUE -> IS_QUEUED",
+				    ih->ih_name);
+				return (REQUEUE);
+			}
+			break;
+		case IS_RUNNING:
+			if (atomic_cmpset_int(&ih->ih_state, IS_RUNNING,
+			    IS_IDLE)) {
+				/* XXXTEST */
+				CTR1(KTR_INTR, "%s: IS_RUNNING -> IS_IDLE",
+				    ih->ih_name);
+				return (FINISHED);
+			}
+			break;
+#ifdef INVARIANTS
+		default:
+			panic("bad post-exec intr handler state %d", state);
+#endif
+		}
+	}
 }
-#endif

 /*
- * This is a public function for use by drivers that mux interrupt
- * handlers for child devices from their interrupt handler.
+ * Timer routine that invokes the 'post_ithread' hook on all storming
+ * interrupts.
  */
-void
-intr_event_execute_handlers(struct proc *p, struct intr_event *ie)
+static void
+storm_clear(void *arg)
 {
-	struct intr_handler *ih, *ihn;
+	struct intr_hardware *ihw;

-	TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
-		/*
-		 * If this handler is marked for death, remove it from
-		 * the list of handlers and wake up the sleeper.
-		 */
-		if (ih->ih_flags & IH_DEAD) {
-			mtx_lock(&ie->ie_lock);
-			TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
-			ih->ih_flags &= ~IH_DEAD;
-			wakeup(ih);
-			mtx_unlock(&ie->ie_lock);
-			continue;
+	mtx_assert(&event_lock, MA_OWNED);
+	TAILQ_FOREACH(ihw, &hwi_event_list, ihw_list) {
+		if (ihw->ihw_storming) {
+			ihw->ihw_storming = 0;
+			CTR1(KTR_INTR, "storm_clear: post_ithread for %s",
+			    ihw->ihw_event.ie_name);
+			ihw->ihw_post_ithread(ihw->ihw_source);
+			if (ihw->ihw_queued == 0)
+				ihw->ihw_count = 0;
+			/*
+			 * Don't bother incrementing ihw_count if the
+			 * interrupt continues to storm since it has
+			 * already crossed the threshold and we risk
+			 * overflow.
+			 */
		}
-
-		/* Skip filter only handlers */
-		if (ih->ih_handler == NULL)
-			continue;
-
-		/*
-		 * For software interrupt threads, we only execute
-		 * handlers that have their need flag set.  Hardware
-		 * interrupt threads always invoke all of their handlers.
-		 */
-		if (ie->ie_flags & IE_SOFT) {
-			if (atomic_load_acq_int(&ih->ih_need) == 0)
-				continue;
-			else
-				atomic_store_rel_int(&ih->ih_need, 0);
-		}
-
-		/* Execute this handler. */
-		CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
-		    __func__, p->p_pid, (void *)ih->ih_handler,
-		    ih->ih_argument, ih->ih_name, ih->ih_flags);
-
-		if (!(ih->ih_flags & IH_MPSAFE))
-			mtx_lock(&Giant);
-		ih->ih_handler(ih->ih_argument);
-		if (!(ih->ih_flags & IH_MPSAFE))
-			mtx_unlock(&Giant);
 	}
 }

+/*
+ * Handle invoking the 'post_ithread' hook for hardware interrupts.
+ * If the interrupt has queued requests after the 'post_ithread'
+ * hook returns, then treat that as a storming event.  If the
+ * interrupt accumulates enough storm events, throttle it by deferring
+ * the 'post_ithread' hook for a clock tick.
+ */
 static void
-ithread_execute_handlers(struct proc *p, struct intr_event *ie)
+hwi_post_ithread(struct intr_event *ie)
 {
+	struct intr_hardware *ihw;

-	/* Interrupt handlers should not sleep. */
-	if (!(ie->ie_flags & IE_SOFT))
-		THREAD_NO_SLEEPING();
-	intr_event_execute_handlers(p, ie);
-	if (!(ie->ie_flags & IE_SOFT))
-		THREAD_SLEEPING_OK();
+	ihw = (struct intr_hardware *)ie;
+	if (ihw->ihw_post_ithread == NULL)
+		return;
+
+	if (atomic_fetchadd_int(&ihw->ihw_queued, -1) != 1)
+		return;

 	/*
-	 * Interrupt storm handling:
-	 *
-	 * If this interrupt source is currently storming, then throttle
-	 * it to only fire the handler once per clock tick.
- * - * If this interrupt source is not currently storming, but the - * number of back to back interrupts exceeds the storm threshold, - * then enter storming mode. + * If this event is storming, schedule a storming callout and + * defer the 'post_ithread' hook until it fires. */ - if (intr_storm_threshold != 0 && ie->ie_count >= intr_storm_threshold && - !(ie->ie_flags & IE_SOFT)) { + if (intr_storm_threshold != 0 && + ihw->ihw_count >= intr_storm_threshold) { /* Report the message only once every second. */ - if (ppsratecheck(&ie->ie_warntm, &ie->ie_warncnt, 1)) { + if (ppsratecheck(&ihw->ihw_warntm, &ihw->ihw_warncnt, 1)) { printf( "interrupt storm detected on \"%s\"; throttling interrupt source\n", ie->ie_name); } - pause("istorm", 1); - } else - ie->ie_count++; + mtx_lock(&event_lock); + ihw->ihw_storming = 1; + callout_reset(&storm_timer, 1, storm_clear, NULL); + mtx_unlock(&event_lock); + return; + } + + CTR1(KTR_INTR, "intr_loop: post_ithread for %s", ie->ie_name); + ihw->ihw_post_ithread(ihw->ihw_source); + if (ihw->ihw_queued == 0) { + ihw->ihw_count = 0; + return; + } - /* - * Now that all the handlers have had a chance to run, reenable - * the interrupt source. - */ - if (ie->ie_post_ithread != NULL) - ie->ie_post_ithread(ie->ie_source); + atomic_add_int(&ihw->ihw_count, 1); } -#ifndef INTR_FILTER /* - * This is the main code for interrupt threads. + * Main loop for interrupt threads. Each interrupt thread is bound to + * a specific interrupt queue and executes handlers for that queue. */ static void -ithread_loop(void *arg) +intr_loop(void *arg) { - struct intr_thread *ithd; + struct intr_handler *ih; + struct intr_thread *it; + struct intr_queue *iq; struct intr_event *ie; struct thread *td; - struct proc *p; - int wake; + int state; td = curthread; - p = td->td_proc; - ithd = (struct intr_thread *)arg; - KASSERT(ithd->it_thread == td, - ("%s: ithread and proc linkage out of sync", __func__)); - ie = ithd->it_event; - ie->ie_count = 0; - wake = 0; + iq = arg; + it = td->td_ithread; + KASSERT(it->it_thread == td, ("intr_loop: ithread linkage out of sync")); /* - * As long as we have interrupts outstanding, go through the - * list of handlers, giving each one a go at it. + * Execute handlers queued on the active list. If there are + * no handlers, block waiting for more handlers. */ - for (;;) { - /* - * If we are an orphaned thread, then just die. - */ - if (ithd->it_flags & IT_DEAD) { - CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__, - p->p_pid, td->td_name); - free(ithd, M_ITHREAD); - kthread_exit(); - } + mtx_lock_spin(&iq->iq_lock); + while (!(it->it_flags & IT_DEAD)) { + /* Remove active handlers and execute them. */ + while ((ih = STAILQ_FIRST(&iq->iq_active)) != NULL) { + STAILQ_REMOVE_HEAD(&iq->iq_active, ih_queued); + mtx_unlock_spin(&iq->iq_lock); + ie = ih->ih_event; + + /* Hardware interrupt handlers should not sleep. */ + if (!(ie->ie_flags & IE_SOFT)) + THREAD_NO_SLEEPING(); +#ifdef INVARIANTS + it->it_current = ih; +#endif + state = intr_handler_execute(ih, ie, td); +#ifdef INVARIANTS + it->it_current = NULL; +#endif - /* - * Service interrupts. If another interrupt arrives while - * we are running, it will set it_need to note that we - * should make another pass. - */ - while (atomic_load_acq_int(&ithd->it_need) != 0) { /* - * This might need a full read and write barrier - * to make sure that this write posts before any - * of the memory or device accesses in the - * handlers. 
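
Structurally, intr_loop() is the usual worker-thread drain loop: pop handlers under the queue lock, drop the lock to run each one, and go idle when the queue empties. A rough userland analogue, with a pthread mutex/condvar standing in for the spin lock and the TD_SET_IWAIT/mi_switch() handoff (illustrative only; all names invented):

	#include <pthread.h>
	#include <sys/queue.h>

	struct work {
		STAILQ_ENTRY(work) link;
		void	(*fn)(void *);
		void	*arg;
	};

	static STAILQ_HEAD(, work) active = STAILQ_HEAD_INITIALIZER(active);
	static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t qcv = PTHREAD_COND_INITIALIZER;
	static int dead;

	static void *
	worker_loop(void *arg)
	{
		struct work *w;

		pthread_mutex_lock(&qlock);
		while (!dead) {
			/* Drain the queue, dropping the lock to run. */
			while ((w = STAILQ_FIRST(&active)) != NULL) {
				STAILQ_REMOVE_HEAD(&active, link);
				pthread_mutex_unlock(&qlock);
				w->fn(w->arg);
				pthread_mutex_lock(&qlock);
			}
			/* Nothing queued: block until more work arrives. */
			if (!dead)
				pthread_cond_wait(&qcv, &qlock);
		}
		pthread_mutex_unlock(&qlock);
		return (NULL);
	}
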
+ * If this is a hardware interrupt handler + * that isn't being requeued, invoke the + * 'post_ithread' hook if needed. */ - atomic_store_rel_int(&ithd->it_need, 0); - ithread_execute_handlers(p, ie); + if (!(ie->ie_flags & IE_SOFT)) { + THREAD_SLEEPING_OK(); + if (state != REQUEUE && + !(ih->ih_flags & IH_MANUAL)) + hwi_post_ithread(ie); + } + + if (state == DYING) + intr_handler_ack_dying(ie, ih); + + WITNESS_WARN(WARN_PANIC, NULL, "finished intr"); + mtx_assert(&Giant, MA_NOTOWNED); + + mtx_lock_spin(&iq->iq_lock); + if (state == REQUEUE) + STAILQ_INSERT_TAIL(&iq->iq_active, ih, + ih_queued); } - WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); - mtx_assert(&Giant, MA_NOTOWNED); - /* - * Processed all our interrupts. Now get the sched - * lock. This may take a while and it_need may get - * set again, so we have to check it again. - */ - thread_lock(td); - if ((atomic_load_acq_int(&ithd->it_need) == 0) && - !(ithd->it_flags & (IT_DEAD | IT_WAIT))) { + /* Block waiting for more work. */ + if (!(it->it_flags & IT_DEAD)) { + thread_lock(td); + thread_lock_set(td, &iq->iq_lock); TD_SET_IWAIT(td); - ie->ie_count = 0; mi_switch(SW_VOL | SWT_IWAIT, NULL); - } - if (ithd->it_flags & IT_WAIT) { - wake = 1; - ithd->it_flags &= ~IT_WAIT; - } - thread_unlock(td); - if (wake) { - wakeup(ithd); - wake = 0; + thread_unlock(td); + mtx_lock_spin(&iq->iq_lock); } } + mtx_unlock_spin(&iq->iq_lock); + + CTR2(KTR_INTR, "intr_loop: tid %d (%s) exiting", td->td_tid, + td->td_name); + + /* XXX: This assumes one:one events and queues. */ + free(iq->iq_event, M_ITHREAD); + + /* Free the interrupt thread and associated interrupt queue. */ + free(it, M_ITHREAD); + mtx_destroy(&iq->iq_lock); + free(iq, M_ITHREAD); + kthread_exit(); } /* - * Main interrupt handling body. - * - * Input: - * o ie: the event connected to this interrupt. - * o frame: some archs (i.e. i386) pass a frame to some. - * handlers as their main argument. - * Return value: - * o 0: everything ok. - * o EINVAL: stray interrupt. + * Entry point for MD code to call to handle a hardware interrupt. + * The trapframe is passed as the argument to any filter handlers that + * specify NULL as their argument. */ int -intr_event_handle(struct intr_event *ie, struct trapframe *frame) +hwi_handle(struct intr_event *ie, struct trapframe *frame) { + struct intr_entropy entropy; + struct intr_hardware *ihw; struct intr_handler *ih; + struct intr_queue *iq; struct trapframe *oldframe; struct thread *td; - int error, ret, thread; - - td = curthread; + void *arg; + int queued, ret, state; /* An interrupt with no event or handlers is a stray interrupt. */ if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers)) return (EINVAL); + td = curthread; + td->td_intr_nesting_level++; + oldframe = td->td_intr_frame; + td->td_intr_frame = frame; + /* - * Execute fast interrupt handlers directly. + * Execute filter interrupt handlers directly. * To support clock handlers, if a handler registers * with a NULL argument, then we pass it a pointer to * a trapframe as its argument. */ - td->td_intr_nesting_level++; - thread = 0; - ret = 0; - critical_enter(); - oldframe = td->td_intr_frame; - td->td_intr_frame = frame; + queued = 0; + iq = ie->ie_queue; + ihw = (struct intr_hardware *)ie; + mtx_lock_spin(&iq->iq_lock); TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - if (ih->ih_filter == NULL) { - thread = 1; - continue; + /* If we have a filter, run it first. 
*/ + if (ih->ih_filter != NULL) { + arg = ih->ih_argument; + if (arg == NULL) + arg = frame; + CTR3(KTR_INTR, "hwi_handle: exec %p(%p) for %s", + ih->ih_filter, arg, ih->ih_name); + ret = ih->ih_filter(arg); + CTR1(KTR_INTR, "hwi_handle: filter returned %#x", ret); + KASSERT(ret == FILTER_STRAY || ret == FILTER_HANDLED || + ret == FILTER_SCHEDULE_THREAD, + ("incorrect filter return value %d from %s", ret, + ih->ih_name)); + KASSERT(!(ret == FILTER_SCHEDULE_THREAD && + ih->ih_handler == NULL), + ("filter attempted to schedule NULL handler")); + + /* + * If no need to schedule threaded handler, + * nothing left to do for this handler. + */ + if (ret != FILTER_SCHEDULE_THREAD) + continue; } - CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__, - ih->ih_filter, ih->ih_argument == NULL ? frame : - ih->ih_argument, ih->ih_name); - if (ih->ih_argument == NULL) - ret = ih->ih_filter(frame); - else - ret = ih->ih_filter(ih->ih_argument); - KASSERT(ret == FILTER_STRAY || - ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 && - (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0), - ("%s: incorrect return value %#x from %s", __func__, ret, - ih->ih_name)); + + /* Queue this handler. */ + for (;;) { + state = ih->ih_state; + switch (state) { + case IS_IDLE: + /* Try to change the state to queued. */ + if (!atomic_cmpset_int(&ih->ih_state, IS_IDLE, + IS_QUEUED)) + break; + + /* XXXTEST */ + CTR1(KTR_INTR, "%s: IS_IDLE -> IS_QUEUED", + ih->ih_name); - /* - * Wrapper handler special handling: - * - * in some particular cases (like pccard and pccbb), - * the _real_ device handler is wrapped in a couple of - * functions - a filter wrapper and an ithread wrapper. - * In this case (and just in this case), the filter wrapper - * could ask the system to schedule the ithread and mask - * the interrupt source if the wrapped handler is composed - * of just an ithread handler. - * - * TODO: write a generic wrapper to avoid people rolling - * their own - */ - if (!thread) { - if (ret == FILTER_SCHEDULE_THREAD) - thread = 1; + /* + * Queue the handler. Note that we + * need to use the 'pre_ithread' and + * 'post_ithread' hooks. + */ + queued = 1; + atomic_add_int(&ihw->ihw_queued, 1); + CTR2(KTR_INTR, + "hwi_handle: scheduled %s for %s", + ih->ih_name, ie->ie_name); + STAILQ_INSERT_TAIL(&iq->iq_active, ih, ih_queued); + goto next; + case IS_QUEUED: + case IS_REQUEUE: + /* Ensure it is truly still queued. */ + if (atomic_cmpset_int(&ih->ih_state, state, + state)) { + /* XXXTEST */ + CTR3(KTR_INTR, "%s: %s -> %s", + ih->ih_name, state == IS_QUEUED ? + "IS_QUEUED" : "IS_REQUEUE", + state == IS_QUEUED ? "IS_QUEUED" : + "IS_REQUEUE"); + goto next; + } + break; + case IS_RUNNING: + /* Try to change the state to requeue. */ + if (atomic_cmpset_int(&ih->ih_state, IS_RUNNING, + IS_REQUEUE)) { + /* XXXTEST */ + CTR1(KTR_INTR, + "%s: IS_RUNNING -> IS_REQUEUE", + ih->ih_name); + goto next; + } + break; +#ifdef INVARIANTS + default: + /* + * The dying/dead states should not + * happen. They are only set while + * holding the run lock and once they + * are set the event is removed from + * the interrupt event's handler + * list. + */ + panic("hwi_handle: bad state %d", state); +#endif + } } + next:; } - td->td_intr_frame = oldframe; - if (thread) { - if (ie->ie_pre_ithread != NULL) - ie->ie_pre_ithread(ie->ie_source); + /* + * If any handlers were queued, invoke the 'pre_ithread' hook, + * otherwise invoke the 'post_filter' hook. 
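
The NULL-argument convention noted above is what lets clock-style filters see the trapframe: hwi_handle() substitutes the frame when ih_argument is NULL. A hypothetical filter-only registration (the foo_* names and helper are invented):

	static int
	foo_clk_filter(void *arg)
	{
		struct trapframe *frame = arg;	/* registered with NULL arg */

		foo_count_tick(TRAPF_USERMODE(frame)); /* hypothetical */
		return (FILTER_HANDLED);
	}

		error = intr_event_add_handler(ie, "fooclk", foo_clk_filter,
		    NULL, NULL, intr_priority(INTR_TYPE_CLK),
		    INTR_TYPE_CLK | INTR_MPSAFE, &cookie);
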
+ */ + if (queued) { + if (ihw->ihw_pre_ithread != NULL) { + CTR1(KTR_INTR, "hwi_handle: pre_ithread for %s", + ie->ie_name); + ihw->ihw_pre_ithread(ihw->ihw_source); + } } else { - if (ie->ie_post_filter != NULL) - ie->ie_post_filter(ie->ie_source); + if (ihw->ihw_post_filter != NULL) { + CTR1(KTR_INTR, "hwi_handle: post_filter for %s", + ie->ie_name); + ihw->ihw_post_filter(ihw->ihw_source); + } + } + + /* + * If any of the handlers for this event claim to be good + * sources of entropy, then gather some. + */ + if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) { + td = curthread; + CTR2(KTR_INTR, "hwi_handle: tid %d (%s) gathering entropy", + td->td_tid, td->td_name); + entropy.event = (uintptr_t)ie; + entropy.td = td; + random_harvest(&entropy, sizeof(entropy), 2, + RANDOM_INTERRUPT); } - + /* Schedule the ithread if needed. */ - if (thread) { - error = intr_event_schedule_thread(ie); -#ifndef XEN - KASSERT(error == 0, ("bad stray interrupt")); -#else - if (error != 0) - log(LOG_WARNING, "bad stray interrupt"); -#endif - } - critical_exit(); + if (!STAILQ_EMPTY(&iq->iq_active)) + intr_queue_schedule(iq); + mtx_unlock_spin(&iq->iq_lock); + td->td_intr_frame = oldframe; td->td_intr_nesting_level--; return (0); } -#else + /* - * This is the main code for interrupt threads. + * Allow a hardware interrupt handler to be manually scheduled on the + * current CPU's queue. This can be used either to schedule manual + * interrupt handlers from a filter or handler or to reschedule the + * currently executing handler. Much of the logic is copied from + * hwi_handle(). */ -static void -ithread_loop(void *arg) +void +hwi_sched(void *cookie) { - struct intr_thread *ithd; + struct intr_hardware *ihw; struct intr_handler *ih; struct intr_event *ie; - struct thread *td; - struct proc *p; - int priv; - int wake; + struct intr_queue *iq; + int state; - td = curthread; - p = td->td_proc; - ih = (struct intr_handler *)arg; - priv = (ih->ih_thread != NULL) ? 1 : 0; - ithd = (priv) ? ih->ih_thread : ih->ih_event->ie_thread; - KASSERT(ithd->it_thread == td, - ("%s: ithread and proc linkage out of sync", __func__)); - ie = ithd->it_event; - ie->ie_count = 0; - wake = 0; + ih = cookie; + ie = ih->ih_event; + iq = ie->ie_queue; + ihw = (struct intr_hardware *)ie; + KASSERT((curthread->td_pflags & TDP_ITHREAD) || + curthread->td_intr_nesting_level > 0, + ("hwi_sched: invalid calling thread context")); + KASSERT((ih->ih_flags & IH_MANUAL) || + ((curthread->td_pflags & TDP_ITHREAD) && + curthread->td_ithread->it_current == ih), + ("hwi_sched: attempt to schedule invalid handler")); + KASSERT(!(ie->ie_flags & IE_SOFT), ("hwi_sched: swi event")); - /* - * As long as we have interrupts outstanding, go through the - * list of handlers, giving each one a go at it. - */ + /* Place this interrupt handler on this CPU's queue. */ for (;;) { - /* - * If we are an orphaned thread, then just die. - */ - if (ithd->it_flags & IT_DEAD) { - CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__, - p->p_pid, td->td_name); - free(ithd, M_ITHREAD); - kthread_exit(); - } + state = ih->ih_state; + switch (state) { + case IS_IDLE: + /* Try to change the state to queued. */ + if (!atomic_cmpset_int(&ih->ih_state, IS_IDLE, + IS_QUEUED)) + break; - /* - * Service interrupts. If another interrupt arrives while - * we are running, it will set it_need to note that we - * should make another pass. 
- */ - while (atomic_load_acq_int(&ithd->it_need) != 0) { +#if 0 /* - * This might need a full read and write barrier - * to make sure that this write posts before any - * of the memory or device accesses in the - * handlers. + * If requeueing the currently executing + * handler and it is not a manual handler, + * bump the queued count to defer the + * 'post_ithread' hook. */ - atomic_store_rel_int(&ithd->it_need, 0); - if (priv) - priv_ithread_execute_handler(p, ih); - else - ithread_execute_handlers(p, ie); - } - WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); - mtx_assert(&Giant, MA_NOTOWNED); + if (!(ih->ih_flags & IH_MANUAL)) + atomic_add_int(&ihw->ihw_queued, 1); +#else + KASSERT(ih->ih_flags & IH_MANUAL, + ("hwi_sched: queuing idle non-manual handler")); +#endif - /* - * Processed all our interrupts. Now get the sched - * lock. This may take a while and it_need may get - * set again, so we have to check it again. - */ - thread_lock(td); - if ((atomic_load_acq_int(&ithd->it_need) == 0) && - !(ithd->it_flags & (IT_DEAD | IT_WAIT))) { - TD_SET_IWAIT(td); - ie->ie_count = 0; - mi_switch(SW_VOL | SWT_IWAIT, NULL); + /* + * Queue the handler. If we are called from a + * filter, the queue should already be locked. + * hwi_handle() will schedule the thread after + * the filter returns. + */ + if (curthread->td_intr_nesting_level > 0) + mtx_assert(&iq->iq_lock, MA_OWNED); + else + mtx_lock_spin(&iq->iq_lock); + STAILQ_INSERT_TAIL(&iq->iq_active, ih, ih_queued); + if (curthread->td_intr_nesting_level == 0) { + intr_queue_schedule(iq); + mtx_unlock_spin(&iq->iq_lock); + } + return; + case IS_QUEUED: + case IS_REQUEUE: + /* Ensure it is truly still queued. */ + if (atomic_cmpset_int(&ih->ih_state, state, state)) + return; + break; + case IS_RUNNING: + /* Try to change the state to requeue. */ + if (atomic_cmpset_int(&ih->ih_state, IS_RUNNING, + IS_REQUEUE)) + return; + break; + case IS_DYING: + /* + * This can happen if the currently executing + * handler is being removed by another thread. + * In that case, just ignore the reschedule + * attempt. The main loop of the hwi thread + * will ack the dying request once this + * handler finishes. + */ + KASSERT(curthread->td_ithread != NULL && + curthread->td_ithread->it_current == ih, + ("hwi_sched: dying handler is not current")); + return; +#ifdef INVARIANTS + default: + /* + * The dead state should not happen. The + * currently executing handler cannot be dead, + * only dying, and the owner of a manual + * handler is responsible for destroying any + * filters or handlers that can schedule that + * event before destroying the manual handler. + */ + panic("hwi_sched: bad state %d", state); +#endif } - if (ithd->it_flags & IT_WAIT) { - wake = 1; - ithd->it_flags &= ~IT_WAIT; - } - thread_unlock(td); - if (wake) { - wakeup(ithd); - wake = 0; - } - } -} - -/* - * Main loop for interrupt filter. - * - * Some architectures (i386, amd64 and arm) require the optional frame - * parameter, and use it as the main argument for fast handler execution - * when ih_argument == NULL. - * - * Return value: - * o FILTER_STRAY: No filter recognized the event, and no - * filter-less handler is registered on this - * line. - * o FILTER_HANDLED: A filter claimed the event and served it. - * o FILTER_SCHEDULE_THREAD: No filter claimed the event, but there's at - * least one filter-less handler on this line. - * o FILTER_HANDLED | - * FILTER_SCHEDULE_THREAD: A filter claimed the event, and asked for - * scheduling the per-handler ithread. 
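
This is the mechanism behind the INTR_MANUAL flag: a driver can leave the line unmasked at the PIC, mask the interrupt in a device register instead, and hand the threaded work off explicitly from its filter, as the sys/bus.h comment later in this diff describes. A condensed, hypothetical NIC driver sketch (the FOO_* register macros and softc layout are invented):

	static int
	foo_filter(void *arg)
	{
		struct foo_softc *sc = arg;

		if ((FOO_READ(sc, FOO_ISR) & FOO_INTR_PENDING) == 0)
			return (FILTER_STRAY);
		/* Mask in a device register so the line cannot storm... */
		FOO_WRITE(sc, FOO_IMR, 0);
		/* ...and queue the threaded handler explicitly. */
		hwi_sched(sc->intrhand);
		return (FILTER_HANDLED);
	}

	static void
	foo_intr(void *arg)
	{
		struct foo_softc *sc = arg;

		/* ... service rx/tx rings ... */
		FOO_WRITE(sc, FOO_IMR, FOO_INTR_PENDING); /* unmask device */
	}

	/* Attach: a filter-only handler plus an INTR_MANUAL handler. */
	error = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE,
	    foo_filter, NULL, sc, &sc->filthand);
	if (error == 0)
		error = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET |
		    INTR_MPSAFE | INTR_MANUAL, NULL, foo_intr, sc,
		    &sc->intrhand);
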
- * - * In case an ithread has to be scheduled, in *ithd there will be a - * pointer to a struct intr_thread containing the thread to be - * scheduled. - */ - -static int -intr_filter_loop(struct intr_event *ie, struct trapframe *frame, - struct intr_thread **ithd) -{ - struct intr_handler *ih; - void *arg; - int ret, thread_only; - - ret = 0; - thread_only = 0; - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - /* - * Execute fast interrupt handlers directly. - * To support clock handlers, if a handler registers - * with a NULL argument, then we pass it a pointer to - * a trapframe as its argument. - */ - arg = ((ih->ih_argument == NULL) ? frame : ih->ih_argument); - - CTR5(KTR_INTR, "%s: exec %p/%p(%p) for %s", __func__, - ih->ih_filter, ih->ih_handler, arg, ih->ih_name); - - if (ih->ih_filter != NULL) - ret = ih->ih_filter(arg); - else { - thread_only = 1; - continue; - } - KASSERT(ret == FILTER_STRAY || - ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 && - (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0), - ("%s: incorrect return value %#x from %s", __func__, ret, - ih->ih_name)); - if (ret & FILTER_STRAY) - continue; - else { - *ithd = ih->ih_thread; - return (ret); - } - } - - /* - * No filters handled the interrupt and we have at least - * one handler without a filter. In this case, we schedule - * all of the filter-less handlers to run in the ithread. - */ - if (thread_only) { - *ithd = ie->ie_thread; - return (FILTER_SCHEDULE_THREAD); - } - return (FILTER_STRAY); -} - -/* - * Main interrupt handling body. - * - * Input: - * o ie: the event connected to this interrupt. - * o frame: some archs (i.e. i386) pass a frame to some. - * handlers as their main argument. - * Return value: - * o 0: everything ok. - * o EINVAL: stray interrupt. - */ -int -intr_event_handle(struct intr_event *ie, struct trapframe *frame) -{ - struct intr_thread *ithd; - struct trapframe *oldframe; - struct thread *td; - int thread; - - ithd = NULL; - td = curthread; - - if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers)) - return (EINVAL); - - td->td_intr_nesting_level++; - thread = 0; - critical_enter(); - oldframe = td->td_intr_frame; - td->td_intr_frame = frame; - thread = intr_filter_loop(ie, frame, &ithd); - if (thread & FILTER_HANDLED) { - if (ie->ie_post_filter != NULL) - ie->ie_post_filter(ie->ie_source); - } else { - if (ie->ie_pre_ithread != NULL) - ie->ie_pre_ithread(ie->ie_source); - } - td->td_intr_frame = oldframe; - critical_exit(); - - /* Interrupt storm logic */ - if (thread & FILTER_STRAY) { - ie->ie_count++; - if (ie->ie_count < intr_storm_threshold) - printf("Interrupt stray detection not present\n"); - } - - /* Schedule an ithread if needed. 
*/ - if (thread & FILTER_SCHEDULE_THREAD) { - if (intr_event_schedule_thread(ie, ithd) != 0) - panic("%s: impossible stray interrupt", __func__); } - td->td_intr_nesting_level--; - return (0); } -#endif #ifdef DDB /* * Dump details about an interrupt handler */ -static void -db_dump_intrhand(struct intr_handler *ih) +void +db_dump_intrhand(struct intr_handler *ih, int display_event) { int comma; @@ -1741,7 +1664,12 @@ break; } db_printf(" "); + if (display_event) + db_printf("(%s) ", ih->ih_event->ie_name); if (ih->ih_filter != NULL) { + db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC); + db_printf(","); + } db_printf("[F]"); db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC); } @@ -1752,9 +1680,7 @@ db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC); } db_printf("(%p)", ih->ih_argument); - if (ih->ih_need || - (ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD | - IH_MPSAFE)) != 0) { + if ((ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_MPSAFE)) != 0) { db_printf(" {"); comma = 0; if (ih->ih_flags & IH_EXCLUSIVE) { @@ -1769,25 +1695,37 @@ db_printf("ENTROPY"); comma = 1; } - if (ih->ih_flags & IH_DEAD) { - if (comma) - db_printf(", "); - db_printf("DEAD"); - comma = 1; - } if (ih->ih_flags & IH_MPSAFE) { if (comma) db_printf(", "); db_printf("MPSAFE"); - comma = 1; - } - if (ih->ih_need) { - if (comma) - db_printf(", "); - db_printf("NEED"); } db_printf("}"); } + db_printf(" "); + switch (ih->ih_state) { + case IS_IDLE: + db_printf("IDLE"); + break; + case IS_QUEUED: + db_printf("QUEUED"); + break; + case IS_RUNNING: + db_printf("RUNNING"); + break; + case IS_REQUEUE: + db_printf("REQUEUE"); + break; + case IS_DYING: + db_printf("DYING"); + break; + case IS_DEAD: + db_printf("DEAD"); + break; + default: + db_printf("0x%x", ih->ih_state); + break; + } db_printf("\n"); } @@ -1797,18 +1735,19 @@ void db_dump_intr_event(struct intr_event *ie, int handlers) { + struct intr_hardware *ihw; struct intr_handler *ih; - struct intr_thread *it; int comma; - db_printf("%s ", ie->ie_fullname); - it = ie->ie_thread; - if (it != NULL) - db_printf("(pid %d)", it->it_thread->td_proc->p_pid); - else - db_printf("(no thread)"); - if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY | IE_ADDING_THREAD)) != 0 || - (it != NULL && it->it_need)) { + db_printf("%s", ie->ie_fullname); + if (!(ie->ie_flags & IE_SOFT)) { + ihw = (struct intr_hardware *)ie; + db_printf(" IRQ %d queued %d", ihw->ihw_irq, ihw->ihw_queued); + } + if (ie->ie_queue != NULL) + db_printf(" (tid %d)", + ie->ie_queue->iq_thread->it_thread->td_tid); + if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY)) != 0) { db_printf(" {"); comma = 0; if (ie->ie_flags & IE_SOFT) { @@ -1819,26 +1758,15 @@ if (comma) db_printf(", "); db_printf("ENTROPY"); - comma = 1; - } - if (ie->ie_flags & IE_ADDING_THREAD) { - if (comma) - db_printf(", "); - db_printf("ADDING_THREAD"); - comma = 1; - } - if (it != NULL && it->it_need) { - if (comma) - db_printf(", "); - db_printf("NEED"); } db_printf("}"); } db_printf("\n"); if (handlers) - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) - db_dump_intrhand(ih); + TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { + db_dump_intrhand(ih, 0); + } } /* @@ -1846,23 +1774,39 @@ */ DB_SHOW_COMMAND(intr, db_show_intr) { - struct intr_event *ie; + struct intr_software *isw; + struct intr_hardware *ihw; int all, verbose; verbose = strchr(modif, 'v') != NULL; all = strchr(modif, 'a') != NULL; - TAILQ_FOREACH(ie, &event_list, ie_list) { - if (!all && TAILQ_EMPTY(&ie->ie_handlers)) + TAILQ_FOREACH(ihw, &hwi_event_list, ihw_list) { + if (!all && 
TAILQ_EMPTY(&ihw->ihw_event.ie_handlers)) + continue; + db_dump_intr_event(&ihw->ihw_event, verbose); + if (db_pager_quit) + break; + } + TAILQ_FOREACH(isw, &swi_event_list, isw_list) { + if (!all && TAILQ_EMPTY(&isw->isw_event.ie_handlers)) continue; - db_dump_intr_event(ie, verbose); + db_dump_intr_event(&isw->isw_event, verbose); if (db_pager_quit) break; } } #endif /* DDB */ +static void +ithread_init(void *dummy) +{ + + callout_init_mtx(&storm_timer, &event_lock, 0); +} +SYSINIT(ithread_init, SI_SUB_INTR, SI_ORDER_FIRST, ithread_init, NULL); + /* - * Start standard software interrupt threads + * Start standard software interrupt threads. */ static void start_softintr(void *dummy) --- //depot/projects/smpng/sys/kern/kern_timeout.c +++ //depot/user/jhb/intr/kern/kern_timeout.c @@ -358,24 +358,30 @@ { struct callout_cpu *cc; #ifdef SMP + char name[MAXCOMLEN]; int cpu; -#endif cc = CC_CPU(timeout_cpu); - if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, + snprintf(name, sizeof(name), "clock: cpu%d", timeout_cpu); + if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); -#ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ callout_cpu_init(cc); - if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK, + snprintf(name, sizeof(name), "clock: cpu%d", cpu); + if (swi_add(NULL, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); } +#else + cc = CC_CPU(timeout_cpu); + if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, + INTR_MPSAFE, &cc->cc_cookie)) + panic("died while creating standard software ithreads"); #endif } SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); --- //depot/projects/smpng/sys/modules/asmc/Makefile +++ //depot/user/jhb/intr/modules/asmc/Makefile @@ -3,6 +3,6 @@ .PATH: ${.CURDIR}/../../dev/asmc KMOD= asmc -SRCS= asmc.c opt_acpi.h opt_intr_filter.h acpi_if.h bus_if.h device_if.h +SRCS= asmc.c opt_acpi.h acpi_if.h bus_if.h device_if.h .include --- //depot/projects/smpng/sys/notes +++ //depot/user/jhb/intr/notes @@ -73,3 +73,38 @@ - jhb_socket - socket hacking Space reserved for child branches: +- Kill eintrcnt crap and make intrcnt/intrnames MI with a MD length + constant. Export lengths instead. + - Move non-stray intrcnt managing into intr_event code? +- Use RCU to lock the per-source list of interrupt handlers. + +IRQ Shuffle: +- Break up the icu_lock used to protect all IO APIC pins; possible solutions: + - 1) use a hash table to assign each pin a spin lock + - 2) give each CPU its own spin lock and when an pin is assigned a CPU, + use that lock to protect that pin + - 2a) bind the ithreads to the CPU that the IDT vector is assigned to + and then you can just use critical sections to protect the APIC pins; + actually, has to be spinlock_enter/exit rather than critical sections as + we are protecting against bottom-half code rather than preemptions + +Interrupt Binding: +- u_int BUS_INTR_BOUND(device_t parent, device_t child, struct resource *irq) + - returns NOCPU for error + +Ithreads round 2: +- Use a pool of ithreads to service hardware interrupts +- Ithreads in general drain queues of active handlers. Hardware interrupt + handlers are placed in per-CPU queues, software interrupts are currently + tied to dedicated threads still. 
+- XXX: interrupt storm detection is broken + - could maybe notice in hwi_handle() if curthread == PCPU_GET(hwi_thread) + and bump ihw_count in that case, but reset it otherwise? +- XXX: should ithreads use a different thread lock in the IWAIT state, + perhaps hwi_thread_lock for hwi threads? + - ok, doing this for hwi threads + - not sure what to do for swi, dedicated lock? +- Add a hwi_sched() that can be used to requeue threaded handlers, could also + be used to schedule auxiliary handlers such as the link handler for em/igb +- XXX: filters that do not handle their interrupt are broken. not sure we + can do much about this, perhaps just ban them? --- //depot/projects/smpng/sys/sys/bus.h +++ //depot/user/jhb/intr/sys/bus.h @@ -130,24 +130,29 @@ /** * @brief Driver interrupt filter return values * - * If a driver provides an interrupt filter routine it must return an - * integer consisting of oring together zero or more of the following - * flags: + * If a driver provides an interrupt filter routine it must return one of + * the following values: * * FILTER_STRAY - this device did not trigger the interrupt * FILTER_HANDLED - the interrupt has been fully handled and can be EOId - * FILTER_SCHEDULE_THREAD - the threaded interrupt handler should be - * scheduled to execute + * FILTER_SCHEDULE_THREAD - the interrupt should be masked and EOId, and + * the threaded interrupt handler should be scheduled + * to execute * * If the driver does not provide a filter, then the interrupt code will - * act is if the filter had returned FILTER_SCHEDULE_THREAD. Note that it - * is illegal to specify any other flag with FILTER_STRAY and that it is - * illegal to not specify either of FILTER_HANDLED or FILTER_SCHEDULE_THREAD - * if FILTER_STRAY is not specified. + * act as if a filter had returned FILTER_SCHEDULE_THREAD. + * + * If a driver wishes to schedule threaded interrupt handlers while + * avoiding masking the interrupt, it may use hwi_sched() in the + * filter routine to schedule INTR_MANUAL handlers and return + * FILTER_HANDLED. In that case, the filter must ensure that the + * interrupt does not continuously fire ("storm") until the threaded + * handler executes. For example, a driver could mask interrupts in a + * device-specific register until the threaded handler executes. */ -#define FILTER_STRAY 0x01 -#define FILTER_HANDLED 0x02 -#define FILTER_SCHEDULE_THREAD 0x04 +#define FILTER_STRAY 0 +#define FILTER_HANDLED 1 +#define FILTER_SCHEDULE_THREAD 2 /** * @brief Driver interrupt service routines @@ -170,8 +175,7 @@ /** * @brief Interrupt type bits. * - * These flags are used both by newbus interrupt - * registration (nexus.c) and also in struct intrec, which defines + * These flags are used by newbus interrupt registration to define * interrupt properties. * * XXX We should probably revisit this and remove the vestiges of the @@ -180,6 +184,8 @@ * * Buses which do interrupt remapping will want to change their type * to reflect what sort of devices are underneath. + * + * XXX: We should probably replace INTR_MPSAFE with INTR_GIANT instead. 
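
Under the new contract the three FILTER_* values are plain enumerators rather than or-able flags, so a filter returns exactly one of them; the old FILTER_HANDLED | FILTER_SCHEDULE_THREAD combination is gone, and a filter that did some work but still wants the ithread returns FILTER_SCHEDULE_THREAD alone. For instance (hypothetical device, BAR_* names invented):

	static int
	bar_filter(void *arg)
	{
		struct bar_softc *sc = arg;
		uint32_t status;

		status = BAR_READ(sc, BAR_STATUS);
		if (status == 0)
			return (FILTER_STRAY);	/* not our interrupt */
		if ((status & BAR_NEEDS_THREAD) == 0) {
			BAR_WRITE(sc, BAR_ACK, status);
			return (FILTER_HANDLED); /* fully serviced, EOI */
		}
		sc->pending = status;
		return (FILTER_SCHEDULE_THREAD); /* mask + EOI, run ithread */
	}
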
 */
 enum intr_type {
 	INTR_TYPE_TTY = 1,
@@ -192,6 +198,7 @@
 	INTR_EXCL = 256,		/* exclusive interrupt */
 	INTR_MPSAFE = 512,		/* this interrupt is SMP safe */
 	INTR_ENTROPY = 1024,		/* this interrupt provides entropy */
+	INTR_MANUAL = 2048,		/* only scheduled via hwi_sched() */
 	INTR_MD1 = 4096,		/* flag reserved for MD use */
 	INTR_MD2 = 8192,		/* flag reserved for MD use */
 	INTR_MD3 = 16384,		/* flag reserved for MD use */
--- //depot/projects/smpng/sys/sys/interrupt.h
+++ //depot/user/jhb/intr/sys/interrupt.h
@@ -37,94 +37,223 @@
 struct trapframe;

 /*
+ * An interrupt handler consists of a threaded interrupt handler
+ * and/or a filter handler.  The threaded interrupt handler is
+ * executed from an interrupt thread and may perform many of the same
+ * tasks as other top-half kernel code with the exception that they
+ * are not allowed to sleep.  A filter handler executes in primary
+ * interrupt context.  As a result, it has many more restrictions
+ * (e.g. it can only use spin locks and no other locking primitives).
+ * Each interrupt handler is associated with exactly one interrupt
+ * event for its entire lifetime.
+ *
+ * An interrupt event contains a list of interrupt handlers that
+ * should be executed when a specific interrupt is asserted.  Each
+ * interrupt source in the system (e.g. an IRQ) is associated with a
+ * single interrupt event.  Each interrupt event is associated with
+ * exactly one interrupt queue.  However, it may change the
+ * association between different queues during its lifetime.
+ *
+ * An interrupt queue contains a queue of pending interrupt handlers
+ * and a pool of interrupt threads.  It is somewhat like a specialized
+ * taskqueue.  The interrupt threads are responsible for pulling
+ * pending interrupt handlers off of the queue and executing their
+ * threaded interrupt handlers.
+ *
+ * When an interrupt occurs, all of the interrupt handlers for the
+ * associated interrupt event are scheduled to execute on the
+ * interrupt event's associated interrupt queue.  If the queue is
+ * empty, then an interrupt thread for that queue is scheduled.  This
+ * thread is pinned to the CPU that received the interrupt and will
+ * continue to execute until the interrupt queue is empty.  If an
+ * interrupt thread blocks on a lock and the interrupt queue is not
+ * empty, a new interrupt thread is scheduled to execute those
+ * handlers if a thread is available (XXX: not implemented).
+ *
+ * For an interrupt event with multiple interrupt handlers, the event
+ * must not be re-enabled until all of the handlers for that event
+ * have executed.  To handle this, each hardware interrupt holds a
+ * count of threaded handlers queued for execution.  Once these
+ * handlers are drained, the 'post_ithread' hook is invoked.
+ *
+ * A driver may also add "manual" interrupt handlers to an interrupt
+ * event.  These handlers are not automatically scheduled when an
+ * interrupt occurs.  Instead, a filter routine from a different
+ * interrupt handler must use the hwi_sched() routine to schedule a
+ * manual handler.  Manual handlers are ignored for purposes of
+ * determining when an interrupt event should be re-enabled via its
+ * 'post_ithread' hook.
+ *
+ * XXX: Not sure how to handle interrupt storms in this mode.
+ */
+
+/*
  * Describe a hardware interrupt handler.
  *
  * Multiple interrupt handlers for a specific event can be chained
  * together.
  */
 struct intr_handler {
-	driver_filter_t	*ih_filter;	/* Filter handler function. */
+	driver_filter_t	*ih_filter;	/* Filter handler function. */
 	driver_intr_t	*ih_handler;	/* Threaded handler function. */
 	void		*ih_argument;	/* Argument to pass to handlers. */
 	int		 ih_flags;
+	volatile int	 ih_state;	/* IS_* state. */
 	char		 ih_name[MAXCOMLEN + 1]; /* Name of handler. */
 	struct intr_event *ih_event;	/* Event we are connected to. */
-	int		 ih_need;	/* Needs service. */
 	TAILQ_ENTRY(intr_handler) ih_next; /* Next handler for this event. */
 	u_char		 ih_pri;	/* Priority of this handler. */
-	struct intr_thread *ih_thread;	/* Ithread for filtered handler. */
+	STAILQ_ENTRY(intr_handler) ih_queued; /* Links for active list. */
 };

 /* Interrupt handle flags kept in ih_flags */
+#define	IH_MANUAL	0x00000001	/* Manually scheduled via hwi_sched(). */
 #define	IH_EXCLUSIVE	0x00000002	/* Exclusive interrupt. */
 #define	IH_ENTROPY	0x00000004	/* Device is a good entropy source. */
-#define	IH_DEAD		0x00000008	/* Handler should be removed. */
 #define	IH_MPSAFE	0x80000000	/* Handler does not need Giant. */

 /*
+ * Interrupt handle states.
+ *
+ * Initially an interrupt handler is idle.  An idle handler can move
+ * either into the dead state (when it is being removed) or queued
+ * state (when it is queued to an interrupt thread).
+ *
+ * A queued handler can move either into the dying state (when it is
+ * being removed), the queued state (an attempt to queue an
+ * already-queued handler), or the running state (when an interrupt
+ * thread executes the handler).
+ *
+ * An interrupt handler is placed into the running state by an
+ * interrupt thread while it is being executed.  A running handler can
+ * move either into the dying state (when it is being removed), the
+ * requeue state (an attempt to queue an executing handler), or the
+ * idle state.
+ *
+ * If an interrupt handler is rescheduled while it is executing, it is
+ * placed into the requeue state.  A requeued handler can move either
+ * into the dying state (when it is being removed) or the queued state
+ * (when the interrupt thread requeues it after execution finishes).
+ *
+ * When an interrupt handler is removed, it is placed into the dying
+ * state if it is not currently idle.  The removing thread then sleeps
+ * until an interrupt thread dequeues the handler or finishes
+ * executing the handler.  The interrupt thread then acks the dying
+ * request by moving the handler into the dead state.
+ */
+#define	IS_IDLE		0
+#define	IS_QUEUED	1
+#define	IS_RUNNING	2
+#define	IS_REQUEUE	3
+#define	IS_DYING	4
+#define	IS_DEAD		5
+
+/*
+ * A queue of active interrupt handlers.
+ *
+ * This structure is used to hold a list of handlers in the queued state.
+ * One or more interrupt threads are associated with a given queue and
+ * drain the handlers from that queue.
+ *
+ * Each interrupt event is associated with a single queue and all
+ * handlers for that event are scheduled on that queue.
+ *
+ * Each queue contains a spin lock.  This spin lock protects the list
+ * of interrupt handlers for any interrupt events associated with this
+ * queue.  It is also used as the thread lock for idle interrupt
+ * threads associated with this queue.
+ *
+ * Currently, each interrupt event is given a unique queue, and each
+ * queue is given a single thread.
+ */
+struct intr_queue {
+	STAILQ_HEAD(, intr_handler) iq_active;
+	struct mtx	iq_lock;
+	struct intr_thread *iq_thread;
+	struct intr_event *iq_event;
+	char		iq_name[MAXCOMLEN + 1];
+};
+
+/*
  * Describe an interrupt event.  An event holds a list of handlers.
+ * Events are split into two classes: hardware interrupt events and
+ * software interrupt events.
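
As a quick cross-reference, the legal transitions documented above boil down to the following (a summary of the comment, using the same IS_* names):

	current state	transition (who performs it)		new state
	IS_IDLE		scheduled (swi_sched/hwi_handle)	IS_QUEUED
	IS_IDLE		removed (intr_event_remove_handler)	IS_DEAD
	IS_QUEUED	dequeued by an interrupt thread		IS_RUNNING
	IS_RUNNING	handler finishes executing		IS_IDLE
	IS_RUNNING	rescheduled while running		IS_REQUEUE
	IS_REQUEUE	requeued after execution finishes	IS_QUEUED
	IS_QUEUED/IS_RUNNING/IS_REQUEUE	removed			IS_DYING
	IS_DYING	interrupt thread acks the removal	IS_DEAD
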
+ *
  * The 'pre_ithread', 'post_ithread', 'post_filter', and 'assign_cpu'
- * hooks are used to invoke MD code for certain operations.
+ * hooks are used to invoke MD code for certain operations for
+ * hardware interrupt events.
  *
- * The 'pre_ithread' hook is called when an interrupt thread for
- * handlers without filters is scheduled.  It is responsible for
+ * The 'pre_ithread' hook is called when an interrupt schedules at
+ * least one threaded interrupt handler.  It is responsible for
  * ensuring that 1) the system won't be swamped with an interrupt
- * storm from the associated source while the ithread runs and 2) the
- * current CPU is able to receive interrupts from other interrupt
- * sources.  The first is usually accomplished by disabling
- * level-triggered interrupts until the ithread completes.  The second
- * is accomplished on some platforms by acknowledging the interrupt
- * via an EOI.
+ * storm from the associated source while threaded interrupt handlers
+ * execute and 2) the current CPU is able to receive interrupts from
+ * other interrupt sources.  The first is usually accomplished by
+ * disabling level-triggered interrupts until all of the handlers for
+ * this event have completed.  The second is accomplished on some
+ * platforms by acknowledging the interrupt via an EOI.
  *
- * The 'post_ithread' hook is invoked when an ithread finishes.  It is
- * responsible for ensuring that the associated interrupt source will
- * trigger an interrupt when it is asserted in the future.  Usually
- * this is implemented by enabling a level-triggered interrupt that
- * was previously disabled via the 'pre_ithread' hook.
+ * The 'post_ithread' hook is invoked when all of the threaded
+ * interrupt handlers for an event finish.  It is responsible for
+ * ensuring that the associated interrupt source will trigger an
+ * interrupt when it is asserted in the future.  Usually this is
+ * implemented by enabling a level-triggered interrupt that was
+ * previously disabled via the 'pre_ithread' hook.
  *
- * The 'post_filter' hook is invoked when a filter handles an
- * interrupt.  It is responsible for ensuring that the current CPU is
- * able to receive interrupts again.  On some platforms this is done
- * by acknowledging the interrupts via an EOI.
+ * The 'post_filter' hook is invoked when an interrupt occurs but no
+ * threaded interrupt handlers are scheduled.  It is responsible for
+ * ensuring that the current CPU is able to receive interrupts again.
+ * On some platforms this is done by acknowledging the interrupts via
+ * an EOI.
  *
  * The 'assign_cpu' hook is used to bind an interrupt source to a
  * specific CPU.  If the interrupt cannot be bound, this function may
  * return an error.
  *
- * Note that device drivers may also use interrupt events to manage
- * multiplexing interrupt interrupt handler into handlers for child
- * devices.  In that case, the above hooks are not used.  The device
- * can create an event for its interrupt resource and register child
- * event handlers with that event.  It can then use
- * intr_event_execute_handlers() to execute non-filter handlers.
- * Currently filter handlers are not supported by this, but that can
- * be added by splitting out the filter loop from intr_event_handle()
- * if desired.
+ * The list of handlers in an interrupt event is protected by two
+ * locks.  First, there is a regular mutex that can be used alone for
+ * read-only access in top-half code.  Second, there is a spin lock
+ * stored in the associated interrupt queue.
This spin lock is held + * in the low-level interrupt code while walking the list of interrupt + * handlers. It must also be held in top-half code that adds or + * removes handlers to or from the list. */ struct intr_event { - TAILQ_ENTRY(intr_event) ie_list; TAILQ_HEAD(, intr_handler) ie_handlers; /* Interrupt handlers. */ char ie_name[MAXCOMLEN + 1]; /* Individual event name. */ char ie_fullname[MAXCOMLEN + 1]; struct mtx ie_lock; - void *ie_source; /* Cookie used by MD code. */ - struct intr_thread *ie_thread; /* Thread we are connected to. */ - void (*ie_pre_ithread)(void *); - void (*ie_post_ithread)(void *); - void (*ie_post_filter)(void *); - int (*ie_assign_cpu)(void *, u_char); int ie_flags; - int ie_count; /* Loop counter. */ - int ie_warncnt; /* Rate-check interrupt storm warns. */ - struct timeval ie_warntm; - int ie_irq; /* Physical irq number if !SOFT. */ - u_char ie_cpu; /* CPU this event is bound to. */ + int ie_cpu; /* CPU this event is bound to. */ + struct intr_queue *ie_queue; +}; + +struct intr_hardware { + struct intr_event ihw_event; + TAILQ_ENTRY(intr_hardware) ihw_list; + void *ihw_source; /* Cookie used by MD code. */ + void (*ihw_pre_ithread)(void *); + void (*ihw_post_ithread)(void *); + void (*ihw_post_filter)(void *); + int (*ihw_assign_cpu)(void *, u_char); + int ihw_queued; /* Number of queued non-filter handlers. */ + int ihw_irq; /* Physical irq number. */ + int ihw_storming; + int ihw_count; /* Loop counter. */ + int ihw_warncnt; /* Rate-check interrupt storm warns. */ + struct timeval ihw_warntm; + TAILQ_HEAD(, intr_handler) ihw_manual; /* Manual interrupt handlers. */ +}; + +struct intr_software { + struct intr_event isw_event; + TAILQ_ENTRY(intr_software) isw_list; }; /* Interrupt event flags kept in ie_flags. */ #define IE_SOFT 0x000001 /* Software interrupt. */ #define IE_ENTROPY 0x000002 /* Interrupt is an entropy source. */ -#define IE_ADDING_THREAD 0x000004 /* Currently building an ithread. */ +#define IE_ADDING_QUEUE 0x000004 /* Currently building an interrupt queue. */ /* Flags to pass to sched_swi. */ #define SWI_DELAY 0x2 @@ -143,6 +272,7 @@ #define SWI_TQ_GIANT 6 struct proc; +struct thread; extern struct intr_event *tty_intr_event; extern struct intr_event *clk_intr_event; @@ -156,31 +286,46 @@ #ifdef DDB void db_dump_intr_event(struct intr_event *ie, int handlers); +void db_dump_intrhand(struct intr_handler *ih, int display_event); #endif +void hwi_create(struct intr_event **event, void *source, int irq, + void (*pre_ithread)(void *), void (*post_ithread)(void *), + void (*post_filter)(void *), int (*assign_cpu)(void *, u_char), + const char *fmt, ...) __printflike(8, 9); +int hwi_destroy(struct intr_event *ie); +void hwi_drain(void *cookie); +int hwi_handle(struct intr_event *ie, struct trapframe *frame); +void *hwi_handler_source(void *cookie); +void hwi_sched(void *cookie); +void _intr_drain(int irq); /* Linux compat only. */ u_char intr_priority(enum intr_type flags); int intr_event_add_handler(struct intr_event *ie, const char *name, driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri, enum intr_type flags, void **cookiep); int intr_event_bind(struct intr_event *ie, u_char cpu); -int intr_event_create(struct intr_event **event, void *source, - int flags, int irq, void (*pre_ithread)(void *), - void (*post_ithread)(void *), void (*post_filter)(void *), - int (*assign_cpu)(void *, u_char), const char *fmt, ...) 
- __printflike(9, 10); int intr_event_describe_handler(struct intr_event *ie, void *cookie, const char *descr); -int intr_event_destroy(struct intr_event *ie); -void intr_event_execute_handlers(struct proc *p, struct intr_event *ie); -int intr_event_handle(struct intr_event *ie, struct trapframe *frame); int intr_event_remove_handler(void *cookie); int intr_getaffinity(int irq, void *mask); -void *intr_handler_source(void *cookie); int intr_setaffinity(int irq, void *mask); -void _intr_drain(int irq); /* Linux compat only. */ int swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler, void *arg, int pri, enum intr_type flags, void **cookiep); +int swi_create(struct intr_event **event, const char *fmt, ...); +int swi_destroy(struct intr_event *ie); +int swi_remove(void *cookie); void swi_sched(void *cookie, int flags); -int swi_remove(void *cookie); + +/* XXX: Compat shims */ +#define intr_event_create(ev, src, f, irq, prei, posti, postf, ac, ...) \ + (hwi_create(ev, src, irq, prei, posti, postf, ac, __VA_ARGS__), 0) +#define intr_event_destroy(ev) \ + hwi_destroy(ev) +#define intr_event_handle(ev, frame) \ + hwi_handle(ev, frame) +#define intr_handler_source(cookie) \ + hwi_handler_source(cookie) + +/* XXX: Should we have hwi_add() and hwi_remove()? */ #endif --- //depot/projects/smpng/sys/sys/proc.h +++ //depot/user/jhb/intr/sys/proc.h @@ -319,6 +319,7 @@ struct proc *td_rfppwait_p; /* (k) The vforked child */ struct vm_page **td_ma; /* (k) uio pages held */ int td_ma_cnt; /* (k) size of *td_ma */ + struct intr_thread *td_ithread; /* (b) Interrupt thread state. */ }; struct mtx *thread_lock_block(struct thread *);
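
Putting the MD side together: with intr_event_create() now a compat shim, platform interrupt code would use hwi_create() directly, supplying the three hooks plus 'assign_cpu', and call hwi_handle() from its dispatch path. A skeletal, hypothetical PIC driver under those assumptions (NIRQS and all mypic_* names are invented; helper bodies omitted):

	#include <sys/param.h>
	#include <sys/bus.h>
	#include <sys/interrupt.h>

	#define	NIRQS	64

	static void	mypic_mask(void *);
	static void	mypic_unmask(void *);
	static void	mypic_eoi(void *);
	static void	mypic_stray(int);
	static int	mypic_assign_cpu(void *, u_char);

	static struct intr_event *mypic_events[NIRQS];

	/* Keep a level-triggered line quiet while its handlers run... */
	static void
	mypic_pre_ithread(void *cookie)
	{
		mypic_mask(cookie);
		mypic_eoi(cookie);	/* ...but let other interrupts in. */
	}

	/* All threaded handlers finished; let the line fire again. */
	static void
	mypic_post_ithread(void *cookie)
	{
		mypic_unmask(cookie);
	}

	/* Filters handled everything; just EOI, no masking needed. */
	static void
	mypic_post_filter(void *cookie)
	{
		mypic_eoi(cookie);
	}

	static void
	mypic_register(void *src, int irq)
	{
		hwi_create(&mypic_events[irq], src, irq, mypic_pre_ithread,
		    mypic_post_ithread, mypic_post_filter, mypic_assign_cpu,
		    "irq%d:", irq);
	}

	/* Called from the low-level interrupt entry point. */
	void
	mypic_dispatch(int irq, struct trapframe *frame)
	{
		if (hwi_handle(mypic_events[irq], frame) != 0)
			mypic_stray(irq);
	}
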