--- //depot/projects/smpng/sys/amd64/amd64/intr_machdep.c 2008/12/04 20:58:27 +++ //depot/user/jhb/intr/amd64/amd64/intr_machdep.c 2008/12/04 22:14:32 @@ -249,6 +249,7 @@ */ (*isrc->is_count)++; PCPU_INC(cnt.v_intr); + (*PCPU_GET(intr_count))++; ie = isrc->is_event; @@ -284,12 +285,10 @@ #ifndef DEV_ATPIC atpic_reset(); #endif - sx_xlock(&intr_table_lock); STAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) pic->pic_resume(pic); } - sx_xunlock(&intr_table_lock); } void @@ -297,12 +296,22 @@ { struct pic *pic; - sx_xlock(&intr_table_lock); STAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_suspend != NULL) pic->pic_suspend(pic); } - sx_xunlock(&intr_table_lock); +} + +int +intr_pending(u_int irq) +{ + struct intsrc *isrc; + + isrc = intr_lookup_source(irq); + if (isrc == NULL) + /* XXX: panic? */ + return (0); + return (isrc->is_pic->pic_source_pending(isrc)); } static int @@ -380,6 +389,7 @@ STAILQ_INIT(&pics); sx_init(&intr_table_lock, "intr sources"); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); + intrcnt_add("cpu0: intrs", PCPU_PTR(intr_count)); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); --- //depot/projects/smpng/sys/amd64/amd64/local_apic.c 2009/05/08 11:53:25 +++ //depot/user/jhb/intr/amd64/amd64/local_apic.c 2009/05/08 14:17:47 @@ -295,6 +295,11 @@ eflags = intr_disable(); maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + if (PCPU_GET(cpuid) != 0) { + snprintf(buf, sizeof(buf), "cpu%d: intrs", PCPU_GET(cpuid)); + intrcnt_add(buf, PCPU_PTR(intr_count)); + } + /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); --- //depot/projects/smpng/sys/amd64/amd64/machdep.c 2009/05/19 13:40:43 +++ //depot/user/jhb/intr/amd64/amd64/machdep.c 2009/05/20 17:51:14 @@ -1063,28 +1063,6 @@ sd->sd_gran = ssd->ssd_gran; } -#if !defined(DEV_ATPIC) && defined(DEV_ISA) -#include -#include -/* - * Return a bitmap of the current interrupt requests. This is 8259-specific - * and is only suitable for use at probe time. 
- * This is only here to pacify sio. It is NOT FATAL if this doesn't work. - * It shouldn't be here. There should probably be an APIC centric - * implementation in the apic driver code, if at all. - */ -intrmask_t -isa_irq_pending(void) -{ - u_char irr1; - u_char irr2; - - irr1 = inb(IO_ICU1); - irr2 = inb(IO_ICU2); - return ((irr2 << 8) | irr1); -} -#endif - u_int basemem; /* --- //depot/projects/smpng/sys/amd64/amd64/sys_machdep.c 2009/04/07 17:48:51 +++ //depot/user/jhb/intr/amd64/amd64/sys_machdep.c 2009/04/07 19:19:09 @@ -35,6 +35,8 @@ #include #include +#include +#include #include #include #include @@ -49,6 +51,7 @@ #include #include +#include #include #include #include @@ -163,6 +166,9 @@ uint32_t i386base; uint64_t a64base; struct i386_ioperm_args iargs; +#ifdef SMP + struct amd64_intr_bind_args bargs; +#endif if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT) return (sysarch_ldt(td, uap, UIO_USERSPACE)); @@ -216,6 +222,17 @@ update_gdt_gsbase(td, i386base); } break; +#ifdef SMP + /* ABI and API compatible with I386_INTR_BIND. 
*/ + case AMD64_INTR_BIND: + error = copyin(uap->parms, &bargs, + sizeof(struct amd64_intr_bind_args)); + if (error == 0) + error = priv_check(td, PRIV_DRIVER); + if (error == 0) + error = intr_bind(bargs.vector, bargs.cpu); + break; +#endif case AMD64_GET_FSBASE: error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase)); break; --- //depot/projects/smpng/sys/amd64/include/intr_machdep.h 2009/02/04 21:27:39 +++ //depot/user/jhb/intr/amd64/include/intr_machdep.h 2009/02/04 22:38:20 @@ -153,6 +153,7 @@ enum intr_polarity pol); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); struct intsrc *intr_lookup_source(int vector); +int intr_pending(u_int irq); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); --- //depot/projects/smpng/sys/amd64/include/pcpu.h 2009/05/08 11:53:25 +++ //depot/user/jhb/intr/amd64/include/pcpu.h 2009/05/08 14:17:47 @@ -66,6 +66,7 @@ struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \ register_t pc_rsp0; \ register_t pc_scratch_rsp; /* User %rsp in syscall */ \ + u_long *pc_intr_count; \ u_int pc_apic_id; \ u_int pc_acpi_id; /* ACPI CPU id */ \ /* Pointer to the CPU %fs descriptor */ \ --- //depot/projects/smpng/sys/amd64/include/sysarch.h 2009/04/07 17:48:51 +++ //depot/user/jhb/intr/amd64/include/sysarch.h 2009/04/07 19:19:09 @@ -48,6 +48,7 @@ #define I386_SET_FSBASE 8 #define I386_GET_GSBASE 9 #define I386_SET_GSBASE 10 +#define AMD64_INTR_BIND 11 /* Leave space for 0-127 for to avoid translating syscalls */ #define AMD64_GET_FSBASE 128 @@ -67,6 +68,11 @@ int enable; }; +struct amd64_intr_bind_args { + unsigned int vector; + unsigned int cpu; +}; + #ifndef _KERNEL #include @@ -75,6 +81,7 @@ int amd64_get_gsbase(void **); int amd64_set_fsbase(void *); int amd64_set_gsbase(void *); +int amd64_intr_bind(unsigned int, unsigned int); int sysarch(int, void *); __END_DECLS #else --- //depot/projects/smpng/sys/amd64/isa/atpic.c 
2008/03/24 19:59:34 +++ //depot/user/jhb/intr/amd64/isa/atpic.c 2008/04/24 14:32:37 @@ -595,19 +595,4 @@ DRIVER_MODULE(atpic, isa, atpic_driver, atpic_devclass, 0, 0); DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); - -/* - * Return a bitmap of the current interrupt requests. This is 8259-specific - * and is only suitable for use at probe time. - */ -intrmask_t -isa_irq_pending(void) -{ - u_char irr1; - u_char irr2; - - irr1 = inb(IO_ICU1); - irr2 = inb(IO_ICU2); - return ((irr2 << 8) | irr1); -} #endif /* DEV_ISA */ --- //depot/projects/smpng/sys/dev/sio/sio.c 2008/10/21 22:11:05 +++ //depot/user/jhb/intr/dev/sio/sio.c 2008/10/21 23:09:33 @@ -85,6 +85,11 @@ #endif #include +#if defined(__i386__) || defined(__amd64__) +#define PROBE_IRQ +#include +#endif + #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ #ifdef COM_MULTIPORT @@ -430,11 +435,12 @@ int fn; device_t idev; Port_t iobase; - intrmask_t irqmap[4]; - intrmask_t irqs; +#ifdef PROBE_IRQ + int irqpending[4]; + u_long xirq; +#endif u_char mcr_image; int result; - u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; @@ -540,8 +546,13 @@ } } #endif /* COM_MULTIPORT */ - if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) +#ifdef PROBE_IRQ + if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) != 0) { mcr_image = 0; + xirq = ~0ul; + } + bzero(irqpending, sizeof(irqpending)); +#endif bzero(failures, sizeof failures); iobase = rman_get_start(port); @@ -608,7 +619,10 @@ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ - irqmap[0] = isa_irq_pending(); +#ifdef PROBE_IRQ + if (xirq != ~0ul) + irqpending[0] = intr_pending(xirq); +#endif /* * Attempt to set loopback mode so that we can send a null byte @@ -724,10 +738,16 @@ failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ - irqmap[1] = 
isa_irq_pending(); +#ifdef PROBE_IRQ + if (xirq != ~0ul) + irqpending[1] = intr_pending(xirq); +#endif failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; DELAY(1000); /* XXX */ - irqmap[2] = isa_irq_pending(); +#ifdef PROBE_IRQ + if (xirq != ~0ul) + irqpending[2] = intr_pending(xirq); +#endif failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; /* @@ -743,25 +763,27 @@ sio_setreg(com, com_cfcr, CFCR_8BITS); /* dummy to avoid bus echo */ failures[7] = sio_getreg(com, com_ier); DELAY(1000); /* XXX */ - irqmap[3] = isa_irq_pending(); +#ifdef PROBE_IRQ + if (xirq != ~0ul) + irqpending[3] = intr_pending(xirq); +#endif failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; mtx_unlock_spin(&sio_lock); - irqs = irqmap[1] & ~irqmap[0]; - if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && - ((1 << xirq) & irqs) == 0) { - printf( - "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", - device_get_unit(dev), xirq, irqs); - printf( - "sio%d: port may not be enabled\n", - device_get_unit(dev)); +#ifdef PROBE_IRQ + if (xirq != ~0ul) { + if (irqpending[1] == 0) { /* warn only if the irq never became pending */ + device_printf(dev, + "configured irq %ld did not trigger\n", xirq); + device_printf(dev, "port may not be enabled\n"); + } + if (bootverbose) + device_printf(dev, "irq pending: %d %d %d %d\n", + irqpending[0], irqpending[1], irqpending[2], + irqpending[3]); } - if (bootverbose) - printf("sio%d: irq maps: %#x %#x %#x %#x\n", - device_get_unit(dev), - irqmap[0], irqmap[1], irqmap[2], irqmap[3]); +#endif result = 0; for (fn = 0; fn < sizeof failures; ++fn) --- //depot/projects/smpng/sys/i386/i386/intr_machdep.c 2008/12/04 20:58:27 +++ //depot/user/jhb/intr/i386/i386/intr_machdep.c 2008/12/04 22:14:32 @@ -240,6 +240,7 @@ */ (*isrc->is_count)++; PCPU_INC(cnt.v_intr); + (*PCPU_GET(intr_count))++; ie = isrc->is_event; @@ -272,12 +273,10 @@ { struct pic *pic; - sx_xlock(&intr_table_lock); STAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) 
pic->pic_resume(pic); } - sx_xunlock(&intr_table_lock); } void @@ -285,12 +284,22 @@ { struct pic *pic; - sx_xlock(&intr_table_lock); STAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_suspend != NULL) pic->pic_suspend(pic); } - sx_xunlock(&intr_table_lock); +} + +int +intr_pending(u_int irq) +{ + struct intsrc *isrc; + + isrc = intr_lookup_source(irq); + if (isrc == NULL) + /* XXX: panic? */ + return (0); + return (isrc->is_pic->pic_source_pending(isrc)); } static int @@ -368,6 +377,7 @@ STAILQ_INIT(&pics); sx_init(&intr_table_lock, "intr sources"); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); + intrcnt_add("cpu0: intrs", PCPU_PTR(intr_count)); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); --- //depot/projects/smpng/sys/i386/i386/local_apic.c 2009/05/08 11:53:25 +++ //depot/user/jhb/intr/i386/i386/local_apic.c 2009/05/08 14:17:47 @@ -297,6 +297,11 @@ eflags = intr_disable(); maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + if (PCPU_GET(cpuid) != 0) { + snprintf(buf, sizeof(buf), "cpu%d: intrs", PCPU_GET(cpuid)); + intrcnt_add(buf, PCPU_PTR(intr_count)); + } + /* Initialize the TPR to allow all interrupts. 
*/ lapic_set_tpr(0); --- //depot/projects/smpng/sys/i386/i386/sys_machdep.c 2008/11/03 21:11:59 +++ //depot/user/jhb/intr/i386/i386/sys_machdep.c 2008/11/03 22:18:06 @@ -37,6 +37,8 @@ #include #include +#include +#include #include #include #include @@ -51,6 +53,7 @@ #include #include +#include #include #include #include @@ -104,6 +107,9 @@ union { struct i386_ldt_args largs; struct i386_ioperm_args iargs; +#ifdef SMP + struct i386_intr_bind_args bargs; +#endif } kargs; uint32_t base; struct segment_descriptor sd, *sdp; @@ -244,6 +250,16 @@ load_gs(GSEL(GUGS_SEL, SEL_UPL)); } break; +#ifdef SMP + case I386_INTR_BIND: + error = copyin(uap->parms, &kargs.bargs, + sizeof(struct i386_intr_bind_args)); + if (error == 0) + error = priv_check(td, PRIV_DRIVER); + if (error == 0) + error = intr_bind(kargs.bargs.vector, kargs.bargs.cpu); + break; +#endif default: error = EINVAL; break; --- //depot/projects/smpng/sys/i386/include/intr_machdep.h 2009/02/04 21:27:39 +++ //depot/user/jhb/intr/i386/include/intr_machdep.h 2009/02/04 22:38:20 @@ -140,6 +140,7 @@ enum intr_polarity pol); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); struct intsrc *intr_lookup_source(int vector); +int intr_pending(u_int irq); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); --- //depot/projects/smpng/sys/i386/include/pcpu.h 2008/11/03 21:11:59 +++ //depot/user/jhb/intr/i386/include/pcpu.h 2008/11/03 22:18:06 @@ -73,6 +73,7 @@ struct segment_descriptor *pc_fsgs_gdt; \ vm_paddr_t *pc_pdir_shadow; \ int pc_currentldt; \ + u_long *pc_intr_count; \ u_int pc_acpi_id; /* ACPI CPU id */ \ u_int pc_apic_id; \ int pc_private_tss; /* Flag indicating private tss*/\ --- //depot/projects/smpng/sys/i386/include/sysarch.h 2005/04/14 18:55:16 +++ //depot/user/jhb/intr/i386/include/sysarch.h 2006/03/24 22:02:35 @@ -47,6 +47,7 @@ #define I386_SET_FSBASE 8 #define I386_GET_GSBASE 9 #define I386_SET_GSBASE 10 +#define 
I386_INTR_BIND 11 /* These four only exist when running an i386 binary on amd64 */ #define _AMD64_GET_FSBASE 128 @@ -71,6 +72,11 @@ char *sub_args; /* args */ }; +struct i386_intr_bind_args { + unsigned int vector; + unsigned int cpu; +}; + #ifndef _KERNEL #include @@ -94,6 +100,7 @@ int i386_set_gsbase(void *); int i386_set_watch(int, unsigned int, int, int, struct dbreg *); int i386_clr_watch(int, struct dbreg *); +int i386_intr_bind(unsigned int, unsigned int); int sysarch(int, void *); __END_DECLS #else --- //depot/projects/smpng/sys/i386/isa/atpic.c 2008/03/24 19:59:34 +++ //depot/user/jhb/intr/i386/isa/atpic.c 2008/04/24 14:32:37 @@ -661,19 +661,4 @@ #ifndef PC98 DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); #endif - -/* - * Return a bitmap of the current interrupt requests. This is 8259-specific - * and is only suitable for use at probe time. - */ -intrmask_t -isa_irq_pending(void) -{ - u_char irr1; - u_char irr2; - - irr1 = inb(IO_ICU1); - irr2 = inb(IO_ICU2); - return ((irr2 << 8) | irr1); -} #endif /* DEV_ISA */ --- //depot/projects/smpng/sys/isa/isavar.h 2008/11/18 23:25:45 +++ //depot/user/jhb/intr/isa/isavar.h 2008/11/18 23:46:38 @@ -162,7 +162,6 @@ /* Device class for ISA bridges. */ extern devclass_t isab_devclass; -extern intrmask_t isa_irq_pending(void); extern void isa_probe_children(device_t dev); void isa_dmacascade(int chan); --- //depot/projects/smpng/sys/kern/kern_poll.c 2009/05/20 17:20:32 +++ //depot/user/jhb/intr/kern/kern_poll.c 2009/05/20 17:51:14 @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include /* needed by net/if.h */ #include #include @@ -226,7 +228,9 @@ SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RD, &residual_burst, 0, "# of residual cycles in burst"); -static uint32_t poll_handlers; /* next free entry in pr[]. */ +static volatile int max_handler; /* maximum busy index + 1 in pr[]. 
*/ + +static uint32_t poll_handlers; SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD, &poll_handlers, 0, "Number of registered poll handlers"); @@ -255,8 +259,12 @@ struct pollrec { poll_handler_t *handler; struct ifnet *ifp; + int flags; }; +#define PR_RUNNING 0x1 +#define PR_DEAD 0x2 + static struct pollrec pr[POLL_LIST_LEN]; static void @@ -322,22 +330,46 @@ } /* - * ether_poll is called from the idle loop. + * Run a single poll handler. If it is already running or dead then + * don't run it. Return true if we run the handler. + */ +static __inline int +poll_run_handler(struct pollrec *pr, enum poll_cmd arg, int count) +{ + + mtx_assert(&poll_mtx, MA_OWNED); + if ((pr->flags & (PR_RUNNING | PR_DEAD)) == 0) { + pr->flags |= PR_RUNNING; + mtx_unlock(&poll_mtx); + pr->handler(pr->ifp, arg, count); + mtx_lock(&poll_mtx); + pr->flags &= ~PR_RUNNING; + if (pr->flags & PR_DEAD) + wakeup(pr); + return (1); + } + return (0); +} + +/* + * ether_poll is called from the idle loop. We try to run the next poll + * handler. If all of the handlers are running (or there are no handlers) + * then this returns false. 
*/ -static void +static int ether_poll(int count) { - int i; + int i, handlers; - mtx_lock(&poll_mtx); + mtx_assert(&poll_mtx, MA_OWNED); if (count > poll_each_burst) count = poll_each_burst; - for (i = 0 ; i < poll_handlers ; i++) - pr[i].handler(pr[i].ifp, POLL_ONLY, count); - - mtx_unlock(&poll_mtx); + handlers = 0; + for (i = 0 ; i < max_handler ; i++) + handlers += poll_run_handler(&pr[i], POLL_ONLY, count); + return (handlers); } /* @@ -428,8 +460,8 @@ residual_burst : poll_each_burst; residual_burst -= cycles; - for (i = 0 ; i < poll_handlers ; i++) - pr[i].handler(pr[i].ifp, arg, cycles); + for (i = 0 ; i < max_handler ; i++) + poll_run_handler(&pr[i], arg, cycles); phase = 4; mtx_unlock(&poll_mtx); @@ -445,7 +477,7 @@ int ether_poll_register(poll_handler_t *h, struct ifnet *ifp) { - int i; + int i, needwakeup, free; KASSERT(h != NULL, ("%s: handler is NULL", __func__)); KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__)); @@ -469,20 +501,30 @@ return (ENOMEM); /* no polling for you */ } - for (i = 0 ; i < poll_handlers ; i++) + free = max_handler; + for (i = 0 ; i < max_handler ; i++) { + if (pr[i].flags == PR_DEAD) { + free = min(free, i); + continue; + } if (pr[i].ifp == ifp && pr[i].handler != NULL) { mtx_unlock(&poll_mtx); log(LOG_DEBUG, "ether_poll_register: %s: handler" " already registered\n", ifp->if_xname); return (EEXIST); } + } - pr[poll_handlers].handler = h; - pr[poll_handlers].ifp = ifp; + pr[free].handler = h; + pr[free].ifp = ifp; + pr[free].flags = 0; poll_handlers++; + if (free == max_handler) + max_handler++; + needwakeup = idlepoll_sleeping; mtx_unlock(&poll_mtx); - if (idlepoll_sleeping) - wakeup(&idlepoll_sleeping); + if (needwakeup) + wakeup_one(&idlepoll_sleeping); return (0); } @@ -498,20 +540,28 @@ mtx_lock(&poll_mtx); - for (i = 0 ; i < poll_handlers ; i++) + for (i = 0 ; i < max_handler ; i++) if (pr[i].ifp == ifp) /* found it */ break; - if (i == poll_handlers) { + if (i >= max_handler) { + mtx_unlock(&poll_mtx); log(LOG_DEBUG, 
"ether_poll_deregister: %s: not found!\n", ifp->if_xname); - mtx_unlock(&poll_mtx); return (ENOENT); } + + /* Wait if it is currently running. */ + pr[i].flags |= PR_DEAD; + while (pr[i].flags & PR_RUNNING) + mtx_sleep(&pr[i], &poll_mtx, 0, "depoll", hz); + pr[i].ifp = NULL; + pr[i].handler = NULL; + poll_handlers--; - if (i < poll_handlers) { /* Last entry replaces this one. */ - pr[i].handler = pr[poll_handlers].handler; - pr[i].ifp = pr[poll_handlers].ifp; - } + + /* Trim any free entries from the end of the list. */ + while (max_handler > 0 && pr[max_handler - 1].flags == PR_DEAD) + max_handler--; mtx_unlock(&poll_mtx); return (0); } @@ -561,36 +611,84 @@ } static void -poll_idle(void) +poll_idle(void *arg) { struct thread *td = curthread; - struct rtprio rtp; +#ifdef SMP + int cpu; +#endif + int busy; - rtp.prio = RTP_PRIO_MAX; /* lowest priority */ - rtp.type = RTP_PRIO_IDLE; - PROC_SLOCK(td->td_proc); - rtp_to_pri(&rtp, td); - PROC_SUNLOCK(td->td_proc); + thread_lock(td); + sched_prio(td, PRI_MAX_IDLE); /* lowest priority */ +#ifdef SMP + cpu = (intptr_t)arg; + if (cpu != NOCPU) + sched_bind(td, cpu); +#endif + thread_unlock(td); + + mtx_lock(&poll_mtx); for (;;) { - if (poll_in_idle_loop && poll_handlers > 0) { - idlepoll_sleeping = 0; - ether_poll(poll_each_burst); + if (poll_in_idle_loop && poll_handlers > 0) + busy = ether_poll(poll_each_burst); + else + busy = 0; + if (busy) { + /* + * Just yield the CPU to other idle threads like + * pagezero. + * + * XXX: It seems like what we probably want instead + * is a kind of taskqueue for idle tasks and a single + * per-cpu thread for idle tasks. 
+ */ thread_lock(td); mi_switch(SW_VOL, NULL); thread_unlock(td); } else { - idlepoll_sleeping = 1; - tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3); + idlepoll_sleeping++; + mtx_sleep(&idlepoll_sleeping, &poll_mtx, 0, "pollid", + hz * 3); + idlepoll_sleeping--; } } } -static struct proc *idlepoll; -static struct kproc_desc idlepoll_kp = { - "idlepoll", - poll_idle, - &idlepoll -}; -SYSINIT(idlepoll, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, kproc_start, - &idlepoll_kp); +static int percpu_threads = 1; +TUNABLE_INT("kern.polling.pcpu_threads", &percpu_threads); + +static void +poll_init_kthreads(void *dummy) +{ +#ifdef SMP + struct thread *td; + int i; +#endif + struct proc *p; + int error; + +#ifdef SMP + p = NULL; + if (mp_ncpus > 1 && percpu_threads) { + for (i = 0; i <= mp_maxid; i++) { + if (CPU_ABSENT(i)) + continue; + error = kproc_kthread_add(poll_idle, + (void *)(intptr_t)i, &p, &td, 0, 0, "idlepoll", + "poll: cpu %d", i); + if (error) + panic("failed to create idle poll kthread: %d", + error); + } + return; + } +#endif + + error = kproc_create(poll_idle, (void *)(intptr_t)NOCPU, &p, 0, + 0, "idlepoll"); + if (error) + panic("failed to create idle poll kproc: %d", error); +} +SYSINIT(idlepoll, SI_SUB_SMP, SI_ORDER_ANY, poll_init_kthreads, NULL); --- //depot/projects/smpng/sys/notes 2009/02/18 22:05:55 +++ //depot/user/jhb/intr/notes 2009/02/20 16:15:34 @@ -73,3 +73,44 @@ - jhb_socket - socket hacking Space reserved for child branches: +- Kill eintrcnt crap and make intrcnt/intrnames MI with a MD length + constant. Export lengths instead. + - Move non-stray intrcnt managing into intr_event code? +- Use RCU to lock the per-source list of interrupt handlers. 
+ +IRQ Shuffle: +- Break up the icu_lock used to protect all IO APIC pins; possible solutions: + - 1) use a hash table to assign each pin a spin lock + - 2) give each CPU its own spin lock and when a pin is assigned a CPU, + use that lock to protect that pin + - 2a) bind the ithreads to the CPU that the IDT vector is assigned to + and then you can just use critical sections to protect the APIC pins; + actually, has to be spinlock_enter/exit rather than critical sections as + we are protecting against bottom-half code rather than preemptions + +Remaining Interrupt Event issues: +- Fix a race in intr_event_add_handler(). We should create the ithread if + needed and set ie_thread before adding the handler to the TAILQ. We would + need a membar after the ie_thread write since we don't do any locking while + executing handlers. + +Interrupt Binding: ++ BUS_BIND_INTR(device_t parent, device_t child, struct resource *irq, + u_int cpu) +- u_int BUS_INTR_BOUND(device_t parent, device_t child, struct resource *irq) + - returns NOCPU for error + +Interrupt filters: +- Give filters their own separate list from handlers that just use ithreads + as an optimization +- Use different "main" routine for private ithreads + +Ithreads round 2: +- Fourth Pass: This requires some other support work to allow easy setup of + kernel threads (not just kernel processes) as well as letting kernel + processes have KSEs that do continuations that do a kind of upcall into + a kernel scheduler rather than the UTS. 
+ - Get all that gory kthread stuff actually working + - embed a struct task in each intrhand and use a single intr kernel process + with one kse per CPU and threads that spawn new threads when blocking + to execute tasks from a interrupt taskqueue --- //depot/projects/smpng/sys/pc98/cbus/sio.c 2008/09/17 20:27:47 +++ //depot/user/jhb/intr/pc98/cbus/sio.c 2008/09/18 17:18:53 @@ -125,6 +125,11 @@ #include #endif +#if defined(__i386__) || defined(__amd64__) +#define PROBE_IRQ +#include +#endif + #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ /* @@ -768,11 +773,12 @@ int fn; device_t idev; Port_t iobase; - intrmask_t irqmap[4]; - intrmask_t irqs; +#ifdef PROBE_IRQ + int irqpending[4]; + u_long xirq; +#endif u_char mcr_image; int result; - u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; @@ -937,7 +943,7 @@ tmp = ( inb( iod.ctrl ) & ~(IEN_Rx|IEN_TxEMP|IEN_Tx)); outb( iod.ctrl, tmp|IEN_TxEMP ); DELAY(10); - result = isa_irq_pending() ? 0 : ENXIO; + result = intr_pending(iod.irq) ? 
0 : ENXIO; outb( iod.ctrl, tmp ); COM_INT_ENABLE } else { @@ -994,8 +1000,13 @@ #endif } #endif /* COM_MULTIPORT */ - if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) +#ifdef PROBE_IRQ + if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) != 0) { mcr_image = 0; + xirq = ~0ul; + } + bzero(irqpending, sizeof(irqpending)); +#endif bzero(failures, sizeof failures); iobase = rman_get_start(port); @@ -1078,7 +1089,10 @@ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ - irqmap[0] = isa_irq_pending(); +#ifdef PROBE_IRQ + if (xirq != ~0ul) + irqpending[0] = intr_pending(xirq); +#endif /* * Attempt to set loopback mode so that we can send a null byte @@ -1190,14 +1204,20 @@ failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ - irqmap[1] = isa_irq_pending(); +#ifdef PROBE_IRQ + if (xirq != ~0ul) + irqpending[1] = intr_pending(xirq); +#endif failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) inb(iobase + rsa_srr); #endif DELAY(1000); /* XXX */ - irqmap[2] = isa_irq_pending(); +#ifdef PROBE_IRQ + if (xirq != ~0ul) + irqpending[2] = intr_pending(xirq); +#endif failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) @@ -1221,7 +1241,10 @@ outb(iobase + rsa_ier, 0x00); #endif DELAY(1000); /* XXX */ - irqmap[3] = isa_irq_pending(); +#ifdef PROBE_IRQ + if (xirq != ~0ul) + irqpending[3] = intr_pending(xirq); +#endif failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) { @@ -1232,20 +1255,19 @@ mtx_unlock_spin(&sio_lock); - irqs = irqmap[1] & ~irqmap[0]; - if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && - ((1 << xirq) & irqs) == 0) { - printf( - "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", - 
device_get_unit(dev), xirq, irqs); - printf( - "sio%d: port may not be enabled\n", - device_get_unit(dev)); +#ifdef PROBE_IRQ + if (xirq != ~0ul) { + if (irqpending[1] == 0) { /* warn only if the irq never became pending */ + device_printf(dev, + "configured irq %ld did not trigger\n", xirq); + device_printf(dev, "port may not be enabled\n"); + } + if (bootverbose) + device_printf(dev, "irq pending: %d %d %d %d\n", + irqpending[0], irqpending[1], irqpending[2], + irqpending[3]); } - if (bootverbose) - printf("sio%d: irq maps: %#x %#x %#x %#x\n", - device_get_unit(dev), - irqmap[0], irqmap[1], irqmap[2], irqmap[3]); +#endif result = 0; for (fn = 0; fn < sizeof failures; ++fn) --- //depot/projects/smpng/sys/sys/interrupt.h 2008/09/17 20:27:47 +++ //depot/user/jhb/intr/sys/interrupt.h 2008/09/18 17:18:53 @@ -43,9 +43,9 @@ * together. */ struct intr_handler { - driver_filter_t *ih_filter; /* Filter function. */ - driver_intr_t *ih_handler; /* Handler function. */ - void *ih_argument; /* Argument to pass to handler. */ + driver_filter_t *ih_filter; /* Filter handler function. */ + driver_intr_t *ih_handler; /* Threaded handler function. */ + void *ih_argument; /* Argument to pass to handlers. */ int ih_flags; const char *ih_name; /* Name of handler. */ struct intr_event *ih_event; /* Event we are connected to. */