--- //depot/projects/smpng/sys/kern/kern_poll.c	2008/03/18 12:54:14
+++ //depot/user/jhb/intr/kern/kern_poll.c	2008/05/07 22:25:41
@@ -41,16 +41,18 @@
 #include <net/if.h>			/* for IFF_* flags */
 #include <net/netisr.h>			/* for NETISR_POLL */
+#include 

 #include 
 #include 
-#include 
+#include 
+#include 

 static void netisr_poll(void);		/* the two netisr handlers */
 static void netisr_pollmore(void);
 static int poll_switch(SYSCTL_HANDLER_ARGS);

 void hardclock_device_poll(void);	/* hook from hardclock */
-void ether_poll(int);			/* polling in idle loop */
+int ether_poll(int);			/* polling in idle loop */

 static struct mtx	poll_mtx;
@@ -224,7 +226,9 @@
 SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RD,
 	&residual_burst, 0, "# of residual cycles in burst");

-static uint32_t poll_handlers;		/* next free entry in pr[]. */
+static volatile int max_handler;	/* maximum busy index + 1 in pr[]. */
+
+static uint32_t poll_handlers;
 SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
 	&poll_handlers, 0, "Number of registered poll handlers");
@@ -253,8 +257,12 @@
 struct pollrec {
 	poll_handler_t	*handler;
 	struct ifnet	*ifp;
+	int		flags;
 };

+#define	PR_RUNNING	0x1
+#define	PR_DEAD		0x2
+
 static struct pollrec pr[POLL_LIST_LEN];

 static void
@@ -322,22 +330,46 @@
 }

 /*
- * ether_poll is called from the idle loop.
+ * Run a single poll handler.  If it is already running or dead then
+ * don't run it.  Return true if we run the handler.
+ */
+static __inline int
+poll_run_handler(struct pollrec *pr, enum poll_cmd arg, int count)
+{
+
+	mtx_assert(&poll_mtx, MA_OWNED);
+	if ((pr->flags & (PR_RUNNING | PR_DEAD)) == 0) {
+		pr->flags |= PR_RUNNING;
+		mtx_unlock(&poll_mtx);
+		pr->handler(pr->ifp, arg, count);
+		mtx_lock(&poll_mtx);
+		pr->flags &= ~PR_RUNNING;
+		if (pr->flags & PR_DEAD)
+			wakeup(pr);
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * ether_poll is called from the idle loop.  We try to run the next poll
+ * handler.  If all of the handlers are running (or there are no handlers)
+ * then this returns false.
  */
-void
+int
 ether_poll(int count)
 {
-	int i;
+	int i, handlers;

-	mtx_lock(&poll_mtx);
+	mtx_assert(&poll_mtx, MA_OWNED);

 	if (count > poll_each_burst)
 		count = poll_each_burst;
-	for (i = 0 ; i < poll_handlers ; i++)
-		pr[i].handler(pr[i].ifp, POLL_ONLY, count);
-
-	mtx_unlock(&poll_mtx);
+
+	handlers = 0;
+	for (i = 0 ; i < max_handler ; i++)
+		handlers += poll_run_handler(&pr[i], POLL_ONLY, count);
+	return (handlers);
 }

 /*
@@ -428,8 +460,8 @@
 		residual_burst : poll_each_burst;
 	residual_burst -= cycles;

-	for (i = 0 ; i < poll_handlers ; i++)
-		pr[i].handler(pr[i].ifp, arg, cycles);
+	for (i = 0 ; i < max_handler ; i++)
+		poll_run_handler(&pr[i], arg, cycles);

 	phase = 4;
 	mtx_unlock(&poll_mtx);
@@ -445,7 +477,7 @@
 int
 ether_poll_register(poll_handler_t *h, struct ifnet *ifp)
 {
-	int i;
+	int i, needwakeup, free;

 	KASSERT(h != NULL, ("%s: handler is NULL", __func__));
 	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));
@@ -469,20 +501,30 @@
 		return (ENOMEM); /* no polling for you */
 	}

-	for (i = 0 ; i < poll_handlers ; i++)
+	free = max_handler;
+	for (i = 0 ; i < max_handler ; i++) {
+		if (pr[i].flags == PR_DEAD) {
+			free = min(free, i);
+			continue;
+		}
 		if (pr[i].ifp == ifp && pr[i].handler != NULL) {
 			mtx_unlock(&poll_mtx);
 			log(LOG_DEBUG, "ether_poll_register: %s: handler"
 			    " already registered\n", ifp->if_xname);
 			return (EEXIST);
 		}
+	}

-	pr[poll_handlers].handler = h;
-	pr[poll_handlers].ifp = ifp;
+	pr[free].handler = h;
+	pr[free].ifp = ifp;
+	pr[free].flags = 0;
 	poll_handlers++;
+	if (free == max_handler)
+		max_handler++;
+	needwakeup = idlepoll_sleeping;
 	mtx_unlock(&poll_mtx);

-	if (idlepoll_sleeping)
-		wakeup(&idlepoll_sleeping);
+	if (needwakeup)
+		wakeup_one(&idlepoll_sleeping);

 	return (0);
 }
@@ -498,20 +540,28 @@

 	mtx_lock(&poll_mtx);

-	for (i = 0 ; i < poll_handlers ; i++)
+	for (i = 0 ; i < max_handler ; i++)
 		if (pr[i].ifp == ifp) /* found it */
 			break;
-	if (i == poll_handlers) {
+	if (i >= max_handler) {
+		mtx_unlock(&poll_mtx);
 		log(LOG_DEBUG, "ether_poll_deregister: %s: not found!\n",
 		    ifp->if_xname);
-		mtx_unlock(&poll_mtx);
 		return (ENOENT);
 	}
+
+	/* Wait if it is currently running. */
+	pr[i].flags |= PR_DEAD;
+	while (pr[i].flags & PR_RUNNING)
+		mtx_sleep(&pr[i], &poll_mtx, 0, "depoll", hz);
+	pr[i].ifp = NULL;
+	pr[i].handler = NULL;
+
 	poll_handlers--;
-	if (i < poll_handlers) { /* Last entry replaces this one. */
-		pr[i].handler = pr[poll_handlers].handler;
-		pr[i].ifp = pr[poll_handlers].ifp;
-	}
+
+	/* Trim any free entries from the end of the list. */
+	while (max_handler > 0 && pr[max_handler - 1].flags == PR_DEAD)
+		max_handler--;
 	mtx_unlock(&poll_mtx);
 	return (0);
 }
@@ -562,36 +612,84 @@
 }

 static void
-poll_idle(void)
+poll_idle(void *arg)
 {
 	struct thread *td = curthread;
-	struct rtprio rtp;
+#ifdef SMP
+	int cpu;
+#endif
+	int busy;

-	rtp.prio = RTP_PRIO_MAX;	/* lowest priority */
-	rtp.type = RTP_PRIO_IDLE;
-	PROC_SLOCK(td->td_proc);
-	rtp_to_pri(&rtp, td);
-	PROC_SUNLOCK(td->td_proc);
+	thread_lock(td);
+	sched_prio(td, PRI_MAX_IDLE);	/* lowest priority */
+#ifdef SMP
+	cpu = (intptr_t)arg;
+	if (cpu != NOCPU)
+		sched_bind(td, cpu);
+#endif
+	thread_unlock(td);
+
+	mtx_lock(&poll_mtx);

 	for (;;) {
-		if (poll_in_idle_loop && poll_handlers > 0) {
-			idlepoll_sleeping = 0;
-			ether_poll(poll_each_burst);
+		if (poll_in_idle_loop && poll_handlers > 0)
+			busy = ether_poll(poll_each_burst);
+		else
+			busy = 0;
+		if (busy) {
+			/*
+			 * Just yield the CPU to other idle threads like
+			 * pagezero.
+			 *
+			 * XXX: It seems like what we probably want instead
+			 * is a kind of taskqueue for idle tasks and a single
+			 * per-cpu thread for idle tasks.
+			 */
 			thread_lock(td);
 			mi_switch(SW_VOL, NULL);
 			thread_unlock(td);
 		} else {
-			idlepoll_sleeping = 1;
-			tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3);
+			idlepoll_sleeping++;
+			mtx_sleep(&idlepoll_sleeping, &poll_mtx, 0, "pollid",
+			    hz * 3);
+			idlepoll_sleeping--;
 		}
 	}
 }

-static struct proc *idlepoll;
-static struct kproc_desc idlepoll_kp = {
-	 "idlepoll",
-	 poll_idle,
-	 &idlepoll
-};
-SYSINIT(idlepoll, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, kproc_start,
-    &idlepoll_kp);
+static int percpu_threads = 1;
+TUNABLE_INT("kern.polling.pcpu_threads", &percpu_threads);
+
+static void
+poll_init_kthreads(void *dummy)
+{
+#ifdef SMP
+	struct thread *td;
+	int i;
+#endif
+	struct proc *p;
+	int error;
+
+#ifdef SMP
+	p = NULL;
+	if (mp_ncpus > 1 && percpu_threads) {
+		for (i = 0; i <= mp_maxid; i++) {
+			if (CPU_ABSENT(i))
+				continue;
+			error = kproc_kthread_add(poll_idle,
+			    (void *)(intptr_t)i, &p, &td, 0, 0, "idlepoll",
+			    "poll: cpu %d", i);
+			if (error)
+				panic("failed to create idle poll kthread: %d",
+				    error);
+		}
+		return;
+	}
+#endif
+
+	error = kproc_create(poll_idle, (void *)(intptr_t)NOCPU, &p, 0,
+	    0, "idlepoll");
+	if (error)
+		panic("failed to create idle poll kproc: %d", error);
+}
+SYSINIT(idlepoll, SI_SUB_SMP, SI_ORDER_ANY, poll_init_kthreads, NULL);
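The key idea in this patch is the handshake between poll_run_handler() and ether_poll_deregister(): a handler is now invoked with poll_mtx dropped, so a slot that is being torn down is first marked PR_DEAD (which stops new calls from starting) and the deregistering thread then sleeps until any in-flight call drains.  The sketch below is not part of the patch; it is a userland analogue of that pattern using pthreads, and every name in it (slot_run(), slot_destroy(), SLOT_RUNNING, SLOT_DEAD) is invented for illustration.

/*
 * Illustrative userland sketch only: the same busy/dead flag handshake
 * used by poll_run_handler() and ether_poll_deregister() above, with a
 * condition variable standing in for mtx_sleep()/wakeup() on &pr[i].
 * Build with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

#define	SLOT_RUNNING	0x1	/* handler is executing with the lock dropped */
#define	SLOT_DEAD	0x2	/* slot is being torn down; do not run it */

struct slot {
	void	(*handler)(void);
	int	flags;
};

static pthread_mutex_t slot_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t slot_cv = PTHREAD_COND_INITIALIZER;

/* Analogue of poll_run_handler(); caller holds slot_mtx. */
static int
slot_run(struct slot *s)
{

	if ((s->flags & (SLOT_RUNNING | SLOT_DEAD)) != 0)
		return (0);
	s->flags |= SLOT_RUNNING;
	pthread_mutex_unlock(&slot_mtx);
	s->handler();			/* may block or take a long time */
	pthread_mutex_lock(&slot_mtx);
	s->flags &= ~SLOT_RUNNING;
	if (s->flags & SLOT_DEAD)
		pthread_cond_broadcast(&slot_cv);  /* wake the destroy path */
	return (1);
}

/* Analogue of ether_poll_deregister(): wait out an in-flight handler. */
static void
slot_destroy(struct slot *s)
{

	pthread_mutex_lock(&slot_mtx);
	s->flags |= SLOT_DEAD;		/* no new runs may start */
	while (s->flags & SLOT_RUNNING)
		pthread_cond_wait(&slot_cv, &slot_mtx);
	s->handler = NULL;		/* now safe to tear down */
	pthread_mutex_unlock(&slot_mtx);
}

static void
hello(void)
{

	printf("handler ran\n");
}

int
main(void)
{
	struct slot s = { hello, 0 };

	pthread_mutex_lock(&slot_mtx);
	slot_run(&s);			/* returns 1: handler executed */
	pthread_mutex_unlock(&slot_mtx);
	slot_destroy(&s);		/* returns once no handler is running */
	return (0);
}

The shape is the same as in the kernel code: marking the slot dead under the lock prevents new calls from starting, and the sleep/wakeup pair lets deregistration wait for the one call that may already be in progress.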
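A related detail is why ether_poll_register() now hunts for the lowest PR_DEAD slot and why ether_poll_deregister() only trims max_handler from the tail: the old code compacted pr[] by moving the last entry into the freed hole, which is no longer safe once handlers run with poll_mtx dropped, so entries are instead marked dead in place and a high-water mark is maintained.  The following is a small userland sketch of just that bookkeeping (it ignores the running-handler handshake shown above); slot_register(), slot_deregister(), max_slot, and S_DEAD are made-up names, not part of the patch.

/* Illustrative sketch of the free-slot reuse and high-water-mark trimming. */
#include <stdio.h>

#define	NSLOTS	8
#define	S_DEAD	0x2

struct slot {
	int	id;		/* stand-in for the handler/ifp pair */
	int	flags;
};

static struct slot slots[NSLOTS];
static int max_slot;		/* maximum busy index + 1, like max_handler */

static int
slot_register(int id)
{
	int i, free = max_slot;

	for (i = 0; i < max_slot; i++)
		if (slots[i].flags == S_DEAD && i < free)
			free = i;	/* reuse the lowest dead slot */
	if (free == NSLOTS)
		return (-1);		/* table full */
	slots[free].id = id;
	slots[free].flags = 0;
	if (free == max_slot)
		max_slot++;		/* grew past the high-water mark */
	return (free);
}

static void
slot_deregister(int i)
{

	slots[i].flags = S_DEAD;	/* leave the slot in place */
	while (max_slot > 0 && slots[max_slot - 1].flags == S_DEAD)
		max_slot--;		/* trim dead entries off the tail */
}

int
main(void)
{
	int a, b, c;

	a = slot_register(1);
	b = slot_register(2);
	c = slot_register(3);
	slot_deregister(b);		/* middle slot dies; tail is still live */
	printf("max_slot=%d\n", max_slot);		/* prints 3 */
	printf("reused=%d\n", slot_register(4));	/* reuses index 1 */
	slot_deregister(c);
	slot_deregister(1);		/* the slot that 4 reused */
	slot_deregister(a);
	printf("max_slot=%d\n", max_slot);		/* prints 0 */
	return (0);
}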