/*- * Copyright (c) 2008, David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _NSC_PER_PAGE (PAGE_SIZE/sizeof(sc_shared_t)) #define _NSC_BITS (sizeof(int) * NBBY) #define _NSC_WORDS howmany(_NSC_PER_PAGE, _NSC_BITS) #define PSCF_ALLOC 0x01 #define PSCF_WAIT 0x02 struct pschedctl { SLIST_HEAD(,sc_page) psc_list; vm_offset_t psc_start; vm_offset_t psc_cur; vm_offset_t psc_max; int psc_flags; }; typedef struct sc_page { SLIST_ENTRY(sc_page) scp_link; int scp_bmp[_NSC_WORDS]; int scp_free; sc_shared_t *scp_uaddr; sc_shared_t *scp_kaddr; vm_page_t scp_page; int scp_rotator; } sc_page_t; extern int max_threads_per_proc; static uma_zone_t sc_page_zone; static uma_zone_t psc_zone; static void schedctl_start(void); static int schedctl_shared_alloc(sc_shared_t **, vm_offset_t *); static int schedctl_alloc_page(sc_page_t **); static void schedctl_free_page(sc_page_t *); static int schedctl_alloc_bit(sc_page_t *); static sc_page_t *schedctl_find_page(sc_shared_t *, int *); static void schedctl_clear_bit(sc_page_t *, int); static void schedctl_proc_cleanup(void *); static void schedctl_fork_hook(void *arg __unused, struct proc *p1, struct proc *p2, int flags __unused); static int pschedctl_ctor(void *, int, void *, int); SYSINIT(schedctl, SI_SUB_P1003_1B, SI_ORDER_ANY, schedctl_start, NULL); static void schedctl_start(void) { sc_page_zone = uma_zcreate("sc_page", sizeof(sc_page_t), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); psc_zone = uma_zcreate("pschedctl", sizeof(struct pschedctl), pschedctl_ctor, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); EVENTHANDLER_REGISTER(process_exit, schedctl_proc_cleanup, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(process_exec, schedctl_proc_cleanup, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(process_fork, schedctl_fork_hook, NULL, EVENTHANDLER_PRI_ANY); } static int pschedctl_ctor(void *mem, int size, void *arg, int flags) { struct pschedctl *psc; psc = (struct pschedctl *)mem; SLIST_INIT(&psc->psc_list); return (0); } int schedctl(struct thread *td, struct schedctl_args *uap) { sc_shared_t *ssp; vm_offset_t uaddr; int error; if (uap->version != 0) return (ENODEV); if (td->td_schedctl == NULL) { if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0) return (error); bzero(ssp, sizeof(*ssp)); thread_lock(td); td->td_schedctl = ssp; td->td_sc_uaddr = uaddr; td->td_schedctl->sc_state = SC_RUN; thread_unlock(td); } td->td_retval[0] = (register_t)(td->td_sc_uaddr); return (0); } static int schedctl_init(void) { struct proc *p = curproc; vm_map_t map; int error; int flags; vm_size_t size; vm_offset_t uaddr; struct pschedctl *psc; struct vmspace *vms; if (p->p_schedctl != NULL && p->p_schedctl->psc_start != 0) return (0); error = 0; if (p->p_schedctl == NULL) { psc = (struct pschedctl *)uma_zalloc(psc_zone, M_WAITOK|M_ZERO); PROC_LOCK(p); if (p->p_schedctl != NULL) uma_zfree(psc_zone, psc); else { SLIST_INIT(&psc->psc_list); p->p_schedctl = psc; } } else { PROC_LOCK(p); } psc = p->p_schedctl; while (psc->psc_start == 0) { if (psc->psc_flags & PSCF_ALLOC) { while (psc->psc_flags & PSCF_ALLOC) { psc->psc_flags |= PSCF_WAIT; msleep(&psc->psc_flags, &p->p_mtx, 0, "schdctl", 0); } } else { psc->psc_flags |= PSCF_ALLOC; PROC_UNLOCK(p); size = sizeof(sc_shared_t) * max_threads_per_proc; size = round_page(size); vms = p->p_vmspace; map = &vms->vm_map; uaddr = round_page((vm_offset_t)vms->vm_daddr + lim_max(p, RLIMIT_DATA)); error = vm_mmap(map, &uaddr, size, VM_PROT_READ|VM_PROT_WRITE, VM_PROT_ALL, MAP_ANON|MAP_PRIVATE, OBJT_DEFAULT, NULL, 0); PROC_LOCK(p); if (error == 0) { psc->psc_start = uaddr; psc->psc_cur = uaddr; psc->psc_max = uaddr + size; } flags = psc->psc_flags; psc->psc_flags &= ~(PSCF_ALLOC|PSCF_WAIT); if (flags & PSCF_WAIT) wakeup(&psc->psc_flags); break; } } PROC_UNLOCK(p); return (error); } static int schedctl_shared_alloc(sc_shared_t **ssp, vm_offset_t *uaddr) { struct proc *p = curproc; struct pschedctl *psc; sc_page_t *scp; int error, bit; int flags; error = schedctl_init(); if (error != 0) return (error); psc = p->p_schedctl; PROC_LOCK(p); restart: SLIST_FOREACH(scp, &psc->psc_list, scp_link) { if (scp->scp_free != 0) break; } if (scp == NULL) { if (psc->psc_flags & PSCF_ALLOC) { while (psc->psc_flags & PSCF_ALLOC) { psc->psc_flags |= PSCF_WAIT; msleep(&psc->psc_flags, &p->p_mtx, 0, "schdctl", 0); } goto restart; } psc->psc_flags |= PSCF_ALLOC; PROC_UNLOCK(p); error = schedctl_alloc_page(&scp); PROC_LOCK(p); flags = psc->psc_flags; psc->psc_flags &= ~(PSCF_ALLOC|PSCF_WAIT); if (flags & PSCF_WAIT) wakeup(&psc->psc_flags); if (error != 0) { PROC_UNLOCK(p); return (error); } SLIST_INSERT_HEAD(&psc->psc_list, scp, scp_link); } bit = schedctl_alloc_bit(scp); PROC_UNLOCK(p); *ssp = scp->scp_kaddr + bit; *uaddr = (vm_offset_t)(scp->scp_uaddr + bit); return (0); } void schedctl_thread_exit(struct thread *td) { sc_shared_t *ssp = td->td_schedctl; sc_page_t *scp; int index = 0; PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (ssp == NULL) return; MPASS(curproc->p_schedctl != NULL); thread_lock(td); ssp->sc_state = SC_FREE; td->td_schedctl = NULL; td->td_sc_uaddr = 0; thread_unlock(td); scp = schedctl_find_page(ssp, &index); KASSERT(scp != NULL, ("failed to find sc_page")); schedctl_clear_bit(scp, index); } static void schedctl_proc_cleanup(void *arg) { struct thread *td = curthread; struct proc *p = td->td_proc; struct pschedctl *psc; sc_page_t *scp; PROC_LOCK(p); schedctl_thread_exit(td); PROC_UNLOCK(p); psc = p->p_schedctl; if (psc != NULL) { /* * Locking is unnecessary, because the process is already * single-threaded. */ while ((scp = SLIST_FIRST(&psc->psc_list)) != NULL) { SLIST_REMOVE_HEAD(&psc->psc_list, scp_link); schedctl_free_page(scp); } p->p_schedctl = NULL; uma_zfree(psc_zone, psc); } } static void schedctl_fork_hook(void *arg __unused, struct proc *p1, struct proc *p2, int flags __unused) { struct pschedctl *psc1, *psc2; if ((psc1 = p1->p_schedctl) != NULL) { psc2 = uma_zalloc(psc_zone, M_WAITOK|M_ZERO); psc2->psc_start = psc1->psc_start; psc2->psc_cur = psc1->psc_start; psc2->psc_max = psc1->psc_max; p2->p_schedctl = psc2; } } static int schedctl_alloc_page(sc_page_t **scpp) { struct proc *p = curproc; struct pschedctl *psc; sc_page_t *scp; vm_map_t map; vm_offset_t kaddr; vm_offset_t uaddr; pmap_t pmap; vm_page_t page; int i; map = &p->p_vmspace->vm_map; pmap = vmspace_pmap(p->p_vmspace); psc = p->p_schedctl; uaddr = psc->psc_cur; if (uaddr == psc->psc_max) return (ENOMEM); do { if (vm_fault_quick((caddr_t)uaddr, VM_PROT_READ) < 0) return (EFAULT); page = pmap_extract_and_hold(pmap, uaddr, VM_PROT_READ); } while (page == NULL); kaddr = kmem_alloc_nofault(kernel_map, PAGE_SIZE); if (kaddr == 0) { vm_page_lock_queues(); vm_page_unhold(page); vm_page_unlock_queues(); return (ENOMEM); } scp = (sc_page_t *)uma_zalloc(sc_page_zone, M_WAITOK|M_ZERO); scp->scp_page = page; scp->scp_uaddr = (sc_shared_t *)uaddr; scp->scp_kaddr = (sc_shared_t *)kaddr; scp->scp_rotator = 0; psc->psc_cur = uaddr + PAGE_SIZE; for (i = 0; i < _NSC_WORDS; ++i) scp->scp_bmp[i] = -1; scp->scp_free = _NSC_PER_PAGE; pmap_qenter(kaddr, &scp->scp_page, 1); *scpp = scp; return (0); } static void schedctl_free_page(sc_page_t *scp) { vm_offset_t kaddr; kaddr = (vm_offset_t)scp->scp_kaddr; kmem_free(kernel_map, kaddr, kaddr + PAGE_SIZE); vm_page_lock_queues(); vm_page_unhold(scp->scp_page); vm_page_unlock_queues(); uma_zfree(sc_page_zone, scp); } static int schedctl_alloc_bit(sc_page_t *scp) { int v; int lowest; int i; for (i = scp->scp_rotator; scp->scp_bmp[i] == 0;) { if (++i >= _NSC_WORDS) i = 0; } v = scp->scp_bmp[i]; lowest = ffs(v) - 1; scp->scp_bmp[i] &= ~(1 << lowest); scp->scp_free--; /* * Allocate sparsely to avoid cacheline false sharing * if there aren't many threads. */ scp->scp_rotator = (i+1) % _NSC_WORDS; return (i * _NSC_BITS + lowest); } static sc_page_t * schedctl_find_page(sc_shared_t *ssp, int *index) { struct pschedctl *psc = curproc->p_schedctl; sc_page_t *scp; PROC_LOCK_ASSERT(p, MA_OWNED); SLIST_FOREACH(scp, &psc->psc_list, scp_link) { if (ssp >= scp->scp_kaddr && ssp < scp->scp_kaddr + _NSC_PER_PAGE) { *index = (int)(ssp - scp->scp_kaddr); break; } } return (scp); } static void schedctl_clear_bit(sc_page_t *scp, int index) { scp->scp_bmp[index / _NSC_BITS] |= (1 << (index % _NSC_BITS)); scp->scp_free++; }