Index: amd64/amd64/machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/amd64/amd64/machdep.c,v
retrieving revision 1.649
diff -u -r1.649 machdep.c
--- amd64/amd64/machdep.c	11 May 2006 17:29:22 -0000	1.649
+++ amd64/amd64/machdep.c	4 Jun 2006 22:04:33 -0000
@@ -1137,7 +1137,7 @@
 	 * This may be done better later if it gets more high level
 	 * components in it. If so just link td->td_proc here.
 	 */
-	proc_linkup(&proc0, &ksegrp0, &thread0);
+	proc_linkup(&proc0, &thread0);
 
 	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
 	preload_bootstrap_relocate(KERNBASE);
Index: amd64/amd64/trap.c
===================================================================
RCS file: /home/ncvs/src/sys/amd64/amd64/trap.c,v
retrieving revision 1.303
diff -u -r1.303 trap.c
--- amd64/amd64/trap.c	13 Mar 2006 23:55:31 -0000	1.303
+++ amd64/amd64/trap.c	4 Jun 2006 22:30:25 -0000
@@ -301,8 +301,6 @@
 
 		case T_PAGEFLT:		/* page fault */
 			addr = frame.tf_addr;
-			if (td->td_pflags & TDP_SA)
-				thread_user_enter(td);
 			i = trap_pfault(&frame, TRUE);
 			if (i == -1)
 				goto userout;
@@ -759,8 +757,6 @@
 	td->td_frame = &frame;
 	if (td->td_ucred != p->p_ucred)
 		cred_update_thread(td);
-	if (p->p_flag & P_SA)
-		thread_user_enter(td);
 	params = (caddr_t)frame.tf_rsp + sizeof(register_t);
 	code = frame.tf_rax;
 	orig_tf_rflags = frame.tf_rflags;
Index: amd64/amd64/vm_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/amd64/amd64/vm_machdep.c,v
retrieving revision 1.250
diff -u -r1.250 vm_machdep.c
--- amd64/amd64/vm_machdep.c	24 Oct 2005 20:53:51 -0000	1.250
+++ amd64/amd64/vm_machdep.c	4 Jun 2006 22:03:49 -0000
@@ -311,15 +311,6 @@
 	stack_t *stack)
 {
 
-	/*
-	 * Do any extra cleaning that needs to be done.
-	 * The thread may have optional components
-	 * that are not present in a fresh thread.
-	 * This may be a recycled thread so make it look
-	 * as though it's newly allocated.
-	 */
-	cpu_thread_clean(td);
-
 	/*
 	 * Set the trap frame to point at the beginning of the uts
 	 * function.
Index: arm/arm/trap.c
===================================================================
RCS file: /home/ncvs/src/sys/arm/arm/trap.c,v
retrieving revision 1.24
diff -u -r1.24 trap.c
--- arm/arm/trap.c	9 Apr 2006 20:16:47 -0000	1.24
+++ arm/arm/trap.c	4 Jun 2006 21:47:06 -0000
@@ -264,8 +264,6 @@
 		td->td_frame = tf;
 		if (td->td_ucred != td->td_proc->p_ucred)
 			cred_update_thread(td);
-		if (td->td_pflags & TDP_SA)
-			thread_user_enter(td);
 	}
 
 	/* Grab the current pcb */
Index: arm/at91/kb920x_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/arm/at91/kb920x_machdep.c,v
retrieving revision 1.4
diff -u -r1.4 kb920x_machdep.c
--- arm/at91/kb920x_machdep.c	22 Mar 2006 22:31:31 -0000	1.4
+++ arm/at91/kb920x_machdep.c	4 Jun 2006 21:13:45 -0000
@@ -382,7 +382,7 @@
 	undefined_handler_address = (u_int)undefinedinstruction_bounce;
 	undefined_init();
 
-	proc_linkup(&proc0, &ksegrp0, &thread0);
+	proc_linkup(&proc0, &thread0);
 	thread0.td_kstack = kernelstack.pv_va;
 	thread0.td_pcb = (struct pcb *)
 	    (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
Index: arm/sa11x0/assabet_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/arm/sa11x0/assabet_machdep.c,v
retrieving revision 1.15
diff -u -r1.15 assabet_machdep.c
--- arm/sa11x0/assabet_machdep.c	30 May 2006 15:47:55 -0000	1.15
+++ arm/sa11x0/assabet_machdep.c	4 Jun 2006 22:04:37 -0000
@@ -413,7 +413,7 @@
 
 	/* Set stack for exception handlers */
 
-	proc_linkup(&proc0, &ksegrp0, &thread0);
+	proc_linkup(&proc0, &thread0);
 	thread0.td_kstack = kernelstack.pv_va;
 	thread0.td_pcb = (struct pcb *)
 	    (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
Index: arm/xscale/i80321/iq31244_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/arm/xscale/i80321/iq31244_machdep.c,v
retrieving revision 1.19
diff -u -r1.19 iq31244_machdep.c
--- arm/xscale/i80321/iq31244_machdep.c	26 May 2006 01:41:46 -0000	1.19
+++ arm/xscale/i80321/iq31244_machdep.c	4 Jun 2006 22:04:38 -0000
@@ -428,7 +428,7 @@
 	undefined_handler_address = (u_int)undefinedinstruction_bounce;
 	undefined_init();
 
-	proc_linkup(&proc0, &ksegrp0, &thread0);
+	proc_linkup(&proc0, &thread0);
 	thread0.td_kstack = kernelstack.pv_va;
 	thread0.td_pcb = (struct pcb *)
 	    (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
Index: conf/files
===================================================================
RCS file: /home/ncvs/src/sys/conf/files,v
retrieving revision 1.1119
diff -u -r1.1119 files
--- conf/files	27 May 2006 16:32:05 -0000	1.1119
+++ conf/files	4 Jun 2006 22:09:53 -0000
@@ -1279,7 +1279,6 @@
 kern/kern_idle.c	standard
 kern/kern_intr.c	standard
 kern/kern_jail.c	standard
-kern/kern_kse.c	standard
 kern/kern_kthread.c	standard
 kern/kern_ktr.c	optional ktr
 kern/kern_ktrace.c	standard
Index: ddb/db_ps.c
===================================================================
RCS file: /home/ncvs/src/sys/ddb/db_ps.c,v
retrieving revision 1.60
diff -u -r1.60 db_ps.c
--- ddb/db_ps.c	11 May 2006 21:59:55 -0000	1.60
+++ ddb/db_ps.c	4 Jun 2006 22:05:04 -0000
@@ -295,7 +295,6 @@
 	db_printf("Thread %d at %p:\n", td->td_tid, td);
 	db_printf(" proc (pid %d): %p ", td->td_proc->p_pid,
 	    td->td_proc);
-	db_printf(" ksegrp: %p\n", td->td_ksegrp);
 	if (td->td_name[0] != '\0')
 		db_printf(" name: %s\n", td->td_name);
 	db_printf(" flags: %#x ", td->td_flags);
Index: i386/i386/machdep.c
===================================================================
RCS file:
/home/ncvs/src/sys/i386/i386/machdep.c,v retrieving revision 1.629 diff -u -r1.629 machdep.c --- i386/i386/machdep.c 31 May 2006 00:17:29 -0000 1.629 +++ i386/i386/machdep.c 4 Jun 2006 22:06:02 -0000 @@ -2071,7 +2071,7 @@ * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ - proc_linkup(&proc0, &ksegrp0, &thread0); + proc_linkup(&proc0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { Index: i386/i386/sys_machdep.c =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/sys_machdep.c,v retrieving revision 1.104 diff -u -r1.104 sys_machdep.c --- i386/i386/sys_machdep.c 26 Dec 2005 00:07:19 -0000 1.104 +++ i386/i386/sys_machdep.c 4 Jun 2006 22:29:54 -0000 @@ -233,9 +233,6 @@ 0 /* granularity */ }; - if (td->td_proc->p_flag & P_SA) - return (EINVAL); /* XXXKSE */ -/* XXXKSE All the code below only works in 1:1 needs changing */ ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1)); if (ext == 0) return (ENOMEM); Index: i386/i386/trap.c =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/trap.c,v retrieving revision 1.289 diff -u -r1.289 trap.c --- i386/i386/trap.c 8 Feb 2006 08:09:15 -0000 1.289 +++ i386/i386/trap.c 4 Jun 2006 22:29:31 -0000 @@ -348,9 +348,6 @@ break; case T_PAGEFLT: /* page fault */ - if (td->td_pflags & TDP_SA) - thread_user_enter(td); - i = trap_pfault(&frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { @@ -938,8 +935,6 @@ td->td_frame = &frame; if (td->td_ucred != p->p_ucred) cred_update_thread(td); - if (p->p_flag & P_SA) - thread_user_enter(td); params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; orig_tf_eflags = frame.tf_eflags; Index: ia64/ia64/machdep.c =================================================================== RCS file: /home/ncvs/src/sys/ia64/ia64/machdep.c,v retrieving revision 1.207 diff -u -r1.207 machdep.c --- ia64/ia64/machdep.c 22 Feb 2006 18:57:49 -0000 1.207 +++ ia64/ia64/machdep.c 4 Jun 2006 21:13:09 -0000 @@ -767,7 +767,7 @@ msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE); msgbufinit(msgbufp, MSGBUF_SIZE); - proc_linkup(&proc0, &ksegrp0, &thread0); + proc_linkup(&proc0, &thread0); /* * Init mapping for kernel stack for proc 0 */ Index: kern/init_main.c =================================================================== RCS file: /home/ncvs/src/sys/kern/init_main.c,v retrieving revision 1.264 diff -u -r1.264 init_main.c --- kern/init_main.c 14 May 2006 07:11:28 -0000 1.264 +++ kern/init_main.c 4 Jun 2006 22:06:15 -0000 @@ -95,7 +95,6 @@ static struct pgrp pgrp0; struct proc proc0; struct thread thread0 __aligned(8); -struct ksegrp ksegrp0; struct vmspace vmspace0; struct proc *initproc; @@ -363,12 +362,10 @@ struct proc *p; unsigned i; struct thread *td; - struct ksegrp *kg; GIANT_REQUIRED; p = &proc0; td = &thread0; - kg = &ksegrp0; /* * Initialize magic number. @@ -376,14 +373,14 @@ p->p_magic = P_MAGIC; /* - * Initialize thread, process and ksegrp structures. + * Initialize thread and process structures. */ procinit(); /* set up proc zone */ - threadinit(); /* set up thead, upcall and KSEGRP zones */ + threadinit(); /* set up UMA zones */ /* * Initialise scheduler resources. - * Add scheduler specific parts to proc, ksegrp, thread as needed. + * Add scheduler specific parts to proc, thread as needed. 
*/ schedinit(); /* scheduler gets its house in order */ /* @@ -420,8 +417,8 @@ STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; td->td_state = TDS_RUNNING; - kg->kg_pri_class = PRI_TIMESHARE; - kg->kg_user_pri = PUSER; + td->td_pri_class = PRI_TIMESHARE; + td->td_user_pri = PUSER; td->td_priority = PVM; td->td_base_pri = PUSER; td->td_oncpu = 0; Index: kern/init_sysent.c =================================================================== RCS file: /home/ncvs/src/sys/kern/init_sysent.c,v retrieving revision 1.211 diff -u -r1.211 init_sysent.c --- kern/init_sysent.c 23 Mar 2006 08:48:37 -0000 1.211 +++ kern/init_sysent.c 4 Jun 2006 22:33:13 -0000 @@ -2,8 +2,8 @@ * System call switch table. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/kern/init_sysent.c,v 1.211 2006/03/23 08:48:37 davidxu Exp $ - * created from FreeBSD: src/sys/kern/syscalls.master,v 1.213 2006/03/23 08:46:41 davidxu Exp + * $FreeBSD$ + * created from FreeBSD: src/sys/kern/syscalls.master,v 1.215 2006/03/28 14:32:37 des Exp */ #include "opt_compat.h" @@ -408,11 +408,11 @@ { SYF_MPSAFE | AS(eaccess_args), (sy_call_t *)eaccess, AUE_EACCESS }, /* 376 = eaccess */ { 0, (sy_call_t *)nosys, AUE_NULL }, /* 377 = afs_syscall */ { AS(nmount_args), (sy_call_t *)nmount, AUE_NMOUNT }, /* 378 = nmount */ - { SYF_MPSAFE | 0, (sy_call_t *)kse_exit, AUE_NULL }, /* 379 = kse_exit */ - { SYF_MPSAFE | AS(kse_wakeup_args), (sy_call_t *)kse_wakeup, AUE_NULL }, /* 380 = kse_wakeup */ - { SYF_MPSAFE | AS(kse_create_args), (sy_call_t *)kse_create, AUE_NULL }, /* 381 = kse_create */ - { SYF_MPSAFE | AS(kse_thr_interrupt_args), (sy_call_t *)kse_thr_interrupt, AUE_NULL }, /* 382 = kse_thr_interrupt */ - { SYF_MPSAFE | AS(kse_release_args), (sy_call_t *)kse_release, AUE_NULL }, /* 383 = kse_release */ + { 0, (sy_call_t *)nosys, AUE_NULL }, /* 379 = kse_exit */ + { 0, (sy_call_t *)nosys, AUE_NULL }, /* 380 = kse_wakeup */ + { 0, (sy_call_t *)nosys, AUE_NULL }, /* 381 = kse_create */ + { 0, (sy_call_t *)nosys, AUE_NULL }, /* 382 = kse_thr_interrupt */ + { 0, (sy_call_t *)nosys, AUE_NULL }, /* 383 = kse_release */ { SYF_MPSAFE | AS(__mac_get_proc_args), (sy_call_t *)__mac_get_proc, AUE_NULL }, /* 384 = __mac_get_proc */ { SYF_MPSAFE | AS(__mac_set_proc_args), (sy_call_t *)__mac_set_proc, AUE_NULL }, /* 385 = __mac_set_proc */ { SYF_MPSAFE | AS(__mac_get_fd_args), (sy_call_t *)__mac_get_fd, AUE_NULL }, /* 386 = __mac_get_fd */ @@ -469,7 +469,7 @@ { SYF_MPSAFE | AS(extattr_list_fd_args), (sy_call_t *)extattr_list_fd, AUE_NULL }, /* 437 = extattr_list_fd */ { SYF_MPSAFE | AS(extattr_list_file_args), (sy_call_t *)extattr_list_file, AUE_NULL }, /* 438 = extattr_list_file */ { SYF_MPSAFE | AS(extattr_list_link_args), (sy_call_t *)extattr_list_link, AUE_NULL }, /* 439 = extattr_list_link */ - { SYF_MPSAFE | AS(kse_switchin_args), (sy_call_t *)kse_switchin, AUE_NULL }, /* 440 = kse_switchin */ + { 0, (sy_call_t *)nosys, AUE_NULL }, /* 440 = kse_switchin */ { SYF_MPSAFE | AS(ksem_timedwait_args), (sy_call_t *)lkmressys, AUE_NULL }, /* 441 = ksem_timedwait */ { SYF_MPSAFE | AS(thr_suspend_args), (sy_call_t *)thr_suspend, AUE_NULL }, /* 442 = thr_suspend */ { SYF_MPSAFE | AS(thr_wake_args), (sy_call_t *)thr_wake, AUE_NULL }, /* 443 = thr_wake */ Index: kern/kern_clock.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_clock.c,v retrieving revision 1.188 diff -u -r1.188 kern_clock.c --- kern/kern_clock.c 17 Apr 2006 20:14:51 -0000 1.188 +++ kern/kern_clock.c 13 May 2006 18:40:59 -0000 
@@ -201,21 +201,17 @@ * Run current process's virtual and profile time, as needed. */ mtx_lock_spin_flags(&sched_lock, MTX_QUIET); - if (p->p_flag & P_SA) { - /* XXXKSE What to do? */ - } else { - pstats = p->p_stats; - if (usermode && - timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && - itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { - p->p_sflag |= PS_ALRMPEND; - td->td_flags |= TDF_ASTPENDING; - } - if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && - itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { - p->p_sflag |= PS_PROFPEND; - td->td_flags |= TDF_ASTPENDING; - } + pstats = p->p_stats; + if (usermode && + timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { + p->p_sflag |= PS_ALRMPEND; + td->td_flags |= TDF_ASTPENDING; + } + if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { + p->p_sflag |= PS_PROFPEND; + td->td_flags |= TDF_ASTPENDING; } mtx_unlock_spin_flags(&sched_lock, MTX_QUIET); @@ -413,8 +409,6 @@ /* * Charge the time as appropriate. */ - if (p->p_flag & P_SA) - thread_statclock(1); td->td_uticks++; if (p->p_nice > NZERO) cp_time[CP_NICE]++; @@ -438,8 +432,6 @@ td->td_iticks++; cp_time[CP_INTR]++; } else { - if (p->p_flag & P_SA) - thread_statclock(0); td->td_pticks++; td->td_sticks++; if (td != PCPU_GET(idlethread)) Index: kern/kern_fork.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_fork.c,v retrieving revision 1.258 diff -u -r1.258 kern_fork.c --- kern/kern_fork.c 15 Mar 2006 23:24:14 -0000 1.258 +++ kern/kern_fork.c 4 Jun 2006 20:04:38 -0000 @@ -205,7 +205,6 @@ struct filedesc *fd; struct filedesc_to_leader *fdtol; struct thread *td2; - struct ksegrp *kg2; struct sigacts *newsigacts; int error; @@ -472,7 +471,6 @@ * then copy the section that is copied directly from the parent. */ td2 = FIRST_THREAD_IN_PROC(p2); - kg2 = FIRST_KSEGRP_IN_PROC(p2); /* Allocate and switch to an alternate kstack if specified. 
*/ if (pages != 0) @@ -485,15 +483,11 @@ __rangeof(struct proc, p_startzero, p_endzero)); bzero(&td2->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); - bzero(&kg2->kg_startzero, - __rangeof(struct ksegrp, kg_startzero, kg_endzero)); bcopy(&p1->p_startcopy, &p2->p_startcopy, __rangeof(struct proc, p_startcopy, p_endcopy)); bcopy(&td->td_startcopy, &td2->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); - bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy, - __rangeof(struct ksegrp, kg_startcopy, kg_endcopy)); td2->td_sigstk = td->td_sigstk; td2->td_sigmask = td->td_sigmask; Index: kern/kern_idle.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_idle.c,v retrieving revision 1.43 diff -u -r1.43 kern_idle.c --- kern/kern_idle.c 4 Apr 2005 21:53:54 -0000 1.43 +++ kern/kern_idle.c 4 Jun 2006 20:15:57 -0000 @@ -79,7 +79,7 @@ td = FIRST_THREAD_IN_PROC(p); TD_SET_CAN_RUN(td); td->td_flags |= TDF_IDLETD; - sched_class(td->td_ksegrp, PRI_IDLE); + sched_class(td, PRI_IDLE); sched_prio(td, PRI_MAX_IDLE); mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); Index: kern/kern_intr.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_intr.c,v retrieving revision 1.132 diff -u -r1.132 kern_intr.c --- kern/kern_intr.c 17 Apr 2006 18:20:37 -0000 1.132 +++ kern/kern_intr.c 4 Jun 2006 17:56:01 -0000 @@ -296,7 +296,7 @@ panic("kthread_create() failed with %d", error); td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */ mtx_lock_spin(&sched_lock); - td->td_ksegrp->kg_pri_class = PRI_ITHD; + td->td_pri_class = PRI_ITHD; TD_SET_IWAIT(td); mtx_unlock_spin(&sched_lock); td->td_pflags |= TDP_ITHREAD; Index: kern/kern_kse.c =================================================================== RCS file: kern/kern_kse.c diff -N kern/kern_kse.c --- kern/kern_kse.c 22 Feb 2006 18:57:49 -0000 1.223 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,1481 +0,0 @@ -/*- - * Copyright (C) 2001 Julian Elischer . - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice(s), this list of conditions and the following disclaimer as - * the first lines of this file unmodified other than the possible - * addition of one or more copyright notices. - * 2. Redistributions in binary form must reproduce the above copyright - * notice(s), this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD: src/sys/kern/kern_kse.c,v 1.223 2006/02/22 18:57:49 jhb Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * KSEGRP related storage. - */ -static uma_zone_t upcall_zone; - -/* DEBUG ONLY */ -extern int virtual_cpu; -extern int thread_debug; - -extern int max_threads_per_proc; -extern int max_groups_per_proc; -extern int max_threads_hits; -extern struct mtx kse_zombie_lock; - - -TAILQ_HEAD(, kse_upcall) zombie_upcalls = - TAILQ_HEAD_INITIALIZER(zombie_upcalls); - -static int thread_update_usr_ticks(struct thread *td); -static void thread_alloc_spare(struct thread *td); - -struct kse_upcall * -upcall_alloc(void) -{ - struct kse_upcall *ku; - - ku = uma_zalloc(upcall_zone, M_WAITOK | M_ZERO); - return (ku); -} - -void -upcall_free(struct kse_upcall *ku) -{ - - uma_zfree(upcall_zone, ku); -} - -void -upcall_link(struct kse_upcall *ku, struct ksegrp *kg) -{ - - mtx_assert(&sched_lock, MA_OWNED); - TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link); - ku->ku_ksegrp = kg; - kg->kg_numupcalls++; -} - -void -upcall_unlink(struct kse_upcall *ku) -{ - struct ksegrp *kg = ku->ku_ksegrp; - - mtx_assert(&sched_lock, MA_OWNED); - KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__)); - TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link); - kg->kg_numupcalls--; - upcall_stash(ku); -} - -void -upcall_remove(struct thread *td) -{ - - mtx_assert(&sched_lock, MA_OWNED); - if (td->td_upcall != NULL) { - td->td_upcall->ku_owner = NULL; - upcall_unlink(td->td_upcall); - td->td_upcall = NULL; - } -} - -#ifndef _SYS_SYSPROTO_H_ -struct kse_switchin_args { - struct kse_thr_mailbox *tmbx; - int flags; -}; -#endif - -int -kse_switchin(struct thread *td, struct kse_switchin_args *uap) -{ - struct kse_thr_mailbox tmbx; - struct kse_upcall *ku; - int error; - - if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td)) - return (EINVAL); - error = (uap->tmbx == NULL) ? EINVAL : 0; - if (!error) - error = copyin(uap->tmbx, &tmbx, sizeof(tmbx)); - if (!error && (uap->flags & KSE_SWITCHIN_SETTMBX)) - error = (suword(&ku->ku_mailbox->km_curthread, - (long)uap->tmbx) != 0 ? EINVAL : 0); - if (!error) - error = set_mcontext(td, &tmbx.tm_context.uc_mcontext); - if (!error) { - suword32(&uap->tmbx->tm_lwp, td->td_tid); - if (uap->flags & KSE_SWITCHIN_SETTMBX) { - td->td_mailbox = uap->tmbx; - td->td_pflags |= TDP_CAN_UNBIND; - } - PROC_LOCK(td->td_proc); - if (td->td_proc->p_flag & P_TRACED) { - _PHOLD(td->td_proc); - if (tmbx.tm_dflags & TMDF_SSTEP) - ptrace_single_step(td); - else - ptrace_clear_single_step(td); - if (tmbx.tm_dflags & TMDF_SUSPEND) { - mtx_lock_spin(&sched_lock); - /* fuword can block, check again */ - if (td->td_upcall) - ku->ku_flags |= KUF_DOUPCALL; - mtx_unlock_spin(&sched_lock); - } - _PRELE(td->td_proc); - } - PROC_UNLOCK(td->td_proc); - } - return ((error == 0) ? 
EJUSTRETURN : error); -} - -/* -struct kse_thr_interrupt_args { - struct kse_thr_mailbox * tmbx; - int cmd; - long data; -}; -*/ -int -kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap) -{ - struct kse_execve_args args; - struct image_args iargs; - struct proc *p; - struct thread *td2; - struct kse_upcall *ku; - struct kse_thr_mailbox *tmbx; - uint32_t flags; - int error; - - p = td->td_proc; - - if (!(p->p_flag & P_SA)) - return (EINVAL); - - switch (uap->cmd) { - case KSE_INTR_SENDSIG: - if (uap->data < 0 || uap->data > _SIG_MAXSIG) - return (EINVAL); - case KSE_INTR_INTERRUPT: - case KSE_INTR_RESTART: - PROC_LOCK(p); - mtx_lock_spin(&sched_lock); - FOREACH_THREAD_IN_PROC(p, td2) { - if (td2->td_mailbox == uap->tmbx) - break; - } - if (td2 == NULL) { - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); - return (ESRCH); - } - if (uap->cmd == KSE_INTR_SENDSIG) { - if (uap->data > 0) { - td2->td_flags &= ~TDF_INTERRUPT; - mtx_unlock_spin(&sched_lock); - tdsignal(p, td2, (int)uap->data, NULL); - } else { - mtx_unlock_spin(&sched_lock); - } - } else { - td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING; - if (TD_CAN_UNBIND(td2)) - td2->td_upcall->ku_flags |= KUF_DOUPCALL; - if (uap->cmd == KSE_INTR_INTERRUPT) - td2->td_intrval = EINTR; - else - td2->td_intrval = ERESTART; - if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) - sleepq_abort(td2, td2->td_intrval); - mtx_unlock_spin(&sched_lock); - } - PROC_UNLOCK(p); - break; - case KSE_INTR_SIGEXIT: - if (uap->data < 1 || uap->data > _SIG_MAXSIG) - return (EINVAL); - PROC_LOCK(p); - sigexit(td, (int)uap->data); - break; - - case KSE_INTR_DBSUSPEND: - /* this sub-function is only for bound thread */ - if (td->td_pflags & TDP_SA) - return (EINVAL); - ku = td->td_upcall; - tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread); - if (tmbx == NULL || tmbx == (void *)-1) - return (EINVAL); - flags = 0; - while ((p->p_flag & P_TRACED) && !(p->p_flag & P_SINGLE_EXIT)) { - flags = fuword32(&tmbx->tm_dflags); - if (!(flags & TMDF_SUSPEND)) - break; - PROC_LOCK(p); - mtx_lock_spin(&sched_lock); - thread_stopped(p); - thread_suspend_one(td); - PROC_UNLOCK(p); - mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); - } - return (0); - - case KSE_INTR_EXECVE: - error = copyin((void *)uap->data, &args, sizeof(args)); - if (error) - return (error); - error = exec_copyin_args(&iargs, args.path, UIO_USERSPACE, - args.argv, args.envp); - if (error == 0) - error = kern_execve(td, &iargs, NULL); - if (error == 0) { - PROC_LOCK(p); - SIGSETOR(td->td_siglist, args.sigpend); - PROC_UNLOCK(p); - kern_sigprocmask(td, SIG_SETMASK, &args.sigmask, NULL, - 0); - } - return (error); - - default: - return (EINVAL); - } - return (0); -} - -/* -struct kse_exit_args { - register_t dummy; -}; -*/ -int -kse_exit(struct thread *td, struct kse_exit_args *uap) -{ - struct proc *p; - struct ksegrp *kg; - struct kse_upcall *ku, *ku2; - int error, count; - - p = td->td_proc; - /* - * Ensure that this is only called from the UTS - */ - if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td)) - return (EINVAL); - - kg = td->td_ksegrp; - count = 0; - - /* - * Calculate the existing non-exiting upcalls in this ksegroup. - * If we are the last upcall but there are still other threads, - * then do not exit. We need the other threads to be able to - * complete whatever they are doing. - * XXX This relies on the userland knowing what to do if we return. 
- * It may be a better choice to convert ourselves into a kse_release - * ( or similar) and wait in the kernel to be needed. - */ - PROC_LOCK(p); - mtx_lock_spin(&sched_lock); - FOREACH_UPCALL_IN_GROUP(kg, ku2) { - if (ku2->ku_flags & KUF_EXITING) - count++; - } - if ((kg->kg_numupcalls - count) == 1 && - (kg->kg_numthreads > 1)) { - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); - return (EDEADLK); - } - ku->ku_flags |= KUF_EXITING; - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); - - /* - * Mark the UTS mailbox as having been finished with. - * If that fails then just go for a segfault. - * XXX need to check it that can be deliverred without a mailbox. - */ - error = suword32(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE); - if (!(td->td_pflags & TDP_SA)) - if (suword32(&td->td_mailbox->tm_lwp, 0)) - error = EFAULT; - PROC_LOCK(p); - if (error) - psignal(p, SIGSEGV); - sigqueue_flush(&td->td_sigqueue); - mtx_lock_spin(&sched_lock); - upcall_remove(td); - if (p->p_numthreads != 1) { - /* - * If we are not the last thread, but we are the last - * thread in this ksegrp, then by definition this is not - * the last group and we need to clean it up as well. - * thread_exit will clean up the kseg as needed. - */ - thread_stopped(p); - thread_exit(); - /* NOTREACHED */ - } - /* - * This is the last thread. Just return to the user. - * We know that there is only one ksegrp too, as any others - * would have been discarded in previous calls to thread_exit(). - * Effectively we have left threading mode.. - * The only real thing left to do is ensure that the - * scheduler sets out concurrency back to 1 as that may be a - * resource leak otherwise. - * This is an A[PB]I issue.. what SHOULD we do? - * One possibility is to return to the user. It may not cope well. - * The other possibility would be to let the process exit. - */ - thread_unthread(td); - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); -#if 1 - return (0); -#else - exit1(td, 0); -#endif -} - -/* - * Either becomes an upcall or waits for an awakening event and - * then becomes an upcall. Only error cases return. - */ -/* -struct kse_release_args { - struct timespec *timeout; -}; -*/ -int -kse_release(struct thread *td, struct kse_release_args *uap) -{ - struct proc *p; - struct ksegrp *kg; - struct kse_upcall *ku; - struct timespec timeout; - struct timeval tv; - sigset_t sigset; - int error; - - p = td->td_proc; - kg = td->td_ksegrp; - if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td)) - return (EINVAL); - if (uap->timeout != NULL) { - if ((error = copyin(uap->timeout, &timeout, sizeof(timeout)))) - return (error); - TIMESPEC_TO_TIMEVAL(&tv, &timeout); - } - if (td->td_pflags & TDP_SA) - td->td_pflags |= TDP_UPCALLING; - else { - ku->ku_mflags = fuword32(&ku->ku_mailbox->km_flags); - if (ku->ku_mflags == -1) { - PROC_LOCK(p); - sigexit(td, SIGSEGV); - } - } - PROC_LOCK(p); - if (ku->ku_mflags & KMF_WAITSIGEVENT) { - /* UTS wants to wait for signal event */ - if (!(p->p_flag & P_SIGEVENT) && - !(ku->ku_flags & KUF_DOUPCALL)) { - td->td_kflags |= TDK_KSERELSIG; - error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH, - "ksesigwait", (uap->timeout ? 
tvtohz(&tv) : 0)); - td->td_kflags &= ~(TDK_KSERELSIG | TDK_WAKEUP); - } - p->p_flag &= ~P_SIGEVENT; - sigset = p->p_siglist; - PROC_UNLOCK(p); - error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught, - sizeof(sigset)); - } else { - if ((ku->ku_flags & KUF_DOUPCALL) == 0 && - ((ku->ku_mflags & KMF_NOCOMPLETED) || - (kg->kg_completed == NULL))) { - kg->kg_upsleeps++; - td->td_kflags |= TDK_KSEREL; - error = msleep(&kg->kg_completed, &p->p_mtx, - PPAUSE|PCATCH, "kserel", - (uap->timeout ? tvtohz(&tv) : 0)); - td->td_kflags &= ~(TDK_KSEREL | TDK_WAKEUP); - kg->kg_upsleeps--; - } - PROC_UNLOCK(p); - } - if (ku->ku_flags & KUF_DOUPCALL) { - mtx_lock_spin(&sched_lock); - ku->ku_flags &= ~KUF_DOUPCALL; - mtx_unlock_spin(&sched_lock); - } - return (0); -} - -/* struct kse_wakeup_args { - struct kse_mailbox *mbx; -}; */ -int -kse_wakeup(struct thread *td, struct kse_wakeup_args *uap) -{ - struct proc *p; - struct ksegrp *kg; - struct kse_upcall *ku; - struct thread *td2; - - p = td->td_proc; - td2 = NULL; - ku = NULL; - /* KSE-enabled processes only, please. */ - if (!(p->p_flag & P_SA)) - return (EINVAL); - PROC_LOCK(p); - mtx_lock_spin(&sched_lock); - if (uap->mbx) { - FOREACH_KSEGRP_IN_PROC(p, kg) { - FOREACH_UPCALL_IN_GROUP(kg, ku) { - if (ku->ku_mailbox == uap->mbx) - break; - } - if (ku) - break; - } - } else { - kg = td->td_ksegrp; - if (kg->kg_upsleeps) { - mtx_unlock_spin(&sched_lock); - wakeup(&kg->kg_completed); - PROC_UNLOCK(p); - return (0); - } - ku = TAILQ_FIRST(&kg->kg_upcalls); - } - if (ku == NULL) { - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); - return (ESRCH); - } - if ((td2 = ku->ku_owner) == NULL) { - mtx_unlock_spin(&sched_lock); - panic("%s: no owner", __func__); - } else if (td2->td_kflags & (TDK_KSEREL | TDK_KSERELSIG)) { - mtx_unlock_spin(&sched_lock); - if (!(td2->td_kflags & TDK_WAKEUP)) { - td2->td_kflags |= TDK_WAKEUP; - if (td2->td_kflags & TDK_KSEREL) - sleepq_remove(td2, &kg->kg_completed); - else - sleepq_remove(td2, &p->p_siglist); - } - } else { - ku->ku_flags |= KUF_DOUPCALL; - mtx_unlock_spin(&sched_lock); - } - PROC_UNLOCK(p); - return (0); -} - -/* - * No new KSEG: first call: use current KSE, don't schedule an upcall - * All other situations, do allocate max new KSEs and schedule an upcall. - * - * XXX should be changed so that 'first' behaviour lasts for as long - * as you have not made a kse in this ksegrp. i.e. as long as we do not have - * a mailbox.. - */ -/* struct kse_create_args { - struct kse_mailbox *mbx; - int newgroup; -}; */ -int -kse_create(struct thread *td, struct kse_create_args *uap) -{ - struct ksegrp *newkg; - struct ksegrp *kg; - struct proc *p; - struct kse_mailbox mbx; - struct kse_upcall *newku; - int err, ncpus, sa = 0, first = 0; - struct thread *newtd; - - p = td->td_proc; - kg = td->td_ksegrp; - if ((err = copyin(uap->mbx, &mbx, sizeof(mbx)))) - return (err); - - ncpus = mp_ncpus; - if (virtual_cpu != 0) - ncpus = virtual_cpu; - /* - * If the new UTS mailbox says that this - * will be a BOUND lwp, then it had better - * have its thread mailbox already there. - * In addition, this ksegrp will be limited to - * a concurrency of 1. There is more on this later. 
- */ - if (mbx.km_flags & KMF_BOUND) { - if (mbx.km_curthread == NULL) - return (EINVAL); - ncpus = 1; - } else { - sa = TDP_SA; - } - - PROC_LOCK(p); - /* - * Processes using the other threading model can't - * suddenly start calling this one - */ - if ((p->p_flag & (P_SA|P_HADTHREADS)) == P_HADTHREADS) { - PROC_UNLOCK(p); - return (EINVAL); - } - - /* - * Limit it to NCPU upcall contexts per ksegrp in any case. - * There is a small race here as we don't hold proclock - * until we inc the ksegrp count, but it's not really a big problem - * if we get one too many, but we save a proc lock. - */ - if ((!uap->newgroup) && (kg->kg_numupcalls >= ncpus)) { - PROC_UNLOCK(p); - return (EPROCLIM); - } - - if (!(p->p_flag & P_SA)) { - first = 1; - p->p_flag |= P_SA|P_HADTHREADS; - } - - PROC_UNLOCK(p); - /* - * Now pay attention! - * If we are going to be bound, then we need to be either - * a new group, or the first call ever. In either - * case we will be creating (or be) the only thread in a group. - * and the concurrency will be set to 1. - * This is not quite right, as we may still make ourself - * bound after making other ksegrps but it will do for now. - * The library will only try do this much. - */ - if (!sa && !(uap->newgroup || first)) - return (EINVAL); - - if (uap->newgroup) { - newkg = ksegrp_alloc(); - bzero(&newkg->kg_startzero, - __rangeof(struct ksegrp, kg_startzero, kg_endzero)); - bcopy(&kg->kg_startcopy, &newkg->kg_startcopy, - __rangeof(struct ksegrp, kg_startcopy, kg_endcopy)); - sched_init_concurrency(newkg); - PROC_LOCK(p); - if (p->p_numksegrps >= max_groups_per_proc) { - PROC_UNLOCK(p); - ksegrp_free(newkg); - return (EPROCLIM); - } - ksegrp_link(newkg, p); - mtx_lock_spin(&sched_lock); - sched_fork_ksegrp(td, newkg); - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); - } else { - /* - * We want to make a thread in our own ksegrp. - * If we are just the first call, either kind - * is ok, but if not then either we must be - * already an upcallable thread to make another, - * or a bound thread to make one of those. - * Once again, not quite right but good enough for now.. XXXKSE - */ - if (!first && ((td->td_pflags & TDP_SA) != sa)) - return (EINVAL); - - newkg = kg; - } - - /* - * This test is a bit "indirect". - * It might simplify things if we made a direct way of testing - * if a ksegrp has been worked on before. - * In the case of a bound request and the concurrency being set to - * one, the concurrency will already be 1 so it's just inefficient - * but not dangerous to call this again. XXX - */ - if (newkg->kg_numupcalls == 0) { - /* - * Initialize KSE group with the appropriate - * concurrency. - * - * For a multiplexed group, create as as much concurrency - * as the number of physical cpus. - * This increases concurrency in the kernel even if the - * userland is not MP safe and can only run on a single CPU. - * In an ideal world, every physical cpu should execute a - * thread. If there is enough concurrency, threads in the - * kernel can be executed parallel on different cpus at - * full speed without being restricted by the number of - * upcalls the userland provides. - * Adding more upcall structures only increases concurrency - * in userland. - * - * For a bound thread group, because there is only one thread - * in the group, we only set the concurrency for the group - * to 1. A thread in this kind of group will never schedule - * an upcall when blocked. This simulates pthread system - * scope thread behaviour. 
- */ - sched_set_concurrency(newkg, ncpus); - } - /* - * Even bound LWPs get a mailbox and an upcall to hold it. - */ - newku = upcall_alloc(); - newku->ku_mailbox = uap->mbx; - newku->ku_func = mbx.km_func; - bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t)); - - /* - * For the first call this may not have been set. - * Of course nor may it actually be needed. - */ - if (td->td_standin == NULL) - thread_alloc_spare(td); - - PROC_LOCK(p); - mtx_lock_spin(&sched_lock); - if (newkg->kg_numupcalls >= ncpus) { - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); - upcall_free(newku); - return (EPROCLIM); - } - - /* - * If we are the first time, and a normal thread, - * then transfer all the signals back to the 'process'. - * SA threading will make a special thread to handle them. - */ - if (first && sa) { - sigqueue_move_set(&td->td_sigqueue, &p->p_sigqueue, - &td->td_sigqueue.sq_signals); - SIGFILLSET(td->td_sigmask); - SIG_CANTMASK(td->td_sigmask); - } - - /* - * Make the new upcall available to the ksegrp. - * It may or may not use it, but it's available. - */ - upcall_link(newku, newkg); - PROC_UNLOCK(p); - if (mbx.km_quantum) - newkg->kg_upquantum = max(1, mbx.km_quantum / tick); - - /* - * Each upcall structure has an owner thread, find which - * one owns it. - */ - if (uap->newgroup) { - /* - * Because the new ksegrp hasn't a thread, - * create an initial upcall thread to own it. - */ - newtd = thread_schedule_upcall(td, newku); - } else { - /* - * If the current thread hasn't an upcall structure, - * just assign the upcall to it. - * It'll just return. - */ - if (td->td_upcall == NULL) { - newku->ku_owner = td; - td->td_upcall = newku; - newtd = td; - } else { - /* - * Create a new upcall thread to own it. - */ - newtd = thread_schedule_upcall(td, newku); - } - } - mtx_unlock_spin(&sched_lock); - - /* - * Let the UTS instance know its LWPID. - * It doesn't really care. But the debugger will. - */ - suword32(&newku->ku_mailbox->km_lwp, newtd->td_tid); - - /* - * In the same manner, if the UTS has a current user thread, - * then it is also running on this LWP so set it as well. - * The library could do that of course.. but why not.. - */ - if (mbx.km_curthread) - suword32(&mbx.km_curthread->tm_lwp, newtd->td_tid); - - - if (sa) { - newtd->td_pflags |= TDP_SA; - } else { - newtd->td_pflags &= ~TDP_SA; - - /* - * Since a library will use the mailbox pointer to - * identify even a bound thread, and the mailbox pointer - * will never be allowed to change after this syscall - * for a bound thread, set it here so the library can - * find the thread after the syscall returns. - */ - newtd->td_mailbox = mbx.km_curthread; - - if (newtd != td) { - /* - * If we did create a new thread then - * make sure it goes to the right place - * when it starts up, and make sure that it runs - * at full speed when it gets there. - * thread_schedule_upcall() copies all cpu state - * to the new thread, so we should clear single step - * flag here. - */ - cpu_set_upcall_kse(newtd, newku->ku_func, - newku->ku_mailbox, &newku->ku_stack); - PROC_LOCK(p); - if (p->p_flag & P_TRACED) { - _PHOLD(p); - ptrace_clear_single_step(newtd); - _PRELE(p); - } - PROC_UNLOCK(p); - } - } - - /* - * If we are starting a new thread, kick it off. - */ - if (newtd != td) { - mtx_lock_spin(&sched_lock); - setrunqueue(newtd, SRQ_BORING); - mtx_unlock_spin(&sched_lock); - } - return (0); -} - -/* - * Initialize global thread allocation resources. 
- */ -void -kseinit(void) -{ - - upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall), - NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); -} - -/* - * Stash an embarasingly extra upcall into the zombie upcall queue. - */ - -void -upcall_stash(struct kse_upcall *ku) -{ - mtx_lock_spin(&kse_zombie_lock); - TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link); - mtx_unlock_spin(&kse_zombie_lock); -} - -/* - * Reap zombie kse resource. - */ -void -kse_GC(void) -{ - struct kse_upcall *ku_first, *ku_next; - - /* - * Don't even bother to lock if none at this instant, - * we really don't care about the next instant.. - */ - if (!TAILQ_EMPTY(&zombie_upcalls)) { - mtx_lock_spin(&kse_zombie_lock); - ku_first = TAILQ_FIRST(&zombie_upcalls); - if (ku_first) - TAILQ_INIT(&zombie_upcalls); - mtx_unlock_spin(&kse_zombie_lock); - while (ku_first) { - ku_next = TAILQ_NEXT(ku_first, ku_link); - upcall_free(ku_first); - ku_first = ku_next; - } - } -} - -/* - * Store the thread context in the UTS's mailbox. - * then add the mailbox at the head of a list we are building in user space. - * The list is anchored in the ksegrp structure. - */ -int -thread_export_context(struct thread *td, int willexit) -{ - struct proc *p; - struct ksegrp *kg; - uintptr_t mbx; - void *addr; - int error = 0, sig; - mcontext_t mc; - - p = td->td_proc; - kg = td->td_ksegrp; - - /* - * Post sync signal, or process SIGKILL and SIGSTOP. - * For sync signal, it is only possible when the signal is not - * caught by userland or process is being debugged. - */ - PROC_LOCK(p); - if (td->td_flags & TDF_NEEDSIGCHK) { - mtx_lock_spin(&sched_lock); - td->td_flags &= ~TDF_NEEDSIGCHK; - mtx_unlock_spin(&sched_lock); - mtx_lock(&p->p_sigacts->ps_mtx); - while ((sig = cursig(td)) != 0) - postsig(sig); - mtx_unlock(&p->p_sigacts->ps_mtx); - } - if (willexit) - SIGFILLSET(td->td_sigmask); - PROC_UNLOCK(p); - - /* Export the user/machine context. */ - get_mcontext(td, &mc, 0); - addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext); - error = copyout(&mc, addr, sizeof(mcontext_t)); - if (error) - goto bad; - - addr = (caddr_t)(&td->td_mailbox->tm_lwp); - if (suword32(addr, 0)) { - error = EFAULT; - goto bad; - } - - /* Get address in latest mbox of list pointer */ - addr = (void *)(&td->td_mailbox->tm_next); - /* - * Put the saved address of the previous first - * entry into this one - */ - for (;;) { - mbx = (uintptr_t)kg->kg_completed; - if (suword(addr, mbx)) { - error = EFAULT; - goto bad; - } - PROC_LOCK(p); - if (mbx == (uintptr_t)kg->kg_completed) { - kg->kg_completed = td->td_mailbox; - /* - * The thread context may be taken away by - * other upcall threads when we unlock - * process lock. it's no longer valid to - * use it again in any other places. - */ - td->td_mailbox = NULL; - PROC_UNLOCK(p); - break; - } - PROC_UNLOCK(p); - } - td->td_usticks = 0; - return (0); - -bad: - PROC_LOCK(p); - sigexit(td, SIGILL); - return (error); -} - -/* - * Take the list of completed mailboxes for this KSEGRP and put them on this - * upcall's mailbox as it's the next one going up. 
- */ -static int -thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku) -{ - struct proc *p = kg->kg_proc; - void *addr; - uintptr_t mbx; - - addr = (void *)(&ku->ku_mailbox->km_completed); - for (;;) { - mbx = (uintptr_t)kg->kg_completed; - if (suword(addr, mbx)) { - PROC_LOCK(p); - psignal(p, SIGSEGV); - PROC_UNLOCK(p); - return (EFAULT); - } - PROC_LOCK(p); - if (mbx == (uintptr_t)kg->kg_completed) { - kg->kg_completed = NULL; - PROC_UNLOCK(p); - break; - } - PROC_UNLOCK(p); - } - return (0); -} - -/* - * This function should be called at statclock interrupt time - */ -int -thread_statclock(int user) -{ - struct thread *td = curthread; - - if (!(td->td_pflags & TDP_SA)) - return (0); - if (user) { - /* Current always do via ast() */ - mtx_lock_spin(&sched_lock); - td->td_flags |= TDF_ASTPENDING; - mtx_unlock_spin(&sched_lock); - td->td_uuticks++; - } else if (td->td_mailbox != NULL) - td->td_usticks++; - return (0); -} - -/* - * Export state clock ticks for userland - */ -static int -thread_update_usr_ticks(struct thread *td) -{ - struct proc *p = td->td_proc; - caddr_t addr; - u_int uticks; - - if (td->td_mailbox == NULL) - return (-1); - - if ((uticks = td->td_uuticks) != 0) { - td->td_uuticks = 0; - addr = (caddr_t)&td->td_mailbox->tm_uticks; - if (suword32(addr, uticks+fuword32(addr))) - goto error; - } - if ((uticks = td->td_usticks) != 0) { - td->td_usticks = 0; - addr = (caddr_t)&td->td_mailbox->tm_sticks; - if (suword32(addr, uticks+fuword32(addr))) - goto error; - } - return (0); - -error: - PROC_LOCK(p); - psignal(p, SIGSEGV); - PROC_UNLOCK(p); - return (-2); -} - -/* - * This function is intended to be used to initialize a spare thread - * for upcall. Initialize thread's large data area outside sched_lock - * for thread_schedule_upcall(). The crhold is also here to get it out - * from the schedlock as it has a mutex op itself. - * XXX BUG.. we need to get the cr ref after the thread has - * checked and chenged its own, not 6 months before... - */ -void -thread_alloc_spare(struct thread *td) -{ - struct thread *spare; - - if (td->td_standin) - return; - spare = thread_alloc(); - td->td_standin = spare; - bzero(&spare->td_startzero, - __rangeof(struct thread, td_startzero, td_endzero)); - spare->td_proc = td->td_proc; - spare->td_ucred = crhold(td->td_ucred); -} - -/* - * Create a thread and schedule it for upcall on the KSE given. - * Use our thread's standin so that we don't have to allocate one. - */ -struct thread * -thread_schedule_upcall(struct thread *td, struct kse_upcall *ku) -{ - struct thread *td2; - - mtx_assert(&sched_lock, MA_OWNED); - - /* - * Schedule an upcall thread on specified kse_upcall, - * the kse_upcall must be free. - * td must have a spare thread. - */ - KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__)); - if ((td2 = td->td_standin) != NULL) { - td->td_standin = NULL; - } else { - panic("no reserve thread when scheduling an upcall"); - return (NULL); - } - CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)", - td2, td->td_proc->p_pid, td->td_proc->p_comm); - /* - * Bzero already done in thread_alloc_spare() because we can't - * do the crhold here because we are in schedlock already. 
- */ - bcopy(&td->td_startcopy, &td2->td_startcopy, - __rangeof(struct thread, td_startcopy, td_endcopy)); - thread_link(td2, ku->ku_ksegrp); - /* inherit parts of blocked thread's context as a good template */ - cpu_set_upcall(td2, td); - /* Let the new thread become owner of the upcall */ - ku->ku_owner = td2; - td2->td_upcall = ku; - td2->td_flags = 0; - td2->td_pflags = TDP_SA|TDP_UPCALLING; - td2->td_state = TDS_CAN_RUN; - td2->td_inhibitors = 0; - SIGFILLSET(td2->td_sigmask); - SIG_CANTMASK(td2->td_sigmask); - sched_fork_thread(td, td2); - return (td2); /* bogus.. should be a void function */ -} - -/* - * It is only used when thread generated a trap and process is being - * debugged. - */ -void -thread_signal_add(struct thread *td, ksiginfo_t *ksi) -{ - struct proc *p; - struct sigacts *ps; - int error; - - p = td->td_proc; - PROC_LOCK_ASSERT(p, MA_OWNED); - ps = p->p_sigacts; - mtx_assert(&ps->ps_mtx, MA_OWNED); - - mtx_unlock(&ps->ps_mtx); - SIGADDSET(td->td_sigmask, ksi->ksi_signo); - PROC_UNLOCK(p); - error = copyout(&ksi->ksi_info, &td->td_mailbox->tm_syncsig, - sizeof(siginfo_t)); - if (error) { - PROC_LOCK(p); - sigexit(td, SIGSEGV); - } - PROC_LOCK(p); - mtx_lock(&ps->ps_mtx); -} -#include "opt_sched.h" -struct thread * -thread_switchout(struct thread *td, int flags, struct thread *nextthread) -{ - struct kse_upcall *ku; - struct thread *td2; - - mtx_assert(&sched_lock, MA_OWNED); - - /* - * If the outgoing thread is in threaded group and has never - * scheduled an upcall, decide whether this is a short - * or long term event and thus whether or not to schedule - * an upcall. - * If it is a short term event, just suspend it in - * a way that takes its KSE with it. - * Select the events for which we want to schedule upcalls. - * For now it's just sleep or if thread is suspended but - * process wide suspending flag is not set (debugger - * suspends thread). - * XXXKSE eventually almost any inhibition could do. - */ - if (TD_CAN_UNBIND(td) && (td->td_standin) && - (TD_ON_SLEEPQ(td) || (TD_IS_SUSPENDED(td) && - !P_SHOULDSTOP(td->td_proc)))) { - /* - * Release ownership of upcall, and schedule an upcall - * thread, this new upcall thread becomes the owner of - * the upcall structure. It will be ahead of us in the - * run queue, so as we are stopping, it should either - * start up immediatly, or at least before us if - * we release our slot. - */ - ku = td->td_upcall; - ku->ku_owner = NULL; - td->td_upcall = NULL; - td->td_pflags &= ~TDP_CAN_UNBIND; - td2 = thread_schedule_upcall(td, ku); - if (flags & SW_INVOL || nextthread) { - setrunqueue(td2, SRQ_YIELDING); - } else { - /* Keep up with reality.. we have one extra thread - * in the picture.. and it's 'running'. - */ - return td2; - } - } - return (nextthread); -} - -/* - * Setup done on the thread when it enters the kernel. - */ -void -thread_user_enter(struct thread *td) -{ - struct proc *p = td->td_proc; - struct ksegrp *kg; - struct kse_upcall *ku; - struct kse_thr_mailbox *tmbx; - uint32_t flags; - - /* - * First check that we shouldn't just abort. we - * can suspend it here or just exit. - */ - if (__predict_false(P_SHOULDSTOP(p))) { - PROC_LOCK(p); - thread_suspend_check(0); - PROC_UNLOCK(p); - } - - if (!(td->td_pflags & TDP_SA)) - return; - - /* - * If we are doing a syscall in a KSE environment, - * note where our mailbox is. 
- */ - - kg = td->td_ksegrp; - ku = td->td_upcall; - - KASSERT(ku != NULL, ("no upcall owned")); - KASSERT(ku->ku_owner == td, ("wrong owner")); - KASSERT(!TD_CAN_UNBIND(td), ("can unbind")); - - if (td->td_standin == NULL) - thread_alloc_spare(td); - ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags); - tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread); - if ((tmbx == NULL) || (tmbx == (void *)-1L) || - (ku->ku_mflags & KMF_NOUPCALL)) { - td->td_mailbox = NULL; - } else { - flags = fuword32(&tmbx->tm_flags); - /* - * On some architectures, TP register points to thread - * mailbox but not points to kse mailbox, and userland - * can not atomically clear km_curthread, but can - * use TP register, and set TMF_NOUPCALL in thread - * flag to indicate a critical region. - */ - if (flags & TMF_NOUPCALL) { - td->td_mailbox = NULL; - } else { - td->td_mailbox = tmbx; - td->td_pflags |= TDP_CAN_UNBIND; - if (__predict_false(p->p_flag & P_TRACED)) { - flags = fuword32(&tmbx->tm_dflags); - if (flags & TMDF_SUSPEND) { - mtx_lock_spin(&sched_lock); - /* fuword can block, check again */ - if (td->td_upcall) - ku->ku_flags |= KUF_DOUPCALL; - mtx_unlock_spin(&sched_lock); - } - } - } - } -} - -/* - * The extra work we go through if we are a threaded process when we - * return to userland. - * - * If we are a KSE process and returning to user mode, check for - * extra work to do before we return (e.g. for more syscalls - * to complete first). If we were in a critical section, we should - * just return to let it finish. Same if we were in the UTS (in - * which case the mailbox's context's busy indicator will be set). - * The only traps we suport will have set the mailbox. - * We will clear it here. - */ -int -thread_userret(struct thread *td, struct trapframe *frame) -{ - struct kse_upcall *ku; - struct ksegrp *kg, *kg2; - struct proc *p; - struct timespec ts; - int error = 0, upcalls, uts_crit; - - /* Nothing to do with bound thread */ - if (!(td->td_pflags & TDP_SA)) - return (0); - - /* - * Update stat clock count for userland - */ - if (td->td_mailbox != NULL) { - thread_update_usr_ticks(td); - uts_crit = 0; - } else { - uts_crit = 1; - } - - p = td->td_proc; - kg = td->td_ksegrp; - ku = td->td_upcall; - - /* - * Optimisation: - * This thread has not started any upcall. - * If there is no work to report other than ourself, - * then it can return direct to userland. - */ - if (TD_CAN_UNBIND(td)) { - td->td_pflags &= ~TDP_CAN_UNBIND; - if ((td->td_flags & TDF_NEEDSIGCHK) == 0 && - (kg->kg_completed == NULL) && - (ku->ku_flags & KUF_DOUPCALL) == 0 && - (kg->kg_upquantum && ticks < kg->kg_nextupcall)) { - nanotime(&ts); - error = copyout(&ts, - (caddr_t)&ku->ku_mailbox->km_timeofday, - sizeof(ts)); - td->td_mailbox = 0; - ku->ku_mflags = 0; - if (error) - goto out; - return (0); - } - thread_export_context(td, 0); - /* - * There is something to report, and we own an upcall - * structure, we can go to userland. - * Turn ourself into an upcall thread. 
- */ - td->td_pflags |= TDP_UPCALLING; - } else if (td->td_mailbox && (ku == NULL)) { - thread_export_context(td, 1); - PROC_LOCK(p); - if (kg->kg_upsleeps) - wakeup(&kg->kg_completed); - WITNESS_WARN(WARN_PANIC, &p->p_mtx.mtx_object, - "thread exiting in userret"); - sigqueue_flush(&td->td_sigqueue); - mtx_lock_spin(&sched_lock); - thread_stopped(p); - thread_exit(); - /* NOTREACHED */ - } - - KASSERT(ku != NULL, ("upcall is NULL")); - KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind")); - - if (p->p_numthreads > max_threads_per_proc) { - max_threads_hits++; - PROC_LOCK(p); - mtx_lock_spin(&sched_lock); - p->p_maxthrwaits++; - while (p->p_numthreads > max_threads_per_proc) { - upcalls = 0; - FOREACH_KSEGRP_IN_PROC(p, kg2) { - if (kg2->kg_numupcalls == 0) - upcalls++; - else - upcalls += kg2->kg_numupcalls; - } - if (upcalls >= max_threads_per_proc) - break; - mtx_unlock_spin(&sched_lock); - if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH, - "maxthreads", hz/10) != EWOULDBLOCK) { - mtx_lock_spin(&sched_lock); - break; - } else { - mtx_lock_spin(&sched_lock); - } - } - p->p_maxthrwaits--; - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); - } - - if (td->td_pflags & TDP_UPCALLING) { - uts_crit = 0; - kg->kg_nextupcall = ticks + kg->kg_upquantum; - /* - * There is no more work to do and we are going to ride - * this thread up to userland as an upcall. - * Do the last parts of the setup needed for the upcall. - */ - CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)", - td, td->td_proc->p_pid, td->td_proc->p_comm); - - td->td_pflags &= ~TDP_UPCALLING; - if (ku->ku_flags & KUF_DOUPCALL) { - mtx_lock_spin(&sched_lock); - ku->ku_flags &= ~KUF_DOUPCALL; - mtx_unlock_spin(&sched_lock); - } - /* - * Set user context to the UTS - */ - if (!(ku->ku_mflags & KMF_NOUPCALL)) { - cpu_set_upcall_kse(td, ku->ku_func, ku->ku_mailbox, - &ku->ku_stack); - PROC_LOCK(p); - if (p->p_flag & P_TRACED) { - _PHOLD(p); - ptrace_clear_single_step(td); - _PRELE(p); - } - PROC_UNLOCK(p); - error = suword32(&ku->ku_mailbox->km_lwp, - td->td_tid); - if (error) - goto out; - error = suword(&ku->ku_mailbox->km_curthread, 0); - if (error) - goto out; - } - - /* - * Unhook the list of completed threads. - * anything that completes after this gets to - * come in next time. - * Put the list of completed thread mailboxes on - * this KSE's mailbox. - */ - if (!(ku->ku_mflags & KMF_NOCOMPLETED) && - (error = thread_link_mboxes(kg, ku)) != 0) - goto out; - } - if (!uts_crit) { - nanotime(&ts); - error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts)); - } - -out: - if (error) { - /* - * Things are going to be so screwed we should just kill - * the process. - * how do we do that? - */ - PROC_LOCK(p); - psignal(p, SIGSEGV); - PROC_UNLOCK(p); - } else { - /* - * Optimisation: - * Ensure that we have a spare thread available, - * for when we re-enter the kernel. - */ - if (td->td_standin == NULL) - thread_alloc_spare(td); - } - - ku->ku_mflags = 0; - td->td_mailbox = NULL; - td->td_usticks = 0; - return (error); /* go sync */ -} - -/* - * called after ptrace resumed a process, force all - * virtual CPUs to schedule upcall for SA process, - * because debugger may have changed something in userland, - * we should notice UTS as soon as possible. 
- */ -void -thread_continued(struct proc *p) -{ - struct ksegrp *kg; - struct kse_upcall *ku; - struct thread *td; - - PROC_LOCK_ASSERT(p, MA_OWNED); - KASSERT(P_SHOULDSTOP(p), ("process not stopped")); - - if (!(p->p_flag & P_SA)) - return; - - if (p->p_flag & P_TRACED) { - FOREACH_KSEGRP_IN_PROC(p, kg) { - td = TAILQ_FIRST(&kg->kg_threads); - if (td == NULL) - continue; - /* not a SA group, nothing to do */ - if (!(td->td_pflags & TDP_SA)) - continue; - FOREACH_UPCALL_IN_GROUP(kg, ku) { - mtx_lock_spin(&sched_lock); - ku->ku_flags |= KUF_DOUPCALL; - mtx_unlock_spin(&sched_lock); - wakeup(&kg->kg_completed); - } - } - } -} Index: kern/kern_poll.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_poll.c,v retrieving revision 1.26 diff -u -r1.26 kern_poll.c --- kern/kern_poll.c 17 Apr 2006 18:20:37 -0000 1.26 +++ kern/kern_poll.c 4 Jun 2006 17:59:34 -0000 @@ -581,7 +581,7 @@ rtp.prio = RTP_PRIO_MAX; /* lowest priority */ rtp.type = RTP_PRIO_IDLE; mtx_lock_spin(&sched_lock); - rtp_to_pri(&rtp, td->td_ksegrp); + rtp_to_pri(&rtp, td); mtx_unlock_spin(&sched_lock); for (;;) { Index: kern/kern_proc.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_proc.c,v retrieving revision 1.240 diff -u -r1.240 kern_proc.c --- kern/kern_proc.c 11 Feb 2006 09:33:06 -0000 1.240 +++ kern/kern_proc.c 4 Jun 2006 21:21:02 -0000 @@ -142,9 +142,6 @@ { struct proc *p; struct thread *td; -#ifdef INVARIANTS - struct ksegrp *kg; -#endif /* INVARIANTS checks go here */ p = (struct proc *)mem; @@ -152,10 +149,7 @@ #ifdef INVARIANTS KASSERT((p->p_numthreads == 1), ("bad number of threads in exiting process")); - KASSERT((p->p_numksegrps == 1), ("free proc with > 1 ksegrp")); KASSERT((td != NULL), ("proc_dtor: bad thread pointer")); - kg = FIRST_KSEGRP_IN_PROC(p); - KASSERT((kg != NULL), ("proc_dtor: bad kg pointer")); KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr")); #endif @@ -178,17 +172,14 @@ { struct proc *p; struct thread *td; - struct ksegrp *kg; p = (struct proc *)mem; p->p_sched = (struct p_sched *)&p[1]; td = thread_alloc(); - kg = ksegrp_alloc(); bzero(&p->p_mtx, sizeof(struct mtx)); mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); p->p_stats = pstats_alloc(); - proc_linkup(p, kg, td); - sched_newproc(p, kg, td); + proc_linkup(p, td); return (0); } @@ -204,7 +195,6 @@ p = (struct proc *)mem; pstats_free(p->p_stats); - ksegrp_free(FIRST_KSEGRP_IN_PROC(p)); thread_free(FIRST_THREAD_IN_PROC(p)); mtx_destroy(&p->p_mtx); if (p->p_ksi != NULL) @@ -760,7 +750,6 @@ static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) { - struct ksegrp *kg; struct proc *p; p = td->td_proc; @@ -800,14 +789,6 @@ kp->ki_stat = SIDL; } - kg = td->td_ksegrp; - - /* things in the KSE GROUP */ - kp->ki_estcpu = kg->kg_estcpu; - kp->ki_slptime = kg->kg_slptime; - kp->ki_pri.pri_user = kg->kg_user_pri; - kp->ki_pri.pri_class = kg->kg_pri_class; - /* Things in the thread */ kp->ki_wchan = td->td_wchan; kp->ki_pri.pri_level = td->td_priority; @@ -820,6 +801,10 @@ kp->ki_pcb = td->td_pcb; kp->ki_kstack = (void *)td->td_kstack; kp->ki_pctcpu = sched_pctcpu(td); + kp->ki_estcpu = td->td_estcpu; + kp->ki_slptime = td->td_slptime; + kp->ki_pri.pri_class = td->td_pri_class; + kp->ki_pri.pri_user = td->td_user_pri; /* We can't get this anymore but ps etc never used it anyway. 
*/ kp->ki_rqindex = 0; Index: kern/kern_resource.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_resource.c,v retrieving revision 1.158 diff -u -r1.158 kern_resource.c --- kern/kern_resource.c 11 Mar 2006 10:48:19 -0000 1.158 +++ kern/kern_resource.c 4 Jun 2006 21:51:19 -0000 @@ -292,7 +292,7 @@ { struct proc *curp; struct proc *p; - struct ksegrp *kg; + struct thread *tdp; struct rtprio rtp; int cierror, error; @@ -328,14 +328,14 @@ * as leaving it zero. */ if (uap->pid == 0) { - pri_to_rtp(td->td_ksegrp, &rtp); + pri_to_rtp(td, &rtp); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; - FOREACH_KSEGRP_IN_PROC(p, kg) { - pri_to_rtp(kg, &rtp2); + FOREACH_THREAD_IN_PROC(p, tdp) { + pri_to_rtp(tdp, &rtp2); if (rtp2.type < rtp.type || (rtp2.type == rtp.type && rtp2.prio < rtp.prio)) { @@ -378,18 +378,17 @@ /* * If we are setting our own priority, set just our - * KSEGRP but if we are doing another process, - * do all the groups on that process. If we + * thread but if we are doing another process, + * do all the threads on that process. If we * specify our own pid we do the latter. */ mtx_lock_spin(&sched_lock); if (uap->pid == 0) { - error = rtp_to_pri(&rtp, td->td_ksegrp); + error = rtp_to_pri(&rtp, td); } else { - FOREACH_KSEGRP_IN_PROC(p, kg) { - if ((error = rtp_to_pri(&rtp, kg)) != 0) { + FOREACH_THREAD_IN_PROC(p, td) { + if ((error = rtp_to_pri(&rtp, td)) != 0) break; - } } } mtx_unlock_spin(&sched_lock); @@ -403,7 +402,7 @@ } int -rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg) +rtp_to_pri(struct rtprio *rtp, struct thread *td) { mtx_assert(&sched_lock, MA_OWNED); @@ -411,43 +410,42 @@ return (EINVAL); switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: - kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio; + td->td_user_pri = PRI_MIN_REALTIME + rtp->prio; break; case RTP_PRIO_NORMAL: - kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio; + td->td_user_pri = PRI_MIN_TIMESHARE + rtp->prio; break; case RTP_PRIO_IDLE: - kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio; + td->td_user_pri = PRI_MIN_IDLE + rtp->prio; break; default: return (EINVAL); } - sched_class(kg, rtp->type); - if (curthread->td_ksegrp == kg) { - sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */ - } + sched_class(td, rtp->type); /* XXX fix */ + if (curthread == td) + sched_prio(curthread, td->td_user_pri); /* XXX dubious */ return (0); } void -pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp) +pri_to_rtp(struct thread *td, struct rtprio *rtp) { mtx_assert(&sched_lock, MA_OWNED); - switch (PRI_BASE(kg->kg_pri_class)) { + switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: - rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME; + rtp->prio = td->td_user_pri - PRI_MIN_REALTIME; break; case PRI_TIMESHARE: - rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE; + rtp->prio = td->td_user_pri - PRI_MIN_TIMESHARE; break; case PRI_IDLE: - rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE; + rtp->prio = td->td_user_pri - PRI_MIN_IDLE; break; default: break; } - rtp->type = kg->kg_pri_class; + rtp->type = td->td_pri_class; } #if defined(COMPAT_43) Index: kern/kern_sig.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_sig.c,v retrieving revision 1.331 diff -u -r1.331 kern_sig.c --- kern/kern_sig.c 12 May 2006 05:04:44 -0000 1.331 +++ kern/kern_sig.c 4 Jun 2006 22:15:55 -0000 @@ -96,7 +96,6 @@ static struct thread *sigtd(struct proc *p, int sig, int prop); static int kern_sigtimedwait(struct 
thread *, sigset_t, ksiginfo_t *, struct timespec *); -static int do_tdsignal(struct proc *, struct thread *, int, ksiginfo_t *); static void sigqueue_start(void); static uma_zone_t ksiginfo_zone = NULL; @@ -570,7 +569,7 @@ signotify(struct thread *td) { struct proc *p; - sigset_t set, saved; + sigset_t set; p = td->td_proc; @@ -581,8 +580,6 @@ * previously masked by all threads to our sigqueue. */ set = p->p_sigqueue.sq_signals; - if (p->p_flag & P_SA) - saved = p->p_sigqueue.sq_signals; SIGSETNAND(set, td->td_sigmask); if (! SIGISEMPTY(set)) sigqueue_move_set(&p->p_sigqueue, &td->td_sigqueue, &set); @@ -591,13 +588,6 @@ td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING; mtx_unlock_spin(&sched_lock); } - if ((p->p_flag & P_SA) && !(p->p_flag & P_SIGEVENT)) { - if (!SIGSETEQ(saved, p->p_sigqueue.sq_signals)) { - /* pending set changed */ - p->p_flag |= P_SIGEVENT; - wakeup(&p->p_siglist); - } - } } int @@ -749,11 +739,6 @@ if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || (sigprop(sig) & SA_IGNORE && ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)) { - if ((p->p_flag & P_SA) && - SIGISMEMBER(p->p_sigqueue.sq_signals, sig)) { - p->p_flag |= P_SIGEVENT; - wakeup(&p->p_siglist); - } /* never to be seen again */ sigqueue_delete_proc(p, sig); if (sig != SIGCONT) @@ -1211,10 +1196,6 @@ continue; if (!SIGISMEMBER(td->td_sigqueue.sq_signals, i)) { if (SIGISMEMBER(p->p_sigqueue.sq_signals, i)) { - if (p->p_flag & P_SA) { - p->p_flag |= P_SIGEVENT; - wakeup(&p->p_siglist); - } sigqueue_move(&p->p_sigqueue, &td->td_sigqueue, i); } else @@ -1887,7 +1868,6 @@ { struct sigacts *ps; struct proc *p; - int error; int sig; int code; @@ -1896,23 +1876,7 @@ code = ksi->ksi_code; KASSERT(_SIG_VALID(sig), ("invalid signal")); - if (td->td_pflags & TDP_SA) { - if (td->td_mailbox == NULL) - thread_user_enter(td); - PROC_LOCK(p); - SIGDELSET(td->td_sigmask, sig); - mtx_lock_spin(&sched_lock); - /* - * Force scheduling an upcall, so UTS has chance to - * process the signal before thread runs again in - * userland. 
- */ - if (td->td_upcall) - td->td_upcall->ku_flags |= KUF_DOUPCALL; - mtx_unlock_spin(&sched_lock); - } else { - PROC_LOCK(p); - } + PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) && @@ -1923,27 +1887,8 @@ ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)], &td->td_sigmask, code); #endif - if (!(td->td_pflags & TDP_SA)) - (*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)], + (*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)], ksi, &td->td_sigmask); - else if (td->td_mailbox == NULL) { - mtx_unlock(&ps->ps_mtx); - /* UTS caused a sync signal */ - p->p_code = code; /* XXX for core dump/debugger */ - p->p_sig = sig; /* XXX to verify code */ - sigexit(td, sig); - } else { - mtx_unlock(&ps->ps_mtx); - SIGADDSET(td->td_sigmask, sig); - PROC_UNLOCK(p); - error = copyout(&ksi->ksi_info, &td->td_mailbox->tm_syncsig, - sizeof(siginfo_t)); - PROC_LOCK(p); - /* UTS memory corrupted */ - if (error) - sigexit(td, SIGSEGV); - mtx_lock(&ps->ps_mtx); - } SIGSETOR(td->td_sigmask, ps->ps_catchmask[_SIG_IDX(sig)]); if (!SIGISMEMBER(ps->ps_signodefer, sig)) SIGADDSET(td->td_sigmask, sig); @@ -2057,25 +2002,6 @@ int tdsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi) { - sigset_t saved; - int ret; - - if (p->p_flag & P_SA) - saved = p->p_sigqueue.sq_signals; - ret = do_tdsignal(p, td, sig, ksi); - if ((p->p_flag & P_SA) && !(p->p_flag & P_SIGEVENT)) { - if (!SIGSETEQ(saved, p->p_sigqueue.sq_signals)) { - /* pending set changed */ - p->p_flag |= P_SIGEVENT; - wakeup(&p->p_siglist); - } - } - return (ret); -} - -static int -do_tdsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi) -{ sig_t action; sigqueue_t *sigqueue; int prop; @@ -2086,9 +2012,9 @@ PROC_LOCK_ASSERT(p, MA_OWNED); if (!_SIG_VALID(sig)) - panic("do_tdsignal(): invalid signal"); + panic("tdsignal(): invalid signal"); - KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("do_tdsignal: ksi on queue")); + KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("tdsignal: ksi on queue")); /* * IEEE Std 1003.1-2001: return success when killing a zombie. @@ -2250,11 +2176,6 @@ /* * The process wants to catch it so it needs * to run at least one thread, but which one? - * It would seem that the answer would be to - * run an upcall in the next KSE to run, and - * deliver the signal that way. In a NON KSE - * process, we need to make sure that the - * single thread is runnable asap. * XXXKSE for now however, make them all run. */ goto runfast; @@ -2548,8 +2469,6 @@ */ if (SIGISMEMBER(ps->ps_sigignore, sig) && (traced == 0)) { sigqueue_delete(&td->td_sigqueue, sig); - if (td->td_pflags & TDP_SA) - SIGADDSET(td->td_sigmask, sig); continue; } if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) { @@ -2560,9 +2479,6 @@ newsig = ptracestop(td, sig); mtx_lock(&ps->ps_mtx); - if (td->td_pflags & TDP_SA) - SIGADDSET(td->td_sigmask, sig); - if (sig != newsig) { ksiginfo_t ksi; /* @@ -2586,8 +2502,6 @@ * signal is being masked, look for other signals. */ SIGADDSET(td->td_sigqueue.sq_signals, sig); - if (td->td_pflags & TDP_SA) - SIGDELSET(td->td_sigmask, sig); if (SIGISMEMBER(td->td_sigmask, sig)) continue; signotify(td); @@ -2750,7 +2664,7 @@ mtx_lock(&ps->ps_mtx); } - if (!(td->td_pflags & TDP_SA) && action == SIG_DFL) { + if (action == SIG_DFL) { /* * Default action, where the default is to kill * the process. (Other cases were ignored above.) 
@@ -2759,13 +2673,6 @@ sigexit(td, sig); /* NOTREACHED */ } else { - if (td->td_pflags & TDP_SA) { - if (sig == SIGKILL) { - mtx_unlock(&ps->ps_mtx); - sigexit(td, sig); - } - } - /* * If we get here, the signal must be caught. */ @@ -2808,10 +2715,7 @@ p->p_code = 0; p->p_sig = 0; } - if (td->td_pflags & TDP_SA) - thread_signal_add(curthread, &ksi); - else - (*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask); + (*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask); } } Index: kern/kern_subr.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_subr.c,v retrieving revision 1.96 diff -u -r1.96 kern_subr.c --- kern/kern_subr.c 6 Jan 2005 23:35:39 -0000 1.96 +++ kern/kern_subr.c 4 Jun 2006 18:07:13 -0000 @@ -430,7 +430,7 @@ td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); - sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ + sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); Index: kern/kern_switch.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_switch.c,v retrieving revision 1.121 diff -u -r1.121 kern_switch.c --- kern/kern_switch.c 1 Jun 2006 22:45:56 -0000 1.121 +++ kern/kern_switch.c 4 Jun 2006 22:23:36 -0000 @@ -24,67 +24,6 @@ * SUCH DAMAGE. */ -/*** -Here is the logic.. - -If there are N processors, then there are at most N KSEs (kernel -schedulable entities) working to process threads that belong to a -KSEGROUP (kg). If there are X of these KSEs actually running at the -moment in question, then there are at most M (N-X) of these KSEs on -the run queue, as running KSEs are not on the queue. - -Runnable threads are queued off the KSEGROUP in priority order. -If there are M or more threads runnable, the top M threads -(by priority) are 'preassigned' to the M KSEs not running. The KSEs take -their priority from those threads and are put on the run queue. - -The last thread that had a priority high enough to have a KSE associated -with it, AND IS ON THE RUN QUEUE is pointed to by -kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs -assigned as all the available KSEs are activly running, or because there -are no threads queued, that pointer is NULL. - -When a KSE is removed from the run queue to become runnable, we know -it was associated with the highest priority thread in the queue (at the head -of the queue). If it is also the last assigned we know M was 1 and must -now be 0. Since the thread is no longer queued that pointer must be -removed from it. Since we know there were no more KSEs available, -(M was 1 and is now 0) and since we are not FREEING our KSE -but using it, we know there are STILL no more KSEs available, we can prove -that the next thread in the ksegrp list will not have a KSE to assign to -it, so we can show that the pointer must be made 'invalid' (NULL). - -The pointer exists so that when a new thread is made runnable, it can -have its priority compared with the last assigned thread to see if -it should 'steal' its KSE or not.. i.e. is it 'earlier' -on the list than that thread or later.. If it's earlier, then the KSE is -removed from the last assigned (which is now not assigned a KSE) -and reassigned to the new thread, which is placed earlier in the list. -The pointer is then backed up to the previous thread (which may or may not -be the new thread). 
- -When a thread sleeps or is removed, the KSE becomes available and if there -are queued threads that are not assigned KSEs, the highest priority one of -them is assigned the KSE, which is then placed back on the run queue at -the approipriate place, and the kg->kg_last_assigned pointer is adjusted down -to point to it. - -The following diagram shows 2 KSEs and 3 threads from a single process. - - RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads) - \ \____ - \ \ - KSEGROUP---thread--thread--thread (queued in priority order) - \ / - \_______________/ - (last_assigned) - -The result of this scheme is that the M available KSEs are always -queued at the priorities they have inherrited from the M highest priority -threads for that KSEGROUP. If this situation changes, the KSEs are -reassigned to keep this true. -***/ - #include __FBSDID("$FreeBSD: src/sys/kern/kern_switch.c,v 1.121 2006/06/01 22:45:56 cognet Exp $"); @@ -143,51 +82,35 @@ * Functions that manipulate runnability from a thread perspective. * ************************************************************************/ /* - * Select the KSE that will be run next. From that find the thread, and - * remove it from the KSEGRP's run queue. If there is thread clustering, - * this will be what does it. + * Select the thread that will be run next. */ struct thread * choosethread(void) { - struct kse *ke; struct thread *td; - struct ksegrp *kg; #if defined(SMP) && (defined(__i386__) || defined(__amd64__)) if (smp_active == 0 && PCPU_GET(cpuid) != 0) { /* Shutting down, run idlethread on AP's */ td = PCPU_GET(idlethread); - ke = td->td_kse; CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); - ke->ke_flags |= KEF_DIDRUN; + td->td_kse->ke_flags |= KEF_DIDRUN; TD_SET_RUNNING(td); return (td); } #endif retry: - ke = sched_choose(); - if (ke) { - td = ke->ke_thread; - KASSERT((td->td_kse == ke), ("kse/thread mismatch")); - kg = ke->ke_ksegrp; - if (td->td_proc->p_flag & P_HADTHREADS) { - if (kg->kg_last_assigned == td) { - kg->kg_last_assigned = TAILQ_PREV(td, - threadqueue, td_runq); - } - TAILQ_REMOVE(&kg->kg_runq, td, td_runq); - } + td = sched_choose(); + if (td) { CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d", td, td->td_priority); } else { /* Simulate runq_choose() having returned the idle thread */ td = PCPU_GET(idlethread); - ke = td->td_kse; CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); } - ke->ke_flags |= KEF_DIDRUN; + td->td_kse->ke_flags |= KEF_DIDRUN; /* * If we are in panic, only allow system threads, @@ -205,96 +128,11 @@ } /* - * Given a surplus system slot, try assign a new runnable thread to it. - * Called from: - * sched_thread_exit() (local) - * sched_switch() (local) - * sched_thread_exit() (local) - * remrunqueue() (local) (not at the moment) - */ -static void -slot_fill(struct ksegrp *kg) -{ - struct thread *td; - - mtx_assert(&sched_lock, MA_OWNED); - while (kg->kg_avail_opennings > 0) { - /* - * Find the first unassigned thread - */ - if ((td = kg->kg_last_assigned) != NULL) - td = TAILQ_NEXT(td, td_runq); - else - td = TAILQ_FIRST(&kg->kg_runq); - - /* - * If we found one, send it to the system scheduler. - */ - if (td) { - kg->kg_last_assigned = td; - sched_add(td, SRQ_YIELDING); - CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg); - } else { - /* no threads to use up the slots. quit now */ - break; - } - } -} - -#ifdef SCHED_4BSD -/* - * Remove a thread from its KSEGRP's run queue. 
- * This in turn may remove it from a KSE if it was already assigned - * to one, possibly causing a new thread to be assigned to the KSE - * and the KSE getting a new priority. - */ -static void -remrunqueue(struct thread *td) -{ - struct thread *td2, *td3; - struct ksegrp *kg; - struct kse *ke; - - mtx_assert(&sched_lock, MA_OWNED); - KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue")); - kg = td->td_ksegrp; - ke = td->td_kse; - CTR1(KTR_RUNQ, "remrunqueue: td%p", td); - TD_SET_CAN_RUN(td); - /* - * If it is not a threaded process, take the shortcut. - */ - if ((td->td_proc->p_flag & P_HADTHREADS) == 0) { - /* remve from sys run queue and free up a slot */ - sched_rem(td); - return; - } - td3 = TAILQ_PREV(td, threadqueue, td_runq); - TAILQ_REMOVE(&kg->kg_runq, td, td_runq); - if (ke->ke_state == KES_ONRUNQ) { - /* - * This thread has been assigned to the system run queue. - * We need to dissociate it and try assign the - * KSE to the next available thread. Then, we should - * see if we need to move the KSE in the run queues. - */ - sched_rem(td); - td2 = kg->kg_last_assigned; - KASSERT((td2 != NULL), ("last assigned has wrong value")); - if (td2 == td) - kg->kg_last_assigned = td3; - /* slot_fill(kg); */ /* will replace it with another */ - } -} -#endif - -/* * Change the priority of a thread that is on the run queue. */ void -adjustrunqueue( struct thread *td, int newpri) +adjustrunqueue(struct thread *td, int newpri) { - struct ksegrp *kg; struct kse *ke; mtx_assert(&sched_lock, MA_OWNED); @@ -302,193 +140,20 @@ ke = td->td_kse; CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td); - /* - * If it is not a threaded process, take the shortcut. - */ - if ((td->td_proc->p_flag & P_HADTHREADS) == 0) { - /* We only care about the kse in the run queue. */ - td->td_priority = newpri; - if (ke->ke_rqindex != (newpri / RQ_PPQ)) { - sched_rem(td); - sched_add(td, SRQ_BORING); - } - return; - } - - /* It is a threaded process */ - kg = td->td_ksegrp; - if (ke->ke_state == KES_ONRUNQ -#ifdef SCHED_ULE - || ((ke->ke_flags & KEF_ASSIGNED) != 0 && - (ke->ke_flags & KEF_REMOVED) == 0) -#endif - ) { - if (kg->kg_last_assigned == td) { - kg->kg_last_assigned = - TAILQ_PREV(td, threadqueue, td_runq); - } - sched_rem(td); - } - TAILQ_REMOVE(&kg->kg_runq, td, td_runq); - TD_SET_CAN_RUN(td); + /* We only care about the kse in the run queue. */ td->td_priority = newpri; - setrunqueue(td, SRQ_BORING); -} - -/* - * This function is called when a thread is about to be put on a - * ksegrp run queue because it has been made runnable or its - * priority has been adjusted and the ksegrp does not have a - * free kse slot. It determines if a thread from the same ksegrp - * should be preempted. If so, it tries to switch threads - * if the thread is on the same cpu or notifies another cpu that - * it should switch threads. 
- */ - -static void -maybe_preempt_in_ksegrp(struct thread *td) -#if !defined(SMP) -{ - struct thread *running_thread; - - mtx_assert(&sched_lock, MA_OWNED); - running_thread = curthread; - - if (running_thread->td_ksegrp != td->td_ksegrp) - return; - - if (td->td_priority >= running_thread->td_priority) - return; -#ifdef PREEMPTION -#ifndef FULL_PREEMPTION - if (td->td_priority > PRI_MAX_ITHD) { - running_thread->td_flags |= TDF_NEEDRESCHED; - return; - } -#endif /* FULL_PREEMPTION */ - - if (running_thread->td_critnest > 1) - running_thread->td_owepreempt = 1; - else - mi_switch(SW_INVOL, NULL); - -#else /* PREEMPTION */ - running_thread->td_flags |= TDF_NEEDRESCHED; -#endif /* PREEMPTION */ - return; -} - -#else /* SMP */ -{ - struct thread *running_thread; - int worst_pri; - struct ksegrp *kg; - cpumask_t cpumask,dontuse; - struct pcpu *pc; - struct pcpu *best_pcpu; - struct thread *cputhread; - - mtx_assert(&sched_lock, MA_OWNED); - - running_thread = curthread; - -#if !defined(KSEG_PEEMPT_BEST_CPU) - if (running_thread->td_ksegrp != td->td_ksegrp) { -#endif - kg = td->td_ksegrp; - - /* if someone is ahead of this thread, wait our turn */ - if (td != TAILQ_FIRST(&kg->kg_runq)) - return; - - worst_pri = td->td_priority; - best_pcpu = NULL; - dontuse = stopped_cpus | idle_cpus_mask; - - /* - * Find a cpu with the worst priority that runs at thread from - * the same ksegrp - if multiple exist give first the last run - * cpu and then the current cpu priority - */ - - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { - cpumask = pc->pc_cpumask; - cputhread = pc->pc_curthread; - - if ((cpumask & dontuse) || - cputhread->td_ksegrp != kg) - continue; - - if (cputhread->td_priority > worst_pri) { - worst_pri = cputhread->td_priority; - best_pcpu = pc; - continue; - } - - if (cputhread->td_priority == worst_pri && - best_pcpu != NULL && - (td->td_lastcpu == pc->pc_cpuid || - (PCPU_GET(cpumask) == cpumask && - td->td_lastcpu != best_pcpu->pc_cpuid))) - best_pcpu = pc; - } - - /* Check if we need to preempt someone */ - if (best_pcpu == NULL) - return; - -#if defined(IPI_PREEMPTION) && defined(PREEMPTION) -#if !defined(FULL_PREEMPTION) - if (td->td_priority <= PRI_MAX_ITHD) -#endif /* ! FULL_PREEMPTION */ - { - ipi_selected(best_pcpu->pc_cpumask, IPI_PREEMPT); - return; - } -#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */ - - if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) { - best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED; - ipi_selected(best_pcpu->pc_cpumask, IPI_AST); - return; - } -#if !defined(KSEG_PEEMPT_BEST_CPU) - } -#endif - - if (td->td_priority >= running_thread->td_priority) - return; -#ifdef PREEMPTION - -#if !defined(FULL_PREEMPTION) - if (td->td_priority > PRI_MAX_ITHD) { - running_thread->td_flags |= TDF_NEEDRESCHED; + if (ke->ke_rqindex != (newpri / RQ_PPQ)) { + sched_rem(td); + sched_add(td, SRQ_BORING); } -#endif /* ! 
FULL_PREEMPTION */ - - if (running_thread->td_critnest > 1) - running_thread->td_owepreempt = 1; - else - mi_switch(SW_INVOL, NULL); - -#else /* PREEMPTION */ - running_thread->td_flags |= TDF_NEEDRESCHED; -#endif /* PREEMPTION */ - return; } -#endif /* !SMP */ - -int limitcount; void setrunqueue(struct thread *td, int flags) { - struct ksegrp *kg; - struct thread *td2; - struct thread *tda; - CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d", - td, td->td_ksegrp, td->td_proc->p_pid); + CTR2(KTR_RUNQ, "setrunqueue: td:%p pid:%d", + td, td->td_proc->p_pid); CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, curthread->td_proc->p_comm); @@ -498,97 +163,7 @@ KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), ("setrunqueue: bad thread state")); TD_SET_RUNQ(td); - kg = td->td_ksegrp; - if ((td->td_proc->p_flag & P_HADTHREADS) == 0) { - /* - * Common path optimisation: Only one of everything - * and the KSE is always already attached. - * Totally ignore the ksegrp run queue. - */ - if (kg->kg_avail_opennings != 1) { - if (limitcount < 1) { - limitcount++; - printf("pid %d: corrected slot count (%d->1)\n", - td->td_proc->p_pid, kg->kg_avail_opennings); - - } - kg->kg_avail_opennings = 1; - } - sched_add(td, flags); - return; - } - - /* - * If the concurrency has reduced, and we would go in the - * assigned section, then keep removing entries from the - * system run queue, until we are not in that section - * or there is room for us to be put in that section. - * What we MUST avoid is the case where there are threads of less - * priority than the new one scheduled, but it can not - * be scheduled itself. That would lead to a non contiguous set - * of scheduled threads, and everything would break. - */ - tda = kg->kg_last_assigned; - while ((kg->kg_avail_opennings <= 0) && - (tda && (tda->td_priority > td->td_priority))) { - /* - * None free, but there is one we can commandeer. - */ - CTR2(KTR_RUNQ, - "setrunqueue: kg:%p: take slot from td: %p", kg, tda); - sched_rem(tda); - tda = kg->kg_last_assigned = - TAILQ_PREV(tda, threadqueue, td_runq); - } - - /* - * Add the thread to the ksegrp's run queue at - * the appropriate place. - */ - TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { - if (td2->td_priority > td->td_priority) { - TAILQ_INSERT_BEFORE(td2, td, td_runq); - break; - } - } - if (td2 == NULL) { - /* We ran off the end of the TAILQ or it was empty. */ - TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq); - } - - /* - * If we have a slot to use, then put the thread on the system - * run queue and if needed, readjust the last_assigned pointer. - * it may be that we need to schedule something anyhow - * even if the availabel slots are -ve so that - * all the items < last_assigned are scheduled. - */ - if (kg->kg_avail_opennings > 0) { - if (tda == NULL) { - /* - * No pre-existing last assigned so whoever is first - * gets the slot.. (maybe us) - */ - td2 = TAILQ_FIRST(&kg->kg_runq); - kg->kg_last_assigned = td2; - } else if (tda->td_priority > td->td_priority) { - td2 = td; - } else { - /* - * We are past last_assigned, so - * give the next slot to whatever is next, - * which may or may not be us. 
- */ - td2 = TAILQ_NEXT(tda, td_runq); - kg->kg_last_assigned = td2; - } - sched_add(td2, flags); - } else { - CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d", - td, td->td_ksegrp, td->td_proc->p_pid); - if ((flags & SRQ_YIELDING) == 0) - maybe_preempt_in_ksegrp(td); - } + sched_add(td, flags); } /* @@ -700,22 +275,6 @@ */ MPASS(TD_ON_RUNQ(td)); MPASS(td->td_sched->ke_state != KES_ONRUNQ); - if (td->td_proc->p_flag & P_HADTHREADS) { - /* - * If this is a threaded process we actually ARE on the - * ksegrp run queue so take it off that first. - * Also undo any damage done to the last_assigned pointer. - * XXX Fix setrunqueue so this isn't needed - */ - struct ksegrp *kg; - - kg = td->td_ksegrp; - if (kg->kg_last_assigned == td) - kg->kg_last_assigned = - TAILQ_PREV(td, threadqueue, td_runq); - TAILQ_REMOVE(&kg->kg_runq, td, td_runq); - } - TD_SET_RUNNING(td); CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, td->td_proc->p_pid, td->td_proc->p_comm); @@ -830,11 +389,10 @@ rqh = &rq->rq_queues[pri]; CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p", ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh); - if (flags & SRQ_PREEMPTED) { + if (flags & SRQ_PREEMPTED) TAILQ_INSERT_HEAD(rqh, ke, ke_procq); - } else { + else TAILQ_INSERT_TAIL(rqh, ke, ke_procq); - } } /* @@ -921,7 +479,7 @@ struct rqhead *rqh; int pri; - KASSERT(ke->ke_proc->p_sflag & PS_INMEM, + KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM, ("runq_remove: process swapped out")); pri = ke->ke_rqindex; rqh = &rq->rq_queues[pri]; @@ -940,22 +498,6 @@ extern struct mtx kse_zombie_lock; /* - * Allocate scheduler specific per-process resources. - * The thread and ksegrp have already been linked in. - * In this case just set the default concurrency value. - * - * Called from: - * proc_init() (UMA init method) - */ -void -sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td) -{ - - /* This can go in sched_fork */ - sched_init_concurrency(kg); -} - -/* * thread is being either created or recycled. * Fix up the per-scheduler resources associated with it. * Called from: @@ -975,61 +517,4 @@ ke->ke_state = KES_THREAD; } -/* - * Set up an initial concurrency of 1 - * and set the given thread (if given) to be using that - * concurrency slot. - * May be used "offline"..before the ksegrp is attached to the world - * and thus wouldn't need schedlock in that case. - * Called from: - * thr_create() - * proc_init() (UMA) via sched_newproc() - */ -void -sched_init_concurrency(struct ksegrp *kg) -{ - - CTR1(KTR_RUNQ,"kg %p init slots and concurrency to 1", kg); - kg->kg_concurrency = 1; - kg->kg_avail_opennings = 1; -} - -/* - * Change the concurrency of an existing ksegrp to N - * Called from: - * kse_create() - * kse_exit() - * thread_exit() - * thread_single() - */ -void -sched_set_concurrency(struct ksegrp *kg, int concurrency) -{ - - CTR4(KTR_RUNQ,"kg %p set concurrency to %d, slots %d -> %d", - kg, - concurrency, - kg->kg_avail_opennings, - kg->kg_avail_opennings + (concurrency - kg->kg_concurrency)); - kg->kg_avail_opennings += (concurrency - kg->kg_concurrency); - kg->kg_concurrency = concurrency; -} - -/* - * Called from thread_exit() for all exiting thread - * - * Not to be confused with sched_exit_thread() - * that is only called from thread_exit() for threads exiting - * without the rest of the process exiting because it is also called from - * sched_exit() and we wouldn't want to call it twice. - * XXX This can probably be fixed. 
- */ -void -sched_thread_exit(struct thread *td) -{ - - SLOT_RELEASE(td->td_ksegrp); - slot_fill(td->td_ksegrp); -} - #endif /* KERN_SWITCH_INCLUDE */ Index: kern/kern_synch.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_synch.c,v retrieving revision 1.280 diff -u -r1.280 kern_synch.c --- kern/kern_synch.c 3 Jun 2006 20:49:44 -0000 1.280 +++ kern/kern_synch.c 4 Jun 2006 22:06:16 -0000 @@ -405,8 +405,6 @@ PCPU_SET(switchticks, ticks); CTR4(KTR_PROC, "mi_switch: old thread %p (kse %p, pid %ld, %s)", (void *)td, td->td_sched, (long)p->p_pid, p->p_comm); - if ((flags & SW_VOL) && (td->td_proc->p_flag & P_SA)) - newtd = thread_switchout(td, flags, newtd); #if (KTR_COMPILE & KTR_SCHED) != 0 if (td == PCPU_GET(idlethread)) CTR3(KTR_SCHED, "mi_switch: %p(%s) prio %d idle", @@ -542,9 +540,7 @@ int yield(struct thread *td, struct yield_args *uap) { - struct ksegrp *kg; - kg = td->td_ksegrp; mtx_assert(&Giant, MA_NOTOWNED); mtx_lock_spin(&sched_lock); sched_prio(td, PRI_MAX_TIMESHARE); Index: kern/kern_thr.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_thr.c,v retrieving revision 1.43 diff -u -r1.43 kern_thr.c --- kern/kern_thr.c 17 Apr 2006 18:20:37 -0000 1.43 +++ kern/kern_thr.c 4 Jun 2006 20:32:24 -0000 @@ -51,13 +51,6 @@ extern int max_groups_per_proc; SYSCTL_DECL(_kern_threads); -static int thr_scope = 0; -SYSCTL_INT(_kern_threads, OID_AUTO, thr_scope, CTLFLAG_RW, - &thr_scope, 0, "sys or proc scope scheduling"); - -static int thr_concurrency = 0; -SYSCTL_INT(_kern_threads, OID_AUTO, thr_concurrency, CTLFLAG_RW, - &thr_concurrency, 0, "a concurrency value if not default"); static int create_thread(struct thread *td, mcontext_t *ctx, void (*start_func)(void *), void *arg, @@ -111,31 +104,18 @@ { stack_t stack; struct thread *newtd; - struct ksegrp *kg, *newkg; struct proc *p; long id; - int error, scope_sys, linkkg; + int error; error = 0; p = td->td_proc; - kg = td->td_ksegrp; /* Have race condition but it is cheap. */ - if ((p->p_numksegrps >= max_groups_per_proc) || - (p->p_numthreads >= max_threads_per_proc)) { + if ((p->p_numthreads >= max_threads_per_proc)) return (EPROCLIM); - } - - /* Check PTHREAD_SCOPE_SYSTEM */ - scope_sys = (flags & THR_SYSTEM_SCOPE) != 0; - - /* sysctl overrides user's flag */ - if (thr_scope == 1) - scope_sys = 0; - else if (thr_scope == 2) - scope_sys = 1; - /* Initialize our td and new ksegrp.. */ + /* Initialize our td.. */ newtd = thread_alloc(); /* @@ -186,66 +166,16 @@ } } - if ((td->td_proc->p_flag & P_HADTHREADS) == 0) { - /* Treat initial thread as it has PTHREAD_SCOPE_PROCESS. */ - p->p_procscopegrp = kg; - mtx_lock_spin(&sched_lock); - sched_set_concurrency(kg, - thr_concurrency ? thr_concurrency : (2*mp_ncpus)); - mtx_unlock_spin(&sched_lock); - } - - linkkg = 0; - if (scope_sys) { - linkkg = 1; - newkg = ksegrp_alloc(); - bzero(&newkg->kg_startzero, - __rangeof(struct ksegrp, kg_startzero, kg_endzero)); - bcopy(&kg->kg_startcopy, &newkg->kg_startcopy, - __rangeof(struct ksegrp, kg_startcopy, kg_endcopy)); - sched_init_concurrency(newkg); - PROC_LOCK(td->td_proc); - } else { - /* - * Try to create a KSE group which will be shared - * by all PTHREAD_SCOPE_PROCESS threads. 
- */ -retry: - PROC_LOCK(td->td_proc); - if ((newkg = p->p_procscopegrp) == NULL) { - PROC_UNLOCK(p); - newkg = ksegrp_alloc(); - bzero(&newkg->kg_startzero, - __rangeof(struct ksegrp, kg_startzero, kg_endzero)); - bcopy(&kg->kg_startcopy, &newkg->kg_startcopy, - __rangeof(struct ksegrp, kg_startcopy, kg_endcopy)); - PROC_LOCK(p); - if (p->p_procscopegrp == NULL) { - p->p_procscopegrp = newkg; - sched_init_concurrency(newkg); - sched_set_concurrency(newkg, - thr_concurrency ? thr_concurrency : (2*mp_ncpus)); - linkkg = 1; - } else { - PROC_UNLOCK(p); - ksegrp_free(newkg); - goto retry; - } - } - } + PROC_LOCK(td->td_proc); td->td_proc->p_flag |= P_HADTHREADS; newtd->td_sigmask = td->td_sigmask; mtx_lock_spin(&sched_lock); - if (linkkg) - ksegrp_link(newkg, p); - thread_link(newtd, newkg); + thread_link(newtd, p); PROC_UNLOCK(p); /* let the scheduler know about these things. */ - if (linkkg) - sched_fork_ksegrp(td, newkg); - sched_fork_thread(td, newtd); + sched_fork(td, newtd); TD_SET_CAN_RUN(newtd); /* if ((flags & THR_SUSPENDED) == 0) */ setrunqueue(newtd, SRQ_BORING); Index: kern/kern_thread.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_thread.c,v retrieving revision 1.233 diff -u -r1.233 kern_thread.c --- kern/kern_thread.c 21 Mar 2006 10:05:15 -0000 1.233 +++ kern/kern_thread.c 4 Jun 2006 22:28:55 -0000 @@ -51,9 +51,8 @@ #include /* - * KSEGRP related storage. + * thread related storage. */ -static uma_zone_t ksegrp_zone; static uma_zone_t thread_zone; /* DEBUG ONLY */ @@ -74,38 +73,10 @@ SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD, &max_threads_hits, 0, ""); -int virtual_cpu; - TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads); -TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps); struct mtx kse_zombie_lock; MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN); -static int -sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS) -{ - int error, new_val; - int def_val; - - def_val = mp_ncpus; - if (virtual_cpu == 0) - new_val = def_val; - else - new_val = virtual_cpu; - error = sysctl_handle_int(oidp, &new_val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (new_val < 0) - return (EINVAL); - virtual_cpu = new_val; - return (0); -} - -/* DEBUG ONLY */ -SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW, - 0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I", - "debug virtual cpus"); - struct mtx tid_lock; static struct unrhdr *tid_unrhdr; @@ -216,62 +187,6 @@ } /* - * Initialize type-stable parts of a ksegrp (when newly created). 
- */ -static int -ksegrp_ctor(void *mem, int size, void *arg, int flags) -{ - struct ksegrp *kg; - - kg = (struct ksegrp *)mem; - bzero(mem, size); - kg->kg_sched = (struct kg_sched *)&kg[1]; - return (0); -} - -void -ksegrp_link(struct ksegrp *kg, struct proc *p) -{ - - TAILQ_INIT(&kg->kg_threads); - TAILQ_INIT(&kg->kg_runq); /* links with td_runq */ - TAILQ_INIT(&kg->kg_upcalls); /* all upcall structure in ksegrp */ - kg->kg_proc = p; - /* - * the following counters are in the -zero- section - * and may not need clearing - */ - kg->kg_numthreads = 0; - kg->kg_numupcalls = 0; - /* link it in now that it's consistent */ - p->p_numksegrps++; - TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp); -} - -/* - * Called from: - * thread-exit() - */ -void -ksegrp_unlink(struct ksegrp *kg) -{ - struct proc *p; - - mtx_assert(&sched_lock, MA_OWNED); - KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads")); - KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls")); - - p = kg->kg_proc; - TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp); - p->p_numksegrps--; - /* - * Aggregate stats from the KSE - */ - if (p->p_procscopegrp == kg) - p->p_procscopegrp = NULL; -} - -/* * For a newly created process, * link up all the structures and its initial threads etc. * called from: @@ -280,10 +195,9 @@ * proc_init() */ void -proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td) +proc_linkup(struct proc *p, struct thread *td) { - TAILQ_INIT(&p->p_ksegrps); /* all ksegrps in proc */ TAILQ_INIT(&p->p_threads); /* all threads in proc */ TAILQ_INIT(&p->p_suspended); /* Threads suspended */ sigqueue_init(&p->p_sigqueue, p); @@ -293,11 +207,9 @@ p->p_ksi->ksi_flags = KSI_EXT | KSI_INS; } LIST_INIT(&p->p_mqnotifier); - p->p_numksegrps = 0; p->p_numthreads = 0; - ksegrp_link(kg, p); - thread_link(td, kg); + thread_link(td, p); } /* @@ -313,10 +225,6 @@ thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(), thread_ctor, thread_dtor, thread_init, thread_fini, UMA_ALIGN_CACHE, 0); - ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(), - ksegrp_ctor, NULL, NULL, NULL, - UMA_ALIGN_CACHE, 0); - kseinit(); /* set up kse specific stuff e.g. upcall zone*/ } /* @@ -325,19 +233,9 @@ void thread_stash(struct thread *td) { - mtx_lock_spin(&kse_zombie_lock); - TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq); - mtx_unlock_spin(&kse_zombie_lock); -} -/* - * Stash an embarasingly extra ksegrp into the zombie ksegrp queue. - */ -void -ksegrp_stash(struct ksegrp *kg) -{ mtx_lock_spin(&kse_zombie_lock); - TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp); + TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq); mtx_unlock_spin(&kse_zombie_lock); } @@ -348,21 +246,16 @@ thread_reap(void) { struct thread *td_first, *td_next; - struct ksegrp *kg_first, * kg_next; /* * Don't even bother to lock if none at this instant, * we really don't care about the next instant.. 
*/ - if ((!TAILQ_EMPTY(&zombie_threads)) - || (!TAILQ_EMPTY(&zombie_ksegrps))) { + if (!TAILQ_EMPTY(&zombie_threads)) { mtx_lock_spin(&kse_zombie_lock); td_first = TAILQ_FIRST(&zombie_threads); - kg_first = TAILQ_FIRST(&zombie_ksegrps); if (td_first) TAILQ_INIT(&zombie_threads); - if (kg_first) - TAILQ_INIT(&zombie_ksegrps); mtx_unlock_spin(&kse_zombie_lock); while (td_first) { td_next = TAILQ_NEXT(td_first, td_runq); @@ -371,48 +264,21 @@ thread_free(td_first); td_first = td_next; } - while (kg_first) { - kg_next = TAILQ_NEXT(kg_first, kg_ksegrp); - ksegrp_free(kg_first); - kg_first = kg_next; - } - /* - * there will always be a thread on the list if one of these - * is there. - */ - kse_GC(); } } /* - * Allocate a ksegrp. - */ -struct ksegrp * -ksegrp_alloc(void) -{ - return (uma_zalloc(ksegrp_zone, M_WAITOK)); -} - -/* * Allocate a thread. */ struct thread * thread_alloc(void) { + thread_reap(); /* check if any zombies to get */ return (uma_zalloc(thread_zone, M_WAITOK)); } /* - * Deallocate a ksegrp. - */ -void -ksegrp_free(struct ksegrp *td) -{ - uma_zfree(ksegrp_zone, td); -} - -/* * Deallocate a thread. */ void @@ -449,7 +315,6 @@ * kse_exit() * thr_exit() * thread_user_enter() - * thread_userret() * thread_suspend_check() */ void @@ -458,17 +323,14 @@ uint64_t new_switchtime; struct thread *td; struct proc *p; - struct ksegrp *kg; td = curthread; - kg = td->td_ksegrp; p = td->td_proc; mtx_assert(&sched_lock, MA_OWNED); mtx_assert(&Giant, MA_NOTOWNED); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(p != NULL, ("thread exiting without a process")); - KASSERT(kg != NULL, ("thread exiting without a kse group")); CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td, (long)p->p_pid, p->p_comm); KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending")); @@ -477,15 +339,6 @@ AUDIT_SYSCALL_EXIT(0, td); #endif - if (td->td_standin != NULL) { - /* - * Note that we don't need to free the cred here as it - * is done in thread_reap(). - */ - thread_stash(td->td_standin); - td->td_standin = NULL; - } - /* * drop FPU & debug register state storage, or any other * architecture specific resources that @@ -493,14 +346,6 @@ */ cpu_thread_exit(td); /* XXXSMP */ - /* - * The thread is exiting. scheduler can release its stuff - * and collect stats etc. - * XXX this is not very right, since PROC_UNLOCK may still - * need scheduler stuff. - */ - sched_thread_exit(td); - /* Do the same timestamp bookkeeping that mi_switch() would do. */ new_switchtime = cpu_ticks(); p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime)); @@ -526,9 +371,7 @@ if (p->p_flag & P_HADTHREADS) { if (p->p_numthreads > 1) { thread_unlink(td); - - /* XXX first arg not used in 4BSD or ULE */ - sched_exit_thread(FIRST_THREAD_IN_PROC(p), td); + sched_exit(p, td); /* * The test below is NOT true if we are the @@ -536,51 +379,10 @@ * in exit1() after it is the only survivor. */ if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { - if (p->p_numthreads == p->p_suspcount) { + if (p->p_numthreads == p->p_suspcount) thread_unsuspend_one(p->p_singlethread); - } - } - - /* - * Because each upcall structure has an owner thread, - * owner thread exits only when process is in exiting - * state, so upcall to userland is no longer needed, - * deleting upcall structure is safe here. - * So when all threads in a group is exited, all upcalls - * in the group should be automatically freed. - * XXXKSE This is a KSE thing and should be exported - * there somehow. 
- */ - upcall_remove(td); - - /* - * If the thread we unlinked above was the last one, - * then this ksegrp should go away too. - */ - if (kg->kg_numthreads == 0) { - /* - * let the scheduler know about this in case - * it needs to recover stats or resources. - * Theoretically we could let - * sched_exit_ksegrp() do the equivalent of - * setting the concurrency to 0 - * but don't do it yet to avoid changing - * the existing scheduler code until we - * are ready. - * We supply a random other ksegrp - * as the recipient of any built up - * cpu usage etc. (If the scheduler wants it). - * XXXKSE - * This is probably not fair so think of - * a better answer. - */ - sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td); - sched_set_concurrency(kg, 0); /* XXX TEMP */ - ksegrp_unlink(kg); - ksegrp_stash(kg); } PROC_UNLOCK(p); - td->td_ksegrp = NULL; PCPU_SET(deadthread, td); } else { /* @@ -591,7 +393,6 @@ * kse_exit() - treats last thread specially * thr_exit() - treats last thread specially * thread_user_enter() - only if more exist - * thread_userret() - only if more exist * thread_suspend_check() - only if more exist */ panic ("thread_exit: Last thread exiting on its own"); @@ -622,16 +423,7 @@ mtx_assert(&Giant, MA_NOTOWNED); KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()")); - KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()")); FOREACH_THREAD_IN_PROC(p, td) { - if (td->td_standin != NULL) { - if (td->td_standin->td_ucred != NULL) { - crfree(td->td_standin->td_ucred); - td->td_standin->td_ucred = NULL; - } - thread_free(td->td_standin); - td->td_standin = NULL; - } cpu_thread_clean(td); crfree(td->td_ucred); } @@ -647,28 +439,21 @@ * The thread is linked as if running but no KSE assigned. * Called from: * proc_linkup() - * thread_schedule_upcall() * thr_create() */ void -thread_link(struct thread *td, struct ksegrp *kg) +thread_link(struct thread *td, struct proc *p) { - struct proc *p; - p = kg->kg_proc; td->td_state = TDS_INACTIVE; td->td_proc = p; - td->td_ksegrp = kg; td->td_flags = 0; - td->td_kflags = 0; LIST_INIT(&td->td_contested); sigqueue_init(&td->td_sigqueue, p); callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist); - TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist); p->p_numthreads++; - kg->kg_numthreads++; } /* @@ -683,15 +468,7 @@ struct proc *p = td->td_proc; KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads")); - upcall_remove(td); - p->p_flag &= ~(P_SA|P_HADTHREADS); - td->td_mailbox = NULL; - td->td_pflags &= ~(TDP_SA | TDP_CAN_UNBIND); - if (td->td_standin != NULL) { - thread_stash(td->td_standin); - td->td_standin = NULL; - } - sched_set_concurrency(td->td_ksegrp, 1); + p->p_flag &= ~P_HADTHREADS; } /* @@ -702,15 +479,12 @@ thread_unlink(struct thread *td) { struct proc *p = td->td_proc; - struct ksegrp *kg = td->td_ksegrp; mtx_assert(&sched_lock, MA_OWNED); TAILQ_REMOVE(&p->p_threads, td, td_plist); p->p_numthreads--; - TAILQ_REMOVE(&kg->kg_threads, td, td_kglist); - kg->kg_numthreads--; /* could clear a few other things here */ - /* Must NOT clear links to proc and ksegrp! */ + /* Must NOT clear links to proc! 
*/ } /* @@ -1029,7 +803,6 @@ p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY); mtx_lock_spin(&sched_lock); p->p_singlethread = NULL; - p->p_procscopegrp = NULL; /* * If there are other threads they mey now run, * unless of course there is a blanket 'stop order' Index: kern/sched_4bsd.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sched_4bsd.c,v retrieving revision 1.80 diff -u -r1.80 sched_4bsd.c --- kern/sched_4bsd.c 27 Apr 2006 17:57:59 -0000 1.80 +++ kern/sched_4bsd.c 4 Jun 2006 23:03:39 -0000 @@ -74,11 +74,9 @@ #define NICE_WEIGHT 1 /* Priorities per nice level. */ /* - * The schedulable entity that can be given a context to run. + * The schedulable entity that runs a context. * A process may have several of these. Probably one per processor - * but posibly a few more. In this universe they are grouped - * with a KSEG that contains the priority and niceness - * for the group. + * but possibly a few more. */ struct kse { TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ @@ -93,9 +91,6 @@ struct runq *ke_runq; /* runq the kse is currently on */ }; -#define ke_proc ke_thread->td_proc -#define ke_ksegrp ke_thread->td_ksegrp - #define td_kse td_sched /* flags kept in td_flags */ @@ -111,38 +106,6 @@ #define SKE_RUNQ_PCPU(ke) \ ((ke)->ke_runq != 0 && (ke)->ke_runq != &runq) -struct kg_sched { - struct thread *skg_last_assigned; /* (j) Last thread assigned to */ - /* the system scheduler. */ - int skg_avail_opennings; /* (j) Num KSEs requested in group. */ - int skg_concurrency; /* (j) Num KSEs requested in group. */ -}; -#define kg_last_assigned kg_sched->skg_last_assigned -#define kg_avail_opennings kg_sched->skg_avail_opennings -#define kg_concurrency kg_sched->skg_concurrency - -#define SLOT_RELEASE(kg) \ -do { \ - kg->kg_avail_opennings++; \ - CTR3(KTR_RUNQ, "kg %p(%d) Slot released (->%d)", \ - kg, \ - kg->kg_concurrency, \ - kg->kg_avail_opennings); \ -/* KASSERT((kg->kg_avail_opennings <= kg->kg_concurrency), \ - ("slots out of whack"));*/ \ -} while (0) - -#define SLOT_USE(kg) \ -do { \ - kg->kg_avail_opennings--; \ - CTR3(KTR_RUNQ, "kg %p(%d) Slot used (->%d)", \ - kg, \ - kg->kg_concurrency, \ - kg->kg_avail_opennings); \ -/* KASSERT((kg->kg_avail_opennings >= 0), \ - ("slots out of whack"));*/ \ -} while (0) - /* * KSE_CAN_MIGRATE macro returns true if the kse can migrate between * cpus. @@ -151,7 +114,6 @@ ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) static struct kse kse0; -static struct kg_sched kg_sched0; static int sched_tdcnt; /* Total runnable threads in the system. */ static int sched_quantum; /* Roundrobin scheduling quantum in ticks.
*/ @@ -159,8 +121,7 @@ static struct callout roundrobin_callout; -static void slot_fill(struct ksegrp *kg); -static struct kse *sched_choose(void); /* XXX Should be thread * */ +static struct thread *sched_choose(void); static void setup_runqs(void); static void roundrobin(void *arg); @@ -169,9 +130,9 @@ static void sched_priority(struct thread *td, u_char prio); static void sched_setup(void *dummy); static void maybe_resched(struct thread *td); -static void updatepri(struct ksegrp *kg); -static void resetpriority(struct ksegrp *kg); -static void resetpriority_thread(struct thread *td, struct ksegrp *kg); +static void updatepri(struct thread *td); +static void resetpriority(struct thread *td); +static void resetpriority_thread(struct thread *td); #ifdef SMP static int forward_wakeup(int cpunum); #endif @@ -274,20 +235,6 @@ "account for htt"); #endif -static int sched_followon = 0; -SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW, - &sched_followon, 0, - "allow threads to share a quantum"); - -static int sched_pfollowons = 0; -SYSCTL_INT(_kern_sched, OID_AUTO, pfollowons, CTLFLAG_RD, - &sched_pfollowons, 0, - "number of followons done to a different ksegrp"); - -static int sched_kgfollowons = 0; -SYSCTL_INT(_kern_sched, OID_AUTO, kgfollowons, CTLFLAG_RD, - &sched_kgfollowons, 0, - "number of followons done in a ksegrp"); static __inline void sched_load_add(void) @@ -338,20 +285,20 @@ /* * Constants for digital decay and forget: - * 90% of (kg_estcpu) usage in 5 * loadav time + * 90% of (td_estcpu) usage in 5 * loadav time * 95% of (ke_pctcpu) usage in 60 seconds (load insensitive) * Note that, as ps(1) mentions, this can let percentages * total over 100% (I've seen 137.9% for 3 processes). * - * Note that schedclock() updates kg_estcpu and p_cpticks asynchronously. + * Note that schedclock() updates td_estcpu and p_cpticks asynchronously. * - * We wish to decay away 90% of kg_estcpu in (5 * loadavg) seconds. + * We wish to decay away 90% of td_estcpu in (5 * loadavg) seconds. * That is, the system wants to compute a value of decay such * that the following for loop: * for (i = 0; i < (5 * loadavg); i++) - * kg_estcpu *= decay; + * td_estcpu *= decay; * will compute - * kg_estcpu *= 0.1; + * td_estcpu *= 0.1; * for all values of loadavg: * * Mathematically this loop can be expressed by saying: @@ -434,7 +381,6 @@ struct thread *td; struct proc *p; struct kse *ke; - struct ksegrp *kg; int awake, realstathz; realstathz = stathz ? stathz : hz; @@ -449,63 +395,62 @@ * 16-bit int's (remember them?) overflow takes 45 days. */ p->p_swtime++; - FOREACH_KSEGRP_IN_PROC(p, kg) { + FOREACH_THREAD_IN_PROC(p, td) { awake = 0; - FOREACH_THREAD_IN_GROUP(kg, td) { - ke = td->td_kse; - /* - * Increment sleep time (if sleeping). We - * ignore overflow, as above. - */ - /* - * The kse slptimes are not touched in wakeup - * because the thread may not HAVE a KSE. - */ - if (ke->ke_state == KES_ONRUNQ) { - awake = 1; - ke->ke_flags &= ~KEF_DIDRUN; - } else if ((ke->ke_state == KES_THREAD) && - (TD_IS_RUNNING(td))) { - awake = 1; - /* Do not clear KEF_DIDRUN */ - } else if (ke->ke_flags & KEF_DIDRUN) { - awake = 1; - ke->ke_flags &= ~KEF_DIDRUN; - } + ke = td->td_kse; + /* + * Increment sleep time (if sleeping). We + * ignore overflow, as above. + */ + /* + * The kse slptimes are not touched in wakeup + * because the thread may not HAVE a KSE. 
+ */ + if (ke->ke_state == KES_ONRUNQ) { + awake = 1; + ke->ke_flags &= ~KEF_DIDRUN; + } else if ((ke->ke_state == KES_THREAD) && + (TD_IS_RUNNING(td))) { + awake = 1; + /* Do not clear KEF_DIDRUN */ + } else if (ke->ke_flags & KEF_DIDRUN) { + awake = 1; + ke->ke_flags &= ~KEF_DIDRUN; + } - /* - * ke_pctcpu is only for ps and ttyinfo(). - * Do it per kse, and add them up at the end? - * XXXKSE - */ - ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >> - FSHIFT; - /* - * If the kse has been idle the entire second, - * stop recalculating its priority until - * it wakes up. - */ - if (ke->ke_cpticks == 0) - continue; + /* + * ke_pctcpu is only for ps and ttyinfo(). + * Do it per kse, and add them up at the end? + * XXXKSE + */ + ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >> + FSHIFT; + /* + * If the kse has been idle the entire second, + * stop recalculating its priority until + * it wakes up. + */ + if (ke->ke_cpticks == 0) + continue; #if (FSHIFT >= CCPU_SHIFT) - ke->ke_pctcpu += (realstathz == 100) - ? ((fixpt_t) ke->ke_cpticks) << - (FSHIFT - CCPU_SHIFT) : - 100 * (((fixpt_t) ke->ke_cpticks) - << (FSHIFT - CCPU_SHIFT)) / realstathz; + ke->ke_pctcpu += (realstathz == 100) + ? ((fixpt_t) ke->ke_cpticks) << + (FSHIFT - CCPU_SHIFT) : + 100 * (((fixpt_t) ke->ke_cpticks) + << (FSHIFT - CCPU_SHIFT)) / realstathz; #else - ke->ke_pctcpu += ((FSCALE - ccpu) * - (ke->ke_cpticks * - FSCALE / realstathz)) >> FSHIFT; + ke->ke_pctcpu += ((FSCALE - ccpu) * + (ke->ke_cpticks * + FSCALE / realstathz)) >> FSHIFT; #endif - ke->ke_cpticks = 0; - } /* end of kse loop */ + ke->ke_cpticks = 0; + /* - * If there are ANY running threads in this KSEGRP, + * If there are ANY running threads in this process, * then don't count it as sleeping. */ if (awake) { - if (kg->kg_slptime > 1) { + if (td->td_slptime > 1) { /* * In an ideal world, this should not * happen, because whoever woke us @@ -515,19 +460,17 @@ * priority. Should KASSERT at some * point when all the cases are fixed. */ - updatepri(kg); + updatepri(td); } - kg->kg_slptime = 0; + td->td_slptime = 0; } else - kg->kg_slptime++; - if (kg->kg_slptime > 1) + td->td_slptime++; + if (td->td_slptime > 1) continue; - kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu); - resetpriority(kg); - FOREACH_THREAD_IN_GROUP(kg, td) { - resetpriority_thread(td, kg); - } - } /* end of ksegrp loop */ + td->td_estcpu = decay_cpu(loadfac, td->td_estcpu); + resetpriority(td); + resetpriority_thread(td); + } /* end of thread loop */ mtx_unlock_spin(&sched_lock); } /* end of process loop */ sx_sunlock(&allproc_lock); @@ -549,24 +492,24 @@ /* * Recalculate the priority of a process after it has slept for a while. - * For all load averages >= 1 and max kg_estcpu of 255, sleeping for at - * least six times the loadfactor will decay kg_estcpu to zero. + * For all load averages >= 1 and max td_estcpu of 255, sleeping for at + * least six times the loadfactor will decay td_estcpu to zero. 
*/ static void -updatepri(struct ksegrp *kg) +updatepri(struct thread *td) { register fixpt_t loadfac; register unsigned int newcpu; loadfac = loadfactor(averunnable.ldavg[0]); - if (kg->kg_slptime > 5 * loadfac) - kg->kg_estcpu = 0; + if (td->td_slptime > 5 * loadfac) + td->td_estcpu = 0; else { - newcpu = kg->kg_estcpu; - kg->kg_slptime--; /* was incremented in schedcpu() */ - while (newcpu && --kg->kg_slptime) + newcpu = td->td_estcpu; + td->td_slptime--; /* was incremented in schedcpu() */ + while (newcpu && --td->td_slptime) newcpu = decay_cpu(loadfac, newcpu); - kg->kg_estcpu = newcpu; + td->td_estcpu = newcpu; } } @@ -576,16 +519,16 @@ * than that of the current process. */ static void -resetpriority(struct ksegrp *kg) +resetpriority(struct thread *td) { register unsigned int newpriority; - if (kg->kg_pri_class == PRI_TIMESHARE) { - newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT + - NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN); + if (td->td_pri_class == PRI_TIMESHARE) { + newpriority = PUSER + td->td_estcpu / INVERSE_ESTCPU_WEIGHT + + NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN); newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), PRI_MAX_TIMESHARE); - kg->kg_user_pri = newpriority; + td->td_user_pri = newpriority; } } @@ -594,7 +537,7 @@ * priority changes. */ static void -resetpriority_thread(struct thread *td, struct ksegrp *kg) +resetpriority_thread(struct thread *td) { /* Only change threads with a time sharing user priority. */ @@ -605,7 +548,7 @@ /* XXX the whole needresched thing is broken, but not silly. */ maybe_resched(td); - sched_prio(td, kg->kg_user_pri); + sched_prio(td, td->td_user_pri); } /* ARGSUSED */ @@ -641,12 +584,9 @@ * Set up the scheduler specific parts of proc0. */ proc0.p_sched = NULL; /* XXX */ - ksegrp0.kg_sched = &kg_sched0; thread0.td_sched = &kse0; kse0.ke_thread = &thread0; kse0.ke_state = KES_THREAD; - kg_sched0.skg_concurrency = 1; - kg_sched0.skg_avail_opennings = 0; /* we are already running */ } int @@ -670,8 +610,8 @@ /* * We adjust the priority of the current process. The priority of * a process gets worse as it accumulates CPU time. The cpu usage - * estimator (kg_estcpu) is increased here. resetpriority() will - * compute a different priority each time kg_estcpu increases by + * estimator (td_estcpu) is increased here. resetpriority() will + * compute a different priority each time td_estcpu increases by * INVERSE_ESTCPU_WEIGHT * (until MAXPRI is reached). The cpu usage estimator ramps up * quite quickly when the process is running (linearly), and decays @@ -684,102 +624,65 @@ void sched_clock(struct thread *td) { - struct ksegrp *kg; struct kse *ke; mtx_assert(&sched_lock, MA_OWNED); - kg = td->td_ksegrp; ke = td->td_kse; ke->ke_cpticks++; - kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1); - if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { - resetpriority(kg); - resetpriority_thread(td, kg); + td->td_estcpu = ESTCPULIM(td->td_estcpu + 1); + if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { + resetpriority(td); + resetpriority_thread(td); } } /* * charge childs scheduling cpu usage to parent. - * - * XXXKSE assume only one thread & kse & ksegrp keep estcpu in each ksegrp. - * Charge it to the ksegrp that did the wait since process estcpu is sum of - * all ksegrps, this is strictly as expected. Assume that the child process - * aggregated all the estcpu into the 'built-in' ksegrp. 
*/ void sched_exit(struct proc *p, struct thread *td) { - sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td); - sched_exit_thread(FIRST_THREAD_IN_PROC(p), td); -} + struct thread *parent = FIRST_THREAD_IN_PROC(p); -void -sched_exit_ksegrp(struct ksegrp *kg, struct thread *childtd) -{ + CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", + td, td->td_proc->p_comm, td->td_priority); - mtx_assert(&sched_lock, MA_OWNED); - kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + childtd->td_ksegrp->kg_estcpu); -} - -void -sched_exit_thread(struct thread *td, struct thread *child) -{ - CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", - child, child->td_proc->p_comm, child->td_priority); - if ((child->td_proc->p_flag & P_NOLOAD) == 0) + parent->td_estcpu = ESTCPULIM(parent->td_estcpu + td->td_estcpu); + if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_load_rem(); } void sched_fork(struct thread *td, struct thread *childtd) { - sched_fork_ksegrp(td, childtd->td_ksegrp); - sched_fork_thread(td, childtd); -} - -void -sched_fork_ksegrp(struct thread *td, struct ksegrp *child) -{ - mtx_assert(&sched_lock, MA_OWNED); - child->kg_estcpu = td->td_ksegrp->kg_estcpu; -} - -void -sched_fork_thread(struct thread *td, struct thread *childtd) -{ + childtd->td_estcpu = td->td_estcpu; sched_newthread(childtd); } void sched_nice(struct proc *p, int nice) { - struct ksegrp *kg; struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&sched_lock, MA_OWNED); p->p_nice = nice; - FOREACH_KSEGRP_IN_PROC(p, kg) { - resetpriority(kg); - FOREACH_THREAD_IN_GROUP(kg, td) { - resetpriority_thread(td, kg); - } + FOREACH_THREAD_IN_PROC(p, td) { + resetpriority(td); + resetpriority_thread(td); } } void -sched_class(struct ksegrp *kg, int class) +sched_class(struct thread *td, int class) { mtx_assert(&sched_lock, MA_OWNED); - kg->kg_pri_class = class; + td->td_pri_class = class; } /* * Adjust the priority of a thread. - * This may include moving the thread within the KSEGRP, - * changing the assignment of a kse to the thread, - * and moving a KSE in the system run queue. */ static void sched_priority(struct thread *td, u_char prio) @@ -825,7 +728,7 @@ if (td->td_base_pri >= PRI_MIN_TIMESHARE && td->td_base_pri <= PRI_MAX_TIMESHARE) - base_pri = td->td_ksegrp->kg_user_pri; + base_pri = td->td_user_pri; else base_pri = td->td_base_pri; if (prio >= base_pri) { @@ -867,16 +770,13 @@ { mtx_assert(&sched_lock, MA_OWNED); - td->td_ksegrp->kg_slptime = 0; + td->td_slptime = 0; } -static void remrunqueue(struct thread *td); - void sched_switch(struct thread *td, struct thread *newtd, int flags) { struct kse *ke; - struct ksegrp *kg; struct proc *p; ke = td->td_kse; @@ -886,32 +786,6 @@ if ((p->p_flag & P_NOLOAD) == 0) sched_load_rem(); - /* - * We are volunteering to switch out so we get to nominate - * a successor for the rest of our quantum - * First try another thread in our ksegrp, and then look for - * other ksegrps in our process. 
- */ - if (sched_followon && - (p->p_flag & P_HADTHREADS) && - (flags & SW_VOL) && - newtd == NULL) { - /* lets schedule another thread from this process */ - kg = td->td_ksegrp; - if ((newtd = TAILQ_FIRST(&kg->kg_runq))) { - remrunqueue(newtd); - sched_kgfollowons++; - } else { - FOREACH_KSEGRP_IN_PROC(p, kg) { - if ((newtd = TAILQ_FIRST(&kg->kg_runq))) { - sched_pfollowons++; - remrunqueue(newtd); - break; - } - } - } - } - if (newtd) newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED); @@ -928,21 +802,11 @@ if (td == PCPU_GET(idlethread)) TD_SET_CAN_RUN(td); else { - SLOT_RELEASE(td->td_ksegrp); if (TD_IS_RUNNING(td)) { /* Put us back on the run queue (kse and all). */ setrunqueue(td, (flags & SW_PREEMPT) ? SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : SRQ_OURSELF|SRQ_YIELDING); - } else if (p->p_flag & P_HADTHREADS) { - /* - * We will not be on the run queue. So we must be - * sleeping or similar. As it's available, - * someone else can use the KSE if they need it. - * It's NOT available if we are about to need it - */ - if (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp) - slot_fill(td->td_ksegrp); } } if (newtd) { @@ -951,12 +815,10 @@ * as if it had been added to the run queue and selected. * It came from: * * A preemption - * * An upcall * * A followon */ KASSERT((newtd->td_inhibitors == 0), ("trying to run inhibitted thread")); - SLOT_USE(newtd->td_ksegrp); newtd->td_kse->ke_flags |= KEF_DIDRUN; TD_SET_RUNNING(newtd); if ((newtd->td_proc->p_flag & P_NOLOAD) == 0) @@ -984,15 +846,13 @@ void sched_wakeup(struct thread *td) { - struct ksegrp *kg; mtx_assert(&sched_lock, MA_OWNED); - kg = td->td_ksegrp; - if (kg->kg_slptime > 1) { - updatepri(kg); - resetpriority(kg); + if (td->td_slptime > 1) { + updatepri(td); + resetpriority(td); } - kg->kg_slptime = 0; + td->td_slptime = 0; setrunqueue(td, SRQ_BORING); } @@ -1132,14 +992,13 @@ mtx_assert(&sched_lock, MA_OWNED); KASSERT(ke->ke_state != KES_ONRUNQ, ("sched_add: kse %p (%s) already in run queue", ke, - ke->ke_proc->p_comm)); - KASSERT(ke->ke_proc->p_sflag & PS_INMEM, + td->td_proc->p_comm)); + KASSERT(td->td_proc->p_sflag & PS_INMEM, ("sched_add: process swapped out")); CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, curthread->td_proc->p_comm); - if (td->td_pinned != 0) { cpu = td->td_lastcpu; ke->ke_runq = &runq_pcpu[cpu]; @@ -1163,7 +1022,6 @@ if (single_cpu && (cpu != PCPU_GET(cpuid))) { kick_other_cpu(td->td_priority,cpu); } else { - if (!single_cpu) { cpumask_t me = PCPU_GET(cpumask); int idle = idle_cpus_mask & me; @@ -1172,7 +1030,6 @@ (idle_cpus_mask & ~(hlt_cpus_mask | me))) forwarded = forward_wakeup(cpu); } - if (!forwarded) { if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td)) return; @@ -1183,7 +1040,6 @@ if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_load_add(); - SLOT_USE(td->td_ksegrp); runq_add(ke->ke_runq, ke, flags); ke->ke_state = KES_ONRUNQ; } @@ -1194,8 +1050,8 @@ mtx_assert(&sched_lock, MA_OWNED); KASSERT(ke->ke_state != KES_ONRUNQ, ("sched_add: kse %p (%s) already in run queue", ke, - ke->ke_proc->p_comm)); - KASSERT(ke->ke_proc->p_sflag & PS_INMEM, + td->td_proc->p_comm)); + KASSERT(td->td_proc->p_sflag & PS_INMEM, ("sched_add: process swapped out")); CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, @@ -1220,7 +1076,6 @@ } if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_load_add(); - SLOT_USE(td->td_ksegrp); runq_add(ke->ke_runq, ke, flags); ke->ke_state = KES_ONRUNQ; maybe_resched(td); 
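The sched_4bsd.c hunks above move the estcpu accounting from the ksegrp onto the thread: updatepri() decays td_estcpu after a long sleep, sched_clock() bumps it once per statclock tick, and resetpriority() converts it into td_user_pri. The following is a minimal userspace sketch of that last calculation only, not kernel code; the constant values are assumptions mirroring sys/priority.h and sched_4bsd.c of this period, included so the arithmetic can be tried by hand.

/*
 * Userspace model of the timeshare priority computation performed by
 * resetpriority() on td_estcpu.  Constants are assumed, not authoritative.
 */
#include <stdio.h>

#define	PRIO_MIN		(-20)
#define	PRI_MIN_TIMESHARE	160
#define	PRI_MAX_TIMESHARE	223
#define	PUSER			PRI_MIN_TIMESHARE
#define	INVERSE_ESTCPU_WEIGHT	8
#define	NICE_WEIGHT		1

static unsigned int
model_user_pri(unsigned int estcpu, int nice)
{
	unsigned int pri;

	pri = PUSER + estcpu / INVERSE_ESTCPU_WEIGHT +
	    NICE_WEIGHT * (nice - PRIO_MIN);
	if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;
	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	return (pri);
}

int
main(void)
{

	/* A CPU-bound thread at nice 0 versus an idle thread at nice -20. */
	printf("estcpu 200, nice   0 -> user pri %u\n", model_user_pri(200, 0));
	printf("estcpu   0, nice -20 -> user pri %u\n", model_user_pri(0, -20));
	return (0);
}

With these (assumed) constants a CPU-bound thread drifts toward PRI_MAX_TIMESHARE while an idle thread stays at the top of the timeshare range, which is the behaviour the per-thread td_estcpu field preserves from the old per-ksegrp kg_estcpu.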
@@ -1233,7 +1088,7 @@ struct kse *ke; ke = td->td_kse; - KASSERT(ke->ke_proc->p_sflag & PS_INMEM, + KASSERT(td->td_proc->p_sflag & PS_INMEM, ("sched_rem: process swapped out")); KASSERT((ke->ke_state == KES_ONRUNQ), ("sched_rem: KSE not on run queue")); @@ -1244,9 +1099,7 @@ if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_load_rem(); - SLOT_RELEASE(td->td_ksegrp); runq_remove(ke->ke_runq, ke); - ke->ke_state = KES_THREAD; } @@ -1254,7 +1107,7 @@ * Select threads to run. * Notice that the running threads still consume a slot. */ -struct kse * +struct thread * sched_choose(void) { struct kse *ke; @@ -1283,38 +1136,23 @@ ke = runq_choose(&runq); #endif - if (ke != NULL) { + if (ke) { runq_remove(rq, ke); ke->ke_state = KES_THREAD; - KASSERT(ke->ke_proc->p_sflag & PS_INMEM, + KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM, ("sched_choose: process swapped out")); + return (ke->ke_thread); } - return (ke); + return (NULL); } void sched_userret(struct thread *td) { - struct ksegrp *kg; - /* - * XXX we cheat slightly on the locking here to avoid locking in - * the usual case. Setting td_priority here is essentially an - * incomplete workaround for not setting it properly elsewhere. - * Now that some interrupt handlers are threads, not setting it - * properly elsewhere can clobber it in the window between setting - * it here and returning to user mode, so don't waste time setting - * it perfectly here. - */ + KASSERT((td->td_flags & TDF_BORROWING) == 0, ("thread with borrowed priority returning to userland")); - kg = td->td_ksegrp; - if (td->td_priority != kg->kg_user_pri) { - mtx_lock_spin(&sched_lock); - td->td_priority = kg->kg_user_pri; - td->td_base_pri = kg->kg_user_pri; - mtx_unlock_spin(&sched_lock); - } } void @@ -1325,17 +1163,13 @@ mtx_assert(&sched_lock, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("sched_bind: cannot bind non-running thread")); - ke = td->td_kse; - ke->ke_flags |= KEF_BOUND; #ifdef SMP ke->ke_runq = &runq_pcpu[cpu]; if (PCPU_GET(cpuid) == cpu) return; - ke->ke_state = KES_THREAD; - mi_switch(SW_VOL, NULL); #endif } @@ -1343,6 +1177,7 @@ void sched_unbind(struct thread* td) { + mtx_assert(&sched_lock, MA_OWNED); td->td_kse->ke_flags &= ~KEF_BOUND; } @@ -1350,6 +1185,7 @@ int sched_is_bound(struct thread *td) { + mtx_assert(&sched_lock, MA_OWNED); return (td->td_kse->ke_flags & KEF_BOUND); } @@ -1357,22 +1193,20 @@ int sched_load(void) { + return (sched_tdcnt); } int -sched_sizeof_ksegrp(void) -{ - return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); -} -int sched_sizeof_proc(void) { + return (sizeof(struct proc)); } int sched_sizeof_thread(void) { + return (sizeof(struct thread) + sizeof(struct kse)); } Index: kern/sched_ule.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sched_ule.c,v retrieving revision 1.161 diff -u -r1.161 sched_ule.c --- kern/sched_ule.c 27 Dec 2005 12:02:03 -0000 1.161 +++ kern/sched_ule.c 4 Jun 2006 21:12:05 -0000 @@ -94,7 +94,7 @@ * The schedulable entity that can be given a context to run. A process may * have several of these. */ -struct kse { +struct td_sched { /* really kse */ TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ int ke_flags; /* (j) KEF_* flags. */ struct thread *ke_thread; /* (*) Active associated thread. */ @@ -113,11 +113,11 @@ int ke_ftick; /* First tick that we were running on */ int ke_ticks; /* Tick count */ + /* originally from kg_sched */ + int skg_slptime; /* Number of ticks we vol. 
slept */ + int skg_runtime; /* Number of ticks we were running */ }; #define td_kse td_sched -#define td_slptime td_kse->ke_slptime -#define ke_proc ke_thread->td_proc -#define ke_ksegrp ke_thread->td_ksegrp #define ke_assign ke_procq.tqe_next /* flags kept in ke_flags */ #define KEF_ASSIGNED 0x0001 /* Thread is being migrated. */ @@ -130,25 +130,7 @@ #define KEF_DIDRUN 0x02000 /* Thread actually ran. */ #define KEF_EXIT 0x04000 /* Thread is being killed. */ -struct kg_sched { - struct thread *skg_last_assigned; /* (j) Last thread assigned to */ - /* the system scheduler */ - int skg_slptime; /* Number of ticks we vol. slept */ - int skg_runtime; /* Number of ticks we were running */ - int skg_avail_opennings; /* (j) Num unfilled slots in group.*/ - int skg_concurrency; /* (j) Num threads requested in group.*/ -}; -#define kg_last_assigned kg_sched->skg_last_assigned -#define kg_avail_opennings kg_sched->skg_avail_opennings -#define kg_concurrency kg_sched->skg_concurrency -#define kg_runtime kg_sched->skg_runtime -#define kg_slptime kg_sched->skg_slptime - -#define SLOT_RELEASE(kg) (kg)->kg_avail_opennings++ -#define SLOT_USE(kg) (kg)->kg_avail_opennings-- - static struct kse kse0; -static struct kg_sched kg_sched0; /* * The priority is primarily determined by the interactivity score. Thus, we @@ -206,11 +188,11 @@ * This macro determines whether or not the thread belongs on the current or * next run queue. */ -#define SCHED_INTERACTIVE(kg) \ - (sched_interact_score(kg) < SCHED_INTERACT_THRESH) -#define SCHED_CURR(kg, ke) \ +#define SCHED_INTERACTIVE(td) \ + (sched_interact_score(td) < SCHED_INTERACT_THRESH) +#define SCHED_CURR(td, ke) \ ((ke->ke_thread->td_flags & TDF_BORROWING) || \ - (ke->ke_flags & KEF_PREEMPTED) || SCHED_INTERACTIVE(kg)) + (ke->ke_flags & KEF_PREEMPTED) || SCHED_INTERACTIVE(td)) /* * Cpu percentage computation macros and defines. 
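The skg_slptime and skg_runtime counters folded into td_sched above feed ULE's interactivity score, and SCHED_INTERACTIVE(td) now evaluates that score for a single thread rather than a ksegrp. Below is a standalone model of the score as computed by sched_interact_score() later in this patch; the SCHED_INTERACT_* values are assumptions taken from sched_ule.c of this era, so treat the exact numbers as illustrative.

/*
 * Standalone model of ULE's interactivity score.  Inputs are the voluntary
 * sleep and run tick counters (skg_slptime/skg_runtime); constants assumed.
 */
#include <stdio.h>

#define	SCHED_INTERACT_MAX	100
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	30

static int
interact_score(int runtime, int slptime)
{
	int div;

	if (runtime > slptime) {
		div = runtime / SCHED_INTERACT_HALF;
		if (div < 1)
			div = 1;
		/* Mostly running: score lands in the upper half. */
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - slptime / div));
	}
	if (slptime > runtime) {
		div = slptime / SCHED_INTERACT_HALF;
		if (div < 1)
			div = 1;
		/* Mostly sleeping: score lands in the lower half. */
		return (runtime / div);
	}
	return (0);		/* Both counters zero (or equal). */
}

int
main(void)
{
	int sleepy, hog;

	sleepy = interact_score(100, 900);
	hog = interact_score(900, 100);
	printf("sleepy thread:    score %3d, interactive: %s\n",
	    sleepy, sleepy < SCHED_INTERACT_THRESH ? "yes" : "no");
	printf("cpu-bound thread: score %3d, interactive: %s\n",
	    hog, hog < SCHED_INTERACT_THRESH ? "yes" : "no");
	return (0);
}

A mostly-sleeping thread scores near zero and is treated as interactive (queued on ksq_curr via SCHED_CURR), while a CPU hog scores near the maximum and waits on ksq_next, as the sched_clock() hunk later in the patch shows.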
@@ -287,14 +269,13 @@ #define KSEQ_CPU(x) (&kseq_cpu) #endif -static void slot_fill(struct ksegrp *); static struct kse *sched_choose(void); /* XXX Should be thread * */ static void sched_slice(struct kse *); -static void sched_priority(struct ksegrp *); +static void sched_priority(struct thread *); static void sched_thread_priority(struct thread *, u_char); -static int sched_interact_score(struct ksegrp *); -static void sched_interact_update(struct ksegrp *); -static void sched_interact_fork(struct ksegrp *); +static int sched_interact_score(struct thread *); +static void sched_interact_update(struct thread *); +static void sched_interact_fork(struct thread *); static void sched_pctcpu_update(struct kse *); /* Operations on per processor queues */ @@ -378,19 +359,19 @@ { int class; mtx_assert(&sched_lock, MA_OWNED); - class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); + class = PRI_BASE(ke->ke_thread->td_pri_class); if (class == PRI_TIMESHARE) kseq->ksq_load_timeshare++; kseq->ksq_load++; CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); - if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) + if (class != PRI_ITHD && (ke->ke_thread->td_proc->p_flag & P_NOLOAD) == 0) #ifdef SMP kseq->ksq_group->ksg_load++; #else kseq->ksq_sysload++; #endif - if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) - kseq_nice_add(kseq, ke->ke_proc->p_nice); + if (ke->ke_thread->td_pri_class == PRI_TIMESHARE) + kseq_nice_add(kseq, ke->ke_thread->td_proc->p_nice); } static void @@ -398,10 +379,10 @@ { int class; mtx_assert(&sched_lock, MA_OWNED); - class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); + class = PRI_BASE(ke->ke_thread->td_pri_class); if (class == PRI_TIMESHARE) kseq->ksq_load_timeshare--; - if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) + if (class != PRI_ITHD && (ke->ke_thread->td_proc->p_flag & P_NOLOAD) == 0) #ifdef SMP kseq->ksq_group->ksg_load--; #else @@ -410,8 +391,8 @@ kseq->ksq_load--; CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); ke->ke_runq = NULL; - if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) - kseq_nice_rem(kseq, ke->ke_proc->p_nice); + if (ke->ke_thread->td_pri_class == PRI_TIMESHARE) + kseq_nice_rem(kseq, ke->ke_thread->td_proc->p_nice); } static void @@ -685,7 +666,7 @@ kseq = KSEQ_CPU(cpu); /* XXX */ - class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); + class = PRI_BASE(ke->ke_thread->td_pri_class); if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && (kseq_idle & kseq->ksq_group->ksg_mask)) atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); @@ -888,10 +869,10 @@ * TIMESHARE kse group and its nice was too far out * of the range that receives slices. */ - nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin); + nice = ke->ke_thread->td_proc->p_nice + (0 - kseq->ksq_nicemin); #if 0 if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH && - ke->ke_proc->p_nice != 0)) { + ke->ke_thread->td_proc->p_nice != 0)) { runq_remove(ke->ke_runq, ke); sched_slice(ke); ke->ke_runq = kseq->ksq_next; @@ -1044,41 +1025,41 @@ * process. 
*/ static void -sched_priority(struct ksegrp *kg) +sched_priority(struct thread *td) { int pri; - if (kg->kg_pri_class != PRI_TIMESHARE) + if (td->td_pri_class != PRI_TIMESHARE) return; - pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); + pri = SCHED_PRI_INTERACT(sched_interact_score(td)); pri += SCHED_PRI_BASE; - pri += kg->kg_proc->p_nice; + pri += td->td_proc->p_nice; if (pri > PRI_MAX_TIMESHARE) pri = PRI_MAX_TIMESHARE; else if (pri < PRI_MIN_TIMESHARE) pri = PRI_MIN_TIMESHARE; - kg->kg_user_pri = pri; + td->td_user_pri = pri; return; } /* * Calculate a time slice based on the properties of the kseg and the runq - * that we're on. This is only for PRI_TIMESHARE ksegrps. + * that we're on. This is only for PRI_TIMESHARE threads. */ static void sched_slice(struct kse *ke) { struct kseq *kseq; - struct ksegrp *kg; + struct thread *td; - kg = ke->ke_ksegrp; + td = ke->ke_thread; kseq = KSEQ_CPU(ke->ke_cpu); - if (ke->ke_thread->td_flags & TDF_BORROWING) { + if (td->td_flags & TDF_BORROWING) { ke->ke_slice = SCHED_SLICE_MIN; return; } @@ -1098,7 +1079,7 @@ * * There is 20 point window that starts relative to the least * nice kse on the run queue. Slice size is determined by - * the kse distance from the last nice ksegrp. + * the kse distance from the last nice thread. * * If the kse is outside of the window it will get no slice * and will be reevaluated each time it is selected on the @@ -1106,16 +1087,16 @@ * a nice -20 is running. They are always granted a minimum * slice. */ - if (!SCHED_INTERACTIVE(kg)) { + if (!SCHED_INTERACTIVE(td)) { int nice; - nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin); + nice = td->td_proc->p_nice + (0 - kseq->ksq_nicemin); if (kseq->ksq_load_timeshare == 0 || - kg->kg_proc->p_nice < kseq->ksq_nicemin) + td->td_proc->p_nice < kseq->ksq_nicemin) ke->ke_slice = SCHED_SLICE_MAX; else if (nice <= SCHED_SLICE_NTHRESH) ke->ke_slice = SCHED_SLICE_NICE(nice); - else if (kg->kg_proc->p_nice == 0) + else if (td->td_proc->p_nice == 0) ke->ke_slice = SCHED_SLICE_MIN; else ke->ke_slice = SCHED_SLICE_MIN; /* 0 */ @@ -1132,11 +1113,11 @@ * adjusted to more than double their maximum. 
*/ static void -sched_interact_update(struct ksegrp *kg) +sched_interact_update(struct thread *td) { int sum; - sum = kg->kg_runtime + kg->kg_slptime; + sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; if (sum < SCHED_SLP_RUN_MAX) return; /* @@ -1145,40 +1126,40 @@ * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] */ if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { - kg->kg_runtime /= 2; - kg->kg_slptime /= 2; + td->td_sched->skg_runtime /= 2; + td->td_sched->skg_slptime /= 2; return; } - kg->kg_runtime = (kg->kg_runtime / 5) * 4; - kg->kg_slptime = (kg->kg_slptime / 5) * 4; + td->td_sched->skg_runtime = (td->td_sched->skg_runtime / 5) * 4; + td->td_sched->skg_slptime = (td->td_sched->skg_slptime / 5) * 4; } static void -sched_interact_fork(struct ksegrp *kg) +sched_interact_fork(struct thread *td) { int ratio; int sum; - sum = kg->kg_runtime + kg->kg_slptime; + sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; if (sum > SCHED_SLP_RUN_FORK) { ratio = sum / SCHED_SLP_RUN_FORK; - kg->kg_runtime /= ratio; - kg->kg_slptime /= ratio; + td->td_sched->skg_runtime /= ratio; + td->td_sched->skg_slptime /= ratio; } } static int -sched_interact_score(struct ksegrp *kg) +sched_interact_score(struct thread *td) { int div; - if (kg->kg_runtime > kg->kg_slptime) { - div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); + if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { + div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); return (SCHED_INTERACT_HALF + - (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); - } if (kg->kg_slptime > kg->kg_runtime) { - div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); - return (kg->kg_runtime / div); + (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); + } if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { + div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); + return (td->td_sched->skg_runtime / div); } /* @@ -1201,12 +1182,9 @@ * Set up the scheduler specific parts of proc0. */ proc0.p_sched = NULL; /* XXX */ - ksegrp0.kg_sched = &kg_sched0; thread0.td_sched = &kse0; kse0.ke_thread = &thread0; kse0.ke_state = KES_THREAD; - kg_sched0.skg_concurrency = 1; - kg_sched0.skg_avail_opennings = 0; /* we are already running */ } /* @@ -1306,7 +1284,7 @@ if (td->td_base_pri >= PRI_MIN_TIMESHARE && td->td_base_pri <= PRI_MAX_TIMESHARE) - base_pri = td->td_ksegrp->kg_user_pri; + base_pri = td->td_user_pri; else base_pri = td->td_base_pri; if (prio >= base_pri) { @@ -1367,7 +1345,6 @@ TD_SET_CAN_RUN(td); } else if ((ke->ke_flags & KEF_ASSIGNED) == 0) { /* We are ending our run so make our slot available again */ - SLOT_RELEASE(td->td_ksegrp); kseq_load_rem(ksq, ke); if (TD_IS_RUNNING(td)) { /* @@ -1379,15 +1356,7 @@ SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : SRQ_OURSELF|SRQ_YIELDING); ke->ke_flags &= ~KEF_HOLD; - } else if ((td->td_proc->p_flag & P_HADTHREADS) && - (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp)) - /* - * We will not be on the run queue. - * So we must be sleeping or similar. - * Don't use the slot if we will need it - * for newtd. - */ - slot_fill(td->td_ksegrp); + } } if (newtd != NULL) { /* @@ -1398,15 +1367,6 @@ newtd->td_kse->ke_runq = ksq->ksq_curr; TD_SET_RUNNING(newtd); kseq_load_add(KSEQ_SELF(), newtd->td_kse); - /* - * XXX When we preempt, we've already consumed a slot because - * we got here through sched_add(). However, newtd can come - * from thread_switchout() which can't SLOT_USE() because - * the SLOT code is scheduler dependent. 
We must use the - * slot here otherwise. - */ - if ((flags & SW_PREEMPT) == 0) - SLOT_USE(newtd->td_ksegrp); } else newtd = choosethread(); if (td != newtd) { @@ -1429,7 +1389,6 @@ void sched_nice(struct proc *p, int nice) { - struct ksegrp *kg; struct kse *ke; struct thread *td; struct kseq *kseq; @@ -1439,23 +1398,20 @@ /* * We need to adjust the nice counts for running KSEs. */ - FOREACH_KSEGRP_IN_PROC(p, kg) { - if (kg->kg_pri_class == PRI_TIMESHARE) { - FOREACH_THREAD_IN_GROUP(kg, td) { - ke = td->td_kse; - if (ke->ke_runq == NULL) - continue; - kseq = KSEQ_CPU(ke->ke_cpu); - kseq_nice_rem(kseq, p->p_nice); - kseq_nice_add(kseq, nice); - } + FOREACH_THREAD_IN_PROC(p, td) { + if (td->td_pri_class == PRI_TIMESHARE) { + ke = td->td_kse; + if (ke->ke_runq == NULL) + continue; + kseq = KSEQ_CPU(ke->ke_cpu); + kseq_nice_rem(kseq, p->p_nice); + kseq_nice_add(kseq, nice); } } p->p_nice = nice; - FOREACH_KSEGRP_IN_PROC(p, kg) { - sched_priority(kg); - FOREACH_THREAD_IN_GROUP(kg, td) - td->td_flags |= TDF_NEEDRESCHED; + FOREACH_THREAD_IN_PROC(p, td) { + sched_priority(td); + td->td_flags |= TDF_NEEDRESCHED; } } @@ -1464,7 +1420,7 @@ { mtx_assert(&sched_lock, MA_OWNED); - td->td_slptime = ticks; + td->td_kse->ke_slptime = ticks; } void @@ -1476,22 +1432,20 @@ * Let the kseg know how long we slept for. This is because process * interactivity behavior is modeled in the kseg. */ - if (td->td_slptime) { - struct ksegrp *kg; + if (td->td_kse->ke_slptime) { int hzticks; - kg = td->td_ksegrp; - hzticks = (ticks - td->td_slptime) << 10; + hzticks = (ticks - td->td_kse->ke_slptime) << 10; if (hzticks >= SCHED_SLP_RUN_MAX) { - kg->kg_slptime = SCHED_SLP_RUN_MAX; - kg->kg_runtime = 1; + td->td_sched->skg_slptime = SCHED_SLP_RUN_MAX; + td->td_sched->skg_runtime = 1; } else { - kg->kg_slptime += hzticks; - sched_interact_update(kg); + td->td_sched->skg_slptime += hzticks; + sched_interact_update(td); } - sched_priority(kg); + sched_priority(td); sched_slice(td->td_kse); - td->td_slptime = 0; + td->td_kse->ke_slptime = 0; } setrunqueue(td, SRQ_BORING); } @@ -1501,36 +1455,22 @@ * priority. */ void -sched_fork(struct thread *td, struct thread *childtd) +sched_fork(struct thread *td, struct thread *child) { + struct kse *ke; + struct kse *ke2; mtx_assert(&sched_lock, MA_OWNED); - sched_fork_ksegrp(td, childtd->td_ksegrp); - sched_fork_thread(td, childtd); -} - -void -sched_fork_ksegrp(struct thread *td, struct ksegrp *child) -{ - struct ksegrp *kg = td->td_ksegrp; - mtx_assert(&sched_lock, MA_OWNED); - - child->kg_slptime = kg->kg_slptime; - child->kg_runtime = kg->kg_runtime; - child->kg_user_pri = kg->kg_user_pri; + child->td_sched->skg_slptime = td->td_sched->skg_slptime; + child->td_sched->skg_runtime = td->td_sched->skg_runtime; + child->td_user_pri = td->td_user_pri; sched_interact_fork(child); - kg->kg_runtime += tickincr; - sched_interact_update(kg); -} - -void -sched_fork_thread(struct thread *td, struct thread *child) -{ - struct kse *ke; - struct kse *ke2; + td->td_sched->skg_runtime += tickincr; + sched_interact_update(td); sched_newthread(child); + ke = td->td_kse; ke2 = child->td_kse; ke2->ke_slice = 1; /* Attempt to quickly learn interactivity. 
*/ @@ -1544,55 +1484,52 @@ } void -sched_class(struct ksegrp *kg, int class) +sched_class(struct thread *td, int class) { struct kseq *kseq; struct kse *ke; - struct thread *td; int nclass; int oclass; mtx_assert(&sched_lock, MA_OWNED); - if (kg->kg_pri_class == class) + if (td->td_pri_class == class) return; nclass = PRI_BASE(class); - oclass = PRI_BASE(kg->kg_pri_class); - FOREACH_THREAD_IN_GROUP(kg, td) { - ke = td->td_kse; - if ((ke->ke_state != KES_ONRUNQ && - ke->ke_state != KES_THREAD) || ke->ke_runq == NULL) - continue; - kseq = KSEQ_CPU(ke->ke_cpu); + oclass = PRI_BASE(td->td_pri_class); + ke = td->td_kse; + if ((ke->ke_state != KES_ONRUNQ && + ke->ke_state != KES_THREAD) || ke->ke_runq == NULL) + continue; + kseq = KSEQ_CPU(ke->ke_cpu); #ifdef SMP - /* - * On SMP if we're on the RUNQ we must adjust the transferable - * count because could be changing to or from an interrupt - * class. - */ - if (ke->ke_state == KES_ONRUNQ) { - if (KSE_CAN_MIGRATE(ke)) { - kseq->ksq_transferable--; - kseq->ksq_group->ksg_transferable--; - } - if (KSE_CAN_MIGRATE(ke)) { - kseq->ksq_transferable++; - kseq->ksq_group->ksg_transferable++; - } - } -#endif - if (oclass == PRI_TIMESHARE) { - kseq->ksq_load_timeshare--; - kseq_nice_rem(kseq, kg->kg_proc->p_nice); + /* + * On SMP if we're on the RUNQ we must adjust the transferable + * count because could be changing to or from an interrupt + * class. + */ + if (ke->ke_state == KES_ONRUNQ) { + if (KSE_CAN_MIGRATE(ke)) { + kseq->ksq_transferable--; + kseq->ksq_group->ksg_transferable--; } - if (nclass == PRI_TIMESHARE) { - kseq->ksq_load_timeshare++; - kseq_nice_add(kseq, kg->kg_proc->p_nice); + if (KSE_CAN_MIGRATE(ke)) { + kseq->ksq_transferable++; + kseq->ksq_group->ksg_transferable++; } } +#endif + if (oclass == PRI_TIMESHARE) { + kseq->ksq_load_timeshare--; + kseq_nice_rem(kseq, td->td_proc->p_nice); + } + if (nclass == PRI_TIMESHARE) { + kseq->ksq_load_timeshare++; + kseq_nice_add(kseq, td->td_proc->p_nice); + } - kg->kg_pri_class = class; + td->td_pri_class = class; } /* @@ -1601,24 +1538,16 @@ void sched_exit(struct proc *p, struct thread *childtd) { + struct thread *parent = FIRST_THREAD_IN_PROC(p); mtx_assert(&sched_lock, MA_OWNED); - sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd); - sched_exit_thread(NULL, childtd); -} -void -sched_exit_ksegrp(struct ksegrp *kg, struct thread *td) -{ - /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */ - kg->kg_runtime += td->td_ksegrp->kg_runtime; - sched_interact_update(kg); -} - -void -sched_exit_thread(struct thread *td, struct thread *childtd) -{ - CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", + CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", childtd, childtd->td_proc->p_comm, childtd->td_priority); + + /* parent->td_sched->skg_slptime += childtd->td_sched->skg_slptime; */ + parent->td_sched->skg_runtime += childtd->td_sched->skg_runtime; + sched_interact_update(parent); + kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse); } @@ -1626,7 +1555,6 @@ sched_clock(struct thread *td) { struct kseq *kseq; - struct ksegrp *kg; struct kse *ke; mtx_assert(&sched_lock, MA_OWNED); @@ -1644,7 +1572,6 @@ kseq_assign(kseq); /* Potentially sets NEEDRESCHED */ #endif ke = td->td_kse; - kg = ke->ke_ksegrp; /* Adjust ticks for pctcpu */ ke->ke_ticks++; @@ -1657,16 +1584,16 @@ if (td->td_flags & TDF_IDLETD) return; /* - * We only do slicing code for TIMESHARE ksegrps. + * We only do slicing code for TIMESHARE threads. 
*/ - if (kg->kg_pri_class != PRI_TIMESHARE) + if (td->td_pri_class != PRI_TIMESHARE) return; /* - * We used a tick charge it to the ksegrp so that we can compute our + * We used a tick charge it to the thread so that we can compute our * interactivity. */ - kg->kg_runtime += tickincr; - sched_interact_update(kg); + td->td_sched->skg_runtime += tickincr; + sched_interact_update(td); /* * We used up one time slice. @@ -1677,9 +1604,9 @@ * We're out of time, recompute priorities and requeue. */ kseq_load_rem(kseq, ke); - sched_priority(kg); + sched_priority(td); sched_slice(ke); - if (SCHED_CURR(kg, ke)) + if (SCHED_CURR(td, ke)) ke->ke_runq = kseq->ksq_curr; else ke->ke_runq = kseq->ksq_next; @@ -1717,17 +1644,9 @@ void sched_userret(struct thread *td) { - struct ksegrp *kg; KASSERT((td->td_flags & TDF_BORROWING) == 0, ("thread with borrowed priority returning to userland")); - kg = td->td_ksegrp; - if (td->td_priority != kg->kg_user_pri) { - mtx_lock_spin(&sched_lock); - td->td_priority = kg->kg_user_pri; - td->td_base_pri = kg->kg_user_pri; - mtx_unlock_spin(&sched_lock); - } } struct kse * @@ -1746,7 +1665,7 @@ ke = kseq_choose(kseq); if (ke) { #ifdef SMP - if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) + if (ke->ke_thread->td_pri_class == PRI_IDLE) if (kseq_idled(kseq) == 0) goto restart; #endif @@ -1766,7 +1685,6 @@ sched_add(struct thread *td, int flags) { struct kseq *kseq; - struct ksegrp *kg; struct kse *ke; int preemptive; int canmigrate; @@ -1777,13 +1695,10 @@ curthread->td_proc->p_comm); mtx_assert(&sched_lock, MA_OWNED); ke = td->td_kse; - kg = td->td_ksegrp; canmigrate = 1; preemptive = !(flags & SRQ_YIELDING); - class = PRI_BASE(kg->kg_pri_class); + class = PRI_BASE(td->td_pri_class); kseq = KSEQ_SELF(); - if ((ke->ke_flags & KEF_INTERNAL) == 0) - SLOT_USE(td->td_ksegrp); ke->ke_flags &= ~KEF_INTERNAL; #ifdef SMP if (ke->ke_flags & KEF_ASSIGNED) { @@ -1803,8 +1718,8 @@ #endif KASSERT(ke->ke_state != KES_ONRUNQ, ("sched_add: kse %p (%s) already in run queue", ke, - ke->ke_proc->p_comm)); - KASSERT(ke->ke_proc->p_sflag & PS_INMEM, + td->td_proc->p_comm)); + KASSERT(td->td_proc->p_sflag & PS_INMEM, ("sched_add: process swapped out")); KASSERT(ke->ke_runq == NULL, ("sched_add: KSE %p is still assigned to a run queue", ke)); @@ -1819,7 +1734,7 @@ ke->ke_cpu = PCPU_GET(cpuid); break; case PRI_TIMESHARE: - if (SCHED_CURR(kg, ke)) + if (SCHED_CURR(td, ke)) ke->ke_runq = kseq->ksq_curr; else ke->ke_runq = kseq->ksq_next; @@ -1891,7 +1806,6 @@ curthread->td_proc->p_comm); mtx_assert(&sched_lock, MA_OWNED); ke = td->td_kse; - SLOT_RELEASE(td->td_ksegrp); ke->ke_flags &= ~KEF_PREEMPTED; if (ke->ke_flags & KEF_ASSIGNED) { ke->ke_flags |= KEF_REMOVED; @@ -1934,7 +1848,7 @@ pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; } - ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; + td->td_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; mtx_unlock_spin(&sched_lock); return (pctcpu); @@ -1991,12 +1905,6 @@ } int -sched_sizeof_ksegrp(void) -{ - return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); -} - -int sched_sizeof_proc(void) { return (sizeof(struct proc)); Index: kern/subr_trap.c =================================================================== RCS file: /home/ncvs/src/sys/kern/subr_trap.c,v retrieving revision 1.286 diff -u -r1.286 subr_trap.c --- kern/subr_trap.c 10 Feb 2006 14:59:16 -0000 1.286 +++ kern/subr_trap.c 4 Jun 2006 20:26:52 -0000 @@ -115,18 +115,10 @@ } /* - * Do special thread processing, e.g. upcall tweaking and such. 
- */ - if (p->p_flag & P_SA) - thread_userret(td, frame); - - /* * Charge system time if profiling. */ - if (p->p_flag & P_PROFIL) { - + if (p->p_flag & P_PROFIL) addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio); - } /* * Let the scheduler adjust our priority etc. @@ -146,7 +138,6 @@ { struct thread *td; struct proc *p; - struct ksegrp *kg; struct rlimit rlim; int sflag; int flags; @@ -158,7 +149,6 @@ td = curthread; p = td->td_proc; - kg = td->td_ksegrp; CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); @@ -169,9 +159,6 @@ td->td_frame = framep; td->td_pticks = 0; - if ((p->p_flag & P_SA) && (td->td_mailbox == NULL)) - thread_user_enter(td); - /* * This updates the p_sflag's for the checks below in one * "atomic" operation with turning off the astpending flag. @@ -255,7 +242,7 @@ ktrcsw(1, 1); #endif mtx_lock_spin(&sched_lock); - sched_prio(td, kg->kg_user_pri); + sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); #ifdef KTRACE Index: kern/sys_process.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sys_process.c,v retrieving revision 1.137 diff -u -r1.137 sys_process.c --- kern/sys_process.c 22 Feb 2006 18:57:50 -0000 1.137 +++ kern/sys_process.c 4 Jun 2006 22:29:15 -0000 @@ -806,7 +806,6 @@ * continuing process. */ mtx_unlock_spin(&sched_lock); - thread_continued(p); p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED); mtx_lock_spin(&sched_lock); thread_unsuspend(p); @@ -944,13 +943,7 @@ pl->pl_event = PL_EVENT_SIGNAL; else pl->pl_event = 0; - if (td2->td_pflags & TDP_SA) { - pl->pl_flags = PL_FLAG_SA; - if (td2->td_upcall && !TD_CAN_UNBIND(td2)) - pl->pl_flags |= PL_FLAG_BOUND; - } else { - pl->pl_flags = 0; - } + pl->pl_flags = 0; pl->pl_sigmask = td2->td_sigmask; pl->pl_siglist = td2->td_siglist; break; Index: kern/syscalls.c =================================================================== RCS file: /home/ncvs/src/sys/kern/syscalls.c,v retrieving revision 1.195 diff -u -r1.195 syscalls.c --- kern/syscalls.c 23 Mar 2006 08:48:37 -0000 1.195 +++ kern/syscalls.c 4 Jun 2006 22:33:13 -0000 @@ -2,8 +2,8 @@ * System call names. * * DO NOT EDIT-- this file is automatically generated. 
- * $FreeBSD: src/sys/kern/syscalls.c,v 1.195 2006/03/23 08:48:37 davidxu Exp $ - * created from FreeBSD: src/sys/kern/syscalls.master,v 1.213 2006/03/23 08:46:41 davidxu Exp + * $FreeBSD$ + * created from FreeBSD: src/sys/kern/syscalls.master,v 1.215 2006/03/28 14:32:37 des Exp */ const char *syscallnames[] = { @@ -386,11 +386,11 @@ "eaccess", /* 376 = eaccess */ "#377", /* 377 = afs_syscall */ "nmount", /* 378 = nmount */ - "kse_exit", /* 379 = kse_exit */ - "kse_wakeup", /* 380 = kse_wakeup */ - "kse_create", /* 381 = kse_create */ - "kse_thr_interrupt", /* 382 = kse_thr_interrupt */ - "kse_release", /* 383 = kse_release */ + "#379", /* 379 = kse_exit */ + "#380", /* 380 = kse_wakeup */ + "#381", /* 381 = kse_create */ + "#382", /* 382 = kse_thr_interrupt */ + "#383", /* 383 = kse_release */ "__mac_get_proc", /* 384 = __mac_get_proc */ "__mac_set_proc", /* 385 = __mac_set_proc */ "__mac_get_fd", /* 386 = __mac_get_fd */ @@ -447,7 +447,7 @@ "extattr_list_fd", /* 437 = extattr_list_fd */ "extattr_list_file", /* 438 = extattr_list_file */ "extattr_list_link", /* 439 = extattr_list_link */ - "kse_switchin", /* 440 = kse_switchin */ + "#440", /* 440 = kse_switchin */ "ksem_timedwait", /* 441 = ksem_timedwait */ "thr_suspend", /* 442 = thr_suspend */ "thr_wake", /* 443 = thr_wake */ Index: kern/syscalls.master =================================================================== RCS file: /home/ncvs/src/sys/kern/syscalls.master,v retrieving revision 1.215 diff -u -r1.215 syscalls.master --- kern/syscalls.master 28 Mar 2006 14:32:37 -0000 1.215 +++ kern/syscalls.master 4 Jun 2006 22:33:02 -0000 @@ -664,14 +664,11 @@ 377 AUE_NULL UNIMPL afs_syscall 378 AUE_NMOUNT STD { int nmount(struct iovec *iovp, \ unsigned int iovcnt, int flags); } -379 AUE_NULL MSTD { int kse_exit(void); } -380 AUE_NULL MSTD { int kse_wakeup(struct kse_mailbox *mbx); } -381 AUE_NULL MSTD { int kse_create(struct kse_mailbox *mbx, \ - int newgroup); } -382 AUE_NULL MSTD { int kse_thr_interrupt( \ - struct kse_thr_mailbox *tmbx, int cmd, \ - long data); } -383 AUE_NULL MSTD { int kse_release(struct timespec *timeout); } +379 AUE_NULL UNIMPL kse_exit +380 AUE_NULL UNIMPL kse_wakeup +381 AUE_NULL UNIMPL kse_create +382 AUE_NULL UNIMPL kse_thr_interrupt +383 AUE_NULL UNIMPL kse_release 384 AUE_NULL MSTD { int __mac_get_proc(struct mac *mac_p); } 385 AUE_NULL MSTD { int __mac_set_proc(struct mac *mac_p); } 386 AUE_NULL MSTD { int __mac_get_fd(int fd, \ @@ -770,9 +767,7 @@ 439 AUE_NULL MSTD { ssize_t extattr_list_link( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } -440 AUE_NULL MSTD { int kse_switchin( \ - struct kse_thr_mailbox *tmbx, \ - int flags); } +440 AUE_NULL UNIMPL kse_switchin 441 AUE_NULL MNOSTD { int ksem_timedwait(semid_t id, \ const struct timespec *abstime); } 442 AUE_NULL MSTD { int thr_suspend( \ Index: kern/tty.c =================================================================== RCS file: /home/ncvs/src/sys/kern/tty.c,v retrieving revision 1.257 diff -u -r1.257 tty.c --- kern/tty.c 10 Jan 2006 09:19:09 -0000 1.257 +++ kern/tty.c 4 Jun 2006 17:25:12 -0000 @@ -2661,7 +2661,7 @@ { int esta, estb; - struct ksegrp *kg; + struct thread *td; mtx_assert(&sched_lock, MA_OWNED); if (p1 == NULL) return (1); @@ -2682,12 +2682,10 @@ * tie - favor one with highest recent cpu utilization */ esta = estb = 0; - FOREACH_KSEGRP_IN_PROC(p1,kg) { - esta += kg->kg_estcpu; - } - FOREACH_KSEGRP_IN_PROC(p2,kg) { - estb += kg->kg_estcpu; - } + FOREACH_THREAD_IN_PROC(p1, td) + esta += td->td_estcpu; + 
FOREACH_THREAD_IN_PROC(p2, td) + estb += td->td_estcpu; if (estb > esta) return (1); if (esta > estb) Index: pc98/pc98/machdep.c =================================================================== RCS file: /home/ncvs/src/sys/pc98/pc98/machdep.c,v retrieving revision 1.375 diff -u -r1.375 machdep.c --- pc98/pc98/machdep.c 11 May 2006 17:29:23 -0000 1.375 +++ pc98/pc98/machdep.c 4 Jun 2006 22:07:09 -0000 @@ -1928,7 +1928,7 @@ * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ - proc_linkup(&proc0, &ksegrp0, &thread0); + proc_linkup(&proc0, &thread0); /* * Initialize DMAC Index: posix4/ksched.c =================================================================== RCS file: /home/ncvs/src/sys/posix4/ksched.c,v retrieving revision 1.28 diff -u -r1.28 ksched.c --- posix4/ksched.c 19 May 2006 06:37:24 -0000 1.28 +++ posix4/ksched.c 4 Jun 2006 22:07:10 -0000 @@ -106,7 +106,7 @@ int e = 0; mtx_lock_spin(&sched_lock); - pri_to_rtp(td->td_ksegrp, &rtp); + pri_to_rtp(td, &rtp); mtx_unlock_spin(&sched_lock); switch (rtp.type) { @@ -153,7 +153,7 @@ struct rtprio rtp; mtx_lock_spin(&sched_lock); - pri_to_rtp(td->td_ksegrp, &rtp); + pri_to_rtp(td, &rtp); mtx_unlock_spin(&sched_lock); if (RTP_PRIO_IS_REALTIME(rtp.type)) param->sched_priority = rtpprio_to_p4prio(rtp.prio); @@ -174,7 +174,6 @@ { int e = 0; struct rtprio rtp; - struct ksegrp *kg = td->td_ksegrp; switch(policy) { @@ -189,16 +188,7 @@ ? RTP_PRIO_FIFO : RTP_PRIO_REALTIME; mtx_lock_spin(&sched_lock); - rtp_to_pri(&rtp, kg); - FOREACH_THREAD_IN_GROUP(kg, td) { /* XXXKSE */ - if (TD_IS_RUNNING(td)) { - td->td_flags |= TDF_NEEDRESCHED; - } else if (TD_ON_RUNQ(td)) { - if (td->td_priority > kg->kg_user_pri) { - sched_prio(td, kg->kg_user_pri); - } - } - } + rtp_to_pri(&rtp, td); mtx_unlock_spin(&sched_lock); } else @@ -212,24 +202,7 @@ rtp.type = RTP_PRIO_NORMAL; rtp.prio = p4prio_to_rtpprio(param->sched_priority); mtx_lock_spin(&sched_lock); - rtp_to_pri(&rtp, kg); - - /* XXX Simply revert to whatever we had for last - * normal scheduler priorities. - * This puts a requirement - * on the scheduling code: You must leave the - * scheduling info alone. - */ - FOREACH_THREAD_IN_GROUP(kg, td) { - if (TD_IS_RUNNING(td)) { - td->td_flags |= TDF_NEEDRESCHED; - } else if (TD_ON_RUNQ(td)) { - if (td->td_priority > kg->kg_user_pri) { - sched_prio(td, kg->kg_user_pri); - } - } - - } + rtp_to_pri(&rtp, td); mtx_unlock_spin(&sched_lock); } break; Index: powerpc/powerpc/machdep.c =================================================================== RCS file: /home/ncvs/src/sys/powerpc/powerpc/machdep.c,v retrieving revision 1.95 diff -u -r1.95 machdep.c --- powerpc/powerpc/machdep.c 16 May 2006 14:32:17 -0000 1.95 +++ powerpc/powerpc/machdep.c 4 Jun 2006 22:07:13 -0000 @@ -295,7 +295,7 @@ /* * Start initializing proc0 and thread0. */ - proc_linkup(&proc0, &ksegrp0, &thread0); + proc_linkup(&proc0, &thread0); thread0.td_frame = &frame0; /* Index: sparc64/sparc64/machdep.c =================================================================== RCS file: /home/ncvs/src/sys/sparc64/sparc64/machdep.c,v retrieving revision 1.129 diff -u -r1.129 machdep.c --- sparc64/sparc64/machdep.c 3 Apr 2006 21:27:01 -0000 1.129 +++ sparc64/sparc64/machdep.c 4 Jun 2006 21:13:26 -0000 @@ -391,7 +391,7 @@ /* * Initialize proc0 stuff (p_contested needs to be done early). 
*/ - proc_linkup(&proc0, &ksegrp0, &thread0); + proc_linkup(&proc0, &thread0); proc0.p_md.md_sigtramp = NULL; proc0.p_md.md_utrap = NULL; thread0.td_kstack = kstack0; Index: sys/proc.h =================================================================== RCS file: /home/ncvs/src/sys/sys/proc.h,v retrieving revision 1.458 diff -u -r1.458 proc.h --- sys/proc.h 18 May 2006 08:43:46 -0000 1.458 +++ sys/proc.h 4 Jun 2006 22:17:52 -0000 @@ -144,7 +144,7 @@ * q - td_contested lock * r - p_peers lock * x - created at fork, only changes during single threading in exec - * z - zombie threads/ksegroup lock + * z - zombie threads lock * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held @@ -152,99 +152,23 @@ */ struct auditinfo; struct kaudit_record; -struct kg_sched; +struct td_sched; struct nlminfo; struct kaioinfo; struct p_sched; +struct proc; struct sleepqueue; -struct td_sched; +struct thread; struct trapframe; struct turnstile; struct mqueue_notifier; /* - * Here we define the three structures used for process information. - * - * The first is the thread. It might be thought of as a "Kernel - * Schedulable Entity Context". - * This structure contains all the information as to where a thread of - * execution is now, or was when it was suspended, why it was suspended, - * and anything else that will be needed to restart it when it is - * rescheduled. Always associated with a KSE when running, but can be - * reassigned to an equivalent KSE when being restarted for - * load balancing. Each of these is associated with a kernel stack - * and a pcb. - * - * It is important to remember that a particular thread structure may only - * exist as long as the system call or kernel entrance (e.g. by pagefault) - * which it is currently executing. It should therefore NEVER be referenced - * by pointers in long lived structures that live longer than a single - * request. If several threads complete their work at the same time, - * they will all rewind their stacks to the user boundary, report their - * completion state, and all but one will be freed. That last one will - * be kept to provide a kernel stack and pcb for the NEXT syscall or kernel - * entrance (basically to save freeing and then re-allocating it). The existing - * thread keeps a cached spare thread available to allow it to quickly - * get one when it needs a new one. There is also a system - * cache of free threads. Threads have priority and partake in priority - * inheritance schemes. - */ -struct thread; - -/* - * The KSEGRP is allocated resources across a number of CPUs. - * (Including a number of CPUxQUANTA. It parcels these QUANTA up among - * its threads, each of which should be running in a different CPU. - * BASE priority and total available quanta are properties of a KSEGRP. - * Multiple KSEGRPs in a single process compete against each other - * for total quanta in the same way that a forked child competes against - * it's parent process. - */ -struct ksegrp; - -/* - * A process is the owner of all system resources allocated to a task - * except CPU quanta. - * All KSEGs under one process see, and have the same access to, these - * resources (e.g. files, memory, sockets, credential, kqueues). - * A process may compete for CPU cycles on the same basis as a - * forked process cluster by spawning several KSEGRPs. - */ -struct proc; - -/*************** - * In pictures: - With a single run queue used by all processors: - - RUNQ: --->KSE---KSE--... 
SLEEPQ:[]---THREAD---THREAD---THREAD - \ \ []---THREAD - KSEG---THREAD--THREAD--THREAD [] - []---THREAD---THREAD - - (processors run THREADs from the KSEG until they are exhausted or - the KSEG exhausts its quantum) - -With PER-CPU run queues: -KSEs on the separate run queues directly -They would be given priorities calculated from the KSEG. - - * - *****************/ - -/* - * Kernel runnable context (thread). - * This is what is put to sleep and reactivated. - * The first KSE available in the correct group will run this thread. - * If several are available, use the one on the same CPU as last time. - * When waiting to be run, threads are hung off the KSEGRP in priority order. - * With N runnable and queued KSEs in the KSEGRP, the first N threads - * are linked to them. Other threads are not yet assigned. + * Thread context. Processes may have multiple threads. */ struct thread { struct proc *td_proc; /* (*) Associated process. */ - struct ksegrp *td_ksegrp; /* (*) Associated KSEG. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ - TAILQ_ENTRY(thread) td_kglist; /* (*) All threads in this ksegrp. */ /* The two queues below should someday be merged. */ TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. */ @@ -278,10 +202,9 @@ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ - struct kse_thr_mailbox *td_mailbox; /* (*) Userland mailbox address. */ struct ucred *td_ucred; /* (k) Reference to credentials. */ - struct thread *td_standin; /* (k + a) Use this for an upcall. */ - struct kse_upcall *td_upcall; /* (k + j) Upcall structure. */ + u_int td_estcpu; /* (j) Sum of the same field in KSEs. */ + u_int td_slptime; /* (j) How long completely blocked. */ u_int td_pticks; /* (k) Statclock hits for profiling */ u_int td_sticks; /* (k) Statclock hits in system mode. */ u_int td_iticks; /* (k) Statclock hits in intr mode. */ @@ -293,7 +216,6 @@ sigset_t td_sigmask; /* (c) Current signal mask. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ - int td_kflags; /* (c) Flags for KSE threading. */ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. */ @@ -304,6 +226,8 @@ #define td_startcopy td_endzero u_char td_base_pri; /* (j) Thread base kernel priority. */ u_char td_priority; /* (j) Thread active priority. */ + u_char td_pri_class; /* (j) Scheduling class. */ + u_char td_user_pri; /* (j) User pri from estcpu and nice. */ #define td_endcopy td_pcb /* @@ -372,15 +296,15 @@ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ -#define TDP_UPCALLING 0x00000008 /* This thread is doing an upcall. */ +/* 0x00000008 */ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock aquisition - deadlock treatment. */ -#define TDP_SA 0x00000080 /* A scheduler activation based thread. */ +/* 0x00000080 */ #define TDP_NOSLEEPING 0x00000100 /* Thread is not allowed to sleep on a sq. */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. 
*/ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ -#define TDP_CAN_UNBIND 0x00000800 /* Only temporarily bound. */ +/* 0x00000800 */ #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ @@ -399,17 +323,6 @@ #define TDI_LOCK 0x0008 /* Stopped on a lock. */ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ -/* - * flags (in kflags) related to M:N threading. - */ -#define TDK_KSEREL 0x0001 /* Blocked in msleep on kg->kg_completed. */ -#define TDK_KSERELSIG 0x0002 /* Blocked in msleep on p->p_siglist. */ -#define TDK_WAKEUP 0x0004 /* Thread has been woken by kse_wakeup. */ - -#define TD_CAN_UNBIND(td) \ - (((td)->td_pflags & TDP_CAN_UNBIND) && \ - ((td)->td_upcall != NULL)) - #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) @@ -450,54 +363,6 @@ #define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN /* - * An upcall is used when returning to userland. If a thread does not have - * an upcall on return to userland the thread exports its context and exits. - */ -struct kse_upcall { - TAILQ_ENTRY(kse_upcall) ku_link; /* List of upcalls in KSEG. */ - struct ksegrp *ku_ksegrp; /* Associated KSEG. */ - struct thread *ku_owner; /* Owning thread. */ - int ku_flags; /* KUF_* flags. */ - struct kse_mailbox *ku_mailbox; /* Userland mailbox address. */ - stack_t ku_stack; /* Userland upcall stack. */ - void *ku_func; /* Userland upcall function. */ - unsigned int ku_mflags; /* Cached upcall mbox flags. */ -}; - -#define KUF_DOUPCALL 0x00001 /* Do upcall now; don't wait. */ -#define KUF_EXITING 0x00002 /* Upcall structure is exiting. */ - -/* - * Kernel-scheduled entity group (KSEG). The scheduler considers each KSEG to - * be an indivisible unit from a time-sharing perspective, though each KSEG may - * contain multiple KSEs. - */ -struct ksegrp { - struct proc *kg_proc; /* (*) Proc that contains this KSEG. */ - TAILQ_ENTRY(ksegrp) kg_ksegrp; /* (*) Queue of KSEGs in kg_proc. */ - TAILQ_HEAD(, thread) kg_threads;/* (td_kglist) All threads. */ - TAILQ_HEAD(, thread) kg_runq; /* (td_runq) waiting RUNNABLE threads */ - TAILQ_HEAD(, kse_upcall) kg_upcalls; /* All upcalls in the group. */ - -#define kg_startzero kg_estcpu - u_int kg_estcpu; /* (j) Sum of the same field in KSEs. */ - u_int kg_slptime; /* (j) How long completely blocked. */ - int kg_numupcalls; /* (j) Num upcalls. */ - int kg_upsleeps; /* (c) Num threads in kse_release(). */ - struct kse_thr_mailbox *kg_completed; /* (c) Completed thread mboxes. */ - int kg_nextupcall; /* (n) Next upcall time. */ - int kg_upquantum; /* (n) Quantum to schedule an upcall. */ -#define kg_endzero kg_pri_class - -#define kg_startcopy kg_endzero - u_char kg_pri_class; /* (j) Scheduling class. */ - u_char kg_user_pri; /* (j) User pri from estcpu and nice. */ -#define kg_endcopy kg_numthreads - int kg_numthreads; /* (j) Num threads in total. */ - struct kg_sched *kg_sched; /* (*) Scheduler-specific data. */ -}; - -/* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts @@ -523,7 +388,6 @@ */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. 
*/ - TAILQ_HEAD(, ksegrp) p_ksegrps; /* (c)(kg_ksegrp) All KSEGs. */ TAILQ_HEAD(, thread) p_threads; /* (j)(td_plist) Threads. (shortcut) */ TAILQ_HEAD(, thread) p_suspended; /* (td_runq) Suspended threads. */ struct ucred *p_ucred; /* (c) Process owner's identity. */ @@ -586,7 +450,6 @@ int p_suspcount; /* (c) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (c) Num threads at user boundary */ - struct ksegrp *p_procscopegrp; int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ /* End area that is zeroed on creation. */ @@ -607,7 +470,6 @@ u_short p_xstat; /* (c) Exit status; also stop sig. */ struct knlist p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (j) Number of threads. */ - int p_numksegrps; /* (c) Number of ksegrps. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. */ @@ -716,18 +578,11 @@ #define FOREACH_PROC_IN_SYSTEM(p) \ LIST_FOREACH((p), &allproc, p_list) -#define FOREACH_KSEGRP_IN_PROC(p, kg) \ - TAILQ_FOREACH((kg), &(p)->p_ksegrps, kg_ksegrp) -#define FOREACH_THREAD_IN_GROUP(kg, td) \ - TAILQ_FOREACH((td), &(kg)->kg_threads, td_kglist) -#define FOREACH_UPCALL_IN_GROUP(kg, ku) \ - TAILQ_FOREACH((ku), &(kg)->kg_upcalls, ku_link) #define FOREACH_THREAD_IN_PROC(p, td) \ TAILQ_FOREACH((td), &(p)->p_threads, td_plist) /* XXXKSE the following lines should probably only be used in 1:1 code: */ #define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads) -#define FIRST_KSEGRP_IN_PROC(p) TAILQ_FIRST(&(p)->p_ksegrps) /* * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, @@ -838,7 +693,6 @@ extern struct sx allproc_lock; extern struct sx proctree_lock; extern struct mtx ppeers_lock; -extern struct ksegrp ksegrp0; /* Primary ksegrp in proc0. */ extern struct proc proc0; /* Process slot for swapper. */ extern struct thread thread0; /* Primary thread in proc0. */ extern struct vmspace vmspace0; /* VM space for proc0. */ @@ -889,7 +743,7 @@ void pargs_free(struct pargs *pa); void pargs_hold(struct pargs *pa); void procinit(void); -void proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td); +void proc_linkup(struct proc *p, struct thread *td); void proc_reparent(struct proc *child, struct proc *newparent); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); @@ -917,9 +771,6 @@ void cpu_set_fork_handler(struct thread *, void (*)(void *), void *); /* New in KSE. 
*/ -struct ksegrp *ksegrp_alloc(void); -void ksegrp_free(struct ksegrp *kg); -void ksegrp_stash(struct ksegrp *kg); void kse_GC(void); void kseinit(void); void cpu_set_upcall(struct thread *td, struct thread *td0); @@ -930,16 +781,13 @@ void cpu_thread_setup(struct thread *td); void cpu_thread_swapin(struct thread *); void cpu_thread_swapout(struct thread *); -void ksegrp_link(struct ksegrp *kg, struct proc *p); -void ksegrp_unlink(struct ksegrp *kg); struct thread *thread_alloc(void); void thread_continued(struct proc *p); void thread_exit(void) __dead2; int thread_export_context(struct thread *td, int willexit); void thread_free(struct thread *td); -void thread_link(struct thread *td, struct ksegrp *kg); +void thread_link(struct thread *td, struct proc *p); void thread_reap(void); -struct thread *thread_schedule_upcall(struct thread *td, struct kse_upcall *ku); void thread_signal_add(struct thread *td, ksiginfo_t *); int thread_single(int how); void thread_single_end(void); @@ -962,12 +810,6 @@ void thread_wait(struct proc *p); struct thread *thread_find(struct proc *p, lwpid_t tid); void thr_exit1(void); -struct kse_upcall *upcall_alloc(void); -void upcall_free(struct kse_upcall *ku); -void upcall_link(struct kse_upcall *ku, struct ksegrp *kg); -void upcall_unlink(struct kse_upcall *ku); -void upcall_remove(struct thread *td); -void upcall_stash(struct kse_upcall *ke); #endif /* _KERNEL */ Index: sys/rtprio.h =================================================================== RCS file: /home/ncvs/src/sys/sys/rtprio.h,v retrieving revision 1.14 diff -u -r1.14 rtprio.h --- sys/rtprio.h 7 Jan 2005 02:29:24 -0000 1.14 +++ sys/rtprio.h 4 Jun 2006 22:24:35 -0000 @@ -75,9 +75,9 @@ }; #ifdef _KERNEL -struct ksegrp; -int rtp_to_pri(struct rtprio *, struct ksegrp *); -void pri_to_rtp(struct ksegrp *, struct rtprio *); +struct thread; +int rtp_to_pri(struct rtprio *, struct thread *); +void pri_to_rtp(struct thread *, struct rtprio *); #endif #endif Index: sys/sched.h =================================================================== RCS file: /home/ncvs/src/sys/sys/sched.h,v retrieving revision 1.24 diff -u -r1.24 sched.h --- sys/sched.h 19 Apr 2005 04:01:25 -0000 1.24 +++ sys/sched.h 4 Jun 2006 21:22:23 -0000 @@ -52,17 +52,13 @@ * KSE Groups contain scheduling priority information. They record the * behavior of groups of KSEs and threads. */ -void sched_class(struct ksegrp *kg, int class); -void sched_exit_ksegrp(struct ksegrp *kg, struct thread *childtd); -void sched_fork_ksegrp(struct thread *td, struct ksegrp *child); +void sched_class(struct thread *td, int class); void sched_nice(struct proc *p, int nice); /* * Threads are switched in and out, block on resources, have temporary * priorities inherited from their ksegs, and use up cpu time. */ -void sched_exit_thread(struct thread *td, struct thread *child); -void sched_fork_thread(struct thread *td, struct thread *child); fixpt_t sched_pctcpu(struct thread *td); void sched_prio(struct thread *td, u_char prio); void sched_lend_prio(struct thread *td, u_char prio); @@ -93,7 +89,6 @@ * These procedures tell the process data structure allocation code how * many bytes to actually allocate. 
*/ -int sched_sizeof_ksegrp(void); int sched_sizeof_proc(void); int sched_sizeof_thread(void); @@ -111,11 +106,7 @@ /* temporarily here */ void schedinit(void); -void sched_init_concurrency(struct ksegrp *kg); -void sched_set_concurrency(struct ksegrp *kg, int cuncurrency); void sched_schedinit(void); -void sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td); -void sched_thread_exit(struct thread *td); void sched_newthread(struct thread *td); #endif /* !_SYS_SCHED_H_ */ Index: sys/syscall.h =================================================================== RCS file: /home/ncvs/src/sys/sys/syscall.h,v retrieving revision 1.192 diff -u -r1.192 syscall.h --- sys/syscall.h 23 Mar 2006 08:48:37 -0000 1.192 +++ sys/syscall.h 4 Jun 2006 22:33:13 -0000 @@ -2,8 +2,8 @@ * System call numbers. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/sys/syscall.h,v 1.192 2006/03/23 08:48:37 davidxu Exp $ - * created from FreeBSD: src/sys/kern/syscalls.master,v 1.213 2006/03/23 08:46:41 davidxu Exp + * $FreeBSD$ + * created from FreeBSD: src/sys/kern/syscalls.master,v 1.215 2006/03/28 14:32:37 des Exp */ #define SYS_syscall 0 @@ -306,11 +306,6 @@ #define SYS_nfsclnt 375 #define SYS_eaccess 376 #define SYS_nmount 378 -#define SYS_kse_exit 379 -#define SYS_kse_wakeup 380 -#define SYS_kse_create 381 -#define SYS_kse_thr_interrupt 382 -#define SYS_kse_release 383 #define SYS___mac_get_proc 384 #define SYS___mac_set_proc 385 #define SYS___mac_get_fd 386 @@ -363,7 +358,6 @@ #define SYS_extattr_list_fd 437 #define SYS_extattr_list_file 438 #define SYS_extattr_list_link 439 -#define SYS_kse_switchin 440 #define SYS_ksem_timedwait 441 #define SYS_thr_suspend 442 #define SYS_thr_wake 443 Index: sys/syscall.mk =================================================================== RCS file: /home/ncvs/src/sys/sys/syscall.mk,v retrieving revision 1.147 diff -u -r1.147 syscall.mk --- sys/syscall.mk 23 Mar 2006 08:48:37 -0000 1.147 +++ sys/syscall.mk 4 Jun 2006 22:33:13 -0000 @@ -1,7 +1,7 @@ # FreeBSD system call names. # DO NOT EDIT-- this file is automatically generated. -# $FreeBSD: src/sys/sys/syscall.mk,v 1.147 2006/03/23 08:48:37 davidxu Exp $ -# created from FreeBSD: src/sys/kern/syscalls.master,v 1.213 2006/03/23 08:46:41 davidxu Exp +# $FreeBSD$ +# created from FreeBSD: src/sys/kern/syscalls.master,v 1.215 2006/03/28 14:32:37 des Exp MIASM = \ syscall.o \ exit.o \ @@ -248,11 +248,6 @@ nfsclnt.o \ eaccess.o \ nmount.o \ - kse_exit.o \ - kse_wakeup.o \ - kse_create.o \ - kse_thr_interrupt.o \ - kse_release.o \ __mac_get_proc.o \ __mac_set_proc.o \ __mac_get_fd.o \ @@ -305,7 +300,6 @@ extattr_list_fd.o \ extattr_list_file.o \ extattr_list_link.o \ - kse_switchin.o \ ksem_timedwait.o \ thr_suspend.o \ thr_wake.o \ Index: sys/sysproto.h =================================================================== RCS file: /home/ncvs/src/sys/sys/sysproto.h,v retrieving revision 1.194 diff -u -r1.194 sysproto.h --- sys/sysproto.h 28 Mar 2006 14:32:38 -0000 1.194 +++ sys/sysproto.h 4 Jun 2006 22:33:13 -0000 @@ -2,8 +2,8 @@ * System call prototypes. * * DO NOT EDIT-- this file is automatically generated. 
- * $FreeBSD: src/sys/sys/sysproto.h,v 1.194 2006/03/28 14:32:38 des Exp $ - * created from FreeBSD: src/sys/kern/syscalls.master,v 1.213 2006/03/23 08:46:41 davidxu Exp + * $FreeBSD$ + * created from FreeBSD: src/sys/kern/syscalls.master,v 1.215 2006/03/28 14:32:37 des Exp */ #ifndef _SYS_SYSPROTO_H_ @@ -1095,24 +1095,6 @@ char iovcnt_l_[PADL_(unsigned int)]; unsigned int iovcnt; char iovcnt_r_[PADR_(unsigned int)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; }; -struct kse_exit_args { - register_t dummy; -}; -struct kse_wakeup_args { - char mbx_l_[PADL_(struct kse_mailbox *)]; struct kse_mailbox * mbx; char mbx_r_[PADR_(struct kse_mailbox *)]; -}; -struct kse_create_args { - char mbx_l_[PADL_(struct kse_mailbox *)]; struct kse_mailbox * mbx; char mbx_r_[PADR_(struct kse_mailbox *)]; - char newgroup_l_[PADL_(int)]; int newgroup; char newgroup_r_[PADR_(int)]; -}; -struct kse_thr_interrupt_args { - char tmbx_l_[PADL_(struct kse_thr_mailbox *)]; struct kse_thr_mailbox * tmbx; char tmbx_r_[PADR_(struct kse_thr_mailbox *)]; - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char data_l_[PADL_(long)]; long data; char data_r_[PADR_(long)]; -}; -struct kse_release_args { - char timeout_l_[PADL_(struct timespec *)]; struct timespec * timeout; char timeout_r_[PADR_(struct timespec *)]; -}; struct __mac_get_proc_args { char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)]; }; @@ -1336,10 +1318,6 @@ char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)]; char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)]; }; -struct kse_switchin_args { - char tmbx_l_[PADL_(struct kse_thr_mailbox *)]; struct kse_thr_mailbox * tmbx; char tmbx_r_[PADR_(struct kse_thr_mailbox *)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; struct ksem_timedwait_args { char id_l_[PADL_(semid_t)]; semid_t id; char id_r_[PADR_(semid_t)]; char abstime_l_[PADL_(const struct timespec *)]; const struct timespec * abstime; char abstime_r_[PADR_(const struct timespec *)]; @@ -1691,11 +1669,6 @@ int nfsclnt(struct thread *, struct nfsclnt_args *); int eaccess(struct thread *, struct eaccess_args *); int nmount(struct thread *, struct nmount_args *); -int kse_exit(struct thread *, struct kse_exit_args *); -int kse_wakeup(struct thread *, struct kse_wakeup_args *); -int kse_create(struct thread *, struct kse_create_args *); -int kse_thr_interrupt(struct thread *, struct kse_thr_interrupt_args *); -int kse_release(struct thread *, struct kse_release_args *); int __mac_get_proc(struct thread *, struct __mac_get_proc_args *); int __mac_set_proc(struct thread *, struct __mac_set_proc_args *); int __mac_get_fd(struct thread *, struct __mac_get_fd_args *); @@ -1748,7 +1721,6 @@ int extattr_list_fd(struct thread *, struct extattr_list_fd_args *); int extattr_list_file(struct thread *, struct extattr_list_file_args *); int extattr_list_link(struct thread *, struct extattr_list_link_args *); -int kse_switchin(struct thread *, struct kse_switchin_args *); int ksem_timedwait(struct thread *, struct ksem_timedwait_args *); int thr_suspend(struct thread *, struct thr_suspend_args *); int thr_wake(struct thread *, struct thr_wake_args *); Index: vm/vm_glue.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_glue.c,v retrieving revision 1.215 diff -u -r1.215 vm_glue.c --- vm/vm_glue.c 29 May 2006 21:28:56 -0000 1.215 +++ vm/vm_glue.c 4 Jun 2006 22:07:25 -0000 @@ -682,10 +682,8 
@@ ppri = INT_MIN; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { - struct ksegrp *kg; - if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) { + if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) continue; - } mtx_lock_spin(&sched_lock); FOREACH_THREAD_IN_PROC(p, td) { /* @@ -694,14 +692,13 @@ * */ if (td->td_inhibitors == TDI_SWAPPED) { - kg = td->td_ksegrp; - pri = p->p_swtime + kg->kg_slptime; + pri = p->p_swtime + td->td_slptime; if ((p->p_sflag & PS_SWAPINREQ) == 0) { pri -= p->p_nice * 8; } /* - * if this ksegrp is higher priority + * if this thread is higher priority * and there is enough space, then select * this process instead of the previous * selection. @@ -810,7 +807,6 @@ { struct proc *p; struct thread *td; - struct ksegrp *kg; int didswap = 0; retry: @@ -884,15 +880,15 @@ * do not swapout a realtime process * Check all the thread groups.. */ - FOREACH_KSEGRP_IN_PROC(p, kg) { - if (PRI_IS_REALTIME(kg->kg_pri_class)) + FOREACH_THREAD_IN_PROC(p, td) { + if (PRI_IS_REALTIME(td->td_pri_class)) goto nextproc; /* * Guarantee swap_idle_threshold1 * time in memory. */ - if (kg->kg_slptime < swap_idle_threshold1) + if (td->td_slptime < swap_idle_threshold1) goto nextproc; /* @@ -904,11 +900,8 @@ * This could be refined to support * swapping out a thread. */ - FOREACH_THREAD_IN_GROUP(kg, td) { - if ((td->td_priority) < PSOCK || - !thread_safetoswapout(td)) - goto nextproc; - } + if ((td->td_priority) < PSOCK || !thread_safetoswapout(td)) + goto nextproc; /* * If the system is under memory stress, * or if we are swapping @@ -917,11 +910,11 @@ */ if (((action & VM_SWAP_NORMAL) == 0) && (((action & VM_SWAP_IDLE) == 0) || - (kg->kg_slptime < swap_idle_threshold2))) + (td->td_slptime < swap_idle_threshold2))) goto nextproc; - if (minslptime > kg->kg_slptime) - minslptime = kg->kg_slptime; + if (minslptime > td->td_slptime) + minslptime = td->td_slptime; } /* Index: vm/vm_zeroidle.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_zeroidle.c,v retrieving revision 1.37 diff -u -r1.37 vm_zeroidle.c --- vm/vm_zeroidle.c 17 Apr 2006 18:20:38 -0000 1.37 +++ vm/vm_zeroidle.c 4 Jun 2006 21:19:24 -0000 @@ -182,7 +182,7 @@ PROC_UNLOCK(pagezero_proc); mtx_lock_spin(&sched_lock); td = FIRST_THREAD_IN_PROC(pagezero_proc); - sched_class(td->td_ksegrp, PRI_IDLE); + sched_class(td, PRI_IDLE); sched_prio(td, PRI_MAX_IDLE); setrunqueue(td, SRQ_BORING); mtx_unlock_spin(&sched_lock);
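A pattern that repeats throughout the patch, in sched_nice(), the estcpu comparison in kern/tty.c and the swapper in vm_glue.c, is the collapse of the nested FOREACH_KSEGRP_IN_PROC / FOREACH_THREAD_IN_GROUP walk into a single FOREACH_THREAD_IN_PROC pass now that the per-group scheduling state lives on the thread. The toy program below sketches only that iteration shape; the structures are stand-ins rather than the kernel's struct proc and struct thread, the priority formula is a placeholder (estcpu term omitted), and a BSD-style <sys/queue.h> is assumed.

/*
 * Toy illustration of the flattened thread iteration.  Not kernel code.
 */
#include <sys/queue.h>
#include <stdio.h>

struct thread {
	int			td_tid;
	int			td_user_pri;
	TAILQ_ENTRY(thread)	td_plist;
};

struct proc {
	int			p_nice;
	TAILQ_HEAD(, thread)	p_threads;
};

#define	FOREACH_THREAD_IN_PROC(p, td)					\
	TAILQ_FOREACH((td), &(p)->p_threads, td_plist)

/* Per-thread recalculation, in the spirit of the reworked sched_nice(). */
static void
proc_set_nice(struct proc *p, int nice)
{
	struct thread *td;

	p->p_nice = nice;
	FOREACH_THREAD_IN_PROC(p, td) {
		td->td_user_pri = 160 + (nice + 20);	/* placeholder */
		printf("tid %d -> user pri %d\n", td->td_tid, td->td_user_pri);
	}
}

int
main(void)
{
	struct proc p;
	struct thread t1 = { .td_tid = 100001 };
	struct thread t2 = { .td_tid = 100002 };

	TAILQ_INIT(&p.p_threads);
	TAILQ_INSERT_TAIL(&p.p_threads, &t1, td_plist);
	TAILQ_INSERT_TAIL(&p.p_threads, &t2, td_plist);
	proc_set_nice(&p, 0);
	return (0);
}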