From 1568fcd6d2a1b386bde603a725d0b6708416d4a8 Mon Sep 17 00:00:00 2001
From: Andrey Zonov
Date: Thu, 24 Oct 2013 23:38:46 -0700
Subject: [PATCH 1/2] Implement passive serialization

---
 sys/conf/files        |   1 +
 sys/kern/kern_psz.c   | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/kern/kern_synch.c |   6 ++
 sys/sys/kernel.h      |   1 +
 sys/sys/psz.h         |  50 ++++++++++
 5 files changed, 332 insertions(+)
 create mode 100644 sys/kern/kern_psz.c
 create mode 100644 sys/sys/psz.h

diff --git a/sys/conf/files b/sys/conf/files
index 08ab51f..6aba70a 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2913,6 +2913,7 @@ kern/kern_poll.c		optional device_polling
 kern/kern_priv.c		standard
 kern/kern_proc.c		standard
 kern/kern_prot.c		standard
+kern/kern_psz.c			standard
 kern/kern_racct.c		standard
 kern/kern_rangelock.c		standard
 kern/kern_rctl.c		standard
diff --git a/sys/kern/kern_psz.c b/sys/kern/kern_psz.c
new file mode 100644
index 0000000..bfb16e3
--- /dev/null
+++ b/sys/kern/kern_psz.c
@@ -0,0 +1,274 @@
+/*-
+ * Copyright (c) 2014 Andrey Zonov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer,
+ *    without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Passive serialization: callbacks handed to psz_enqueue() are parked on a
+ * per-CPU queue, advanced through two further queues by psz_checkpoint()
+ * once the shared generation counters reach mp_ncpus, and finally executed
+ * by the worker thread bound to that CPU.
+ */
+
+/* Callbacks enqueued but not yet executed, summed over all CPUs. */
+static u_long psz_inqueue;
+
+static SYSCTL_NODE(_debug, OID_AUTO, psz, CTLFLAG_RD, NULL,
+    "passive serialization debugging");
+SYSCTL_ULONG(_debug_psz, OID_AUTO, inqueue, CTLFLAG_RD, &psz_inqueue, 0, "");
+
+/* Event counters exported read-only under the debug.psz sysctl node. */
+struct psz_debug {
+	uint64_t case0;
+	uint64_t case1;
+	uint64_t case1match;
+	uint64_t case2;
+	uint64_t case2match;
+	uint64_t case3;
+	uint64_t checkpoints;
+	uint64_t runs;
+	uint64_t wakeups;
+};
+
+#define	PSZ_DEBUG_SIZE	(sizeof(struct psz_debug) / sizeof(uint64_t))
+
+static counter_u64_t pcpu_psz_debug[PSZ_DEBUG_SIZE];
+
+#define	PSZ_DEBUG_OFFSET(name)						\
+	pcpu_psz_debug[offsetof(struct psz_debug, name) / sizeof(uint64_t)]
+#define	PSZ_DEBUG_SYSCTL(name)						\
+	SYSCTL_COUNTER_U64(_debug_psz, OID_AUTO, name, CTLFLAG_RD,	\
+	    &PSZ_DEBUG_OFFSET(name), "")
+#define	PSZ_DEBUG_INC(name)						\
+	counter_u64_add(PSZ_DEBUG_OFFSET(name), 1)
+
+PSZ_DEBUG_SYSCTL(case0);
+PSZ_DEBUG_SYSCTL(case1);
+PSZ_DEBUG_SYSCTL(case1match);
+PSZ_DEBUG_SYSCTL(case2);
+PSZ_DEBUG_SYSCTL(case2match);
+PSZ_DEBUG_SYSCTL(case3);
+PSZ_DEBUG_SYSCTL(checkpoints);
+PSZ_DEBUG_SYSCTL(runs);
+PSZ_DEBUG_SYSCTL(wakeups);
+
+/* Per-CPU passive serialization state. */
+struct psz_info {
+	int cpu;			/* CPU this slot belongs to */
+	int gen;			/* checkpoint state, cycles 0..3 */
+	struct thread *td;		/* worker thread for this CPU */
+	STAILQ_HEAD(, psz_cb) q0;	/* freshly enqueued callbacks */
+	STAILQ_HEAD(, psz_cb) q1;	/* taken from q0 on gen 1 -> 2 */
+	STAILQ_HEAD(, psz_cb) q2;	/* taken from q1 on gen 2 -> 3 */
+};
+
+static struct psz_info pcpu_psz_info[MAXCPU];
+
+/* CPUs currently in generation 1 or 2. */
+static int psz_all_cpus1;
+/* CPUs currently in generation 2 or 3. */
+static int psz_all_cpus2;
+
+/*
+ * Per-CPU worker thread body.  Binds itself to info->cpu, then loops
+ * forever: waits until q2 is non-empty, detaches the whole queue and
+ * invokes every callback on it.
+ *
+ * NOTE(review): the q2 emptiness test and tsleep() are not atomic with
+ * respect to the wakeup_one() in psz_checkpoint(); a wakeup landing in
+ * between looks like it would be missed, leaving the 60 second timeout.
+ */
+static void
+psz_worker(void *data)
+{
+	struct psz_info *info = data;
+	struct psz_cb *cb, *next;
+	STAILQ_HEAD(, psz_cb) q;
+
+	thread_lock(curthread);
+	sched_bind(curthread, info->cpu);
+	thread_unlock(curthread);
+
+	for ( ;; ) {
+		STAILQ_INIT(&q);
+		critical_enter();
+		while (STAILQ_EMPTY(&info->q2)) {
+			critical_exit();
+			tsleep(info->td, 0, "-", 60 * hz);
+			PSZ_DEBUG_INC(wakeups);
+			critical_enter();
+		}
+		STAILQ_CONCAT(&q, &info->q2);
+		critical_exit();
+		/*
+		 * Execute all expired callbacks.  The successor is read
+		 * before the call since cb is handed to the callback,
+		 * which presumably may free it.
+		 */
+		cb = STAILQ_FIRST(&q);
+		while (cb != NULL) {
+			next = STAILQ_NEXT(cb, next);
+			(*cb->func)(cb);
+			atomic_subtract_long(&psz_inqueue, 1);
+			cb = next;
+		}
+		PSZ_DEBUG_INC(runs);
+	}
+}
+
+/*
+ * SYSINIT hook: allocates the debug counters and, for every CPU, sets up
+ * its psz_info slot and starts its worker thread.  Panics if a worker
+ * thread cannot be created.
+ */
+static void
+psz_setup(void *dummy)
+{
+	struct psz_info *info;
+	struct proc *p;
+	struct thread *td;
+	int cpu, error;
+
+	COUNTER_ARRAY_ALLOC(pcpu_psz_debug, PSZ_DEBUG_SIZE, M_WAITOK);
+
+	p = NULL; /* create new process */
+	CPU_FOREACH(cpu) {
+		info = &pcpu_psz_info[cpu];
+		info->cpu = cpu;
+		STAILQ_INIT(&info->q0);
+		STAILQ_INIT(&info->q1);
+		STAILQ_INIT(&info->q2);
+		error = kproc_kthread_add(psz_worker, info, &p, &td,
+		    RFSTOPPED | RFHIGHPID, 0, "psz", "psz: cpu%d", cpu);
+		if (error) {
+			panic("%s: kproc_kthread_add returned error %d\n",
+			    __func__, error);
+		}
+		info->td = td;
+		thread_lock(td);
+		sched_add(td, SRQ_BORING);
+		thread_unlock(td);
+	}
+}
+
+SYSINIT(psz, SI_SUB_PSZ, SI_ORDER_ANY, psz_setup, NULL);
+
+/*
+ * Queue cb on the current CPU so that func(cb) is later called from that
+ * CPU's worker thread.  Neither func nor cb may be NULL (asserted with
+ * MPASS).
+ */
+void
+psz_enqueue(void (*func)(struct psz_cb *), struct psz_cb *cb)
+{
+
+	MPASS(func != NULL);
+	MPASS(cb != NULL);
+
+	cb->func = func;
+	critical_enter();
+	STAILQ_INSERT_TAIL(&pcpu_psz_info[curcpu].q0, cb, next);
+	critical_exit();
+	atomic_add_long(&psz_inqueue, 1);
+}
+
+/*
+ * Advance the calling CPU's generation by at most one step.  Called from
+ * mi_switch().  Returns immediately while no callbacks are pending or when
+ * the current thread is this CPU's worker.
+ *
+ *   gen 0 -> 1: always; psz_all_cpus1 is incremented.
+ *   gen 1 -> 2: once psz_all_cpus1 == mp_ncpus; psz_all_cpus2 is
+ *               incremented and q0 is appended to q1.
+ *   gen 2 -> 3: once psz_all_cpus2 == mp_ncpus; psz_all_cpus1 is
+ *               decremented, q1 is appended to q2 and the worker is woken
+ *               if q2 is non-empty.
+ *   gen 3 -> 0: always; psz_all_cpus2 is decremented.
+ */
+void
+psz_checkpoint(void)
+{
+	struct psz_info *info;
+
+	info = &pcpu_psz_info[curcpu];
+	if (psz_inqueue == 0 || curthread == info->td)
+		return;
+
+	PSZ_DEBUG_INC(checkpoints);
+	switch (info->gen)
+	{
+	case 0:
+		PSZ_DEBUG_INC(case0);
+		info->gen = 1;
+		atomic_add_int(&psz_all_cpus1, 1);
+		return;
+	case 1:
+		PSZ_DEBUG_INC(case1);
+		if (psz_all_cpus1 == mp_ncpus) {
+			PSZ_DEBUG_INC(case1match);
+			info->gen = 2;
+			atomic_add_int(&psz_all_cpus2, 1);
+			STAILQ_CONCAT(&info->q1, &info->q0);
+		}
+		return;
+	case 2:
+		PSZ_DEBUG_INC(case2);
+		if (psz_all_cpus2 == mp_ncpus) {
+			PSZ_DEBUG_INC(case2match);
+			info->gen = 3;
+			atomic_subtract_int(&psz_all_cpus1, 1);
+			STAILQ_CONCAT(&info->q2, &info->q1);
+			if (!STAILQ_EMPTY(&info->q2))
+				wakeup_one(info->td);
+		}
+		return;
+	case 3:
+		PSZ_DEBUG_INC(case3);
+		info->gen = 0;
+		atomic_subtract_int(&psz_all_cpus2, 1);
+		return;
+	}
+}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 37c538b..77df594 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -490,6 +491,11 @@ mi_switch(int flags, struct thread *newtd)
 #ifdef XEN
 	PT_UPDATES_FLUSH();
 #endif
+	/*
+	 * Make passive serialization checkpoint.
+	 */
+	psz_checkpoint();
+
 	sched_switch(td, newtd, flags);
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
 	    "prio:%d", td->td_priority);
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index 3c5258a..f424416 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -167,6 +167,7 @@ enum sysinit_sub_id {
 	SI_SUB_KTHREAD_IDLE	= 0xee00000,	/* idle procs*/
 	SI_SUB_SMP		= 0xf000000,	/* start the APs*/
 	SI_SUB_RACCTD		= 0xf100000,	/* start racctd*/
+	SI_SUB_PSZ		= 0xf400000,	/* Passive serialization */
 	SI_SUB_LAST		= 0xfffffff	/* final initialization */
 };
 
diff --git a/sys/sys/psz.h b/sys/sys/psz.h
new file mode 100644
index 0000000..9b88d78
--- /dev/null
+++ b/sys/sys/psz.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2014 Andrey Zonov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer,
+ *    without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_PSZ_H_
+#define _SYS_PSZ_H_
+
+#include
+#include
+
+/* Wraps __containerof(); an expression, so the caller supplies the ';'. */
+#define	psz2struct(p, s, m)	__containerof(p, s, m)
+
+/* Deferred callback: queue linkage plus the function set by psz_enqueue(). */
+struct psz_cb {
+	STAILQ_ENTRY(psz_cb) next;
+	void (*func)(struct psz_cb *);
+};
+
+/* Store val into *ptr via atomic_store_rel_ptr(). */
+#define	psz_update_ptr(ptr, val)	atomic_store_rel_ptr((volatile uintptr_t *)(ptr), (uintptr_t)(val))
+/* Read-side sections map onto critical_enter()/critical_exit(). */
+#define	psz_read_lock()		critical_enter()
+#define	psz_read_unlock()	critical_exit()
+void	psz_enqueue(void (*func)(struct psz_cb *), struct psz_cb *cb);
+void	psz_checkpoint(void);
+
+#endif /* !_SYS_PSZ_H_ */
-- 
1.9.0