lib/libc/amd64/sys/Makefile.inc | 3 +- lib/libc/amd64/sys/__vdso_gettc.c | 50 +++++++++++ lib/libc/gen/aux.c | 15 ++++ lib/libc/i386/sys/Makefile.inc | 3 +- lib/libc/i386/sys/__vdso_gettc.c | 50 +++++++++++ lib/libc/include/libc_private.h | 7 ++ lib/libc/sys/Makefile.inc | 4 + lib/libc/sys/__vdso_gettimeofday.c | 148 +++++++++++++++++++++++++++++++ lib/libc/sys/clock_gettime.c | 52 +++++++++++ lib/libc/sys/gettimeofday.c | 51 +++++++++++ sys/amd64/include/elf.h | 1 + sys/amd64/include/vdso.h | 6 ++ sys/i386/i386/elf_machdep.c | 5 +- sys/i386/include/elf.h | 1 + sys/i386/include/vdso.h | 6 ++ sys/i386/include/vmparam.h | 3 +- sys/kern/imgact_elf.c | 4 + sys/kern/kern_exec.c | 170 +++++++++++++++++++++++++++++++++--- sys/kern/kern_tc.c | 84 ++++++++++++++++++ sys/sys/sysent.h | 11 ++- sys/sys/vdso.h | 116 ++++++++++++++++++++++++ sys/x86/include/vdso.h | 41 +++++++++ sys/x86/x86/tsc.c | 22 +++++ 23 files changed, 834 insertions(+), 19 deletions(-) diff --git a/lib/libc/amd64/sys/Makefile.inc b/lib/libc/amd64/sys/Makefile.inc index c7b17e0..51583d3 100644 --- a/lib/libc/amd64/sys/Makefile.inc +++ b/lib/libc/amd64/sys/Makefile.inc @@ -1,7 +1,8 @@ # from: Makefile.inc,v 1.1 1993/09/03 19:04:23 jtc Exp # $FreeBSD$ -SRCS+= amd64_get_fsbase.c amd64_get_gsbase.c amd64_set_fsbase.c amd64_set_gsbase.c +SRCS+= amd64_get_fsbase.c amd64_get_gsbase.c amd64_set_fsbase.c \ + amd64_set_gsbase.c __vdso_gettc.c MDASM= vfork.S brk.S cerror.S exect.S getcontext.S pipe.S ptrace.S \ reboot.S sbrk.S setlogin.S sigreturn.S diff --git a/lib/libc/amd64/sys/__vdso_gettc.c b/lib/libc/amd64/sys/__vdso_gettc.c new file mode 100644 index 0000000..4419141 --- /dev/null +++ b/lib/libc/amd64/sys/__vdso_gettc.c @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2012 Konstantin Belousov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +static u_int +__vdso_gettc_low(const struct vdso_timehands *th) +{ + uint32_t rv; + + __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" + : "=a" (rv) : "c" (th->th_x86_shift) : "edx"); + return (rv); +} + +#pragma weak __vdso_gettc +u_int +__vdso_gettc(const struct vdso_timehands *th) +{ + + return (th->th_x86_shift > 0 ? 
__vdso_gettc_low(th) : rdtsc32()); +} diff --git a/lib/libc/gen/aux.c b/lib/libc/gen/aux.c index 4bf8643..3767ac0 100644 --- a/lib/libc/gen/aux.c +++ b/lib/libc/gen/aux.c @@ -66,6 +66,7 @@ __init_elf_aux_vector(void) static pthread_once_t aux_once = PTHREAD_ONCE_INIT; static int pagesize, osreldate, canary_len, ncpus, pagesizes_len; static char *canary, *pagesizes; +static void *timekeep; static void init_aux(void) @@ -101,6 +102,10 @@ init_aux(void) case AT_NCPUS: ncpus = aux->a_un.a_val; break; + + case AT_TIMEKEEP: + timekeep = aux->a_un.a_ptr; + break; } } } @@ -163,6 +168,16 @@ _elf_aux_info(int aux, void *buf, int buflen) } else res = EINVAL; break; + case AT_TIMEKEEP: + if (buflen == sizeof(void *)) { + if (timekeep != NULL) { + *(void **)buf = timekeep; + res = 0; + } else + res = ENOENT; + } else + res = EINVAL; + break; default: res = ENOENT; break; diff --git a/lib/libc/i386/sys/Makefile.inc b/lib/libc/i386/sys/Makefile.inc index 98a9c9e..9eefabc 100644 --- a/lib/libc/i386/sys/Makefile.inc +++ b/lib/libc/i386/sys/Makefile.inc @@ -5,7 +5,8 @@ SRCS+= i386_clr_watch.c i386_set_watch.c i386_vm86.c .endif SRCS+= i386_get_fsbase.c i386_get_gsbase.c i386_get_ioperm.c i386_get_ldt.c \ - i386_set_fsbase.c i386_set_gsbase.c i386_set_ioperm.c i386_set_ldt.c + i386_set_fsbase.c i386_set_gsbase.c i386_set_ioperm.c i386_set_ldt.c \ + __vdso_gettc.c MDASM= Ovfork.S brk.S cerror.S exect.S getcontext.S pipe.S ptrace.S \ reboot.S sbrk.S setlogin.S sigreturn.S syscall.S diff --git a/lib/libc/i386/sys/__vdso_gettc.c b/lib/libc/i386/sys/__vdso_gettc.c new file mode 100644 index 0000000..4419141 --- /dev/null +++ b/lib/libc/i386/sys/__vdso_gettc.c @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2012 Konstantin Belousov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +static u_int +__vdso_gettc_low(const struct vdso_timehands *th) +{ + uint32_t rv; + + __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" + : "=a" (rv) : "c" (th->th_x86_shift) : "edx"); + return (rv); +} + +#pragma weak __vdso_gettc +u_int +__vdso_gettc(const struct vdso_timehands *th) +{ + + return (th->th_x86_shift > 0 ? 
__vdso_gettc_low(th) : rdtsc32()); +} diff --git a/lib/libc/include/libc_private.h b/lib/libc/include/libc_private.h index 2182f46..faae028 100644 --- a/lib/libc/include/libc_private.h +++ b/lib/libc/include/libc_private.h @@ -34,6 +34,7 @@ #ifndef _LIBC_PRIVATE_H_ #define _LIBC_PRIVATE_H_ +#include #include /* @@ -245,6 +246,12 @@ extern void * __sys_freebsd6_mmap(void *, __size_t, int, int, int, int, __off_t) /* Without back-compat translation */ extern int __sys_fcntl(int, int, ...); +struct timespec; +struct timeval; +struct timezone; +int __sys_gettimeofday(struct timeval *, struct timezone *); +int __sys_clock_gettime(__clockid_t, struct timespec *ts); + /* execve() with PATH processing to implement posix_spawnp() */ int _execvpe(const char *, char * const *, char * const *); diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index 61d1713..df4ef42 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -15,6 +15,10 @@ # .sinclude "${.CURDIR}/${LIBC_ARCH}/sys/Makefile.inc" +SRCS+= clock_gettime.c gettimeofday.c __vdso_gettimeofday.c +NOASM+= clock_gettime.o gettimeofday.o +PSEUDO+= _clock_gettime.o _gettimeofday.o + # Sources common to both syscall interfaces: SRCS+= stack_protector.c stack_protector_compat.c __error.c .if !defined(WITHOUT_SYSCALL_COMPAT) diff --git a/lib/libc/sys/__vdso_gettimeofday.c b/lib/libc/sys/__vdso_gettimeofday.c new file mode 100644 index 0000000..d29a6fa --- /dev/null +++ b/lib/libc/sys/__vdso_gettimeofday.c @@ -0,0 +1,148 @@ +/*- + * Copyright (c) 2012 Konstantin Belousov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include "libc_private.h" + +int __vdso_gettimeofday(struct timeval *, struct timezone *); +#pragma weak __vdso_gettimeofday + +int __vdso_clock_gettime(clockid_t, struct timespec *); +#pragma weak __vdso_clock_gettime + +static u_int +tc_delta(const struct vdso_timehands *th) +{ + + return ((__vdso_gettc(th) - th->th_offset_count) & + th->th_counter_mask); +} + +static int +binuptime(struct bintime *bt, struct vdso_timekeep *tk, int abs) +{ + struct vdso_timehands *th; + uint32_t curr, gen; + + do { + if (!tk->tk_enabled) + return (ENOSYS); + + /* + * XXXKIB. The load of tk->tk_current should use + * atomic_load_acq_32 to provide load barrier. But + * since tk points to r/o mapped page, x86 + * implementation of atomic_load_acq faults. 
+ */ + curr = tk->tk_current; + rmb(); + th = &tk->tk_th[curr]; + if (th->th_algo != VDSO_TH_ALGO_1) + return (ENOSYS); + gen = th->th_gen; + *bt = th->th_offset; + bintime_addx(bt, th->th_scale * tc_delta(th)); + if (abs) + bintime_add(bt, &th->th_boottime); + + /* + * Barrier for load of both tk->tk_current and th->th_gen. + */ + rmb(); + } while (curr != tk->tk_current || gen == 0 || gen != th->th_gen); + return (0); +} + +static struct vdso_timekeep *tk; + +int +__vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct bintime bt; + int error; + + if (tz != NULL) + return (ENOSYS); + if (tk == NULL) { + error = _elf_aux_info(AT_TIMEKEEP, &tk, sizeof(tk)); + if (error != 0 || tk == NULL) + return (ENOSYS); + } + if (tk->tk_ver != VDSO_TK_VER_CURR) + return (ENOSYS); + error = binuptime(&bt, tk, 1); + if (error != 0) + return (error); + bintime2timeval(&bt, tv); + return (0); +} + +int +__vdso_clock_gettime(clockid_t clock_id, struct timespec *ts) +{ + struct bintime bt; + int abs, error; + + if (tk == NULL) { + error = _elf_aux_info(AT_TIMEKEEP, &tk, sizeof(tk)); + if (error != 0 || tk == NULL) + return (ENOSYS); + } + if (tk->tk_ver != VDSO_TK_VER_CURR) + return (ENOSYS); + switch (clock_id) { + case CLOCK_REALTIME: + case CLOCK_REALTIME_PRECISE: + case CLOCK_REALTIME_FAST: + case CLOCK_SECOND: + abs = 1; + break; + case CLOCK_MONOTONIC: + case CLOCK_MONOTONIC_PRECISE: + case CLOCK_MONOTONIC_FAST: + case CLOCK_UPTIME: + case CLOCK_UPTIME_PRECISE: + case CLOCK_UPTIME_FAST: + abs = 0; + break; + default: + return (ENOSYS); + } + error = binuptime(&bt, tk, abs); + if (error != 0) + return (error); + bintime2timespec(&bt, ts); + if (clock_id == CLOCK_SECOND) + ts->tv_nsec = 0; + return (0); +} diff --git a/lib/libc/sys/clock_gettime.c b/lib/libc/sys/clock_gettime.c new file mode 100644 index 0000000..e7e701b --- /dev/null +++ b/lib/libc/sys/clock_gettime.c @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2012 Konstantin Belousov + * + * Redistribution and use 
in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include "libc_private.h" + +int __clock_gettime(clockid_t, struct timespec *ts); + +__weak_reference(__clock_gettime, clock_gettime); + +int +__clock_gettime(clockid_t clock_id, struct timespec *ts) +{ + int error; + + if (__vdso_clock_gettime != NULL && __vdso_gettc != NULL) + error = __vdso_clock_gettime(clock_id, ts); + else + error = ENOSYS; + if (error == ENOSYS) + error = __sys_clock_gettime(clock_id, ts); + return (error); +} diff --git a/lib/libc/sys/gettimeofday.c b/lib/libc/sys/gettimeofday.c new file mode 100644 index 0000000..4cc87e1 --- /dev/null +++ b/lib/libc/sys/gettimeofday.c @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2012 Konstantin Belousov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include "libc_private.h" + +int __gettimeofday(struct timeval *tv, struct timezone *tz); + +__weak_reference(__gettimeofday, gettimeofday); + +int +__gettimeofday(struct timeval *tv, struct timezone *tz) +{ + int error; + + if (__vdso_gettimeofday != NULL && __vdso_gettc != NULL) + error = __vdso_gettimeofday(tv, tz); + else + error = ENOSYS; + if (error == ENOSYS) + error = __sys_gettimeofday(tv, tz); + return (error); +} diff --git a/sys/amd64/include/elf.h b/sys/amd64/include/elf.h index ded4e44..d69c6b4 100644 --- a/sys/amd64/include/elf.h +++ b/sys/amd64/include/elf.h @@ -94,6 +94,7 @@ __ElfType(Auxinfo); #define AT_NCPUS 19 /* Number of CPUs. */ #define AT_PAGESIZES 20 /* Pagesizes. */ #define AT_PAGESIZESLEN 21 /* Number of pagesizes. */ +#define AT_TIMEKEEP 22 /* Pointer to timehands. */ #define AT_STACKPROT 23 /* Initial stack protection. */ #define AT_COUNT 24 /* Count of defined aux entry types. */ diff --git a/sys/amd64/include/vdso.h b/sys/amd64/include/vdso.h new file mode 100644 index 0000000..b81c455 --- /dev/null +++ b/sys/amd64/include/vdso.h @@ -0,0 +1,6 @@ +/*- + * This file is in the public domain. 
+ */ +/* $FreeBSD$ */ + +#include diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c index a782445..034b4c4 100644 --- a/sys/i386/i386/elf_machdep.c +++ b/sys/i386/i386/elf_machdep.c @@ -74,12 +74,15 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32, + .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, + .sv_shared_page_base = SHAREDPAGE, + .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, }; +INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, diff --git a/sys/i386/include/elf.h b/sys/i386/include/elf.h index 9427811..93fa1fd 100644 --- a/sys/i386/include/elf.h +++ b/sys/i386/include/elf.h @@ -96,6 +96,7 @@ __ElfType(Auxinfo); #define AT_NCPUS 19 /* Number of CPUs. */ #define AT_PAGESIZES 20 /* Pagesizes. */ #define AT_PAGESIZESLEN 21 /* Number of pagesizes. */ +#define AT_TIMEKEEP 22 /* Pointer to timehands. */ #define AT_STACKPROT 23 /* Initial stack protection. */ #define AT_COUNT 24 /* Count of defined aux entry types. */ diff --git a/sys/i386/include/vdso.h b/sys/i386/include/vdso.h new file mode 100644 index 0000000..b81c455 --- /dev/null +++ b/sys/i386/include/vdso.h @@ -0,0 +1,6 @@ +/*- + * This file is in the public domain. 
+ */ +/* $FreeBSD$ */ + +#include diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h index 56ab4b3..ce6672d 100644 --- a/sys/i386/include/vmparam.h +++ b/sys/i386/include/vmparam.h @@ -165,7 +165,8 @@ #define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0) -#define USRSTACK VM_MAXUSER_ADDRESS +#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) +#define USRSTACK SHAREDPAGE #define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) #define VM_MIN_ADDRESS ((vm_offset_t)0) diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index f907526..5d0f494 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1011,6 +1011,10 @@ __elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp) AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes); AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen); } + if (imgp->sysent->sv_timekeep_base != 0) { + AUXARGS_ENTRY(pos, AT_TIMEKEEP, + imgp->sysent->sv_timekeep_base); + } AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? 
imgp->stack_prot : imgp->sysent->sv_stackprot); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index dac4703..80502e3 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -28,6 +28,7 @@ __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" +#include "opt_compat.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_ktrace.h" @@ -64,6 +65,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #ifdef KTRACE @@ -1512,33 +1514,81 @@ exec_unregister(execsw_arg) return (0); } +static struct sx shared_page_alloc_sx; static vm_object_t shared_page_obj; static int shared_page_free; -int -shared_page_fill(int size, int align, const char *data) +struct sf_buf * +shared_page_write_start(int base) { vm_page_t m; struct sf_buf *s; + + VM_OBJECT_LOCK(shared_page_obj); + m = vm_page_grab(shared_page_obj, OFF_TO_IDX(base), VM_ALLOC_RETRY); + VM_OBJECT_UNLOCK(shared_page_obj); + s = sf_buf_alloc(m, SFB_DEFAULT); + return (s); +} + +void +shared_page_write_end(struct sf_buf *sf) +{ + vm_page_t m; + + m = sf_buf_page(sf); + sf_buf_free(sf); + VM_OBJECT_LOCK(shared_page_obj); + vm_page_wakeup(m); + VM_OBJECT_UNLOCK(shared_page_obj); +} + +void +shared_page_write(int base, int size, const void *data) +{ + struct sf_buf *sf; vm_offset_t sk; + + sf = shared_page_write_start(base); + sk = sf_buf_kva(sf); + bcopy(data, (void *)(sk + (base & PAGE_MASK)), size); + shared_page_write_end(sf); +} + +static int +shared_page_alloc_locked(int size, int align) +{ int res; - VM_OBJECT_LOCK(shared_page_obj); - m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY); res = roundup(shared_page_free, align); if (res + size >= IDX_TO_OFF(shared_page_obj->size)) res = -1; - else { - VM_OBJECT_UNLOCK(shared_page_obj); - s = sf_buf_alloc(m, SFB_DEFAULT); - sk = sf_buf_kva(s); - bcopy(data, (void *)(sk + res), size); + else shared_page_free = res + size; - sf_buf_free(s); - VM_OBJECT_LOCK(shared_page_obj); - } - vm_page_wakeup(m); - 
VM_OBJECT_UNLOCK(shared_page_obj); + return (res); +} + +int +shared_page_alloc(int size, int align) +{ + int res; + + sx_xlock(&shared_page_alloc_sx); + res = shared_page_alloc_locked(size, align); + sx_xunlock(&shared_page_alloc_sx); + return (res); +} + +int +shared_page_fill(int size, int align, const void *data) +{ + int res; + + sx_xlock(&shared_page_alloc_sx); + res = shared_page_alloc_locked(size, align); + if (res != -1) + shared_page_write(res, size, data); + sx_xunlock(&shared_page_alloc_sx); return (res); } @@ -1547,6 +1597,7 @@ shared_page_init(void *dummy __unused) { vm_page_t m; + sx_init(&shared_page_alloc_sx, "shpsx"); shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE, VM_PROT_DEFAULT, 0, NULL); VM_OBJECT_LOCK(shared_page_obj); @@ -1559,10 +1610,76 @@ shared_page_init(void *dummy __unused) SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, NULL); +static void +timehands_update(void *arg) +{ + struct sysentvec *sv; + struct sf_buf *sf; + struct vdso_timehands th; + struct vdso_timekeep *tk; + uint32_t enabled, idx; + + sv = arg; + sx_xlock(&shared_page_alloc_sx); + enabled = tc_fill_vdso_timehands(&th); + sf = shared_page_write_start(sv->sv_timekeep_off); + tk = (void *)(sf_buf_kva(sf) + (sv->sv_timekeep_off & PAGE_MASK)); + idx = sv->sv_timekeep_curr; + atomic_store_rel_32(&tk->tk_th[idx].th_gen, 0); + if (++idx >= VDSO_TH_NUM) + idx = 0; + sv->sv_timekeep_curr = idx; + if (++sv->sv_timekeep_gen == 0) + sv->sv_timekeep_gen = 1; + th.th_gen = 0; + if (enabled) + tk->tk_th[idx] = th; + tk->tk_enabled = enabled; + atomic_store_rel_32(&tk->tk_th[idx].th_gen, sv->sv_timekeep_gen); + tk->tk_current = idx; + shared_page_write_end(sf); + sx_xunlock(&shared_page_alloc_sx); +} + +#ifdef COMPAT_FREEBSD32 +static void +timehands_update32(void *arg) +{ + struct sysentvec *sv; + struct sf_buf *sf; + struct vdso_timekeep32 *tk; + struct vdso_timehands32 th; + uint32_t enabled, idx; + + sv = arg; + 
sx_xlock(&shared_page_alloc_sx); + enabled = tc_fill_vdso_timehands32(&th); + sf = shared_page_write_start(sv->sv_timekeep_off); + tk = (void *)(sf_buf_kva(sf) + (sv->sv_timekeep_off & PAGE_MASK)); + idx = sv->sv_timekeep_curr; + atomic_store_rel_32(&tk->tk_th[idx].th_gen, 0); + if (++idx >= VDSO_TH_NUM) + idx = 0; + sv->sv_timekeep_curr = idx; + if (++sv->sv_timekeep_gen == 0) + sv->sv_timekeep_gen = 1; + th.th_gen = 0; + if (enabled) + tk->tk_th[idx] = th; + tk->tk_enabled = enabled; + atomic_store_rel_32(&tk->tk_th[idx].th_gen, sv->sv_timekeep_gen); + tk->tk_current = idx; + shared_page_write_end(sf); + sx_xunlock(&shared_page_alloc_sx); +} +#endif + void exec_sysvec_init(void *param) { struct sysentvec *sv; + int tk_base; + uint32_t tk_ver; sv = (struct sysentvec *)param; @@ -1571,4 +1688,29 @@ exec_sysvec_init(void *param) sv->sv_shared_page_obj = shared_page_obj; sv->sv_sigcode_base = sv->sv_shared_page_base + shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode); + tk_ver = VDSO_TK_VER_CURR; +#ifdef COMPAT_FREEBSD32 + if ((sv->sv_flags & SV_ILP32) != 0) { + tk_base = shared_page_alloc(sizeof(struct vdso_timekeep32) + + sizeof(struct vdso_timehands32) * VDSO_TH_NUM, 16); + KASSERT(tk_base != -1, ("tk_base -1 for 32bit")); + EVENTHANDLER_REGISTER(tc_windup, timehands_update32, sv, + EVENTHANDLER_PRI_ANY); + shared_page_write(tk_base + offsetof(struct vdso_timekeep32, + tk_ver), sizeof(uint32_t), &tk_ver); + } else { +#endif + tk_base = shared_page_alloc(sizeof(struct vdso_timekeep) + + sizeof(struct vdso_timehands) * VDSO_TH_NUM, 16); + KASSERT(tk_base != -1, ("tk_base -1 for native")); + EVENTHANDLER_REGISTER(tc_windup, timehands_update, sv, + EVENTHANDLER_PRI_ANY); + shared_page_write(tk_base + offsetof(struct vdso_timekeep, + tk_ver), sizeof(uint32_t), &tk_ver); +#ifdef COMPAT_FREEBSD32 + } +#endif + sv->sv_timekeep_base = sv->sv_shared_page_base + tk_base; + sv->sv_timekeep_off = tk_base; + EVENTHANDLER_INVOKE(tc_windup); } diff --git 
a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index e272fdd..0b8fefe 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -16,6 +16,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" #include "opt_ntp.h" #include "opt_ffclock.h" @@ -30,8 +31,10 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include +#include /* * A large step happens on boot. This constant detects such steps. @@ -118,8 +121,12 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, &timestepwarnings, 0, "Log time steps"); static void tc_windup(void); +static void tc_windup_push_vdso(void *ctx, int pending); static void cpu_tick_calibrate(int); +static struct task tc_windup_push_vdso_task = TASK_INITIALIZER(0, + tc_windup_push_vdso, 0); + static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS) { @@ -1360,6 +1367,7 @@ tc_windup(void) #endif timehands = th; + taskqueue_enqueue_fast(taskqueue_fast, &tc_windup_push_vdso_task); } /* Report or change the active timecounter hardware. 
*/ @@ -1386,6 +1394,7 @@ sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) (void)newtc->tc_get_timecount(newtc); timecounter = newtc; + EVENTHANDLER_INVOKE(tc_windup); return (0); } return (EINVAL); @@ -1844,3 +1853,78 @@ cputick2usec(uint64_t tick) } cpu_tick_f *cpu_ticks = tc_cpu_ticks; + +static int vdso_th_enable = 1; +static int +sysctl_fast_gettime(SYSCTL_HANDLER_ARGS) +{ + int old_vdso_th_enable, error; + + old_vdso_th_enable = vdso_th_enable; + error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req); + if (error != 0) + return (error); + vdso_th_enable = old_vdso_th_enable; + EVENTHANDLER_INVOKE(tc_windup); + return (0); +} +SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day"); + +uint32_t +tc_fill_vdso_timehands(struct vdso_timehands *vdso_th) +{ + struct timehands *th; + uint32_t enabled; + int gen; + + do { + th = timehands; + gen = th->th_generation; + vdso_th->th_algo = VDSO_TH_ALGO_1; + vdso_th->th_scale = th->th_scale; + vdso_th->th_offset_count = th->th_offset_count; + vdso_th->th_counter_mask = th->th_counter->tc_counter_mask; + vdso_th->th_offset = th->th_offset; + vdso_th->th_boottime = boottimebin; + enabled = cpu_fill_vdso_timehands(vdso_th); + } while (gen == 0 || timehands->th_generation != gen); + if (!vdso_th_enable) + enabled = 0; + return (enabled); +} + +#ifdef COMPAT_FREEBSD32 +uint32_t +tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32) +{ + struct timehands *th; + uint32_t enabled; + int gen; + + do { + th = timehands; + gen = th->th_generation; + vdso_th32->th_algo = VDSO_TH_ALGO_1; + *(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale; + vdso_th32->th_offset_count = th->th_offset_count; + vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask; + vdso_th32->th_offset.sec = th->th_offset.sec; + *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac; + vdso_th32->th_boottime.sec = 
boottimebin.sec; + *(uint64_t *)&vdso_th32->th_boottime.frac[0] = boottimebin.frac; + enabled = cpu_fill_vdso_timehands32(vdso_th32); + } while (gen == 0 || timehands->th_generation != gen); + if (!vdso_th_enable) + enabled = 0; + return (enabled); +} +#endif + +static void +tc_windup_push_vdso(void *ctx, int pending) +{ + + EVENTHANDLER_INVOKE(tc_windup); +} diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index d916cf1..22769c2 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -124,6 +124,10 @@ struct sysentvec { vm_offset_t sv_shared_page_base; vm_offset_t sv_shared_page_len; vm_offset_t sv_sigcode_base; + vm_offset_t sv_timekeep_base; + int sv_timekeep_off; + int sv_timekeep_curr; + uint32_t sv_timekeep_gen; void *sv_shared_page_obj; void (*sv_schedtail)(struct thread *); }; @@ -256,8 +260,13 @@ int lkmressys(struct thread *, struct nosys_args *); int syscall_thread_enter(struct thread *td, struct sysent *se); void syscall_thread_exit(struct thread *td, struct sysent *se); -int shared_page_fill(int size, int align, const char *data); +struct sf_buf; +int shared_page_alloc(int size, int align); +int shared_page_fill(int size, int align, const void *data); +void shared_page_write(int base, int size, const void *data); void exec_sysvec_init(void *param); +struct sf_buf *shared_page_write_start(int base); +void shared_page_write_end(struct sf_buf *sf); #define INIT_SYSENTVEC(name, sv) \ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ diff --git a/sys/sys/vdso.h b/sys/sys/vdso.h new file mode 100644 index 0000000..cf9cfb0 --- /dev/null +++ b/sys/sys/vdso.h @@ -0,0 +1,116 @@ +/*- + * Copyright 2012 Konstantin Belousov . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _SYS_VDSO_H +#define _SYS_VDSO_H + +#include +#include +#include + +struct vdso_timehands { + uint32_t th_algo; + uint32_t th_gen; + uint64_t th_scale; + uint32_t th_offset_count; + uint32_t th_counter_mask; + struct bintime th_offset; + struct bintime th_boottime; + VDSO_TIMEHANDS_MD +}; + +struct vdso_timekeep { + uint32_t tk_ver; + uint32_t tk_enabled; + uint32_t tk_current; + struct vdso_timehands tk_th[]; +}; + +#define VDSO_TK_CURRENT_BUSY 0xffffffff +#define VDSO_TK_VER_1 0x1 +#define VDSO_TK_VER_CURR VDSO_TK_VER_1 +#define VDSO_TH_ALGO_1 0x1 + +#ifndef _KERNEL +struct timespec; +struct timeval; +struct timezone; + +int __vdso_clock_gettime(clockid_t clock_id, struct timespec *ts); +int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); +u_int __vdso_gettc(const struct vdso_timehands *vdso_th); +#endif + +#ifdef _KERNEL + +uint32_t tc_fill_vdso_timehands(struct vdso_timehands *vdso_th); + +/* + * The cpu_fill_vdso_timehands() function should fill MD-part of the + * struct vdso_timehands, 
which is both machine- and + * timecounter-dependent. The return value should be 1 if fast + * userspace timecounter is enabled by hardware, and 0 otherwise. The + * global sysctl enable override is handled by machine-independent code + * after cpu_fill_vdso_timehands() call is made. + */ +uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th); + +typedef void (*tc_windup_fn)(void *); +EVENTHANDLER_DECLARE(tc_windup, tc_windup_fn); + +#define VDSO_TH_NUM 4 + +#ifdef COMPAT_FREEBSD32 +struct bintime32 { + uint32_t sec; + uint32_t frac[2]; +}; + +struct vdso_timehands32 { + uint32_t th_algo; + uint32_t th_gen; + uint32_t th_scale[2]; + uint32_t th_offset_count; + uint32_t th_counter_mask; + struct bintime32 th_offset; + struct bintime32 th_boottime; + VDSO_TIMEHANDS_MD32 +}; + +struct vdso_timekeep32 { + uint32_t tk_ver; + uint32_t tk_enabled; + uint32_t tk_current; + struct vdso_timehands32 tk_th[]; +}; + +uint32_t tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32); +uint32_t cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32); + +#endif +#endif + +#endif diff --git a/sys/x86/include/vdso.h b/sys/x86/include/vdso.h new file mode 100644 index 0000000..0f6e3c6 --- /dev/null +++ b/sys/x86/include/vdso.h @@ -0,0 +1,41 @@ +/*- + * Copyright 2012 Konstantin Belousov . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _X86_VDSO_H +#define _X86_VDSO_H + +#define VDSO_TIMEHANDS_MD \ + uint32_t th_x86_shift; \ + uint32_t th_res[7]; + +#ifdef _KERNEL +#ifdef COMPAT_FREEBSD32 + +#define VDSO_TIMEHANDS_MD32 VDSO_TIMEHANDS_MD + +#endif +#endif +#endif diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c index 4d1618f..085c339 100644 --- a/sys/x86/x86/tsc.c +++ b/sys/x86/x86/tsc.c @@ -27,6 +27,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" #include "opt_clock.h" #include @@ -41,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -604,3 +606,23 @@ tsc_get_timecount_low(struct timecounter *tc) : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx"); return (rv); } + +uint32_t +cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th) +{ + + vdso_th->th_x86_shift = (int)(intptr_t)timecounter->tc_priv; + bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); + return (timecounter == &tsc_timecounter); +} + +#ifdef COMPAT_FREEBSD32 +uint32_t +cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32) +{ + + vdso_th32->th_x86_shift = (int)(intptr_t)timecounter->tc_priv; + bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res)); + return (timecounter == &tsc_timecounter); +} +#endif