Index: Makefile
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/Makefile,v
retrieving revision 1.18
diff -u -r1.18 Makefile
--- Makefile	7 Apr 2003 16:21:25 -0000	1.18
+++ Makefile	1 May 2003 18:29:07 -0000
@@ -1,7 +1,7 @@
 # $FreeBSD: src/libexec/rtld-elf/Makefile,v 1.18 2003/04/07 16:21:25 mdodd Exp $

 PROG=	ld-elf.so.1
-SRCS=	rtld_start.S rtld.c lockdflt.c map_object.c malloc.c \
+SRCS=	rtld_start.S rtld.c rtld_lock.c map_object.c malloc.c \
 	xmalloc.c debug.c reloc.c
 MAN=	rtld.1
 CFLAGS+=	-Wall -DFREEBSD_ELF -I${.CURDIR}/${MACHINE_ARCH} -I${.CURDIR}
Index: rtld.c
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/rtld.c,v
retrieving revision 1.77
diff -u -r1.77 rtld.c
--- rtld.c	7 Apr 2003 16:21:25 -0000	1.77
+++ rtld.c	1 May 2003 17:49:54 -0000
@@ -202,7 +202,7 @@
 rlock_acquire(void)
 {
     lockinfo.rlock_acquire(lockinfo.thelock);
-    atomic_incr_int(&lockinfo.rcount);
+    atomic_add_int(&lockinfo.rcount, 1);
     lock_check();
 }

@@ -210,21 +210,21 @@
 wlock_acquire(void)
 {
     lockinfo.wlock_acquire(lockinfo.thelock);
-    atomic_incr_int(&lockinfo.wcount);
+    atomic_add_int(&lockinfo.wcount, 1);
     lock_check();
 }

 static __inline void
 rlock_release(void)
 {
-    atomic_decr_int(&lockinfo.rcount);
+    atomic_subtract_int(&lockinfo.rcount, 1);
     lockinfo.rlock_release(lockinfo.thelock);
 }

 static __inline void
 wlock_release(void)
 {
-    atomic_decr_int(&lockinfo.wcount);
+    atomic_subtract_int(&lockinfo.wcount, 1);
     lockinfo.wlock_release(lockinfo.thelock);
 }

Index: rtld_lock.c
===================================================================
RCS file: rtld_lock.c
diff -N rtld_lock.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ rtld_lock.c	1 May 2003 18:46:09 -0000
@@ -0,0 +1,186 @@
+/*-
+ * Copyright 1999, 2000 John D. Polstra.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *	from: FreeBSD: src/libexec/rtld-elf/sparc64/lockdflt.c,v 1.3 2002/10/09
+ * $FreeBSD: $
+ */
+
+/*
+ * Thread locking implementation for the dynamic linker.
+ *
+ * We use the "simple, non-scalable reader-preference lock" from:
+ *
+ *    J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
+ *    Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
on + * Principles and Practice of Parallel Programming, April 1991. + * + * In this algorithm the lock is a single word. Its low-order bit is + * set when a writer holds the lock. The remaining high-order bits + * contain a count of readers desiring the lock. The algorithm requires + * atomic "compare_and_store" and "add" operations, which we implement + * using assembly language sequences in "rtld_start.S". + */ + +#include +#include +#include + +#include "debug.h" +#include "rtld.h" +#include "rtld_machdep.h" + +#define WAFLAG 0x1 /* A writer holds the lock */ +#define RC_INCR 0x2 /* Adjusts count of readers desiring lock */ + +typedef struct Struct_Lock { + volatile int lock; + void *base; +} Lock; + +static sigset_t fullsigmask, oldsigmask; + +static void * +lock_create(void *context) +{ + void *base; + char *p; + uintptr_t r; + Lock *l; + + /* + * Arrange for the lock to occupy its own cache line. First, we + * optimistically allocate just a cache line, hoping that malloc + * will give us a well-aligned block of memory. If that doesn't + * work, we allocate a larger block and take a well-aligned cache + * line from it. + */ + base = xmalloc(CACHE_LINE_SIZE); + p = (char *)base; + if ((uintptr_t)p % CACHE_LINE_SIZE != 0) { + free(base); + base = xmalloc(2 * CACHE_LINE_SIZE); + p = (char *)base; + if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0) + p += CACHE_LINE_SIZE - r; + } + l = (Lock *)p; + l->base = base; + l->lock = 0; + return l; +} + +static void +lock_destroy(void *lock) +{ + Lock *l = (Lock *)lock; + + free(l->base); +} + +static void +rlock_acquire(void *lock) +{ + Lock *l = (Lock *)lock; + + atomic_add_acq_int(&l->lock, RC_INCR); + while (l->lock & WAFLAG) + ; /* Spin */ +} + +static void +wlock_acquire(void *lock) +{ + Lock *l = (Lock *)lock; + sigset_t tmp_oldsigmask; + + for ( ; ; ) { + sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask); + if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG)) + break; + sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL); + } + oldsigmask = tmp_oldsigmask; +} + +static void +rlock_release(void *lock) +{ + Lock *l = (Lock *)lock; + + atomic_add_rel_int(&l->lock, -RC_INCR); +} + +static void +wlock_release(void *lock) +{ + Lock *l = (Lock *)lock; + + atomic_add_rel_int(&l->lock, -WAFLAG); + sigprocmask(SIG_SETMASK, &oldsigmask, NULL); +} + +#if __i386__ +/* + * Import a crude exclusive lock implementation for i386 processors. + * This file will be removed once i386 support is deprecated in favor + * of i486+. + */ +#include "i386/lockdflt.c" + +#endif + +void +lockdflt_init(LockInfo *li) +{ + li->context = NULL; + li->lock_create = lock_create; + li->rlock_acquire = rlock_acquire; + li->wlock_acquire = wlock_acquire; + li->rlock_release = rlock_release; + li->wlock_release = wlock_release; + li->lock_destroy = lock_destroy; + li->context_destroy = NULL; + +#if __i386__ + if (!cpu_supports_cmpxchg()) { + /* It's a cruddy old 80386. */ + li->rlock_acquire = li->wlock_acquire = lock80386_acquire; + li->rlock_release = li->wlock_release = lock80386_release; + } +#endif + + /* + * Construct a mask to block all signals except traps which might + * conceivably be generated within the dynamic linker itself. 
+     */
+    sigfillset(&fullsigmask);
+    sigdelset(&fullsigmask, SIGILL);
+    sigdelset(&fullsigmask, SIGTRAP);
+    sigdelset(&fullsigmask, SIGABRT);
+    sigdelset(&fullsigmask, SIGEMT);
+    sigdelset(&fullsigmask, SIGFPE);
+    sigdelset(&fullsigmask, SIGBUS);
+    sigdelset(&fullsigmask, SIGSEGV);
+    sigdelset(&fullsigmask, SIGSYS);
+}
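As an illustration only (not something this patch adds), here is a minimal standalone sketch of the lock-word protocol described in the comment at the top of rtld_lock.c: bit 0 (WAFLAG) marks a writer, the remaining bits count readers in units of RC_INCR. The demo_* names and the file layout are invented for the example; the signal blocking that wlock_acquire() wraps around the write lock is omitted here.

/* Illustrative sketch only -- not part of the patch. */
#include <sys/types.h>
#include <machine/atomic.h>

#define WAFLAG		0x1	/* A writer holds the lock */
#define RC_INCR		0x2	/* Each reader adds this to the lock word */

static volatile u_int demo_lock;	/* invented name; one word per lock */

static void
demo_rlock_acquire(void)
{
    /* Announce this reader first, then wait out any writer inside. */
    atomic_add_acq_int(&demo_lock, RC_INCR);
    while (demo_lock & WAFLAG)
        ;	/* spin */
}

static void
demo_rlock_release(void)
{
    atomic_add_rel_int(&demo_lock, -RC_INCR);
}

static int
demo_wlock_tryacquire(void)
{
    /* A writer gets in only when the word is 0: no readers, no writer. */
    return (atomic_cmpset_acq_int(&demo_lock, 0, WAFLAG));
}

static void
demo_wlock_release(void)
{
    atomic_add_rel_int(&demo_lock, -WAFLAG);
}

Readers are preferred: a reader only ever waits for a writer that already holds WAFLAG, while a writer's compare-and-set keeps failing until both the reader count and the writer bit are zero. Presumably this is also why rtld_lock.c blocks nearly all signals before taking the write lock, so that a signal handler re-entering the linker as a reader cannot spin forever against the interrupted writer in its own thread.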
Index: alpha/rtld_machdep.h
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/alpha/rtld_machdep.h,v
retrieving revision 1.8
diff -u -r1.8 rtld_machdep.h
--- alpha/rtld_machdep.h	18 Nov 2002 22:08:49 -0000	1.8
+++ alpha/rtld_machdep.h	1 May 2003 17:40:42 -0000
@@ -29,6 +29,18 @@
 #ifndef RTLD_MACHDEP_H
 #define RTLD_MACHDEP_H	1

+#include <sys/types.h>
+#include <machine/atomic.h>
+
+/*
+ * This value of CACHE_LINE_SIZE is conservative.  The actual size
+ * is 32 on the 21064, 21064A, 21066, 21066A, and 21164.  It is 64
+ * on the 21264.  Compaq recommends sequestering each lock in its own
+ * 128-byte block to allow for future implementations with larger
+ * cache lines.
+ */
+#define CACHE_LINE_SIZE		128
+
 struct Struct_Obj_Entry;

 /* Return the address of the .dynamic section in the dynamic linker. */
@@ -47,11 +59,5 @@

 /* Lazy binding entry point, called via PLT. */
 void _rtld_bind_start_old(void);
-
-/* Atomic operations. */
-int cmp0_and_store_int(volatile int *, int);
-void atomic_add_int(volatile int *, int);
-void atomic_incr_int(volatile int *);
-void atomic_decr_int(volatile int *);

 #endif
Index: alpha/rtld_start.S
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/alpha/rtld_start.S,v
retrieving revision 1.5
diff -u -r1.5 rtld_start.S
--- alpha/rtld_start.S	18 Feb 2002 02:24:10 -0000	1.5
+++ alpha/rtld_start.S	1 May 2003 18:50:44 -0000
@@ -209,52 +209,3 @@
 END(_rtld_bind_start_old)

-/*
- * int cmp0_and_store_int(volatile int *p, int newval);
- *
- * If an int holds 0, store newval into it; else do nothing.  Returns
- * the previous value.
- */
-LEAF(cmp0_and_store_int, 2)
-1:	mov a1, t0
-	ldl_l v0, 0(a0)
-	bne v0, 3f
-	stl_c t0, 0(a0)
-	beq t0, 2f
-	mb
-	RET
-2:	br 1b
-3:	RET
-END(cmp0_and_store_int)
-
-LEAF(atomic_add_int, 2)
-0:	ldl_l t0, 0(a0)
-	addq t0, a1, t0
-	stl_c t0, 0(a0)
-	beq t0, 1f
-	mb
-	RET
-1:	br 0b
-END(atomic_add_int)
-
-/* Atomically increment an int. */
-LEAF(atomic_incr_int, 1)
-0:	ldl_l t0, 0(a0)
-	addq t0, 1, t0
-	stl_c t0, 0(a0)
-	beq t0, 1f
-	mb
-	RET
-1:	br 0b
-END(atomic_incr_int)
-
-/* Atomically decrement an int. */
-LEAF(atomic_decr_int, 1)
-0:	ldl_l t0, 0(a0)
-	subq t0, 1, t0
-	stl_c t0, 0(a0)
-	beq t0, 1f
-	mb
-	RET
-1:	br 0b
-END(atomic_decr_int)

Index: amd64/rtld_machdep.h
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/amd64/rtld_machdep.h,v
retrieving revision 1.8
diff -u -r1.8 rtld_machdep.h
--- amd64/rtld_machdep.h	18 Nov 2002 22:08:49 -0000	1.8
+++ amd64/rtld_machdep.h	1 May 2003 17:45:17 -0000
@@ -29,6 +29,11 @@
 #ifndef RTLD_MACHDEP_H
 #define RTLD_MACHDEP_H	1

+#include <sys/types.h>
+#include <machine/atomic.h>
+
+#define CACHE_LINE_SIZE		32
+
 struct Struct_Obj_Entry;

 /* Return the address of the .dynamic section in the dynamic linker. */
@@ -52,26 +57,5 @@

 #define call_initfini_pointer(obj, target) \
 	(((InitFunc)(target))())
-
-static inline void
-atomic_decr_int(volatile int *p)
-{
-    __asm __volatile ("lock; decl %0" : "+m"(*p) : : "cc");
-}
-
-static inline void
-atomic_incr_int(volatile int *p)
-{
-    __asm __volatile ("lock; incl %0" : "+m"(*p) : : "cc");
-}
-
-static inline void
-atomic_add_int(volatile int *p, int val)
-{
-    __asm __volatile ("lock; addl %1, %0"
-                      : "+m"(*p)
-                      : "ri"(val)
-                      : "cc");
-}

 #endif
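Illustration only (not part of the diff): with the hand-rolled per-architecture helpers above removed, the lock-diagnostic counters that rtld.c keeps are bumped with the stock <machine/atomic.h> primitives, as the rtld.c hunks near the top of this patch show. A minimal sketch of the replacement calls, with an invented counter name:

/* Illustrative sketch only -- not part of the patch. */
#include <sys/types.h>
#include <machine/atomic.h>

static volatile u_int demo_count;	/* invented stand-in for rcount/wcount */

static void
demo_enter(void)
{
    atomic_add_int(&demo_count, 1);		/* was atomic_incr_int(&demo_count) */
}

static void
demo_leave(void)
{
    atomic_subtract_int(&demo_count, 1);	/* was atomic_decr_int(&demo_count) */
}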
Index: i386/lockdflt.c
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/i386/lockdflt.c,v
retrieving revision 1.8
diff -u -r1.8 lockdflt.c
--- i386/lockdflt.c	6 Jul 2002 20:25:55 -0000	1.8
+++ i386/lockdflt.c	1 May 2003 18:47:15 -0000
@@ -50,23 +50,6 @@
 #include <setjmp.h>
 #include <signal.h>
-#include <stdlib.h>
-#include <time.h>
-
-#include "debug.h"
-#include "rtld.h"
-
-#define CACHE_LINE_SIZE		32
-
-#define WAFLAG		0x1	/* A writer holds the lock */
-#define RC_INCR		0x2	/* Adjusts count of readers desiring lock */
-
-typedef struct Struct_Lock {
-    volatile int lock;
-    void *base;
-} Lock;
-
-static sigset_t fullsigmask, oldsigmask;

 static inline int
 cmpxchgl(int old, int new, volatile int *m)
@@ -93,44 +76,6 @@
     return result;
 }

-static void *
-lock_create(void *context)
-{
-    void *base;
-    char *p;
-    uintptr_t r;
-    Lock *l;
-
-    /*
-     * Arrange for the lock to occupy its own cache line.  First, we
-     * optimistically allocate just a cache line, hoping that malloc
-     * will give us a well-aligned block of memory.  If that doesn't
-     * work, we allocate a larger block and take a well-aligned cache
-     * line from it.
-     */
-    base = xmalloc(CACHE_LINE_SIZE);
-    p = (char *)base;
-    if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
-        free(base);
-        base = xmalloc(2 * CACHE_LINE_SIZE);
-        p = (char *)base;
-        if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
-            p += CACHE_LINE_SIZE - r;
-    }
-    l = (Lock *)p;
-    l->base = base;
-    l->lock = 0;
-    return l;
-}
-
-static void
-lock_destroy(void *lock)
-{
-    Lock *l = (Lock *)lock;
-
-    free(l->base);
-}
-
 /*
  * Crude exclusive locks for the 80386, which does not support the
  * cmpxchg instruction.
  */
@@ -162,51 +107,6 @@
 }

 /*
- * Better reader/writer locks for the 80486 and later CPUs.
- */
-static void
-rlock_acquire(void *lock)
-{
-    Lock *l = (Lock *)lock;
-
-    atomic_add_int(&l->lock, RC_INCR);
-    while (l->lock & WAFLAG)
-        ;	/* Spin */
-}
-
-static void
-wlock_acquire(void *lock)
-{
-    Lock *l = (Lock *)lock;
-    sigset_t tmp_oldsigmask;
-
-    for ( ; ; ) {
-        sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
-        if (cmpxchgl(0, WAFLAG, &l->lock) == 0)
-            break;
-        sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
-    }
-    oldsigmask = tmp_oldsigmask;
-}
-
-static void
-rlock_release(void *lock)
-{
-    Lock *l = (Lock *)lock;
-
-    atomic_add_int(&l->lock, -RC_INCR);
-}
-
-static void
-wlock_release(void *lock)
-{
-    Lock *l = (Lock *)lock;
-
-    atomic_add_int(&l->lock, -WAFLAG);
-    sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
-}
-
-/*
  * Code to determine at runtime whether the CPU supports the cmpxchg
  * instruction.  This instruction allows us to use locks that are more
  * efficient, but it didn't exist on the 80386.
  */
@@ -242,35 +142,3 @@
     return result;
 }

-void
-lockdflt_init(LockInfo *li)
-{
-    li->context = NULL;
-    li->context_destroy = NULL;
-    li->lock_create = lock_create;
-    li->lock_destroy = lock_destroy;
-    if (cpu_supports_cmpxchg()) {
-        /* Use fast locks that require an 80486 or later. */
-        li->rlock_acquire = rlock_acquire;
-        li->wlock_acquire = wlock_acquire;
-        li->rlock_release = rlock_release;
-        li->wlock_release = wlock_release;
-    } else {
-        /* It's a cruddy old 80386. */
-        li->rlock_acquire = li->wlock_acquire = lock80386_acquire;
-        li->rlock_release = li->wlock_release = lock80386_release;
-    }
-    /*
-     * Construct a mask to block all signals except traps which might
-     * conceivably be generated within the dynamic linker itself.
-     */
-    sigfillset(&fullsigmask);
-    sigdelset(&fullsigmask, SIGILL);
-    sigdelset(&fullsigmask, SIGTRAP);
-    sigdelset(&fullsigmask, SIGABRT);
-    sigdelset(&fullsigmask, SIGEMT);
-    sigdelset(&fullsigmask, SIGFPE);
-    sigdelset(&fullsigmask, SIGBUS);
-    sigdelset(&fullsigmask, SIGSEGV);
-    sigdelset(&fullsigmask, SIGSYS);
-}
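Illustration only (not part of the diff): the bodies of lock80386_acquire() and lock80386_release() fall outside the hunks shown above, so the sketch below only illustrates the kind of xchg-based exclusive spinlock that the "crude exclusive locks for the 80386" comment refers to. The demo_* names and the exact code are invented here, the signal handling the real functions perform is left out, and it assumes GCC-style inline assembly on i386.

/* Illustrative sketch only -- not the code in i386/lockdflt.c. */
static inline int
demo_xchgl(int v, volatile int *m)
{
    int result;

    /* xchg with a memory operand is implicitly locked on x86. */
    __asm __volatile ("xchgl %0, %1"
                      : "=r"(result), "+m"(*m)
                      : "0"(v)
                      : "memory");
    return result;
}

static void
demo_386lock_acquire(volatile int *lk)
{
    while (demo_xchgl(1, lk) != 0)	/* try to claim the lock word */
        while (*lk != 0)
            ;				/* spin until it looks free again */
}

static void
demo_386lock_release(volatile int *lk)
{
    *lk = 0;	/* on x86 a plain store to the lock word releases it */
}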
Index: i386/rtld_machdep.h
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/i386/rtld_machdep.h,v
retrieving revision 1.8
diff -u -r1.8 rtld_machdep.h
--- i386/rtld_machdep.h	18 Nov 2002 22:08:49 -0000	1.8
+++ i386/rtld_machdep.h	1 May 2003 17:41:51 -0000
@@ -29,6 +29,11 @@
 #ifndef RTLD_MACHDEP_H
 #define RTLD_MACHDEP_H	1

+#include <sys/types.h>
+#include <machine/atomic.h>
+
+#define CACHE_LINE_SIZE		32
+
 struct Struct_Obj_Entry;

 /* Return the address of the .dynamic section in the dynamic linker. */
@@ -52,26 +57,5 @@

 #define call_initfini_pointer(obj, target) \
 	(((InitFunc)(target))())
-
-static inline void
-atomic_decr_int(volatile int *p)
-{
-    __asm __volatile ("lock; decl %0" : "+m"(*p) : : "cc");
-}
-
-static inline void
-atomic_incr_int(volatile int *p)
-{
-    __asm __volatile ("lock; incl %0" : "+m"(*p) : : "cc");
-}
-
-static inline void
-atomic_add_int(volatile int *p, int val)
-{
-    __asm __volatile ("lock; addl %1, %0"
-                      : "+m"(*p)
-                      : "ri"(val)
-                      : "cc");
-}

 #endif
Index: ia64/rtld_machdep.h
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/ia64/rtld_machdep.h,v
retrieving revision 1.3
diff -u -r1.3 rtld_machdep.h
--- ia64/rtld_machdep.h	18 Nov 2002 22:08:50 -0000	1.3
+++ ia64/rtld_machdep.h	1 May 2003 17:42:50 -0000
@@ -29,6 +29,11 @@
 #ifndef RTLD_MACHDEP_H
 #define RTLD_MACHDEP_H	1

+#include <sys/types.h>
+#include <machine/atomic.h>
+
+#define CACHE_LINE_SIZE		128
+
 /*
  * Macros for cracking ia64 function pointers.
  */
@@ -49,11 +54,5 @@
 	const struct Struct_Obj_Entry *, const Elf_Rel *);
 void *make_function_pointer(const Elf_Sym *, const struct Struct_Obj_Entry *);
 void call_initfini_pointer(const struct Struct_Obj_Entry *, Elf_Addr);
-
-/* Atomic operations. */
-int cmp0_and_store_int(volatile int *, int);
-void atomic_add_int(volatile int *, int);
-void atomic_incr_int(volatile int *);
-void atomic_decr_int(volatile int *);

 #endif
Index: ia64/rtld_start.S
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/ia64/rtld_start.S,v
retrieving revision 1.2
diff -u -r1.2 rtld_start.S
--- ia64/rtld_start.S	29 Oct 2001 10:05:32 -0000	1.2
+++ ia64/rtld_start.S	1 May 2003 18:54:16 -0000
@@ -157,60 +157,6 @@
 }
 END(_rtld_bind_start)

-/*
- * int cmp0_and_store_int(volatile int *p, int newval);
- *
- * If an int holds 0, store newval into it; else do nothing.  Returns
- * the previous value.
- */
-ENTRY(cmp0_and_store_int, 2)
-	mov ar.ccv=0
-	;;
-	cmpxchg4.acq r8=[in0],in1,ar.ccv
-	br.ret.sptk.many rp
-END(cmp0_and_store_int)
-
-ENTRY(atomic_add_int, 2)
-1:	ld4 r14=[in0]
-	;;
-	mov ar.ccv=r14
-	add r15=in1,r14
-	;;
-	cmpxchg4.acq r16=[in0],r15,ar.ccv
-	;;
-	cmp.ne p6,p0=r14,r16
-(p6)	br.cond.spnt.few 1b
-	br.ret.sptk.many rp
-END(atomic_add_int)
-
-/* Atomically increment an int. */
-ENTRY(atomic_incr_int, 1)
-1:	ld4 r14=[in0]
-	;;
-	mov ar.ccv=r14
-	add r15=1,r14
-	;;
-	cmpxchg4.acq r16=[in0],r15,ar.ccv
-	;;
-	cmp.ne p6,p0=r14,r16
-(p6)	br.cond.spnt.few 1b
-	br.ret.sptk.many rp
-END(atomic_incr_int)
-
-/* Atomically decrement an int. */
-ENTRY(atomic_decr_int, 1)
-1:	ld4 r14=[in0]
-	;;
-	mov ar.ccv=r14
-	add r15=-1,r14
-	;;
-	cmpxchg4.acq r16=[in0],r15,ar.ccv
-	;;
-	cmp.ne p6,p0=r14,r16
-(p6)	br.cond.spnt.few 1b
-	br.ret.sptk.many rp
-END(atomic_decr_int)
-
 #define DT_NULL		0	/* Terminating entry. */
 #define DT_RELA		7	/* Address of ElfNN_Rela relocations. */
 #define DT_RELASZ	8	/* Total size of ElfNN_Rela relocations. */
Index: powerpc/rtld_machdep.h
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/powerpc/rtld_machdep.h,v
retrieving revision 1.1
diff -u -r1.1 rtld_machdep.h
--- powerpc/rtld_machdep.h	4 Dec 2002 07:32:20 -0000	1.1
+++ powerpc/rtld_machdep.h	1 May 2003 17:43:43 -0000
@@ -29,11 +29,9 @@
 #ifndef RTLD_MACHDEP_H
 #define RTLD_MACHDEP_H	1

+#include <sys/types.h>
 #include <machine/atomic.h>

-#define atomic_incr_int(p)	atomic_add_int((p), 1)
-#define atomic_decr_int(p)	atomic_subtract_int((p), 1)
-
 #define CACHE_LINE_SIZE		32

 struct Struct_Obj_Entry;
@@ -53,7 +51,7 @@
 	(((InitFunc)(target))())

 /*
- * Lazy binding entry point, called via PLT. 
+ * Lazy binding entry point, called via PLT.
  */
 void _rtld_bind_start(void);
Index: sparc64/rtld_machdep.h
===================================================================
RCS file: /home/ncvs/src/libexec/rtld-elf/sparc64/rtld_machdep.h,v
retrieving revision 1.3
diff -u -r1.3 rtld_machdep.h
--- sparc64/rtld_machdep.h	18 Nov 2002 22:08:50 -0000	1.3
+++ sparc64/rtld_machdep.h	1 May 2003 17:39:20 -0000
@@ -32,16 +32,6 @@
 #include <sys/types.h>
 #include <machine/atomic.h>

-#define atomic_incr_int(p)	atomic_add_int((p), 1)
-#define atomic_decr_int(p)	atomic_subtract_int((p), 1)
-
-/*
- * This value of CACHE_LINE_SIZE is conservative.  The actual size
- * is 32 on the 21064, 21064A, 21066, 21066A, and 21164.  It is 64
- * on the 21264.  Compaq recommends sequestering each lock in its own
- * 128-byte block to allow for future implementations with larger
- * cache lines.
- */
 #define CACHE_LINE_SIZE		128

 struct Struct_Obj_Entry;