Index: include/asmacros.h
===================================================================
--- include/asmacros.h	(revision 224216)
+++ include/asmacros.h	(working copy)
@@ -49,98 +49,142 @@
 /*
  * Atomically decrement an integer in memory.
  */
-#define	ATOMIC_DEC_INT(r1, r2, r3) \
-	lduw	[r1], r2 ; \
-9:	sub	r2, 1, r3 ; \
-	casa	[r1] ASI_N, r2, r3 ; \
-	cmp	r2, r3 ; \
-	bne,pn	%icc, 9b ; \
+#define	ATOMIC_DEC_INT(r1, r2, r3) \
+	lduw	[r1], r2 ; \
+9:	sub	r2, 1, r3 ; \
+	casa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%icc, 9b ; \
 	mov	r3, r2
 
 /*
  * Atomically increment an integer in memory.
  */
-#define	ATOMIC_INC_INT(r1, r2, r3) \
-	lduw	[r1], r2 ; \
-9:	add	r2, 1, r3 ; \
-	casa	[r1] ASI_N, r2, r3 ; \
-	cmp	r2, r3 ; \
-	bne,pn	%icc, 9b ; \
+#define	ATOMIC_INC_INT(r1, r2, r3) \
+	lduw	[r1], r2 ; \
+9:	add	r2, 1, r3 ; \
+	casa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%icc, 9b ; \
 	mov	r3, r2
 
 /*
- * Atomically increment an u_long in memory.
+ * Atomically increment a long in memory.
  */
-#define	ATOMIC_INC_ULONG(r1, r2, r3) \
-	ldx	[r1], r2 ; \
-9:	add	r2, 1, r3 ; \
-	casxa	[r1] ASI_N, r2, r3 ; \
-	cmp	r2, r3 ; \
-	bne,pn	%icc, 9b ; \
+#define	ATOMIC_INC_LONG(r1, r2, r3) \
+	ldx	[r1], r2 ; \
+9:	add	r2, 1, r3 ; \
+	casxa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%xcc, 9b ; \
 	mov	r3, r2
 
 /*
  * Atomically clear a number of bits of an integer in memory.
  */
-#define	ATOMIC_CLEAR_INT(r1, r2, r3, bits) \
-	lduw	[r1], r2 ; \
-9:	andn	r2, bits, r3 ; \
-	casa	[r1] ASI_N, r2, r3 ; \
-	cmp	r2, r3 ; \
-	bne,pn	%icc, 9b ; \
+#define	ATOMIC_CLEAR_INT(r1, r2, r3, bits) \
+	lduw	[r1], r2 ; \
+9:	andn	r2, bits, r3 ; \
+	casa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%icc, 9b ; \
 	mov	r3, r2
 
 /*
- * Atomically clear a number of bits of an u_long in memory.
+ * Atomically clear a number of bits of a long in memory.
  */
-#define	ATOMIC_CLEAR_LONG(r1, r2, r3, bits) \
-	ldx	[r1], r2 ; \
-9:	andn	r2, bits, r3 ; \
-	casxa	[r1] ASI_N, r2, r3 ; \
-	cmp	r2, r3 ; \
-	bne,pn	%icc, 9b ; \
+#define	ATOMIC_CLEAR_LONG(r1, r2, r3, bits) \
+	ldx	[r1], r2 ; \
+9:	andn	r2, bits, r3 ; \
+	casxa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%xcc, 9b ; \
 	mov	r3, r2
 
+/*
+ * Atomically set a number of bits of an integer in memory.
+ */
+#define	ATOMIC_SET_INT(r1, r2, r3, bits) \
+	lduw	[r1], r2 ; \
+9:	or	r2, bits, r3 ; \
+	casa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%icc, 9b ; \
+	mov	r3, r2
+
+/*
+ * Atomically set a number of bits of a long in memory.
+ */
+#define	ATOMIC_SET_LONG(r1, r2, r3, bits) \
+	ldx	[r1], r2 ; \
+9:	or	r2, bits, r3 ; \
+	casxa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%xcc, 9b ; \
+	mov	r3, r2
+
+/*
+ * Atomically store a value of an integer in memory.
+ */
+#define	ATOMIC_STORE_INT(r1, r2, r3, val) \
+	lduw	[r1], r2 ; \
+9:	mov	val, r3 ; \
+	casa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%icc, 9b ; \
+	mov	r3, r2
+
+/*
+ * Atomically store a value of a long in memory.
+ */
+#define	ATOMIC_STORE_LONG(r1, r2, r3, val) \
+	ldx	[r1], r2 ; \
+9:	mov	val, r3 ; \
+	casxa	[r1] ASI_N, r2, r3 ; \
+	cmp	r2, r3 ; \
+	bne,pn	%xcc, 9b ; \
+	mov	r3, r2
+
 #define	PCPU(member)	PCPU_REG + PC_ ## member
 
-#define	PCPU_ADDR(member, reg) \
+#define	PCPU_ADDR(member, reg) \
 	add	PCPU_REG, PC_ ## member, reg
 
-#define	DEBUGGER() \
+#define	DEBUGGER() \
 	ta	%xcc, 1
 
-#define	PANIC(msg, r1) \
-	.sect	.rodata ; \
-9:	.asciz	msg ; \
-	.previous ; \
-	SET(9b, r1, %o0) ; \
-	call	panic ; \
+#define	PANIC(msg, r1) \
+	.sect	.rodata ; \
+9:	.asciz	msg ; \
+	.previous ; \
+	SET(9b, r1, %o0) ; \
+	call	panic ; \
 	nop
 
 #ifdef INVARIANTS
-#define	KASSERT(r1, msg) \
-	brnz,pt	r1, 8f ; \
-	nop ; \
-	PANIC(msg, r1) ; \
+#define	KASSERT(r1, msg) \
+	brnz,pt	r1, 8f ; \
+	nop ; \
+	PANIC(msg, r1) ; \
 8:
 #else
 #define	KASSERT(r1, msg)
 #endif
 
-#define	PUTS(msg, r1) \
-	.sect	.rodata ; \
-9:	.asciz	msg ; \
-	.previous ; \
-	SET(9b, r1, %o0) ; \
-	call	printf ; \
+#define	PUTS(msg, r1) \
+	.sect	.rodata ; \
+9:	.asciz	msg ; \
+	.previous ; \
+	SET(9b, r1, %o0) ; \
+	call	printf ; \
 	nop
 
 #define	_ALIGN_DATA	.align 8
 
-#define	DATA(name) \
-	.data ; \
-	_ALIGN_DATA ; \
-	.globl	name ; \
-	.type	name, @object ; \
+#define	DATA(name) \
+	.data ; \
+	_ALIGN_DATA ; \
+	.globl	name ; \
+	.type	name, @object ; \
 name:
 
 #define	EMPTY
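All of the new ATOMIC_SET_*/ATOMIC_STORE_* macros follow the same compare-and-swap retry pattern as the existing ones: load the current value, compute the desired value, cas it into place, and retry until the value observed by cas matches the expected one. As a rough C-level sketch of what ATOMIC_SET_LONG does (illustrative only; the function name is invented and atomic_cmpset_long(9) stands in for the raw casxa):

#include <sys/types.h>
#include <machine/atomic.h>

/* Illustrative sketch, not part of the patch. */
static __inline void
example_atomic_set_long(volatile u_long *p, u_long bits)
{
	u_long old;

	do {
		old = *p;			/* ldx	[r1], r2 */
		/* or + casxa + cmp/bne,pn %xcc: retry on mismatch */
	} while (atomic_cmpset_long(p, old, old | bits) == 0);
}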
Index: include/atomic.h
===================================================================
--- include/atomic.h	(revision 224216)
+++ include/atomic.h	(working copy)
@@ -74,12 +74,16 @@
  *
  * the return value of cas is used to avoid the extra reload.
  *
- * The memory barriers provided by the acq and rel variants are intended
- * to be sufficient for use of relaxed memory ordering.  Due to the
- * suggested assembly syntax of the membar operands containing a #
- * character, they cannot be used in macros.  The cmask and mmask bits
+ * We only include a memory barrier in the rel variants as in total store
+ * order, which we use for running the kernel and all of the userland, atomic
+ * loads and stores behave as if they were followed by a membar with a mask
+ * of #LoadLoad | #LoadStore | #StoreStore.  In order to be also sufficient
+ * for use of relaxed memory ordering, the atomic_cas() in the acq variants
+ * additionally would have to be followed by a membar #LoadLoad | #LoadStore.
+ * Due to the suggested assembly syntax of the membar operands containing a
+ * # character, they cannot be used in macros.  The cmask and mmask bits thus
  * are hard coded in machine/cpufunc.h and used here through macros.
- * Hopefully sun will choose not to change the bit numbers.
+ * Hopefully the bit numbers won't change in the future.
  */
 
 #define	itype(sz)	uint ## sz ## _t
@@ -93,7 +97,6 @@
 #define	atomic_cas_acq(p, e, s, sz) ({					\
 	itype(sz) v;							\
 	v = atomic_cas(p, e, s, sz);					\
-	membar(LoadLoad | LoadStore);					\
 	v;								\
 })
 
@@ -118,7 +121,6 @@
 #define	atomic_op_acq(p, op, v, sz) ({					\
 	itype(sz) t;							\
 	t = atomic_op(p, op, v, sz);					\
-	membar(LoadLoad | LoadStore);					\
 	t;								\
 })
 
@@ -135,7 +137,6 @@
 #define	atomic_load_acq(p, sz) ({					\
 	itype(sz) v;							\
 	v = atomic_load(p, sz);						\
-	membar(LoadLoad | LoadStore);					\
 	v;								\
 })
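Put differently, under TSO the cas itself already provides the #LoadLoad | #LoadStore ordering an acquire operation needs, so the acq variants can reduce to the bare atomic_cas()/atomic_op()/atomic_load(). If the kernel were ever run with relaxed memory ordering, the barrier removed above would have to come back; a hypothetical sketch mirroring the deleted lines, not something this patch adds:

#define	atomic_cas_acq_rmo(p, e, s, sz) ({				\
	itype(sz) v;							\
	v = atomic_cas(p, e, s, sz);					\
	membar(LoadLoad | LoadStore);	/* acquire fence under RMO */	\
	v;								\
})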
Index: sparc64/pmap.c
===================================================================
--- sparc64/pmap.c	(revision 224216)
+++ sparc64/pmap.c	(working copy)
@@ -100,19 +100,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 
-#define	PMAP_DEBUG
-
-#ifndef	PMAP_SHPGPERPROC
-#define	PMAP_SHPGPERPROC	200
-#endif
-
-/* XXX */
-#include "opt_sched.h"
-#ifndef	SCHED_4BSD
-#error "sparc64 only works with SCHED_4BSD which uses a global scheduler lock."
-#endif
-extern struct mtx sched_lock;
-
 /*
  * Virtual address of message buffer
  */
@@ -1240,11 +1227,9 @@ pmap_pinit(pmap_t pm)
 	if (pm->pm_tsb_obj == NULL)
 		pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
 
-	mtx_lock_spin(&sched_lock);
 	for (i = 0; i < MAXCPU; i++)
 		pm->pm_context[i] = -1;
 	CPU_ZERO(&pm->pm_active);
-	mtx_unlock_spin(&sched_lock);
 
 	VM_OBJECT_LOCK(pm->pm_tsb_obj);
 	for (i = 0; i < TSB_PAGES; i++) {
@@ -1271,7 +1256,9 @@ pmap_release(pmap_t pm)
 {
 	vm_object_t obj;
 	vm_page_t m;
+#ifdef SMP
 	struct pcpu *pc;
+#endif
 
 	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
 	    pm->pm_context[curcpu], pm->pm_tsb);
@@ -1291,11 +1278,18 @@ pmap_release(pmap_t pm)
 	 * - A process that referenced this pmap ran on a CPU, but we switched
 	 *   to a kernel thread, leaving the pmap pointer unchanged.
 	 */
-	mtx_lock_spin(&sched_lock);
+#ifdef SMP
+	sched_pin();
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
-		if (pc->pc_pmap == pm)
-			pc->pc_pmap = NULL;
-	mtx_unlock_spin(&sched_lock);
+		atomic_cmpset_rel_ptr((uintptr_t *)&pc->pc_pmap,
+		    (uintptr_t)pm, (uintptr_t)NULL);
+	sched_unpin();
+#else
+	critical_enter();
+	if (PCPU_GET(pmap) == pm)
+		PCPU_SET(pmap, NULL);
+	critical_exit();
+#endif
 
 	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
 	obj = pm->pm_tsb_obj;
@@ -2244,11 +2238,14 @@ pmap_activate(struct thread *td)
 	}
 	PCPU_SET(tlb_ctx, context + 1);
 
-	mtx_lock_spin(&sched_lock);
 	pm->pm_context[curcpu] = context;
+#ifdef SMP
+	CPU_SET_ATOMIC(PCPU_GET(cpuid), &pm->pm_active);
+	atomic_store_ptr((uintptr_t *)PCPU_PTR(pmap), (uintptr_t)pm);
+#else
 	CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
 	PCPU_SET(pmap, pm);
-	mtx_unlock_spin(&sched_lock);
+#endif
 
 	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
 	stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
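The pmap_release() change uses compare-and-swap rather than a plain store because another CPU may concurrently install a different pmap in its pc_pmap: only a pointer that still refers to the pmap being destroyed may be cleared, and the rel ordering makes the preceding teardown visible before the clear. A stand-alone sketch of that pattern (the helper name is invented; this is not the literal kernel code):

#include <sys/types.h>
#include <machine/atomic.h>

/*
 * Clear *slot only if it still points at victim; if the owning CPU has
 * already switched to another pmap, leave it alone.  Illustrative only.
 */
static __inline void
example_clear_if_matches(volatile uintptr_t *slot, uintptr_t victim)
{
	(void)atomic_cmpset_rel_ptr(slot, victim, (uintptr_t)NULL);
}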
Index: sparc64/swtch.S
===================================================================
--- sparc64/swtch.S	(revision 224216)
+++ sparc64/swtch.S	(working copy)
@@ -180,9 +180,15 @@ ENTRY(cpu_switch)
 	sub	%l3, %l5, %l5
 	mov	1, %l6
 	sllx	%l6, %l5, %l5
+#ifdef SMP
+	add	%l2, %l4, %l4
+	membar	#LoadStore | #StoreStore
+	ATOMIC_CLEAR_LONG(%l4, %l6, %l7, %l5)
+#else
 	ldx	[%l2 + %l4], %l6
 	andn	%l6, %l5, %l6
 	stx	%l6, [%l2 + %l4]
+#endif
 
 	/*
 	 * Take away its context number.
@@ -237,14 +243,24 @@ ENTRY(cpu_switch)
 	sub	%l3, %l5, %l5
 	mov	1, %l6
 	sllx	%l6, %l5, %l5
+#ifdef SMP
+	add	%l1, %l4, %l4
+	ATOMIC_SET_LONG(%l4, %l6, %l7, %l5)
+#else
 	ldx	[%l1 + %l4], %l6
 	or	%l6, %l5, %l6
 	stx	%l6, [%l1 + %l4]
+#endif
 
 	/*
 	 * Make note of the change in pmap.
 	 */
+#ifdef SMP
+	PCPU_ADDR(PMAP, %l4)
+	ATOMIC_STORE_LONG(%l4, %l5, %l6, %l1)
+#else
 	stx	%l1, [PCPU(PMAP)]
+#endif
 
 	/*
 	 * Fiddle the hardware bits.  Set the TSB registers and install the
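At the C level, the SMP path added to cpu_switch() corresponds roughly to the sequence below: a release-style barrier before this CPU's bit is dropped from the old pmap's pm_active, an atomic set of the bit in the new pmap's mask, and an atomic store publishing the new pmap pointer in the per-CPU data, mirroring atomic_store_ptr() in pmap_activate() above. The helper and its parameters are invented for illustration, and pm_active is treated as a single long word, which is a simplification:

#include <sys/types.h>
#include <machine/atomic.h>

/* Illustrative sketch of the cpu_switch() SMP path, not literal kernel code. */
static __inline void
example_switch_pmap(volatile u_long *old_active, volatile u_long *new_active,
    u_long cpu_bit, uintptr_t *pcpu_pmap, uintptr_t new_pmap)
{
	/* membar #LoadStore | #StoreStore + ATOMIC_CLEAR_LONG */
	atomic_clear_rel_long(old_active, cpu_bit);
	/* ATOMIC_SET_LONG */
	atomic_set_long(new_active, cpu_bit);
	/* ATOMIC_STORE_LONG into PCPU(PMAP) */
	atomic_store_ptr(pcpu_pmap, new_pmap);
}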