Index: arm/arm/elf_machdep.c
===================================================================
--- arm/arm/elf_machdep.c	(revision 194610)
+++ arm/arm/elf_machdep.c	(working copy)
@@ -149,7 +149,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relo
 
 	if (local) {
 		if (rtype == R_ARM_RELATIVE) {	/* A + B */
-			addr = relocbase + addend;
+			addr = elf_relocaddr(lf, relocbase + addend);
 			if (*where != addr)
 				*where = addr;
 		}
Index: arm/xscale/i8134x/crb_machdep.c
===================================================================
--- arm/xscale/i8134x/crb_machdep.c	(revision 194610)
+++ arm/xscale/i8134x/crb_machdep.c	(working copy)
@@ -183,6 +183,7 @@ void *
 initarm(void *arg, void *arg2)
 {
 	struct pv_addr kernel_l1pt;
+	struct pv_addr dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
@@ -232,6 +233,10 @@ initarm(void *arg, void *arg2)
 	 */
 	valloc_pages(systempage, 1);
 
+	/* Allocate dynamic per-cpu area. */
+	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+	dpcpu_init((void *)dpcpu.pv_va, 0);
+
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
Index: arm/xscale/i80321/iq31244_machdep.c
===================================================================
--- arm/xscale/i80321/iq31244_machdep.c	(revision 194610)
+++ arm/xscale/i80321/iq31244_machdep.c	(working copy)
@@ -187,6 +187,7 @@ void *
 initarm(void *arg, void *arg2)
 {
 	struct pv_addr kernel_l1pt;
+	struct pv_addr dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
@@ -236,6 +237,10 @@ initarm(void *arg, void *arg2)
 	 */
 	valloc_pages(systempage, 1);
 
+	/* Allocate dynamic per-cpu area. */
+	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+	dpcpu_init((void *)dpcpu.pv_va, 0);
+
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
Index: arm/xscale/i80321/ep80219_machdep.c
===================================================================
--- arm/xscale/i80321/ep80219_machdep.c	(revision 194610)
+++ arm/xscale/i80321/ep80219_machdep.c	(working copy)
@@ -186,6 +186,7 @@ void *
 initarm(void *arg, void *arg2)
 {
 	struct pv_addr kernel_l1pt;
+	struct pv_addr dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
@@ -236,6 +237,10 @@ initarm(void *arg, void *arg2)
 	 */
 	valloc_pages(systempage, 1);
 
+	/* Allocate dynamic per-cpu area. */
+	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+	dpcpu_init((void *)dpcpu.pv_va, 0);
+
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
Index: arm/xscale/pxa/pxa_machdep.c
===================================================================
--- arm/xscale/pxa/pxa_machdep.c	(revision 194610)
+++ arm/xscale/pxa/pxa_machdep.c	(working copy)
@@ -166,6 +166,7 @@ void *
 initarm(void *arg, void *arg2)
 {
 	struct pv_addr kernel_l1pt;
+	struct pv_addr dpcpu;
 	int loop;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
@@ -218,6 +219,10 @@ initarm(void *arg, void *arg2)
 	 */
 	valloc_pages(systempage, 1);
 
+	/* Allocate dynamic per-cpu area. */
+	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+	dpcpu_init((void *)dpcpu.pv_va, 0);
+
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
Index: arm/xscale/ixp425/avila_machdep.c
===================================================================
--- arm/xscale/ixp425/avila_machdep.c	(revision 194610)
+++ arm/xscale/ixp425/avila_machdep.c	(working copy)
@@ -225,6 +225,7 @@ initarm(void *arg, void *arg2)
 #define next_chunk2(a,b)	(((a) + (b)) &~ ((b)-1))
 #define next_page(a)		next_chunk2(a,PAGE_SIZE)
 	struct pv_addr kernel_l1pt;
+	struct pv_addr dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
@@ -296,6 +297,10 @@ initarm(void *arg, void *arg2)
 	 */
 	valloc_pages(systempage, 1);
 
+	/* Allocate dynamic per-cpu area. */
+	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+	dpcpu_init((void *)dpcpu.pv_va, 0);
+
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
Index: arm/mv/mv_machdep.c
===================================================================
--- arm/mv/mv_machdep.c	(revision 194610)
+++ arm/mv/mv_machdep.c	(working copy)
@@ -358,6 +358,7 @@ void *
 initarm(void *mdp, void *unused __unused)
 {
 	struct pv_addr kernel_l1pt;
+	struct pv_addr dpcpu;
 	vm_offset_t freemempos, l2_start, lastaddr;
 	uint32_t memsize, l2size;
 	struct bi_mem_region *mr;
@@ -479,6 +480,10 @@ initarm(void *mdp, void *unused __unused)
 	 */
 	valloc_pages(systempage, 1);
 
+	/* Allocate dynamic per-cpu area. */
+	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+	dpcpu_init((void *)dpcpu.pv_va, 0);
+
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
Index: arm/sa11x0/assabet_machdep.c
===================================================================
--- arm/sa11x0/assabet_machdep.c	(revision 194610)
+++ arm/sa11x0/assabet_machdep.c	(working copy)
@@ -209,6 +209,7 @@ initarm(void *arg, void *arg2)
 	struct pv_addr kernel_l1pt;
 	struct pv_addr md_addr;
 	struct pv_addr md_bla;
+	struct pv_addr dpcpu;
 	int loop;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
@@ -268,6 +269,10 @@ initarm(void *arg, void *arg2)
 	 */
 	valloc_pages(systempage, 1);
 
+	/* Allocate dynamic per-cpu area. */
+	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+	dpcpu_init((void *)dpcpu.pv_va, 0);
+
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
Index: arm/at91/at91_machdep.c
===================================================================
--- arm/at91/at91_machdep.c	(revision 194610)
+++ arm/at91/at91_machdep.c	(working copy)
@@ -215,6 +215,7 @@ void *
 initarm(void *arg, void *arg2)
 {
 	struct pv_addr kernel_l1pt;
+	struct pv_addr dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
@@ -264,6 +265,10 @@ initarm(void *arg, void *arg2)
 	 */
 	valloc_pages(systempage, 1);
 
+	/* Allocate dynamic per-cpu area. */
+	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+	dpcpu_init((void *)dpcpu.pv_va, 0);
+
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
Index: powerpc/booke/pmap.c
===================================================================
--- powerpc/booke/pmap.c	(revision 194610)
+++ powerpc/booke/pmap.c	(working copy)
@@ -963,6 +963,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start,
 	vm_size_t physsz, hwphyssz, kstack0_sz;
 	vm_offset_t kernel_pdir, kstack0, va;
 	vm_paddr_t kstack0_phys;
+	void *dpcpu;
 	pte_t *pte;
 
 	debugf("mmu_booke_bootstrap: entered\n");
@@ -988,6 +989,11 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start,
 
 	data_end = round_page(data_end);
 
+	/* Allocate the dynamic per-cpu area. */
+	dpcpu = (void *)data_end;
+	data_end += DPCPU_SIZE;
+	dpcpu_init(dpcpu, 0);
+
 	/* Allocate space for ptbl_bufs. */
 	ptbl_bufs = (struct ptbl_buf *)data_end;
 	data_end += sizeof(struct ptbl_buf) * PTBL_BUFS;
Index: powerpc/powerpc/elf_machdep.c
===================================================================
--- powerpc/powerpc/elf_machdep.c	(revision 194610)
+++ powerpc/powerpc/elf_machdep.c	(working copy)
@@ -194,7 +194,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relo
 		break;
 
 	case R_PPC_RELATIVE: /* word32 B + A */
-		*where = relocbase + addend;
+		*where = elf_relocaddr(lf, relocbase + addend);
 		break;
 
 	default:
Index: powerpc/powerpc/mp_machdep.c
===================================================================
--- powerpc/powerpc/mp_machdep.c	(revision 194610)
+++ powerpc/powerpc/mp_machdep.c	(working copy)
@@ -146,8 +146,12 @@ cpu_mp_start(void)
 			goto next;
 		}
 		if (cpu.cr_cpuid != bsp.cr_cpuid) {
+			void *dpcpu;
+
 			pc = &__pcpu[cpu.cr_cpuid];
+			dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
 			pcpu_init(pc, cpu.cr_cpuid, sizeof(*pc));
+			dpcpu_init(dpcpu, cpu.cr_cpuid);
 		} else {
 			pc = pcpup;
 			pc->pc_cpuid = bsp.cr_cpuid;
Index: powerpc/aim/mmu_oea.c
===================================================================
--- powerpc/aim/mmu_oea.c	(revision 194610)
+++ powerpc/aim/mmu_oea.c	(working copy)
@@ -669,6 +669,7 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart
 	int		ofw_mappings;
 	vm_size_t	size, physsz, hwphyssz;
 	vm_offset_t	pa, va, off;
+	void		*dpcpu;
 
 	/*
 	 * Set up BAT0 to map the lowest 256 MB area
@@ -938,6 +939,20 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart
 		pa += PAGE_SIZE;
 		va += PAGE_SIZE;
 	}
+
+	/*
+	 * Allocate virtual address space for the dynamic percpu area.
+	 */
+	pa = moea_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
+	dpcpu = (void *)virtual_avail;
+	va = virtual_avail;
+	virtual_avail += DPCPU_SIZE;
+	while (va < virtual_avail) {
+		moea_kenter(mmup, va, pa);
+		pa += PAGE_SIZE;
+		va += PAGE_SIZE;
+	}
+	dpcpu_init(dpcpu, 0);
 }
 
 /*
Index: powerpc/aim/mmu_oea64.c
===================================================================
--- powerpc/aim/mmu_oea64.c	(revision 194610)
+++ powerpc/aim/mmu_oea64.c	(working copy)
@@ -726,6 +726,7 @@ moea64_bridge_bootstrap(mmu_t mmup, vm_offset_t ke
 	vm_size_t	size, physsz, hwphyssz;
 	vm_offset_t	pa, va, off;
 	uint32_t	msr;
+	void		*dpcpu;
 
 	/* We don't have a direct map since there is no BAT */
 	hw_direct_map = 0;
@@ -1027,6 +1028,20 @@ moea64_bridge_bootstrap(mmu_t mmup, vm_offset_t ke
 		pa += PAGE_SIZE;
 		va += PAGE_SIZE;
 	}
+
+	/*
+	 * Allocate virtual address space for the dynamic percpu area.
+	 */
+	pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
+	dpcpu = (void *)virtual_avail;
+	va = virtual_avail;
+	virtual_avail += DPCPU_SIZE;
+	while (va < virtual_avail) {
+		moea64_kenter(mmup, va, pa);
+		pa += PAGE_SIZE;
+		va += PAGE_SIZE;
+	}
+	dpcpu_init(dpcpu, 0);
 }
 
 /*
Index: sparc64/sparc64/elf_machdep.c
===================================================================
--- sparc64/sparc64/elf_machdep.c	(revision 194610)
+++ sparc64/sparc64/elf_machdep.c	(working copy)
@@ -285,7 +285,7 @@ elf_reloc_local(linker_file_t lf, Elf_Addr relocba
 
 	value = rela->r_addend + (Elf_Addr)lf->address;
 	where = (Elf_Addr *)((Elf_Addr)lf->address + rela->r_offset);
-	*where = value;
+	*where = elf_relocaddr(lf, value);
 
 	return (0);
 }
@@ -338,8 +338,9 @@ elf_reloc(linker_file_t lf, Elf_Addr relocbase, co
 	if (RELOC_PC_RELATIVE(rtype))
 		value -= (Elf_Addr)where;
 
-	if (RELOC_BASE_RELATIVE(rtype))
-		value += relocbase;
+	if (RELOC_BASE_RELATIVE(rtype)) {
+		value = elf_relocaddr(lf, value + relocbase);
+	}
 
 	mask = RELOC_VALUE_BITMASK(rtype);
 	value >>= RELOC_VALUE_RIGHTSHIFT(rtype);
Index: sparc64/sparc64/mp_machdep.c
===================================================================
--- sparc64/sparc64/mp_machdep.c	(revision 194610)
+++ sparc64/sparc64/mp_machdep.c	(working copy)
@@ -290,6 +290,8 @@ cpu_mp_start(void)
 		va = kmem_alloc(kernel_map, PCPU_PAGES * PAGE_SIZE);
 		pc = (struct pcpu *)(va + (PCPU_PAGES * PAGE_SIZE)) - 1;
 		pcpu_init(pc, cpuid, sizeof(*pc));
+		dpcpu_init((void *)kmem_alloc(kernel_map, DPCPU_SIZE),
+		    cpuid);
 		pc->pc_addr = va;
 		pc->pc_clock = clock;
 		pc->pc_mid = mid;
Index: sparc64/sparc64/pmap.c
===================================================================
--- sparc64/sparc64/pmap.c	(revision 194610)
+++ sparc64/sparc64/pmap.c	(working copy)
@@ -363,6 +363,11 @@ pmap_bootstrap(vm_offset_t ekva)
 	bzero(tsb_kernel, tsb_kernel_size);
 
 	/*
+	 * Allocate and map the dynamic per-CPU area for the BSP.
+	 */
+	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pmap_bootstrap_alloc(DPCPU_SIZE));
+
+	/*
 	 * Allocate and map the message buffer.
 	 */
 	msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
Index: sparc64/sparc64/machdep.c
===================================================================
--- sparc64/sparc64/machdep.c	(revision 194610)
+++ sparc64/sparc64/machdep.c	(working copy)
@@ -124,6 +124,7 @@ int cold = 1;
 
 long Maxmem;
 long realmem;
+void *dpcpu0;
 char pcpu0[PCPU_PAGES * PAGE_SIZE];
 struct trapframe frame0;
 
@@ -480,8 +481,10 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_
 
 	delay_func = delay_tick;
 
 	/*
-	 * Initialize the message buffer (after setting trap table).
+	 * Initialize the dynamic per-CPU area for the BSP and the message
+	 * buffer (after setting the trap table).
 	 */
+	dpcpu_init(dpcpu0, 0);
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 	mutex_init();
Index: sparc64/include/pcpu.h
===================================================================
--- sparc64/include/pcpu.h	(revision 194610)
+++ sparc64/include/pcpu.h	(working copy)
@@ -62,6 +62,8 @@ struct pmap;
 
 #ifdef _KERNEL
 
+extern void *dpcpu0;
+
 struct pcb;
 struct pcpu;
 
Index: kern/link_elf_obj.c
===================================================================
--- kern/link_elf_obj.c	(revision 194610)
+++ kern/link_elf_obj.c	(working copy)
@@ -333,7 +333,21 @@ link_elf_link_preload(linker_class_t cls, const ch
 			if (ef->shstrtab && shdr[i].sh_name != 0)
 				ef->progtab[pb].name =
 				    ef->shstrtab + shdr[i].sh_name;
+			if (ef->progtab[pb].name != NULL &&
+			    !strcmp(ef->progtab[pb].name, "set_pcpu")) {
+				void *dpcpu;
+
+				dpcpu = dpcpu_alloc(shdr[i].sh_size);
+				if (dpcpu == NULL) {
+					error = ENOSPC;
+					goto out;
+				}
+				memcpy(dpcpu, ef->progtab[pb].addr,
+				    ef->progtab[pb].size);
+				dpcpu_copy(dpcpu, shdr[i].sh_size);
+				ef->progtab[pb].addr = dpcpu;
+			}
 
 			/* Update all symbol values with the offset. */
 			for (j = 0; j < ef->ddbsymcnt; j++) {
 				es = &ef->ddbsymtab[j];
@@ -712,9 +726,27 @@ link_elf_load_file(linker_class_t cls, const char
 			alignmask = shdr[i].sh_addralign - 1;
 			mapbase += alignmask;
 			mapbase &= ~alignmask;
-			ef->progtab[pb].addr = (void *)(uintptr_t)mapbase;
+			if (ef->shstrtab && shdr[i].sh_name != 0)
+				ef->progtab[pb].name =
+				    ef->shstrtab + shdr[i].sh_name;
+			else if (shdr[i].sh_type == SHT_PROGBITS)
+				ef->progtab[pb].name = "<<PROGBITS>>";
+			else
+				ef->progtab[pb].name = "<<NOBITS>>";
+			if (ef->progtab[pb].name != NULL &&
+			    !strcmp(ef->progtab[pb].name, "set_pcpu"))
+				ef->progtab[pb].addr =
+				    dpcpu_alloc(shdr[i].sh_size);
+			else
+				ef->progtab[pb].addr =
+				    (void *)(uintptr_t)mapbase;
+			if (ef->progtab[pb].addr == NULL) {
+				error = ENOSPC;
+				goto out;
+			}
+			ef->progtab[pb].size = shdr[i].sh_size;
+			ef->progtab[pb].sec = i;
 			if (shdr[i].sh_type == SHT_PROGBITS) {
-				ef->progtab[pb].name = "<<PROGBITS>>";
 				error = vn_rdwr(UIO_READ, nd.ni_vp,
 				    ef->progtab[pb].addr,
 				    shdr[i].sh_size, shdr[i].sh_offset,
@@ -726,15 +758,12 @@ link_elf_load_file(linker_class_t cls, const char
 				error = EINVAL;
 				goto out;
 			}
-		} else {
-			ef->progtab[pb].name = "<<NOBITS>>";
+			/* Initialize the per-cpu area. */
+			if (ef->progtab[pb].addr != (void *)mapbase)
+				dpcpu_copy(ef->progtab[pb].addr,
+				    shdr[i].sh_size);
+		} else
 			bzero(ef->progtab[pb].addr, shdr[i].sh_size);
-		}
-		ef->progtab[pb].size = shdr[i].sh_size;
-		ef->progtab[pb].sec = i;
-		if (ef->shstrtab && shdr[i].sh_name != 0)
-			ef->progtab[pb].name =
-			    ef->shstrtab + shdr[i].sh_name;
 
 		/* Update all symbol values with the offset. */
 		for (j = 0; j < ef->ddbsymcnt; j++) {
@@ -839,6 +868,17 @@ link_elf_unload_file(linker_file_t file)
 	/* Notify MD code that a module is being unloaded. */
 	elf_cpu_unload_file(file);
 
+	if (ef->progtab) {
+		for (i = 0; i < ef->nprogtab; i++) {
+			if (ef->progtab[i].size == 0)
+				continue;
+			if (ef->progtab[i].name == NULL)
+				continue;
+			if (!strcmp(ef->progtab[i].name, "set_pcpu"))
+				dpcpu_free(ef->progtab[i].addr,
+				    ef->progtab[i].size);
+		}
+	}
 	if (ef->preloaded) {
 		if (ef->reltab)
 			free(ef->reltab, M_LINKER);
Index: kern/subr_pcpu.c
===================================================================
--- kern/subr_pcpu.c	(revision 194610)
+++ kern/subr_pcpu.c	(working copy)
@@ -49,13 +49,28 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
+#include <sys/sx.h>
 
 #include <ddb/ddb.h>
 
+MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accounting.");
+
+struct dpcpu_free {
+	uintptr_t	df_start;
+	int		df_len;
+	TAILQ_ENTRY(dpcpu_free) df_link;
+};
+
+static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]);
+static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
+static struct sx dpcpu_lock;
+uintptr_t dpcpu_off[MAXCPU];
 struct pcpu *cpuid_to_pcpu[MAXCPU];
 struct cpuhead cpuhead = SLIST_HEAD_INITIALIZER(cpuhead);
@@ -79,10 +94,150 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t siz
 
 #ifdef KTR
 	snprintf(pcpu->pc_name, sizeof(pcpu->pc_name), "CPU %d", cpuid);
 #endif
+}
 
+void
+dpcpu_init(void *dpcpu, int cpuid)
+{
+	struct pcpu *pcpu;
+
+	pcpu = pcpu_find(cpuid);
+	pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START;
+
+	/*
+	 * Initialize defaults from our linker section.
+	 */
+	memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES);
+
+	/*
+	 * Place it in the global pcpu offset array.
+	 */
+	dpcpu_off[cpuid] = pcpu->pc_dynamic;
 }
 
+static void
+dpcpu_startup(void *dummy __unused)
+{
+	struct dpcpu_free *df;
+
+	df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
+	df->df_start = (uintptr_t)&DPCPU_NAME(modspace);
+	df->df_len = DPCPU_MODSIZE;
+	TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link);
+	sx_init(&dpcpu_lock, "dpcpu alloc lock");
+}
+SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, 0);
+
 /*
+ * First-fit extent based allocator for allocating space in the per-cpu
+ * region reserved for modules.  This is only intended for use by the
+ * kernel linkers to place module linker sets.
+ */
+void *
+dpcpu_alloc(int size)
+{
+	struct dpcpu_free *df;
+	void *s;
+
+	s = NULL;
+	size = roundup2(size, sizeof(void *));
+	sx_xlock(&dpcpu_lock);
+	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
+		if (df->df_len < size)
+			continue;
+		if (df->df_len == size) {
+			s = (void *)df->df_start;
+			TAILQ_REMOVE(&dpcpu_head, df, df_link);
+			free(df, M_PCPU);
+			break;
+		}
+		s = (void *)df->df_start;
+		df->df_len -= size;
+		df->df_start = df->df_start + size;
+		break;
+	}
+	sx_xunlock(&dpcpu_lock);
+
+	return (s);
+}
+
+/*
+ * Free dynamic per-cpu space at module unload time.
+ */
+void
+dpcpu_free(void *s, int size)
+{
+	struct dpcpu_free *df;
+	struct dpcpu_free *dn;
+	uintptr_t start;
+	uintptr_t end;
+
+	size = roundup2(size, sizeof(void *));
+	start = (uintptr_t)s;
+	end = start + size;
+	/*
+	 * Free a region of space and merge it with as many neighbors as
+	 * possible.  Keeping the list sorted simplifies this operation.
+	 */
+	sx_xlock(&dpcpu_lock);
+	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
+		if (df->df_start > end)
+			break;
+		/*
+		 * If we expand at the end of an entry we may have to
+		 * merge it with the one following it as well.
+ */ + if (df->df_start + df->df_len == start) { + df->df_len += size; + dn = TAILQ_NEXT(df, df_link); + if (df->df_start + df->df_len == dn->df_start) { + df->df_len += dn->df_len; + TAILQ_REMOVE(&dpcpu_head, dn, df_link); + free(dn, M_PCPU); + } + sx_xunlock(&dpcpu_lock); + return; + } + if (df->df_start == end) { + df->df_start = start; + df->df_len += size; + sx_xunlock(&dpcpu_lock); + return; + } + } + dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO); + dn->df_start = start; + dn->df_len = size; + if (df) + TAILQ_INSERT_BEFORE(df, dn, df_link); + else + TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link); + sx_xunlock(&dpcpu_lock); +} + +/* + * Initialize the per-cpu storage from an updated linker-set region. + */ +void +dpcpu_copy(void *s, int size) +{ +#ifdef SMP + uintptr_t dpcpu; + int i; + + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + memcpy((void *)(dpcpu + (uintptr_t)s), s, size); + } +#else + memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size); +#endif +} + +/* * Destroy a struct pcpu. */ void @@ -91,6 +245,7 @@ pcpu_destroy(struct pcpu *pcpu) SLIST_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu); cpuid_to_pcpu[pcpu->pc_cpuid] = NULL; + dpcpu_off[pcpu->pc_cpuid] = 0; } /* @@ -103,6 +258,48 @@ pcpu_find(u_int cpuid) return (cpuid_to_pcpu[cpuid]); } +int +sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS) +{ + int64_t count; +#ifdef SMP + uintptr_t dpcpu; + int i; + + count = 0; + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + count += *(int64_t *)(dpcpu + (uintptr_t)arg1); + } +#else + count = *(int64_t *)(dpcpu_off[0] + (uintptr_t)arg1); +#endif + return (SYSCTL_OUT(req, &count, sizeof(count))); +} + +int +sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS) +{ + int count; +#ifdef SMP + uintptr_t dpcpu; + int i; + + count = 0; + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + count += *(int *)(dpcpu + (uintptr_t)arg1); + } +#else + count = *(int *)(dpcpu_off[0] + (uintptr_t)arg1); +#endif + return (SYSCTL_OUT(req, &count, sizeof(count))); +} + #ifdef DDB static void @@ -111,6 +308,7 @@ show_pcpu(struct pcpu *pc) struct thread *td; db_printf("cpuid = %d\n", pc->pc_cpuid); + db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic); db_printf("curthread = "); td = pc->pc_curthread; if (td != NULL) Index: kern/kern_synch.c =================================================================== --- kern/kern_synch.c (revision 194610) +++ kern/kern_synch.c (working copy) @@ -377,6 +377,8 @@ kdb_switch(void) panic("%s: did not reenter debugger", __func__); } +DPCPU_DEFINE(uint64_t, switches); + /* * The machine independent parts of context switching. */ @@ -415,6 +417,7 @@ mi_switch(int flags, struct thread *newtd) #ifdef SCHED_STATS SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]); #endif + DPCPU_GET(switches)++; /* * Compute the amount of time during which the current * thread was running, and add that to its total so far. @@ -529,6 +532,7 @@ synch_setup(void *dummy) { callout_init(&loadav_callout, CALLOUT_MPSAFE); + SYSCTL_ADD_PROC(NULL, &sysctl__debug_children, OID_AUTO, "switches", CTLTYPE_QUAD|CTLFLAG_RD|CTLFLAG_MPSAFE, &DPCPU_NAME(switches), 0, sysctl_dpcpu_quad, "IU", "Context switches"); /* Kick off timeout driven events by calling first time. 
 	loadav(NULL);
 }
Index: kern/link_elf.c
===================================================================
--- kern/link_elf.c	(revision 194610)
+++ kern/link_elf.c	(working copy)
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/mount.h>
+#include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/namei.h>
 #include <sys/fcntl.h>
@@ -107,6 +108,9 @@ typedef struct elf_file {
 	caddr_t		ctfoff;		/* CTF offset table */
 	caddr_t		typoff;		/* Type offset table */
 	long		typlen;		/* Number of type entries. */
+	Elf_Addr	pcpu_start;	/* Pre-relocation pcpu set start. */
+	Elf_Addr	pcpu_stop;	/* Pre-relocation pcpu set stop. */
+	Elf_Addr	pcpu_base;	/* Relocated pcpu set address. */
 #ifdef GDB
 	struct link_map	gdb;		/* hooks for gdb */
 #endif
@@ -475,6 +479,34 @@ parse_dynamic(elf_file_t ef)
 }
 
 static int
+parse_dpcpu(elf_file_t ef)
+{
+	int count;
+	int error;
+
+	ef->pcpu_start = 0;
+	ef->pcpu_stop = 0;
+	error = link_elf_lookup_set(&ef->lf, "pcpu", (void ***)&ef->pcpu_start,
+	    (void ***)&ef->pcpu_stop, &count);
+	/* Error just means there is no pcpu set to relocate. */
+	if (error)
+		return (0);
+	count *= sizeof(void *);
+	/*
+	 * Allocate space in the primary pcpu area.  Copy in our initialization
+	 * from the data section and then initialize all per-cpu storage from
+	 * that.
+	 */
+	ef->pcpu_base = (Elf_Addr)(uintptr_t)dpcpu_alloc(count);
+	if (ef->pcpu_base == (Elf_Addr)NULL)
+		return (ENOSPC);
+	memcpy((void *)ef->pcpu_base, (void *)ef->pcpu_start, count);
+	dpcpu_copy((void *)ef->pcpu_base, count);
+
+	return (0);
+}
+
+static int
 link_elf_link_preload(linker_class_t cls, const char* filename,
     linker_file_t *result)
 {
@@ -519,6 +551,8 @@ link_elf_link_preload(linker_class_t cls,
 	lf->size = *(size_t *)sizeptr;
 
 	error = parse_dynamic(ef);
+	if (error == 0)
+		error = parse_dpcpu(ef);
 	if (error) {
 		linker_file_unload(lf, LINKER_UNLOAD_FORCE);
 		return error;
@@ -801,6 +835,9 @@ link_elf_load_file(linker_class_t cls, const char*
 	error = parse_dynamic(ef);
 	if (error)
 		goto out;
+	error = parse_dpcpu(ef);
+	if (error)
+		goto out;
 	link_elf_reloc_local(lf);
 
 	VOP_UNLOCK(nd.ni_vp, 0);
@@ -897,11 +934,26 @@ out:
 	return error;
 }
 
+Elf_Addr
+elf_relocaddr(linker_file_t lf, Elf_Addr x)
+{
+	elf_file_t ef;
+
+	ef = (elf_file_t)lf;
+	if (x >= ef->pcpu_start && x < ef->pcpu_stop)
+		return ((x - ef->pcpu_start) + ef->pcpu_base);
+	return (x);
+}
+
 static void
 link_elf_unload_file(linker_file_t file)
 {
 	elf_file_t ef = (elf_file_t) file;
 
+	if (ef->pcpu_base) {
+		dpcpu_free((void *)ef->pcpu_base,
+		    ef->pcpu_stop - ef->pcpu_start);
+	}
 #ifdef GDB
 	if (ef->gdb.l_ld) {
 		GDB_STATE(RT_DELETE);
Index: ia64/ia64/elf_machdep.c
===================================================================
--- ia64/ia64/elf_machdep.c	(revision 194610)
+++ ia64/ia64/elf_machdep.c	(working copy)
@@ -211,7 +211,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relo
 
 	if (local) {
 		if (rtype == R_IA_64_REL64LSB)
-			*where = relocbase + addend;
+			*where = elf_relocaddr(lf, relocbase + addend);
 		return (0);
 	}
 
Index: ia64/ia64/mp_machdep.c
===================================================================
--- ia64/ia64/mp_machdep.c	(revision 194610)
+++ ia64/ia64/mp_machdep.c	(working copy)
@@ -207,6 +207,7 @@ cpu_mp_add(u_int acpiid, u_int apicid, u_int apice
 {
 	struct pcpu *pc;
 	u_int64_t lid;
+	void *dpcpu;
 
 	/* Ignore any processor numbers outside our range */
 	if (acpiid > mp_maxid)
@@ -224,7 +225,9 @@ cpu_mp_add(u_int acpiid, u_int apicid, u_int apice
 
 	if (acpiid != 0) {
 		pc = (struct pcpu *)malloc(sizeof(*pc), M_SMP, M_WAITOK);
+		dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
 		pcpu_init(pc, acpiid, sizeof(*pc));
+		dpcpu_init(dpcpu, acpiid);
 	} else
 		pc = pcpup;
 
Index: ia64/ia64/machdep.c
===================================================================
--- ia64/ia64/machdep.c	(revision 194610)
+++ ia64/ia64/machdep.c	(working copy)
@@ -647,6 +647,21 @@ ia64_init(void)
 	bootverbose = 1;
 
 	/*
+	 * Find the beginning and end of the kernel.
+	 */
+	kernstart = trunc_page(kernel_text);
+#ifdef DDB
+	ksym_start = bootinfo.bi_symtab;
+	ksym_end = bootinfo.bi_esymtab;
+	kernend = (vm_offset_t)round_page(ksym_end);
+#else
+	kernend = (vm_offset_t)round_page(_end);
+#endif
+	/* But if the bootstrap tells us otherwise, believe it! */
+	if (bootinfo.bi_kernend)
+		kernend = round_page(bootinfo.bi_kernend);
+
+	/*
 	 * Setup the PCPU data for the bootstrap processor. It is needed
 	 * by printf(). Also, since printf() has critical sections, we
 	 * need to initialize at least pc_curthread.
@@ -654,6 +669,8 @@ ia64_init(void)
 	pcpup = &pcpu0;
 	ia64_set_k4((u_int64_t)pcpup);
 	pcpu_init(pcpup, 0, sizeof(pcpu0));
+	dpcpu_init((void *)kernend, 0);
+	kernend += DPCPU_SIZE;
 	PCPU_SET(curthread, &thread0);
 
 	/*
@@ -682,21 +699,6 @@ ia64_init(void)
 	ia64_sal_init();
 	calculate_frequencies();
 
-	/*
-	 * Find the beginning and end of the kernel.
-	 */
-	kernstart = trunc_page(kernel_text);
-#ifdef DDB
-	ksym_start = bootinfo.bi_symtab;
-	ksym_end = bootinfo.bi_esymtab;
-	kernend = (vm_offset_t)round_page(ksym_end);
-#else
-	kernend = (vm_offset_t)round_page(_end);
-#endif
-
-	/* But if the bootstrap tells us otherwise, believe it! */
-	if (bootinfo.bi_kernend)
-		kernend = round_page(bootinfo.bi_kernend);
 
 	if (metadata_missing)
 		printf("WARNING: loader(8) metadata is missing!\n");
Index: mips/mips/elf_machdep.c
===================================================================
--- mips/mips/elf_machdep.c	(revision 194610)
+++ mips/mips/elf_machdep.c	(working copy)
@@ -134,7 +134,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relo
 	if (local) {
 #if 0 /* TBD */
 		if (rtype == R_386_RELATIVE) {	/* A + B */
-			addr = relocbase + addend;
+			addr = elf_relocaddr(lf, relocbase + addend);
 			if (*where != addr)
 				*where = addr;
 		}
Index: mips/mips/mp_machdep.c
===================================================================
--- mips/mips/mp_machdep.c	(revision 194610)
+++ mips/mips/mp_machdep.c	(working copy)
@@ -224,12 +224,15 @@ static int
 smp_start_secondary(int cpuid)
 {
 	struct pcpu *pcpu;
+	void *dpcpu;
 	int i;
 
 	if (bootverbose)
 		printf("smp_start_secondary: starting cpu %d\n", cpuid);
 
+	dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
 	pcpu_init(&__pcpu[cpuid], cpuid, sizeof(struct pcpu));
+	dpcpu_init(dpcpu, cpuid);
 
 	if (bootverbose)
 		printf("smp_start_secondary: cpu %d started\n", cpuid);
Index: mips/mips/pmap.c
===================================================================
--- mips/mips/pmap.c	(revision 194610)
+++ mips/mips/pmap.c	(working copy)
@@ -331,6 +331,9 @@ again:
 	msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE);
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 
+	/* Steal memory for the dynamic per-cpu area. */
+	dpcpu_init((void *)pmap_steal_memory(DPCPU_SIZE), 0);
+
 	/*
 	 * Steal thread0 kstack.
 	 */
Index: sun4v/sun4v/pmap.c
===================================================================
--- sun4v/sun4v/pmap.c	(revision 194610)
+++ sun4v/sun4v/pmap.c	(working copy)
@@ -767,6 +767,11 @@ skipshuffle:
 	mmu_fault_status_area = pmap_bootstrap_alloc(MMFSA_SIZE*MAXCPU);
 
 	/*
+	 * Allocate and map the dynamic per-CPU area for the BSP.
+	 */
+	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pmap_bootstrap_alloc(DPCPU_SIZE));
+
+	/*
 	 * Allocate and map the message buffer.
 	 */
 	msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
Index: sun4v/sun4v/mp_machdep.c
===================================================================
--- sun4v/sun4v/mp_machdep.c	(revision 194610)
+++ sun4v/sun4v/mp_machdep.c	(working copy)
@@ -324,6 +324,8 @@ cpu_mp_start(void)
 		va = kmem_alloc(kernel_map, PCPU_PAGES * PAGE_SIZE);
 		pc = (struct pcpu *)(va + (PCPU_PAGES * PAGE_SIZE)) - 1;
 		pcpu_init(pc, cpuid, sizeof(*pc));
+		dpcpu_init((void *)kmem_alloc(kernel_map, DPCPU_SIZE),
+		    cpuid);
 		pc->pc_addr = va;
 
 		all_cpus |= 1 << cpuid;
Index: sun4v/sun4v/machdep.c
===================================================================
--- sun4v/sun4v/machdep.c	(revision 194610)
+++ sun4v/sun4v/machdep.c	(working copy)
@@ -129,6 +129,7 @@ int cold = 1;
 
 long Maxmem;
 long realmem;
+void *dpcpu0;
 char pcpu0[PCPU_PAGES * PAGE_SIZE];
 struct trapframe frame0;
 int trap_conversion[256];
@@ -500,6 +501,7 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_
 	 * Initialize the message buffer (after setting trap table).
 	 */
 	BVPRINTF("initialize msgbuf\n");
+	dpcpu_init(dpcpu0, 0);
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 
 	BVPRINTF("initialize mutexes\n");
Index: sun4v/include/pcpu.h
===================================================================
--- sun4v/include/pcpu.h	(revision 194610)
+++ sun4v/include/pcpu.h	(working copy)
@@ -89,6 +89,8 @@ struct pmap;
 
 #ifdef _KERNEL
 
+extern void *dpcpu0;
+
 struct pcpu;
 
 register struct pcpu *pcpup __asm__(__XSTRING(PCPU_REG));
Index: pc98/pc98/machdep.c
===================================================================
--- pc98/pc98/machdep.c	(revision 194610)
+++ pc98/pc98/machdep.c	(working copy)
@@ -1954,6 +1954,7 @@ init386(first)
 	struct gate_descriptor *gdp;
 	int gsel_tss, metadata_missing, x;
 	struct pcpu *pc;
+	int pa;
 
 	thread0.td_kstack = proc0kstack;
 	thread0.td_pcb = (struct pcb *)
@@ -2010,6 +2011,11 @@ init386(first)
 
 	lgdt(&r_gdt);
 	pcpu_init(pc, 0, sizeof(struct pcpu));
+	for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
+		pmap_kenter(pa + KERNBASE, pa);
+	dpcpu_init((void *)(first + KERNBASE), 0);
+	first += DPCPU_SIZE;
+
 	PCPU_SET(prvspace, pc);
 	PCPU_SET(curthread, &thread0);
 	PCPU_SET(curpcb, thread0.td_pcb);
Index: i386/i386/elf_machdep.c
===================================================================
--- i386/i386/elf_machdep.c	(revision 194610)
+++ i386/i386/elf_machdep.c	(working copy)
@@ -149,7 +149,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relo
 
 	if (local) {
 		if (rtype == R_386_RELATIVE) {	/* A + B */
-			addr = relocbase + addend;
+			addr = elf_relocaddr(lf, relocbase + addend);
 			if (*where != addr)
 				*where = addr;
 		}
Index: i386/i386/mp_machdep.c
===================================================================
--- i386/i386/mp_machdep.c	(revision 194610)
+++ i386/i386/mp_machdep.c	(working copy)
@@ -143,6 +143,7 @@ static int bootAP;
 
 /* Free these after use */
 void *bootstacks[MAXCPU];
+static void *dpcpu;
 
 /* Hotwire a 0->4MB V==P mapping */
 extern pt_entry_t *KPTphys;
@@ -610,6 +611,7 @@ init_secondary(void)
 
 	/* prime data page for it to use */
 	pcpu_init(pc, myid, sizeof(struct pcpu));
+	dpcpu_init(dpcpu, myid);
 	pc->pc_apic_id = cpu_apic_ids[myid];
 	pc->pc_prvspace = pc;
 	pc->pc_curthread = 0;
@@ -897,8 +899,9 @@ start_all_aps(void)
 		apic_id = cpu_apic_ids[cpu];
 
 		/* allocate and set up a boot stack data page */
-		bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
-
+		bootstacks[cpu] =
+		    (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
+		dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
 		/* setup a vector to our boot code */
 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
Index: i386/i386/machdep.c
===================================================================
--- i386/i386/machdep.c	(revision 194610)
+++ i386/i386/machdep.c	(working copy)
@@ -2448,7 +2448,7 @@ init386(first)
 	int first;
 {
 	unsigned long gdtmachpfn;
-	int error, gsel_tss, metadata_missing, x;
+	int error, gsel_tss, metadata_missing, x, pa;
 	struct pcpu *pc;
 	struct callback_register event = {
 		.type = CALLBACKTYPE_event,
@@ -2532,6 +2532,11 @@ init386(first)
 	    GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
 #endif
 	pcpu_init(pc, 0, sizeof(struct pcpu));
+	for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
+		pmap_kenter(pa + KERNBASE, pa);
+	dpcpu_init((void *)(first + KERNBASE), 0);
+	first += DPCPU_SIZE;
+
 	PCPU_SET(prvspace, pc);
 	PCPU_SET(curthread, &thread0);
 	PCPU_SET(curpcb, thread0.td_pcb);
@@ -2665,7 +2670,7 @@ init386(first)
 	int first;
 {
 	struct gate_descriptor *gdp;
-	int gsel_tss, metadata_missing, x;
+	int gsel_tss, metadata_missing, x, pa;
 	struct pcpu *pc;
 
 	thread0.td_kstack = proc0kstack;
@@ -2718,6 +2723,10 @@ init386(first)
 
 	lgdt(&r_gdt);
 	pcpu_init(pc, 0, sizeof(struct pcpu));
+	for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
+		pmap_kenter(pa + KERNBASE, pa);
+	dpcpu_init((void *)(first + KERNBASE), 0);
+	first += DPCPU_SIZE;
 	PCPU_SET(prvspace, pc);
 	PCPU_SET(curthread, &thread0);
 	PCPU_SET(curpcb, thread0.td_pcb);
Index: i386/xen/mp_machdep.c
===================================================================
--- i386/xen/mp_machdep.c	(revision 194610)
+++ i386/xen/mp_machdep.c	(working copy)
@@ -744,6 +744,7 @@ start_all_aps(void)
 		/* Get per-cpu data */
 		pc = &__pcpu[bootAP];
 		pcpu_init(pc, bootAP, sizeof(struct pcpu));
+		dpcpu_init((void *)kmem_alloc(kernel_map, DPCPU_SIZE), bootAP);
 		pc->pc_apic_id = cpu_apic_ids[bootAP];
 		pc->pc_prvspace = pc;
 		pc->pc_curthread = 0;
Index: amd64/conf/GENERIC
===================================================================
--- amd64/conf/GENERIC	(revision 194610)
+++ amd64/conf/GENERIC	(working copy)
@@ -84,6 +84,8 @@ options 	INVARIANTS		# Enable calls of extra sanit
 options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS
 options 	WITNESS			# Enable checks to detect deadlocks and cycles
 options 	WITNESS_SKIPSPIN	# Don't run witness on spinlocks for speed
+options 	ALT_BREAK_TO_DEBUGGER
+options 	DEBUG_VFS_LOCKS
 
 # Make an SMP-capable kernel by default
 options 	SMP			# Symmetric MultiProcessor Kernel
Index: amd64/amd64/mp_machdep.c
===================================================================
--- amd64/amd64/mp_machdep.c	(revision 194610)
+++ amd64/amd64/mp_machdep.c	(working copy)
@@ -93,9 +93,10 @@ static int bootAP;
 /* Free these after use */
 void *bootstacks[MAXCPU];
 
-/* Temporary holder for double fault stack */
+/* Temporary variables for init_secondary() */
 char *doublefault_stack;
 char *nmi_stack;
+void *dpcpu;
 
 /* Hotwire a 0->4MB V==P mapping */
 extern pt_entry_t *KPTphys;
@@ -590,6 +591,7 @@ init_secondary(void)
 
 	/* prime data page for it to use */
 	pcpu_init(pc, cpu, sizeof(struct pcpu));
+	dpcpu_init(dpcpu, cpu);
 	pc->pc_apic_id = cpu_apic_ids[cpu];
 	pc->pc_prvspace = pc;
 	pc->pc_curthread = 0;
@@ -885,6 +887,7 @@ start_all_aps(void)
 		bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
 		doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
 		nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
+		dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
 
 		bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
 		bootAP = cpu;
Index: amd64/amd64/machdep.c
===================================================================
--- amd64/amd64/machdep.c	(revision 194610)
+++ amd64/amd64/machdep.c	(working copy)
@@ -1501,6 +1501,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */
 
 	pcpu_init(pc, 0, sizeof(struct pcpu));
+	dpcpu_init((void *)(physfree + KERNBASE), 0);
+	physfree += DPCPU_SIZE;
 	PCPU_SET(prvspace, pc);
 	PCPU_SET(curthread, &thread0);
 	PCPU_SET(curpcb, thread0.td_pcb);
Index: sys/sysctl.h
===================================================================
--- sys/sysctl.h	(revision 194610)
+++ sys/sysctl.h	(working copy)
@@ -178,6 +178,9 @@ int sysctl_handle_intptr(SYSCTL_HANDLER_ARGS);
 int sysctl_handle_string(SYSCTL_HANDLER_ARGS);
 int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
 
+int sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS);
+int sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS);
+
 #ifdef VIMAGE
 int sysctl_handle_v_int(SYSCTL_HANDLER_ARGS);
 int sysctl_handle_v_string(SYSCTL_HANDLER_ARGS);
Index: sys/linker.h
===================================================================
--- sys/linker.h	(revision 194610)
+++ sys/linker.h	(working copy)
@@ -264,6 +264,7 @@ typedef Elf_Addr elf_lookup_fn(linker_file_t, Elf_
 /* Support functions */
 int	elf_reloc(linker_file_t _lf, Elf_Addr base, const void *_rel, int _type, elf_lookup_fn _lu);
 int	elf_reloc_local(linker_file_t _lf, Elf_Addr base, const void *_rel, int _type, elf_lookup_fn _lu);
+Elf_Addr elf_relocaddr(linker_file_t _lf, Elf_Addr addr);
 const Elf_Sym *elf_get_sym(linker_file_t _lf, Elf_Size _symidx);
 const char *elf_get_symname(linker_file_t _lf, Elf_Size _symidx);
 
Index: sys/pcpu.h
===================================================================
--- sys/pcpu.h	(revision 194610)
+++ sys/pcpu.h	(working copy)
@@ -45,6 +45,68 @@
 struct pcb;
 struct thread;
 
+/*
+ * Define a set for pcpu data.
+ *
+ * We don't use SET_DECLARE because it defines the set as 'a' when we
+ * want 'aw'.  GCC considers uninitialized data in a separate section
+ * writable and there is no generic zero initializer that works for
+ * structs and scalars.
+ */
+extern uintptr_t *__start_set_pcpu;
+extern uintptr_t *__stop_set_pcpu;
+
+#if defined(__arm__)
+__asm__(".section set_pcpu, \"aw\", %progbits");
+#else
+__asm__(".section set_pcpu, \"aw\", @progbits");
+#endif
+__asm__(".previous");
+
+/*
+ * Array of dynamic pcpu base offsets.  Indexed by id.
+ */
+extern uintptr_t dpcpu_off[];
+
+/*
+ * Convenience defines.
+ */
+#define	DPCPU_START	((uintptr_t)&__start_set_pcpu)
+#define	DPCPU_STOP	((uintptr_t)&__stop_set_pcpu)
+#define	DPCPU_BYTES	(DPCPU_STOP - DPCPU_START)
+#define	DPCPU_MODMIN	2048
+#define	DPCPU_SIZE	roundup2(DPCPU_BYTES, PAGE_SIZE)
+#define	DPCPU_MODSIZE	(DPCPU_SIZE - (DPCPU_BYTES - DPCPU_MODMIN))
+
+/*
+ * Declaration and definition.
+ */
+#define	DPCPU_NAME(n)		pcpu_entry_##n
+#define	DPCPU_DECLARE(t, n)	extern t DPCPU_NAME(n)
+#define	DPCPU_DEFINE(t, n)	t DPCPU_NAME(n) __section("set_pcpu") __used
+
+/*
+ * Accessors with a given base.
+ */ +#define _DPCPU_PTR(b, n) \ + (__typeof(DPCPU_NAME(n))*)((b) + (uintptr_t)&DPCPU_NAME(n)) +#define _DPCPU_GET(b, n) (*_DPCPU_PTR(b, n)) +#define _DPCPU_SET(b, n, v) (*_DPCPU_PTR(b, n) = v) + +/* + * Accessors for the current cpu. + */ +#define DPCPU_PTR(n) _DPCPU_PTR(PCPU_GET(dynamic), n) +#define DPCPU_GET(n) (*DPCPU_PTR(n)) +#define DPCPU_SET(n, v) (*DPCPU_PTR(n) = v) + +/* + * Accessors for remote cpus. + */ +#define DPCPU_ID_PTR(i, n) _DPCPU_PTR(dpcpu_off[(i)], n) +#define DPCPU_ID_GET(i, n) (*DPCPU_ID_PTR(i, n)) +#define DPCPU_ID_SET(i, n, v) (*DPCPU_ID_PTR(i, n) = v) + /* * XXXUPS remove as soon as we have per cpu variable * linker sets and can define rm_queue in _rm_lock.h @@ -93,6 +155,11 @@ struct pcpu { struct rm_queue pc_rm_queue; /* + * Dynamic per-cpu data area. + */ + uintptr_t pc_dynamic; + + /* * Keep MD fields last, so that CPU-specific variations on a * single architecture don't result in offset variations of * the machine-independent fields of the pcpu. Even though @@ -103,7 +170,7 @@ struct pcpu { * If only to make kernel debugging easier... */ PCPU_MD_FIELDS; -}; +} __aligned(128); #ifdef _KERNEL @@ -133,6 +200,10 @@ void db_show_mdpcpu(struct pcpu *pcpu); void pcpu_destroy(struct pcpu *pcpu); struct pcpu *pcpu_find(u_int cpuid); void pcpu_init(struct pcpu *pcpu, int cpuid, size_t size); +void *dpcpu_alloc(int size); +void dpcpu_copy(void *s, int size); +void dpcpu_free(void *s, int size); +void dpcpu_init(void *dpcpu, int cpuid); #endif /* _KERNEL */