 sys/amd64/amd64/apic_vector.S        | 149 ++++++++++++++++++++++++++++---
 sys/amd64/amd64/cpu_switch.S         |  34 +++++--
 sys/amd64/amd64/genassym.c           |   4 +
 sys/amd64/amd64/machdep.c            |   2 +-
 sys/amd64/amd64/mp_machdep.c         |  61 +++++++++----
 sys/amd64/amd64/pmap.c               | 168 +++++++++++++++++++++++++----------
 sys/amd64/amd64/trap.c               |   2 +
 sys/amd64/amd64/vm_machdep.c         |   2 +
 sys/amd64/include/pcpu.h             |   2 +
 sys/amd64/include/pmap.h             |  12 ++-
 sys/amd64/include/smp.h              |  15 ++--
 sys/dev/drm2/i915/intel_ringbuffer.c |   8 +-
 sys/kern/subr_unit.c                 |  45 ++++------
 sys/sys/_unrhdr.h                    |  51 +++++++++++
 sys/sys/bitset.h                     |   9 +-
 sys/sys/cpuset.h                     |   1 +
 sys/sys/systm.h                      |   1 +
 17 files changed, 438 insertions(+), 128 deletions(-)

diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 6465247..e3d8713 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -149,17 +149,41 @@ IDTVEC(invltlb)
 #endif
 
 	pushq	%rax
+	pushq	%rdx
 
-	movq	%cr3, %rax		/* invalidate the TLB */
-	movq	%rax, %cr3
-
+	cmpl	$0,pmap_pcid_enabled
+	je	1f
+
+	cmpq	$0,smp_tlb_invpcid
+	je	1f
+
+	/*
+	 * For PCID-enabled pmap, set bit 63 of loaded %cr3 to zero.
+	 */
+	movq	%cr3,%rax
+	movq	pcid_cr3,%rdx
+	cmpq	%rax,%rdx
+	je	1f
+	movq	%rdx,%cr3
+	btsq	$63,%rax
+	jmp	2f
+
+	/*
+	 * Invalidate the TLB.
+	 */
+1:
+	movq	%cr3,%rax
+2:
+	movq	%rax,%cr3
 	movq	lapic, %rax
 	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
 	lock
 	incl	smp_tlb_wait
 
+	popq	%rdx
 	popq	%rax
+
 	jmp	doreti_iret
 
 /*
@@ -182,16 +206,62 @@ IDTVEC(invlpg)
 #endif
 
 	pushq	%rax
-
-	movq	smp_tlb_addr1, %rax
-	invlpg	(%rax)			/* invalidate single page */
-
+	pushq	%rdx
+	movq	$smp_tlb_invpcid,%rdx
+
+	cmpl	$0,pmap_pcid_enabled
+	je	2f
+
+	cmpl	$0,invpcid_works
+	jne	1f
+
+	/* kernel pmap - use invlpg to invalidate global mapping */
+	cmpl	$0,(%rdx)
+	je	2f
+
+	/*
+	 * PCID supported, but INVPCID is not.
+	 * Temporarily switch to the target address space and do INVLPG.
+	 */
+	pushq	%rcx
+	movq	%cr3,%rcx
+	movq	pcid_cr3,%rax
+	cmp	%rcx,%rax
+	je	4f
+	btsq	$63,%rax
+	movq	%rax,%cr3
+4:	movq	8(%rdx),%rax
+	invlpg	(%rax)
+	btsq	$63,%rcx
+	movq	%rcx,%cr3
+	popq	%rcx
+	jmp	3f
+
+1:
+	/*
+	 * Invalidate the TLB entry using INVPCID.
+	 */
+	xorl	%eax,%eax
+//	invpcid	(%rdx),%rax
+	.byte	0x66,0x0f,0x38,0x82,0x02
+	jmp	3f
+
+2:
+	/*
+	 * PCID is not supported.
+	 * Invalidate single page using INVLPG.
+	 */
+	movq	8(%rdx),%rax
+	invlpg	(%rax)
+
+3:
 	movq	lapic, %rax
 	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
 	lock
 	incl	smp_tlb_wait
 
+	popq	%rdx
 	popq	%rax
 	jmp	doreti_iret
 
/*
@@ -216,14 +286,65 @@ IDTVEC(invlrng)
 	pushq	%rax
 	pushq	%rdx
-
-	movq	smp_tlb_addr1, %rdx
-	movq	smp_tlb_addr2, %rax
-1:	invlpg	(%rdx)			/* invalidate single page */
-	addq	$PAGE_SIZE, %rdx
-	cmpq	%rax, %rdx
+	movq	$smp_tlb_invpcid,%rdx
+
+	cmpl	$0,pmap_pcid_enabled
+	je	4f
+
+	cmpl	$0,invpcid_works
+	jne	2f
+
+	/* kernel pmap - use invlpg to invalidate global mapping */
+	cmpl	$0,(%rdx)
+	je	4f
+
+	pushq	%rcx
+	movq	%cr3,%rcx
+	movq	pcid_cr3,%rax
+	cmpq	%rcx,%rax
+	je	1f
+	btsq	$63,%rax
+	movq	%rax,%cr3
+1:	movq	8(%rdx),%rdx
+	movq	smp_tlb_addr2,%rax
+1:	invlpg	(%rdx)
+	addq	$PAGE_SIZE,%rdx
+	cmpq	%rax,%rdx
 	jb	1b
-
+	btsq	$63,%rcx
+	movq	%rcx,%cr3
+	popq	%rcx
+	jmp	6f
+2:
+	pushq	%rcx
+	movq	(%rdx),%rcx
+	movq	%rcx,PCPU(INVPCID_DESCR)
+	movq	8(%rdx),%rax
+	movq	%rax,PCPU(INVPCID_DESCR)+8
+	movq	smp_tlb_addr2,%rcx
+	xorl	%eax,%eax
+	movq	$PC_INVPCID_DESCR,%rdx
+	gs
+	subq	8(%rdx),%rcx
+	shrq	$PAGE_SHIFT,%rcx
+3:
+	gs
+//	invpcid	(%rdx),%rax
+	.byte	0x66,0x0f,0x38,0x82,0x02
+	gs
+	addq	$PAGE_SIZE,8(%rdx)
+	dec	%rcx
+	jne	3b
+	popq	%rcx
+	jmp	6f
+4:
+	movq	8(%rdx),%rdx
+	movq	smp_tlb_addr2,%rax
+5:	invlpg	(%rdx)			/* invalidate single page */
+	addq	$PAGE_SIZE,%rdx
+	cmpq	%rax,%rdx
+	jb	5b
+6:
 	movq	lapic, %rax
 	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
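The three shootdown paths above do the same job by different means: INVPCID where the hardware has it, a temporary %cr3 switch into the target address space where only PCID is available, and plain INVLPG otherwise. The handlers hand-encode INVPCID as ".byte 0x66,0x0f,0x38,0x82,0x02" because the assembler of the era lacked the mnemonic; 0x02 is the ModRM byte selecting (%rdx) as the 16-byte descriptor and %rax as the invalidation type. A C-level sketch of the same primitive (the wrapper name and type constants are illustrative, per the Intel SDM, not part of this patch; struct invpcid_descr is added to pmap.h further down):

	#define	INVPCID_ADDR	0	/* one address in one PCID */
	#define	INVPCID_CTX	1	/* whole context (one PCID) */
	#define	INVPCID_CTXGLOB	2	/* all contexts, incl. global entries */
	#define	INVPCID_ALLCTX	3	/* all contexts, excl. global entries */

	/* Sketch: C equivalent of the hand-assembled "invpcid (%rdx),%rax". */
	static __inline void
	invpcid(struct invpcid_descr *d, int type)
	{

		__asm __volatile("invpcid (%1),%0" : : "r" ((uint64_t)type),
		    "r" (d) : "memory");
	}

Types 0 and 1 are not required to invalidate global (PG_G) translations, which is why the handlers fall back to INVLPG for the kernel pmap.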
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index ed1ccb5..d0d6926 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -77,8 +77,7 @@ ENTRY(cpu_throw)
 	LK btrl	%eax,PM_ACTIVE(%rdx)		/* clear old */
 1:
 	movq	TD_PCB(%rsi),%r8		/* newtd->td_pcb */
-	movq	PCB_CR3(%r8),%rdx
-	movq	%rdx,%cr3			/* new address space */
+	movq	PCB_CR3(%r8),%rcx		/* new address space */
 	jmp	swact
 END(cpu_throw)
 
@@ -145,20 +144,41 @@ ctx_switch_xsave:
 	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
 	jmp	sw1
 swinact:
-	movq	%rcx,%cr3			/* new address space */
-	movl	PCPU(CPUID), %eax
+	movl	PCPU(CPUID),%eax
 	/* Release bit from old pmap->pm_active */
-	movq	PCPU(CURPMAP),%rcx
-	LK btrl	%eax,PM_ACTIVE(%rcx)		/* clear old */
-	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
+	movq	PCPU(CURPMAP),%r12
+	LK btrl	%eax,PM_ACTIVE(%r12)		/* clear old */
+	SETLK	%rdx,TD_LOCK(%rdi)		/* Release the old thread */
 swact:
 	/* Set bit in new pmap->pm_active */
 	movq	TD_PROC(%rsi),%rdx		/* newproc */
 	movq	P_VMSPACE(%rdx), %rdx
 	addq	$VM_PMAP,%rdx
+	cmpl	$-1,PM_PCID(%rdx)
+	je	1f
+	LK btsl	%eax,PM_SAVE(%rdx)
+	jnc	1f
+	btsq	$63,%rcx			/* CR3_PCID_SAVE */
+	incq	PCPU(PM_SAVE_CNT)
+1:
+	movq	%rcx,%cr3			/* new address space */
 	LK btsl	%eax,PM_ACTIVE(%rdx)		/* set new */
 	movq	%rdx,PCPU(CURPMAP)
 
+	/*
+	 * We might lose the race and another CPU might have changed
+	 * the pmap after we set our bit in pmap->pm_save.  Recheck.
+	 * Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was
+	 * modified, causing TLB flush for this pcid.
+	 */
+	btrq	$63,%rcx
+	jnc	1f
+	LK btsl	%eax,PM_SAVE(%rdx)
+	jc	1f
+	decq	PCPU(PM_SAVE_CNT)
+	movq	%rcx,%cr3
+1:
+
 sw1:
 #if defined(SCHED_ULE) && defined(SMP)
 	/* Wait for the new thread to become unblocked */
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 3043bb5..62017e7 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$");
 ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
+ASSYM(PM_SAVE, offsetof(struct pmap, pm_save));
+ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid));
 
 ASSYM(P_MD, offsetof(struct proc, p_md));
 ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
@@ -225,6 +227,8 @@ ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p));
 ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
 ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
 ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
+ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
+ASSYM(PC_INVPCID_DESCR, offsetof(struct pcpu, pc_invpcid_descr));
 
 ASSYM(LA_VER, offsetof(struct LAPIC, version));
 ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index a1c61eb..e7d6eff 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1873,7 +1873,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
-	thread0.td_pcb->pcb_cr3 = KPML4phys;
+	thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */
 	thread0.td_frame = &proc0_tf;
 
 	env = getenv("kernelname");
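With CR4.PCIDE set, %cr3 stops being just the page-table root: the low 12 bits select a PCID, and bit 63 (CR3_PCID_SAVE in the comments above) tells the CPU not to flush that PCID's cached translations on load. swact sets bit 63 only when the pmap's pm_save bit says this CPU's cached entries are still valid, then rechecks pm_save after the load and reloads %cr3 without the bit if it lost the race with a remote invalidation. A standalone demonstration of the layout (the mask name, addresses, and PCID value are made up for illustration):

	#include <stdint.h>
	#include <stdio.h>

	#define	CR3_PCID_MASK	0xfffULL	/* low 12 bits: the PCID */
	#define	CR3_PCID_SAVE	(1ULL << 63)	/* do not flush on load */

	int
	main(void)
	{
		uint64_t pml4_phys = 0x123456000ULL;	/* page-aligned root */
		uint64_t cr3, cr3_save;
		int pcid = 42;

		cr3 = pml4_phys | (uint64_t)pcid;  /* load flushes PCID 42 */
		cr3_save = cr3 | CR3_PCID_SAVE;	   /* load keeps cached TLB */
		printf("cr3 %#jx pcid %ju save %#jx\n", (uintmax_t)cr3,
		    (uintmax_t)(cr3 & CR3_PCID_MASK), (uintmax_t)cr3_save);
		return (0);
	}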
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 3ad00e7..1ac870a 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -103,9 +103,10 @@ struct pcb stoppcbs[MAXCPU];
 struct pcb **susppcbs;
 
 /* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
 vm_offset_t smp_tlb_addr2;
+struct invpcid_descr smp_tlb_invpcid;
 volatile int smp_tlb_wait;
+uint64_t pcid_cr3;
 
 #ifdef COUNT_IPIS
 /* Interrupt counts. */
@@ -599,6 +600,8 @@ cpu_mp_announce(void)
 	}
 }
 
+extern int pmap_pcid_enabled;
+
 /*
  * AP CPU's call this to initialize themselves.
 */
@@ -759,6 +762,8 @@ init_secondary(void)
 	 */
 	load_cr4(rcr4() | CR4_PGE);
 
+	if (pmap_pcid_enabled)
+		load_cr4(rcr4() | CR4_PCIDE);
 	load_ds(_udatasel);
 	load_es(_udatasel);
 	load_fs(_ufssel);
@@ -899,7 +904,7 @@ start_all_aps(void)
 
 	/* install the AP 1st level boot code */
 	pmap_kenter(va, boot_address);
-	pmap_invalidate_page(kernel_pmap, va);
+	pmap_invalidate_page(kernel_pmap, va, FALSE);
 	bcopy(mptramp_start, (void *)va, bootMP_size);
 
 	/* Locate the page tables, they'll be below the trampoline */
@@ -1106,7 +1111,8 @@ ipi_send_cpu(int cpu, u_int ipi)
  * Flush the TLB on all other CPU's
 */
 static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
+    vm_offset_t addr2)
 {
 	u_int ncpu;
 
@@ -1116,7 +1122,14 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 	if (!(read_rflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
 	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_addr1 = addr1;
+	smp_tlb_invpcid.addr = addr1;
+	if (pmap == NULL || pmap->pm_pcid == -1)
+		smp_tlb_invpcid.pcid = 0;
+	else {
+		smp_tlb_invpcid.pcid = pmap->pm_pcid;
+		pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
+		    pmap->pm_pcid;
+	}
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	ipi_all_but_self(vector);
@@ -1126,7 +1139,8 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 }
 
 static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
+    vm_offset_t addr1, vm_offset_t addr2)
 {
 	int cpu, ncpu, othercpus;
 
@@ -1142,7 +1156,14 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_of
 	if (!(read_rflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
 	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_addr1 = addr1;
+	smp_tlb_invpcid.addr = addr1;
+	if (pmap == NULL || pmap->pm_pcid == -1)
+		smp_tlb_invpcid.pcid = 0;
+	else {
+		smp_tlb_invpcid.pcid = pmap->pm_pcid;
+		pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
+		    pmap->pm_pcid;
+	}
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	if (CPU_ISFULLSET(&mask)) {
@@ -1169,15 +1190,15 @@ smp_cache_flush(void)
 {
 
 	if (smp_started)
-		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
+		smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
 }
 
 void
-smp_invltlb(void)
+smp_invltlb(pmap_t pmap)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+		smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_global++;
 #endif
@@ -1185,11 +1206,11 @@ smp_invltlb(void)
 }
 
 void
-smp_invlpg(vm_offset_t addr)
+smp_invlpg(pmap_t pmap, vm_offset_t addr)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+		smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_page++;
 #endif
@@ -1197,11 +1218,11 @@ smp_invlpg(vm_offset_t addr)
 }
 
 void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
+smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+		smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_range++;
 		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
@@ -1210,11 +1231,11 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 }
 
 void
-smp_masked_invltlb(cpuset_t mask)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_global++;
 #endif
@@ -1222,11 +1243,11 @@ smp_masked_invltlb(cpuset_t mask)
 }
 
 void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_page++;
 #endif
@@ -1234,11 +1255,13 @@ smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
 }
 
 void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
+    vm_offset_t addr2)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1,
+		    addr2);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_range++;
 		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 19be4e0..db5bf19 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -116,11 +116,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/vmmeter.h>
-#ifdef SMP
+#include <sys/_unrhdr.h>
 #include <sys/smp.h>
-#else
-#include <sys/cpuset.h>
-#endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -249,6 +246,29 @@ static struct md_page *pv_table;
 pt_entry_t *CMAP1 = 0;
 caddr_t CADDR1 = 0;
 
+static struct unrhdr pcid_unr;
+static struct mtx pcid_mtx;
+int pmap_pcid_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN, &pmap_pcid_enabled,
+    0, "Is TLB Context ID enabled?");
+int invpcid_works = 0;
+
+static int
+pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
+{
+	int i;
+	uint64_t res;
+
+	res = 0;
+	CPU_FOREACH(i) {
+		res += cpuid_to_pcpu[i]->pc_pm_save_cnt;
+	}
+	return (sysctl_handle_64(oidp, &res, 0, req));
+}
+SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
+    CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
+    "Count of saved TLB context on switch");
+
 /*
  * Crashdump maps.
 */
@@ -668,6 +688,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
+	CPU_ZERO(&kernel_pmap->pm_save);
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
 	/*
@@ -699,6 +720,18 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 
 	/* Initialize the PAT MSR. */
 	pmap_init_pat();
+
+	/* Initialize TLB Context Id. */
+	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
+	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
+		load_cr4(rcr4() | CR4_PCIDE);
+		mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
+		init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx);
+		/* Check for INVPCID support */
+		invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
+		    != 0;
+	} else
+		pmap_pcid_enabled = 0;
 }
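pmap_bootstrap() keys the PCID setup off cpu_feature2 & CPUID2_PCID and cpu_stdext_feature & CPUID_STDEXT_INVPCID; in raw CPUID terms those are leaf 1 ECX bit 17 and leaf 7 (subleaf 0) EBX bit 10. A userland check of the same bits, using GCC/Clang <cpuid.h> (not part of the patch):

	#include <cpuid.h>
	#include <stdio.h>

	int
	main(void)
	{
		unsigned int a, b, c, d;
		int pcid = 0, invpcid = 0;

		/* CPUID.1:ECX bit 17 - what CPUID2_PCID tests. */
		if (__get_cpuid(1, &a, &b, &c, &d))
			pcid = (c >> 17) & 1;
		/* CPUID.(7,0):EBX bit 10 - CPUID_STDEXT_INVPCID. */
		if (__get_cpuid(0, &a, &b, &c, &d) && a >= 7) {
			__cpuid_count(7, 0, a, b, c, d);
			invpcid = (b >> 10) & 1;
		}
		printf("PCID %d INVPCID %d\n", pcid, invpcid);
		return (0);
	}

The unit-number space handed to init_unrhdr() runs from 1 to (1 << 12) - 1 because a PCID is a 12-bit field and PCID 0 is reserved for the kernel pmap.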
@@ -983,50 +1016,55 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
  * processor.
 */
 void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va, boolean_t global)
 {
 	cpuset_t other_cpus;
 	u_int cpuid;
 
 	sched_pin();
-	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus) ||
+	    global) {
 		invlpg(va);
-		smp_invlpg(va);
+		smp_invlpg(pmap, va);
 	} else {
 		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
+		CPU_AND_ATOMIC(&pmap->pm_save, &pmap->pm_active);
 		CPU_CLR(cpuid, &other_cpus);
 		if (CPU_ISSET(cpuid, &pmap->pm_active))
 			invlpg(va);
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invlpg(other_cpus, va);
+			smp_masked_invlpg(other_cpus, pmap, va);
 	}
 	sched_unpin();
 }
 
 void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+    boolean_t global)
 {
 	cpuset_t other_cpus;
 	vm_offset_t addr;
 	u_int cpuid;
 
 	sched_pin();
-	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus) ||
+	    global) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
-		smp_invlpg_range(sva, eva);
+		smp_invlpg_range(pmap, sva, eva);
 	} else {
 		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
 		CPU_CLR(cpuid, &other_cpus);
+		CPU_AND_ATOMIC(&pmap->pm_save, &pmap->pm_active);
 		if (CPU_ISSET(cpuid, &pmap->pm_active))
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invlpg_range(other_cpus, sva, eva);
+			smp_masked_invlpg_range(other_cpus, pmap, sva, eva);
 	}
 	sched_unpin();
 }
@@ -1040,16 +1078,17 @@ pmap_invalidate_all(pmap_t pmap)
 	sched_pin();
 	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		invltlb();
-		smp_invltlb();
+		smp_invltlb(pmap);
 	} else {
 		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
 		CPU_CLR(cpuid, &other_cpus);
+		CPU_AND_ATOMIC(&pmap->pm_save, &pmap->pm_active);
 		if (CPU_ISSET(cpuid, &pmap->pm_active))
 			invltlb();
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invltlb(other_cpus);
+			smp_masked_invltlb(other_cpus, pmap);
 	}
 	sched_unpin();
 }
@@ -1111,8 +1150,10 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 	CPU_CLR(cpuid, &other_cpus);
 	if (pmap == kernel_pmap)
 		active = all_cpus;
-	else
+	else {
 		active = pmap->pm_active;
+		CPU_AND_ATOMIC(&pmap->pm_save, &active);
+	}
 	if (CPU_OVERLAP(&active, &other_cpus)) {
 		act.store = cpuid;
 		act.invalidate = active;
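pm_active tracks the CPUs currently running a pmap; the new pm_save tracks CPUs that may still hold its PCID-tagged TLB entries from an earlier activation. Narrowing pm_save to pm_active on every invalidation (the CPU_AND_ATOMIC calls above) forces any CPU that later reactivates the pmap to take a TLB flush, while the IPIs themselves still go only to pm_active CPUs. A toy single-word model of that bookkeeping (plain integers stand in for cpuset_t; this is not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint64_t pm_active = 0x05;	/* CPUs 0 and 2 run the pmap */
		uint64_t pm_save = 0x0f;	/* CPUs 0-3 cached its PCID */
		uint64_t self = 1ULL << 0;	/* we are CPU 0 */
		uint64_t ipi_targets;

		pm_save &= pm_active;		/* the CPU_AND_ATOMIC step */
		ipi_targets = pm_active & ~self;
		printf("pm_save %#jx, IPI mask %#jx\n",
		    (uintmax_t)pm_save, (uintmax_t)ipi_targets);
		/* CPUs 1 and 3 now flush when they next load this PCID. */
		return (0);
	}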
@@ -1136,7 +1177,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
  * We inline these within pmap.c for speed.
 */
 PMAP_INLINE void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va, boolean_t global)
 {
 
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
@@ -1144,7 +1185,8 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 }
 
 PMAP_INLINE void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+    boolean_t global)
 {
 	vm_offset_t addr;
 
@@ -1175,6 +1217,8 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 	pde_store(pde, newpde);
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		pmap_update_pde_invalidate(va, newpde);
+	else
+		CPU_ZERO(&pmap->pm_save);
 }
 #endif /* !SMP */
 
@@ -1463,7 +1507,7 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 	}
 	if (__predict_false((oldpte & PG_V) != 0))
 		pmap_invalidate_range(kernel_pmap, sva, sva + count *
-		    PAGE_SIZE);
+		    PAGE_SIZE, TRUE);
 }
 
 /*
@@ -1482,7 +1526,7 @@ pmap_qremove(vm_offset_t sva, int count)
 		pmap_kremove(va);
 		va += PAGE_SIZE;
 	}
-	pmap_invalidate_range(kernel_pmap, sva, va);
+	pmap_invalidate_range(kernel_pmap, sva, va, TRUE);
 }
 
 /***************************************************
@@ -1658,6 +1702,11 @@ pmap_pinit0(pmap_t pmap)
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+	if (pmap_pcid_enabled)
+		pmap->pm_pcid = 0;
+	else
+		pmap->pm_pcid = -1;
+	CPU_ZERO(&pmap->pm_save);
 }
 
 /*
@@ -1699,6 +1748,12 @@ pmap_pinit(pmap_t pmap)
 
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+	if (pmap_pcid_enabled)
+		pmap->pm_pcid = alloc_unr(&pcid_unr);
+	else
+		pmap->pm_pcid = -1;
+	CPU_ZERO(&pmap->pm_save);
+
 	return (1);
 }
 
@@ -1950,6 +2005,8 @@ pmap_release(pmap_t pmap)
 	atomic_subtract_int(&cnt.v_wire_count, 1);
 	vm_page_free_zero(m);
 	PMAP_LOCK_DESTROY(pmap);
+	if (pmap->pm_pcid != -1)
+		free_unr(&pcid_unr, pmap->pm_pcid);
 }
 
 static int
@@ -2174,7 +2231,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
 				continue;
 			tpte = pte_load_clear(pte);
 			if ((tpte & PG_G) != 0)
-				pmap_invalidate_page(pmap, va);
+				pmap_invalidate_page(pmap, va, TRUE);
 			m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
 			if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 				vm_page_dirty(m);
@@ -2715,7 +2772,8 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 			free = NULL;
 			pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free,
 			    lockp);
-			pmap_invalidate_page(pmap, trunc_2mpage(va));
+			pmap_invalidate_page(pmap, trunc_2mpage(va),
+			    !!(oldpde & PG_G));
 			pmap_free_zero_pages(free);
 			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
@@ -2780,7 +2838,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 	 * Invalidate a stale recursive mapping of the page table page.
 	 */
 	if (va >= VM_MAXUSER_ADDRESS)
-		pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
+		pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va), FALSE);
 
 	/*
 	 * Demote the PV entry.
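pm_pcid uses -1 as the "no PCID" sentinel, which covers both the !pmap_pcid_enabled case and alloc_unr() failure once all 4095 ids are in use (alloc_unr(9) returns -1 when the space is exhausted). Every place that later builds a %cr3 value from a pmap then follows the same two steps, open-coded further down in this patch in pmap_activate() and cpu_fork(); a sketch with a hypothetical helper name:

	/* Sketch only: the patch open-codes this and adds no such helper. */
	static uint64_t
	pmap_cr3(pmap_t pmap)
	{
		uint64_t cr3;

		cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
		if (pmap->pm_pcid != -1)
			cr3 |= pmap->pm_pcid;	/* low 12 bits pick the PCID */
		return (cr3);
	}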
@@ -2818,7 +2876,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
 	 * PG_G.
 	 */
 	if (oldpde & PG_G)
-		pmap_invalidate_page(kernel_pmap, sva);
+		pmap_invalidate_page(kernel_pmap, sva, TRUE);
 	pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
 	if (oldpde & PG_MANAGED) {
 		CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME);
@@ -2895,19 +2953,20 @@ static void
 pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, vm_page_t *free)
 {
 	struct rwlock *lock;
-	pt_entry_t *pte;
+	pt_entry_t *pte, rpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if ((*pde & PG_V) == 0)
 		return;
 	pte = pmap_pde_to_pte(pde, va);
-	if ((*pte & PG_V) == 0)
+	rpte = *pte;
+	if ((rpte & PG_V) == 0)
 		return;
 	lock = NULL;
 	pmap_remove_pte(pmap, pte, va, *pde, free, &lock);
 	if (lock != NULL)
 		rw_wunlock(lock);
-	pmap_invalidate_page(pmap, va);
+	pmap_invalidate_page(pmap, va, !!(rpte & PG_G));
 }
 
 /*
@@ -3028,7 +3087,14 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 		    sva += PAGE_SIZE) {
 			if (*pte == 0) {
 				if (va != va_next) {
-					pmap_invalidate_range(pmap, va, sva);
+					/*
+					 * If PG_G is set,
+					 * pmap_invalidate_all() is
+					 * called later anyway, so
+					 * global can be FALSE.
+					 */
+					pmap_invalidate_range(pmap, va, sva,
+					    FALSE);
 					va = va_next;
 				}
 				continue;
@@ -3044,7 +3110,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 		}
 	}
 	if (va != va_next)
-		pmap_invalidate_range(pmap, va, sva);
+		pmap_invalidate_range(pmap, va, sva, FALSE);
 	if (lock != NULL)
 		rw_wunlock(lock);
@@ -3116,7 +3182,7 @@ small_mappings:
 		if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		pmap_unuse_pt(pmap, pv->pv_va, *pde, &free);
-		pmap_invalidate_page(pmap, pv->pv_va);
+		pmap_invalidate_page(pmap, pv->pv_va, !!(tpte & PG_G));
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
@@ -3158,7 +3224,7 @@ retry:
 			if (!atomic_cmpset_long(pde, oldpde, newpde))
 				goto retry;
 			if (oldpde & PG_G)
-				pmap_invalidate_page(pmap, sva);
+				pmap_invalidate_page(pmap, sva, TRUE);
 			else
 				anychanged = TRUE;
 		}
@@ -3290,7 +3356,7 @@ retry:
 				if (!atomic_cmpset_long(pte, obits, pbits))
 					goto retry;
 				if (obits & PG_G)
-					pmap_invalidate_page(pmap, sva);
+					pmap_invalidate_page(pmap, sva, TRUE);
 				else
 					anychanged = TRUE;
 			}
@@ -3607,7 +3673,7 @@ validate:
 				goto unchanged;
 			}
 			if ((origpte & PG_A) != 0)
-				pmap_invalidate_page(pmap, va);
+				pmap_invalidate_page(pmap, va, !!(origpte & PG_G));
 		} else
 			pte_store(pte, newpte);
 
@@ -3670,7 +3736,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	    lockp)) {
 		free = NULL;
 		if (pmap_unwire_ptp(pmap, va, mpde, &free)) {
-			pmap_invalidate_page(pmap, va);
+			pmap_invalidate_page(pmap, va, FALSE);
 			pmap_free_zero_pages(free);
 		}
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
@@ -3847,7 +3913,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 		if (mpte != NULL) {
 			free = NULL;
 			if (pmap_unwire_ptp(pmap, va, mpte, &free)) {
-				pmap_invalidate_page(pmap, va);
+				pmap_invalidate_page(pmap, va, FALSE);
 				pmap_free_zero_pages(free);
 			}
 			mpte = NULL;
@@ -4156,7 +4222,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 					if (pmap_unwire_ptp(dst_pmap, addr,
 					    dstmpte, &free)) {
 						pmap_invalidate_page(dst_pmap,
-						    addr);
+						    addr, FALSE);
 						pmap_free_zero_pages(free);
 					}
 					goto out;
@@ -4714,7 +4780,7 @@ retry:
 				goto retry;
 			if ((oldpte & PG_M) != 0)
 				vm_page_dirty(m);
-			pmap_invalidate_page(pmap, pv->pv_va);
+			pmap_invalidate_page(pmap, pv->pv_va, !!(oldpte & PG_G));
 		}
 		PMAP_UNLOCK(pmap);
 	}
@@ -4796,7 +4862,7 @@ small_mappings:
 		pte = pmap_pde_to_pte(pde, pv->pv_va);
 		if ((*pte & PG_A) != 0) {
 			atomic_clear_long(pte, PG_A);
-			pmap_invalidate_page(pmap, pv->pv_va);
+			pmap_invalidate_page(pmap, pv->pv_va, FALSE);
 			rtval++;
 			if (rtval > 4)
 				pvn = NULL;
@@ -4863,7 +4929,8 @@ pmap_clear_modify(vm_page_t m)
 				    oldpte & ~(PG_M | PG_RW)))
 					oldpte = *pte;
 				vm_page_dirty(m);
-				pmap_invalidate_page(pmap, va);
+				pmap_invalidate_page(pmap, va,
+				    !!(oldpte & PG_G));
 			}
 		}
 	}
@@ -4878,9 +4945,10 @@ small_mappings:
 		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
 		    " a 2mpage in page %p's pv list", m));
 		pte = pmap_pde_to_pte(pde, pv->pv_va);
-		if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+		oldpte = *pte;
+		if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 			atomic_clear_long(pte, PG_M);
-			pmap_invalidate_page(pmap, pv->pv_va);
+			pmap_invalidate_page(pmap, pv->pv_va, !!(oldpte & PG_G));
 		}
 		PMAP_UNLOCK(pmap);
 	}
@@ -4899,7 +4967,7 @@ pmap_clear_reference(vm_page_t m)
 	pmap_t pmap;
 	pv_entry_t next_pv, pv;
 	pd_entry_t oldpde, *pde;
-	pt_entry_t *pte;
+	pt_entry_t oldpte, *pte;
 	vm_offset_t va;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
@@ -4938,9 +5006,10 @@ small_mappings:
 		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
 		    " a 2mpage in page %p's pv list", m));
 		pte = pmap_pde_to_pte(pde, pv->pv_va);
-		if (*pte & PG_A) {
+		oldpte = *pte;
+		if (oldpte & PG_A) {
 			atomic_clear_long(pte, PG_A);
-			pmap_invalidate_page(pmap, pv->pv_va);
+			pmap_invalidate_page(pmap, pv->pv_va, !!(oldpte & PG_G));
 		}
 		PMAP_UNLOCK(pmap);
 	}
@@ -5014,7 +5083,7 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
 	pa = trunc_page(pa);
 	for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
 		pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
-	pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
+	pmap_invalidate_range(kernel_pmap, va, va + tmpsize, TRUE);
 	pmap_invalidate_cache_range(va, va + tmpsize);
 	return ((void *)(va + offset));
 }
@@ -5093,7 +5162,7 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va)
 	/*
 	 * Invalidate a stale recursive mapping of the page directory page.
 	 */
-	pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va));
+	pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va), FALSE);
 
 	pmap_pdpe_demotions++;
 	CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx"
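Throughout these hunks the new boolean is derived from the old PTE or PDE: global (PG_G) mappings are shared across all PCIDs, so only INVLPG (or a full non-PCID flush) removes them, and the PCID-targeted paths may legally skip them. The recurring caller pattern, condensed from the hunks above (!!(x) is just (x) != 0 as an int):

	pt_entry_t oldpte;

	oldpte = pte_load_clear(pte);
	if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
		vm_page_dirty(m);
	/* TRUE only when the removed mapping was global. */
	pmap_invalidate_page(pmap, va, (oldpte & PG_G) != 0);

Passing FALSE for the recursive page-table mappings (vtopte()/vtopde()) is safe because those per-pmap entries are never global.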
@@ -5341,7 +5410,7 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
 	 * shouldn't be, etc.
 	 */
 	if (changed) {
-		pmap_invalidate_range(kernel_pmap, base, tmpva);
+		pmap_invalidate_range(kernel_pmap, base, tmpva, FALSE);
 		pmap_invalidate_cache_range(base, tmpva);
 	}
 	return (error);
@@ -5390,7 +5459,7 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate)
 			}
 		}
 		if (changed && invalidate)
-			pmap_invalidate_page(kernel_pmap, va);
+			pmap_invalidate_page(kernel_pmap, va, TRUE);
 		PMAP_UNLOCK(kernel_pmap);
 	}
 }
@@ -5455,15 +5524,20 @@ pmap_activate(struct thread *td)
 	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
+	CPU_ZERO(&pmap->pm_save);
 	cpuid = PCPU_GET(cpuid);
 #ifdef SMP
 	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
 	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
+	CPU_SET_ATOMIC(cpuid, &pmap->pm_save);
 #else
 	CPU_CLR(cpuid, &oldpmap->pm_active);
 	CPU_SET(cpuid, &pmap->pm_active);
+	CPU_SET(cpuid, &pmap->pm_save);
 #endif
 	cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
+	if (pmap->pm_pcid != -1)
+		cr3 |= pmap->pm_pcid;
 	td->td_pcb->pcb_cr3 = cr3;
 	load_cr3(cr3);
 	PCPU_SET(curpmap, pmap);
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 3eaf3fd..84dfe4c 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -790,6 +790,8 @@ nogo:
 		trap_fatal(frame, eva);
 		return (-1);
 	}
+printf("pid %d cmd %s addr 0x%lx err %lx rip 0x%lx rsp 0x%lx\n", curproc->p_pid, curproc->p_comm, eva, frame->tf_err, frame->tf_rip, frame->tf_rsp);
+
 	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
 }
 
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index acb5b93..6c20013 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -221,6 +221,8 @@ cpu_fork(td1, p2, td2, flags)
 	 */
 	pmap2 = vmspace_pmap(p2->p_vmspace);
 	pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
+	if (pmap2->pm_pcid != -1)
+		pcb2->pcb_cr3 |= pmap2->pm_pcid;
 	pcb2->pcb_r12 = (register_t)fork_return;	/* fork_trampoline argument */
 	pcb2->pcb_rbp = 0;
 	pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index ba4c618..44b3bab 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -76,6 +76,8 @@
 	struct system_segment_descriptor *pc_ldt; \
 	/* Pointer to the CPU TSS descriptor */	\
 	struct system_segment_descriptor *pc_tss; \
+	uint64_t pc_pm_save_cnt; \
+	char	pc_invpcid_descr[16]; \
 	u_int	pc_cmci_mask;	/* MCx banks for CMCI */ \
 	PCPU_XEN_FIELDS; \
 	uint64_t pc_dbreg[16];	/* ddb debugging regs */ \
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index dc02e49..8321924 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -245,6 +245,8 @@ struct pmap {
 	pml4_entry_t	*pm_pml4;	/* KVA of level 4 page table */
 	TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
 	cpuset_t	pm_active;	/* active on cpus */
+	cpuset_t	pm_save;	/* Context valid on cpus mask */
+	int		pm_pcid;	/* context id */
 	/* spare u_int here due to padding */
 	struct pmap_statistics pm_stats; /* pmap statistics */
 	struct vm_radix	pm_root;	/* spare page table pages */
@@ -291,6 +293,12 @@ struct pv_chunk {
 	struct pv_entry		pc_pventry[_NPCPV];
 };
 
+struct invpcid_descr {
+	uint64_t	pcid:12 __packed;
+	uint64_t	pad:52 __packed;
+	uint64_t	addr;
+} __packed;
+
 #ifdef _KERNEL
 
 extern caddr_t	CADDR1;
@@ -318,8 +326,8 @@ void	*pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
 boolean_t pmap_page_is_mapped(vm_page_t m);
 void	pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
 void	pmap_unmapdev(vm_offset_t, vm_size_t);
-void	pmap_invalidate_page(pmap_t, vm_offset_t);
-void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
+void	pmap_invalidate_page(pmap_t, vm_offset_t, boolean_t);
+void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t, boolean_t);
 void	pmap_invalidate_all(pmap_t);
 void	pmap_invalidate_cache(void);
 void	pmap_invalidate_cache_pages(vm_page_t *pages, int count);
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 16d87ea..d6cd476 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -54,6 +54,8 @@ inthand_t
 	IDTVEC(cpususpend),	/* CPU suspends & waits to be resumed */
 	IDTVEC(rendezvous);	/* handle CPU rendezvous */
 
+struct pmap;
+
 /* functions in mp_machdep.c */
 void	cpu_add(u_int apic_id, char boot_cpu);
 void	cpustop_handler(void);
@@ -67,13 +69,14 @@ int	ipi_nmi_handler(void);
 void	ipi_selected(cpuset_t cpus, u_int ipi);
 u_int	mp_bootaddress(u_int);
 void	smp_cache_flush(void);
-void	smp_invlpg(vm_offset_t addr);
-void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
-void	smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void	smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
+void	smp_invlpg(struct pmap *pmap, vm_offset_t addr);
+void	smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr);
+void	smp_invlpg_range(struct pmap *pmap, vm_offset_t startva,
 	    vm_offset_t endva);
-void	smp_invltlb(void);
-void	smp_masked_invltlb(cpuset_t mask);
+void	smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap,
+	    vm_offset_t startva, vm_offset_t endva);
+void	smp_invltlb(struct pmap *pmap);
+void	smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
 
 #endif /* !LOCORE */
 #endif /* SMP */
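The invlrng handler copies smp_tlb_invpcid into the 16-byte pc_invpcid_descr per-CPU buffer with two 8-byte moves (using gs-prefixed accesses) and then bumps the address at offset 8 on each loop iteration, so it silently depends on struct invpcid_descr being exactly two quadwords. Compile-time checks of those layout assumptions, which the patch itself does not add, could read:

	CTASSERT(sizeof(struct invpcid_descr) == 16);
	CTASSERT(offsetof(struct invpcid_descr, addr) == 8);

INVPCID takes the descriptor as an m128 operand: the PCID in bits 0-11, bits 12-63 reserved and required to be zero (hence the pad field), and the linear address in the second quadword.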
diff --git a/sys/dev/drm2/i915/intel_ringbuffer.c b/sys/dev/drm2/i915/intel_ringbuffer.c
index c0b752b..78f3706 100644
--- a/sys/dev/drm2/i915/intel_ringbuffer.c
+++ b/sys/dev/drm2/i915/intel_ringbuffer.c
@@ -366,7 +366,7 @@ init_pipe_control(struct intel_ring_buffer *ring)
 		goto err_unpin;
 	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
 	pmap_invalidate_range(kernel_pmap, (vm_offset_t)pc->cpu_page,
-	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
+	    (vm_offset_t)pc->cpu_page + PAGE_SIZE, TRUE);
 	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
 	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
 
@@ -395,7 +395,7 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
 	obj = pc->obj;
 	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
 	pmap_invalidate_range(kernel_pmap, (vm_offset_t)pc->cpu_page,
-	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
+	    (vm_offset_t)pc->cpu_page + PAGE_SIZE, TRUE);
 	kmem_free(kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
 	i915_gem_object_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
@@ -974,7 +974,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
 	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
 	pmap_invalidate_range(kernel_pmap,
 	    (vm_offset_t)ring->status_page.page_addr,
-	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
+	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE, TRUE);
 	kmem_free(kernel_map, (vm_offset_t)ring->status_page.page_addr,
 	    PAGE_SIZE);
 	i915_gem_object_unpin(obj);
@@ -1016,7 +1016,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
 	    1);
 	pmap_invalidate_range(kernel_pmap,
 	    (vm_offset_t)ring->status_page.page_addr,
-	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
+	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE, TRUE);
 	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
 	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
 
 	ring->status_page.obj = obj;
diff --git a/sys/kern/subr_unit.c b/sys/kern/subr_unit.c
index 9cf1781..3bf7aaf 100644
--- a/sys/kern/subr_unit.c
+++ b/sys/kern/subr_unit.c
@@ -68,8 +68,8 @@
 */
 
 #include <sys/types.h>
-#include <sys/queue.h>
 #include <sys/bitstring.h>
+#include <sys/_unrhdr.h>
 
 #ifdef _KERNEL
 
@@ -187,22 +187,6 @@ CTASSERT(sizeof(struct unr) == sizeof(struct unrb));
 /* Number of bits in the bitmap */
 #define	NBITS	((int)sizeof(((struct unrb *)NULL)->map) * 8)
 
-/* Header element for a unr number space. */
-
-struct unrhdr {
-	TAILQ_HEAD(unrhd,unr)	head;
-	u_int			low;	/* Lowest item */
-	u_int			high;	/* Highest item */
-	u_int			busy;	/* Count of allocated items */
-	u_int			alloc;	/* Count of memory allocations */
-	u_int			first;	/* items in allocated from start */
-	u_int			last;	/* items free at end */
-	struct mtx		*mtx;
-	TAILQ_HEAD(unrfr,unr)	ppfree;	/* Items to be freed after mtx
-					   lock dropped */
-};
-
-
 #if defined(DIAGNOSTIC) || !defined(_KERNEL)
 /*
  * Consistency check function.
@@ -315,20 +299,12 @@ clean_unrhdr(struct unrhdr *uh)
 	mtx_unlock(uh->mtx);
 }
 
-/*
- * Allocate a new unrheader set.
- *
- * Highest and lowest valid values given as parameters.
- */
-
-struct unrhdr *
-new_unrhdr(int low, int high, struct mtx *mutex)
+void
+init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex)
 {
-	struct unrhdr *uh;
 
 	KASSERT(low >= 0 && low <= high,
 	    ("UNR: use error: new_unrhdr(%d, %d)", low, high));
-	uh = Malloc(sizeof *uh);
 	if (mutex != NULL)
 		uh->mtx = mutex;
 	else
@@ -340,6 +316,21 @@ new_unrhdr(int low, int high, struct mtx *mutex)
 	uh->first = 0;
 	uh->last = 1 + (high - low);
 	check_unrhdr(uh, __LINE__);
+}
+
+/*
+ * Allocate a new unrheader set.
+ *
+ * Highest and lowest valid values given as parameters.
+ */
+
+struct unrhdr *
+new_unrhdr(int low, int high, struct mtx *mutex)
+{
+	struct unrhdr *uh;
+
+	uh = Malloc(sizeof *uh);
+	init_unrhdr(uh, low, high, mutex);
 	return (uh);
 }
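The subr_unit.c refactoring exists so a number space can live inside another object instead of being Malloc'ed: new_unrhdr() becomes a thin wrapper around the new init_unrhdr(), and the structure definition moves to sys/_unrhdr.h (added below) so callers such as pmap.c can embed it before malloc is usable. A minimal consumer sketch in the style of pcid_unr/pcid_mtx (the names here are hypothetical):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>
	#include <sys/_unrhdr.h>

	static struct unrhdr my_ids;		/* embedded, no Malloc */
	static struct mtx my_ids_mtx;

	static void
	my_ids_init(void)
	{

		mtx_init(&my_ids_mtx, "my ids", NULL, MTX_DEF);
		init_unrhdr(&my_ids, 1, 4095, &my_ids_mtx);
	}

	static int
	my_id_alloc(void)
	{

		return (alloc_unr(&my_ids));	/* -1 when exhausted */
	}

	static void
	my_id_free(int id)
	{

		free_unr(&my_ids, id);
	}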
diff --git a/sys/sys/_unrhdr.h b/sys/sys/_unrhdr.h
new file mode 100644
index 0000000..f3c25d1
--- /dev/null
+++ b/sys/sys/_unrhdr.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2004 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_UNRHDR_H
+#define _SYS_UNRHDR_H
+
+#include <sys/queue.h>
+
+struct mtx;
+
+/* Header element for a unr number space. */
+
+struct unrhdr {
+	TAILQ_HEAD(unrhd,unr)	head;
+	u_int			low;	/* Lowest item */
+	u_int			high;	/* Highest item */
+	u_int			busy;	/* Count of allocated items */
+	u_int			alloc;	/* Count of memory allocations */
+	u_int			first;	/* items in allocated from start */
+	u_int			last;	/* items free at end */
+	struct mtx		*mtx;
+	TAILQ_HEAD(unrfr,unr)	ppfree;	/* Items to be freed after mtx
+					   lock dropped */
+};
+
+#endif
diff --git a/sys/sys/bitset.h b/sys/sys/bitset.h
index dee5542..7c24ecd 100644
--- a/sys/sys/bitset.h
+++ b/sys/sys/bitset.h
@@ -135,7 +135,14 @@
 	atomic_set_long(&(p)->__bits[__bitset_word(_s, n)],		\
 	    __bitset_mask((_s), n))
 
-/* Convenience functions catering special cases. */
+/* Convenience functions catering special cases. */
+#define	BIT_AND_ATOMIC(_s, d, s) do {					\
+	__size_t __i;							\
+	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
+		atomic_clear_long(&(d)->__bits[__i],			\
+		    ~(s)->__bits[__i]);					\
+} while (0)
+
 #define	BIT_OR_ATOMIC(_s, d, s) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h
index fc078d3..e1ee37d 100644
--- a/sys/sys/cpuset.h
+++ b/sys/sys/cpuset.h
@@ -55,6 +55,7 @@
 #define	CPU_NAND(d, s)			BIT_NAND(CPU_SETSIZE, d, s)
 #define	CPU_CLR_ATOMIC(n, p)		BIT_CLR_ATOMIC(CPU_SETSIZE, n, p)
 #define	CPU_SET_ATOMIC(n, p)		BIT_SET_ATOMIC(CPU_SETSIZE, n, p)
+#define	CPU_AND_ATOMIC(n, p)		BIT_AND_ATOMIC(CPU_SETSIZE, n, p)
 #define	CPU_OR_ATOMIC(d, s)		BIT_OR_ATOMIC(CPU_SETSIZE, d, s)
 #define	CPU_COPY_STORE_REL(f, t)	BIT_COPY_STORE_REL(CPU_SETSIZE, f, t)
 #define	CPU_FFS(p)			BIT_FFS(CPU_SETSIZE, p)
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 4887d71..e3ea9cf 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -396,6 +396,7 @@ int	root_mounted(void);
 */
 struct unrhdr;
 struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex);
+void	init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex);
 void	delete_unrhdr(struct unrhdr *uh);
 void	clean_unrhdr(struct unrhdr *uh);
 void	clean_unrhdrl(struct unrhdr *uh);
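BIT_AND_ATOMIC computes d &= s by atomically clearing, in each word of d, the bits absent from s. Each word update is atomic but the set as a whole is not; pm_save tolerates that, since a stale set bit only costs an extra TLB flush. A one-word userland model of the operation (GCC/Clang builtins; the kernel form is atomic_clear_long(&d, ~s)):

	#include <stdint.h>
	#include <stdio.h>

	static void
	and_atomic(uint64_t *d, uint64_t s)
	{
		/* Equivalent to atomically clearing the bits in ~s. */
		__atomic_fetch_and(d, s, __ATOMIC_RELAXED);
	}

	int
	main(void)
	{
		uint64_t save = 0xff;

		and_atomic(&save, 0x05);
		printf("%#jx\n", (uintmax_t)save);	/* prints 0x5 */
		return (0);
	}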