diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 96c778d..5b7b759 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -149,17 +149,40 @@ IDTVEC(invltlb)
 #endif
 
 	pushq	%rax
+	pushq	%rdx
 
-	movq	%cr3, %rax		/* invalidate the TLB */
-	movq	%rax, %cr3
-
+	cmpl	$0,pmap_pcid_enabled
+	je	1f
+
+	cmpq	$0,smp_tlb_invpcid
+	je	1f
+
+	/*
+	 * For PCID-enabled pmap, set bit 63 of loaded %cr3 to zero.
+	 */
+	movq	%cr3,%rax
+	movq	pcid_cr3,%rdx
+	cmpq	%rax,%rdx
+	je	1f
+	movq	%rdx,%cr3
+	jmp	2f
+
+	/*
+	 * Invalidate the TLB.
+	 */
+1:
+	movq	%cr3,%rax
+2:
+	movq	%rax,%cr3
 	movq	lapic, %rax
 	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
 
 	lock
 	incl	smp_tlb_wait
 
+	popq	%rdx
 	popq	%rax
+
 	jmp	doreti_iret
 
 /*
@@ -182,16 +205,62 @@ IDTVEC(invlpg)
 #endif
 
 	pushq	%rax
-
-	movq	smp_tlb_addr1, %rax
-	invlpg	(%rax)			/* invalidate single page */
-
+	pushq	%rdx
+	movq	$smp_tlb_invpcid,%rdx
+
+	cmpl	$0,pmap_pcid_enabled
+	je	2f
+
+	cmpl	$0,invpcid_works
+	jne	4f
+
+	/* kernel pmap - use invlpg to invalidate global mapping */
+	cmpl	$0,(%rdx)
+	je	2f
+
+	/*
+	 * PCID supported, but INVPCID is not.
+	 * Temporarily switch to the target address space and do INVLPG.
+	 */
+	pushq	%rcx
+	movq	%cr3,%rcx
+	movq	pcid_cr3,%rax
+	cmpq	%rcx,%rax
+	je	1f
+	btsq	$63,%rax
+	movq	%rax,%cr3
+1:	movq	8(%rdx),%rax
+	invlpg	(%rax)
+	btsq	$63,%rcx
+	movq	%rcx,%cr3
+	popq	%rcx
+	jmp	3f
+
+4:
+	/*
+	 * Invalidate the TLB entry using INVPCID.
+	 */
+	xorl	%eax,%eax
+//	invpcid	(%rdx),%rax
+	.byte	0x66,0x0f,0x38,0x82,0x02
+	jmp	3f
+
+2:
+	/*
+	 * PCID is not supported.
+	 * Invalidate single page using INVLPG.
+	 */
+	movq	8(%rdx),%rax
+	invlpg	(%rax)
+
+3:
 	movq	lapic, %rax
 	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
 
 	lock
 	incl	smp_tlb_wait
 
+	popq	%rdx
 	popq	%rax
 	jmp	doreti_iret
 
@@ -216,14 +285,65 @@ IDTVEC(invlrng)
 	pushq	%rax
 	pushq	%rdx
-
-	movq	smp_tlb_addr1, %rdx
-	movq	smp_tlb_addr2, %rax
-1:	invlpg	(%rdx)			/* invalidate single page */
-	addq	$PAGE_SIZE, %rdx
-	cmpq	%rax, %rdx
+	movq	$smp_tlb_invpcid,%rdx
+
+	cmpl	$0,pmap_pcid_enabled
+	je	4f
+
+	cmpl	$0,invpcid_works
+	jne	2f
+
+	/* kernel pmap - use invlpg to invalidate global mapping */
+	cmpl	$0,(%rdx)
+	je	4f
+
+	pushq	%rcx
+	movq	%cr3,%rcx
+	movq	pcid_cr3,%rax
+	cmpq	%rcx,%rax
+	je	1f
+	btsq	$63,%rax
+	movq	%rax,%cr3
+1:	movq	8(%rdx),%rdx
+	movq	smp_tlb_addr2,%rax
+1:	invlpg	(%rdx)
+	addq	$PAGE_SIZE,%rdx
+	cmpq	%rax,%rdx
 	jb	1b
-
+	btsq	$63,%rcx
+	movq	%rcx,%cr3
+	popq	%rcx
+	jmp	6f
+2:
+	pushq	%rcx
+	movq	(%rdx),%rcx
+	movq	%rcx,PCPU(INVPCID_DESCR)
+	movq	8(%rdx),%rax
+	movq	%rax,PCPU(INVPCID_DESCR)+8
+	movq	smp_tlb_addr2,%rcx
+	xorl	%eax,%eax
+	movq	$PC_INVPCID_DESCR,%rdx
+	gs
+	subq	8(%rdx),%rcx
+	shrq	$PAGE_SHIFT,%rcx
+3:
+	gs
+//	invpcid	(%rdx),%rax
+	.byte	0x66,0x0f,0x38,0x82,0x02
+	gs
+	addq	$PAGE_SIZE,8(%rdx)
+	dec	%rcx
+	jne	3b
+	popq	%rcx
+	jmp	6f
+4:
+	movq	8(%rdx),%rdx
+	movq	smp_tlb_addr2,%rax
+5:	invlpg	(%rdx)			/* invalidate single page */
+	addq	$PAGE_SIZE,%rdx
+	cmpq	%rax,%rdx
+	jb	5b
+6:
 	movq	lapic, %rax
 	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
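For readability, here is a C sketch of the decision tree the IDTVEC(invlpg) handler above implements. The invpcid() intrinsic is assumed for illustration; the real code emits the raw bytes 0x66,0x0f,0x38,0x82,0x02 (invpcid (%rdx),%rax) because assemblers of this vintage do not know the mnemonic, and it zeroes %eax first since the register operand selects invalidation type 0, "individual address". All other names come from the patch itself.

/*
 * Sketch only, not part of the patch.  INVPCID_ADDR is a name
 * introduced here for the hardware's type-0 invalidation.
 */
#define	INVPCID_ADDR	0

static void
invlpg_handler_sketch(void)
{
	uint64_t ocr3;

	if (!pmap_pcid_enabled || smp_tlb_invpcid.pcid == 0) {
		/* PCID disabled, or kernel pmap: plain INVLPG. */
		invlpg(smp_tlb_invpcid.addr);
	} else if (invpcid_works) {
		/* One instruction invalidates the (pcid, addr) pair. */
		invpcid(&smp_tlb_invpcid, INVPCID_ADDR);
	} else {
		/*
		 * PCID without INVPCID: briefly enter the target
		 * address space.  CR3_PCID_SAVE (bit 63) preserves the
		 * TLB contents of the contexts we pass through.
		 */
		ocr3 = rcr3();
		if (ocr3 != pcid_cr3)
			load_cr3(pcid_cr3 | CR3_PCID_SAVE);
		invlpg(smp_tlb_invpcid.addr);
		load_cr3(ocr3 | CR3_PCID_SAVE);
	}
}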
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index bef4b75..a3a17d7 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -77,8 +77,7 @@ ENTRY(cpu_throw)
 	LK btrl	%eax,PM_ACTIVE(%rdx)	/* clear old */
 1:
 	movq	TD_PCB(%rsi),%r8	/* newtd->td_proc */
-	movq	PCB_CR3(%r8),%rdx
-	movq	%rdx,%cr3		/* new address space */
+	movq	PCB_CR3(%r8),%rcx	/* new address space */
 	jmp	swact
 END(cpu_throw)
 
@@ -143,20 +142,41 @@ done_store_dr:
 	SETLK	%rdx, TD_LOCK(%rdi)	/* Release the old thread */
 	jmp	sw1
 swinact:
-	movq	%rcx,%cr3		/* new address space */
-	movl	PCPU(CPUID), %eax
+	movl	PCPU(CPUID),%eax
 	/* Release bit from old pmap->pm_active */
-	movq	PCPU(CURPMAP),%rcx
-	LK btrl	%eax,PM_ACTIVE(%rcx)	/* clear old */
-	SETLK	%rdx, TD_LOCK(%rdi)	/* Release the old thread */
+	movq	PCPU(CURPMAP),%r12
+	LK btrl	%eax,PM_ACTIVE(%r12)	/* clear old */
+	SETLK	%rdx,TD_LOCK(%rdi)	/* Release the old thread */
 swact:
 	/* Set bit in new pmap->pm_active */
 	movq	TD_PROC(%rsi),%rdx	/* newproc */
 	movq	P_VMSPACE(%rdx), %rdx
 	addq	$VM_PMAP,%rdx
+	cmpl	$-1,PM_PCID(%rdx)
+	je	1f
+	LK btsl	%eax,PM_SAVE(%rdx)
+	jnc	1f
+	btsq	$63,%rcx		/* CR3_PCID_SAVE */
+	incq	PCPU(PM_SAVE_CNT)
+1:
+	movq	%rcx,%cr3		/* new address space */
 	LK btsl	%eax,PM_ACTIVE(%rdx)	/* set new */
 	movq	%rdx,PCPU(CURPMAP)
+
+	/*
+	 * We might lose the race and another CPU might have changed
+	 * the pmap after we set our bit in pmap->pm_save.  Recheck.
+	 * Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was
+	 * modified, causing TLB flush for this pcid.
+	 */
+	btrq	$63,%rcx
+	jnc	1f
+	LK btsl	%eax,PM_SAVE(%rdx)
+	jc	1f
+	decq	PCPU(PM_SAVE_CNT)
+	movq	%rcx,%cr3
+1:
+
 sw1:
 #if defined(SCHED_ULE) && defined(SMP)
 	/* Wait for the new thread to become unblocked */
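The swact block above is the heart of the PCID scheme: pm_save tracks, per CPU, whether the TLB may still hold a valid context for the pmap's PCID. Here is a C sketch of that policy; cpu_testandset() is an assumed helper standing in for the "lock btsl" sequences (set the CPU's bit, return its previous value).

/*
 * Sketch only: the context-switch decision implemented in swact.
 */
static void
swact_sketch(struct pmap *pmap, uint64_t cr3, u_int cpuid)
{
	if (pmap->pm_pcid != -1 && cpu_testandset(&pmap->pm_save, cpuid)) {
		/* This CPU still caches a valid context: skip the flush. */
		cr3 |= CR3_PCID_SAVE;
		PCPU_INC(pm_save_cnt);
	}
	load_cr3(cr3);
	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
	PCPU_SET(curpmap, pmap);

	/*
	 * A remote invalidation may have cleared our pm_save bit
	 * between the test-and-set and the %cr3 load.  Re-set it; if
	 * it was gone, the saved TLB content is suspect, so reload
	 * %cr3 without CR3_PCID_SAVE to flush this PCID after all.
	 */
	if ((cr3 & CR3_PCID_SAVE) != 0 &&
	    !cpu_testandset(&pmap->pm_save, cpuid)) {
		PCPU_ADD(pm_save_cnt, -1);
		load_cr3(cr3 & ~CR3_PCID_SAVE);
	}
}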
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 3796aa8..03de95b 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$");
 ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
+ASSYM(PM_SAVE, offsetof(struct pmap, pm_save));
+ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid));
 
 ASSYM(P_MD, offsetof(struct proc, p_md));
 ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
@@ -220,6 +222,8 @@ ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p));
 ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
 ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
 ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
+ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
+ASSYM(PC_INVPCID_DESCR, offsetof(struct pcpu, pc_invpcid_descr));
 
 ASSYM(LA_VER, offsetof(struct LAPIC, version));
 ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c
index 805ba1e..6030fb8 100644
--- a/sys/amd64/amd64/identcpu.c
+++ b/sys/amd64/amd64/identcpu.c
@@ -384,6 +384,16 @@ printcpuinfo(void)
 				);
 			}
 
+			if (cpu_stdext_feature != 0) {
+				printf("\n  Standard Extended Features=0x%b",
+				    cpu_stdext_feature,
+				    "\020"
+				    "\001GSFSBASE"
+				    "\010SMEP"
+				    "\013INVPCID"
+				    );
+			}
+
 			if (via_feature_rng != 0 || via_feature_xcrypt != 0)
 				print_via_padlock_info();
 
@@ -501,6 +511,11 @@ identify_cpu(void)
 		}
 	}
 
+	if (cpu_high >= 7) {
+		cpuid_count(7, 0, regs);
+		cpu_stdext_feature = regs[1];
+	}
+
 	if (cpu_vendor_id == CPU_VENDOR_INTEL ||
 	    cpu_vendor_id == CPU_VENDOR_AMD ||
 	    cpu_vendor_id == CPU_VENDOR_CENTAUR) {
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 02588af..fb5eb0b 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -72,6 +72,7 @@ u_int	cpu_vendor_id;		/* CPU vendor ID */
 u_int	cpu_fxsr;		/* SSE enabled */
 u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
 u_int	cpu_clflush_line_size = 32;
+u_int	cpu_stdext_feature;
 u_int	cpu_max_ext_state_size;
 
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
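Two details of the identcpu.c hunk are worth spelling out. CPUID leaf 7 must be queried with ECX=0, hence cpuid_count() rather than do_cpuid(), and the kernel's %b bit-name string uses 1-indexed octal positions after the base byte \020 (hex): EBX bit 0 (GSFSBASE) is \001, bit 7 (SMEP) is \010, bit 10 (INVPCID) is \013. A standalone demo of the mask values, as plain C rather than kernel code:

#include <stdio.h>

#define	CPUID_STDEXT_GSFSBASE	0x00000001	/* EBX bit 0  -> "\001" */
#define	CPUID_STDEXT_SMEP	0x00000080	/* EBX bit 7  -> "\010" */
#define	CPUID_STDEXT_INVPCID	0x00000400	/* EBX bit 10 -> "\013" */

int
main(void)
{
	unsigned ebx = CPUID_STDEXT_GSFSBASE | CPUID_STDEXT_INVPCID;

	/* The kernel's %b would render: 0x401<GSFSBASE,INVPCID> */
	printf("stdext=%#x gsfsbase=%d smep=%d invpcid=%d\n", ebx,
	    !!(ebx & CPUID_STDEXT_GSFSBASE), !!(ebx & CPUID_STDEXT_SMEP),
	    !!(ebx & CPUID_STDEXT_INVPCID));
	return (0);
}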
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index bc14745..ea8b59a 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1853,7 +1853,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
-	thread0.td_pcb->pcb_cr3 = KPML4phys;
+	thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */
 	thread0.td_frame = &proc0_tf;
 
 	env = getenv("kernelname");
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index da1812d..d46f48c 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -103,9 +103,10 @@ struct pcb **susppcbs;
 void **suspfpusave;
 
 /* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
 vm_offset_t smp_tlb_addr2;
+struct invpcid_descr smp_tlb_invpcid;
 volatile int smp_tlb_wait;
+uint64_t pcid_cr3;
 
 #ifdef COUNT_IPIS
 /* Interrupt counts. */
@@ -599,6 +600,8 @@ cpu_mp_announce(void)
 	}
 }
 
+extern int pmap_pcid_enabled;
+
 /*
  * AP CPU's call this to initialize themselves.
  */
@@ -759,6 +762,8 @@ init_secondary(void)
 	 */
 	load_cr4(rcr4() | CR4_PGE);
 
+	if (pmap_pcid_enabled)
+		load_cr4(rcr4() | CR4_PCIDE);
 	load_ds(_udatasel);
 	load_es(_udatasel);
 	load_fs(_ufssel);
@@ -899,7 +904,7 @@ start_all_aps(void)
 
 	/* install the AP 1st level boot code */
 	pmap_kenter(va, boot_address);
-	pmap_invalidate_page(kernel_pmap, va);
+	pmap_invalidate_page(kernel_pmap, va, FALSE);
 	bcopy(mptramp_start, (void *)va, bootMP_size);
 
 	/* Locate the page tables, they'll be below the trampoline */
@@ -1110,7 +1115,8 @@ ipi_send_cpu(int cpu, u_int ipi)
  * Flush the TLB on all other CPU's
  */
 static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
+    vm_offset_t addr2)
 {
 	u_int ncpu;
 
@@ -1120,7 +1126,14 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 	if (!(read_rflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
 	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_addr1 = addr1;
+	smp_tlb_invpcid.addr = addr1;
+	if (pmap == NULL || pmap->pm_pcid == -1)
+		smp_tlb_invpcid.pcid = 0;
+	else {
+		smp_tlb_invpcid.pcid = pmap->pm_pcid;
+		pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
+		    pmap->pm_pcid;
+	}
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	ipi_all_but_self(vector);
@@ -1130,7 +1143,8 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 }
 
 static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
+    vm_offset_t addr1, vm_offset_t addr2)
 {
 	int cpu, ncpu, othercpus;
 
@@ -1146,7 +1160,14 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_of
 	if (!(read_rflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
 	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_addr1 = addr1;
+	smp_tlb_invpcid.addr = addr1;
+	if (pmap == NULL || pmap->pm_pcid == -1)
+		smp_tlb_invpcid.pcid = 0;
+	else {
+		smp_tlb_invpcid.pcid = pmap->pm_pcid;
+		pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
+		    pmap->pm_pcid;
+	}
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	if (CPU_ISFULLSET(&mask)) {
@@ -1173,15 +1194,15 @@ smp_cache_flush(void)
 {
 
 	if (smp_started)
-		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
+		smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
 }
 
 void
-smp_invltlb(void)
+smp_invltlb(pmap_t pmap)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+		smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_global++;
 #endif
@@ -1189,11 +1210,11 @@ smp_invltlb(void)
 }
 
 void
-smp_invlpg(vm_offset_t addr)
+smp_invlpg(pmap_t pmap, vm_offset_t addr)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+		smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_page++;
 #endif
@@ -1201,11 +1222,11 @@ smp_invlpg(vm_offset_t addr)
 }
 
 void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
+smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+		smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_range++;
 		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
@@ -1214,11 +1235,11 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 }
 
 void
-smp_masked_invltlb(cpuset_t mask)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, NULL, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_global++;
 #endif
@@ -1226,11 +1247,11 @@ smp_masked_invltlb(cpuset_t mask)
 }
 
 void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_page++;
 #endif
@@ -1238,11 +1259,13 @@ smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
 }
 
 void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
+    vm_offset_t addr2)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1,
+		    addr2);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_range++;
 		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
@@ -1439,7 +1462,10 @@ cpususpend_handler(void)
 	CPU_CLR_ATOMIC(cpu, &started_cpus);
 	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
 
-	/* Restore CR3 and enable interrupts */
+	/*
+	 * Restore CR3 and enable interrupts.
+	 * Do flush TLB, in particular, by not or'ing CR3_PCID_SAVE.
+	 */
 	load_cr3(cr3);
 	mca_resume();
 	lapic_setup(0);
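The shootdown plumbing keeps its existing rendezvous: the initiator publishes the arguments while holding smp_ipi_mtx, and each IPI handler ends with "lock incl smp_tlb_wait", so the initiator can spin until every target has checked in. A sketch of the caller side, with counters and the targeted-mask variant omitted; pcid_cr3 is the target pmap's %cr3 image (PML4 physical address plus the PCID in bits 0-11), and pcid 0 doubles as "no PCID / kernel pmap":

/*
 * Sketch only: caller-side protocol of smp_tlb_shootdown() above.
 */
static void
shootdown_sketch(u_int vector, pmap_t pmap, vm_offset_t addr1,
    vm_offset_t addr2, u_int ncpu)
{
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_invpcid.addr = addr1;
	if (pmap == NULL || pmap->pm_pcid == -1)
		smp_tlb_invpcid.pcid = 0;
	else {
		smp_tlb_invpcid.pcid = pmap->pm_pcid;
		pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
		    pmap->pm_pcid;
	}
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	ipi_all_but_self(vector);
	while (smp_tlb_wait < ncpu)	/* handlers do "lock incl" */
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}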
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 4c7bd2f..335c0c6 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -121,11 +121,8 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#ifdef SMP
+#include <sys/_unrhdr.h>
 #include <sys/smp.h>
-#else
-#include <sys/cpuset.h>
-#endif
 
 #include
 #include
@@ -216,6 +213,29 @@ static int shpgperproc = PMAP_SHPGPERPROC;
 pt_entry_t *CMAP1 = 0;
 caddr_t CADDR1 = 0;
 
+static struct unrhdr pcid_unr;
+static struct mtx pcid_mtx;
+int pmap_pcid_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN, &pmap_pcid_enabled,
+    0, "Is TLB Context ID enabled?");
+int invpcid_works = 0;
+
+static int
+pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
+{
+	int i;
+	uint64_t res;
+
+	res = 0;
+	CPU_FOREACH(i) {
+		res += cpuid_to_pcpu[i]->pc_pm_save_cnt;
+	}
+	return (sysctl_handle_64(oidp, &res, 0, req));
+}
+SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
+    CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
+    "Count of saved TLB context on switch");
+
 /*
  * Crashdump maps.
  */
@@ -584,6 +604,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	kernel_pmap->pm_root = NULL;
 	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
+	CPU_ZERO(&kernel_pmap->pm_save);
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
 	/*
@@ -610,6 +631,18 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 
 	/* Initialize the PAT MSR. */
 	pmap_init_pat();
+
+	/* Initialize TLB Context Id. */
+	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
+	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
+		load_cr4(rcr4() | CR4_PCIDE);
+		mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
+		init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx);
+		/* Check for INVPCID support */
+		invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
+		    != 0;
+	} else
+		pmap_pcid_enabled = 0;
 }
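PCID 0 is reserved for the kernel (and for pmaps that could not get an ID), so the unit-number space seeded above hands out IDs 1 through 4095, the full 12-bit PCID range. A sketch of the allocation lifecycle that pmap_pinit()/pmap_release() below implement; alloc_unr(9) returns -1 when the space is exhausted, which the rest of the patch already treats as "run this pmap without PCID":

/*
 * Sketch only: PCID lifecycle built on the pcid_unr space above.
 */
static void
pmap_pcid_alloc_sketch(pmap_t pmap)
{
	pmap->pm_pcid = pmap_pcid_enabled ? alloc_unr(&pcid_unr) : -1;
	CPU_ZERO(&pmap->pm_save);	/* no CPU caches this context yet */
}

static void
pmap_pcid_free_sketch(pmap_t pmap)
{
	if (pmap->pm_pcid != -1)
		free_unr(&pcid_unr, pmap->pm_pcid);
}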
@@ -923,50 +956,55 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
  * processor.
  */
 void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va, boolean_t global)
 {
 	cpuset_t other_cpus;
 	u_int cpuid;
 
 	sched_pin();
-	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus) ||
+	    global) {
 		invlpg(va);
-		smp_invlpg(va);
+		smp_invlpg(pmap, va);
 	} else {
 		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
+		CPU_AND_ATOMIC(&pmap->pm_save, &pmap->pm_active);
 		CPU_CLR(cpuid, &other_cpus);
 		if (CPU_ISSET(cpuid, &pmap->pm_active))
 			invlpg(va);
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invlpg(other_cpus, va);
+			smp_masked_invlpg(other_cpus, pmap, va);
 	}
 	sched_unpin();
 }
 
 void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+    boolean_t global)
 {
 	cpuset_t other_cpus;
 	vm_offset_t addr;
 	u_int cpuid;
 
 	sched_pin();
-	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus) ||
+	    global) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
-		smp_invlpg_range(sva, eva);
+		smp_invlpg_range(pmap, sva, eva);
 	} else {
 		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
 		CPU_CLR(cpuid, &other_cpus);
+		CPU_AND_ATOMIC(&pmap->pm_save, &pmap->pm_active);
 		if (CPU_ISSET(cpuid, &pmap->pm_active))
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invlpg_range(other_cpus, sva, eva);
+			smp_masked_invlpg_range(other_cpus, pmap, sva, eva);
 	}
 	sched_unpin();
 }
@@ -980,16 +1018,17 @@ pmap_invalidate_all(pmap_t pmap)
 	sched_pin();
 	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		invltlb();
-		smp_invltlb();
+		smp_invltlb(pmap);
 	} else {
 		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
 		CPU_CLR(cpuid, &other_cpus);
+		CPU_AND_ATOMIC(&pmap->pm_save, &pmap->pm_active);
 		if (CPU_ISSET(cpuid, &pmap->pm_active))
 			invltlb();
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invltlb(other_cpus);
+			smp_masked_invltlb(other_cpus, pmap);
 	}
 	sched_unpin();
 }
@@ -1051,8 +1090,10 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 	CPU_CLR(cpuid, &other_cpus);
 	if (pmap == kernel_pmap)
 		active = all_cpus;
-	else
+	else {
 		active = pmap->pm_active;
+		CPU_AND_ATOMIC(&pmap->pm_save, &active);
+	}
 	if (CPU_OVERLAP(&active, &other_cpus)) {
 		act.store = cpuid;
 		act.invalidate = active;
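The new boolean_t argument tells the invalidation code whether the mapping may be global (PG_G). Global entries are visible under every PCID and survive a plain %cr3 reload, so global == TRUE forces the broadcast path even when the pmap is not active on all CPUs. The callers changed throughout the rest of this patch derive it mechanically from the old PTE or PDE, roughly like this fragment (sketch):

/*
 * Typical caller pattern for the new "global" argument: capture the
 * PTE before tearing it down and let its PG_G bit decide.
 */
pt_entry_t oldpte;

oldpte = *pte;
pte_store(pte, 0);
pmap_invalidate_page(pmap, va, !!(oldpte & PG_G));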
@@ -1076,7 +1117,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
  * We inline these within pmap.c for speed.
  */
 PMAP_INLINE void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va, boolean_t global)
 {
 
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
@@ -1084,7 +1125,8 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 }
 
 PMAP_INLINE void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+    boolean_t global)
 {
 	vm_offset_t addr;
 
@@ -1115,6 +1157,8 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 	pde_store(pde, newpde);
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		pmap_update_pde_invalidate(va, newpde);
+	else
+		CPU_ZERO(&pmap->pm_save);
 }
 #endif /* !SMP */
 
@@ -1394,7 +1438,7 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 	}
 	if (__predict_false((oldpte & PG_V) != 0))
 		pmap_invalidate_range(kernel_pmap, sva, sva + count *
-		    PAGE_SIZE);
+		    PAGE_SIZE, TRUE);
 }
 
 /*
@@ -1412,7 +1456,7 @@ pmap_qremove(vm_offset_t sva, int count)
 		pmap_kremove(va);
 		va += PAGE_SIZE;
 	}
-	pmap_invalidate_range(kernel_pmap, sva, va);
+	pmap_invalidate_range(kernel_pmap, sva, va, TRUE);
 }
 
 /***************************************************
@@ -1626,6 +1670,11 @@ pmap_pinit0(pmap_t pmap)
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+	if (pmap_pcid_enabled)
+		pmap->pm_pcid = 0;
+	else
+		pmap->pm_pcid = -1;
+	CPU_ZERO(&pmap->pm_save);
 }
 
 /*
@@ -1667,6 +1716,12 @@ pmap_pinit(pmap_t pmap)
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
+	if (pmap_pcid_enabled)
+		pmap->pm_pcid = alloc_unr(&pcid_unr);
+	else
+		pmap->pm_pcid = -1;
+	CPU_ZERO(&pmap->pm_save);
+
 	return (1);
 }
 
@@ -1925,6 +1980,8 @@ pmap_release(pmap_t pmap)
 	atomic_subtract_int(&cnt.v_wire_count, 1);
 	vm_page_free_zero(m);
 	PMAP_LOCK_DESTROY(pmap);
+	if (pmap->pm_pcid != -1)
+		free_unr(&pcid_unr, pmap->pm_pcid);
 }
 
 static int
@@ -2128,7 +2185,7 @@ pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 				vm_page_dirty(m);
 			free = NULL;
 			pmap_unuse_pt(pmap, va, *pde, &free);
-			pmap_invalidate_page(pmap, va);
+			pmap_invalidate_page(pmap, va, FALSE);
 			pmap_free_zero_pages(free);
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 			free_pv_entry(pmap, pv);
@@ -2504,7 +2561,8 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 			free = NULL;
 			pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free);
-			pmap_invalidate_page(pmap, trunc_2mpage(va));
+			pmap_invalidate_page(pmap, trunc_2mpage(va),
+			    !!(oldpde & PG_G));
 			pmap_free_zero_pages(free);
 			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
@@ -2558,7 +2616,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 	 * Invalidate a stale recursive mapping of the page table page.
 	 */
 	if (va >= VM_MAXUSER_ADDRESS)
-		pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
+		pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va), FALSE);
 
 	/*
 	 * Demote the pv entry.  This depends on the earlier demotion
@@ -2602,7 +2660,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
 	 * PG_G.
 	 */
 	if (oldpde & PG_G)
-		pmap_invalidate_page(kernel_pmap, sva);
+		pmap_invalidate_page(kernel_pmap, sva, TRUE);
 	pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
 	if (oldpde & PG_MANAGED) {
 		pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
@@ -2669,16 +2727,17 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
 static void
 pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, vm_page_t *free)
 {
-	pt_entry_t *pte;
+	pt_entry_t *pte, rpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if ((*pde & PG_V) == 0)
 		return;
 	pte = pmap_pde_to_pte(pde, va);
-	if ((*pte & PG_V) == 0)
+	rpte = *pte;
+	if ((rpte & PG_V) == 0)
 		return;
 	pmap_remove_pte(pmap, pte, va, *pde, free);
-	pmap_invalidate_page(pmap, va);
+	pmap_invalidate_page(pmap, va, !!(rpte & PG_G));
 }
 
 /*
@@ -2796,7 +2855,14 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 		    sva += PAGE_SIZE) {
 			if (*pte == 0) {
 				if (va != va_next) {
-					pmap_invalidate_range(pmap, va, sva);
+					/*
+					 * If PG_G is set,
+					 * pmap_invalidate_all() is
+					 * called later anyway, so
+					 * global can be FALSE.
+					 */
+					pmap_invalidate_range(pmap, va, sva,
+					    FALSE);
 					va = va_next;
 				}
 				continue;
@@ -2811,7 +2877,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 		}
 	}
 	if (va != va_next)
-		pmap_invalidate_range(pmap, va, sva);
+		pmap_invalidate_range(pmap, va, sva, FALSE);
 out:
 	if (anyvalid)
@@ -2878,7 +2944,7 @@ pmap_remove_all(vm_page_t m)
 		if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		pmap_unuse_pt(pmap, pv->pv_va, *pde, &free);
-		pmap_invalidate_page(pmap, pv->pv_va);
+		pmap_invalidate_page(pmap, pv->pv_va, !!(tpte & PG_G));
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
@@ -2920,7 +2986,7 @@ retry:
 			if (!atomic_cmpset_long(pde, oldpde, newpde))
 				goto retry;
 			if (oldpde & PG_G)
-				pmap_invalidate_page(pmap, sva);
+				pmap_invalidate_page(pmap, sva, TRUE);
 			else
 				anychanged = TRUE;
 		}
@@ -3035,7 +3101,7 @@ retry:
 				if (!atomic_cmpset_long(pte, obits, pbits))
 					goto retry;
 				if (obits & PG_G)
-					pmap_invalidate_page(pmap, sva);
+					pmap_invalidate_page(pmap, sva, TRUE);
 				else
 					anychanged = 1;
 			}
@@ -3339,7 +3405,8 @@ validate:
 			    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))
 				vm_page_aflag_clear(om, PGA_WRITEABLE);
 			if (invlva)
-				pmap_invalidate_page(pmap, va);
+				pmap_invalidate_page(pmap, va,
+				    !!(origpte & PG_G));
 		} else
 			pte_store(pte, newpte);
 	}
@@ -3396,7 +3463,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 		if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) {
 			free = NULL;
 			if (pmap_unwire_pte_hold(pmap, va, mpde, &free)) {
-				pmap_invalidate_page(pmap, va);
+				pmap_invalidate_page(pmap, va, FALSE);
 				pmap_free_zero_pages(free);
 			}
 			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
@@ -3559,7 +3626,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 		if (mpte != NULL) {
 			free = NULL;
 			if (pmap_unwire_pte_hold(pmap, va, mpte, &free)) {
-				pmap_invalidate_page(pmap, va);
+				pmap_invalidate_page(pmap, va, FALSE);
 				pmap_free_zero_pages(free);
 			}
 			mpte = NULL;
@@ -3865,7 +3932,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 				if (pmap_unwire_pte_hold(dst_pmap, addr,
 				    dstmpte, &free)) {
 					pmap_invalidate_page(dst_pmap,
-					    addr);
+					    addr, FALSE);
 					pmap_free_zero_pages(free);
 				}
 				goto out;
@@ -4364,7 +4431,7 @@ retry:
 			goto retry;
 		if ((oldpte & PG_M) != 0)
 			vm_page_dirty(m);
-		pmap_invalidate_page(pmap, pv->pv_va);
+		pmap_invalidate_page(pmap, pv->pv_va, !!(oldpte & PG_G));
 	}
 	PMAP_UNLOCK(pmap);
 }
@@ -4443,7 +4510,7 @@ pmap_ts_referenced(vm_page_t m)
 			pte = pmap_pde_to_pte(pde, pv->pv_va);
 			if ((*pte & PG_A) != 0) {
 				atomic_clear_long(pte, PG_A);
-				pmap_invalidate_page(pmap, pv->pv_va);
+				pmap_invalidate_page(pmap, pv->pv_va, FALSE);
 				rtval++;
 				if (rtval > 4)
 					pvn = NULL;
@@ -4508,7 +4575,8 @@ pmap_clear_modify(vm_page_t m)
 				    oldpte & ~(PG_M | PG_RW)))
 					oldpte = *pte;
 				vm_page_dirty(m);
-				pmap_invalidate_page(pmap, va);
+				pmap_invalidate_page(pmap, va,
+				    !!(oldpte & PG_G));
 			}
 		}
 	}
@@ -4522,9 +4590,10 @@ pmap_clear_modify(vm_page_t m)
 		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
 		    " a 2mpage in page %p's pv list", m));
 		pte = pmap_pde_to_pte(pde, pv->pv_va);
-		if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+		oldpte = *pte;
+		if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 			atomic_clear_long(pte, PG_M);
-			pmap_invalidate_page(pmap, pv->pv_va);
+			pmap_invalidate_page(pmap, pv->pv_va, !!(oldpte & PG_G));
 		}
 		PMAP_UNLOCK(pmap);
 	}
@@ -4543,7 +4612,7 @@ pmap_clear_reference(vm_page_t m)
 	pmap_t pmap;
 	pv_entry_t next_pv, pv;
 	pd_entry_t oldpde, *pde;
-	pt_entry_t *pte;
+	pt_entry_t oldpte, *pte;
 	vm_offset_t va;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
@@ -4579,9 +4648,10 @@ pmap_clear_reference(vm_page_t m)
 		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
 		    " a 2mpage in page %p's pv list", m));
 		pte = pmap_pde_to_pte(pde, pv->pv_va);
-		if (*pte & PG_A) {
+		oldpte = *pte;
+		if (oldpte & PG_A) {
 			atomic_clear_long(pte, PG_A);
-			pmap_invalidate_page(pmap, pv->pv_va);
+			pmap_invalidate_page(pmap, pv->pv_va, !!(oldpte & PG_G));
 		}
 		PMAP_UNLOCK(pmap);
 	}
@@ -4655,7 +4725,7 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
 	pa = trunc_page(pa);
 	for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
 		pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
-	pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
+	pmap_invalidate_range(kernel_pmap, va, va + tmpsize, TRUE);
 	pmap_invalidate_cache_range(va, va + tmpsize);
 	return ((void *)(va + offset));
 }
@@ -4687,7 +4757,7 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size)
 	size = roundup(offset + size, PAGE_SIZE);
 	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
 		pmap_kremove(tmpva);
-	pmap_invalidate_range(kernel_pmap, va, tmpva);
+	pmap_invalidate_range(kernel_pmap, va, tmpva, TRUE);
 	kmem_free(kernel_map, base, size);
 }
 
@@ -4737,7 +4807,7 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va)
 	/*
 	 * Invalidate a stale recursive mapping of the page directory page.
 	 */
-	pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va));
+	pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va), FALSE);
 
 	pmap_pdpe_demotions++;
 	CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx"
@@ -4985,7 +5055,7 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
 	 * shouldn't be, etc.
 	 */
 	if (changed) {
-		pmap_invalidate_range(kernel_pmap, base, tmpva);
+		pmap_invalidate_range(kernel_pmap, base, tmpva, FALSE);
 		pmap_invalidate_cache_range(base, tmpva);
 	}
 	return (error);
@@ -5034,7 +5104,7 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate)
 			}
 		}
 		if (changed && invalidate)
-			pmap_invalidate_page(kernel_pmap, va);
+			pmap_invalidate_page(kernel_pmap, va, TRUE);
 		PMAP_UNLOCK(kernel_pmap);
 	}
 }
@@ -5099,15 +5169,20 @@ pmap_activate(struct thread *td)
 	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
+	CPU_ZERO(&pmap->pm_save);
 	cpuid = PCPU_GET(cpuid);
 #ifdef SMP
 	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
 	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
+	CPU_SET_ATOMIC(cpuid, &pmap->pm_save);
 #else
 	CPU_CLR(cpuid, &oldpmap->pm_active);
 	CPU_SET(cpuid, &pmap->pm_active);
+	CPU_SET(cpuid, &pmap->pm_save);
 #endif
 	cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
+	if (pmap->pm_pcid != -1)
+		cr3 |= pmap->pm_pcid;
 	td->td_pcb->pcb_cr3 = cr3;
 	load_cr3(cr3);
 	PCPU_SET(curpmap, pmap);
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index a2363db..11955bd 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -735,6 +735,7 @@ nogo:
 		trap_fatal(frame, eva);
 		return (-1);
 	}
+printf("pid %d cmd %s addr 0x%lx err %lx rip 0x%lx rsp 0x%lx\n", curproc->p_pid, curproc->p_comm, eva, frame->tf_err, frame->tf_rip, frame->tf_rsp);
 
 	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
 }
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index acb2188..b7630df 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -216,6 +216,8 @@ cpu_fork(td1, p2, td2, flags)
 	 */
 	pmap2 = vmspace_pmap(p2->p_vmspace);
 	pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
+	if (pmap2->pm_pcid != -1)
+		pcb2->pcb_cr3 |= pmap2->pm_pcid;
 	pcb2->pcb_r12 = (register_t)fork_return;	/* fork_trampoline argument */
 	pcb2->pcb_rbp = 0;
 	pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
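Both cpu_fork() above and pmap_activate() compose the %cr3 image the same way, and pcb_cr3 never stores CR3_PCID_SAVE, so any reload from the pcb (resume, for instance) performs a full flush of the context, as the cpususpend_handler() comment notes. A sketch of the layout:

/*
 * Sketch only: the %cr3 image kept in pcb_cr3.  With CR4.PCIDE set,
 * bits 0-11 of %cr3 carry the PCID; bit 63 is only meaningful on a
 * write ("do not flush this PCID") and is never stored in the pcb.
 */
static uint64_t
pmap_cr3_sketch(pmap_t pmap)
{
	uint64_t cr3;

	cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);	/* page-aligned */
	if (pmap->pm_pcid != -1)
		cr3 |= pmap->pm_pcid;			/* bits 0..11 */
	return (cr3);
}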
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index ff11ea1..b49f39a 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -48,6 +48,7 @@ extern	u_int	amd_pminfo;
 extern	u_int	via_feature_rng;
 extern	u_int	via_feature_xcrypt;
 extern	u_int	cpu_clflush_line_size;
+extern	u_int	cpu_stdext_feature;
 extern	u_int	cpu_fxsr;
 extern	u_int	cpu_high;
 extern	u_int	cpu_id;
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index d07dbac..256c594 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -76,6 +76,8 @@
 	struct system_segment_descriptor *pc_ldt;			\
 	/* Pointer to the CPU TSS descriptor */				\
 	struct system_segment_descriptor *pc_tss;			\
+	uint64_t pc_pm_save_cnt;					\
+	char	pc_invpcid_descr[16];					\
 	u_int	pc_cmci_mask	/* MCx banks for CMCI */		\
 	PCPU_XEN_FIELDS
 
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 1b8108a..d967d85 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -253,6 +253,8 @@ struct pmap {
 	pml4_entry_t		*pm_pml4;	/* KVA of level 4 page table */
 	TAILQ_HEAD(,pv_chunk)	pm_pvchunk;	/* list of mappings in pmap */
 	cpuset_t		pm_active;	/* active on cpus */
+	cpuset_t		pm_save;	/* Context valid on cpus mask */
+	int			pm_pcid;	/* context id */
 	/* spare u_int here due to padding */
 	struct pmap_statistics	pm_stats;	/* pmap statistics */
 	vm_page_t		pm_root;	/* spare page table pages */
@@ -299,6 +301,12 @@ struct pv_chunk {
 	struct pv_entry		pc_pventry[_NPCPV];
 };
 
+struct invpcid_descr {
+	uint64_t	pcid:12	__packed;
+	uint64_t	pad:52	__packed;
+	uint64_t	addr;
+} __packed;
+
 #ifdef	_KERNEL
 
 extern caddr_t	CADDR1;
@@ -325,8 +333,8 @@ void	*pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
 boolean_t pmap_page_is_mapped(vm_page_t m);
 void	pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
 void	pmap_unmapdev(vm_offset_t, vm_size_t);
-void	pmap_invalidate_page(pmap_t, vm_offset_t);
-void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
+void	pmap_invalidate_page(pmap_t, vm_offset_t, boolean_t);
+void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t, boolean_t);
 void	pmap_invalidate_all(pmap_t);
 void	pmap_invalidate_cache(void);
 void	pmap_invalidate_cache_pages(vm_page_t *pages, int count);
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index de686b7..71e1630 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -54,6 +54,8 @@ inthand_t
 	IDTVEC(cpususpend),	/* CPU suspends & waits to be resumed */
 	IDTVEC(rendezvous);	/* handle CPU rendezvous */
 
+struct pmap;
+
 /* functions in mp_machdep.c */
 void	cpu_add(u_int apic_id, char boot_cpu);
 void	cpustop_handler(void);
@@ -66,13 +68,14 @@ int	ipi_nmi_handler(void);
 void	ipi_selected(cpuset_t cpus, u_int ipi);
 u_int	mp_bootaddress(u_int);
 void	smp_cache_flush(void);
-void	smp_invlpg(vm_offset_t addr);
-void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
-void	smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void	smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
+void	smp_invlpg(struct pmap *pmap, vm_offset_t addr);
+void	smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr);
+void	smp_invlpg_range(struct pmap *pmap, vm_offset_t startva,
 	    vm_offset_t endva);
-void	smp_invltlb(void);
-void	smp_masked_invltlb(cpuset_t mask);
+void	smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap,
+	    vm_offset_t startva, vm_offset_t endva);
+void	smp_invltlb(struct pmap *pmap);
+void	smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
 
 #endif /* !LOCORE */
 #endif /* SMP */
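struct invpcid_descr must match the 128-bit memory operand of INVPCID exactly: PCID in bits 0-11 of the low quadword, the remainder of that quadword reserved as zero, and the linear address in the high quadword. The invlrng IPI handler copies the descriptor into the 16-byte pc_invpcid_descr per-CPU buffer so it can advance the address field in place. Compile-time checks in this spirit would pin the layout the hand-assembled invpcid relies on (sketch, not part of the patch):

/*
 * Layout checks for the INVPCID descriptor defined above.
 */
CTASSERT(sizeof(struct invpcid_descr) == 16);
CTASSERT(__offsetof(struct invpcid_descr, addr) == 8);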
diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h
index 7ba5f9f..be102d5 100644
--- a/sys/amd64/include/specialreg.h
+++ b/sys/amd64/include/specialreg.h
@@ -52,6 +52,8 @@
 #define	CR0_NW	0x20000000	/* Not Write-through */
 #define	CR0_CD	0x40000000	/* Cache Disable */
 
+#define	CR3_PCID_SAVE	0x8000000000000000
+
 /*
  * Bits in PPro special registers
  */
@@ -66,6 +68,7 @@
 #define	CR4_PCE	0x00000100	/* Performance monitoring counter enable */
 #define	CR4_FXSR 0x00000200	/* Fast FPU save/restore used by OS */
 #define	CR4_XMM	0x00000400	/* enable SIMD/MMX2 to use except 16 */
+#define	CR4_PCIDE 0x00020000	/* Enable Context ID */
 #define	CR4_XSAVE 0x00040000	/* XSETBV/XGETBV */
 
 /*
@@ -255,6 +258,10 @@
 #define	AMDID_COREID_SIZE	0x0000f000
 #define	AMDID_COREID_SIZE_SHIFT	12
 
+#define	CPUID_STDEXT_GSFSBASE	0x00000001
+#define	CPUID_STDEXT_SMEP	0x00000080
+#define	CPUID_STDEXT_INVPCID	0x00000400
+
 /*
  * CPUID manufacturers identifiers
  */
diff --git a/sys/kern/subr_unit.c b/sys/kern/subr_unit.c
index 7f90a02..ac2b871 100644
--- a/sys/kern/subr_unit.c
+++ b/sys/kern/subr_unit.c
@@ -68,8 +68,8 @@
  */
 
 #include <sys/types.h>
-#include <sys/queue.h>
 #include <sys/bitstring.h>
+#include <sys/_unrhdr.h>
 
 #ifdef _KERNEL
 
@@ -187,22 +187,6 @@ CTASSERT(sizeof(struct unr) == sizeof(struct unrb));
 /* Number of bits in the bitmap */
 #define	NBITS	((int)sizeof(((struct unrb *)NULL)->map) * 8)
 
-/* Header element for a unr number space. */
-
-struct unrhdr {
-	TAILQ_HEAD(unrhd,unr)	head;
-	u_int			low;	/* Lowest item */
-	u_int			high;	/* Highest item */
-	u_int			busy;	/* Count of allocated items */
-	u_int			alloc;	/* Count of memory allocations */
-	u_int			first;	/* items in allocated from start */
-	u_int			last;	/* items free at end */
-	struct mtx		*mtx;
-	TAILQ_HEAD(unrfr,unr)	ppfree;	/* Items to be freed after mtx
-					   lock dropped */
-};
-
-
 #if defined(DIAGNOSTIC) || !defined(_KERNEL)
 /*
  * Consistency check function.
@@ -315,20 +299,12 @@ clean_unrhdr(struct unrhdr *uh)
 	mtx_unlock(uh->mtx);
 }
 
-/*
- * Allocate a new unrheader set.
- *
- * Highest and lowest valid values given as parameters.
- */
-
-struct unrhdr *
-new_unrhdr(int low, int high, struct mtx *mutex)
+void
+init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex)
 {
-	struct unrhdr *uh;
 
 	KASSERT(low >= 0 && low <= high,
 	    ("UNR: use error: new_unrhdr(%d, %d)", low, high));
-	uh = Malloc(sizeof *uh);
 	if (mutex != NULL)
 		uh->mtx = mutex;
 	else
@@ -340,6 +316,21 @@ new_unrhdr(int low, int high, struct mtx *mutex)
 	uh->first = 0;
 	uh->last = 1 + (high - low);
 	check_unrhdr(uh, __LINE__);
+}
+
+/*
+ * Allocate a new unrheader set.
+ *
+ * Highest and lowest valid values given as parameters.
+ */
+
+struct unrhdr *
+new_unrhdr(int low, int high, struct mtx *mutex)
+{
+	struct unrhdr *uh;
+
+	uh = Malloc(sizeof *uh);
+	init_unrhdr(uh, low, high, mutex);
 	return (uh);
 }
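init_unrhdr() exists because new_unrhdr() must Malloc(), while pmap_bootstrap() runs long before malloc(9) works; exporting the header layout through the new <sys/_unrhdr.h> lets such early callers embed the header in statically allocated storage and seed it in place. A usage sketch mirroring the pmap.c hunk earlier in this patch:

/*
 * Sketch only: embedding a unit-number space without malloc(9).
 */
static struct unrhdr pcid_unr;
static struct mtx pcid_mtx;

static void
pcid_space_init_sketch(void)
{
	mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
	init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx); /* 1..4095 */
}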
diff --git a/sys/sys/_unrhdr.h b/sys/sys/_unrhdr.h
new file mode 100644
index 0000000..f3c25d1
--- /dev/null
+++ b/sys/sys/_unrhdr.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2004 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_UNRHDR_H
+#define _SYS_UNRHDR_H
+
+#include <sys/queue.h>
+
+struct mtx;
+
+/* Header element for a unr number space. */
+
+struct unrhdr {
+	TAILQ_HEAD(unrhd,unr)	head;
+	u_int			low;	/* Lowest item */
+	u_int			high;	/* Highest item */
+	u_int			busy;	/* Count of allocated items */
+	u_int			alloc;	/* Count of memory allocations */
+	u_int			first;	/* items in allocated from start */
+	u_int			last;	/* items free at end */
+	struct mtx		*mtx;
+	TAILQ_HEAD(unrfr,unr)	ppfree;	/* Items to be freed after mtx
+					   lock dropped */
+};
+
+#endif
diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h
index 3b2fdbb..9b6a533 100644
--- a/sys/sys/cpuset.h
+++ b/sys/sys/cpuset.h
@@ -139,6 +139,13 @@
 		    (s)->__bits[__i]);					\
 } while (0)
 
+#define	CPU_AND_ATOMIC(d, s) do {					\
+	__size_t __i;							\
+	for (__i = 0; __i < _NCPUWORDS; __i++)				\
+		atomic_clear_long(&(d)->__bits[__i],			\
+		    ~(s)->__bits[__i]);					\
+} while (0)
+
 #define	CPU_COPY_STORE_REL(f, t) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < _NCPUWORDS; __i++)				\
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index e5d60d4..b3eaabb 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -375,6 +375,7 @@ int	root_mounted(void);
  */
struct unrhdr;
 struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex);
+void	init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex);
 void	delete_unrhdr(struct unrhdr *uh);
 void	clean_unrhdr(struct unrhdr *uh);
 void	clean_unrhdrl(struct unrhdr *uh);
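CPU_AND_ATOMIC(d, s) performs d &= s one word at a time, each word with an atomic read-modify-write (clearing the bits of ~s is the same as ANDing with s); the set as a whole is not updated atomically, which is sufficient for its single use in this patch, pruning pm_save down to pm_active before a shootdown:

/*
 * Sketch only: per-word expansion and the sole caller pattern.
 */
for (__i = 0; __i < _NCPUWORDS; __i++)
	atomic_clear_long(&(d)->__bits[__i], ~(s)->__bits[__i]);

CPU_AND_ATOMIC(&pmap->pm_save, &pmap->pm_active);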