diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 6465247..876d39e 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -43,6 +43,12 @@
 
 #include "assym.s"
 
+#ifdef SMP
+#define LK	lock ;
+#else
+#define LK
+#endif
+
 /*
  * I/O Interrupt Entry Point.  Rather than having one entry point for
  * each interrupt source, we use one entry point for each 32-bit word
@@ -133,6 +139,38 @@ IDTVEC(errorint)
  * Global address space TLB shootdown.
  */
 	.text
+
+#define	NAKE_INTR_CS	24
+
+	SUPERALIGN_TEXT
+global_invltlb:
+	movl	%cr4,%eax
+	andl	$~0x80,%eax
+	movl	%eax,%cr4
+	orl	$0x80,%eax
+	movl	%eax,%cr4
+invltlb_ret_clear_pm_save:
+	movq	smp_tlb_pmap,%rdx
+	testq	%rdx,%rdx
+	jz	invltlb_ret
+	testb	$SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
+	jz	1f
+	swapgs
+1:
+	movl	PCPU(CPUID),%eax
+	LK btcl	%eax,PM_SAVE(%rdx)
+	testb	$SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
+	jz	2f
+	swapgs
+2:
+	SUPERALIGN_TEXT
+invltlb_ret:
+	movq	lapic, %rax
+	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	LK incl	smp_tlb_wait
+	popq	%rdx
+	popq	%rax
+	jmp	doreti_iret
+
 	SUPERALIGN_TEXT
 IDTVEC(invltlb)
 #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
@@ -149,18 +187,44 @@ IDTVEC(invltlb)
 #endif
 
 	pushq	%rax
+	pushq	%rdx
 
-	movq	%cr3, %rax		/* invalidate the TLB */
-	movq	%rax, %cr3
-
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popq	%rax
-	jmp	doreti_iret
+	movq	%cr3,%rax
+	cmpl	$0,pmap_pcid_enabled
+	je	2f
+
+	movq	$smp_tlb_invpcid,%rdx
+	cmpl	$0,(%rdx)
+	je	global_invltlb
+	cmpl	$-1,(%rdx)
+	je	global_invltlb
+
+	/*
+	 * Non-zero smp_tlb_invpcid, only invalidate TLB for entries with
+	 * current PCID.
+	 */
+	cmpl	$0,invpcid_works
+	je	1f
+	/* Use invpcid if available. */
+	movl	$1,%eax		/* INVPCID_CTX */
+	/* invpcid (%rdx),%rax */
+	.byte	0x66,0x0f,0x38,0x82,0x02
+	jmp	invltlb_ret_clear_pm_save
+1:
+	/* Otherwise reload %cr3 twice. */
+	movq	pcid_cr3,%rdx
+	cmpq	%rax,%rdx
+	je	2f
+	movq	%rdx,%cr3	/* Invalidate, bit 63 is zero. */
+	btsq	$63,%rax
+
+	/*
+	 * Invalidate the TLB if PCID is not enabled.
+	 * Restore the old address space.
+	 */
+2:
+	movq	%rax,%cr3
+	jmp	invltlb_ret_clear_pm_save
 
 /*
  * Single page TLB shootdown
@@ -182,18 +246,54 @@ IDTVEC(invlpg)
 #endif
 
 	pushq	%rax
-
-	movq	smp_tlb_addr1, %rax
-	invlpg	(%rax)			/* invalidate single page */
-
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popq	%rax
-	jmp	doreti_iret
+	pushq	%rdx
+	movq	$smp_tlb_invpcid,%rdx
+	cmpl	$0,pmap_pcid_enabled
+	je	3f
+	cmpl	$0,invpcid_works
+	jne	2f
+
+	/* kernel pmap - use invlpg to invalidate global mapping */
+	cmpl	$0,(%rdx)
+	je	3f
+	cmpl	$-1,(%rdx)
+	je	global_invltlb
+
+	/*
+	 * PCID supported, but INVPCID is not.
+	 * Temporarily switch to the target address space and do INVLPG.
+	 */
+	pushq	%rcx
+	movq	%cr3,%rcx
+	movq	pcid_cr3,%rax
+	cmp	%rcx,%rax
+	je	1f
+	btsq	$63,%rax
+	movq	%rax,%cr3
+1:	movq	8(%rdx),%rax
+	invlpg	(%rax)
+	btsq	$63,%rcx
+	movq	%rcx,%cr3
+	popq	%rcx
+	jmp	invltlb_ret
+
+	/*
+	 * Invalidate the TLB entry using INVPCID_ADDR.
+	 */
+2:
+	xorl	%eax,%eax
+/* invpcid (%rdx),%rax */
+	.byte	0x66,0x0f,0x38,0x82,0x02
+	jmp	invltlb_ret
+
+	/*
+	 * PCID is not supported or kernel pmap.
+	 * Invalidate single page using INVLPG.
+	 */
+3:
+	movq	8(%rdx),%rax
+	invlpg	(%rax)
+	jmp	invltlb_ret
 
 /*
  * Page range TLB shootdown.
@@ -216,23 +316,76 @@ IDTVEC(invlrng)
 
 	pushq	%rax
 	pushq	%rdx
-
-	movq	smp_tlb_addr1, %rdx
-	movq	smp_tlb_addr2, %rax
+	movq	$smp_tlb_invpcid,%rdx
+	cmpl	$0,pmap_pcid_enabled
+	je	invlrng_single_page
+	cmpl	$0,invpcid_works
+	jne	invlrng_invpcid
+
+	/* kernel pmap - use invlpg to invalidate global mapping */
+	cmpl	$0,(%rdx)
+	je	invlrng_single_page
+	cmpl	$-1,(%rdx)
+	je	global_invltlb
+
+	pushq	%rcx
+	movq	%cr3,%rcx
+	movq	pcid_cr3,%rax
+	cmpq	%rcx,%rax
+	je	1f
+	btsq	$63,%rax
+	movq	%rax,%cr3
+1:
+	movq	8(%rdx),%rdx
+	movq	smp_tlb_addr2,%rax
+2:
+	invlpg	(%rdx)
+	addq	$PAGE_SIZE,%rdx
+	cmpq	%rax,%rdx
+	jb	2b
+	btsq	$63,%rcx
+	movq	%rcx,%cr3
+	popq	%rcx
+	jmp	invltlb_ret
+
+invlrng_invpcid:
+	testb	$SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
+	jz	1f
+	swapgs
+1:
+	pushq	%rcx
+	movq	(%rdx),%rcx
+	movq	%rcx,PCPU(INVPCID_DESCR)
+	movq	8(%rdx),%rax
+	movq	%rax,PCPU(INVPCID_DESCR)+8
+	movq	smp_tlb_addr2,%rcx
+	xorl	%eax,%eax
+	movq	$PC_INVPCID_DESCR,%rdx
+	gs
+	subq	8(%rdx),%rcx
+	shrq	$PAGE_SHIFT,%rcx
+2:
+	gs
+/* invpcid (%rdx),%rax */
+	.byte	0x66,0x0f,0x38,0x82,0x02
+	gs
+	addq	$PAGE_SIZE,8(%rdx)
+	dec	%rcx
+	jne	2b
+	popq	%rcx
+	testb	$SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
+	jz	invltlb_ret
+	swapgs
+	jmp	invltlb_ret
+
+invlrng_single_page:
+	movq	8(%rdx),%rdx
+	movq	smp_tlb_addr2,%rax
 1:
 	invlpg	(%rdx)			/* invalidate single page */
-	addq	$PAGE_SIZE, %rdx
-	cmpq	%rax, %rdx
+	addq	$PAGE_SIZE,%rdx
+	cmpq	%rax,%rdx
 	jb	1b
-
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popq	%rdx
-	popq	%rax
-	jmp	doreti_iret
+	jmp	invltlb_ret
 
 /*
  * Invalidate cache.
@@ -249,17 +402,9 @@ IDTVEC(invlcache)
 #endif
 
 	pushq	%rax
-
+	pushq	%rdx
 	wbinvd
-
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popq	%rax
-	jmp	doreti_iret
+	jmp	invltlb_ret
 
 /*
  * Handler for IPIs sent via the per-cpu IPI bitmap.
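Note (illustration, not part of the patch): the shootdown handlers above branch on pmap_pcid_enabled and invpcid_works, which pmap_bootstrap() derives from the CPUID2_PCID and CPUID_STDEXT_INVPCID feature bits. As a rough standalone sketch of those same architectural bits (ECX bit 17 of leaf 1, EBX bit 10 of leaf 7), a userland check could look like this; the program and its names are hypothetical:

/* pcidchk.c - print whether the CPU advertises PCID and INVPCID.
 * Illustrative only; it mirrors the cpu_feature2/cpu_stdext_feature
 * tests that pmap_bootstrap() performs in this patch.
 */
#include <stdio.h>
#include <stdint.h>

static void
cpuid_count(uint32_t leaf, uint32_t subleaf, uint32_t regs[4])
{
	__asm __volatile("cpuid"
	    : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
	    : "a" (leaf), "c" (subleaf));
}

int
main(void)
{
	uint32_t r[4];

	cpuid_count(1, 0, r);
	printf("PCID:    %s\n", (r[2] & (1u << 17)) ? "yes" : "no");
	cpuid_count(7, 0, r);
	printf("INVPCID: %s\n", (r[1] & (1u << 10)) ? "yes" : "no");
	return (0);
}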
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index ed1ccb5..ac30990 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -77,8 +77,7 @@ ENTRY(cpu_throw)
 	LK btrl	%eax,PM_ACTIVE(%rdx)	/* clear old */
 1:
 	movq	TD_PCB(%rsi),%r8	/* newtd->td_pcb */
-	movq	PCB_CR3(%r8),%rdx
-	movq	%rdx,%cr3		/* new address space */
+	movq	PCB_CR3(%r8),%rcx	/* new address space */
 	jmp	swact
 END(cpu_throw)
 
@@ -145,20 +144,41 @@ ctx_switch_xsave:
 	SETLK	%rdx, TD_LOCK(%rdi)	/* Release the old thread */
 	jmp	sw1
 swinact:
-	movq	%rcx,%cr3		/* new address space */
-	movl	PCPU(CPUID), %eax
+	movl	PCPU(CPUID),%eax
 	/* Release bit from old pmap->pm_active */
-	movq	PCPU(CURPMAP),%rcx
-	LK btrl	%eax,PM_ACTIVE(%rcx)	/* clear old */
-	SETLK	%rdx, TD_LOCK(%rdi)	/* Release the old thread */
+	movq	PCPU(CURPMAP),%r12
+	LK btrl	%eax,PM_ACTIVE(%r12)	/* clear old */
+	SETLK	%rdx,TD_LOCK(%rdi)	/* Release the old thread */
 swact:
 	/* Set bit in new pmap->pm_active */
 	movq	TD_PROC(%rsi),%rdx	/* newproc */
 	movq	P_VMSPACE(%rdx), %rdx
 	addq	$VM_PMAP,%rdx
+	cmpl	$-1,PM_PCID(%rdx)
+	je	1f
+	LK btsl	%eax,PM_SAVE(%rdx)
+	jnc	1f
+	btsq	$63,%rcx		/* CR3_PCID_SAVE */
+	incq	PCPU(PM_SAVE_CNT)
+1:
+	movq	%rcx,%cr3		/* new address space */
 	LK btsl	%eax,PM_ACTIVE(%rdx)	/* set new */
 	movq	%rdx,PCPU(CURPMAP)
+
+	/*
+	 * We might lose the race and another CPU might have changed
+	 * the pmap after we set our bit in pmap->pm_save.  Recheck.
+	 * Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was
+	 * modified, causing TLB flush for this pcid.
+	 */
+	btrq	$63,%rcx
+	jnc	1f
+	LK btsl	%eax,PM_SAVE(%rdx)
+	jc	1f
+	decq	PCPU(PM_SAVE_CNT)
+	movq	%rcx,%cr3
+1:
+
 sw1:
 #if defined(SCHED_ULE) && defined(SMP)
 	/* Wait for the new thread to become unblocked */
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 3043bb5..62017e7 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$");
 ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
+ASSYM(PM_SAVE, offsetof(struct pmap, pm_save));
+ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid));
 ASSYM(P_MD, offsetof(struct proc, p_md));
 ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
 
@@ -225,6 +227,8 @@ ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p));
 ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
 ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
 ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
+ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
+ASSYM(PC_INVPCID_DESCR, offsetof(struct pcpu, pc_invpcid_descr));
 
 ASSYM(LA_VER, offsetof(struct LAPIC, version));
 ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 7a39ef8..deca6a6 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1903,7 +1903,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
-	thread0.td_pcb->pcb_cr3 = KPML4phys;
+	thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */
 	thread0.td_frame = &proc0_tf;
 
 	env = getenv("kernelname");
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 79aeb9c..2f1df0a 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -103,9 +103,11 @@ struct pcb stoppcbs[MAXCPU];
 struct pcb **susppcbs;
 
 /* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
 vm_offset_t smp_tlb_addr2;
+struct invpcid_descr smp_tlb_invpcid;
 volatile int smp_tlb_wait;
+uint64_t pcid_cr3;
+pmap_t smp_tlb_pmap;
 
 #ifdef COUNT_IPIS
 /* Interrupt counts. */
@@ -599,6 +601,8 @@ cpu_mp_announce(void)
 	}
 }
 
+extern int pmap_pcid_enabled;
+
 /*
  * AP CPU's call this to initialize themselves.
  */
@@ -759,6 +763,8 @@ init_secondary(void)
 	 */
 	load_cr4(rcr4() | CR4_PGE);
 
+	if (pmap_pcid_enabled)
+		load_cr4(rcr4() | CR4_PCIDE);
 	load_ds(_udatasel);
 	load_es(_udatasel);
 	load_fs(_ufssel);
@@ -1110,7 +1116,8 @@ ipi_send_cpu(int cpu, u_int ipi)
  * Flush the TLB on all other CPU's
 */
 static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
+    vm_offset_t addr2)
 {
 	u_int ncpu;
 
@@ -1120,8 +1127,16 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 	if (!(read_rflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
 	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_addr1 = addr1;
+	smp_tlb_invpcid.addr = addr1;
+	if (pmap == NULL) {
+		smp_tlb_invpcid.pcid = 0;
+	} else {
+		smp_tlb_invpcid.pcid = pmap->pm_pcid;
+		pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
+		    (pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid);
+	}
 	smp_tlb_addr2 = addr2;
+	smp_tlb_pmap = pmap;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	ipi_all_but_self(vector);
 	while (smp_tlb_wait < ncpu)
@@ -1130,7 +1145,8 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 }
 
 static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
+    vm_offset_t addr1, vm_offset_t addr2)
 {
 	int cpu, ncpu, othercpus;
 
@@ -1146,8 +1162,16 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_of
 	if (!(read_rflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
 	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_addr1 = addr1;
+	smp_tlb_invpcid.addr = addr1;
+	if (pmap == NULL) {
+		smp_tlb_invpcid.pcid = 0;
+	} else {
+		smp_tlb_invpcid.pcid = pmap->pm_pcid;
+		pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
+		    (pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid);
+	}
 	smp_tlb_addr2 = addr2;
+	smp_tlb_pmap = pmap;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	if (CPU_ISFULLSET(&mask)) {
 		ncpu = othercpus;
@@ -1173,15 +1197,15 @@ smp_cache_flush(void)
 {
 
 	if (smp_started)
-		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
+		smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
 }
 
 void
-smp_invltlb(void)
+smp_invltlb(pmap_t pmap)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+		smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_global++;
 #endif
@@ -1189,11 +1213,11 @@ smp_invltlb(void)
 }
 
 void
-smp_invlpg(vm_offset_t addr)
+smp_invlpg(pmap_t pmap, vm_offset_t addr)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+		smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_page++;
 #endif
@@ -1201,11 +1225,11 @@ smp_invlpg(vm_offset_t addr)
 }
 
 void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
+smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+		smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_range++;
 		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
@@ -1214,11 +1238,11 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 }
 
 void
-smp_masked_invltlb(cpuset_t mask)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_global++;
 #endif
@@ -1226,11 +1250,11 @@ smp_masked_invltlb(cpuset_t mask)
 }
 
 void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_page++;
 #endif
@@ -1238,11 +1262,13 @@ smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
 }
 
 void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
+    vm_offset_t addr2)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1,
+		    addr2);
 #ifdef COUNT_XINVLTLB_HITS
 		ipi_masked_range++;
 		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
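Note (illustration, not part of the patch): pcid_cr3 above packs the allocated PCID into the low 12 bits of the PML4 physical address, and the shootdown and context-switch paths set bit 63 (CR3_PCID_SAVE) when the cached context may be kept on load. A minimal standalone sketch of that packing, with a made-up physical address:

/* cr3pcid.c - illustrate CR3 composition when PCID is in use.
 * Mirrors DMAP_TO_PHYS(...) | pm_pcid and the btsq $63 (CR3_PCID_SAVE)
 * operation used by this patch; the address below is hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define	CR3_PCID_MASK	0xfffULL	/* low 12 bits carry the PCID */
#define	CR3_PCID_SAVE	(1ULL << 63)	/* do not flush this PCID on load */

int
main(void)
{
	uint64_t pml4_phys = 0x1234000;	/* hypothetical, page-aligned */
	uint64_t pcid = 42;		/* as if allocated from pcid_unr */
	uint64_t cr3, cr3_noflush;

	cr3 = pml4_phys | (pcid & CR3_PCID_MASK);
	cr3_noflush = cr3 | CR3_PCID_SAVE;
	printf("cr3 (flushing):      %#llx\n", (unsigned long long)cr3);
	printf("cr3 (CR3_PCID_SAVE): %#llx\n", (unsigned long long)cr3_noflush);
	return (0);
}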
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 7fb1277..3a471b9 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -116,11 +116,8 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#ifdef SMP
+#include
 #include
-#else
-#include
-#endif
 #include
 #include
 
@@ -250,6 +247,53 @@ static struct md_page *pv_table;
 pt_entry_t *CMAP1 = 0;
 caddr_t CADDR1 = 0;
 
+static struct unrhdr pcid_unr;
+static struct mtx pcid_mtx;
+int pmap_pcid_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN, &pmap_pcid_enabled,
+    0, "Is TLB Context ID enabled ?");
+int invpcid_works = 0;
+
+/*
+ * Perform the guaranteed invalidation of all TLB entries.  This
+ * includes the global entries, and entries in all PCIDs, not only the
+ * current context.  The function works both on non-PCID CPUs and CPUs
+ * with the PCID turned off or on.  See IA-32 SDM Vol. 3a 4.10.4.1
+ * Operations that Invalidate TLBs and Paging-Structure Caches.
+ */
+static __inline void
+invltlb_globpcid(void)
+{
+	uint64_t cr4;
+
+	cr4 = rcr4();
+	load_cr4(cr4 & ~CR4_PGE);
+	/*
+	 * Although preemption at this point could be detrimental to
+	 * performance, it would not lead to an error.  PG_G is simply
+	 * ignored if CR4.PGE is clear.  Moreover, in case this block
+	 * is re-entered, the load_cr4() either above or below will
+	 * modify CR4.PGE flushing the TLB.
+	 */
+	load_cr4(cr4 | CR4_PGE);
+}
+
+static int
+pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
+{
+	int i;
+	uint64_t res;
+
+	res = 0;
+	CPU_FOREACH(i) {
+		res += cpuid_to_pcpu[i]->pc_pm_save_cnt;
+	}
+	return (sysctl_handle_64(oidp, &res, 0, req));
+}
+SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
+    CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
+    "Count of saved TLB context on switch");
+
 /*
  * Crashdump maps.
  */
@@ -685,6 +729,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
+	CPU_ZERO(&kernel_pmap->pm_save);
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
 	/*
@@ -716,6 +761,19 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 
 	/* Initialize the PAT MSR. */
 	pmap_init_pat();
+
+	/* Initialize TLB Context Id. */
+	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
+	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
+		load_cr4(rcr4() | CR4_PCIDE);
+		mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
+		init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx);
+		/* Check for INVPCID support */
+		invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
+		    != 0;
+		kernel_pmap->pm_pcid = 0;
+	} else
+		pmap_pcid_enabled = 0;
 }
 
 /*
@@ -952,7 +1010,6 @@ pmap_cache_bits(int mode, boolean_t is_pde)
 static void
 pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
 {
-	u_long cr4;
 
 	if ((newpde & PG_PS) == 0)
 		/* Demotion: flush a specific 2MB page mapping. */
@@ -968,19 +1025,34 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
 		 * Promotion: flush every 4KB page mapping from the TLB,
 		 * including any global (PG_G) mappings.
 		 */
-		cr4 = rcr4();
-		load_cr4(cr4 & ~CR4_PGE);
-		/*
-		 * Although preemption at this point could be detrimental to
-		 * performance, it would not lead to an error.  PG_G is simply
-		 * ignored if CR4.PGE is clear.  Moreover, in case this block
-		 * is re-entered, the load_cr4() either above or below will
-		 * modify CR4.PGE flushing the TLB.
-		 */
-		load_cr4(cr4 | CR4_PGE);
+		invltlb_globpcid();
 	}
 }
 
 #ifdef SMP
+
+static void
+pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va)
+{
+	struct invpcid_descr d;
+	uint64_t cr3;
+
+	if (invpcid_works) {
+		d.pcid = pmap->pm_pcid;
+		d.pad = 0;
+		d.addr = va;
+		invpcid(&d, INVPCID_ADDR);
+		return;
+	}
+
+	cr3 = rcr3();
+	critical_enter();
+	load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid |
+	    CR3_PCID_SAVE);
+	invlpg(va);
+	load_cr3(cr3 | CR3_PCID_SAVE);
+	critical_exit();
+}
+
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
  *
@@ -1008,21 +1080,68 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 	sched_pin();
 	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-		invlpg(va);
-		smp_invlpg(va);
+		if (!pmap_pcid_enabled) {
+			invlpg(va);
+		} else {
+			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
+				if (pmap == vmspace_pmap(curproc->p_vmspace))
+					invlpg(va);
+				else
+					pmap_invalidate_page_pcid(pmap, va);
+			} else {
+				invltlb_globpcid();
+			}
+		}
+		smp_invlpg(pmap, va);
 	} else {
 		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
 		CPU_CLR(cpuid, &other_cpus);
 		if (CPU_ISSET(cpuid, &pmap->pm_active))
 			invlpg(va);
-		CPU_AND(&other_cpus, &pmap->pm_active);
+		else if (pmap_pcid_enabled) {
+			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
+				pmap_invalidate_page_pcid(pmap, va);
+			else
+				invltlb_globpcid();
+		}
+		if (pmap_pcid_enabled)
+			CPU_AND(&other_cpus, &pmap->pm_save);
+		else
+			CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invlpg(other_cpus, va);
+			smp_masked_invlpg(other_cpus, pmap, va);
 	}
 	sched_unpin();
 }
 
+static void
+pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	struct invpcid_descr d;
+	uint64_t cr3;
+	vm_offset_t addr;
+
+	if (invpcid_works) {
+		d.pcid = pmap->pm_pcid;
+		d.pad = 0;
+		for (addr = sva; addr < eva; addr += PAGE_SIZE) {
+			d.addr = addr;
+			invpcid(&d, INVPCID_ADDR);
+		}
+		return;
+	}
+
+	cr3 = rcr3();
+	critical_enter();
+	load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid |
+	    CR3_PCID_SAVE);
+	for (addr = sva; addr < eva; addr += PAGE_SIZE)
+		invlpg(addr);
+	load_cr3(cr3 | CR3_PCID_SAVE);
+	critical_exit();
+}
+
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
@@ -1032,19 +1151,43 @@
 	sched_pin();
 	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-		for (addr = sva; addr < eva; addr += PAGE_SIZE)
-			invlpg(addr);
-		smp_invlpg_range(sva, eva);
+		if (!pmap_pcid_enabled) {
+			for (addr = sva; addr < eva; addr += PAGE_SIZE)
+				invlpg(addr);
+		} else {
+			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
+				if (pmap == vmspace_pmap(curproc->p_vmspace)) {
+					for (addr = sva; addr < eva;
+					    addr += PAGE_SIZE)
+						invlpg(addr);
+				} else {
+					pmap_invalidate_range_pcid(pmap,
+					    sva, eva);
+				}
+			} else {
+				invltlb_globpcid();
+			}
+		}
+		smp_invlpg_range(pmap, sva, eva);
 	} else {
 		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
 		CPU_CLR(cpuid, &other_cpus);
-		if (CPU_ISSET(cpuid, &pmap->pm_active))
+		if (CPU_ISSET(cpuid, &pmap->pm_active)) {
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
-		CPU_AND(&other_cpus, &pmap->pm_active);
+		} else if (pmap_pcid_enabled) {
+			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
+				pmap_invalidate_range_pcid(pmap, sva, eva);
+			else
+				invltlb_globpcid();
+		}
+		if (pmap_pcid_enabled)
+			CPU_AND(&other_cpus, &pmap->pm_save);
+		else
+			CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invlpg_range(other_cpus, sva, eva);
+			smp_masked_invlpg_range(other_cpus, pmap, sva, eva);
 	}
 	sched_unpin();
 }
@@ -1053,21 +1196,63 @@ void
 pmap_invalidate_all(pmap_t pmap)
 {
 	cpuset_t other_cpus;
+	struct invpcid_descr d;
+	uint64_t cr3;
 	u_int cpuid;
 
 	sched_pin();
-	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-		invltlb();
-		smp_invltlb();
+	cpuid = PCPU_GET(cpuid);
+	if (pmap == kernel_pmap ||
+	    (pmap_pcid_enabled && !CPU_CMP(&pmap->pm_save, &all_cpus)) ||
+	    !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+		if (invpcid_works) {
+			bzero(&d, sizeof(d));
+			invpcid(&d, INVPCID_CTXGLOB);
+		} else {
+			invltlb_globpcid();
+		}
+		CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
+		smp_invltlb(pmap);
 	} else {
-		cpuid = PCPU_GET(cpuid);
 		other_cpus = all_cpus;
 		CPU_CLR(cpuid, &other_cpus);
+
+		/*
+		 * This logic is duplicated in the Xinvltlb shootdown
+		 * IPI handler.
+		 */
+		if (pmap_pcid_enabled) {
+			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
+				if (invpcid_works) {
+					d.pcid = pmap->pm_pcid;
+					d.pad = 0;
+					d.addr = 0;
+					invpcid(&d, INVPCID_CTX);
+				} else {
+					cr3 = rcr3();
+					critical_enter();
+
+					/*
+					 * Bit 63 is clear, pcid TLB
+					 * entries are invalidated.
+					 */
+					load_cr3(DMAP_TO_PHYS((vm_offset_t)
+					    pmap->pm_pml4) | pmap->pm_pcid);
+					load_cr3(cr3 | CR3_PCID_SAVE);
+					critical_exit();
+				}
+			} else {
+				invltlb_globpcid();
+			}
+		} else if (CPU_ISSET(cpuid, &pmap->pm_active))
 			invltlb();
-		CPU_AND(&other_cpus, &pmap->pm_active);
+		CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
+		if (pmap_pcid_enabled)
+			CPU_AND(&other_cpus, &pmap->pm_save);
+		else
+			CPU_AND(&other_cpus, &pmap->pm_active);
 		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invltlb(other_cpus);
+			smp_masked_invltlb(other_cpus, pmap);
 	}
 	sched_unpin();
 }
@@ -1129,8 +1314,10 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 	CPU_CLR(cpuid, &other_cpus);
 	if (pmap == kernel_pmap)
 		active = all_cpus;
-	else
+	else {
 		active = pmap->pm_active;
+		CPU_AND_ATOMIC(&pmap->pm_save, &active);
+	}
 	if (CPU_OVERLAP(&active, &other_cpus)) {
 		act.store = cpuid;
 		act.invalidate = active;
@@ -1193,6 +1380,8 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 	pde_store(pde, newpde);
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		pmap_update_pde_invalidate(va, newpde);
+	else
+		CPU_ZERO(&pmap->pm_save);
 }
 #endif /* !SMP */
 
@@ -1675,6 +1864,11 @@ pmap_pinit0(pmap_t pmap)
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+	if (pmap_pcid_enabled)
+		pmap->pm_pcid = 0;
+	else
+		pmap->pm_pcid = -1;
+	CPU_ZERO(&pmap->pm_save);
 }
 
 /*
@@ -1717,6 +1911,13 @@ pmap_pinit(pmap_t pmap)
 
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+	if (pmap_pcid_enabled) {
+		pmap->pm_pcid = alloc_unr(&pcid_unr);
+		pmap_invalidate_all(pmap);
+	} else
+		pmap->pm_pcid = -1;
+	CPU_ZERO(&pmap->pm_save);
+
 	return (1);
 }
 
@@ -1957,6 +2158,14 @@ pmap_release(pmap_t pmap)
 	KASSERT(vm_radix_is_empty(&pmap->pm_root),
 	    ("pmap_release: pmap has reserved page table page(s)"));
 
+	if (pmap_pcid_enabled) {
+		/*
+		 * Invalidate any left TLB entries, to allow the reuse
+		 * of the pcid.
+		 */
+		pmap_invalidate_all(pmap);
+	}
+
 	m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
 
 	for (i = 0; i < NKPML4E; i++)	/* KVA */
@@ -1968,6 +2177,8 @@ pmap_release(pmap_t pmap)
 	m->wire_count--;
 	atomic_subtract_int(&cnt.v_wire_count, 1);
 	vm_page_free_zero(m);
+	if (pmap->pm_pcid != -1)
+		free_unr(&pcid_unr, pmap->pm_pcid);
 }
 
 static int
@@ -5612,15 +5823,20 @@ pmap_activate(struct thread *td)
 	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
+	CPU_ZERO(&pmap->pm_save);
 	cpuid = PCPU_GET(cpuid);
 #ifdef SMP
 	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
 	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
+	CPU_SET_ATOMIC(cpuid, &pmap->pm_save);
#else
 	CPU_CLR(cpuid, &oldpmap->pm_active);
 	CPU_SET(cpuid, &pmap->pm_active);
+	CPU_SET(cpuid, &pmap->pm_save);
 #endif
 	cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
+	if (pmap->pm_pcid != -1)
+		cr3 |= pmap->pm_pcid;
 	td->td_pcb->pcb_cr3 = cr3;
 	load_cr3(cr3);
 	PCPU_SET(curpmap, pmap);
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index ed0e7e9..3764f72 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -221,6 +221,8 @@ cpu_fork(td1, p2, td2, flags)
 	 */
 	pmap2 = vmspace_pmap(p2->p_vmspace);
 	pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
+	if (pmap2->pm_pcid != -1)
+		pcb2->pcb_cr3 |= pmap2->pm_pcid;
 	pcb2->pcb_r12 = (register_t)fork_return;	/* fork_trampoline argument */
 	pcb2->pcb_rbp = 0;
 	pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 881fcd2..3d381c6 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -472,6 +472,26 @@ invlpg(u_long addr)
 	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
 }
 
+#define	INVPCID_ADDR	0
+#define	INVPCID_CTX	1
+#define	INVPCID_CTXGLOB	2
+#define	INVPCID_ALLCTX	3
+
+struct invpcid_descr {
+	uint64_t	pcid:12 __packed;
+	uint64_t	pad:52 __packed;
+	uint64_t	addr;
+} __packed;
+
+static __inline void
+invpcid(struct invpcid_descr *d, int type)
+{
+
+	/* invpcid (%rdx),%rax */
+	__asm __volatile(".byte 0x66,0x0f,0x38,0x82,0x02"
+	    : : "d" (d), "a" ((u_long)type) : "memory");
+}
+
 static __inline u_short
 rfs(void)
 {
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index ba4c618..44b3bab 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -76,6 +76,8 @@
 	struct system_segment_descriptor *pc_ldt;			\
 	/* Pointer to the CPU TSS descriptor */				\
 	struct system_segment_descriptor *pc_tss;			\
+	uint64_t pc_pm_save_cnt;					\
+	char	pc_invpcid_descr[16];					\
 	u_int	pc_cmci_mask		/* MCx banks for CMCI */	\
 	PCPU_XEN_FIELDS;						\
 	uint64_t pc_dbreg[16];		/* ddb debugging regs */	\
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index aacb9ba..fa42389 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -240,6 +240,8 @@ struct pmap {
 	pml4_entry_t	*pm_pml4;	/* KVA of level 4 page table */
 	TAILQ_HEAD(,pv_chunk)	pm_pvchunk;	/* list of mappings in pmap */
 	cpuset_t	pm_active;	/* active on cpus */
+	cpuset_t	pm_save;	/* Context valid on cpus mask */
+	int		pm_pcid;	/* context id */
 	/* spare u_int here due to padding */
 	struct pmap_statistics	pm_stats;	/* pmap statistics */
 	struct vm_radix		pm_root;	/* spare page table pages */
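Note (illustration, not part of the patch): the invpcid_descr added to cpufunc.h above is the 16-byte descriptor the INVPCID instruction expects — the PCID in the low 12 bits of the first quadword, the rest reserved, and the linear address in the second quadword. A standalone layout check, with the struct copied here purely for illustration outside the kernel tree:

/* invpcid_descr_chk.c - verify the 16-byte INVPCID descriptor layout. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct invpcid_descr {
	uint64_t pcid:12;
	uint64_t pad:52;
	uint64_t addr;
} __attribute__((packed));

int
main(void)
{
	struct invpcid_descr d;
	uint64_t first;

	assert(sizeof(d) == 16);
	memset(&d, 0, sizeof(d));
	d.pcid = 0xfff;			/* fills only the low 12 bits */
	d.addr = 0xdeadbeef;
	memcpy(&first, &d, sizeof(first));
	printf("first quadword: %#llx\n", (unsigned long long)first);
	return (0);
}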
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 16d87ea..d6cd476 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -54,6 +54,8 @@ inthand_t
 	IDTVEC(cpususpend),	/* CPU suspends & waits to be resumed */
 	IDTVEC(rendezvous);	/* handle CPU rendezvous */
 
+struct pmap;
+
 /* functions in mp_machdep.c */
 void	cpu_add(u_int apic_id, char boot_cpu);
 void	cpustop_handler(void);
@@ -67,13 +69,14 @@ int	ipi_nmi_handler(void);
 void	ipi_selected(cpuset_t cpus, u_int ipi);
 u_int	mp_bootaddress(u_int);
 void	smp_cache_flush(void);
-void	smp_invlpg(vm_offset_t addr);
-void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
-void	smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void	smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
+void	smp_invlpg(struct pmap *pmap, vm_offset_t addr);
+void	smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr);
+void	smp_invlpg_range(struct pmap *pmap, vm_offset_t startva,
 	    vm_offset_t endva);
-void	smp_invltlb(void);
-void	smp_masked_invltlb(cpuset_t mask);
+void	smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap,
+	    vm_offset_t startva, vm_offset_t endva);
+void	smp_invltlb(struct pmap *pmap);
+void	smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
 
 #endif /* !LOCORE */
 #endif /* SMP */
diff --git a/sys/kern/subr_unit.c b/sys/kern/subr_unit.c
index 9cf1781..3bf7aaf 100644
--- a/sys/kern/subr_unit.c
+++ b/sys/kern/subr_unit.c
@@ -68,8 +68,8 @@
  */
 
 #include
-#include
 #include
+#include
 
 #ifdef _KERNEL
 
@@ -187,22 +187,6 @@ CTASSERT(sizeof(struct unr) == sizeof(struct unrb));
 /* Number of bits in the bitmap */
 #define	NBITS	((int)sizeof(((struct unrb *)NULL)->map) * 8)
 
-/* Header element for a unr number space. */
-
-struct unrhdr {
-	TAILQ_HEAD(unrhd,unr)	head;
-	u_int			low;	/* Lowest item */
-	u_int			high;	/* Highest item */
-	u_int			busy;	/* Count of allocated items */
-	u_int			alloc;	/* Count of memory allocations */
-	u_int			first;	/* items in allocated from start */
-	u_int			last;	/* items free at end */
-	struct mtx		*mtx;
-	TAILQ_HEAD(unrfr,unr)	ppfree;	/* Items to be freed after mtx
-					   lock dropped */
-};
-
-
 #if defined(DIAGNOSTIC) || !defined(_KERNEL)
 /*
  * Consistency check function.
@@ -315,20 +299,12 @@ clean_unrhdr(struct unrhdr *uh)
 	mtx_unlock(uh->mtx);
 }
 
-/*
- * Allocate a new unrheader set.
- *
- * Highest and lowest valid values given as parameters.
- */
-
-struct unrhdr *
-new_unrhdr(int low, int high, struct mtx *mutex)
+void
+init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex)
 {
-	struct unrhdr *uh;
 
 	KASSERT(low >= 0 && low <= high,
 	    ("UNR: use error: new_unrhdr(%d, %d)", low, high));
-	uh = Malloc(sizeof *uh);
 	if (mutex != NULL)
 		uh->mtx = mutex;
 	else
@@ -340,6 +316,21 @@ new_unrhdr(int low, int high, struct mtx *mutex)
 	uh->first = 0;
 	uh->last = 1 + (high - low);
 	check_unrhdr(uh, __LINE__);
+}
+
+/*
+ * Allocate a new unrheader set.
+ *
+ * Highest and lowest valid values given as parameters.
+ */
+
+struct unrhdr *
+new_unrhdr(int low, int high, struct mtx *mutex)
+{
+	struct unrhdr *uh;
+
+	uh = Malloc(sizeof *uh);
+	init_unrhdr(uh, low, high, mutex);
 	return (uh);
 }
diff --git a/sys/sys/_unrhdr.h b/sys/sys/_unrhdr.h
new file mode 100644
index 0000000..f3c25d1
--- /dev/null
+++ b/sys/sys/_unrhdr.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2004 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_UNRHDR_H
+#define _SYS_UNRHDR_H
+
+#include
+
+struct mtx;
+
+/* Header element for a unr number space. */
+
+struct unrhdr {
+	TAILQ_HEAD(unrhd,unr)	head;
+	u_int			low;	/* Lowest item */
+	u_int			high;	/* Highest item */
+	u_int			busy;	/* Count of allocated items */
+	u_int			alloc;	/* Count of memory allocations */
+	u_int			first;	/* items in allocated from start */
+	u_int			last;	/* items free at end */
+	struct mtx		*mtx;
+	TAILQ_HEAD(unrfr,unr)	ppfree;	/* Items to be freed after mtx
+					   lock dropped */
+};
+
+#endif
diff --git a/sys/sys/bitset.h b/sys/sys/bitset.h
index dee5542..7c24ecd 100644
--- a/sys/sys/bitset.h
+++ b/sys/sys/bitset.h
@@ -135,7 +135,14 @@
 	atomic_set_long(&(p)->__bits[__bitset_word(_s, n)],		\
 	    __bitset_mask((_s), n))
 
-/* Convenience functions catering special cases. */
+/* Convenience functions catering special cases. */
+#define	BIT_AND_ATOMIC(_s, d, s) do {					\
+	__size_t __i;							\
+	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
+		atomic_clear_long(&(d)->__bits[__i],			\
+		    ~(s)->__bits[__i]);					\
+} while (0)
+
 #define	BIT_OR_ATOMIC(_s, d, s) do {					\
 	__size_t __i;							\
 	for (__i = 0; __i < __bitset_words((_s)); __i++)		\
diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h
index fc078d3..e1ee37d 100644
--- a/sys/sys/cpuset.h
+++ b/sys/sys/cpuset.h
@@ -55,6 +55,7 @@
 #define	CPU_NAND(d, s)			BIT_NAND(CPU_SETSIZE, d, s)
 #define	CPU_CLR_ATOMIC(n, p)		BIT_CLR_ATOMIC(CPU_SETSIZE, n, p)
 #define	CPU_SET_ATOMIC(n, p)		BIT_SET_ATOMIC(CPU_SETSIZE, n, p)
+#define	CPU_AND_ATOMIC(n, p)		BIT_AND_ATOMIC(CPU_SETSIZE, n, p)
 #define	CPU_OR_ATOMIC(d, s)		BIT_OR_ATOMIC(CPU_SETSIZE, d, s)
 #define	CPU_COPY_STORE_REL(f, t)	BIT_COPY_STORE_REL(CPU_SETSIZE, f, t)
 #define	CPU_FFS(p)			BIT_FFS(CPU_SETSIZE, p)
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 4887d71..e3ea9cf 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -396,6 +396,7 @@ int	root_mounted(void);
 */
 struct unrhdr;
 struct unrhdr	*new_unrhdr(int low, int high, struct mtx *mutex);
+void	init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex);
 void	delete_unrhdr(struct unrhdr *uh);
 void	clean_unrhdr(struct unrhdr *uh);
 void	clean_unrhdrl(struct unrhdr *uh);
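Note (illustration, not part of the patch): the pmap.c changes export two knobs, vm.pmap.pcid_enabled (a read-only tunable) and vm.pmap.pcid_save_cnt (the summed per-CPU pc_pm_save_cnt counters). A quick way to watch them from userland, sketched with sysctlbyname(3):

/* pcidstat.c - read the PCID sysctls added by this patch. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	int enabled;
	uint64_t save_cnt;
	size_t len;

	len = sizeof(enabled);
	if (sysctlbyname("vm.pmap.pcid_enabled", &enabled, &len, NULL, 0) == -1)
		perror("vm.pmap.pcid_enabled");
	else
		printf("pcid_enabled: %d\n", enabled);

	len = sizeof(save_cnt);
	if (sysctlbyname("vm.pmap.pcid_save_cnt", &save_cnt, &len, NULL, 0) == -1)
		perror("vm.pmap.pcid_save_cnt");
	else
		printf("pcid_save_cnt: %llu\n", (unsigned long long)save_cnt);
	return (0);
}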