diff --git a/sys/arm/arm/vm_machdep.c b/sys/arm/arm/vm_machdep.c
index 1a907cc..6bd5799 100644
--- a/sys/arm/arm/vm_machdep.c
+++ b/sys/arm/arm/vm_machdep.c
@@ -119,9 +119,6 @@ cpu_fork(register struct thread *td1, register struct proc *p2,
 #ifdef __XSCALE__
 #ifndef CPU_XSCALE_CORE3
 	pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE);
-	if (td2->td_altkstack)
-		pmap_use_minicache(td2->td_altkstack, td2->td_altkstack_pages *
-		    PAGE_SIZE);
 #endif
 #endif
 	td2->td_pcb = pcb2;
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 4e2eaa9..03d8cbc 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_kdtrace.h"
 #include "opt_ktrace.h"
+#include "opt_kstack_pages.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -276,25 +277,29 @@ norfproc_fail:
 
 	mem_charged = 0;
 	vm2 = NULL;
+	if (pages == 0)
+		pages = KSTACK_PAGES;
 	/* Allocate new proc. */
 	newproc = uma_zalloc(proc_zone, M_WAITOK);
-	if (TAILQ_EMPTY(&newproc->p_threads)) {
-		td2 = thread_alloc();
+	td2 = FIRST_THREAD_IN_PROC(newproc);
+	if (td2 == NULL) {
+		td2 = thread_alloc(pages);
 		if (td2 == NULL) {
 			error = ENOMEM;
 			goto fail1;
 		}
 		proc_linkup(newproc, td2);
-	} else
-		td2 = FIRST_THREAD_IN_PROC(newproc);
-
-	/* Allocate and switch to an alternate kstack if specified. */
-	if (pages != 0) {
-		if (!vm_thread_new_altkstack(td2, pages)) {
-			error = ENOMEM;
-			goto fail1;
+	} else {
+		if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) {
+			if (td2->td_kstack != 0)
+				vm_thread_dispose(td2);
+			if (!thread_alloc_stack(td2, pages)) {
+				error = ENOMEM;
+				goto fail1;
+			}
 		}
 	}
+
 	if ((flags & RFMEM) == 0) {
 		vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
 		if (vm2 == NULL) {
diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c
index 1092832..3c5248e 100644
--- a/sys/kern/kern_kthread.c
+++ b/sys/kern/kern_kthread.c
@@ -256,7 +256,7 @@ kthread_add(void (*func)(void *), void *arg, struct proc *p,
 	}
 
 	/* Initialize our new td  */
-	newtd = thread_alloc();
+	newtd = thread_alloc(pages);
 	if (newtd == NULL)
 		return (ENOMEM);
 
@@ -282,9 +282,6 @@ kthread_add(void (*func)(void *), void *arg, struct proc *p,
 
 	newtd->td_pflags |= TDP_KTHREAD;
 	newtd->td_ucred = crhold(p->p_ucred);
-	/* Allocate and switch to an alternate kstack if specified. */
-	if (pages != 0)
-		vm_thread_new_altkstack(newtd, pages);
 
 	/* this code almost the same as create_thread() in kern_thr.c */
 	PROC_LOCK(p);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index cdbc012..e012a3e 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -203,14 +203,6 @@ proc_dtor(void *mem, int size, void *arg)
 #endif
 		/* Free all OSD associated to this thread. */
 		osd_thread_exit(td);
-
-		/* Dispose of an alternate kstack, if it exists.
-		 * XXX What if there are more than one thread in the proc?
-		 *     The first thread in the proc is special and not
-		 *     freed, so you gotta do this here.
-		 */
-		if (((p->p_flag & P_KTHREAD) != 0) && (td->td_altkstack != 0))
-			vm_thread_dispose_altkstack(td);
 	}
 	EVENTHANDLER_INVOKE(process_dtor, p);
 	if (p->p_ksi != NULL)
@@ -767,8 +759,6 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
 		FOREACH_THREAD_IN_PROC(p, td0) {
 			if (!TD_IS_SWAPPED(td0))
 				kp->ki_rssize += td0->td_kstack_pages;
-			if (td0->td_altkstack_obj != NULL)
-				kp->ki_rssize += td0->td_altkstack_pages;
 		}
 		kp->ki_swrss = vm->vm_swrss;
 		kp->ki_tsize = vm->vm_tsize;
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index c478c63..630069b 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -176,7 +176,7 @@ create_thread(struct thread *td, mcontext_t *ctx,
 	}
 
 	/* Initialize our td */
-	newtd = thread_alloc();
+	newtd = thread_alloc(0);
 	if (newtd == NULL)
 		return (ENOMEM);
 
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index d47bd8c..4f3b32c 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -283,7 +283,7 @@ thread_reap(void)
  * Allocate a thread.
  */
 struct thread *
-thread_alloc(void)
+thread_alloc(int pages)
 {
 	struct thread *td;
 
@@ -291,7 +291,7 @@ thread_alloc(void)
 
 	td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
 	KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
-	if (!vm_thread_new(td, 0)) {
+	if (!vm_thread_new(td, pages)) {
 		uma_zfree(thread_zone, td);
 		return (NULL);
 	}
@@ -299,6 +299,17 @@ thread_alloc(void)
 	return (td);
 }
 
+int
+thread_alloc_stack(struct thread *td, int pages)
+{
+	/* Give a stackless td a new kstack; returns 1 if ok, 0 if not. */
+	KASSERT(td->td_kstack == 0,
+	    ("thread_alloc_stack called on a thread with kstack"));
+	if (!vm_thread_new(td, pages))	/* Allocation failed. */
+		return (0);
+	cpu_thread_alloc(td);	/* MD hook; presumably rebinds to new kstack. */
+	return (1);
+}
 
 /*
  * Deallocate a thread.
@@ -312,8 +323,6 @@ thread_free(struct thread *td)
 		cpuset_rel(td->td_cpuset);
 	td->td_cpuset = NULL;
 	cpu_thread_free(td);
-	if (td->td_altkstack != 0)
-		vm_thread_dispose_altkstack(td);
 	if (td->td_kstack != 0)
 		vm_thread_dispose(td);
 	uma_zfree(thread_zone, td);
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index b65db62..6e57167 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -267,9 +267,6 @@ struct thread {
 	struct vm_object *td_kstack_obj;/* (a) Kstack object. */
 	vm_offset_t	td_kstack;	/* (a) Kernel VA of kstack. */
 	int		td_kstack_pages; /* (a) Size of the kstack. */
-	struct vm_object *td_altkstack_obj;/* (a) Alternate kstack object. */
-	vm_offset_t	td_altkstack;	/* (a) Kernel VA of alternate kstack. */
-	int		td_altkstack_pages; /* (a) Size of alternate kstack. */
 	volatile u_int	td_critnest;	/* (k*) Critical section nest level. */
 	struct mdthread td_md;		/* (k) Any machine-dependent fields. */
 	struct td_sched	*td_sched;	/* (*) Scheduler-specific data. */
@@ -850,7 +847,8 @@ void	cpu_thread_exit(struct thread *);
 void	cpu_thread_free(struct thread *);
 void	cpu_thread_swapin(struct thread *);
 void	cpu_thread_swapout(struct thread *);
-struct	thread *thread_alloc(void);
+struct	thread *thread_alloc(int pages);
+int	thread_alloc_stack(struct thread *, int pages);
 void	thread_exit(void) __dead2;
 void	thread_free(struct thread *td);
 void	thread_link(struct thread *td, struct proc *p);
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 53f7694..65b6c8e 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -80,9 +80,7 @@ int vm_fault_quick(caddr_t v, int prot);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
 void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
-void vm_thread_dispose_altkstack(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
-int vm_thread_new_altkstack(struct thread *td, int pages);
 void vm_thread_swapin(struct thread *td);
 void vm_thread_swapout(struct thread *td);
 #endif				/* _KERNEL */
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 9e43a3f..851c733 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
+#include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/unistd.h>
@@ -308,6 +309,20 @@ vm_imgact_unmap_page(struct sf_buf *sf)
 	vm_page_unlock_queues();
 }
 
+struct kstack_cache_entry {	/* Overlaid on the cached stack's memory. */
+	vm_object_t ksobj;	/* VM object backing the cached stack. */
+	struct kstack_cache_entry *next_ks_entry; /* Next cached stack. */
+};
+
+static struct kstack_cache_entry *kstack_cache;	/* Head of cached list. */
+static int kstack_cache_size = 128;	/* Caching threshold on kstacks. */
+static int kstacks;	/* Stacks allocated and not yet freed. */
+static struct mtx kstack_cache_mtx;	/* Protects the kstack_cache list. */
+SYSCTL_INT(_vm, OID_AUTO, kstack_cache_size, CTLFLAG_RW, &kstack_cache_size, 0,
+    "Cache freed kernel stacks while no more than this many are allocated");
+SYSCTL_INT(_vm, OID_AUTO, kstacks, CTLFLAG_RD, &kstacks, 0,
+    "Number of kernel stacks allocated, including cached ones");
+
 #ifndef KSTACK_MAX_PAGES
 #define KSTACK_MAX_PAGES 32
 #endif
@@ -323,6 +338,7 @@ vm_thread_new(struct thread *td, int pages)
 	vm_object_t ksobj;
 	vm_offset_t ks;
 	vm_page_t m, ma[KSTACK_MAX_PAGES];
+	struct kstack_cache_entry *ks_ce;
 	int i;
 
 	/* Bounds check */
@@ -330,6 +346,22 @@ vm_thread_new(struct thread *td, int pages)
 		pages = KSTACK_PAGES;
 	else if (pages > KSTACK_MAX_PAGES)
 		pages = KSTACK_MAX_PAGES;
+
+	if (pages == KSTACK_PAGES) {
+		mtx_lock(&kstack_cache_mtx);
+		if (kstack_cache != NULL) {
+			ks_ce = kstack_cache;
+			kstack_cache = ks_ce->next_ks_entry;
+			mtx_unlock(&kstack_cache_mtx);
+
+			td->td_kstack_obj = ks_ce->ksobj;
+			td->td_kstack = (vm_offset_t)ks_ce;
+			td->td_kstack_pages = KSTACK_PAGES;
+			return (1);
+		}
+		mtx_unlock(&kstack_cache_mtx);
+	}
+
 	/*
 	 * Allocate an object for the kstack.
 	 */
@@ -345,7 +377,8 @@ vm_thread_new(struct thread *td, int pages)
 		vm_object_deallocate(ksobj);
 		return (0);
 	}
-	
+
+	atomic_add_int(&kstacks, 1);
 	if (KSTACK_GUARD_PAGES != 0) {
 		pmap_qremove(ks, KSTACK_GUARD_PAGES);
 		ks += KSTACK_GUARD_PAGES * PAGE_SIZE;
@@ -376,20 +409,13 @@ vm_thread_new(struct thread *td, int pages)
 	return (1);
 }
 
-/*
- * Dispose of a thread's kernel stack.
- */
-void
-vm_thread_dispose(struct thread *td)
+static void
+vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages)
 {
-	vm_object_t ksobj;
-	vm_offset_t ks;
 	vm_page_t m;
-	int i, pages;
+	int i;
 
-	pages = td->td_kstack_pages;
-	ksobj = td->td_kstack_obj;
-	ks = td->td_kstack;
+	atomic_add_int(&kstacks, -1);
 	pmap_qremove(ks, pages);
 	VM_OBJECT_LOCK(ksobj);
 	for (i = 0; i < pages; i++) {
@@ -405,9 +431,66 @@ vm_thread_dispose(struct thread *td)
 	vm_object_deallocate(ksobj);
 	kmem_free(kernel_map, ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
 	    (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
+}
+
+/*
+ * Dispose of a thread's kernel stack: cache it if eligible, else free it.
+ */
+void
+vm_thread_dispose(struct thread *td)
+{
+	vm_object_t ksobj;
+	vm_offset_t ks;
+	struct kstack_cache_entry *ks_ce;
+	int pages;
+
+	pages = td->td_kstack_pages;
+	ksobj = td->td_kstack_obj;
+	ks = td->td_kstack;
 	td->td_kstack = 0;
+	td->td_kstack_pages = 0;	/* Mark td as having no kstack. */
+	if (pages == KSTACK_PAGES && kstacks <= kstack_cache_size) {
+		ks_ce = (struct kstack_cache_entry *)ks;
+		ks_ce->ksobj = ksobj;	/* Entry is kept in the stack itself. */
+		mtx_lock(&kstack_cache_mtx);	/* Push onto the cache list. */
+		ks_ce->next_ks_entry = kstack_cache;
+		kstack_cache = ks_ce;
+		mtx_unlock(&kstack_cache_mtx);
+		return;		/* Stack stays mapped for reuse. */
+	}
+	vm_thread_stack_dispose(ksobj, ks, pages); /* Unmap and free it. */
 }
 
+static void
+vm_thread_stack_lowmem(void *nulll)
+{
+	struct kstack_cache_entry *ks_ce, *ks_ce1;
+	/* vm_lowmem handler: detach and free every cached kstack. */
+	mtx_lock(&kstack_cache_mtx);
+	ks_ce = kstack_cache;
+	kstack_cache = NULL;
+	mtx_unlock(&kstack_cache_mtx);
+	/* The detached list is private now; free it without the mutex. */
+	while (ks_ce != NULL) {
+		ks_ce1 = ks_ce;
+		ks_ce = ks_ce->next_ks_entry;
+		/* ks_ce1 sits inside the stack freed below; advance first. */
+		vm_thread_stack_dispose(ks_ce1->ksobj, (vm_offset_t)ks_ce1,
+		    KSTACK_PAGES);
+	}
+}
+
+static void
+kstack_cache_init(void *nulll)
+{
+	/* Have vm_lowmem events run vm_thread_stack_lowmem (arg unused). */
+	EVENTHANDLER_REGISTER(vm_lowmem, vm_thread_stack_lowmem, NULL,
+	    EVENTHANDLER_PRI_ANY);
+}
+/* Set up the cache mutex and run kstack_cache_init via SYSINIT. */
+MTX_SYSINIT(kstack_cache, &kstack_cache_mtx, "kstkch", MTX_DEF);
+SYSINIT(vm_kstacks, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, kstack_cache_init, NULL);
+
 /*
  * Allow a thread's kernel stack to be paged out.
  */
@@ -468,37 +551,6 @@ vm_thread_swapin(struct thread *td)
 }
 
 /*
- * Set up a variable-sized alternate kstack.
- */
-int
-vm_thread_new_altkstack(struct thread *td, int pages)
-{
-
-	td->td_altkstack = td->td_kstack;
-	td->td_altkstack_obj = td->td_kstack_obj;
-	td->td_altkstack_pages = td->td_kstack_pages;
-
-	return (vm_thread_new(td, pages));
-}
-
-/*
- * Restore the original kstack.
- */
-void
-vm_thread_dispose_altkstack(struct thread *td)
-{
-
-	vm_thread_dispose(td);
-
-	td->td_kstack = td->td_altkstack;
-	td->td_kstack_obj = td->td_altkstack_obj;
-	td->td_kstack_pages = td->td_altkstack_pages;
-	td->td_altkstack = 0;
-	td->td_altkstack_obj = NULL;
-	td->td_altkstack_pages = 0;
-}
-
-/*
  * Implement fork's actions on an address space.
  * Here we arrange for the address space to be copied or referenced,
  * allocate a user struct (pcb and kernel stack), then call the