diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 4a2d4e0..9012d65 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -33,6 +33,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
			     struct task_struct *tsk)
 {
+	unsigned long flags;
	unsigned cpu = smp_processor_id();
	if (likely(prev != next)) {
@@ -43,6 +44,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
		cpumask_set_cpu(cpu, mm_cpumask(next));
+		spin_lock_irqsave(&lazy_unmap_lock, flags);
+		list_splice_tail(&unmapped_in_tlb[cpu], &unmapped_safe[cpu]);
+		INIT_LIST_HEAD(&unmapped_in_tlb[cpu]);
+		spin_unlock_irqrestore(&lazy_unmap_lock, flags);
		/* Re-load page tables */
		load_cr3(next->pgd);
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 8d3ad0a..98dfe9d 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -662,6 +662,9 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open 298
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_munmap_lazy 299
+__SYSCALL(__NR_munmap_lazy, sys_munmap_lazy)
+
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c93..fe2fd67 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -90,7 +90,7 @@ static void aio_free_ring(struct kioctx *ctx)
	if (info->mmap_size) {
		down_write(&ctx->mm->mmap_sem);
-		do_munmap(ctx->mm, info->mmap_base, info->mmap_size);
+		do_munmap(ctx->mm, info->mmap_base, info->mmap_size, 0);
		up_write(&ctx->mm->mmap_sem);
	}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b9b3bb5..4cc594a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -345,7 +345,8 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
-			do_munmap(current->mm, map_addr+size, total_size-size);
+			do_munmap(current->mm, map_addr+size, total_size-size,
+				  0);
	} else
		map_addr = do_mmap(filep, addr, size, prot, type, off);
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index d76b66a..ee5565a 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -623,8 +623,11 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open 241
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_munmap_lazy 242
+__SYSCALL(__NR_munmap_lazy, sys_munmap_lazy)
+
 #undef __NR_syscalls
-#define __NR_syscalls 242
+#define __NR_syscalls 243
 /*
  * All syscalls below here should go away really,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 24c3956..2de2d39 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -103,6 +103,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 #define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
+#define VM_PHANTOM_UNLINKED 0x20000000	/* Lazily unmapped but also unlinked from mm due to exit (XXX new flag) */
 #define VM_PFN_AT_MMAP	0x40000000	/* PFNMAP vma that is fully mapped at mmap time */
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
@@ -256,6 +257,18 @@ static inline int put_page_testzero(struct page *page)
	return atomic_dec_and_test(&page->_count);
 }
+static inline void get_page_lazy(struct page *page)
+{
+	VM_BUG_ON(atomic_read(&page->lazy_unmap_count));
+	atomic_inc(&page->lazy_unmap_count);
+}
+
+static inline int put_page_lazy(struct page *page)
+{
+	VM_BUG_ON(atomic_read(&page->lazy_unmap_count) == 0);
+	return atomic_dec_and_test(&page->lazy_unmap_count);
+}
+
 /*
  * Try to grab a ref unless the page has a refcount of zero, return false if
  * that is the case.
@@ -750,6 +763,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlb,
		struct vm_area_struct *start_vma, unsigned long start_addr,
		unsigned long end_addr, unsigned long *nr_accounted,
		struct zap_details *);
+unsigned long unmap_vmas_lazy(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long start_addr, unsigned long end_addr,
+		struct list_head *batches);
 /**
  * mm_walk - callbacks for walk_page_range
@@ -1151,7 +1167,7 @@ out:
	return ret;
 }
-extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+extern int do_munmap(struct mm_struct *, unsigned long, size_t, int);
 extern unsigned long do_brk(unsigned long, unsigned long);
@@ -1322,5 +1338,11 @@ extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern atomic_long_t mce_bad_pages;
+
+extern struct list_head unmapped_in_tlb[NR_CPUS];
+extern struct list_head unmapped_safe[NR_CPUS];
+extern spinlock_t lazy_unmap_lock;
+
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 84a524a..e2f74dc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -41,6 +41,7 @@ struct page {
	unsigned long flags;		/* Atomic flags, some possibly
					 * updated asynchronously */
	atomic_t _count;		/* Usage count, see below. */
+	atomic_t lazy_unmap_count;
	union {
		atomic_t _mapcount;	/* Count of ptes mapped in mms,
					 * to show when page is mapped
@@ -186,6 +187,8 @@ struct vm_area_struct {
 #ifdef CONFIG_NUMA
	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
+	struct list_head lazy_unmap_list;
+	atomic_t vm_phantom_count;
 };
 struct core_thread {
@@ -292,4 +295,13 @@ struct mm_struct {
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
 #define mm_cpumask(mm) (&(mm)->cpu_vm_mask)
+/* XXX use slightly less than 512 to avoid internal fragmentation? */
+#define LAZY_UNMAP_BATCH_SIZE 512
+
+struct lazy_unmap_batch {
+	struct list_head list;
+	struct vm_area_struct *vma;
+	struct page *pages[LAZY_UNMAP_BATCH_SIZE];
+};
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace..a522b26 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -527,6 +527,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags);
 asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice);
 asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice);
 asmlinkage long sys_munmap(unsigned long addr, size_t len);
+asmlinkage long sys_munmap_lazy(unsigned long addr, size_t len);
 asmlinkage long sys_mlock(unsigned long start, size_t len);
 asmlinkage long sys_munlock(unsigned long start, size_t len);
 asmlinkage long sys_mlockall(int flags);
diff --git a/ipc/shm.c b/ipc/shm.c
index 464694e..7398cd9 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1019,7 +1019,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
				size = vma->vm_file->f_path.dentry->d_inode->i_size;
-				do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+				do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, 0);
				/*
				 * We discovered the size of the shm segment, so
				 * break out of here and fall through to the next
@@ -1046,7 +1046,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
-			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, 0);
		vma = next;
	}
diff --git a/mm/memory.c b/mm/memory.c
index 6ab19dd..5240a4f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -959,6 +959,154 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
	return addr;
 }
+static unsigned long
+zap_pte_range_lazy(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long addr, unsigned long end, struct list_head *batches,
+		int *pages)
+{
+	struct lazy_unmap_batch *batch;
+	spinlock_t *ptl;
+	pte_t *pte;
+	int i;
+
+	i = 0;
+	batch = kzalloc(sizeof(struct lazy_unmap_batch), GFP_KERNEL);
+	list_add(&batch->list, batches);
+	batch->vma = vma;
+
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	do {
+		pte_t ptent = *pte;
+		if (pte_none(ptent))
+			continue;
+		if (pte_present(ptent)) {
+			struct page *page;
+
+			page = vm_normal_page(vma, addr, ptent);
+			ptent = ptep_get_and_clear(mm, addr, pte);
+			if (page == NULL) {
+				/* XXX do a regular TLB shootdown */
+				//printk("%s: current %s not normal page addr 0x%lx\n", __func__, current->comm, addr);
+				continue;
+			}
+			if (!PageAnon(page)) {
+				if (pte_dirty(ptent))
+					set_page_dirty(page);
+				if (pte_young(ptent) &&
+				    likely(!VM_SequentialReadHint(vma)))
+					mark_page_accessed(page);
+			}
+			/*
+			 * If we're the first lazy unmap of the page, get a
+			 * reference to it to make sure it doesn't get
+			 * freed by someone else while it's still in a batch.
+			 */
+			if (atomic_add_return(num_online_cpus(),
+					&page->lazy_unmap_count) == num_online_cpus())
+				get_page(page);
+			if (unlikely(page_mapcount(page) < 0))
+				print_bad_pte(vma, addr, ptent, page);
+			batch->pages[i++] = page;
+			//printk(KERN_ALERT "%s: page %p count %d lazy count %d\n", __func__, page, page_count(page), atomic_read(&page->lazy_unmap_count));
+			(*pages)++;
+		}
+		/* XXX deal with non-present (nonlinear/swap) */
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	pte_unmap_unlock(pte - 1, ptl);
+	return addr;
+}
+
+static unsigned long
+zap_pmd_range_lazy(struct mm_struct *mm, struct vm_area_struct *vma, pud_t *pud,
+		unsigned long addr, unsigned long end, struct list_head *batches,
+		int *pages)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		next = zap_pte_range_lazy(mm, vma, pmd, addr, next, batches,
+				pages);
+	} while (pmd++, addr = next, addr != end);
+
+	return addr;
+}
+
+static unsigned long
+zap_pud_range_lazy(struct mm_struct *mm, struct vm_area_struct *vma, pgd_t *pgd,
+		unsigned long addr, unsigned long end, struct list_head *batches,
+		int *pages)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		next = zap_pmd_range_lazy(mm, vma, pud, addr, next, batches,
+				pages);
+	} while (pud++, addr = next, addr != end);
+
+	return addr;
+}
+
+static unsigned long
+unmap_page_range_lazy(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end, struct list_head *batches,
+		int *pages)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	BUG_ON(addr >= end);
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		next = zap_pud_range_lazy(mm, vma, pgd, addr, next, batches,
+				pages);
+	} while (pgd++, addr = next, addr != end);
+
+	return addr;
+}
+
+unsigned long
+unmap_vmas_lazy(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long start_addr, unsigned long end_addr, struct list_head *batches)
+{
+	unsigned long start = start_addr;
+	int vma_pages;
+
+	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
+		unsigned long end;
+
+		start = max(vma->vm_start, start_addr);
+		if (start >= vma->vm_end)
+			continue;
+		end = min(vma->vm_end, end_addr);
+		if (end <= vma->vm_start)
+			continue;
+		vma_pages = 0;
+		while (start != end) {
+			start = unmap_page_range_lazy(mm, vma, start, end,
+					batches, &vma_pages);
+		}
+
+		atomic_add(vma_pages, &vma->vm_phantom_count);
+		//printk(KERN_ALERT "%s: vma %p new count %d vma_pages %d\n", __func__, vma, atomic_read(&vma->vm_phantom_count), vma_pages);
+	}
+
+	return start;
+}
+
 #ifdef CONFIG_PREEMPT
 # define ZAP_BLOCK_SIZE (8 * PAGE_SIZE)
 #else
diff --git a/mm/mmap.c b/mm/mmap.c
index 73f5e4b..eedf50a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -88,6 +89,15 @@ int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
+/* XXX avoid false sharing? */
+struct list_head unmapped_in_tlb[NR_CPUS];
+struct list_head unmapped_safe[NR_CPUS];
+struct timer_list lazy_unmap_timer[NR_CPUS];
+struct list_head lazy_unmap_free_vmas[NR_CPUS];
+
+DEFINE_SPINLOCK(lazy_unmap_lock);
+static void lazy_unmap_start_timer(int cpu);
+
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
@@ -230,6 +240,13 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 {
	struct vm_area_struct *next = vma->vm_next;
+	/* Lazily unmapped VMA. Let the laziness take care of the freeing */
+	if (atomic_read(&vma->vm_phantom_count)) {
+		/* XXX should be atomic? */
+		vma->vm_flags |= VM_PHANTOM_UNLINKED;
+		return next;
+	}
+
	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
@@ -278,7 +295,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
-		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+		if (!do_munmap(mm, newbrk, oldbrk-newbrk, 0))
			goto set_brk;
		goto out;
	}
@@ -513,6 +530,8 @@ void vma_adjust(struct vm_area_struct *vma, unsigned long start,
	long adjust_next = 0;
	int remove_next = 0;
+	BUG_ON(atomic_read(&vma->vm_phantom_count) != 0);
+
	if (next && !insert) {
		if (end >= next->vm_end) {
			/*
@@ -641,6 +660,10 @@ again:			remove_next = 1 + (end > next->vm_end);
		}
		mm->map_count--;
		mpol_put(vma_policy(next));
+		if (atomic_read(&next->vm_phantom_count)) {
+			printk(KERN_ALERT "%s: freeing phantom vma %p\n", __func__, next);
+			WARN_ON(1);
+		}
		kmem_cache_free(vm_area_cachep, next);
		/*
		 * In mprotect's case 6 (see comments on vma_merge),
@@ -670,6 +693,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
		return 0;
	if (vma->vm_ops && vma->vm_ops->close)
		return 0;
+	if (atomic_read(&vma->vm_phantom_count) != 0)
+		return 0;
	return 1;
 }
@@ -1134,7 +1159,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
-		if (do_munmap(mm, addr, len))
+		if (do_munmap(mm, addr, len, 0))
			return -ENOMEM;
		goto munmap_back;
	}
@@ -1260,6 +1285,9 @@ unmap_and_free_vma:
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	charged = 0;
 free_vma:
+	if (atomic_read(&vma->vm_phantom_count)) {
+		printk(KERN_ALERT "%s: freeing phantom vma %p\n", __func__, vma);
+	}
	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
	if (charged)
@@ -1805,20 +1833,24 @@ static void unmap_region(struct mm_struct *mm,
  */
 static void
 detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
-	struct vm_area_struct *prev, unsigned long end)
+	struct vm_area_struct *prev, unsigned long end, int lazy)
 {
	struct vm_area_struct **insertion_point;
	struct vm_area_struct *tail_vma = NULL;
	unsigned long addr;
+	int i;
+	i = 0;
	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
	do {
		rb_erase(&vma->vm_rb, &mm->mm_rb);
		mm->map_count--;
		tail_vma = vma;
		vma = vma->vm_next;
+		i++;
	} while (vma && vma->vm_start < end);
	*insertion_point = vma;
+	WARN_ON(lazy && i > 1);
	tail_vma->vm_next = NULL;
	if (mm->unmap_area == arch_unmap_area)
		addr = prev ? prev->vm_end : mm->mmap_base;
@@ -1851,6 +1883,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
	/* most fields are the same, copy all, and then fixup */
	*new = *vma;
+	atomic_set(&new->vm_phantom_count, 0);
	if (new_below)
		new->vm_end = addr;
@@ -1861,6 +1894,9 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
	pol = mpol_dup(vma_policy(vma));
	if (IS_ERR(pol)) {
+		if (atomic_read(&vma->vm_phantom_count)) {
+			printk(KERN_ALERT "%s: freeing phantom vma %p\n", __func__, vma);
+		}
		kmem_cache_free(vm_area_cachep, new);
		return PTR_ERR(pol);
	}
@@ -1884,12 +1920,160 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
	return 0;
 }
+static void
+lazy_unmap_work(struct work_struct *work)
+{
+	struct vm_area_struct *next, *prev, *_vma, *vma, **insertion_point;
+	struct mm_struct *mm;
+	unsigned long flags;
+	int cpu;
+
+	cpu = get_cpu();
+	local_irq_save(flags);
+	list_for_each_entry_safe(vma, next, &lazy_unmap_free_vmas[cpu],
+			lazy_unmap_list) {
+		mm = vma->vm_mm;
+		if (vma->vm_flags & VM_PHANTOM_UNLINKED) {
+			//printk(KERN_ALERT "%s: cpu %d delayed free vma %p start 0x%lx end 0x%lx\n", __func__, cpu, vma, vma->vm_start, vma->vm_end);
+			list_del(&vma->lazy_unmap_list);
+			kmem_cache_free(vm_area_cachep, vma);
+			continue;
+		}
+		BUG_ON(mm == NULL);
+		//printk(KERN_ALERT "%s: cpu %d delayed detach vma %p start 0x%lx end 0x%lx\n", __func__, cpu, vma, vma->vm_start, vma->vm_end);
+		list_del(&vma->lazy_unmap_list);
+		local_irq_restore(flags);
+
+		down_write(&mm->mmap_sem);
+		_vma = find_vma_prev(mm, vma->vm_start, &prev);
+		BUG_ON(_vma != vma);
+		WARN_ON(prev == NULL);
+
+		insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+		rb_erase(&vma->vm_rb, &mm->mm_rb);
+		mm->map_count--;
+		*insertion_point = vma->vm_next;
+		vma->vm_next = NULL;
+
+		//detach_vmas_to_be_unmapped(mm, vma, prev, vma->vm_end, 1);
+		remove_vma_list(mm, vma);
+		up_write(&mm->mmap_sem);
+
+		local_irq_save(flags);
+	}
+	local_irq_restore(flags);
+	put_cpu();
+
+	kfree(work);
+}
+
+static void
+lazy_unmap_free_page(struct page *page, struct vm_area_struct *vma)
+{
+	struct work_struct *work;
+	int cpu;
+
+	//printk("%s: releasing page %p mapcount %d\n", __func__, page, page_mapcount(page));
+
+	cpu = smp_processor_id();
+
+	page_remove_rmap(page);
+	put_page(page);
+
+	//printk(KERN_ALERT "cpu %ld vma %p count %d\n", cpu, vma, atomic_read(&vma->vm_phantom_count));
+
+	BUG_ON(atomic_read(&vma->vm_phantom_count) <= 0);
+
+	/* Do we need to free the vma? */
+	if (atomic_dec_and_test(&vma->vm_phantom_count)) {
+
+		/* The mm was destroyed already, so we can just free the vma */
+		if (vma->vm_flags & VM_PHANTOM_UNLINKED) {
+			//printk(KERN_ALERT "%s: freeing (immediate) vma %p start 0x%lx end 0x%lx\n", __func__, vma, vma->vm_start, vma->vm_end);
+			kmem_cache_free(vm_area_cachep, vma);
+			return;
+		}
+
+		//printk(KERN_ALERT "%s: cpu %d freeing (delayed) vma %p start 0x%lx end 0x%lx\n", __func__, cpu, vma, vma->vm_start, vma->vm_end);
+		list_add(&vma->lazy_unmap_list, &lazy_unmap_free_vmas[cpu]);
+		work = kmalloc(sizeof(struct work_struct), GFP_ATOMIC);
+		if (work == NULL)
+			return;
+		INIT_WORK(work, lazy_unmap_work);
+		schedule_work_on(cpu, work);
+	}
+}
+
+static void
+lazy_unmap_timer_fn(unsigned long cpu)
+{
+	struct lazy_unmap_batch *b, *next;
+	struct vm_area_struct *vma;
+	unsigned long flags;
+	struct page *page;
+	int done, i;
+
+	done = 0;
+	spin_lock_irqsave(&lazy_unmap_lock, flags);
+	list_for_each_entry_safe(b, next, &unmapped_safe[cpu], list) {
+		for (i = 0; i < LAZY_UNMAP_BATCH_SIZE; i++) {
+			vma = b->vma;
+			page = b->pages[i];
+			if (page == NULL)
+				break;
+			done++;
+			//printk(KERN_ALERT "%s cpu %ld page %p count %d lazy count %d vma %p count %d\n", __func__, cpu, page, page_count(page), atomic_read(&page->lazy_unmap_count), vma, atomic_read(&vma->vm_phantom_count));
+			if (put_page_lazy(page))
+				lazy_unmap_free_page(page, vma);
+		}
+		list_del(&b->list);
+		kfree(b);
+		if (done >= 512)	/* XXX tunable? */
+			break;
+	}
+	spin_unlock_irqrestore(&lazy_unmap_lock, flags);
+
+	lazy_unmap_start_timer(cpu);
+}
+
+static void
+do_lazy_munmap(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long start, unsigned long end)
+{
+	struct lazy_unmap_batch *b, *cpubatch, *next;
+	struct list_head batches;
+	unsigned long flags;
+	int cpu;
+
+	INIT_LIST_HEAD(&batches);
+
+	unmap_vmas_lazy(mm, vma, start, end, &batches);
+
+	list_for_each_entry_safe(b, next, &batches, list) {
+		list_del(&b->list);
+		for_each_online_cpu(cpu) {
+			cpubatch = kmalloc(sizeof(struct lazy_unmap_batch),
+					GFP_KERNEL);
+			memcpy(cpubatch, b, sizeof(struct lazy_unmap_batch));
+
+			/* XXX don't grab lock for each one? */
+			spin_lock_irqsave(&lazy_unmap_lock, flags);
+			/* XXX move to safe list for current cpu */
+			list_add(&cpubatch->list, &unmapped_in_tlb[cpu]);
+			spin_unlock_irqrestore(&lazy_unmap_lock, flags);
+		}
+		kfree(b);
+	}
+	/* XXX make it work */
+	//free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+	//	next? next->vm_start: 0);
+
+}
 /* Munmap is split into 2 main parts -- this part which finds
  * what needs doing, and the areas themselves, which do the
  * work.  This now handles partial unmappings.
  * Jeremy Fitzhardinge
  */
-int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, int lazy)
 {
	unsigned long end;
	struct vm_area_struct *vma, *prev, *last;
@@ -1919,7 +2103,11 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
	 * places tmp vma above, and higher split_vma places tmp vma below.
	 */
	if (start > vma->vm_start) {
-		int error = split_vma(mm, vma, start, 0);
+		int error;
+		/* XXX disallow splitting (unless it's the last vma) for lazy unmap? */
+		//if (lazy)
+		//printk(KERN_ALERT "%s: splitting lazy vma %p start 0x%lx end 0x%lx requested start 0x%lx requested end 0x%lx\n", __func__, vma, vma->vm_start, vma->vm_end, start, end);
+		error = split_vma(mm, vma, start, 0);
		if (error)
			return error;
		prev = vma;
@@ -1928,7 +2116,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
	/* Does it split the last one? */
	last = find_vma(mm, end);
	if (last && end > last->vm_start) {
-		int error = split_vma(mm, last, end, 1);
+		int error;
+		//if (lazy)
+		//printk(KERN_ALERT "%s: splitting lazy last vma %p start 0x%lx end 0x%lx requested start 0x%lx requested end 0x%lx\n", __func__, last, last->vm_start, last->vm_end, start, end);
+		error = split_vma(mm, last, end, 1);
		if (error)
			return error;
	}
@@ -1948,14 +2139,18 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
		}
	}
-	/*
-	 * Remove the vma's, and unmap the actual pages
-	 */
-	detach_vmas_to_be_unmapped(mm, vma, prev, end);
-	unmap_region(mm, vma, prev, start, end);
+	if (lazy == 0) {
+		/*
+		 * Remove the vma's, and unmap the actual pages
+		 */
+		detach_vmas_to_be_unmapped(mm, vma, prev, end, 0);
+		unmap_region(mm, vma, prev, start, end);
-	/* Fix up all other VM information */
-	remove_vma_list(mm, vma);
+		/* Fix up all other VM information */
+		remove_vma_list(mm, vma);
+	} else {
+		do_lazy_munmap(mm, vma, start, end);
+	}
	return 0;
 }
@@ -1970,11 +2165,27 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
	profile_munmap(addr);
	down_write(&mm->mmap_sem);
-	ret = do_munmap(mm, addr, len);
+	ret = do_munmap(mm, addr, len, 0);
	up_write(&mm->mmap_sem);
	return ret;
 }
+SYSCALL_DEFINE2(munmap_lazy, unsigned long, addr, size_t, len)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = current->mm;
+
+	//printk(KERN_ALERT "%s: addr %lx len %ld\n", __func__, addr, len);
+
+	down_write(&mm->mmap_sem);
+	ret = do_munmap(mm, addr, len, 1);
+	up_write(&mm->mmap_sem);
+
+	return ret;
+}
+
 static inline void verify_mm_writelocked(struct mm_struct *mm)
 {
 #ifdef CONFIG_DEBUG_VM
@@ -2044,7 +2255,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
-		if (do_munmap(mm, addr, len))
+		if (do_munmap(mm, addr, len, 0))
			return -ENOMEM;
		goto munmap_back;
	}
@@ -2136,7 +2347,7 @@ void exit_mmap(struct mm_struct *mm)
	while (vma)
		vma = remove_vma(vma);
-	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
+	//BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
 }
 /* Insert vm structure into process list sorted by address
@@ -2209,8 +2420,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
	if (new_vma) {
		*new_vma = *vma;
+		atomic_set(&new_vma->vm_phantom_count, 0);
		pol = mpol_dup(vma_policy(vma));
		if (IS_ERR(pol)) {
+			if (atomic_read(&vma->vm_phantom_count)) {
+				printk(KERN_ALERT "%s: freeing phantom vma %p\n", __func__, vma);
+			}
			kmem_cache_free(vm_area_cachep, new_vma);
			return NULL;
		}
@@ -2317,6 +2532,9 @@ int install_special_mapping(struct mm_struct *mm,
	vma->vm_private_data = pages;
	if (unlikely(insert_vm_struct(mm, vma))) {
+		if (atomic_read(&vma->vm_phantom_count)) {
+			printk(KERN_ALERT "%s: freeing phantom vma %p\n", __func__, vma);
+		}
		kmem_cache_free(vm_area_cachep, vma);
		return -ENOMEM;
	}
@@ -2492,13 +2710,56 @@ void mm_drop_all_locks(struct mm_struct *mm)
	mutex_unlock(&mm_all_locks_mutex);
 }
+static void
+lazy_unmap_start_timer(int cpu)
+{
+	struct timer_list *timer;
+
+	timer = &lazy_unmap_timer[cpu];
+	init_timer(timer);
+	timer_stats_timer_set_start_info(timer);
+	timer->expires = jiffies + HZ * 1;	/* XXX constant */
+	timer->data = cpu;
+	timer->function = lazy_unmap_timer_fn;
+
+	add_timer_on(timer, cpu);
+}
+
+static int __cpuinit
+lazy_unmap_cpu_up(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	long cpu;
+
+	cpu = (long)hcpu;
+	switch (action) {
+	case CPU_ONLINE:
+		lazy_unmap_start_timer(cpu);
+		break;
+	/* XXX offline */
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata lazy_unmap_cpu_up_notifier = {
+	&lazy_unmap_cpu_up, NULL, 0
+};
+
 /*
  * initialise the VMA slab
  */
 void __init mmap_init(void)
 {
-	int ret;
+	int i, ret;
	ret = percpu_counter_init(&vm_committed_as, 0);
	VM_BUG_ON(ret);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		INIT_LIST_HEAD(&unmapped_in_tlb[i]);
+		INIT_LIST_HEAD(&unmapped_safe[i]);
+		INIT_LIST_HEAD(&lazy_unmap_free_vmas[i]);
+	}
+	register_cpu_notifier(&lazy_unmap_cpu_up_notifier);
+	lazy_unmap_start_timer(smp_processor_id());
 }
diff --git a/mm/mremap.c b/mm/mremap.c
index 97bff25..127262f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -237,7 +237,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
	mm->total_vm += new_len >> PAGE_SHIFT;
	vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
-	if (do_munmap(mm, old_addr, old_len) < 0) {
+	if (do_munmap(mm, old_addr, old_len, 0) < 0) {
		/* OOM: unable to split vma, just get accounts right */
		vm_unacct_memory(excess >> PAGE_SHIFT);
		excess = 0;
@@ -317,7 +317,7 @@ unsigned long do_mremap(unsigned long addr,
		if (ret)
			goto out;
-		ret = do_munmap(mm, new_addr, new_len);
+		ret = do_munmap(mm, new_addr, new_len, 0);
		if (ret)
			goto out;
	}
@@ -328,7 +328,7 @@ unsigned long do_mremap(unsigned long addr,
	 * do_munmap does all the needed commit accounting
	 */
	if (old_len >= new_len) {
-		ret = do_munmap(mm, addr+new_len, old_len - new_len);
+		ret = do_munmap(mm, addr+new_len, old_len - new_len, 0);
		if (ret && old_len != new_len)
			goto out;
		ret = addr;
diff --git a/mm/nommu.c b/mm/nommu.c
index 9876fa0..4c17796 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1508,7 +1508,7 @@ static int shrink_vma(struct mm_struct *mm,
  * - under NOMMU conditions the chunk to be unmapped must be backed by a single
  *   VMA, though it need not cover the whole VMA
  */
-int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, int lazy)
 {
	struct vm_area_struct *vma;
	struct rb_node *rb;
@@ -1589,7 +1589,7 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
	struct mm_struct *mm = current->mm;
	down_write(&mm->mmap_sem);
-	ret = do_munmap(mm, addr, len);
+	ret = do_munmap(mm, addr, len, 0);
	up_write(&mm->mmap_sem);
	return ret;
 }
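
For reference, a minimal user-space sketch of how the new syscall could be exercised once this patch is applied. This is illustrative only and not part of the patch: it assumes the x86-64 syscall number 299 defined above, and since glibc has no wrapper it goes through syscall(2) directly; the file name and the mapping size are arbitrary.

/* munmap_lazy_demo.c - illustrative sketch, not part of the patch.
 * Assumes __NR_munmap_lazy == 299 (x86-64 number added by this patch).
 * On an unpatched kernel the call simply fails with ENOSYS.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_munmap_lazy
#define __NR_munmap_lazy 299
#endif

int main(void)
{
	size_t len = 16 * 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 0xaa, len);	/* fault the pages in */

	/* Tear the mapping down lazily: the PTEs are cleared immediately,
	 * but page freeing and TLB invalidation are deferred to later
	 * context switches and the per-CPU timer added by the patch. */
	if (syscall(__NR_munmap_lazy, (unsigned long)p, len) != 0) {
		perror("munmap_lazy");
		return 1;
	}
	return 0;
}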