commit dd491091f3260f9fbb7b4ca73d76a66d4de54edf
Author: Andriy Gapon
Date:   Wed Dec 21 00:10:23 2011 +0200

    - exclude buffers from execution and coredump

diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index af41f14..3583d6e 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1371,9 +1371,12 @@ brelse(struct buf *bp)
             }
             if ((bp->b_flags & B_INVAL) == 0) {
-                pmap_qenter(
-                    trunc_page((vm_offset_t)bp->b_data),
-                    bp->b_pages, bp->b_npages);
+                pmap_qenter_prot(
+                    trunc_page((vm_offset_t)bp->b_data),
+                    bp->b_pages,
+                    bp->b_npages,
+                    (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE)
+                    );
             }
             m = bp->b_pages[i];
         }
@@ -3128,12 +3131,13 @@ allocbuf(struct buf *bp, int size)
             bp->b_data = (caddr_t)
                 trunc_page((vm_offset_t)bp->b_data);
-            pmap_qenter(
-                (vm_offset_t)bp->b_data,
-                bp->b_pages,
-                bp->b_npages
-                );
-
+            pmap_qenter_prot(
+                (vm_offset_t)bp->b_data,
+                bp->b_pages,
+                bp->b_npages,
+                (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE)
+                );
+
             bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
                 (vm_offset_t)(bp->b_offset & PAGE_MASK));
         }
@@ -3417,8 +3421,9 @@ bufdone_finish(struct buf *bp)
         vm_object_pip_wakeupn(obj, 0);
         VM_OBJECT_UNLOCK(obj);
         if (bogus)
-            pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
-                bp->b_pages, bp->b_npages);
+            pmap_qenter_prot(trunc_page((vm_offset_t)bp->b_data),
+                bp->b_pages, bp->b_npages,
+                VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE);
     }

 /*
@@ -3461,8 +3466,12 @@ vfs_unbusy_pages(struct buf *bp)
             if (!m)
                 panic("vfs_unbusy_pages: page missing\n");
             bp->b_pages[i] = m;
-            pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
-                bp->b_pages, bp->b_npages);
+            pmap_qenter_prot(
+                trunc_page((vm_offset_t)bp->b_data),
+                bp->b_pages,
+                bp->b_npages,
+                (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE)
+                );
         }
         vm_object_pip_subtract(obj, 1);
         vm_page_io_finish(m);
@@ -3628,8 +3637,12 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
     }
     VM_OBJECT_UNLOCK(obj);
     if (bogus)
-        pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
-            bp->b_pages, bp->b_npages);
+        pmap_qenter_prot(
+            trunc_page((vm_offset_t)bp->b_data),
+            bp->b_pages,
+            bp->b_npages,
+            (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE)
+            );
 }

 /*
@@ -3766,7 +3779,8 @@ tryagain:
             VM_WAIT;
             goto tryagain;
         }
-        pmap_qenter(pg, &p, 1);
+        pmap_qenter_prot(pg, &p, 1,
+            (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE));
         bp->b_pages[index] = p;
     }
     bp->b_npages = index;
@@ -3825,7 +3839,8 @@ vmapbuf(struct buf *bp)
         (vm_offset_t)bp->b_data, bp->b_bufsize, prot, bp->b_pages,
         btoc(MAXPHYS))) < 0)
         return (-1);
-    pmap_qenter((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx);
+    pmap_qenter_prot((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx,
+        (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE));
     kva = bp->b_saveaddr;
     bp->b_npages = pidx;
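Every hunk above applies the same call pattern: map the buffer's pages read/write, non-executable, and excluded from kernel core dumps. Purely for illustration, a minimal sketch of that pattern as a local helper; the name buf_qenter_nodump() is hypothetical and not part of the patch:

    static void
    buf_qenter_nodump(struct buf *bp)
    {

        /*
         * Read/write, non-executable mapping whose pages are also
         * excluded from kernel core dumps via VM_PROT_EXCLUDE.
         */
        pmap_qenter_prot(trunc_page((vm_offset_t)bp->b_data),
            bp->b_pages, bp->b_npages,
            VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXCLUDE);
    }

A follow-up cleanup could fold the repeated calls above into such a helper.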
commit d49ce39d8344251464b455276ab0c3b3032e6762
Author: kmacy
Date:   Sat Jul 11 03:00:37 2009 +0000

    - don't map kernel thread stacks executable
    - exclude vnode pager buffers from core dumps

diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index e4a4bd8..2c3535a 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -431,7 +431,8 @@ vm_thread_new(struct thread *td, int pages)
         m->valid = VM_PAGE_BITS_ALL;
     }
     VM_OBJECT_UNLOCK(ksobj);
-    pmap_qenter(ks, ma, pages);
+    pmap_qenter_prot(ks, ma, pages,
+        (VM_PROT_READ|VM_PROT_WRITE));
     return (1);
 }
@@ -585,7 +586,8 @@ vm_thread_swapin(struct thread *td)
         vm_page_wakeup(ma[i]);
     }
     VM_OBJECT_UNLOCK(ksobj);
-    pmap_qenter(td->td_kstack, ma, pages);
+    pmap_qenter_prot(td->td_kstack, ma, pages,
+        (VM_PROT_READ|VM_PROT_WRITE));
     cpu_thread_swapin(td);
 }
 #endif /* !NO_SWAPPING */
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 929fa4f..b24b934 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -898,7 +898,8 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
     /*
      * and map the pages to be read into the kva
      */
-    pmap_qenter(kva, m, count);
+    pmap_qenter_prot(kva, m, count,
+        (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE));

     /* build a minimal buffer header */
     bp->b_iocmd = BIO_READ;
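Read together with the commit that follows, the series settles on three protection patterns for kernel mappings. The summary below is illustrative only; the variable names are borrowed from the hunks above:

    /* Kernel thread stacks: writable but not executable, still dumped. */
    pmap_qenter_prot(ks, ma, pages, VM_PROT_READ | VM_PROT_WRITE);

    /* Pager and buffer-cache data: also kept out of minidumps. */
    pmap_qenter_prot(kva, m, count,
        VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXCLUDE);

    /* Historic behavior, which plain pmap_qenter() continues to provide. */
    pmap_qenter_prot(va, ma, count,
        VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);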
commit ce2563681041ce1310ca21ba21a2d6ef780ee8ab
Author: kmacy
Date:   Sat Jul 11 02:58:36 2009 +0000

    implement pmap_qenter_prot for "amd64"

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f7c0d2d..b6173ef 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1374,10 +1374,16 @@ pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
-pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+pmap_qenter_prot(vm_offset_t sva, vm_page_t *ma, int count, vm_prot_t prot)
 {
     pt_entry_t *endpte, oldpte, pa, *pte;
     vm_page_t m;
+    uint64_t flags = 0;
+
+    if (prot & VM_PROT_WRITE)
+        flags |= PG_RW;
+    if ((prot & VM_PROT_EXECUTE) == 0)
+        flags |= PG_NX;

     oldpte = 0;
     pte = vtopte(sva);
@@ -1387,7 +1393,9 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
         pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
         if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
             oldpte |= *pte;
-            pte_store(pte, pa | PG_G | PG_RW | PG_V);
+            pte_store(pte, pa | PG_G | PG_V | flags);
+            if (prot & VM_PROT_EXCLUDE)
+                dump_exclude_page(VM_PAGE_TO_PHYS(m));
         }
         pte++;
     }
@@ -1396,6 +1404,16 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
         PAGE_SIZE);
 }

+void
+pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+{
+
+    pmap_qenter_prot(sva, ma, count,
+        VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
+}
+
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
@@ -1408,6 +1426,7 @@ pmap_qremove(vm_offset_t sva, int count)

     va = sva;
     while (count-- > 0) {
+        dump_unexclude_page(pmap_kextract(va));
         pmap_kremove(va);
         va += PAGE_SIZE;
     }

commit f51860b3ba849a2654d16c50c82f96c9c0ca42ae
Author: Andriy Gapon
Date:   Wed Dec 21 00:06:26 2011 +0200

    - add VM_PROT_EXCLUDE to exclude a mapping from dumps
    - add pmap_qenter_prot to allow specifying protections for a kernel mapping

diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
index a4f1e70..2fc3621 100644
--- a/sys/vm/pmap.h
+++ b/sys/vm/pmap.h
@@ -133,6 +133,7 @@ int pmap_pinit(pmap_t);
 void pmap_pinit0(pmap_t);
 void pmap_protect(pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
 void pmap_qenter(vm_offset_t, vm_page_t *, int);
+void pmap_qenter_prot(vm_offset_t, vm_page_t *, int, vm_prot_t);
 void pmap_qremove(vm_offset_t, int);
 void pmap_release(pmap_t);
 void pmap_remove(pmap_t, vm_offset_t, vm_offset_t);
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index 67cc922..4b786df 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -77,6 +77,7 @@ typedef u_char vm_prot_t;	/* protection codes */
 #define VM_PROT_WRITE		((vm_prot_t) 0x02)
 #define VM_PROT_EXECUTE		((vm_prot_t) 0x04)
 #define VM_PROT_COPY		((vm_prot_t) 0x08)	/* copy-on-read */
+#define VM_PROT_EXCLUDE		((vm_prot_t) 0x10)	/* don't include in core-dump */

 #define VM_PROT_ALL		(VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
 #define VM_PROT_RW		(VM_PROT_READ|VM_PROT_WRITE)

commit 871f6c6252d1230a1d9601147e4f0c375bafb9c7
Author: Andriy Gapon
Date:   Tue Dec 20 23:59:04 2011 +0200

    exclude ZFS data buffers from kernel core dumps

diff --git a/sys/cddl/compat/opensolaris/sys/kmem.h b/sys/cddl/compat/opensolaris/sys/kmem.h
index 6be2735..428badf 100644
--- a/sys/cddl/compat/opensolaris/sys/kmem.h
+++ b/sys/cddl/compat/opensolaris/sys/kmem.h
@@ -45,7 +45,9 @@ MALLOC_DECLARE(M_SOLARIS);
 #define KM_SLEEP		M_WAITOK
 #define KM_PUSHPAGE		M_WAITOK
 #define KM_NOSLEEP		M_NOWAIT
-#define KMC_NODEBUG		0
+#define KM_ZERO			M_ZERO
+#define KM_NODEBUG		M_NODUMP
+#define KMC_NODEBUG		UMA_ZONE_NODUMP
 #define KMC_NOTOUCH		0

 typedef struct kmem_cache {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
index 5f8f00b..e28ff43 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -138,7 +138,7 @@ zio_init(void)
         char name[36];
         (void) sprintf(name, "zio_buf_%lu", (ulong_t)size);
         zio_buf_cache[c] = kmem_cache_create(name, size,
-            align, NULL, NULL, NULL, NULL, NULL, cflags);
+            align, NULL, NULL, NULL, NULL, NULL, cflags); /* XXX */

         /*
          * Since zio_data bufs do not appear in crash dumps, we
@@ -242,7 +242,7 @@ zio_data_buf_alloc(size_t size)
     if (zio_use_uma)
         return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
     else
-        return (kmem_alloc(size, KM_SLEEP));
+        return (kmem_alloc(size, KM_SLEEP | KM_NODEBUG));
 }

 void
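KM_NODEBUG now maps to M_NODUMP, so the zio data buffers above reach the page-level allocators with the new flag set. For illustration only, a hedged sketch of how another kernel consumer could ask for the same treatment through malloc(9); M_TEMP is just a stand-in malloc type, and the exclusion only takes effect where the backing allocator honors M_NODUMP, as wired up in the next commit:

    static void *
    alloc_nodump_scratch(size_t len)
    {

        /*
         * Large, reconstructible scratch memory: request that its pages
         * be left out of kernel minidumps.  Whether the exclusion takes
         * effect depends on which backing allocator satisfies the
         * request and honors M_NODUMP.
         */
        return (malloc(len, M_TEMP, M_WAITOK | M_ZERO | M_NODUMP));
    }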
commit ffe4e55ba3df4270fc9b94bb88be7d1eb9c4d4b5
Author: Andriy Gapon
Date:   Tue Dec 20 23:36:43 2011 +0200

    add and use page exclusion API and *_NODUMP flags to uma and malloc ...
    to allow callers to exclude memory from the dump

diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c
index 577de07..9df18cc 100644
--- a/sys/amd64/amd64/minidump_machdep.c
+++ b/sys/amd64/amd64/minidump_machdep.c
@@ -60,6 +60,7 @@ CTASSERT(sizeof(struct kerneldumpheader) == 512);
 #define DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))

 uint64_t *vm_page_dump;
+uint64_t *vm_page_dump_exclude;
 int vm_page_dump_size;

 static struct kerneldumpheader kdh;
@@ -75,10 +76,16 @@ CTASSERT(sizeof(*vm_page_dump) == 8);
 static int
 is_dumpable(vm_paddr_t pa)
 {
-    int i;
+    int i, idx, bit, isdata;
+    uint64_t pfn = pa;
+
+    pfn >>= PAGE_SHIFT;
+    idx = pfn >> 6;		/* 2^6 = 64 */
+    bit = pfn & 63;
+    isdata = ((vm_page_dump_exclude[idx] & (1ul << bit)) == 0);

     for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
-        if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
+        if (pa >= dump_avail[i] && pa < dump_avail[i + 1] && isdata)
             return (1);
     }
     return (0);
@@ -406,7 +413,7 @@ minidumpsys(struct dumperinfo *di)
     /* Dump memory chunks */
     /* XXX cluster it up and use blk_dump() */
     for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
-        bits = vm_page_dump[i];
+        bits = vm_page_dump[i] & ~(vm_page_dump_exclude[i]);
         while (bits) {
             bit = bsfq(bits);
             pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
@@ -474,3 +481,25 @@ dump_drop_page(vm_paddr_t pa)
     bit = pa & 63;
     atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
 }
+
+void
+dump_exclude_page(vm_paddr_t pa)
+{
+    int idx, bit;
+
+    pa >>= PAGE_SHIFT;
+    idx = pa >> 6;		/* 2^6 = 64 */
+    bit = pa & 63;
+    atomic_set_long(&vm_page_dump_exclude[idx], 1ul << bit);
+}
+
+void
+dump_unexclude_page(vm_paddr_t pa)
+{
+    int idx, bit;
+
+    pa >>= PAGE_SHIFT;
+    idx = pa >> 6;		/* 2^6 = 64 */
+    bit = pa & 63;
+    atomic_clear_long(&vm_page_dump_exclude[idx], 1ul << bit);
+}
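The exclusion bitmap is indexed exactly like vm_page_dump: one bit per physical page, 64 pages per 64-bit word. For illustration, a sketch of a query helper built on that layout; dump_page_is_excluded() is hypothetical and not part of the patch:

    static int
    dump_page_is_excluded(vm_paddr_t pa)
    {
        uint64_t pfn;
        int idx, bit;

        pfn = pa >> PAGE_SHIFT;	/* physical page number */
        idx = pfn >> 6;		/* word index: pfn / 64 */
        bit = pfn & 63;		/* bit within that word */
        return ((vm_page_dump_exclude[idx] & (1ul << bit)) != 0);
    }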
diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c
index 3583975..dc9c307 100644
--- a/sys/amd64/amd64/uma_machdep.c
+++ b/sys/amd64/amd64/uma_machdep.c
@@ -65,7 +65,8 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
         break;
     }
     pa = m->phys_addr;
-    dump_add_page(pa);
+    if ((wait & M_NODUMP) == 0)
+        dump_add_page(pa);
     va = (void *)PHYS_TO_DMAP(pa);
     if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
         pagezero(va);
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 479c84e..368ef12 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -60,6 +60,7 @@ extern	char	kstack[];
 extern	char	sigcode[];
 extern	int	szsigcode;
 extern	uint64_t *vm_page_dump;
+extern	uint64_t *vm_page_dump_exclude;
 extern	int	vm_page_dump_size;
 extern	int	workaround_erratum383;
 extern	int	_udatasel;
@@ -94,6 +95,8 @@ void	fsbase_load_fault(void) __asm(__STRING(fsbase_load_fault));
 void	gsbase_load_fault(void) __asm(__STRING(gsbase_load_fault));
 void	dump_add_page(vm_paddr_t);
 void	dump_drop_page(vm_paddr_t);
+void	dump_exclude_page(vm_paddr_t);
+void	dump_unexclude_page(vm_paddr_t);
 void	initializecpu(void);
 void	initializecpucache(void);
 void	fillw(int /*u_short*/ pat, void *base, size_t cnt);
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 0c65602..0835b96 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -77,6 +77,11 @@
 #define UMA_MD_SMALL_ALLOC

 /*
+ * This machine implements a machine-dependent sparse kernel dump (minidump).
+ */
+#define VM_MD_MINIDUMP
+
+/*
  * The physical address space is densely populated.
  */
 #define VM_PHYSSEG_DENSE
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index 76e94be..a6450b3 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -50,6 +50,7 @@
 #define M_ZERO		0x0100	/* bzero the allocation */
 #define M_NOVM		0x0200	/* don't ask VM for pages */
 #define M_USE_RESERVE	0x0400	/* can alloc out of reserve memory */
+#define M_NODUMP	0x0800	/* don't dump pages in this allocation */

 #define M_MAGIC		877983977	/* time when first defined :-) */
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index fbba22f..e17e6ef 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -248,6 +248,10 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
 					 * backend pages and can fail early.
 					 */
 #define UMA_ZONE_VTOSLAB	0x2000	/* Zone uses vtoslab for lookup. */
+#define UMA_ZONE_NODUMP		0x4000	/*
+					 * Zone's pages will not be included in
+					 * mini-dumps.
+					 */

 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 9fbea55..eaa2faf 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -845,6 +845,9 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
     else
         wait &= ~M_ZERO;

+    if (keg->uk_flags & UMA_ZONE_NODUMP)
+        wait |= M_NODUMP;
+
     /* zone is passed for legacy reasons. */
     mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
     if (mem == NULL) {
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
index ea2c904..b7e069b 100644
--- a/sys/vm/vm_contig.c
+++ b/sys/vm/vm_contig.c
@@ -338,6 +338,10 @@ retry:
     for (; m < end_m; m++) {
         if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
             pmap_zero_page(m);
+#ifdef VM_MD_MINIDUMP
+        if (flags & M_NODUMP)
+            dump_exclude_page(VM_PAGE_TO_PHYS(m));
+#endif
         m->valid = VM_PAGE_BITS_ALL;
     }
     VM_OBJECT_UNLOCK(object);
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 2c23c48..fd738cf 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -85,6 +85,10 @@ __FBSDID("$FreeBSD$");
 #include
 #include

+#ifdef VM_MD_MINIDUMP
+#include
+#endif
+
 vm_map_t kernel_map=0;
 vm_map_t kmem_map=0;
 vm_map_t exec_map=0;
@@ -239,8 +243,15 @@ kmem_free(map, addr, size)
     vm_offset_t addr;
     vm_size_t size;
 {
+    vm_offset_t start = trunc_page(addr);
+    vm_offset_t end = round_page(addr + size);
+#ifdef VM_MD_MINIDUMP
+    vm_offset_t temp = start;

-    (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
+    for (; temp < end; temp += PAGE_SIZE)
+        dump_unexclude_page(pmap_kextract(temp));
+#endif
+    (void) vm_map_remove(map, start, end);
 }

 /*
@@ -427,6 +438,10 @@ retry:
         }
         if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
             pmap_zero_page(m);
+#ifdef VM_MD_MINIDUMP
+        if (flags & M_NODUMP)
+            dump_exclude_page(VM_PAGE_TO_PHYS(m));
+#endif
         m->valid = VM_PAGE_BITS_ALL;
         KASSERT((m->oflags & VPO_UNMANAGED) != 0,
             ("kmem_malloc: page %p is managed", m));
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index ee4f2a4..645a90a 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -347,6 +347,10 @@ vm_page_startup(vm_offset_t vaddr)
     vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
         new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
     bzero((void *)vm_page_dump, vm_page_dump_size);
+    new_end -= vm_page_dump_size;
+    vm_page_dump_exclude = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
+        new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
+    bzero((void *)vm_page_dump_exclude, vm_page_dump_size);
 #endif
 #ifdef __amd64__
     /*
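For illustration, how a subsystem could opt a whole UMA zone out of minidumps with the new flag, much as the ZFS commit above does for its zio caches; the zone name and item size are hypothetical:

    static uma_zone_t scratch_zone;

    static void
    scratch_zone_init(void)
    {

        /*
         * Items from this zone never appear in minidumps: the keg passes
         * M_NODUMP down to the page allocator for every slab it grows.
         */
        scratch_zone = uma_zcreate("scratchbuf", 65536,
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
    }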