--- //depot/yahoo/ybsd_6/src/lib/libkvm/kvm_amd64.c	2005/10/26 18:07:02
+++ //depot/peter/peter_minidump/lib/libkvm/kvm_amd64.c	2006/04/06 23:27:46
@@ -1,11 +1,6 @@
 /*-
- * Copyright (c) 1989, 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 2006 Peter Wemm
  *
- * This code is derived from software developed by the Computer Systems
- * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
- * BG 91-66 and contributed to Berkeley.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -14,18 +9,11 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
  *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
@@ -38,15 +26,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/lib/libkvm/kvm_amd64.c,v 1.18.2.1 2005/10/26 00:06:58 peter Exp $");
 
-#if defined(LIBC_SCCS) && !defined(lint)
-#if 0
-static char sccsid[] = "@(#)kvm_hp300.c	8.1 (Berkeley) 6/4/93";
-#endif
-#endif /* LIBC_SCCS and not lint */
-
 /*
- * AMD64 machine dependent routines for kvm.  Hopefully, the forthcoming
- * vm code will one day obsolete this module.
+ * AMD64 machine dependent routines for kvm and minidumps.
  */
 
 #include <sys/param.h>
@@ -54,6 +35,7 @@
 #include <sys/user.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
+#include <sys/fnv_hash.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <nlist.h>
@@ -63,64 +45,87 @@
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 
+#include <machine/cpufunc.h>
 #include <machine/elf.h>
 
 #include "kvm_private.h"
 
+/* XXX move to common md header */
+struct myhdr {
+	char magic[24];		/* "minidump FreeBSD/amd64" */
+	uint32_t version;
+	uint32_t msgbufsize;
+	uint32_t bitmapsize;
+	uint32_t ptesize;
+	uint64_t kernbase;
+	uint64_t dmapbase;
+	uint64_t dmapend;
+};
+
+struct hpte {
+	struct hpte *next;
+	vm_paddr_t pa;
+	int64_t off;
+};
+
+#define HPT_SIZE 1024
+
 struct vmstate {
-	void *mmapbase;
-	size_t mmapsize;
-	pml4_entry_t *PML4;
+	struct myhdr hdr;
+	void *hpt_head[HPT_SIZE];
+	uint64_t *bitmap;
+	uint64_t *ptemap;
 };
 
-/*
- * Map the ELF headers into the process' address space.  We do this in two
- * steps: first the ELF header itself and using that information the whole
- * set of headers.  (Taken from kvm_ia64.c)
- */
-static int
-_kvm_maphdrs(kvm_t *kd, size_t sz)
+static void
+hpt_insert(kvm_t *kd, vm_paddr_t pa, int64_t off)
 {
-	struct vmstate *vm = kd->vmst;
+	struct hpte *hpte;
+	uint32_t fnv = FNV1_32_INIT;
+
+	fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
+	fnv &= (HPT_SIZE - 1);
+	hpte = malloc(sizeof(*hpte));
+	hpte->pa = pa;
+	hpte->off = off;
+	hpte->next = kd->vmst->hpt_head[fnv];
+	kd->vmst->hpt_head[fnv] = hpte;
+}
 
-	/* munmap() previous mmap(). */
-	if (vm->mmapbase != NULL) {
-		munmap(vm->mmapbase, vm->mmapsize);
-		vm->mmapbase = NULL;
-	}
+static int64_t
+hpt_find(kvm_t *kd, vm_paddr_t pa)
+{
+	struct hpte *hpte;
+	uint32_t fnv = FNV1_32_INIT;
 
-	vm->mmapsize = sz;
-	vm->mmapbase = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, kd->pmfd, 0);
-	if (vm->mmapbase == MAP_FAILED) {
-		_kvm_err(kd, kd->program, "cannot mmap corefile");
-		return (-1);
+	fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
+	fnv &= (HPT_SIZE - 1);
+	for (hpte = kd->vmst->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) {
+		if (pa == hpte->pa)
+			return (hpte->off);
 	}
-	return (0);
+	return (-1);
 }
 
-/*
- * Translate a physical memory address to a file-offset in the crash-dump.
- * (Taken from kvm_ia64.c)
- */
-static size_t
-_kvm_pa2off(kvm_t *kd, uint64_t pa, off_t *ofs)
+static int
+inithash(kvm_t *kd, uint64_t *base, int len, off_t off)
 {
-	Elf_Ehdr *e = kd->vmst->mmapbase;
-	Elf_Phdr *p = (Elf_Phdr*)((char*)e + e->e_phoff);
-	int n = e->e_phnum;
+	uint64_t idx;
+	uint64_t bit, bits;
+	vm_paddr_t pa;
 
-	while (n && (pa < p->p_paddr || pa >= p->p_paddr + p->p_memsz))
-		p++, n--;
-	if (n == 0)
-		return (0);
-	*ofs = (pa - p->p_paddr) + p->p_offset;
-	return (PAGE_SIZE - ((size_t)pa & PAGE_MASK));
+	for (idx = 0; idx < len / sizeof(*base); idx++) {
+		bits = base[idx];
+		while (bits) {
+			bit = bsfq(bits);
+			bits &= ~(1ul << bit);
+			pa = (idx * sizeof(*base) * NBBY + bit) * PAGE_SIZE;
+			hpt_insert(kd, pa, off);
+			off += PAGE_SIZE;
		}
+	}
+	return (off);
 }
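Reviewer note on the scheme above: pages are stored in the minidump in ascending bitmap order, so kvm needs a reverse map from physical address to file offset; hpt_insert()/hpt_find() build a small chained hash (HPT_SIZE buckets, a power of two so the mask works) keyed on the page's physical address. Below is a minimal stand-alone sketch of the same structure; the inline hash mirrors the multiply-then-xor fnv_32_buf() from <sys/fnv_hash.h>, and every name in it is illustrative rather than part of the patch.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define HPT_SIZE 1024			/* must stay a power of two for the mask */

struct hpte {
	struct hpte *next;
	uint64_t pa;			/* physical address of the page */
	int64_t off;			/* byte offset of that page in the dump */
};

static struct hpte *head[HPT_SIZE];

/* FNV-style hash over the address bytes, as fnv_32_buf() does it. */
static uint32_t
hash_pa(uint64_t pa)
{
	uint32_t h = 2166136261u;	/* FNV1_32_INIT */
	unsigned char *p = (unsigned char *)&pa;
	size_t i;

	for (i = 0; i < sizeof(pa); i++) {
		h *= 16777619;		/* FNV_32_PRIME */
		h ^= p[i];
	}
	return (h & (HPT_SIZE - 1));
}

static void
insert(uint64_t pa, int64_t off)
{
	struct hpte *e = malloc(sizeof(*e));	/* error handling elided */

	e->pa = pa;
	e->off = off;
	e->next = head[hash_pa(pa)];	/* push onto the bucket chain */
	head[hash_pa(pa)] = e;
}

static int64_t
find(uint64_t pa)
{
	struct hpte *e;

	for (e = head[hash_pa(pa)]; e != NULL; e = e->next)
		if (e->pa == pa)
			return (e->off);
	return (-1);			/* page not present in the dump */
}

int
main(void)
{
	insert(0x200000, 8192);
	printf("offset of pa 0x200000: %lld\n", (long long)find(0x200000));
	return (0);
}

Collisions simply chain, and with one entry per dumped page, 1024 buckets keeps chains short for modestly sized dumps.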
 
 void
@@ -128,10 +133,10 @@
 {
 	struct vmstate *vm = kd->vmst;
 
-	if (vm->mmapbase != NULL)
-		munmap(vm->mmapbase, vm->mmapsize);
-	if (vm->PML4)
-		free(vm->PML4);
+	if (vm->bitmap)
+		free(vm->bitmap);
+	if (vm->ptemap)
+		free(vm->ptemap);
 	free(vm);
 	kd->vmst = NULL;
 }
 
 int
 _kvm_initvtop(kvm_t *kd)
 {
-	struct nlist nlist[2];
 	u_long pa;
-	u_long kernbase;
-	pml4_entry_t *PML4;
-	Elf_Ehdr *ehdr;
-	size_t hdrsz;
+	struct vmstate *vmst;
+	off_t off;
 
-	kd->vmst = (struct vmstate *)_kvm_malloc(kd, sizeof(*kd->vmst));
-	if (kd->vmst == 0) {
+	vmst = _kvm_malloc(kd, sizeof(*vmst));
+	if (vmst == 0) {
 		_kvm_err(kd, kd->program, "cannot allocate vm");
 		return (-1);
 	}
-	kd->vmst->PML4 = 0;
-
-	if (_kvm_maphdrs(kd, sizeof(Elf_Ehdr)) == -1)
+	kd->vmst = vmst;
+	bzero(vmst, sizeof(*vmst));
+	if (pread(kd->pmfd, &vmst->hdr, sizeof(vmst->hdr), 0) !=
+	    sizeof(vmst->hdr)) {
+		_kvm_err(kd, kd->program, "cannot read dump header");
 		return (-1);
+	}
 
-	ehdr = kd->vmst->mmapbase;
-	hdrsz = ehdr->e_phoff + ehdr->e_phentsize * ehdr->e_phnum;
-	if (_kvm_maphdrs(kd, hdrsz) == -1)
-		return (-1);
+	/* Skip header and msgbuf */
+	off = PAGE_SIZE + round_page(vmst->hdr.msgbufsize);
 
-	nlist[0].n_name = "kernbase";
-	nlist[1].n_name = 0;
-
-	if (kvm_nlist(kd, nlist) != 0) {
-		_kvm_err(kd, kd->program, "bad namelist - no kernbase");
+	vmst->bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
+	if (vmst->bitmap == NULL) {
+		_kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize);
 		return (-1);
 	}
-	kernbase = nlist[0].n_value;
-
-	nlist[0].n_name = "KPML4phys";
-	nlist[1].n_name = 0;
-
-	if (kvm_nlist(kd, nlist) != 0) {
-		_kvm_err(kd, kd->program, "bad namelist - no KPML4phys");
+	if (pread(kd->pmfd, vmst->bitmap, vmst->hdr.bitmapsize, off) !=
+	    vmst->hdr.bitmapsize) {
+		_kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize);
 		return (-1);
 	}
-	if (kvm_read(kd, (nlist[0].n_value - kernbase), &pa, sizeof(pa)) !=
-	    sizeof(pa)) {
-		_kvm_err(kd, kd->program, "cannot read KPML4phys");
+	off += round_page(vmst->hdr.bitmapsize);
+
+	vmst->ptemap = _kvm_malloc(kd, vmst->hdr.ptesize);
+	if (vmst->ptemap == NULL) {
+		_kvm_err(kd, kd->program, "cannot allocate %d bytes for ptemap", vmst->hdr.ptesize);
 		return (-1);
 	}
-	PML4 = _kvm_malloc(kd, PAGE_SIZE);
-	if (kvm_read(kd, pa, PML4, PAGE_SIZE) != PAGE_SIZE) {
-		_kvm_err(kd, kd->program, "cannot read KPML4phys");
+	if (pread(kd->pmfd, vmst->ptemap, vmst->hdr.ptesize, off) !=
+	    vmst->hdr.ptesize) {
+		_kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", vmst->hdr.ptesize);
 		return (-1);
 	}
-	kd->vmst->PML4 = PML4;
+	off += vmst->hdr.ptesize;
+
+	/* build physical address hash table for sparse pages */
+	inithash(kd, vmst->bitmap, vmst->hdr.bitmapsize, off);
+
 	return (0);
 }
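_kvm_initvtop() above simply walks the fixed section order that the kernel's dumpsys() writes: one header page, then the message buffer, the page bitmap and the page table pages, each section rounded to a page boundary, with the dumped pages themselves last. A sketch of that offset arithmetic, with round_page() spelled out and the sizes in main() invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096
#define round_page(x) (((x) + PAGE_SIZE - 1) & ~(uint64_t)(PAGE_SIZE - 1))

struct layout {
	uint64_t msgbuf_off;	/* message buffer */
	uint64_t bitmap_off;	/* page-present bitmap */
	uint64_t ptemap_off;	/* kernel page table pages */
	uint64_t pages_off;	/* first dumped physical page */
};

/* Mirror the offsets _kvm_initvtop() derives from the header fields. */
static struct layout
compute_layout(uint32_t msgbufsize, uint32_t bitmapsize, uint32_t ptesize)
{
	struct layout l;

	l.msgbuf_off = PAGE_SIZE;		/* header occupies one page */
	l.bitmap_off = l.msgbuf_off + round_page(msgbufsize);
	l.ptemap_off = l.bitmap_off + round_page(bitmapsize);
	l.pages_off = l.ptemap_off + ptesize;	/* ptesize is page-aligned */
	return (l);
}

int
main(void)
{
	/* example sizes: 64K msgbuf, 128K bitmap, 16M of page table pages */
	struct layout l = compute_layout(65536, 131072, 16 << 20);

	printf("msgbuf %#llx bitmap %#llx ptemap %#llx pages %#llx\n",
	    (unsigned long long)l.msgbuf_off, (unsigned long long)l.bitmap_off,
	    (unsigned long long)l.ptemap_off, (unsigned long long)l.pages_off);
	return (0);
}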
@@ -196,133 +199,43 @@
 {
 	struct vmstate *vm;
 	u_long offset;
-	u_long pdpe_pa;
-	u_long pde_pa;
-	u_long pte_pa;
-	pml4_entry_t pml4e;
-	pdp_entry_t pdpe;
-	pd_entry_t pde;
 	pt_entry_t pte;
-	u_long pml4eindex;
-	u_long pdpeindex;
-	u_long pdeindex;
 	u_long pteindex;
 	int i;
 	u_long a;
 	off_t ofs;
-	size_t s;
 
 	vm = kd->vmst;
 	offset = va & (PAGE_SIZE - 1);
 
-	/*
-	 * If we are initializing (kernel page table descriptor pointer
-	 * not yet set) then return pa == va to avoid infinite recursion.
-	 */
-	if (vm->PML4 == 0) {
-		s = _kvm_pa2off(kd, va, pa);
-		if (s == 0) {
-			_kvm_err(kd, kd->program,
-			    "_kvm_vatop: bootstrap data not in dump");
+	if (va >= vm->hdr.kernbase) {
+		pteindex = (va - vm->hdr.kernbase) >> PAGE_SHIFT;
+		pte = vm->ptemap[pteindex];
+		if (((u_long)pte & PG_V) == 0) {
+			_kvm_err(kd, kd->program, "_kvm_vatop: pte not valid");
+			goto invalid;
+		}
+		a = pte & PG_FRAME;
+		ofs = hpt_find(kd, a);
+		if (ofs == -1) {
+			_kvm_err(kd, kd->program, "_kvm_vatop: physical address 0x%lx not in minidump", a);
 			goto invalid;
-		} else
-			return (PAGE_SIZE - offset);
-	}
-
-	pml4eindex = (va >> PML4SHIFT) & (NPML4EPG - 1);
-	pml4e = vm->PML4[pml4eindex];
-	if (((u_long)pml4e & PG_V) == 0) {
-		_kvm_err(kd, kd->program, "_kvm_vatop: pml4e not valid");
-		goto invalid;
-	}
-
-	pdpeindex = (va >> PDPSHIFT) & (NPDPEPG-1);
-	pdpe_pa = ((u_long)pml4e & PG_FRAME) +
-	    (pdpeindex * sizeof(pdp_entry_t));
-
-	s = _kvm_pa2off(kd, pdpe_pa, &ofs);
-	if (s < sizeof pdpe) {
-		_kvm_err(kd, kd->program, "_kvm_vatop: pdpe_pa not found");
-		goto invalid;
-	}
-	if (lseek(kd->pmfd, ofs, 0) == -1) {
-		_kvm_syserr(kd, kd->program, "_kvm_vatop: lseek pdpe_pa");
-		goto invalid;
-	}
-	if (read(kd->pmfd, &pdpe, sizeof pdpe) != sizeof pdpe) {
-		_kvm_syserr(kd, kd->program, "_kvm_vatop: read pdpe");
-		goto invalid;
-	}
-	if (((u_long)pdpe & PG_V) == 0) {
-		_kvm_err(kd, kd->program, "_kvm_vatop: pdpe not valid");
-		goto invalid;
-	}
-
-	pdeindex = (va >> PDRSHIFT) & (NPDEPG-1);
-	pde_pa = ((u_long)pdpe & PG_FRAME) + (pdeindex * sizeof(pd_entry_t));
-
-	s = _kvm_pa2off(kd, pde_pa, &ofs);
-	if (s < sizeof pde) {
-		_kvm_syserr(kd, kd->program, "_kvm_vatop: pde_pa not found");
-		goto invalid;
-	}
-	if (lseek(kd->pmfd, ofs, 0) == -1) {
-		_kvm_err(kd, kd->program, "_kvm_vatop: lseek pde_pa");
-		goto invalid;
-	}
-	if (read(kd->pmfd, &pde, sizeof pde) != sizeof pde) {
-		_kvm_syserr(kd, kd->program, "_kvm_vatop: read pde");
-		goto invalid;
-	}
-	if (((u_long)pde & PG_V) == 0) {
-		_kvm_err(kd, kd->program, "_kvm_vatop: pde not valid");
-		goto invalid;
-	}
-
-	if ((u_long)pde & PG_PS) {
-		/*
-		 * No final-level page table; ptd describes one 2MB page.
-		 */
-#define	PAGE2M_MASK	(NBPDR - 1)
-#define	PG_FRAME2M	(~PAGE2M_MASK)
-		a = ((u_long)pde & PG_FRAME2M) + (va & PAGE2M_MASK);
-		s = _kvm_pa2off(kd, a, pa);
-		if (s == 0) {
-			_kvm_err(kd, kd->program,
-			    "_kvm_vatop: 2MB page address not in dump");
+		}
+		*pa = ofs + offset;
+		return (PAGE_SIZE - offset);
+	} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
+		a = (va - vm->hdr.dmapbase) & ~PAGE_MASK;
+		ofs = hpt_find(kd, a);
+		if (ofs == -1) {
+			_kvm_err(kd, kd->program, "_kvm_vatop: direct map address 0x%lx not in minidump", va);
 			goto invalid;
-		} else
-			return (NBPDR - (va & PAGE2M_MASK));
-	}
-
-	pteindex = (va >> PAGE_SHIFT) & (NPTEPG-1);
-	pte_pa = ((u_long)pde & PG_FRAME) + (pteindex * sizeof(pt_entry_t));
-
-	s = _kvm_pa2off(kd, pte_pa, &ofs);
-	if (s < sizeof pte) {
-		_kvm_err(kd, kd->program, "_kvm_vatop: pte_pa not found");
-		goto invalid;
-	}
-	if (lseek(kd->pmfd, ofs, 0) == -1) {
-		_kvm_syserr(kd, kd->program, "_kvm_vatop: lseek");
+		}
+		*pa = ofs + offset;
+		return (PAGE_SIZE - offset);
+	} else {
+		_kvm_err(kd, kd->program, "_kvm_vatop: virtual address 0x%lx not minidumped", va);
 		goto invalid;
 	}
-	if (read(kd->pmfd, &pte, sizeof pte) != sizeof pte) {
-		_kvm_syserr(kd, kd->program, "_kvm_vatop: read");
-		goto invalid;
-	}
-	if (((u_long)pte & PG_V) == 0) {
-		_kvm_err(kd, kd->program, "_kvm_vatop: pte not valid");
-		goto invalid;
-	}
-
-	a = ((u_long)pte & PG_FRAME) + offset;
-	s = _kvm_pa2off(kd, a, pa);
-	if (s == 0) {
-		_kvm_err(kd, kd->program, "_kvm_vatop: address not in dump");
-		goto invalid;
-	} else
-		return (PAGE_SIZE - offset);
 
 invalid:
 	_kvm_err(kd, 0, "invalid address (0x%lx)", va);
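The rewritten _kvm_vatop() handles exactly two address ranges: kernel VAs at or above hdr.kernbase, resolved through the flat ptemap (one pt_entry_t per 4K page of KVA), and direct-map VAs, where the physical address falls out arithmetically; everything else is simply not in a minidump. A condensed stand-alone model of those two cases follows; the header fields and the hash lookup are stubbed with toy values, so only the control flow is meaningful.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(PAGE_SIZE - 1)
#define PG_V		0x001UL			/* amd64 PTE "valid" bit */
#define PG_FRAME	0x000ffffffffff000UL	/* amd64 PTE frame mask */

/* Toy stand-ins for the vmstate header fields and the hash table. */
static uint64_t kernbase = 0xffffffff80000000UL;
static uint64_t dmapbase = 0xffffff0000000000UL;
static uint64_t dmapend  = 0xffffff8000000000UL;
static uint64_t ptemap[1];	/* the real array is read from the dump */

static int64_t
hpt_find(uint64_t pa)
{
	/* stub: the real lookup walks the chained hash built by inithash() */
	return (pa == 0x1000 ? 4096 * 3 : -1);
}

/* Return the dump file offset backing va, or -1 if it isn't in the dump. */
static int64_t
va_to_fileoff(uint64_t va)
{
	uint64_t pte, pa, offset = va & PAGE_MASK;
	int64_t ofs;

	if (va >= kernbase) {
		/* kernel VA: one flat PTE array starting at kernbase */
		pte = ptemap[(va - kernbase) >> PAGE_SHIFT];
		if ((pte & PG_V) == 0)
			return (-1);
		pa = pte & PG_FRAME;
	} else if (va >= dmapbase && va < dmapend) {
		/* direct map: the physical address falls out arithmetically */
		pa = (va - dmapbase) & ~PAGE_MASK;
	} else
		return (-1);		/* other VAs are not minidumped */
	ofs = hpt_find(pa);		/* was the page actually dumped? */
	return (ofs < 0 ? -1 : ofs + (int64_t)offset);
}

int
main(void)
{
	printf("%lld\n", (long long)va_to_fileoff(dmapbase + 0x1234));
	return (0);
}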
--- //depot/yahoo/ybsd_6/src/sys/amd64/amd64/dump_machdep.c	2005/07/02 15:49:03
+++ //depot/peter/peter_minidump/sys/amd64/amd64/dump_machdep.c	2006/04/11 20:05:31
@@ -33,10 +33,13 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/cons.h>
 #include <sys/kernel.h>
 #include <sys/kerneldump.h>
+#include <sys/msgbuf.h>
 
+#include <machine/atomic.h>
 #include <machine/elf.h>
 #include <machine/md_var.h>
+#include <machine/vmparam.h>
 
 CTASSERT(sizeof(struct kerneldumpheader) == 512);
 
@@ -49,53 +52,42 @@
 #define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
 #define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
 
-struct md_pa {
-	vm_paddr_t md_start;
-	vm_paddr_t md_size;
-};
+extern uint64_t KPDPphys;
 
-typedef int callback_t(struct md_pa *, int, void *);
+uint64_t *vm_page_dump;
+int vm_page_dump_size;
 
 static struct kerneldumpheader kdh;
-static off_t dumplo, fileofs;
+static off_t dumplo;
 
-/* Handle buffered writes. */
-static char buffer[DEV_BSIZE];
+/* Handle chunked writes. */
 static size_t fragsz;
+static void *dump_va;
+static size_t counter, progress;
 
-/* 20 phys_avail entry pairs correspond to 10 md_pa's */
-static struct md_pa dump_map[10];
+CTASSERT(sizeof(*vm_page_dump) == 8);
+
+struct myhdr {
+	char magic[24];		/* "minidump FreeBSD/amd64" */
+	uint32_t version;
+	uint32_t msgbufsize;
+	uint32_t bitmapsize;
+	uint32_t ptesize;
+	uint64_t kernbase;
+	uint64_t dmapbase;
+	uint64_t dmapend;
+};
 
-static void
-md_pa_init(void)
+static int
+is_dumpable(vm_paddr_t pa)
 {
-	int n, idx;
+	int i;
 
-	bzero(dump_map, sizeof(dump_map));
-	for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) {
-		idx = n * 2;
-		if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
-			break;
-		dump_map[n].md_start = dump_avail[idx];
-		dump_map[n].md_size = dump_avail[idx + 1] - dump_avail[idx];
+	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
+		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
+			return (1);
 	}
-}
-
-static struct md_pa *
-md_pa_first(void)
-{
-
-	return (&dump_map[0]);
-}
-
-static struct md_pa *
-md_pa_next(struct md_pa *mdp)
-{
-
-	mdp++;
-	if (mdp->md_size == 0)
-		mdp = NULL;
-	return (mdp);
+	return (0);
 }
 
 /* XXX should be MI */
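The hunk below replaces the sector-granular buf_write()/buf_flush() pair with blk_write()/blk_flush(): virtual-address data is written through immediately, while physical pages are accumulated into a MAXDUMPPGS-page window (remapped with pmap_kenter_temporary()) and flushed as one large I/O. Here is a reduced user-space model of that batching, with the dumper callback stubbed and a memcpy() standing in for the remap; the names are illustrative, not the kernel's.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE	4096
#define MAXDUMPPGS	16		/* pages per device I/O */

static char window[MAXDUMPPGS * PAGE_SIZE];	/* stands in for the kenter window */
static size_t fragsz;				/* bytes accumulated, not yet written */
static long long dumplo;			/* current device offset */

/* stand-in for di->dumper(): one contiguous write to the dump device */
static int
device_write(const void *buf, size_t len)
{
	(void)buf;
	printf("write %zu bytes at offset %lld\n", len, dumplo);
	return (0);
}

static int
flush(void)
{
	int error = 0;

	if (fragsz != 0) {
		error = device_write(window, fragsz);
		dumplo += fragsz;
		fragsz = 0;
	}
	return (error);
}

/* Append one physical page; flush when the window fills. */
static int
append_page(const void *page)
{
	memcpy(window + fragsz, page, PAGE_SIZE);	/* kernel remaps instead */
	fragsz += PAGE_SIZE;
	if (fragsz == sizeof(window))
		return (flush());
	return (0);
}

int
main(void)
{
	static char page[PAGE_SIZE];
	int i;

	for (i = 0; i < 40; i++)	/* 40 pages: two full windows + a tail */
		append_page(page);
	return (flush());
}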
@@ -119,88 +111,76 @@
 	kdh->parity = kerneldump_parity(kdh);
 }
 
-static int
-buf_write(struct dumperinfo *di, char *ptr, size_t sz)
-{
-	size_t len;
-	int error;
+#define	PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
 
-	while (sz) {
-		len = DEV_BSIZE - fragsz;
-		if (len > sz)
-			len = sz;
-		bcopy(ptr, buffer + fragsz, len);
-		fragsz += len;
-		ptr += len;
-		sz -= len;
-		if (fragsz == DEV_BSIZE) {
-			error = di->dumper(di->priv, buffer, 0, dumplo,
-			    DEV_BSIZE);
-			if (error)
-				return error;
-			dumplo += DEV_BSIZE;
-			fragsz = 0;
-		}
-	}
-
-	return (0);
-}
-
 static int
-buf_flush(struct dumperinfo *di)
+blk_flush(struct dumperinfo *di)
 {
 	int error;
 
 	if (fragsz == 0)
 		return (0);
 
-	error = di->dumper(di->priv, buffer, 0, dumplo, DEV_BSIZE);
-	dumplo += DEV_BSIZE;
+	error = di->dumper(di->priv, dump_va, 0, dumplo, fragsz);
+	dumplo += fragsz;
 	fragsz = 0;
 	return (error);
 }
 
-#define PG2MB(pgs) ((pgs + (1 << 8) - 1) >> 8)
-
 static int
-cb_dumpdata(struct md_pa *mdp, int seqnr, void *arg)
+blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
 {
-	struct dumperinfo *di = (struct dumperinfo*)arg;
-	vm_paddr_t a, pa;
-	void *va;
-	uint64_t pgs;
-	size_t counter, sz, chunk;
-	int i, c, error, twiddle;
+	size_t len;
+	int error, i, c;
 
-	error = 0;	/* catch case in which chunk size is 0 */
-	counter = 0;	/* Update twiddle every 16MB */
-	twiddle = 0;
-	va = 0;
-	pgs = mdp->md_size / PAGE_SIZE;
-	pa = mdp->md_start;
-
-	printf("  chunk %d: %ldMB (%ld pages)", seqnr, PG2MB(pgs), pgs);
-
-	while (pgs) {
-		chunk = pgs;
-		if (chunk > MAXDUMPPGS)
-			chunk = MAXDUMPPGS;
-		sz = chunk << PAGE_SHIFT;
-		counter += sz;
+	error = 0;
+	if ((sz % PAGE_SIZE) != 0) {
+		printf("size not page aligned\n");
+		return (EINVAL);
+	}
+	if (ptr != NULL && pa != 0) {
+		printf("can't have both va and pa!\n");
+		return (EINVAL);
+	}
+	if (pa != 0 && (pa % PAGE_SIZE) != 0) {
+		printf("address not page aligned\n");
+		return (EINVAL);
+	}
+	if (ptr != NULL) {
+		/* If we're doing a virtual dump, flush any pre-existing pa pages */
+		error = blk_flush(di);
+		if (error)
+			return (error);
+	}
+	while (sz) {
+		len = (MAXDUMPPGS * PAGE_SIZE) - fragsz;
+		if (len > sz)
+			len = sz;
+		counter += len;
+		progress += len;
 		if (counter >> 24) {
-			printf(" %ld", PG2MB(pgs));
+			printf(" %ld", PG2MB(progress >> PAGE_SHIFT));
 			counter &= (1<<24) - 1;
 		}
-		for (i = 0; i < chunk; i++) {
-			a = pa + i * PAGE_SIZE;
-			va = pmap_kenter_temporary(trunc_page(a), i);
+		if (ptr) {
+			error = di->dumper(di->priv, ptr, 0, dumplo, len);
+			if (error)
+				return (error);
+			dumplo += len;
+			ptr += len;
+			sz -= len;
+		} else {
+			for (i = 0; i < len; i += PAGE_SIZE)
+				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
+			fragsz += len;
+			pa += len;
+			sz -= len;
+			if (fragsz == (MAXDUMPPGS * PAGE_SIZE)) {
+				error = blk_flush(di);
+				if (error)
+					return (error);
+			}
 		}
-		error = di->dumper(di->priv, va, 0, dumplo, sz);
-		if (error)
-			break;
-		dumplo += sz;
-		pgs -= chunk;
-		pa += sz;
 
 		/* Check for user abort. */
 		c = cncheckc();
@@ -209,99 +189,81 @@
 		if (c != -1)
 			printf(" (CTRL-C to abort) ");
 	}
-	printf(" ... %s\n", (error) ? "fail" : "ok");
-	return (error);
-}
 
-static int
-cb_dumphdr(struct md_pa *mdp, int seqnr, void *arg)
-{
-	struct dumperinfo *di = (struct dumperinfo*)arg;
-	Elf_Phdr phdr;
-	uint64_t size;
-	int error;
-
-	size = mdp->md_size;
-	bzero(&phdr, sizeof(phdr));
-	phdr.p_type = PT_LOAD;
-	phdr.p_flags = PF_R;			/* XXX */
-	phdr.p_offset = fileofs;
-	phdr.p_vaddr = mdp->md_start;
-	phdr.p_paddr = mdp->md_start;
-	phdr.p_filesz = size;
-	phdr.p_memsz = size;
-	phdr.p_align = PAGE_SIZE;
-
-	error = buf_write(di, (char*)&phdr, sizeof(phdr));
-	fileofs += phdr.p_filesz;
-	return (error);
-}
-
-static int
-cb_size(struct md_pa *mdp, int seqnr, void *arg)
-{
-	uint64_t *sz = (uint64_t*)arg;
-
-	*sz += (uint64_t)mdp->md_size;
 	return (0);
 }
 
-static int
-foreach_chunk(callback_t cb, void *arg)
-{
-	struct md_pa *mdp;
-	int error, seqnr;
+/* A fake page table page, to avoid having to handle both 4K and 2M pages */
+static uint64_t fakept[NPTEPG];
 
-	seqnr = 0;
-	mdp = md_pa_first();
-	while (mdp != NULL) {
-		error = (*cb)(mdp, seqnr++, arg);
-		if (error)
-			return (-error);
-		mdp = md_pa_next(mdp);
-	}
-	return (seqnr);
-}
-
 void
 dumpsys(struct dumperinfo *di)
 {
-	Elf_Ehdr ehdr;
-	uint64_t dumpsize;
-	off_t hdrgap;
-	size_t hdrsz;
+	uint64_t dumpsize, ptesize;
+	vm_offset_t va;
 	int error;
+	uint64_t bits;
+	uint64_t *pdp, *pd, *pt, pa;
+	int i, j, k, bit;
+	struct myhdr myhdr;
 
-	bzero(&ehdr, sizeof(ehdr));
-	ehdr.e_ident[EI_MAG0] = ELFMAG0;
-	ehdr.e_ident[EI_MAG1] = ELFMAG1;
-	ehdr.e_ident[EI_MAG2] = ELFMAG2;
-	ehdr.e_ident[EI_MAG3] = ELFMAG3;
-	ehdr.e_ident[EI_CLASS] = ELF_CLASS;
-#if BYTE_ORDER == LITTLE_ENDIAN
-	ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
-#else
-	ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
-#endif
-	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
-	ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE;	/* XXX big picture? */
-	ehdr.e_type = ET_CORE;
-	ehdr.e_machine = EM_X86_64;
-	ehdr.e_phoff = sizeof(ehdr);
-	ehdr.e_flags = 0;
-	ehdr.e_ehsize = sizeof(ehdr);
-	ehdr.e_phentsize = sizeof(Elf_Phdr);
-	ehdr.e_shentsize = sizeof(Elf_Shdr);
-
-	md_pa_init();
+	counter = 0;
+	progress = 0;
+	/* Walk page table pages, set bits in vm_page_dump */
+	ptesize = 0;
+	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
+	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
+		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
+		/*
+		 * We always write a page, even if it is zero.  Each
+		 * page written corresponds to 2MB of space.
+		 */
+		ptesize += PAGE_SIZE;
+		if ((pdp[i] & PG_V) == 0)
+			continue;
+		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+			/* This is an entire 2M page. */
+			pa = pd[j] & PG_FRAME & ~PDRMASK;
+			for (k = 0; k < NPTEPG; k++) {
+				dump_add_page(pa + k * PAGE_SIZE);
+			}
+			continue;
+		}
+		if ((pd[j] & PG_V) == PG_V) {
+			/* set bit for each valid page in this 2MB block */
+			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
+			for (k = 0; k < NPTEPG; k++) {
+				if ((pt[k] & PG_V) == PG_V) {
+					pa = pt[k] & PG_FRAME;
+					dump_add_page(pa);
+				}
+			}
+		} else {
+			/* nothing, we're going to dump a null page */
+		}
+	}
 
 	/* Calculate dump size. */
-	dumpsize = 0L;
-	ehdr.e_phnum = foreach_chunk(cb_size, &dumpsize);
-	hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
-	fileofs = MD_ALIGN(hdrsz);
-	dumpsize += fileofs;
-	hdrgap = fileofs - DEV_ALIGN(hdrsz);
+	dumpsize = ptesize;
+	dumpsize += round_page(msgbufp->msg_size);
+	dumpsize += round_page(vm_page_dump_size);
+	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+		bits = vm_page_dump[i];
+		while (bits) {
+			bit = bsfq(bits);
+			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+			/* Clear out undumpable pages now if needed */
+			if (is_dumpable(pa)) {
+				dumpsize += PAGE_SIZE;
+			} else {
+				dump_drop_page(pa);
+			}
+			bits &= ~(1ul << bit);
+		}
+	}
+	dumpsize += PAGE_SIZE;
 
 	/* Determine dump offset on device. */
 	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
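The size pass above and the page-dump loop further down both decode vm_page_dump the same way: scan each 64-bit word, peel off set bits with bsfq, and rebuild the physical address from the word index and bit number. The same walk in portable C, with a shift loop standing in for the amd64 bsfq instruction and an invented two-word bitmap:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define NBBY		8	/* bits per byte */

/* Portable stand-in for bsfq: index of the lowest set bit (bits != 0). */
static int
lowest_bit(uint64_t bits)
{
	int bit = 0;

	while ((bits & 1) == 0) {
		bits >>= 1;
		bit++;
	}
	return (bit);
}

int
main(void)
{
	/* toy bitmap: pages 0 and 3, then pages 64 and 65, are present */
	uint64_t bitmap[2] = { 0x9, 0x3 };
	uint64_t bits, pa;
	int i, bit;

	for (i = 0; i < 2; i++) {
		bits = bitmap[i];
		while (bits) {
			bit = lowest_bit(bits);
			bits &= ~(1UL << bit);
			/* word i covers 64 pages; bit selects one of them */
			pa = ((uint64_t)i * sizeof(bitmap[0]) * NBBY + bit) *
			    PAGE_SIZE;
			printf("dump page at pa %#llx\n",
			    (unsigned long long)pa);
		}
	}
	return (0);
}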
@@ -311,10 +273,20 @@
 	dumplo = di->mediaoffset + di->mediasize - dumpsize;
 	dumplo -= sizeof(kdh) * 2;
 
+	/* Initialize myhdr */
+	bzero(&myhdr, sizeof(myhdr));
+	strcpy(myhdr.magic, "minidump FreeBSD/amd64");
+	myhdr.version = 1;
+	myhdr.msgbufsize = msgbufp->msg_size;
+	myhdr.bitmapsize = vm_page_dump_size;
+	myhdr.ptesize = ptesize;
+	myhdr.kernbase = KERNBASE;
+	myhdr.dmapbase = DMAP_MIN_ADDRESS;
+	myhdr.dmapend = DMAP_MAX_ADDRESS;
+
 	mkdumpheader(&kdh, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
 
-	printf("Dumping %llu MB (%d chunks)\n", (long long)dumpsize >> 20,
-	    ehdr.e_phnum);
+	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
 
 	/* Dump leader */
 	error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
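The hunk below keeps the on-disk ptemap uniform: every 2MB slot of KVA is represented by exactly one page of 512 PTEs, so a 2MB superpage has to be expanded into a synthetic page table page (fakept) on the way out, and an empty slot becomes a zeroed page. The expansion in isolation, as a sketch; the PTE bit values are the amd64 ones and the function name is invented:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define NPTEPG		512	/* PTEs per page table page */
#define PG_V		0x001UL	/* valid */
#define PG_RW		0x002UL	/* writable */
#define PG_A		0x020UL	/* accessed */
#define PG_M		0x040UL	/* modified */

/*
 * Expand one 2MB superpage mapping into 512 synthetic 4K PTEs so the
 * minidump reader only ever sees ordinary page table pages.
 */
static void
fake_ptp(uint64_t fakept[NPTEPG], uint64_t pa_2m)
{
	int k;

	for (k = 0; k < NPTEPG; k++)
		fakept[k] = (pa_2m + k * PAGE_SIZE) | PG_V | PG_RW | PG_A | PG_M;
}

int
main(void)
{
	uint64_t pt[NPTEPG];

	fake_ptp(pt, 0x200000);	/* the 2MB page at physical 2MB */
	printf("pte[0]=%#llx pte[511]=%#llx\n",
	    (unsigned long long)pt[0], (unsigned long long)pt[511]);
	return (0);
}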
@@ -322,35 +294,96 @@
 		goto fail;
 	dumplo += sizeof(kdh);
 
-	/* Dump ELF header */
-	error = buf_write(di, (char*)&ehdr, sizeof(ehdr));
+	/* Dump my header */
+	bzero(&fakept, sizeof(fakept));
+	bcopy(&myhdr, &fakept, sizeof(myhdr));
+	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+	if (error)
+		goto fail;
+
+	/* Dump msgbuf up front */
+	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
 	if (error)
 		goto fail;
 
-	/* Dump program headers */
-	error = foreach_chunk(cb_dumphdr, di);
-	if (error < 0)
+	/* Dump bitmap */
+	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
+	if (error)
 		goto fail;
-	buf_flush(di);
+
+	/* Dump kernel page table pages */
+	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
+	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
+		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
+		/* We always write a page, even if it is zero */
+		if ((pdp[i] & PG_V) == 0) {
+			bzero(fakept, sizeof(fakept));
+			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+			/* flush, in case we reuse fakept in the same block */
+			error = blk_flush(di);
+			if (error)
+				goto fail;
+			continue;
+		}
+		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+			/* This is a single 2M block.  Generate a fake PTP */
+			pa = pd[j] & PG_FRAME & ~PDRMASK;
+			for (k = 0; k < NPTEPG; k++) {
+				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
+			}
+			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+			/* flush, in case we reuse fakept in the same block */
+			error = blk_flush(di);
+			if (error)
+				goto fail;
+			continue;
+		}
+		if ((pd[j] & PG_V) == PG_V) {
+			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
+			error = blk_write(di, (char *)pt, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+		} else {
+			bzero(fakept, sizeof(fakept));
+			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+			/* flush, in case we reuse fakept in the same block */
+			error = blk_flush(di);
+			if (error)
+				goto fail;
+		}
+	}
 
-	/*
-	 * All headers are written using blocked I/O, so we know the
-	 * current offset is (still) block aligned. Skip the alignement
-	 * in the file to have the segment contents aligned at page
-	 * boundary. We cannot use MD_ALIGN on dumplo, because we don't
-	 * care and may very well be unaligned within the dump device.
-	 */
-	dumplo += hdrgap;
+	/* Dump memory chunks */
+	/* XXX cluster it up and use blk_dump() */
+	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+		bits = vm_page_dump[i];
+		while (bits) {
+			bit = bsfq(bits);
+			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+			error = blk_write(di, 0, pa, PAGE_SIZE);
+			if (error)
+				goto fail;
+			bits &= ~(1ul << bit);
+		}
+	}
 
-	/* Dump memory chunks (updates dumplo) */
-	error = foreach_chunk(cb_dumpdata, di);
-	if (error < 0)
+	error = blk_flush(di);
+	if (error)
 		goto fail;
 
 	/* Dump trailer */
 	error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
 	if (error)
 		goto fail;
+	dumplo += sizeof(kdh);
 
 	/* Signal completion, signoff and exit stage left. */
 	di->dumper(di->priv, NULL, 0, 0, 0);
 
@@ -368,3 +401,25 @@
 	else
 		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
 }
+
+void
+dump_add_page(vm_paddr_t pa)
+{
+	int idx, bit;
+
+	pa >>= PAGE_SHIFT;
+	idx = pa >> 6;		/* 2^6 = 64 */
+	bit = pa & 63;
+	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
+}
+
+void
+dump_drop_page(vm_paddr_t pa)
+{
+	int idx, bit;
+
+	pa >>= PAGE_SHIFT;
+	idx = pa >> 6;		/* 2^6 = 64 */
+	bit = pa & 63;
+	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
+}
--- //depot/yahoo/ybsd_6/src/sys/amd64/amd64/pmap.c	2006/03/08 15:46:28
+++ //depot/peter/peter_minidump/sys/amd64/amd64/pmap.c	2006/03/29 16:30:38
@@ -174,7 +174,7 @@
 
 static u_int64_t KPTphys;	/* phys addr of kernel level 1 */
 static u_int64_t KPDphys;	/* phys addr of kernel level 2 */
-static u_int64_t KPDPphys;	/* phys addr of kernel level 3 */
+u_int64_t KPDPphys;		/* phys addr of kernel level 3 */
 u_int64_t KPML4phys;		/* phys addr of kernel level 4 */
 
 static u_int64_t DMPDphys;	/* phys addr of direct mapped level 2 */
--- //depot/yahoo/ybsd_6/src/sys/amd64/amd64/uma_machdep.c	2004/12/23 17:57:00
+++ //depot/peter/peter_minidump/sys/amd64/amd64/uma_machdep.c	2006/04/04 15:04:58
@@ -44,6 +44,7 @@
 {
 	static vm_pindex_t colour;
 	vm_page_t m;
+	vm_paddr_t pa;
 	void *va;
 	int pflags;
 
@@ -64,7 +65,9 @@
 		} else
 			break;
 	}
-	va = (void *)PHYS_TO_DMAP(m->phys_addr);
+	pa = m->phys_addr;
+	dump_add_page(pa);
+	va = (void *)PHYS_TO_DMAP(pa);
 	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
 		pagezero(va);
 	return (va);
@@ -74,8 +77,11 @@
 uma_small_free(void *mem, int size, u_int8_t flags)
 {
 	vm_page_t m;
+	vm_paddr_t pa;
 
-	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)mem));
+	pa = DMAP_TO_PHYS((vm_offset_t)mem);
+	dump_drop_page(pa);
+	m = PHYS_TO_VM_PAGE(pa);
 	vm_page_lock_queues();
 	vm_page_free(m);
 	vm_page_unlock_queues();
--- //depot/yahoo/ybsd_6/src/sys/amd64/include/md_var.h	2004/12/23 17:57:00
+++ //depot/peter/peter_minidump/sys/amd64/include/md_var.h	2006/04/04 15:04:58
@@ -51,6 +51,8 @@
 extern	char	kstack[];
 extern	char	sigcode[];
 extern	int	szsigcode;
+extern	uint64_t *vm_page_dump;
+extern	int	vm_page_dump_size;
 
 extern	struct pcpu __pcpu[];
 
@@ -64,6 +66,8 @@
 void	cpu_setregs(void);
 void	doreti_iret(void) __asm(__STRING(doreti_iret));
 void	doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
+void	dump_add_page(vm_paddr_t);
+void	dump_drop_page(vm_paddr_t);
 void	initializecpu(void);
 void	fillw(int /*u_short*/ pat, void *base, size_t cnt);
 void	fpstate_drop(struct thread *td);
--- //depot/yahoo/ybsd_6/src/sys/vm/vm_page.c	2006/03/07 16:12:08
+++ //depot/peter/peter_minidump/sys/vm/vm_page.c	2006/04/05 12:23:11
@@ -122,6 +122,8 @@
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 
+#include <machine/md_var.h>
+
 /*
  * Associated with page of user-allocatable memory is a
  * page structure.
@@ -233,6 +235,18 @@
 	bzero((void *)mapped, end - new_end);
 	uma_startup((void *)mapped, boot_pages);
 
+#ifdef __amd64__
+	/*
+	 * Allocate a bitmap to indicate that a random physical page
+	 * needs to be included in a minidump.
+	 */
+	page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
+	vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
+	new_end -= vm_page_dump_size;
+	vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
+	    new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
+	bzero((void *)vm_page_dump, vm_page_dump_size);
+#endif
 	/*
 	 * Compute the number of pages of memory that will be available for
 	 * use (taking into account the overhead of a page structure per
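For scale, the bitmap sizing in the vm_page.c hunk works out to one bit per physical page, rounded up to whole bytes and then to a whole page. The arithmetic for a hypothetical 4GB machine, with round_page()/roundup2() spelled out:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define NBBY		8UL
#define roundup2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))	/* y = power of 2 */
#define round_page(x)	roundup2((x), PAGE_SIZE)

int
main(void)
{
	uint64_t highest_pa = 4ULL << 30;		/* 4GB of RAM */
	uint64_t page_range = highest_pa / PAGE_SIZE;	/* 1048576 pages */
	uint64_t bitmap_size = round_page(roundup2(page_range, NBBY) / NBBY);

	/* one bit per page: 1M pages -> 128KB of bitmap, page aligned */
	printf("vm_page_dump_size = %llu bytes\n",
	    (unsigned long long)bitmap_size);
	return (0);
}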