--- //depot/vendor/freebsd/src/sys/conf/options.i386 2003/03/04 12:25:23 +++ //depot/user/jake/pae/src/sys/conf/options.i386 2003/03/08 18:20:53 @@ -34,6 +34,9 @@ # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h +# Physical address extensions and support for >4G ram. As above. +PAE opt_global.h + CLK_CALIBRATION_LOOP opt_clock.h CLK_USE_I8254_CALIBRATION opt_clock.h CLK_USE_TSC_CALIBRATION opt_clock.h --- //depot/vendor/freebsd/src/sys/dev/aic7xxx/aic7xxx.c 2003/01/20 12:45:18 +++ //depot/user/jake/pae/src/sys/dev/aic7xxx/aic7xxx.c 2003/03/24 18:42:36 @@ -4600,7 +4600,9 @@ /* DMA tag for mapping buffers into device visible space. */ if (ahc_dma_tag_create(ahc, ahc->parent_dmat, /*alignment*/1, /*boundary*/BUS_SPACE_MAXADDR_32BIT + 1, - /*lowaddr*/BUS_SPACE_MAXADDR, + /*lowaddr*/ahc->flags & AHC_39BIT_ADDRESSING + ? (bus_addr_t)0x7FFFFFFFFFULL + : BUS_SPACE_MAXADDR_32BIT, /*highaddr*/BUS_SPACE_MAXADDR, /*filter*/NULL, /*filterarg*/NULL, /*maxsize*/(AHC_NSEG - 1) * PAGE_SIZE, --- //depot/vendor/freebsd/src/sys/dev/aic7xxx/aic7xxx_osm.c 2002/11/30 11:10:16 +++ //depot/user/jake/pae/src/sys/dev/aic7xxx/aic7xxx_osm.c 2003/03/24 18:42:36 @@ -1386,7 +1386,8 @@ panic("ahc_setup_data - Transfer size " "larger than can device max"); - seg.ds_addr = (bus_addr_t)csio->data_ptr; + seg.ds_addr = + (bus_addr_t)(vm_offset_t)csio->data_ptr; seg.ds_len = csio->dxfer_len; ahc_execute_scb(scb, &seg, 1, 0); } --- //depot/vendor/freebsd/src/sys/dev/em/if_em.c 2003/03/21 13:50:33 +++ //depot/user/jake/pae/src/sys/dev/em/if_em.c 2003/03/24 18:42:36 @@ -328,7 +328,8 @@ /* Allocate Transmit Descriptor ring */ if (!(adapter->tx_desc_base = (struct em_tx_desc *) - contigmalloc(tsize, M_DEVBUF, M_NOWAIT, 0, ~0, PAGE_SIZE, 0))) { + contigmalloc(tsize, M_DEVBUF, M_NOWAIT, 0, 0xffffffff, + PAGE_SIZE, 0))) { printf("em%d: Unable to allocate TxDescriptor memory\n", adapter->unit); em_free_pci_resources(adapter); @@ -341,7 +342,8 @@ /* Allocate Receive Descriptor ring */ 
if (!(adapter->rx_desc_base = (struct em_rx_desc *) - contigmalloc(rsize, M_DEVBUF, M_NOWAIT, 0, ~0, PAGE_SIZE, 0))) { + contigmalloc(rsize, M_DEVBUF, M_NOWAIT, 0, 0xffffffff, + PAGE_SIZE, 0))) { printf("em%d: Unable to allocate rx_desc memory\n", adapter->unit); em_free_pci_resources(adapter); --- //depot/vendor/freebsd/src/sys/i386/i386/bios.c 2003/02/18 21:53:12 +++ //depot/user/jake/pae/src/sys/i386/i386/bios.c 2003/03/08 18:20:53 @@ -384,12 +384,16 @@ args->seg.code32.limit = 0xffff; ptd = (pd_entry_t *)rcr3(); - if (ptd == (u_int *)IdlePTD) { +#ifdef PAE + if (ptd == IdlePDPT) { +#else + if (ptd == IdlePTD) { +#endif /* * no page table, so create one and install it. */ pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* --- //depot/vendor/freebsd/src/sys/i386/i386/db_interface.c 2003/03/24 02:20:31 +++ //depot/user/jake/pae/src/sys/i386/i386/db_interface.c 2003/03/24 16:56:29 @@ -262,7 +262,7 @@ if (addr > trunc_page((vm_offset_t)btext) - size && addr < round_page((vm_offset_t)etext)) { - ptep0 = pmap_pte(kernel_pmap, addr); + ptep0 = vtopte(addr); oldmap0 = *ptep0; *ptep0 |= PG_RW; @@ -270,14 +270,14 @@ if ((*ptep0 & PG_PS) == 0) { addr1 = trunc_page(addr + size - 1); if (trunc_page(addr) != addr1) { - ptep1 = pmap_pte(kernel_pmap, addr1); + ptep1 = vtopte(addr1); oldmap1 = *ptep1; *ptep1 |= PG_RW; } } else { addr1 = trunc_4mpage(addr + size - 1); if (trunc_4mpage(addr) != addr1) { - ptep1 = pmap_pte(kernel_pmap, addr1); + ptep1 = vtopte(addr1); oldmap1 = *ptep1; *ptep1 |= PG_RW; } --- //depot/vendor/freebsd/src/sys/i386/i386/locore.s 2003/02/23 14:15:16 +++ //depot/user/jake/pae/src/sys/i386/i386/locore.s 2003/03/08 18:20:53 @@ -138,6 +138,11 @@ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP 
.globl KPTphys #endif @@ -323,8 +328,16 @@ 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! */ @@ -341,7 +354,11 @@ xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ @@ -749,6 +766,11 @@ movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) @@ -804,6 +826,12 @@ fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) @@ -889,4 +917,11 @@ movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif + ret --- //depot/vendor/freebsd/src/sys/i386/i386/machdep.c 2003/03/24 16:10:49 +++ //depot/user/jake/pae/src/sys/i386/i386/machdep.c 2003/03/24 18:46:39 @@ -1573,11 +1573,13 @@ if (smap->length == 0) goto next_run; +#ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } +#endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { @@ -2066,7 +2068,11 @@ dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif 
dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = @@ -2110,7 +2116,11 @@ /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } --- //depot/vendor/freebsd/src/sys/i386/i386/mpboot.s 2001/07/17 00:20:42 +++ //depot/user/jake/pae/src/sys/i386/i386/mpboot.s 2003/03/08 18:20:53 @@ -40,6 +40,8 @@ #include "assym.s" +#define R(x) ((x)-KERNBASE) + /* * this code MUST be enabled here and in mp_machdep.c * it follows the very early stages of AP boot by placing values in CMOS ram. @@ -74,8 +76,16 @@ NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) /* Now enable paging mode */ - movl IdlePTD-KERNBASE, %eax +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else + movl R(IdlePTD), %eax movl %eax,%cr3 +#endif movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ --- //depot/vendor/freebsd/src/sys/i386/i386/pmap.c 2003/03/24 16:10:49 +++ //depot/user/jake/pae/src/sys/i386/i386/pmap.c 2003/03/24 16:56:29 @@ -41,6 +41,37 @@ * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.398 2003/03/25 00:07:02 jake Exp $ */ +/*- + * Copyright (c) 2003 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Jake Burkholder, + * Safeport Network Services, and Network Associates Laboratories, the + * Security Research Division of Network Associates, Inc. under + * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA + * CHATS research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ /* * Manages physical address maps. 
@@ -87,6 +118,7 @@ #ifdef SMP #include #endif +#include #include #include @@ -166,6 +198,10 @@ vm_offset_t kernel_vm_end; extern u_int32_t KERNend; +#ifdef PAE +static uma_zone_t pdptzone; +#endif + /* * Data for the pv entry allocation mechanism */ @@ -198,7 +234,6 @@ static pt_entry_t *PADDR1 = 0; static PMAP_INLINE void free_pv_entry(pv_entry_t pv); -static pt_entry_t *get_ptbase(pmap_t pmap); static pv_entry_t get_pv_entry(void); static void i386_protection_init(void); static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); @@ -219,7 +254,10 @@ static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); -static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#ifdef PAE +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif static pd_entry_t pdir4mb; @@ -227,31 +265,6 @@ CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* - * Routine: pmap_pte - * Function: - * Extract the page table entry associated - * with the given map/virtual_address pair. - */ - -PMAP_INLINE pt_entry_t * -pmap_pte(pmap, va) - register pmap_t pmap; - vm_offset_t va; -{ - pd_entry_t *pdeaddr; - - if (pmap) { - pdeaddr = pmap_pde(pmap, va); - if (*pdeaddr & PG_PS) - return pdeaddr; - if (*pdeaddr) { - return get_ptbase(pmap) + i386_btop(va); - } - } - return (0); -} - -/* * Move the kernel virtual free pointer to the next * 4MB. This is used to help improve performance * by using a large (4MB) page for much of the kernel @@ -319,6 +332,9 @@ * Initialize the kernel pmap (which is statically allocated). 
*/ kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); @@ -336,7 +352,7 @@ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; - pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); + pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. @@ -500,12 +516,21 @@ } static void * -pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { *flags = UMA_SLAB_PRIV; return (void *)kmem_alloc(kernel_map, bytes); } +#ifdef PAE +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); +} +#endif + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -541,9 +566,15 @@ initial_pvs = MINPV; pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); - uma_zone_set_allocf(pvzone, pmap_allocf); + uma_zone_set_allocf(pvzone, pmap_pv_allocf); uma_prealloc(pvzone, initial_pvs); +#ifdef PAE + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif + /* * Now it is safe to enable pv_table recording. */ @@ -765,46 +796,13 @@ (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); } -/* - * Are we alternate address space? 
- */ -static __inline int -pmap_is_alternate(pmap_t pmap) +#ifdef SMP +static void +pmap_pte_quick_switchin(void) { - return ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == - (APTDpde[0] & PG_FRAME)); + invlpg((vm_offset_t)PADDR1); } - -/* - * Map in a pmap's pagetables as alternate address space. - */ -static __inline void -pmap_set_alternate(pmap_t pmap) -{ - - if (!pmap_is_alternate(pmap)) { - APTDpde[0] = pmap->pm_pdir[PTDPTDI]; - pmap_invalidate_all(kernel_pmap); /* XXX Bandaid */ - } -} - -/* - * Return an address which is the base of the Virtual mapping of - * all the PTEs for the given pmap. Note this doesn't say that - * all the PTEs will be present or that the pages there are valid. - * The PTEs are made available by the recursive mapping trick. - * It will map in the alternate PTE space if needed. - */ -static pt_entry_t * -get_ptbase(pmap) - pmap_t pmap; -{ - - if (pmap_is_current(pmap)) - return PTmap; - pmap_set_alternate(pmap); - return APTmap; -} +#endif /* * Super fast pmap_pte routine best used when scanning @@ -813,13 +811,13 @@ * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. 
*/ - static pt_entry_t * pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { pd_entry_t pde, newpf; + pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { unsigned index = i386_btop(va); @@ -829,7 +827,10 @@ newpf = pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; - pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1); +#ifdef SMP + curthread->td_switchin = pmap_pte_quick_switchin; +#endif + invlpg((vm_offset_t)PADDR1); } return PADDR1 + (index & (NPTEPG - 1)); } @@ -848,20 +849,18 @@ vm_offset_t va; { vm_paddr_t rtval; - vm_offset_t pdirindex; + pt_entry_t *pte; + pd_entry_t pde; if (pmap == 0) return 0; - pdirindex = va >> PDRSHIFT; - rtval = pmap->pm_pdir[pdirindex]; - if (rtval != 0) { - pt_entry_t *pte; - if ((rtval & PG_PS) != 0) { - rtval &= ~(NBPDR - 1); - rtval |= va & (NBPDR - 1); + pde = pmap->pm_pdir[va >> PDRSHIFT]; + if (pde != 0) { + if ((pde & PG_PS) != 0) { + rtval = (pde & ~PDRMASK) | (va & PDRMASK); return rtval; } - pte = get_ptbase(pmap) + i386_btop(va); + pte = pmap_pte_quick(pmap, va); rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); return rtval; } @@ -1278,6 +1277,9 @@ { pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1302,9 +1304,18 @@ * No need to allocate page table space yet but we do need a valid * page directory table. 
*/ - if (pmap->pm_pdir == NULL) + if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, NBPTD); +#ifdef PAE + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif + } /* * allocate object for the ptes @@ -1347,6 +1358,9 @@ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; +#ifdef PAE + pmap->pm_pdpt[i] = pa | PG_V; +#endif } pmap->pm_active = 0; @@ -1522,6 +1536,10 @@ vm_page_lock_queues(); for (i = 0; i < NPGPTD; i++) { m = TAILQ_FIRST(&object->memq); +#ifdef PAE + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_busy(m); @@ -1717,7 +1735,12 @@ pt_entry_t oldpte; vm_page_t m; +#if 0 oldpte = atomic_readandclear_int(ptq); +#else + oldpte = *ptq; + *ptq = 0; +#endif if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* @@ -1756,24 +1779,12 @@ static void pmap_remove_page(pmap_t pmap, vm_offset_t va) { - register pt_entry_t *ptq; + pt_entry_t *pte; - /* - * if there is no pte for this address, just skip it!!! - */ - if (*pmap_pde(pmap, va) == 0) { + if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; - } - - /* - * get a local va for mappings for this pmap. 
- */ - ptq = get_ptbase(pmap) + i386_btop(va); - if (*ptq) { - (void) pmap_remove_pte(pmap, ptq, va); - pmap_invalidate_page(pmap, va); - } - return; + pmap_remove_pte(pmap, pte, va); + pmap_invalidate_page(pmap, va); } /* @@ -1785,10 +1796,9 @@ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - register pt_entry_t *ptbase; vm_offset_t pdnxt; pd_entry_t ptpaddr; - vm_offset_t sindex, eindex; + pt_entry_t *pte; int anyvalid; if (pmap == NULL) @@ -1810,33 +1820,18 @@ anyvalid = 0; - /* - * Get a local virtual address for the mappings that are being - * worked with. - */ - ptbase = get_ptbase(pmap); - - sindex = i386_btop(sva); - eindex = i386_btop(eva); - - for (; sindex < eindex; sindex = pdnxt) { + for (; sva < eva; sva = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. */ - pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); + pdnxt = (sva + NBPDR) & ~PDRMASK; if (pmap->pm_stats.resident_count == 0) break; - pdirindex = sindex / NPDEPG; + pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; - if ((ptpaddr & PG_PS) != 0) { - pmap->pm_pdir[pdirindex] = 0; - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anyvalid++; - continue; - } /* * Weed out invalid mappings. Note: we assume that the page @@ -1846,23 +1841,29 @@ continue; /* + * Check for large page. + */ + if ((ptpaddr & PG_PS) != 0) { + pmap->pm_pdir[pdirindex] = 0; + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + anyvalid = 1; + continue; + } + + /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. 
*/ - if (pdnxt > eindex) { - pdnxt = eindex; - } + if (pdnxt > eva) + pdnxt = eva; - for (; sindex != pdnxt; sindex++) { - vm_offset_t va; - if (ptbase[sindex] == 0) { + for (; sva != pdnxt; sva += PAGE_SIZE) { + if ((pte = pmap_pte_quick(pmap, sva)) == NULL || + *pte == 0) continue; - } - va = i386_ptob(sindex); - - anyvalid++; - if (pmap_remove_pte(pmap, ptbase + sindex, va)) + anyvalid = 1; + if (pmap_remove_pte(pmap, pte, sva)) break; } } @@ -1905,7 +1906,12 @@ while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); +#if 0 tpte = atomic_readandclear_int(pte); +#else + tpte = *pte; + *pte = 0; +#endif if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) @@ -1943,10 +1949,8 @@ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { - register pt_entry_t *ptbase; vm_offset_t pdnxt; pd_entry_t ptpaddr; - vm_offset_t sindex, eindex; int anychanged; if (pmap == NULL) @@ -1962,25 +1966,13 @@ anychanged = 0; - ptbase = get_ptbase(pmap); - - sindex = i386_btop(sva); - eindex = i386_btop(eva); - - for (; sindex < eindex; sindex = pdnxt) { - + for (; sva < eva; sva = pdnxt) { unsigned pdirindex; - pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); + pdnxt = (sva + NBPDR) & ~PDRMASK; - pdirindex = sindex / NPDEPG; + pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; - if ((ptpaddr & PG_PS) != 0) { - pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anychanged++; - continue; - } /* * Weed out invalid mappings. Note: we assume that the page @@ -1989,17 +1981,27 @@ if (ptpaddr == 0) continue; - if (pdnxt > eindex) { - pdnxt = eindex; + /* + * Check for large page. 
+ */ + if ((ptpaddr & PG_PS) != 0) { + pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + anychanged = 1; + continue; } - for (; sindex != pdnxt; sindex++) { + if (pdnxt > eva) + pdnxt = eva; + for (; sva != pdnxt; sva += PAGE_SIZE) { pt_entry_t pbits; + pt_entry_t *pte; vm_page_t m; - pbits = ptbase[sindex]; - + if ((pte = pmap_pte_quick(pmap, sva)) == NULL) + continue; + pbits = *pte; if (pbits & PG_MANAGED) { m = NULL; if (pbits & PG_A) { @@ -2007,20 +2009,19 @@ vm_page_flag_set(m, PG_REFERENCED); pbits &= ~PG_A; } - if (pbits & PG_M) { - if (pmap_track_modified(i386_ptob(sindex))) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pbits); - vm_page_dirty(m); - pbits &= ~PG_M; - } + if ((pbits & PG_M) != 0 && + pmap_track_modified(sva)) { + if (m == NULL) + m = PHYS_TO_VM_PAGE(pbits); + vm_page_dirty(m); + pbits &= ~PG_M; } } pbits &= ~PG_RW; - if (pbits != ptbase[sindex]) { - ptbase[sindex] = pbits; + if (pbits != *pte) { + *pte = pbits; anychanged = 1; } } @@ -2081,7 +2082,7 @@ } #endif - pte = pmap_pte(pmap, va); + pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page @@ -2092,7 +2093,7 @@ } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; - origpte = *(vm_offset_t *)pte; + origpte = *pte; opa = origpte & PG_FRAME; if (origpte & PG_PS) @@ -2184,7 +2185,7 @@ /* * Now validate mapping with desired protection/wiring. 
*/ - newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); + newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; @@ -2576,7 +2577,7 @@ if (pmap == NULL) return; - pte = pmap_pte(pmap, va); + pte = pmap_pte_quick(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; @@ -2633,7 +2634,7 @@ pv_entry_count > pv_entry_high_water) break; - pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); + pdnxt = (addr + NBPDR) & ~PDRMASK; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; @@ -2643,7 +2644,8 @@ if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr; - dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; + dst_pmap->pm_stats.resident_count += + NBPDR / PAGE_SIZE; } continue; } @@ -2656,13 +2658,7 @@ if (pdnxt > end_addr) pdnxt = end_addr; - /* - * Have to recheck this before every avtopte() call below - * in case we have blocked and something else used APTDpde. - */ - pmap_set_alternate(dst_pmap); src_pte = vtopte(addr); - dst_pte = avtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; @@ -2676,6 +2672,7 @@ * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); + dst_pte = pmap_pte_quick(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and @@ -2697,7 +2694,6 @@ } addr += PAGE_SIZE; src_pte++; - dst_pte++; } } } @@ -3288,7 +3284,7 @@ vm_page_t m; int val = 0; - ptep = pmap_pte(pmap, addr); + ptep = pmap_pte_quick(pmap, addr); if (ptep == 0) { return 0; } @@ -3352,7 +3348,11 @@ #else pmap->pm_active |= 1; #endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else cr3 = vtophys(pmap->pm_pdir); +#endif /* XXXKSE this is wrong. 
* pmap_activate is for the current thread on the current cpu */ --- //depot/vendor/freebsd/src/sys/i386/i386/vm86bios.s 2002/11/06 17:35:17 +++ //depot/user/jake/pae/src/sys/i386/i386/vm86bios.s 2003/03/08 18:20:53 @@ -123,6 +123,9 @@ movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */ movl %eax,0(%ebx) /* ... install as PTD entry 0 */ +#ifdef PAE + movl IdlePDPT,%ecx +#endif movl %ecx,%cr3 /* new page tables */ movl SCR_VMFRAME(%edx),%esp /* switch to new stack */ --- //depot/vendor/freebsd/src/sys/i386/i386/vm_machdep.c 2003/03/24 16:10:49 +++ //depot/user/jake/pae/src/sys/i386/i386/vm_machdep.c 2003/03/24 16:56:29 @@ -170,7 +170,11 @@ * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; @@ -342,7 +346,11 @@ * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. 
*/ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; --- //depot/vendor/freebsd/src/sys/i386/include/_types.h 2003/03/24 16:10:49 +++ //depot/user/jake/pae/src/sys/i386/include/_types.h 2003/03/24 18:42:36 @@ -102,7 +102,11 @@ typedef __uint32_t __u_register_t; typedef __uint32_t __vm_offset_t; typedef __int64_t __vm_ooffset_t; +#ifdef PAE +typedef __uint64_t __vm_paddr_t; +#else typedef __uint32_t __vm_paddr_t; +#endif typedef __uint64_t __vm_pindex_t; typedef __uint32_t __vm_size_t; --- //depot/vendor/freebsd/src/sys/i386/include/bus_at386.h 2003/03/11 11:45:17 +++ //depot/user/jake/pae/src/sys/i386/include/bus_at386.h 2003/03/24 18:42:36 @@ -92,15 +92,23 @@ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) --- //depot/vendor/freebsd/src/sys/i386/include/param.h 2003/03/24 16:10:49 +++ //depot/user/jake/pae/src/sys/i386/include/param.h 2003/03/24 16:56:29 @@ -87,8 +87,13 @@ #define PAGE_MASK (PAGE_SIZE-1) #define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) +#ifdef PAE +#define NPGPTD 4 +#define PDRSHIFT 21 /* LOG2(NBPDR) */ +#else #define NPGPTD 1 #define PDRSHIFT 22 /* LOG2(NBPDR) */ +#endif #define NBPTD (NPGPTD< -typedef u_int32_t pd_entry_t; -typedef u_int32_t pt_entry_t; +#ifdef PAE + +typedef uint64_t pdpt_entry_t; +typedef uint64_t pd_entry_t; 
+typedef uint64_t pt_entry_t; + +#define PTESHIFT (3) +#define PDESHIFT (3) + +#else + +typedef uint32_t pd_entry_t; +typedef uint32_t pt_entry_t; #define PTESHIFT (2) #define PDESHIFT (2) +#endif + /* * Address of current and alternate address space page table maps * and directories. @@ -149,6 +170,9 @@ extern pd_entry_t PTD[], APTD[]; extern pd_entry_t PTDpde[], APTDpde[]; +#ifdef PAE +extern pdpt_entry_t *IdlePDPT; +#endif extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ #endif @@ -176,7 +200,7 @@ if ((pa = (vm_offset_t) PTD[va >> PDRSHIFT]) & PG_PS) { pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1)); } else { - pa = *(vm_offset_t *)vtopte(va); + pa = *vtopte(va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); } return pa; @@ -202,6 +226,10 @@ int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ +#ifdef PAE + pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer + table */ +#endif }; #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) --- //depot/vendor/freebsd/src/sys/kern/vfs_subr.c 2003/03/22 05:25:24 +++ //depot/user/jake/pae/src/sys/kern/vfs_subr.c 2003/03/24 18:42:36 @@ -458,7 +458,7 @@ vntblinit(void *dummy __unused) { - desiredvnodes = maxproc + cnt.v_page_count / 4; + desiredvnodes = maxproc + cnt.v_page_count / 16; minvnodes = desiredvnodes / 4; mtx_init(&mountlist_mtx, "mountlist", NULL, MTX_DEF); mtx_init(&mntvnode_mtx, "mntvnode", NULL, MTX_DEF);