Index: conf/options.i386 =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/conf/options.i386,v retrieving revision 1.3 diff -u -p -r1.3 options.i386 --- conf/options.i386 1 Jul 2008 15:43:02 -0000 1.3 +++ conf/options.i386 30 Jan 2009 22:28:41 -0000 @@ -34,6 +34,9 @@ KVA_PAGES opt_global.h # Physical address extensions and support for >4G ram. As above. PAE opt_global.h +# Use a separate 4G address space for the kernel +KVA_4G opt_global.h + CLK_CALIBRATION_LOOP opt_clock.h CLK_USE_I8254_CALIBRATION opt_clock.h TIMER_FREQ opt_clock.h Index: i386/i386/apic_vector.s =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/i386/apic_vector.s,v retrieving revision 1.3 diff -u -p -r1.3 apic_vector.s --- i386/i386/apic_vector.s 12 Apr 2008 00:09:36 -0000 1.3 +++ i386/i386/apic_vector.s 30 Jan 2009 22:28:52 -0000 @@ -52,7 +52,7 @@ pushal ; /* 8 ints */ \ pushl %ds ; /* save data and extra segments ... 
*/ \ pushl %es ; \ - pushl %fs + pushl %fs ; #define POP_FRAME \ popl %fs ; \ @@ -78,6 +78,7 @@ IDTVEC(vec_name) ; \ movl %eax, %es ; \ movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \ movl %eax, %fs ; \ + KVA_ENTER ; \ FAKE_MCOUNT(TF_EIP(%esp)) ; \ movl lapic, %edx ; /* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ @@ -128,6 +129,7 @@ IDTVEC(timerint) movl %eax, %es movl $KPSEL, %eax movl %eax, %fs + KVA_ENTER movl lapic, %edx movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ @@ -151,6 +153,7 @@ IDTVEC(invltlb) pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds + KVA_ENTER #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs @@ -167,8 +170,10 @@ IDTVEC(invltlb) #endif #endif +#ifndef KVA_4G movl %cr3, %eax /* invalidate the TLB */ movl %eax, %cr3 +#endif movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ @@ -176,6 +181,7 @@ IDTVEC(invltlb) lock incl smp_tlb_wait + KVA_EXIT popl %ds popl %eax iret @@ -190,6 +196,7 @@ IDTVEC(invlpg) pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds + KVA_ENTER #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs @@ -215,6 +222,7 @@ IDTVEC(invlpg) lock incl smp_tlb_wait + KVA_EXIT popl %ds popl %eax iret @@ -230,6 +238,7 @@ IDTVEC(invlrng) pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds + KVA_ENTER #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs @@ -259,6 +268,7 @@ IDTVEC(invlrng) lock incl smp_tlb_wait + KVA_EXIT popl %ds popl %edx popl %eax @@ -274,6 +284,7 @@ IDTVEC(invlcache) pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds + KVA_ENTER #ifdef COUNT_IPIS pushl %fs @@ -293,6 +304,7 @@ IDTVEC(invlcache) lock incl smp_tlb_wait + KVA_EXIT popl %ds popl %eax iret @@ -310,6 +322,7 @@ IDTVEC(ipi_intr_bitmap_handler) movl %eax, %es movl $KPSEL, %eax movl %eax, %fs + KVA_ENTER movl lapic, %edx movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC 
*/ @@ -346,6 +359,7 @@ IDTVEC(cpustop) movl %eax, %es movl $KPSEL, %eax movl %eax, %fs + KVA_ENTER movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ @@ -380,6 +394,7 @@ IDTVEC(cpustop) call *%eax 2: + KVA_EXIT popl %fs popl %es popl %ds /* restore previous data segment */ @@ -404,6 +419,7 @@ IDTVEC(rendezvous) movl %eax, %es movl $KPSEL, %eax movl %eax, %fs + KVA_ENTER #ifdef COUNT_IPIS movl PCPU(CPUID), %eax @@ -414,6 +430,7 @@ IDTVEC(rendezvous) movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + KVA_EXIT POP_FRAME iret @@ -429,6 +446,7 @@ IDTVEC(lazypmap) movl %eax, %es movl $KPSEL, %eax movl %eax, %fs + KVA_ENTER #ifdef COUNT_IPIS movl PCPU(CPUID), %eax @@ -439,6 +457,7 @@ IDTVEC(lazypmap) movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + KVA_EXIT POP_FRAME iret #endif /* SMP */ Index: i386/i386/exception.s =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/i386/exception.s,v retrieving revision 1.2 diff -u -p -r1.2 exception.s --- i386/i386/exception.s 6 Mar 2007 20:36:46 -0000 1.2 +++ i386/i386/exception.s 30 Jan 2009 22:28:52 -0000 @@ -71,6 +71,27 @@ * must load them with appropriate values for supervisor mode operation. */ +#ifdef KVA_4G +#define KVA_ENTER \ + movl %cr3, %eax ; \ + cmpl IdlePTD, %eax ; \ + je 33f ; \ + movl IdlePTD, %eax ; \ + movl %eax, %cr3 ; \ +33: + +#define KVA_EXIT \ + testb $SEL_RPL_MASK,TF_CS(%esp) ; /* returning to user? 
*/ \ + jz 33f ; \ + movl PCPU(CURPCB), %eax ; \ + movl PCB_CR3(%eax), %eax ; \ + movl %eax,%cr3 ; \ +33: +#else +#define KVA_ENTER +#define KVA_EXIT +#endif + MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) @@ -134,6 +155,7 @@ alltraps_with_regs_pushed: movl %eax,%es movl $KPSEL,%eax movl %eax,%fs + KVA_ENTER FAKE_MCOUNT(TF_EIP(%esp)) calltrap: call trap @@ -171,6 +193,7 @@ IDTVEC(lcall_syscall) movl %eax,%es movl $KPSEL,%eax movl %eax,%fs + KVA_ENTER FAKE_MCOUNT(TF_EIP(%esp)) call syscall MEXITCOUNT @@ -196,6 +219,7 @@ IDTVEC(int0x80_syscall) movl %eax,%es movl $KPSEL,%eax movl %eax,%fs + KVA_ENTER FAKE_MCOUNT(TF_EIP(%esp)) call syscall MEXITCOUNT @@ -305,6 +329,7 @@ doreti_exit_to_user: incl PCPU(INTR_CTXT_SW) doreti_exit: MEXITCOUNT + KVA_EXIT .globl doreti_popl_fs doreti_popl_fs: Index: i386/i386/machdep.c =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/i386/machdep.c,v retrieving revision 1.14 diff -u -p -r1.14 machdep.c --- i386/i386/machdep.c 24 Mar 2008 20:32:38 -0000 1.14 +++ i386/i386/machdep.c 30 Jan 2009 22:28:52 -0000 @@ -2350,6 +2350,8 @@ init386(first) * components in it. If so just link td->td_proc here. 
*/
 	proc_linkup(&proc0, &thread0);
+	proc0.p_flag = P_SYSTEM | P_INMEM;
+	thread0.td_flags = TDF_INMEM;
 
 	metadata_missing = 0;
 	if (bootinfo.bi_modulep) {
@@ -3230,6 +3232,202 @@ user_dbreg_trap(void)
 	return 0;
 }
 
+#ifdef KVA_4G
+
+/*
+ * KVA_4G replacements for the user-copy primitives that support.s omits
+ * when this option is set.  Each transfer is described by a one-segment
+ * uio and handed to pmap_rwmem(); any error or short transfer is reported
+ * as EFAULT.  While "cold" is set the bytes are copied with memcpy().
+ */
+int
+copyin(const void *uaddr, void *kaddr, size_t len)
+{
+	struct iovec iov;
+	struct uio uio;
+	int error;
+
+	if (cold) {
+		memcpy(kaddr, uaddr, len);
+		return (0);
+	}
+	iov.iov_base = __DECONST(void *, kaddr);
+	iov.iov_len = len;
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = (off_t)(uintptr_t)uaddr;
+	uio.uio_resid = len;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_rw = UIO_READ;
+	uio.uio_td = curthread;
+	error = pmap_rwmem(curproc, &uio);
+
+	if (error || uio.uio_resid != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+int
+copyout(const void *kaddr, void *uaddr, size_t len)
+{
+	struct iovec iov;
+	struct uio uio;
+	int error;
+
+	if (cold) {
+		memcpy(uaddr, kaddr, len);
+		return (0);
+	}
+	iov.iov_base = __DECONST(void *, kaddr);
+	iov.iov_len = len;
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = (off_t)(uintptr_t)uaddr;
+	uio.uio_resid = len;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_rw = UIO_WRITE;
+	uio.uio_td = curthread;
+	error = pmap_rwmem(curproc, &uio);
+
+	if (error || uio.uio_resid != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Copy a NUL-terminated string from user space one byte at a time via
+ * copyin().  Returns 0 once the NUL has been copied, EFAULT if a byte
+ * cannot be fetched, or ENAMETOOLONG if no NUL is found within len bytes.
+ * When done is non-NULL it receives the number of bytes stored in kaddr
+ * (including the NUL on success) on every exit path.
+ */
+int
+copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
+{
+	const u_char *src;
+	u_char *dst;
+	size_t i;
+	int error;
+
+	src = uaddr;
+	dst = kaddr;
+	error = ENAMETOOLONG;
+	for (i = 0; i < len; i++) {
+		if (copyin(src + i, dst + i, 1) != 0) {
+			error = EFAULT;
+			break;
+		}
+		if (dst[i] == '\0') {
+			i++;	/* count the terminating NUL */
+			error = 0;
+			break;
+		}
+	}
+	if (done != NULL)
+		*done = i;
+	return (error);
+}
+
+int
+suword(void *base, long word)
+{
+	return copyout(&word, base, sizeof(int));
+}
+
+int
+suword32(void *base, int32_t word)
+{
+	return suword(base, word);
+}
+
+int
+suword16(void *base, int word)
+{
+	return copyout(&word, base, sizeof(uint16_t));
+}
+
+int
+subyte(void *base, int byte)
+{
+	return copyout(&byte, base, sizeof(u_char));
+}
+
+long
+fuword(const void *base)
+{
+	int error;
+	int word;
+
+	error = copyin(base, &word, sizeof(word));
+	if (error)
+		return (-1);
+	return (word);
+}
+
+int
+fuword16(void *base)
+{
+	int error;
+	uint16_t word;
+
+	error = copyin(base, &word, sizeof(uint16_t));
+	if (error)
+		return (-1);
+	return (word);
+}
+
+int32_t
+fuword32(const void *base)
+{
+
+	return fuword(base);
+}
+
+int
+fubyte(const void *base)
+{
+	u_char byte;
+	int error;
+
+	error = copyin(base, &byte, sizeof(byte));
+	if (error)
+		return (-1);
+	return (byte);
+}
+
+/*
+ * XXX: casuword32(), casuword(), fuswintr() and suswintr() below are
+ * placeholders: they return 0 without touching user memory.
+ */
+uint32_t
+casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval)
+{
+	return 0;
+}
+
+u_long
+casuword(volatile u_long *p, u_long oldval, u_long newval)
+{
+	return 0;
+}
+
+#include
+
+int
+fuswintr(void *base)
+{
+	return 0;
+}
+
+int
+suswintr(void *base, int word)
+{
+	return 0;
+}
+
+#endif /* KVA_4G */
+
 #ifndef DEV_APIC
 #include
 
Index: i386/i386/pmap.c
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/i386/i386/pmap.c,v
retrieving revision 1.7
diff -u -p -r1.7 pmap.c
--- i386/i386/pmap.c	27 Sep 2008 07:22:54 -0000	1.7
+++ i386/i386/pmap.c	30 Jan 2009 22:28:52 -0000
@@ -168,6 +168,15 @@ __IPSOID("$IPSO: src/sys/i386/i386/pmap.
#define PMAP_INLINE
 #endif
 
+int vm_max_kernel_address = VM_MAX_KERNEL_ADDRESS;
+int vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS;
+int upt_max_address = UPT_MAX_ADDRESS;
+int upt_min_address = UPT_MIN_ADDRESS;
+int vm_maxuser_address = VM_MAXUSER_ADDRESS;
+int userstack = USRSTACK;
+int vm_max_address = VM_MAX_ADDRESS;
+int vm_min_address = VM_MIN_ADDRESS;
+
 /*
  * Get PDEs and PTEs for user/kernel address space
  */
@@ -199,6 +208,11 @@ static int nkpt;
 vm_offset_t kernel_vm_end;
 extern u_int32_t KERNend;
 
+#ifdef KVA_4G
+vm_offset_t kernel_high_end;
+static int high_nkpt;
+#endif
+
 #ifdef PAE
 static uma_zone_t pdptzone;
 #endif
@@ -833,10 +847,13 @@ pmap_invalidate_cache(void)
 static __inline int
 pmap_is_current(pmap_t pmap)
 {
-
+#ifdef KVA_4G
+	return (0);
+#else
 	return (pmap == kernel_pmap ||
 		(pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
 	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
+#endif
 }
 
 /*
@@ -1424,8 +1441,10 @@ pmap_lazyfix_action(void)
 {
 	u_int mymask = PCPU_GET(cpumask);
 
+#ifndef KVA_4G
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+#endif
 	atomic_clear_int(lazymask, mymask);
 	atomic_store_rel_int(&lazywait, 1);
 }
@@ -1434,8 +1453,10 @@ static void
 pmap_lazyfix_self(u_int mymask)
 {
 
+#ifndef KVA_4G
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+#endif
 	atomic_clear_int(lazymask, mymask);
 }
 
@@ -1492,7 +1513,9 @@ pmap_lazyfix(pmap_t pmap)
 
 	cr3 = vtophys(pmap->pm_pdir);
 	if (cr3 == rcr3()) {
+#ifndef KVA_4G
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+#endif
 		pmap->pm_active &= ~(PCPU_GET(cpumask));
 	}
 }
@@ -1559,6 +1582,10 @@ kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 
+#ifdef KVA_4G
+	kfree += VM_MAXUSER_ADDRESS - kernel_high_end;
+#endif
+
 	return sysctl_handle_long(oidp, &kfree, 0, req);
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
@@ -1575,16 +1602,34 @@ pmap_growkernel(vm_offset_t addr)
 	vm_page_t nkpg;
 	pd_entry_t newpdir;
 	pt_entry_t *pde;
+
vm_offset_t kend, kstart;
+	int cnt;
+
+#ifdef KVA_4G
+	if (addr < VM_MAXUSER_ADDRESS) {
+		kend = kernel_high_end;
+		kstart = VM_MIN_KERNEL_ADDRESS;
+		cnt = high_nkpt;
+	} else {
+		kend = kernel_vm_end;
+		kstart = KERNBASE;
+		cnt = nkpt;
+	}
+#else
+	kend = kernel_vm_end;
+	kstart = KERNBASE;
+	cnt = nkpt;
+#endif
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
-	if (kernel_vm_end == 0) {
-		kernel_vm_end = KERNBASE;
-		nkpt = 0;
-		while (pdir_pde(PTD, kernel_vm_end)) {
-			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
-			nkpt++;
-			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
-				kernel_vm_end = kernel_map->max_offset;
+	if (kend == 0) {
+		kend = kstart;
+		cnt = 0;
+		while (pdir_pde(PTD, kend)) {
+			kend = (kend + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+			cnt++;
+			if (kend - 1 >= kernel_map->max_offset) {
+				kend = kernel_map->max_offset;
 				break;
 			}
 		}
@@ -1592,11 +1637,11 @@ pmap_growkernel(vm_offset_t addr)
 	addr = roundup2(addr, PAGE_SIZE * NPTEPG);
 	if (addr - 1 >= kernel_map->max_offset)
 		addr = kernel_map->max_offset;
-	while (kernel_vm_end < addr) {
-		if (pdir_pde(PTD, kernel_vm_end)) {
-			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
-			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
-				kernel_vm_end = kernel_map->max_offset;
+	while (kend < addr) {
+		if (pdir_pde(PTD, kend)) {
+			kend = (kend + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+			if (kend - 1 >= kernel_map->max_offset) {
+				kend = kernel_map->max_offset;
 				break;
 			}
 			continue;
@@ -1605,30 +1650,42 @@ pmap_growkernel(vm_offset_t addr)
 		/*
 		 * This index is bogus, but out of the way
 		 */
-		nkpg = vm_page_alloc(NULL, nkpt,
+		nkpg = vm_page_alloc(NULL, cnt,
 		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
 		if (!nkpg)
 			panic("pmap_growkernel: no memory to grow kernel");
 
-		nkpt++;
+		cnt++;
 
 		pmap_zero_page(nkpg);
 		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
-		pdir_pde(PTD, kernel_vm_end) = newpdir;
+
pdir_pde(PTD, kend) = newpdir; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { - pde = pmap_pde(pmap, kernel_vm_end); + pde = pmap_pde(pmap, kend); pde_store(pde, newpdir); } mtx_unlock_spin(&allpmaps_lock); - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + kend = (kend + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + if (kend - 1 >= kernel_map->max_offset) { + kend = kernel_map->max_offset; break; } } +#ifdef KVA_4G + if (kstart == KERNBASE) { + kernel_vm_end = kend; + nkpt = cnt; + } else { + kernel_high_end = kend; + high_nkpt = cnt; + } +#else + kernel_vm_end = kend; + nkpt = cnt; +#endif } @@ -2352,13 +2409,18 @@ pmap_enter_quick_locked(pmap_t pmap, vm_ * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ +#ifdef KVA_4G + sched_pin(); + pte = pmap_pte_quick(pmap, va); +#else pte = vtopte(va); +#endif if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } - return (mpte); + goto out; } /* @@ -2375,7 +2437,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_ mpte = NULL; } - return (mpte); + goto out; } /* @@ -2392,6 +2454,10 @@ pmap_enter_quick_locked(pmap_t pmap, vm_ pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); +out: +#ifdef KVA_4G + sched_unpin(); +#endif return mpte; } @@ -2772,7 +2838,9 @@ pmap_page_exists_quick(pmap, m) return (FALSE); } +#ifndef KVA_4G #define PMAP_REMOVE_PAGES_CURPROC_ONLY +#endif /* * Remove all pages from specified address space * this aids process exit speeds. 
Also, this code @@ -2910,8 +2978,14 @@ pmap_is_prefaultable(pmap_t pmap, vm_off rv = FALSE; PMAP_LOCK(pmap); if (*pmap_pde(pmap, addr)) { +#ifdef KVA_4G + pte = pmap_pte(pmap, addr); + rv = *pte == 0; + pmap_pte_release(pte); +#else pte = vtopte(addr); rv = *pte == 0; +#endif } PMAP_UNLOCK(pmap); return (rv); @@ -3147,7 +3221,7 @@ pmap_change_attr(va, size, mode) size = roundup(offset + size, PAGE_SIZE); /* Only supported on kernel virtual addresses. */ - if (base <= VM_MAXUSER_ADDRESS) + if (base < VM_MIN_KERNEL_ADDRESS) return (EINVAL); /* 4MB pages and pages that aren't mapped aren't supported. */ @@ -3297,11 +3371,88 @@ pmap_activate(struct thread *td) * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_cr3 = cr3; +#ifndef KVA_4G load_cr3(cr3); +#endif PCPU_SET(curpmap, pmap); critical_exit(); } +#include + +int +pmap_rwmem(struct proc *p, struct uio *uio) +{ + vm_map_t map; + pmap_t pmap; + vm_prot_t reqprot; + int error, writing; + + /* + * The map we want... + */ + map = &p->p_vmspace->vm_map; + pmap = map->pmap; + + writing = uio->uio_rw == UIO_WRITE; + reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : + VM_PROT_READ; + + sched_pin(); + /* + * Only map in one page at a time. We don't have to, but it + * makes things easier. This way is trivial - right? + */ + do { + vm_offset_t pageno; + vm_offset_t uva; + int page_offset; /* offset into page */ + vm_page_t m; + u_int len; + + uva = (vm_offset_t)uio->uio_offset; + + /* + * Get the page number of this segment. 
+ */ + pageno = trunc_page(uva); + page_offset = uva - pageno; + + /* + * How many bytes to copy + */ + len = min(PAGE_SIZE - page_offset, uio->uio_resid); + + m = pmap_extract_and_hold(pmap, uva, reqprot); + + /* + * Fault the page on behalf of the process + */ + if (m == NULL) { + error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL); + if (error == 0) + m = pmap_extract_and_hold(pmap, uva, reqprot); + if (m == NULL) { + error = EFAULT; + break; + } + } + + /* + * Now do the i/o move. + */ + error = uiomove_fromphys(&m, page_offset, len, uio); + + vm_page_lock_queues(); + vm_page_unhold(m); + vm_page_unlock_queues(); + + } while (error == 0 && uio->uio_resid > 0); + sched_unpin(); + + return (error); +} + vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { Index: i386/i386/support.s =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/i386/support.s,v retrieving revision 1.2 diff -u -p -r1.2 support.s --- i386/i386/support.s 12 Apr 2008 00:09:36 -0000 1.2 +++ i386/i386/support.s 30 Jan 2009 22:28:52 -0000 @@ -49,11 +49,13 @@ bcopy_vector: bzero_vector: .long generic_bzero .globl copyin_vector +#ifndef KVA_4G copyin_vector: .long generic_copyin .globl copyout_vector copyout_vector: .long generic_copyout +#endif #if defined(I586_CPU) && defined(DEV_NPX) kernel_fpu_lock: .byte 0xfe @@ -690,6 +692,7 @@ ENTRY(memcpy) ret +#ifndef KVA_4G /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ @@ -1354,6 +1357,7 @@ cpystrflt_x: popl %esi ret +#endif /* KVA_4G */ /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE Index: i386/i386/swtch.s =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/i386/swtch.s,v retrieving revision 1.2 diff -u -p -r1.2 swtch.s --- i386/i386/swtch.s 24 Mar 2008 20:32:38 -0000 
1.2 +++ i386/i386/swtch.s 30 Jan 2009 22:28:52 -0000 @@ -87,8 +87,10 @@ ENTRY(cpu_throw) 1: movl 8(%esp),%ecx /* New thread */ movl TD_PCB(%ecx),%edx +#ifndef KVA_4G movl PCB_CR3(%edx),%eax movl %eax,%cr3 /* new address space */ +#endif /* set bit in new pm_active */ movl TD_PROC(%ecx),%eax movl P_VMSPACE(%eax), %ebx @@ -180,10 +182,12 @@ ENTRY(cpu_switch) cmpl %eax,IdlePTD /* Kernel address space? */ #endif je sw0 +#ifndef KVA_4G movl %cr3,%ebx /* The same address space? */ cmpl %ebx,%eax je sw0 movl %eax,%cr3 /* new address space */ +#endif movl %esi,%eax movl PCPU(CPUID),%esi SETOP %eax,TD_LOCK(%edi) /* Switchout td_lock */ @@ -361,8 +365,10 @@ ENTRY(savectx) movl (%esp),%eax movl %eax,PCB_EIP(%ecx) +#ifndef KVA_4G movl %cr3,%eax movl %eax,PCB_CR3(%ecx) +#endif movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) Index: i386/i386/trap.c =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/i386/trap.c,v retrieving revision 1.10.4.1 diff -u -p -r1.10.4.1 trap.c --- i386/i386/trap.c 10 Oct 2008 23:52:20 -0000 1.10.4.1 +++ i386/i386/trap.c 30 Jan 2009 22:28:52 -0000 @@ -718,6 +718,11 @@ trap_pfault(frame, usermode, eva) struct thread *td = curthread; struct proc *p = td->td_proc; + if (frame->tf_err & PGEX_W) + ftype = VM_PROT_WRITE; + else + ftype = VM_PROT_READ; + va = trunc_page(eva); if (va >= KERNBASE) { /* @@ -737,6 +742,16 @@ trap_pfault(frame, usermode, eva) map = kernel_map; } else { +#ifdef KVA_4G + /* + * If we weren't in usermode this is still a kernel map fault. + * All user accesses are done without indirectly faulting. + */ + if (!usermode) { + map = kernel_map; + goto kernfault; + } +#endif /* * This is a fault on non-kernel virtual memory. * vm is initialized above to NULL. 
If curproc is NULL @@ -751,11 +766,6 @@ trap_pfault(frame, usermode, eva) map = &vm->vm_map; } - if (frame->tf_err & PGEX_W) - ftype = VM_PROT_WRITE; - else - ftype = VM_PROT_READ; - if (map != kernel_map) { /* * Keep swapout from messing with us during this @@ -774,6 +784,9 @@ trap_pfault(frame, usermode, eva) --p->p_lock; PROC_UNLOCK(p); } else { +#ifdef KVA_4G +kernfault: +#endif /* * Don't have to worry about process locking or stacks in the * kernel. Index: i386/include/pmap.h =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/include/pmap.h,v retrieving revision 1.3 diff -u -p -r1.3 pmap.h --- i386/include/pmap.h 1 Jul 2008 15:43:06 -0000 1.3 +++ i386/include/pmap.h 30 Jan 2009 22:28:53 -0000 @@ -376,6 +376,10 @@ void pmap_invalidate_page(pmap_t, vm_off void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); void pmap_invalidate_cache(void); +struct uio; +struct proc; +int pmap_rwmem(struct proc *p, struct uio *uio); + #endif /* _KERNEL */ Index: i386/include/vmparam.h =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/include/vmparam.h,v retrieving revision 1.4 diff -u -p -r1.4 vmparam.h --- i386/include/vmparam.h 6 Sep 2007 04:41:11 -0000 1.4 +++ i386/include/vmparam.h 30 Jan 2009 22:28:53 -0000 @@ -113,6 +113,11 @@ #define VM_KMEM_SIZE (12 * 1024 * 1024) #endif +#ifdef KVA_4G +#undef VM_MIN_KERNEL_ADDRESS +#define VM_MIN_KERNEL_ADDRESS (128 * 1024 * 1024) +#endif + /* * How many physical pages per KVA page allocated. 
* min(max(VM_KMEM_SIZE, Physical memory/VM_KMEM_SIZE_SCALE), VM_KMEM_SIZE_MAX) Index: i386/isa/atpic_vector.s =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/i386/isa/atpic_vector.s,v retrieving revision 1.1 diff -u -p -r1.1 atpic_vector.s --- i386/isa/atpic_vector.s 10 Feb 2006 03:53:22 -0000 1.1 +++ i386/isa/atpic_vector.s 30 Jan 2009 22:28:53 -0000 @@ -58,6 +58,7 @@ IDTVEC(vec_name) ; \ movl %eax, %es ; \ movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \ movl %eax, %fs ; \ + KVA_ENTER ; \ ; \ FAKE_MCOUNT(TF_EIP(%esp)) ; \ pushl $irq_num; /* pass the IRQ */ \ Index: kern/kern_proc.c =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/kern/kern_proc.c,v retrieving revision 1.4 diff -u -p -r1.4 kern_proc.c --- kern/kern_proc.c 24 Mar 2008 20:32:45 -0000 1.4 +++ kern/kern_proc.c 30 Jan 2009 22:28:58 -0000 @@ -910,19 +910,30 @@ sysctl_out_proc(struct proc *p, struct s error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); } else { + struct kinfo_proc *kinfo_threads; + int i; + + kinfo_threads = malloc(sizeof(*kinfo_threads) * p->p_numthreads, + M_TEMP, M_NOWAIT | M_ZERO); + i = 0; PROC_SLOCK(p); - if (FIRST_THREAD_IN_PROC(p) != NULL) - FOREACH_THREAD_IN_PROC(p, td) { - fill_kinfo_thread(td, &kinfo_proc); - error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, - sizeof(kinfo_proc)); + if (FIRST_THREAD_IN_PROC(p) != NULL && kinfo_threads != NULL) + FOREACH_THREAD_IN_PROC(p, td) + fill_kinfo_thread(td, &kinfo_threads[i++]); + PROC_SUNLOCK(p); + if (FIRST_THREAD_IN_PROC(p) != NULL && kinfo_threads != NULL) { + for (i = 0; i < p->p_numthreads; i++) { + error = SYSCTL_OUT(req, + (caddr_t)&kinfo_threads[i], + sizeof(kinfo_proc)); if (error) break; } - else + } else error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); - PROC_SUNLOCK(p); + if (kinfo_threads) + free(kinfo_threads, M_TEMP); } PROC_UNLOCK(p); if (error) Index: vm/pmap.h 
=================================================================== RCS file: /CVS/CVS_IPSO/src/sys/vm/pmap.h,v retrieving revision 1.3 diff -u -p -r1.3 pmap.h --- vm/pmap.h 6 Feb 2008 02:01:34 -0000 1.3 +++ vm/pmap.h 30 Jan 2009 22:29:14 -0000 @@ -90,6 +90,10 @@ struct thread; */ extern vm_offset_t kernel_vm_end; +#ifdef KVA_4G +extern vm_offset_t kernel_high_end; +#endif + extern int pmap_pagedaemon_waken; void pmap_change_wiring(pmap_t, vm_offset_t, boolean_t); Index: vm/vm_extern.h =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/vm/vm_extern.h,v retrieving revision 1.3 diff -u -p -r1.3 vm_extern.h --- vm/vm_extern.h 6 Feb 2008 02:01:35 -0000 1.3 +++ vm/vm_extern.h 30 Jan 2009 22:29:14 -0000 @@ -64,6 +64,7 @@ void kmem_free_wakeup(vm_map_t, vm_offse void kmem_init(vm_offset_t, vm_offset_t); vm_offset_t kmem_malloc(vm_map_t, vm_size_t, boolean_t); vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t); +vm_map_t kmem_suballocat(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t); void swapout_procs(int); int useracc(void *, int, int); int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); Index: vm/vm_glue.c =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/vm/vm_glue.c,v retrieving revision 1.4 diff -u -p -r1.4 vm_glue.c --- vm/vm_glue.c 24 Mar 2008 20:33:00 -0000 1.4 +++ vm/vm_glue.c 30 Jan 2009 22:29:14 -0000 @@ -116,6 +116,12 @@ static int swapout(struct proc *); static void swapclear(struct proc *); #endif +#ifdef KVA_4G +#define stack_map high_map +#else +#define stack_map kernel_map +#endif + static volatile int proc0_rescan; @@ -342,7 +348,7 @@ vm_thread_new(struct thread *td, int pag /* * Get a kernel virtual address for this thread's kstack. 
*/ - ks = kmem_alloc_nofault(kernel_map, + ks = kmem_alloc_nofault(stack_map, (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); if (ks == 0) panic("vm_thread_new: kstack allocation failed"); @@ -401,7 +407,7 @@ vm_thread_dispose(struct thread *td) } VM_OBJECT_UNLOCK(ksobj); vm_object_deallocate(ksobj); - kmem_free(kernel_map, ks - (KSTACK_GUARD_PAGES * PAGE_SIZE), + kmem_free(stack_map, ks - (KSTACK_GUARD_PAGES * PAGE_SIZE), (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); } Index: vm/vm_kern.c =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/vm/vm_kern.c,v retrieving revision 1.1 diff -u -p -r1.1 vm_kern.c --- vm/vm_kern.c 10 Feb 2006 03:59:19 -0000 1.1 +++ vm/vm_kern.c 30 Jan 2009 22:29:14 -0000 @@ -88,6 +88,10 @@ vm_map_t exec_map=0; vm_map_t pipe_map; vm_map_t buffer_map=0; +#ifdef KVA_4G +vm_map_t high_map; +#endif + /* * kmem_alloc_nofault: * @@ -227,16 +231,35 @@ kmem_suballoc(parent, min, max, size) vm_offset_t *min, *max; vm_size_t size; { + *min = (vm_offset_t) vm_map_min(parent); + return kmem_suballocat(parent, min, max, size); +} +vm_map_t +kmem_suballocat(parent, min, max, size) + vm_map_t parent; + vm_offset_t *min, *max; + vm_size_t size; +{ + vm_offset_t sav; int ret; vm_map_t result; size = round_page(size); - *min = (vm_offset_t) vm_map_min(parent); + sav = *min; ret = vm_map_find(parent, NULL, (vm_offset_t) 0, min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0); if (ret != KERN_SUCCESS) { + vm_map_entry_t entry; printf("kmem_suballoc: bad status return of %d.\n", ret); + + for (entry = parent->header.next; entry != &parent->header; + entry = entry->next) { + printf("map entry %p: start=%p, end=%p\n", + (void *)entry, (void *)entry->start, + (void *)entry->end); + } + printf("Requested: %p %d\n", (void *)sav, size); panic("kmem_suballoc"); } *max = *min + size; @@ -491,8 +514,17 @@ kmem_init(start, end) vm_map_lock(m); /* N.B.: cannot use kgdb to debug, starting with this assignment ... 
*/ kernel_map = m; +#ifndef KVA_4G (void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0); - /* ... and ending with the completion of the above `insert' */ vm_map_unlock(m); +#else + (void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, + VM_MAXUSER_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0); + vm_map_unlock(m); + start = virtual_avail; + high_map = kmem_suballocat(kernel_map, &start, &end, + virtual_end - virtual_avail); +#endif + /* ... and ending with the completion of the above `insert' */ } Index: vm/vm_kern.h =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/vm/vm_kern.h,v retrieving revision 1.1 diff -u -p -r1.1 vm_kern.h --- vm/vm_kern.h 10 Feb 2006 03:59:19 -0000 1.1 +++ vm/vm_kern.h 30 Jan 2009 22:29:14 -0000 @@ -72,4 +72,8 @@ extern vm_map_t exec_map; extern vm_map_t pipe_map; extern u_int vm_kmem_size; +#ifdef KVA_4G +extern vm_map_t high_map; +#endif + #endif /* _VM_VM_KERN_H_ */ Index: vm/vm_map.c =================================================================== RCS file: /CVS/CVS_IPSO/src/sys/vm/vm_map.c,v retrieving revision 1.4 diff -u -p -r1.4 vm_map.c --- vm/vm_map.c 24 Mar 2008 20:33:00 -0000 1.4 +++ vm/vm_map.c 30 Jan 2009 22:29:14 -0000 @@ -1163,6 +1163,10 @@ found: end = round_page(*addr + length); if (end > kernel_vm_end) pmap_growkernel(end); +#ifdef KVA_4G + else if (end < VM_MAXUSER_ADDRESS && end > kernel_high_end) + pmap_growkernel(end); +#endif } return (0); }