Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c	(revision 255043)
+++ sys/amd64/amd64/pmap.c	(working copy)
@@ -208,6 +208,46 @@ pmap_accessed_bit(pmap_t pmap)
 	return (mask);
 }
 
+static __inline pt_entry_t
+pmap_execute_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = 0;
+		break;
+	case PT_EPT:
+		mask = EPT_PG_EX;
+		break;
+	default:
+		panic("pmap_execute_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+#undef PG_U
+#define	X86_PG_U	0x004
+static __inline pt_entry_t
+pmap_user_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_U;
+		break;
+	case PT_EPT:
+		mask = 0;
+		break;
+	default:
+		panic("pmap_user_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
 #undef PG_M
 #define	X86_PG_M	0x040
 static __inline pt_entry_t
@@ -585,6 +625,63 @@ vtopde(vm_offset_t va)
 	return (PDmap + ((va >> PDRSHIFT) & mask));
 }
 
+static pt_entry_t
+pmap_enable_execute(pmap_t pmap, pt_entry_t pte)
+{
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		pte &= ~pg_nx;
+		break;
+	case PT_EPT:
+		pte |= EPT_PG_EX;
+		break;
+	default:
+		panic("pmap_enable_execute: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (pte);
+}
+
+static pt_entry_t
+pmap_disable_execute(pmap_t pmap, pt_entry_t pte)
+{
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		pte |= pg_nx;
+		break;
+	case PT_EPT:
+		pte &= ~EPT_PG_EX;
+		break;
+	default:
+		panic("pmap_disable_execute: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (pte);
+}
+
+static boolean_t
+pmap_is_executable(pmap_t pmap, pt_entry_t pte)
+{
+	boolean_t rv = FALSE;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		if ((pte & pg_nx) == 0)
+			rv = TRUE;
+		break;
+	case PT_EPT:
+		if ((pte & EPT_PG_EX) != 0)
+			rv = TRUE;
+		break;
+	default:
+		panic("pmap_is_executable: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (rv);
+}
+
 static u_int64_t
 allocpages(vm_paddr_t *firstaddr, int n)
 {
@@ -636,11 +733,12 @@ create_pagetables(vm_paddr_t *firstaddr)
 	pd_entry_t *pd_p;
 	pdp_entry_t *pdp_p;
 	pml4_entry_t *p4_p;
-	pt_entry_t PG_G, PG_A, PG_M;
+	pt_entry_t PG_G, PG_A, PG_M, PG_U;
 
 	PG_G = pmap_global_bit(kernel_pmap);
 	PG_A = pmap_accessed_bit(kernel_pmap);
 	PG_M = pmap_modified_bit(kernel_pmap);
+	PG_U = pmap_user_bit(kernel_pmap);
 
 	/* Allocate page table pages for the direct map */
 	ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
@@ -2014,7 +2112,7 @@ int
 pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
 {
 	vm_page_t pml4pg;
-	pt_entry_t PG_A, PG_M;
+	pt_entry_t PG_A, PG_M, PG_U;
 	int i;
 
 	/*
@@ -2037,6 +2135,7 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_typ
 	if ((pmap->pm_type = pm_type) == PT_X86) {
 		PG_A = pmap_accessed_bit(pmap);
 		PG_M = pmap_modified_bit(pmap);
+		PG_U = pmap_user_bit(pmap);
 
 		/* Wire in kernel global address entries. */
 		for (i = 0; i < NKPML4E; i++) {
@@ -2085,12 +2184,14 @@ static vm_page_t
 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 {
 	vm_page_t m, pdppg, pdpg;
-	pt_entry_t PG_A, PG_M;
+	pt_entry_t PG_A, PG_M, PG_U, PG_E;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PG_A = pmap_accessed_bit(pmap);
 	PG_M = pmap_modified_bit(pmap);
+	PG_U = pmap_user_bit(pmap);
+	PG_E = pmap_execute_bit(pmap);
 
 	/*
 	 * Allocate a page table page.
@@ -2127,7 +2228,8 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
 		/* Wire up a new PDPE page */
 		pml4index = ptepindex - (NUPDE + NUPDPE);
 		pml4 = &pmap->pm_pml4[pml4index];
-		*pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+		*pml4 = VM_PAGE_TO_PHYS(m) |
+		    PG_E | PG_U | PG_RW | PG_V | PG_A | PG_M;
 	} else if (ptepindex >= NUPDE) {
 		vm_pindex_t pml4index;
 
@@ -2158,7 +2260,8 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
 
 		/* Now find the pdp page */
 		pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
-		*pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+		*pdp = VM_PAGE_TO_PHYS(m) |
+		    PG_E | PG_U | PG_RW | PG_V | PG_A | PG_M;
 	} else {
 		vm_pindex_t pml4index;
 
@@ -2207,7 +2310,8 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
 
 		/* Now we know where the page directory page is */
 		pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)];
-		*pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+		*pd = VM_PAGE_TO_PHYS(m) |
+		    PG_E | PG_U | PG_RW | PG_V | PG_A | PG_M;
 	}
 
 	pmap_resident_count_inc(pmap, 1);
@@ -3070,7 +3174,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pd
     struct rwlock **lockp)
 {
 	pd_entry_t newpde, oldpde;
-	pt_entry_t *firstpte, newpte, PG_G, PG_A, PG_M;
+	pt_entry_t *firstpte, newpte, PG_G, PG_A, PG_M, PG_U, PG_E;
 	vm_paddr_t mptepa;
 	vm_page_t mpte;
 	struct spglist free;
@@ -3079,6 +3183,8 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pd
 	PG_G = pmap_global_bit(pmap);
 	PG_A = pmap_accessed_bit(pmap);
 	PG_M = pmap_modified_bit(pmap);
+	PG_U = pmap_user_bit(pmap);
+	PG_E = pmap_execute_bit(pmap);
 	PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -3123,7 +3229,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pd
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
-	newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
+	newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_E | PG_V;
 	KASSERT((oldpde & PG_A) != 0,
 	    ("pmap_demote_pde: oldpde is missing PG_A"));
 	KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
@@ -3623,7 +3729,8 @@ retry:
 		newpde |= PG_RO;
 	}
 	if ((prot & VM_PROT_EXECUTE) == 0)
-		newpde |= pg_nx;
+		newpde = pmap_disable_execute(pmap, newpde);
+
 	if (newpde != oldpde) {
 		if (!atomic_cmpset_long(pde, oldpde, newpde))
 			goto retry;
@@ -3762,7 +3869,7 @@ retry:
 				pbits |= PG_RO;
 			}
 			if ((prot & VM_PROT_EXECUTE) == 0)
-				pbits |= pg_nx;
+				pbits = pmap_disable_execute(pmap, pbits);
 
 			if (pbits != obits) {
 				if (!atomic_cmpset_long(pte, obits, pbits))
@@ -3793,7 +3900,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_
     struct rwlock **lockp)
 {
 	pd_entry_t newpde;
-	pt_entry_t *firstpte, oldpte, pa, *pte, PG_G, PG_A, PG_M;
+	pt_entry_t *firstpte, oldpte, pa, *pte, PG_G, PG_A, PG_M, PG_U, PG_E;
 	vm_offset_t oldpteva;
 	vm_page_t mpte;
 	int PG_PTE_CACHE;
@@ -3801,6 +3908,8 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_
 	PG_G = pmap_global_bit(pmap);
 	PG_A = pmap_accessed_bit(pmap);
 	PG_M = pmap_modified_bit(pmap);
+	PG_U = pmap_user_bit(pmap);
+	PG_E = pmap_execute_bit(pmap);
 	PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -3940,7 +4049,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 {
 	struct rwlock *lock;
 	pd_entry_t *pde;
-	pt_entry_t *pte, PG_G, PG_A, PG_M;
+	pt_entry_t *pte, PG_G, PG_A, PG_M, PG_U;
 	pt_entry_t newpte, origpte;
 	pv_entry_t pv;
 	vm_paddr_t opa, pa;
@@ -3949,6 +4058,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 	PG_G = pmap_global_bit(pmap);
 	PG_A = pmap_accessed_bit(pmap);
 	PG_M = pmap_modified_bit(pmap);
+	PG_U = pmap_user_bit(pmap);
 
 	va = trunc_page(va);
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
@@ -3969,7 +4079,9 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 	KASSERT((newpte & (PG_M | PG_RW)) != PG_M,
 	    ("pmap_enter: access includes VM_PROT_WRITE but prot doesn't"));
 	if ((prot & VM_PROT_EXECUTE) == 0)
-		newpte |= pg_nx;
+		newpte = pmap_disable_execute(pmap, newpte);
+	else
+		newpte = pmap_enable_execute(pmap, newpte);
 	if (wired)
 		newpte |= PG_W;
 	if (va < VM_MAXUSER_ADDRESS)
@@ -4146,7 +4258,8 @@ validate:
 		 * invalidation may nonetheless be required because
 		 * the PTE no longer has PG_M set.
		 */
-	} else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) {
+	} else if (pmap_is_executable(pmap, origpte) == FALSE ||
+	    pmap_is_executable(pmap, newpte) == TRUE) {
 		/*
 		 * This PTE change does not require TLB invalidation.
 		 */
@@ -4185,10 +4298,13 @@ static boolean_t
 pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     struct rwlock **lockp)
 {
+	pt_entry_t PG_U;
 	pd_entry_t *pde, newpde;
 	vm_page_t mpde;
 	struct spglist free;
 
+	PG_U = pmap_user_bit(pmap);
+
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -4236,7 +4352,10 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_pag
 		}
 	}
 	if ((prot & VM_PROT_EXECUTE) == 0)
-		newpde |= pg_nx;
+		newpde = pmap_disable_execute(pmap, newpde);
+	else
+		newpde = pmap_enable_execute(pmap, newpde);
+
 	if (va < VM_MAXUSER_ADDRESS)
 		newpde |= PG_U;
@@ -4333,9 +4452,11 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t v
     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
 	struct spglist free;
-	pt_entry_t *pte;
+	pt_entry_t *pte, PG_U;
 	vm_paddr_t pa;
 
+	PG_U = pmap_user_bit(pmap);
+
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->oflags & VPO_UNMANAGED) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
@@ -4428,7 +4549,9 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t v
 
 	pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 0);
 	if ((prot & VM_PROT_EXECUTE) == 0)
-		pa |= pg_nx;
+		pa = pmap_disable_execute(pmap, pa);
+	else
+		pa = pmap_enable_execute(pmap, pa);
 
 	/*
 	 * Now validate mapping with RO protection
@@ -4465,7 +4588,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
     vm_pindex_t pindex, vm_size_t size)
 {
 	pd_entry_t *pde;
-	pt_entry_t PG_A, PG_M;
+	pt_entry_t PG_A, PG_M, PG_U;
 	vm_paddr_t pa, ptepa;
 	vm_page_t p, pdpg;
 	int pat_mode;
@@ -4472,6 +4595,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
 
 	PG_A = pmap_accessed_bit(pmap);
 	PG_M = pmap_modified_bit(pmap);
+	PG_U = pmap_user_bit(pmap);
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
@@ -5800,12 +5924,14 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, v
 {
 	pdp_entry_t newpdpe, oldpdpe;
 	pd_entry_t *firstpde, newpde, *pde;
-	pt_entry_t PG_A, PG_M;
+	pt_entry_t PG_A, PG_M, PG_U, PG_E;
 	vm_paddr_t mpdepa;
 	vm_page_t mpde;
 
 	PG_A = pmap_accessed_bit(pmap);
+	PG_E = pmap_execute_bit(pmap);
 	PG_M = pmap_modified_bit(pmap);
+	PG_U = pmap_user_bit(pmap);
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	oldpdpe = *pdpe;
@@ -5819,7 +5945,7 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, v
 	}
 	mpdepa = VM_PAGE_TO_PHYS(mpde);
 	firstpde = (pd_entry_t *)PHYS_TO_DMAP(mpdepa);
-	newpdpe = mpdepa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V;
+	newpdpe = mpdepa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_E | PG_V;
 	KASSERT((oldpdpe & PG_A) != 0,
 	    ("pmap_demote_pdpe: oldpdpe is missing PG_A"));
 	KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW,
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h	(revision 255043)
+++ sys/amd64/include/pmap.h	(working copy)
@@ -76,7 +76,6 @@
 #define	PG_MANAGED	PG_AVAIL2
 #define	PG_FRAME	(0x000ffffffffff000ul)
 #define	PG_PS_FRAME	(0x000fffffffe00000ul)
-#define	PG_PROT		(PG_RW|PG_U)	/* all protection bits . */
 #define	PG_N		(PG_NC_PWT|PG_NC_PCD)	/* Non-cacheable */
 
 /*
@@ -94,7 +93,7 @@
  * (PTE) page mappings have identical settings for the following fields:
  */
 #define	PG_PTE_PROMOTE	(PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \
-	    PG_M | PG_A | PG_U | PG_RW | PG_V | PG_RO)
+	    PG_M | PG_A | PG_U | PG_RW | PG_V | PG_RO | PG_E)
 
 /*
  * Page Protection Exception bits
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c	(revision 255043)
+++ sys/amd64/vmm/vmm.c	(working copy)
@@ -919,6 +919,16 @@ vm_handle_paging(struct vm *vm, int vcpuid, boolea
 		goto done;
 	}
 
+	/*
+	 * If this is an exec fault then simply change the protection.
+	 */
+	if (ftype == VM_PROT_EXECUTE) {
+		vm_map_protect(&vm->vmspace->vm_map,
+		    trunc_page(vme->u.paging.gpa),
+		    trunc_page(vme->u.paging.gpa + PAGE_SIZE),
+		    VM_PROT_ALL, FALSE);
+	}
+
 	map = &vm->vmspace->vm_map;
 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
 
Index: sys/amd64/vmm/vmm_mem.c
===================================================================
--- sys/amd64/vmm/vmm_mem.c	(revision 255043)
+++ sys/amd64/vmm/vmm_mem.c	(working copy)
@@ -129,7 +129,7 @@ vmm_mem_alloc(struct vmspace *vmspace, vm_paddr_t
 	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
 	if (obj != NULL) {
 		error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len,
-		    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0);
+		    VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_ALL, 0);
 		if (error != KERN_SUCCESS) {
 			vm_object_deallocate(obj);
 			obj = NULL;
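
The helpers added in pmap.c invert the sense of execute permission between the two page-table formats: a legacy x86 PTE is executable when the NX bit is clear, while an EPT PTE is executable only when its execute bit is set. The stand-alone sketch below is not part of the patch; the bit positions and helper names are assumptions chosen for illustration of that convention in user-space C.

/*
 * Illustrative only -- mirrors the execute-permission convention encoded by
 * the new pmap_enable_execute/pmap_disable_execute/pmap_is_executable
 * helpers.  The constants here are stand-ins, not the kernel definitions.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t pt_entry_t;

enum pmap_type { PT_X86, PT_EPT };

#define NX_BIT		(1ULL << 63)	/* x86: "no execute" when set */
#define EPT_PG_EX	(1ULL << 2)	/* EPT: "may execute" when set */

static pt_entry_t
disable_execute(enum pmap_type t, pt_entry_t pte)
{
	/* x86 sets NX; EPT clears its execute bit. */
	return (t == PT_X86) ? (pte | NX_BIT) : (pte & ~EPT_PG_EX);
}

static int
is_executable(enum pmap_type t, pt_entry_t pte)
{
	/* x86 is executable when NX is clear; EPT when EX is set. */
	return (t == PT_X86) ? ((pte & NX_BIT) == 0) : ((pte & EPT_PG_EX) != 0);
}

int
main(void)
{
	pt_entry_t pte = 0x1000;	/* arbitrary frame address */

	printf("x86 exec: %d -> %d\n", is_executable(PT_X86, pte),
	    is_executable(PT_X86, disable_execute(PT_X86, pte)));
	printf("ept exec: %d -> %d\n", is_executable(PT_EPT, pte | EPT_PG_EX),
	    is_executable(PT_EPT, disable_execute(PT_EPT, pte | EPT_PG_EX)));
	return (0);
}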