diff --git a/sys/arm/arm/bus_space_asm_generic.S b/sys/arm/arm/bus_space_asm_generic.S index a63dae6..53a0609 100644 --- a/sys/arm/arm/bus_space_asm_generic.S +++ b/sys/arm/arm/bus_space_asm_generic.S @@ -51,7 +51,7 @@ ENTRY(generic_bs_r_1) ldrb r0, [r1, r2] RET -#if (ARM_ARCH_4 + ARM_ARCH_5) > 0 +#if (ARM_ARCH_4 + ARM_ARCH_5 + ARM_ARCH_6) > 0 ENTRY(generic_armv4_bs_r_2) ldrh r0, [r1, r2] RET @@ -69,7 +69,7 @@ ENTRY(generic_bs_w_1) strb r3, [r1, r2] RET -#if (ARM_ARCH_4 + ARM_ARCH_5) > 0 +#if (ARM_ARCH_4 + ARM_ARCH_5 + ARM_ARCH_6) > 0 ENTRY(generic_armv4_bs_w_2) strh r3, [r1, r2] RET @@ -97,7 +97,7 @@ ENTRY(generic_bs_rm_1) RET -#if (ARM_ARCH_4 + ARM_ARCH_5) > 0 +#if (ARM_ARCH_4 + ARM_ARCH_5 + ARM_ARCH_6) > 0 ENTRY(generic_armv4_bs_rm_2) add r0, r1, r2 mov r1, r3 @@ -145,7 +145,7 @@ ENTRY(generic_bs_wm_1) RET -#if (ARM_ARCH_4 + ARM_ARCH_5) > 0 +#if (ARM_ARCH_4 + ARM_ARCH_5 + ARM_ARCH_6) > 0 ENTRY(generic_armv4_bs_wm_2) add r0, r1, r2 mov r1, r3 @@ -193,7 +193,7 @@ ENTRY(generic_bs_rr_1) RET -#if (ARM_ARCH_4 + ARM_ARCH_5) > 0 +#if (ARM_ARCH_4 + ARM_ARCH_5 + ARM_ARCH_6) > 0 ENTRY(generic_armv4_bs_rr_2) add r0, r1, r2 mov r1, r3 @@ -241,7 +241,7 @@ ENTRY(generic_bs_wr_1) RET -#if (ARM_ARCH_4 + ARM_ARCH_5) > 0 +#if (ARM_ARCH_4 + ARM_ARCH_5 + ARM_ARCH_6) > 0 ENTRY(generic_armv4_bs_wr_2) add r0, r1, r2 mov r1, r3 @@ -288,7 +288,7 @@ ENTRY(generic_bs_sr_1) RET -#if (ARM_ARCH_4 + ARM_ARCH_5) > 0 +#if (ARM_ARCH_4 + ARM_ARCH_5 + ARM_ARCH_6) > 0 ENTRY(generic_armv4_bs_sr_2) add r0, r1, r2 mov r1, r3 @@ -320,7 +320,7 @@ ENTRY(generic_bs_sr_4) * copy region */ -#if (ARM_ARCH_4 + ARM_ARCH_5) > 0 +#if (ARM_ARCH_4 + ARM_ARCH_5 + ARM_ARCH_6) > 0 ENTRY(generic_armv4_bs_c_2) add r0, r1, r2 ldr r2, [sp, #0] diff --git a/sys/arm/arm/busdma_machdep-v6.c b/sys/arm/arm/busdma_machdep-v6.c new file mode 100644 index 0000000..3941b94 --- /dev/null +++ b/sys/arm/arm/busdma_machdep-v6.c @@ -0,0 +1,1501 @@ +/*- + * Copyright (c) 2010 Mark Tinguely + * Copyright (c) 2004 Olivier Houchard + * Copyright (c) 2002 Peter Grehan + * Copyright (c) 1997, 1998 Justin T. Gibbs. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb + */ + +#include +__FBSDID("$FreeBSD: head/sys/arm/arm/v7_busdma_machdep.c 191438 2010-02-01 12:00:00Z jhb $"); + +#define _ARM32_BUS_DMA_PRIVATE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#define MAX_BPAGES 64 +#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 +#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 + +#define FIX_DMAP_BUS_DMASYNC_POSTREAD + +struct bounce_zone; + +struct bus_dma_tag { + bus_dma_tag_t parent; + bus_size_t alignment; + bus_size_t boundary; + bus_addr_t lowaddr; + bus_addr_t highaddr; + bus_dma_filter_t *filter; + void *filterarg; + bus_size_t maxsize; + u_int nsegments; + bus_size_t maxsegsz; + int flags; + int ref_count; + int map_count; + bus_dma_lock_t *lockfunc; + void *lockfuncarg; + bus_dma_segment_t *segments; + struct bounce_zone *bounce_zone; + /* + * DMA range for this tag. If the page doesn't fall within + * one of these ranges, an error is returned. The caller + * may then decide what to do with the transfer. If the + * range pointer is NULL, it is ignored. + */ + struct arm32_dma_range *ranges; + int _nranges; + +}; + +struct bounce_page { + vm_offset_t vaddr; /* kva of bounce buffer */ + bus_addr_t busaddr; /* Physical address */ + vm_offset_t datavaddr; /* kva of client data */ + bus_size_t datacount; /* client data count */ + STAILQ_ENTRY(bounce_page) links; +}; + +struct sync_list { + vm_offset_t vaddr; /* kva of bounce buffer */ + bus_size_t datacount; /* client data count */ + STAILQ_ENTRY(sync_list) slinks; +}; + +int busdma_swi_pending; + +struct bounce_zone { + STAILQ_ENTRY(bounce_zone) links; + STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; + int total_bpages; + int free_bpages; + int reserved_bpages; + int active_bpages; + int total_bounced; + int total_deferred; + int map_count; + bus_size_t alignment; + bus_addr_t lowaddr; + char zoneid[8]; + char lowaddrid[20]; + struct sysctl_ctx_list sysctl_tree; + struct sysctl_oid *sysctl_tree_top; +}; + +static struct mtx bounce_lock; +static int total_bpages; +static int busdma_zonecount; +static STAILQ_HEAD(, bounce_zone) bounce_zone_list; + +SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); +SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, + "Total bounce pages"); + +struct bus_dmamap { + struct bp_list bpages; + int pagesneeded; + int pagesreserved; + bus_dma_tag_t dmat; + void *buf; /* unmapped buffer pointer */ + bus_size_t buflen; /* unmapped buffer length */ + pmap_t pmap; + bus_dmamap_callback_t *callback; + void *callback_arg; + STAILQ_ENTRY(bus_dmamap) links; + STAILQ_HEAD(,sync_list) slist; +}; + +static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; +static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; + +static void init_bounce_pages(void *dummy); +static int alloc_bounce_zone(bus_dma_tag_t dmat); +static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); +static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + int commit); +static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_offset_t vaddr, bus_size_t size); +static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); +int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); +static int _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + void *buf, 
bus_size_t buflen, int flags); + +static __inline int +_bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) +{ + int i; + for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { + if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) + || (lowaddr < phys_avail[i] && + highaddr > phys_avail[i])) + return (1); + } + return (0); +} + +static __inline struct arm32_dma_range * +_bus_dma_inrange(struct arm32_dma_range *ranges, int nranges, + bus_addr_t curaddr) +{ + struct arm32_dma_range *dr; + int i; + + for (i = 0, dr = ranges; i < nranges; i++, dr++) { + if (curaddr >= dr->dr_sysbase && + round_page(curaddr) <= (dr->dr_sysbase + dr->dr_len)) + return (dr); + } + + return (NULL); +} + +/* + * Return true if a match is made. + * + * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. + * + * If paddr is within the bounds of the dma tag then call the filter callback + * to check for a match, if there is no filter callback then assume a match. + */ +int +run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) +{ + int retval; + + retval = 0; + + do { + if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) + || ((paddr & (dmat->alignment - 1)) != 0)) + && (dmat->filter == NULL + || (*dmat->filter)(dmat->filterarg, paddr) != 0)) + retval = 1; + + dmat = dmat->parent; + } while (retval == 0 && dmat != NULL); + return (retval); +} + +/* + * Convenience function for manipulating driver locks from busdma (during + * busdma_swi, for example). Drivers that don't provide their own locks + * should specify &Giant to dmat->lockfuncarg. Drivers that use their own + * non-mutex locking scheme don't have to use this at all. + */ +void +busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) +{ + struct mtx *dmtx; + + dmtx = (struct mtx *)arg; + switch (op) { + case BUS_DMA_LOCK: + mtx_lock(dmtx); + break; + case BUS_DMA_UNLOCK: + mtx_unlock(dmtx); + break; + default: + panic("Unknown operation 0x%x for busdma_lock_mutex!", op); + } +} + +/* + * dflt_lock should never get called. It gets put into the dma tag when + * lockfunc == NULL, which is only valid if the maps that are associated + * with the tag are meant to never be defered. + * XXX Should have a way to identify which driver is responsible here. + */ +static void +dflt_lock(void *arg, bus_dma_lock_op_t op) +{ + panic("driver error: busdma dflt_lock called"); +} + +/* + * Allocate a device specific dma_tag. 
+ */ +int +bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, + bus_size_t boundary, bus_addr_t lowaddr, + bus_addr_t highaddr, bus_dma_filter_t *filter, + void *filterarg, bus_size_t maxsize, int nsegments, + bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat) +{ + bus_dma_tag_t newtag; + int error = 0; + +#if 0 + if (!parent) + parent = arm_root_dma_tag; +#endif + + /* Basic sanity checking */ + if (boundary != 0 && boundary < maxsegsz) + maxsegsz = boundary; + + /* Return a NULL tag on failure */ + *dmat = NULL; + + if (maxsegsz == 0) { + return (EINVAL); + } + + newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, + M_ZERO | M_NOWAIT); + if (newtag == NULL) { + CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", + __func__, newtag, 0, error); + return (ENOMEM); + } + + newtag->parent = parent; + newtag->alignment = alignment; + newtag->boundary = boundary; + newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); + newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + + (PAGE_SIZE - 1); + newtag->filter = filter; + newtag->filterarg = filterarg; + newtag->maxsize = maxsize; + newtag->nsegments = nsegments; + newtag->maxsegsz = maxsegsz; + newtag->flags = flags; + newtag->ref_count = 1; /* Count ourself */ + newtag->map_count = 0; + newtag->ranges = bus_dma_get_range(); + newtag->_nranges = bus_dma_get_range_nb(); + if (lockfunc != NULL) { + newtag->lockfunc = lockfunc; + newtag->lockfuncarg = lockfuncarg; + } else { + newtag->lockfunc = dflt_lock; + newtag->lockfuncarg = NULL; + } + newtag->segments = NULL; + + /* Take into account any restrictions imposed by our parent tag */ + if (parent != NULL) { + newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); + newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); + if (newtag->boundary == 0) + newtag->boundary = parent->boundary; + else if (parent->boundary != 0) + newtag->boundary = MIN(parent->boundary, + newtag->boundary); + if ((newtag->filter != NULL) || + ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) + newtag->flags |= BUS_DMA_COULD_BOUNCE; + if (newtag->filter == NULL) { + /* + * Short circuit looking at our parent directly + * since we have encapsulated all of its information + */ + newtag->filter = parent->filter; + newtag->filterarg = parent->filterarg; + newtag->parent = parent->parent; + } + if (newtag->parent != NULL) + atomic_add_int(&parent->ref_count, 1); + } + + if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) + || newtag->alignment > 1) + newtag->flags |= BUS_DMA_COULD_BOUNCE; + + if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && + (flags & BUS_DMA_ALLOCNOW) != 0) { + struct bounce_zone *bz; + + /* Must bounce */ + + if ((error = alloc_bounce_zone(newtag)) != 0) { + free(newtag, M_DEVBUF); + return (error); + } + bz = newtag->bounce_zone; + + if (ptoa(bz->total_bpages) < maxsize) { + int pages; + + pages = atop(maxsize) - bz->total_bpages; + + /* Add pages to our bounce pool */ + if (alloc_bounce_pages(newtag, pages) < pages) + error = ENOMEM; + } + /* Performed initial allocation */ + newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; + } else + newtag->bounce_zone = NULL; + + if (error != 0) { + free(newtag, M_DEVBUF); + } else { + *dmat = newtag; + } + CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", + __func__, newtag, (newtag != NULL ? 
newtag->flags : 0), error); + return (error); +} + +int +bus_dma_tag_destroy(bus_dma_tag_t dmat) +{ + bus_dma_tag_t dmat_copy; + int error; + + error = 0; + dmat_copy = dmat; + + if (dmat != NULL) { + + if (dmat->map_count != 0) { + error = EBUSY; + goto out; + } + + while (dmat != NULL) { + bus_dma_tag_t parent; + + parent = dmat->parent; + atomic_subtract_int(&dmat->ref_count, 1); + if (dmat->ref_count == 0) { + if (dmat->segments != NULL) + free(dmat->segments, M_DEVBUF); + free(dmat, M_DEVBUF); + /* + * Last reference count, so + * release our reference + * count on our parent. + */ + dmat = parent; + } else + dmat = NULL; + } + } +out: + CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); + return (error); +} + +/* + * Allocate a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +int +bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + int error; + + error = 0; + + *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, + M_NOWAIT | M_ZERO); + if (*mapp == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); + return (ENOMEM); + } + STAILQ_INIT(&((*mapp)->slist)); + + if (dmat->segments == NULL) { + dmat->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, + M_NOWAIT); + if (dmat->segments == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, ENOMEM); + free(*mapp, M_DEVBUF); + *mapp = NULL; + return (ENOMEM); + } + } + /* + * Bouncing might be required if the driver asks for an active + * exclusion region, a data alignment that is stricter than 1, and/or + * an active address boundary. + */ + if (dmat->flags & BUS_DMA_COULD_BOUNCE) { + + /* Must bounce */ + struct bounce_zone *bz; + int maxpages; + + if (dmat->bounce_zone == NULL) { + if ((error = alloc_bounce_zone(dmat)) != 0) { + free(*mapp, M_DEVBUF); + *mapp = NULL; + return (error); + } + } + bz = dmat->bounce_zone; + + /* Initialize the new map */ + STAILQ_INIT(&((*mapp)->bpages)); + + /* + * Attempt to add pages to our pool on a per-instance + * basis up to a sane limit. + */ + maxpages = MAX_BPAGES; + if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 + || (bz->map_count > 0 && bz->total_bpages < maxpages)) { + int pages; + + pages = MAX(atop(dmat->maxsize), 1); + pages = MIN(maxpages - bz->total_bpages, pages); + pages = MAX(pages, 1); + if (alloc_bounce_pages(dmat, pages) < pages) + error = ENOMEM; + + if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { + if (error == 0) + dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; + } else { + error = 0; + } + } + bz->map_count++; + } + if (error == 0) + dmat->map_count++; + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->flags, error); + return (error); +} + +/* + * Destroy a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +int +bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + if (STAILQ_FIRST(&map->bpages) != NULL || + STAILQ_FIRST(&map->slist) != NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, EBUSY); + return (EBUSY); + } + if (dmat->bounce_zone) + dmat->bounce_zone->map_count--; + free(map, M_DEVBUF); + dmat->map_count--; + CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); + return (0); +} + + +/* + * Allocate a piece of memory that can be efficiently mapped into + * bus device space based on the constraints lited in the dma tag. + * A dmamap to for use with dmamap_load is also allocated. 
+ */ +int +bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp) +{ + int mflags, len; + + if (flags & BUS_DMA_NOWAIT) + mflags = M_NOWAIT; + else + mflags = M_WAITOK; + + /* ARM non-snooping caches need a map for the VA cache sync structure */ + + *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, + M_NOWAIT | M_ZERO); + if (*mapp == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->flags, ENOMEM); + return (ENOMEM); + } + + STAILQ_INIT(&((*mapp)->slist)); + + if (dmat->segments == NULL) { + dmat->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, + mflags); + if (dmat->segments == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->flags, ENOMEM); + free(*mapp, M_DEVBUF); + *mapp = NULL; + return (ENOMEM); + } + } + + if (flags & BUS_DMA_ZERO) + mflags |= M_ZERO; + + /* + * XXX: + * (dmat->alignment < dmat->maxsize) is just a quick hack; the exact + * alignment guarantees of malloc need to be nailed down, and the + * code below should be rewritten to take that into account. + * + * In the meantime, we'll warn the user if malloc gets it wrong. + * + * allocate at least a cache line. This should help avoid cache + * corruption. + */ + len = max(dmat->maxsize, arm_dcache_align); + if (len <= PAGE_SIZE && + (dmat->alignment < len) && + !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) { + *vaddr = malloc(len, M_DEVBUF, mflags); + } else { + /* + * XXX Use Contigmalloc until it is merged into this facility + * and handles multi-seg allocations. Nobody is doing + * multi-seg allocations yet though. + * XXX Certain AGP hardware does. + */ + *vaddr = contigmalloc(len, M_DEVBUF, mflags, + 0ul, dmat->lowaddr, dmat->alignment? dmat->alignment : 1ul, + dmat->boundary); + } + if (*vaddr == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->flags, ENOMEM); + free(*mapp, M_DEVBUF); + *mapp = NULL; + return (ENOMEM); + } else if ((uintptr_t)*vaddr & (dmat->alignment - 1)) { + printf("bus_dmamem_alloc failed to align memory properly.\n"); + } + dmat->map_count++; +#ifdef mftnotyet + if (flags & BUS_DMA_NOCACHE) + pmap_change_attr((vm_offset_t)*vaddr, len, + ARM_UNCACHEABLE); +#endif + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->flags, 0); + return (0); +} + +/* + * Free a piece of memory and it's allociated dmamap, that was allocated + * via bus_dmamem_alloc. Make the same choice for free/contigfree. 
+ */ +void +bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) +{ + int len; + +#ifdef mftnotyet + pmap_change_attr((vm_offset_t)vaddr, dmat->maxsize, ARM_WRITE_BACK); +#endif + len = max(dmat->maxsize, arm_dcache_align); + if (len <= PAGE_SIZE && + (dmat->alignment < len) && + !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) + free(vaddr, M_DEVBUF); + else { + contigfree(vaddr, len, M_DEVBUF); + } + dmat->map_count--; + free(map, M_DEVBUF); + CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); +} + +static int +_bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, + void *buf, bus_size_t buflen, int flags) +{ + vm_offset_t vaddr; + vm_offset_t vendaddr; + bus_addr_t paddr; + + if (map->pagesneeded == 0) { + CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d" + " map= %p, pagesneeded= %d", + dmat->lowaddr, dmat->boundary, dmat->alignment, + map, map->pagesneeded); + /* + * Count the number of bounce pages + * needed in order to complete this transfer + */ + vaddr = (vm_offset_t)buf; + vendaddr = (vm_offset_t)buf + buflen; + + while (vaddr < vendaddr) { + if (__predict_true(map->pmap == pmap_kernel())) + paddr = pmap_kextract(vaddr); + else + paddr = pmap_extract(map->pmap, vaddr); + if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && + run_filter(dmat, paddr) != 0) { + map->pagesneeded++; + } + vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); + + } + CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded); + } + + /* Reserve Necessary Bounce Pages */ + if (map->pagesneeded != 0) { + mtx_lock(&bounce_lock); + if (flags & BUS_DMA_NOWAIT) { + if (reserve_bounce_pages(dmat, map, 0) != 0) { + map->pagesneeded = 0; + mtx_unlock(&bounce_lock); + return (ENOMEM); + } + } else { + if (reserve_bounce_pages(dmat, map, 1) != 0) { + /* Queue us for resources */ + map->dmat = dmat; + map->buf = buf; + map->buflen = buflen; + STAILQ_INSERT_TAIL(&bounce_map_waitinglist, + map, links); + mtx_unlock(&bounce_lock); + return (EINPROGRESS); + } + } + mtx_unlock(&bounce_lock); + } + + return (0); +} + +/* + * Utility function to load a linear buffer. lastaddrp holds state + * between invocations (for multiple-buffer loads). segp contains + * the starting segment on entrace, and the ending segment on exit. + * first indicates if this is the first invocation of this function. + */ +static __inline int +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, + bus_dmamap_t map, + void *buf, bus_size_t buflen, + int flags, + bus_addr_t *lastaddrp, + bus_dma_segment_t *segs, + int *segp, + int first) +{ + bus_size_t sgsize; + bus_addr_t curaddr, lastaddr, baddr, bmask; + vm_offset_t vaddr; + struct sync_list *sl; + int seg, error; + + if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { + error = _bus_dmamap_count_pages(dmat, map, buf, buflen, flags); + if (error) + return (error); + } + + sl = NULL; + vaddr = (vm_offset_t)buf; + lastaddr = *lastaddrp; + bmask = ~(dmat->boundary - 1); + + for (seg = *segp; buflen > 0 ; ) { + /* + * Get the physical address for this segment. + */ + if (__predict_true(map->pmap == pmap_kernel())) + curaddr = pmap_kextract(vaddr); + else + curaddr = pmap_extract(map->pmap, vaddr); + + /* + * Compute the segment size, and adjust counts. + */ + sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); + if (sgsize > dmat->maxsegsz) + sgsize = dmat->maxsegsz; + if (buflen < sgsize) + sgsize = buflen; + + /* + * Make sure we don't cross any boundaries. 
+ */ + if (dmat->boundary > 0) { + baddr = (curaddr + dmat->boundary) & bmask; + if (sgsize > (baddr - curaddr)) + sgsize = (baddr - curaddr); + } + + if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && + map->pagesneeded != 0 && run_filter(dmat, curaddr)) { + curaddr = add_bounce_page(dmat, map, vaddr, sgsize); + } else { + /* add_sync_list(dmat, map, vaddr, sgsize, cflag); */ + sl = (struct sync_list *)malloc(sizeof(struct sync_list), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (sl == NULL) + goto cleanup; + STAILQ_INSERT_TAIL(&(map->slist), sl, slinks); + sl->vaddr = vaddr; + sl->datacount = sgsize; + } + + + if (dmat->ranges) { + struct arm32_dma_range *dr; + + dr = _bus_dma_inrange(dmat->ranges, dmat->_nranges, + curaddr); + if (dr == NULL) { + _bus_dmamap_unload(dmat, map); + return (EINVAL); + } + /* + * In a valid DMA range. Translate the physical + * memory address to an address in the DMA window. + */ + curaddr = (curaddr - dr->dr_sysbase) + dr->dr_busbase; + } + + /* + * Insert chunk into a segment, coalescing with + * previous segment if possible. + */ + if (first) { + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + first = 0; + } else { + if (curaddr == lastaddr && + (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && + (dmat->boundary == 0 || + (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + segs[seg].ds_len += sgsize; + else { + if (++seg >= dmat->nsegments) + break; + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + } + } + + lastaddr = curaddr + sgsize; + vaddr += sgsize; + buflen -= sgsize; + } + + *segp = seg; + *lastaddrp = lastaddr; +cleanup: + /* + * Did we fit? + */ + if (buflen != 0) { + _bus_dmamap_unload(dmat, map); + return(EFBIG); /* XXX better return value here? */ + } + return (0); +} + +/* + * Map the buffer buf into bus space using the dmamap map. + */ +int +bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, bus_dmamap_callback_t *callback, + void *callback_arg, int flags) +{ + bus_addr_t lastaddr = 0; + int error, nsegs = 0; + + flags |= BUS_DMA_WAITOK; + map->callback = callback; + map->callback_arg = callback_arg; + map->pmap = kernel_pmap; + + error = _bus_dmamap_load_buffer(dmat, map, buf, buflen, flags, + &lastaddr, dmat->segments, &nsegs, 1); + + CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", + __func__, dmat, dmat->flags, error, nsegs + 1); + + if (error == EINPROGRESS) { + return (error); + } + + if (error) + (*callback)(callback_arg, dmat->segments, 0, error); + else + (*callback)(callback_arg, dmat->segments, nsegs + 1, 0); + + /* + * Return ENOMEM to the caller so that it can pass it up the stack. + * This error only happens when NOWAIT is set, so deferal is disabled. + */ + if (error == ENOMEM) + return (error); + + return (0); +} + + +/* + * Like _bus_dmamap_load(), but for mbufs. 
+ */ +static __inline int +_bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, + struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs, + int flags) +{ + int error; + + M_ASSERTPKTHDR(m0); + map->pmap = kernel_pmap; + + flags |= BUS_DMA_NOWAIT; + *nsegs = 0; + error = 0; + if (m0->m_pkthdr.len <= dmat->maxsize) { + int first = 1; + bus_addr_t lastaddr = 0; + struct mbuf *m; + + for (m = m0; m != NULL && error == 0; m = m->m_next) { + if (m->m_len > 0) { + error = _bus_dmamap_load_buffer(dmat, map, + m->m_data, m->m_len, + flags, &lastaddr, + segs, nsegs, first); + first = 0; + } + } + } else { + error = EINVAL; + } + + /* XXX FIXME: Having to increment nsegs is really annoying */ + ++*nsegs; + CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", + __func__, dmat, dmat->flags, error, *nsegs); + return (error); +} + +int +bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, + struct mbuf *m0, + bus_dmamap_callback2_t *callback, void *callback_arg, + int flags) +{ + int nsegs, error; + + error = _bus_dmamap_load_mbuf_sg(dmat, map, m0, dmat->segments, &nsegs, + flags); + + if (error) { + /* force "no valid mappings" in callback */ + (*callback)(callback_arg, dmat->segments, 0, 0, error); + } else { + (*callback)(callback_arg, dmat->segments, + nsegs, m0->m_pkthdr.len, error); + } + CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", + __func__, dmat, dmat->flags, error, nsegs); + + return (error); +} + +int +bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, + struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs, + int flags) +{ + return (_bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags)); +} + +/* + * Like _bus_dmamap_load(), but for uios. + */ +int +bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, + struct uio *uio, + bus_dmamap_callback2_t *callback, void *callback_arg, + int flags) +{ + bus_addr_t lastaddr; + int nsegs, error, first, i; + bus_size_t resid; + struct iovec *iov; + + flags |= BUS_DMA_NOWAIT; + resid = uio->uio_resid; + iov = uio->uio_iov; + + if (uio->uio_segflg == UIO_USERSPACE) { + KASSERT(uio->uio_td != NULL, + ("bus_dmamap_load_uio: USERSPACE but no proc")); + map->pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace); + } else + map->pmap = kernel_pmap; + + nsegs = 0; + error = 0; + first = 1; + lastaddr = (bus_addr_t) 0; + for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) { + /* + * Now at the first iovec to load. Load each iovec + * until we have exhausted the residual count. + */ + bus_size_t minlen = + resid < iov[i].iov_len ? resid : iov[i].iov_len; + caddr_t addr = (caddr_t) iov[i].iov_base; + + if (minlen > 0) { + error = _bus_dmamap_load_buffer(dmat, map, + addr, minlen, flags, &lastaddr, + dmat->segments, &nsegs, first); + first = 0; + resid -= minlen; + } + } + + if (error) { + /* force "no valid mappings" in callback */ + (*callback)(callback_arg, dmat->segments, 0, 0, error); + } else { + (*callback)(callback_arg, dmat->segments, + nsegs+1, uio->uio_resid, error); + } + CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", + __func__, dmat, dmat->flags, error, nsegs + 1); + return (error); +} + +/* + * Release the mapping held by map. 
+ */ +void +_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bounce_page *bpage; + struct bounce_zone *bz; + struct sync_list *sl; + + while ((sl = STAILQ_FIRST(&map->slist)) != NULL) { + STAILQ_REMOVE_HEAD(&map->slist, slinks); + free(sl, M_DEVBUF); + } + + if ((bz = dmat->bounce_zone) != NULL) { + while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + STAILQ_REMOVE_HEAD(&map->bpages, links); + free_bounce_page(dmat, bpage); + } + + bz = dmat->bounce_zone; + bz->free_bpages += map->pagesreserved; + bz->reserved_bpages -= map->pagesreserved; + map->pagesreserved = 0; + map->pagesneeded = 0; + } +} + +#ifdef notyetbounceuser + /* If busdma uses user pages, then the interrupt handler could + * be use the kernel vm mapping. Both bounce pages and sync list + * do not cross page boundaries. + * Below is a rough sequence that a person would do to fix the + * user page reference in the kernel vmspace. This would be + * done in the dma post routine. + */ +void +_bus_dmamap_fix_user(vm_offset_t buf, bus_size_t len, + pmap_t pmap, int op) +{ + bus_size_t sgsize; + bus_addr_t curaddr; + vm_offset_t va; + + /* each synclist entry is contained within a single page. + * + * this would be needed if BUS_DMASYNC_POSTxxxx was implemented + */ + curaddr = pmap_extract(pmap, buf); + va = pmap_dma_map(curaddr); + switch (op) { + case SYNC_USER_INV: + cpu_dcache_wb_range(va, sgsize); + break; + + case SYNC_USER_COPYTO: + bcopy((void *)va, (void *)bounce, sgsize); + break; + + case SYNC_USER_COPYFROM: + bcopy((void *) bounce, (void *)va, sgsize); + break; + + default: + break; + } + + pmap_dma_unmap(va); +} +#endif + +void +_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +{ + struct bounce_page *bpage; + struct sync_list *sl; + bus_size_t len, unalign; + vm_offset_t buf, ebuf; +#ifdef FIX_DMAP_BUS_DMASYNC_POSTREAD + vm_offset_t bbuf; + char _tmp_cl[arm_dcache_align], _tmp_clend[arm_dcache_align]; +#endif + + /* if buffer was from user space, it it possible that this + * is not the same vm map. The fix is to map each page in + * the buffer into the current address space (KVM) and then + * do the bounce copy or sync list cache operation. + * + * The sync list entries are already broken into + * their respective physical pages. + */ + if (!pmap_dmap_iscurrent(map->pmap)) + printf("_bus_dmamap_sync: wrong user map: %p %x\n", map->pmap, op); + + if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + + /* Handle data bouncing. */ + + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " + "performing bounce", __func__, dmat, dmat->flags, op); + + if (op & BUS_DMASYNC_PREWRITE) { + while (bpage != NULL) { + bcopy((void *)bpage->datavaddr, + (void *)bpage->vaddr, + bpage->datacount); + cpu_dcache_wb_range((vm_offset_t)bpage->vaddr, + bpage->datacount); + bpage = STAILQ_NEXT(bpage, links); + } + dmat->bounce_zone->total_bounced++; + } + + if (op & BUS_DMASYNC_POSTREAD) { + if (!pmap_dmap_iscurrent(map->pmap)) + panic("_bus_dmamap_sync: wrong user map. 
apply fix"); + + cpu_dcache_inv_range((vm_offset_t)bpage->vaddr, + bpage->datacount); + while (bpage != NULL) { + bcopy((void *)bpage->vaddr, + (void *)bpage->datavaddr, + bpage->datacount); + bpage = STAILQ_NEXT(bpage, links); + } + dmat->bounce_zone->total_bounced++; + } + } + + if ((sl = STAILQ_FIRST(&map->slist)) != NULL) { + /* ARM caches are not self-snooping for dma */ + + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " + "performing sync", __func__, dmat, dmat->flags, op); + + switch (op) { + case BUS_DMASYNC_PREWRITE: + while (sl != NULL) { + cpu_dcache_wb_range(sl->vaddr, sl->datacount); + sl = STAILQ_NEXT(sl, slinks); + } + break; + + case BUS_DMASYNC_PREREAD: + while (sl != NULL) { + /* write back the unaligned portions */ + buf = sl->vaddr; + len = sl->datacount; + ebuf = buf + len; /* end of buffer */ + unalign = buf & arm_dcache_align_mask; + if (unalign) { + /* wbinv leading fragment */ + buf &= ~arm_dcache_align_mask; + cpu_dcache_wbinv_range(buf, + arm_dcache_align); + buf += arm_dcache_align; + /* number byte in buffer wbinv */ + unalign = arm_dcache_align - unalign; + if (len > unalign) + len -= unalign; + else + len = 0; + } + unalign = ebuf & arm_dcache_align_mask; + if (ebuf > buf && unalign) { + /* wbinv trailing fragment */ + len -= unalign; + ebuf -= unalign; + cpu_dcache_wbinv_range(ebuf, + arm_dcache_align); + } + if (ebuf > buf) { + cpu_dcache_inv_range(buf, len); + } + sl = STAILQ_NEXT(sl, slinks); + } + break; + + case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD: + while (sl != NULL) { + cpu_dcache_wbinv_range(sl->vaddr, sl->datacount); + sl = STAILQ_NEXT(sl, slinks); + } + break; + +#ifdef FIX_DMAP_BUS_DMASYNC_POSTREAD + case BUS_DMASYNC_POSTREAD: + if (!pmap_dmap_iscurrent(map->pmap)) + panic("_bus_dmamap_sync: wrong user map. 
apply fix"); + while (sl != NULL) { + /* write back the unaligned portions */ + buf = sl->vaddr; + len = sl->datacount; + bbuf = buf & ~arm_dcache_align_mask; + ebuf = buf + len; + unalign = buf & arm_dcache_align_mask; + if (unalign) { + memcpy(_tmp_cl, (void *)bbuf, unalign); + len += unalign; /* inv entire cache line */ + } + unalign = ebuf & arm_dcache_align_mask; + if (unalign) { + unalign = arm_dcache_align - unalign; + memcpy(_tmp_clend, (void *)ebuf, unalign); + len += unalign; /* inv entire cache line */ + } + /* inv are cache length aligned */ + cpu_dcache_inv_range(bbuf, len); + + unalign = (vm_offset_t)buf & arm_dcache_align_mask; + if (unalign) { + memcpy((void *)bbuf, _tmp_cl, unalign); + } + unalign = ebuf & arm_dcache_align_mask; + if (unalign) { + unalign = arm_dcache_align - unalign; + memcpy((void *)ebuf, _tmp_clend, unalign); + } + sl = STAILQ_NEXT(sl, slinks); + } + break; +#endif /* FIX_DMAP_BUS_DMASYNC_POSTREAD */ + + default: + break; + } + } +} + +static void +init_bounce_pages(void *dummy __unused) +{ + + total_bpages = 0; + STAILQ_INIT(&bounce_zone_list); + STAILQ_INIT(&bounce_map_waitinglist); + STAILQ_INIT(&bounce_map_callbacklist); + mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); +} +SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); + +static struct sysctl_ctx_list * +busdma_sysctl_tree(struct bounce_zone *bz) +{ + return (&bz->sysctl_tree); +} + +static struct sysctl_oid * +busdma_sysctl_tree_top(struct bounce_zone *bz) +{ + return (bz->sysctl_tree_top); +} + +static int +alloc_bounce_zone(bus_dma_tag_t dmat) +{ + struct bounce_zone *bz; + + /* Check to see if we already have a suitable zone */ + STAILQ_FOREACH(bz, &bounce_zone_list, links) { + if ((dmat->alignment <= bz->alignment) + && (dmat->lowaddr >= bz->lowaddr)) { + dmat->bounce_zone = bz; + return (0); + } + } + + if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, + M_NOWAIT | M_ZERO)) == NULL) + return (ENOMEM); + + STAILQ_INIT(&bz->bounce_page_list); + bz->free_bpages = 0; + bz->reserved_bpages = 0; + bz->active_bpages = 0; + bz->lowaddr = dmat->lowaddr; + bz->alignment = MAX(dmat->alignment, PAGE_SIZE); + bz->map_count = 0; + snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); + busdma_zonecount++; + snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); + STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); + dmat->bounce_zone = bz; + + sysctl_ctx_init(&bz->sysctl_tree); + bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, + SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, + CTLFLAG_RD, 0, ""); + if (bz->sysctl_tree_top == NULL) { + sysctl_ctx_free(&bz->sysctl_tree); + return (0); /* XXX error code? 
*/ + } + + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, + "Total bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, + "Free bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, + "Reserved bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, + "Active bounce pages"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, + "Total bounce requests"); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, + "Total bounce requests that were deferred"); + SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); + SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, + "alignment", CTLFLAG_RD, &bz->alignment, 0, ""); + + return (0); +} + +static int +alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) +{ + struct bounce_zone *bz; + int count; + + bz = dmat->bounce_zone; + count = 0; + while (numpages > 0) { + struct bounce_page *bpage; + + bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, + M_NOWAIT | M_ZERO); + + if (bpage == NULL) + break; + bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, + M_NOWAIT, 0ul, + bz->lowaddr, + PAGE_SIZE, + 0); + if (bpage->vaddr == 0) { + free(bpage, M_DEVBUF); + break; + } + bpage->busaddr = pmap_kextract(bpage->vaddr); + mtx_lock(&bounce_lock); + STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); + total_bpages++; + bz->total_bpages++; + bz->free_bpages++; + mtx_unlock(&bounce_lock); + count++; + numpages--; + } + return (count); +} + +static int +reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) +{ + struct bounce_zone *bz; + int pages; + + mtx_assert(&bounce_lock, MA_OWNED); + bz = dmat->bounce_zone; + pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); + if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) + return (map->pagesneeded - (map->pagesreserved + pages)); + bz->free_bpages -= pages; + bz->reserved_bpages += pages; + map->pagesreserved += pages; + pages = map->pagesneeded - map->pagesreserved; + + return (pages); +} + +static bus_addr_t +add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, + bus_size_t size) +{ + struct bounce_zone *bz; + struct bounce_page *bpage; + + KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); + KASSERT(map != NULL, + ("add_bounce_page: bad map %p", map)); + + bz = dmat->bounce_zone; + if (map->pagesneeded == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesneeded--; + + if (map->pagesreserved == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesreserved--; + + mtx_lock(&bounce_lock); + bpage = STAILQ_FIRST(&bz->bounce_page_list); + if (bpage == NULL) + panic("add_bounce_page: free page list is empty"); + + STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); + bz->reserved_bpages--; + 
bz->active_bpages++; + mtx_unlock(&bounce_lock); + + if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { + /* Page offset needs to be preserved. */ + bpage->vaddr |= vaddr & PAGE_MASK; + bpage->busaddr |= vaddr & PAGE_MASK; + } + bpage->datavaddr = vaddr; + bpage->datacount = size; + STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); + return (bpage->busaddr); +} + +static void +free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) +{ + struct bus_dmamap *map; + struct bounce_zone *bz; + + bz = dmat->bounce_zone; + bpage->datavaddr = 0; + bpage->datacount = 0; + if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { + /* + * Reset the bounce page to start at offset 0. Other uses + * of this bounce page may need to store a full page of + * data and/or assume it starts on a page boundary. + */ + bpage->vaddr &= ~PAGE_MASK; + bpage->busaddr &= ~PAGE_MASK; + } + + mtx_lock(&bounce_lock); + STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); + bz->free_bpages++; + bz->active_bpages--; + if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { + if (reserve_bounce_pages(map->dmat, map, 1) == 0) { + STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); + STAILQ_INSERT_TAIL(&bounce_map_callbacklist, + map, links); + busdma_swi_pending = 1; + bz->total_deferred++; + swi_sched(vm_ih, 0); + } + } + mtx_unlock(&bounce_lock); +} + +void +busdma_swi(void) +{ + bus_dma_tag_t dmat; + struct bus_dmamap *map; + + mtx_lock(&bounce_lock); + while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { + STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); + mtx_unlock(&bounce_lock); + dmat = map->dmat; + (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); + bus_dmamap_load(map->dmat, map, map->buf, map->buflen, + map->callback, map->callback_arg, /*flags*/0); + (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); + mtx_lock(&bounce_lock); + } + mtx_unlock(&bounce_lock); +} diff --git a/sys/arm/arm/cpufunc.c b/sys/arm/arm/cpufunc.c index 433b388..616c3cd 100644 --- a/sys/arm/arm/cpufunc.c +++ b/sys/arm/arm/cpufunc.c @@ -481,6 +482,67 @@ struct cpu_functions arm10_cpufuncs = { }; #endif /* CPU_ARM10 */ +#ifdef CPU_ARM11 +struct cpu_functions sheeva2_cpufuncs = { + /* CPU functions */ + + cpufunc_id, /* id */ + cpufunc_nullop, /* cpwait */ + + /* MMU functions */ + + cpufunc_control, /* control */ + cpufunc_domains, /* Domain */ + sheeva2_setttb, /* Setttb */ + cpufunc_faultstatus, /* Faultstatus */ + cpufunc_faultaddress, /* Faultaddress */ + + /* TLB functions */ + + arm11_tlb_flushID, /* tlb_flushID */ + arm11_tlb_flushID_SE, /* tlb_flushID_SE */ + arm11_tlb_flushI, /* tlb_flushI */ + arm11_tlb_flushI_SE, /* tlb_flushI_SE */ + arm11_tlb_flushD, /* tlb_flushD */ + arm11_tlb_flushD_SE, /* tlb_flushD_SE */ + + /* Cache operations */ + armv5_ec_icache_sync_all, /* icache_sync_all */ + sheeva2_icache_sync_range, /* icache_sync_range */ + + armv5_ec_dcache_wbinv_all, /* dcache_wbinv_all */ + sheeva2_dcache_wbinv_range, /* dcache_wbinv_range */ + sheeva2_dcache_inv_range, /* dcache_inv_range */ + sheeva2_dcache_wb_range, /* dcache_wb_range */ + + armv5_ec_idcache_wbinv_all, /* idcache_wbinv_all */ + sheeva2_idcache_wbinv_range, /* idcache_wbinv_all */ + + sheeva2_l2cache_wbinv_all, /* l2cache_wbinv_all */ + sheeva2_l2cache_wbinv_range, /* l2cache_wbinv_range */ + sheeva2_l2cache_inv_range, /* l2cache_inv_range */ + sheeva2_l2cache_wb_range, /* l2cache_wb_range */ + + /* Other functions */ + + sheeva2_drain_readbuf, /* flush_prefetchbuf */ + arm11_drain_writebuf, /* drain_writebuf */ + sheeva2_flush_brnchtgt_all, /* 
flush_brnchtgt_C */ + sheeva2_flush_brnchtgt_va, /* flush_brnchtgt_E */ + + sheeva2_sleep, /* sleep */ + + /* Soft functions */ + + cpufunc_null_fixup, /* dataabt_fixup */ + cpufunc_null_fixup, /* prefetchabt_fixup */ + + arm11_context_switch, /* context_switch */ + + arm11_setup /* cpu setup */ +}; +#endif /* CPU_ARM11 */ + #ifdef CPU_SA110 struct cpu_functions sa110_cpufuncs = { /* CPU functions */ @@ -799,7 +861,7 @@ u_int cputype; u_int cpu_reset_needs_v4_MMU_disable; /* flag used in locore.s */ #if defined(CPU_ARM7TDMI) || defined(CPU_ARM8) || defined(CPU_ARM9) || \ - defined (CPU_ARM9E) || defined (CPU_ARM10) || \ + defined (CPU_ARM9E) || defined (CPU_ARM10) || defined(CPU_ARM11) || \ defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342) @@ -999,8 +1061,10 @@ set_cpufuncs() if (cputype == CPU_ID_MV88FR131 || cputype == CPU_ID_MV88FR571_VD || cputype == CPU_ID_MV88FR571_41) { + uint32_t sheeva_ctrl; - cpufuncs = sheeva_cpufuncs; + sheeva_ctrl = (MV_DC_STREAM_ENABLE | MV_BTB_DISABLE | + MV_L2_ENABLE); /* * Workaround for Marvell MV78100 CPU: Cache prefetch * mechanism may affect the cache coherency validity, @@ -1010,16 +1074,13 @@ set_cpufuncs() * L2 Prefetching Mechanism) for details. */ if (cputype == CPU_ID_MV88FR571_VD || - cputype == CPU_ID_MV88FR571_41) { - sheeva_control_ext(0xffffffff, - FC_DCACHE_STREAM_EN | FC_WR_ALLOC_EN | - FC_BRANCH_TARG_BUF_DIS | FC_L2CACHE_EN | - FC_L2_PREF_DIS); - } else { - sheeva_control_ext(0xffffffff, - FC_DCACHE_STREAM_EN | FC_WR_ALLOC_EN | - FC_BRANCH_TARG_BUF_DIS | FC_L2CACHE_EN); - } + cputype == CPU_ID_MV88FR571_41) + sheeva_ctrl |= MV_L2_PREFETCH_DISABLE; + + sheeva_control_ext(0xffffffff & ~MV_WA_ENABLE, + sheeva_ctrl); + + cpufuncs = sheeva_cpufuncs; } else cpufuncs = armv5_ec_cpufuncs; @@ -1049,6 +1110,15 @@ set_cpufuncs() goto out; } #endif /* CPU_ARM10 */ +#ifdef CPU_ARM11 + if (cputype == CPU_ID_MV88SV581X) { + cpufuncs = sheeva2_cpufuncs; + cpu_reset_needs_v4_MMU_disable = 1; /* V4 or higher */ + get_cachetype_cp15(); + pmap_pte_init_arm11(); + goto out; + } +#endif /* CPU_ARM11 */ #ifdef CPU_SA110 if (cputype == CPU_ID_SA110) { cpufuncs = sa110_cpufuncs; @@ -1600,7 +1670,7 @@ late_abort_fixup(arg) defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342) || \ - defined(CPU_ARM10) || defined(CPU_ARM11) + defined(CPU_ARM10) || defined(CPU_ARM11) #define IGN 0 #define OR 1 @@ -1917,7 +1987,8 @@ arm11_setup(args) cpuctrl = CPU_CONTROL_MMU_ENABLE | CPU_CONTROL_SYST_ENABLE | CPU_CONTROL_IC_ENABLE | CPU_CONTROL_DC_ENABLE - /* | CPU_CONTROL_BPRD_ENABLE */; + /* | CPU_CONTROL_BPRD_ENABLE */ | CPU_CONTROL_V6_EXTPAGE + /* | CPU_CONTROL_L2_ENABLE */; cpuctrlmask = CPU_CONTROL_MMU_ENABLE | CPU_CONTROL_SYST_ENABLE | CPU_CONTROL_IC_ENABLE | CPU_CONTROL_DC_ENABLE | CPU_CONTROL_ROM_ENABLE | CPU_CONTROL_BPRD_ENABLE @@ -1933,19 +2004,23 @@ arm11_setup(args) #ifdef __ARMEB__ cpuctrl |= CPU_CONTROL_BEND_ENABLE; #endif - /* Clear out the cache */ cpu_idcache_wbinv_all(); + cpu_l2cache_wbinv_all(); /* Now really make sure they are clean. */ __asm __volatile ("mcr\tp15, 0, r0, c7, c7, 0" : : ); + if (vector_page == ARM_VECTORS_HIGH) + cpuctrl |= CPU_CONTROL_VECRELOC; + /* Set the control register */ - curcpu()->ci_ctrl = cpuctrl; + ctrl = cpuctrl; cpu_control(0xffffffff, cpuctrl); /* And again. 
*/ cpu_idcache_wbinv_all(); + cpu_l2cache_wbinv_all(); } #endif /* CPU_ARM11 */ diff --git a/sys/arm/arm/cpufunc_asm_sheeva2.S b/sys/arm/arm/cpufunc_asm_sheeva2.S new file mode 100644 index 0000000..56dd8cb --- /dev/null +++ b/sys/arm/arm/cpufunc_asm_sheeva2.S @@ -0,0 +1,153 @@ +/*- + * Copyright (C) 2008 MARVELL INTERNATIONAL LTD. + * All rights reserved. + * + * Developed by Semihalf. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of MARVELL nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD: src/sys/arm/arm/cpufunc_asm_sheeva.S,v 1.2 2009/04/16 11:21:52 raj Exp $"); + +#include + +.Lsheeva2_cache_line_size: + .word _C_LABEL(arm_pdcache_line_size) + +ENTRY(sheeva2_setttb) + /* Cache synchronization is not required as this core has PIPT caches */ + mcr p15, 0, r1, c7, c10, 4 /* drain the write buffer */ + mcr p15, 0, r0, c2, c0, 0 /* load new TTB */ + mcr p15, 0, r0, c8, c7, 0 /* invalidate I+D TLBs */ + RET + +ENTRY(sheeva2_icache_sync_range) + sub r1, r1, #1 + add r1, r0, r1 + mcrr p15, 0, r1, r0, c5 /* invalidate IC range */ + mcrr p15, 0, r1, r0, c12 /* clean DC range */ + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_dcache_inv_range) + sub r1, r1, #1 + add r1, r0, r1 + mcrr p15, 0, r1, r0, c6 /* invalidate DC range */ + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_dcache_wb_range) + sub r1, r1, #1 + add r1, r0, r1 + mcrr p15, 0, r1, r0, c12 /* clean DC range */ + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_dcache_wbinv_range) + sub r1, r1, #1 + add r1, r0, r1 + mcrr p15, 0, r1, r0, c14 /* clean and invalidate DC range */ + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_idcache_wbinv_range) + sub r1, r1, #1 + add r1, r0, r1 + mcrr p15, 0, r1, r0, c5 /* invalidate IC range */ + mcrr p15, 0, r1, r0, c14 /* clean and invalidate DC range */ + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_l2cache_wbinv_all) + mcr p15, 1, r0, c7, c11, 0 /* L2C clean all */ + mcr p15, 1, r0, c7, c7, 0 /* L2C invalidate all */ + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_l2cache_wbinv_range) + ldr ip, .Lsheeva2_cache_line_size + ldr ip, [ip] + sub r1, r1, #1 /* Don't overrun */ + sub r3, ip, #1 + and r2, r0, r3 + add r1, r1, r2 + bic r0, r0, r3 +1: + mcr p15, 1, r0, c7, c15, 1 /* L2C clean and invalidate entry */ + add r0, r0, ip + subs r1, r1, ip + bpl 1b + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_l2cache_wb_range) + ldr ip, .Lsheeva2_cache_line_size + ldr ip, [ip] + sub r1, r1, #1 /* Don't overrun */ + sub r3, ip, #1 + and r2, r0, r3 + add r1, r1, r2 + bic r0, r0, r3 +1: + mcr p15, 1, r0, c7, c11, 1 /* L2C clean single entry by MVA */ + add r0, r0, ip + subs r1, r1, ip + bpl 1b + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_l2cache_inv_range) + ldr ip, .Lsheeva2_cache_line_size + ldr ip, [ip] + sub r1, r1, #1 /* Don't overrun */ + sub r3, ip, #1 + and r2, r0, r3 + add r1, r1, r2 + bic r0, r0, r3 +1: + mcr p15, 1, r0, c7, c7, 1 /* L2C invalidate single entry by MVA */ + add r0, r0, ip + subs r1, r1, ip + bpl 1b + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + RET + +ENTRY(sheeva2_drain_readbuf) + mcr p15, 0, r0, c7, c5, 4 /* flush prefetch buffers */ + RET + +ENTRY(sheeva2_flush_brnchtgt_all) + mcr p15, 0, r0, c7, c5, 6 /* flush entrie branch target cache */ + RET + +ENTRY(sheeva2_flush_brnchtgt_va) + mcr p15, 0, r0, c7, c5, 7 /* flush branch target cache by VA */ + RET + +ENTRY(sheeva2_sleep) + mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ + mcr p15, 0, r0, c7, c0, 4 /* wait for interrupt */ + RET diff --git a/sys/arm/arm/elf_trampoline.c b/sys/arm/arm/elf_trampoline.c index 7bf70d8..14944b2 100644 --- a/sys/arm/arm/elf_trampoline.c +++ b/sys/arm/arm/elf_trampoline.c @@ -57,7 +57,7 @@ void __startC(void); #define cpu_idcache_wbinv_all arm8_cache_purgeID #elif 
defined(CPU_ARM9) #define cpu_idcache_wbinv_all arm9_idcache_wbinv_all -#elif defined(CPU_ARM9E) +#elif defined(CPU_ARM9E) || defined(CPU_ARM11) #define cpu_idcache_wbinv_all armv5_ec_idcache_wbinv_all #elif defined(CPU_ARM10) #define cpu_idcache_wbinv_all arm10_idcache_wbinv_all diff --git a/sys/arm/arm/identcpu.c b/sys/arm/arm/identcpu.c index effa678..b835d73 100644 --- a/sys/arm/arm/identcpu.c +++ b/sys/arm/arm/identcpu.c @@ -311,11 +311,11 @@ const struct cpuidtab cpuids[] = { { CPU_ID_MV88FR131, CPU_CLASS_MARVELL, "Feroceon 88FR131", generic_steppings }, - { CPU_ID_MV88FR571_VD, CPU_CLASS_MARVELL, "Feroceon 88FR571-VD", generic_steppings }, - - { CPU_ID_MV88FR571_41, CPU_CLASS_MARVELL, "Early Feroceon 88FR571", + { CPU_ID_MV88FR571_41, CPU_CLASS_MARVELL, "Feroceon 88FR571 (early)", + generic_steppings }, + { CPU_ID_MV88SV581X, CPU_CLASS_MARVELL, "Sheeva 88SV581x", generic_steppings }, { 0, CPU_CLASS_NONE, NULL, NULL } diff --git a/sys/arm/arm/locore.S b/sys/arm/arm/locore.S index 4ee7409..4cb4ebe 100644 --- a/sys/arm/arm/locore.S +++ b/sys/arm/arm/locore.S @@ -153,14 +153,18 @@ Lunmapped: orrne r5, r5, #PHYSADDR movne pc, r5 - mcr p15, 0, r0, c2, c0, 0 /* Set TTB */ + mcr p15, 0, r0, c2, c0, 0 /* Set TTB0 */ mcr p15, 0, r0, c8, c7, 0 /* Flush TLB */ + mov r0, #0 + mcr p15, 0, r0, c13, c0, 1 /* Set ASID to 0 */ + /* Set the Domain Access register. Very important! */ mov r0, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT) mcr p15, 0, r0, c3, c0, 0 /* Enable MMU */ mrc p15, 0, r0, c1, c0, 0 + orr r0, r0, #CPU_CONTROL_V6_EXTPAGE orr r0, r0, #CPU_CONTROL_MMU_ENABLE mcr p15, 0, r0, c1, c0, 0 nop diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c new file mode 100644 index 0000000..ef675d2 --- /dev/null +++ b/sys/arm/arm/pmap-v6.c @@ -0,0 +1,3762 @@ +/* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */ +/*- + * Copyright 2004 Olivier Houchard. + * Copyright 2003 Wasabi Systems, Inc. + * All rights reserved. + * + * Written by Steve C. Woodford for Wasabi Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed for the NetBSD Project by + * Wasabi Systems, Inc. + * 4. The name of Wasabi Systems, Inc. may not be used to endorse + * or promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 2002-2003 Wasabi Systems, Inc. + * Copyright (c) 2001 Richard Earnshaw + * Copyright (c) 2001-2002 Christopher Gilbert + * All rights reserved. + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/*- + * Copyright (c) 1999 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Charles M. Hannum. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1994-1998 Mark Brinicombe. + * Copyright (c) 1994 Brini. + * All rights reserved. + * + * This code is derived from software written for Brini by Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Mark Brinicombe. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * + * RiscBSD kernel project + * + * pmap.c + * + * Machine dependant vm stuff + * + * Created : 20/09/94 + */ + +/* + * Special compilation symbols + * PMAP_DEBUG - Build in pmap_debug_level code + */ +/* Include header files */ + +#include "opt_vm.h" + +#include +__FBSDID("$FreeBSD: src/sys/arm/arm/pmap.c,v 1.113 2009/07/24 13:50:29 jhb Exp $"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef PMAP_DEBUG +#define PDEBUG(_lev_,_stat_) \ + if (pmap_debug_level >= (_lev_)) \ + ((_stat_)) +#define dprintf printf + +int pmap_debug_level = 0; +#define PMAP_INLINE +#else /* PMAP_DEBUG */ +#define PDEBUG(_lev_,_stat_) /* Nothing */ +#define dprintf(x, arg...) 
+#define PMAP_INLINE __inline +#endif /* PMAP_DEBUG */ + +extern struct pv_addr systempage; +/* + * Internal function prototypes + */ +static void pmap_free_pv_entry (pv_entry_t); +static pv_entry_t pmap_get_pv_entry(void); + +static void pmap_enter_locked(pmap_t, vm_offset_t, vm_page_t, + vm_prot_t, boolean_t, int); +static void pmap_alloc_l1(pmap_t); +static void pmap_free_l1(pmap_t); +static void pmap_use_l1(pmap_t); + +static int pmap_clearbit(struct vm_page *, u_int); + +static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t); +static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t); +static void pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int); +static vm_offset_t kernel_pt_lookup(vm_paddr_t); + +static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1"); + +vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ +vm_offset_t pmap_curmaxkvaddr; +vm_paddr_t kernel_l1pa; + +extern void *end; +vm_offset_t kernel_vm_end = 0; + +struct pmap kernel_pmap_store; + +static pt_entry_t *csrc_pte, *cdst_pte; +static vm_offset_t csrcp, cdstp; +static struct mtx cmtx; + +static void pmap_init_l1(struct l1_ttable *, pd_entry_t *); +/* + * These routines are called when the CPU type is identified to set up + * the PTE prototypes, cache modes, etc. + * + * The variables are always here, just in case LKMs need to reference + * them (though, they shouldn't). + */ + +pt_entry_t pte_l1_s_cache_mode; +pt_entry_t pte_l1_s_cache_mode_pt; +pt_entry_t pte_l1_s_cache_mask; + +pt_entry_t pte_l2_l_cache_mode; +pt_entry_t pte_l2_l_cache_mode_pt; +pt_entry_t pte_l2_l_cache_mask; + +pt_entry_t pte_l2_s_cache_mode; +pt_entry_t pte_l2_s_cache_mode_pt; +pt_entry_t pte_l2_s_cache_mask; + +pt_entry_t pte_l2_s_prot_u; +pt_entry_t pte_l2_s_prot_w; +pt_entry_t pte_l2_s_prot_mask; + +pt_entry_t pte_l1_s_proto; +pt_entry_t pte_l1_c_proto; +pt_entry_t pte_l2_s_proto; + +void (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t); +void (*pmap_zero_page_func)(vm_paddr_t, int, int); +/* + * Which pmap is currently 'live' in the cache + * + * XXXSCW: Fix for SMP ... + */ +union pmap_cache_state *pmap_cache_state; + +struct msgbuf *msgbufp = 0; + +/* + * Crashdump maps. + */ +static caddr_t crashdumpmap; + +extern void bcopy_page(vm_offset_t, vm_offset_t); +extern void bzero_page(vm_offset_t); + +extern vm_offset_t alloc_firstaddr; + +char *_tmppt; + +/* + * Metadata for L1 translation tables. + */ +struct l1_ttable { + /* Entry on the L1 Table list */ + SLIST_ENTRY(l1_ttable) l1_link; + + /* Entry on the L1 Least Recently Used list */ + TAILQ_ENTRY(l1_ttable) l1_lru; + + /* Track how many domains are allocated from this L1 */ + volatile u_int l1_domain_use_count; + + /* + * A free-list of domain numbers for this L1. + * We avoid using ffs() and a bitmap to track domains since ffs() + * is slow on ARM. + */ + u_int8_t l1_domain_first; + u_int8_t l1_domain_free[PMAP_DOMAINS]; + + /* Physical address of this L1 page table */ + vm_paddr_t l1_physaddr; + + /* KVA of this L1 page table */ + pd_entry_t *l1_kva; +}; + +/* + * Convert a virtual address into its L1 table index. That is, the + * index used to locate the L2 descriptor table pointer in an L1 table. + * This is basically used to index l1->l1_kva[]. + * + * Each L2 descriptor table represents 1MB of VA space. + */ +#define L1_IDX(va) (((vm_offset_t)(va)) >> L1_S_SHIFT) + +/* + * L1 Page Tables are tracked using a Least Recently Used list. 
+ * - New L1s are allocated from the HEAD. + * - Freed L1s are added to the TAIl. + * - Recently accessed L1s (where an 'access' is some change to one of + * the userland pmaps which owns this L1) are moved to the TAIL. + */ +static TAILQ_HEAD(, l1_ttable) l1_lru_list; +/* + * A list of all L1 tables + */ +static SLIST_HEAD(, l1_ttable) l1_list; +static struct mtx l1_lru_lock; + +/* + * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots. + * + * This is normally 16MB worth L2 page descriptors for any given pmap. + * Reference counts are maintained for L2 descriptors so they can be + * freed when empty. + */ +struct l2_dtable { + /* The number of L2 page descriptors allocated to this l2_dtable */ + u_int l2_occupancy; + + /* List of L2 page descriptors */ + struct l2_bucket { + pt_entry_t *l2b_kva; /* KVA of L2 Descriptor Table */ + vm_paddr_t l2b_phys; /* Physical address of same */ + u_short l2b_l1idx; /* This L2 table's L1 index */ + u_short l2b_occupancy; /* How many active descriptors */ + } l2_bucket[L2_BUCKET_SIZE]; +}; + +/* pmap_kenter_internal flags */ +#define KENTER_CACHE 0x1 +#define KENTER_USER 0x2 + +/* + * Given an L1 table index, calculate the corresponding l2_dtable index + * and bucket index within the l2_dtable. + */ +#define L2_IDX(l1idx) (((l1idx) >> L2_BUCKET_LOG2) & \ + (L2_SIZE - 1)) +#define L2_BUCKET(l1idx) ((l1idx) & (L2_BUCKET_SIZE - 1)) + +/* + * Given a virtual address, this macro returns the + * virtual address required to drop into the next L2 bucket. + */ +#define L2_NEXT_BUCKET(va) (((va) & L1_S_FRAME) + L1_S_SIZE) + +/* + * L2 allocation. + */ +#define pmap_alloc_l2_dtable() \ + (void*)uma_zalloc(l2table_zone, M_NOWAIT|M_USE_RESERVE) +#define pmap_free_l2_dtable(l2) \ + uma_zfree(l2table_zone, l2) + +/* + * We try to map the page tables write-through, if possible. However, not + * all CPUs have a write-through cache mode, so on those we have to sync + * the cache when we frob page tables. + * + * We try to evaluate this at compile time, if possible. However, it's + * not always possible to do that, hence this run-time var. + */ +int pmap_needs_pte_sync; + +/* + * Macro to determine if a mapping might be resident in the + * instruction cache and/or TLB + */ +#define PV_BEEN_EXECD(f) (((f) & (PVF_REF | PVF_EXEC)) == (PVF_REF | PVF_EXEC)) + +/* + * Macro to determine if a mapping might be resident in the + * data cache and/or TLB + */ +#define PV_BEEN_REFD(f) (((f) & PVF_REF) != 0) + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +#define pmap_is_current(pm) ((pm) == pmap_kernel() || \ + curproc->p_vmspace->vm_map.pmap == (pm)) +static uma_zone_t pvzone = NULL; +uma_zone_t l2zone; +static uma_zone_t l2table_zone; +static vm_offset_t pmap_kernel_l2dtable_kva; +static vm_offset_t pmap_kernel_l2ptp_kva; +static vm_paddr_t pmap_kernel_l2ptp_phys; +static struct vm_object pvzone_obj; +static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; + +/* + * This list exists for the benefit of pmap_map_chunk(). It keeps track + * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can + * find them as necessary. + * + * Note that the data on this list MUST remain valid after initarm() returns, + * as pmap_bootstrap() uses it to contruct L2 table metadata. 
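
A standalone sketch (not part of the patch) of the virtual-address arithmetic behind the L1_IDX/L2_IDX/L2_BUCKET/L2_NEXT_BUCKET macros above; the constant values here (1MB L1 sections, 16-slot buckets, 4KB pages) are assumed for illustration only, the real ones come from the machine headers.

#include <stdio.h>

#define L1_S_SHIFT      20                      /* each L1 slot covers 1MB */
#define L1_S_SIZE       (1U << L1_S_SHIFT)
#define L1_S_FRAME      (~(L1_S_SIZE - 1))
#define L2_BUCKET_LOG2  4
#define L2_BUCKET_SIZE  (1 << L2_BUCKET_LOG2)   /* 16 L1 slots per bucket */
#define L2_SIZE         (1 << (32 - L1_S_SHIFT - L2_BUCKET_LOG2))

#define L1_IDX(va)       ((va) >> L1_S_SHIFT)
#define L2_IDX(l1idx)    (((l1idx) >> L2_BUCKET_LOG2) & (L2_SIZE - 1))
#define L2_BUCKET(l1idx) ((l1idx) & (L2_BUCKET_SIZE - 1))
#define L2_NEXT_BUCKET(va) (((va) & L1_S_FRAME) + L1_S_SIZE)

int
main(void)
{
        unsigned int va = 0xc0123456;   /* arbitrary kernel VA */
        unsigned int l1idx = L1_IDX(va);

        /* 0xc0123456 >> 20 = 0xc01; dtable index 0xc0, bucket slot 1 */
        printf("va=0x%08x l1idx=0x%x l2_dtable=0x%x l2_slot=0x%x next=0x%08x\n",
            va, l1idx, L2_IDX(l1idx), L2_BUCKET(l1idx), L2_NEXT_BUCKET(va));
        return (0);
}
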
+ */ +SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list); + +static void +pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt) +{ + int i; + + l1->l1_kva = l1pt; + l1->l1_domain_use_count = 0; + l1->l1_domain_first = 0; + + for (i = 0; i < PMAP_DOMAINS; i++) + l1->l1_domain_free[i] = i + 1; + + /* + * Copy the kernel's L1 entries to each new L1. + */ + if (l1pt != pmap_kernel()->pm_l1->l1_kva) + memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE); + + if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0) + panic("pmap_init_l1: can't get PA of L1 at %p", l1pt); + SLIST_INSERT_HEAD(&l1_list, l1, l1_link); + TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); +} + +static vm_offset_t +kernel_pt_lookup(vm_paddr_t pa) +{ + struct pv_addr *pv; + + SLIST_FOREACH(pv, &kernel_pt_list, pv_list) { + if (pv->pv_pa == pa) + return (pv->pv_va); + } + return (0); +} + +#if defined(CPU_ARM11) +void +pmap_pte_init_arm11(void) +{ + /* + * ARM11 is mostly compatible with ARM10, however some + * features are taken from XScale. + */ + pte_l1_s_cache_mode = L1_S_C; + pte_l1_s_cache_mask = L1_S_CACHE_MASK_generic; + + pte_l2_l_cache_mode = L2_C; + pte_l2_l_cache_mask = L2_L_CACHE_MASK_generic; + + pte_l2_s_cache_mode = L2_C; + pte_l2_s_cache_mask = L2_S_CACHE_MASK_generic; + + pte_l1_s_cache_mode_pt = L1_S_C; + pte_l2_l_cache_mode_pt = L2_C; + pte_l2_s_cache_mode_pt = L2_C; + + pte_l2_s_prot_u = L2_S_PROT_U_xscale; + pte_l2_s_prot_w = L2_S_PROT_W_xscale; + pte_l2_s_prot_mask = L2_S_PROT_MASK_xscale; + + pte_l1_s_proto = L1_S_PROTO_xscale; + pte_l1_c_proto = L1_C_PROTO_xscale; + pte_l2_s_proto = L2_S_PROTO_generic; + + pmap_copy_page_func = pmap_copy_page_generic; + pmap_zero_page_func = pmap_zero_page_generic; +} +#endif /* CPU_ARM11 */ + +/* + * Allocate an L1 translation table for the specified pmap. + * This is called at pmap creation time. + */ +static void +pmap_alloc_l1(pmap_t pm) +{ + struct l1_ttable *l1; + u_int8_t domain; + + /* + * Remove the L1 at the head of the LRU list + */ + mtx_lock(&l1_lru_lock); + l1 = TAILQ_FIRST(&l1_lru_list); + TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); + + /* + * Pick the first available domain number, and update + * the link to the next number. + */ + domain = l1->l1_domain_first; + l1->l1_domain_first = l1->l1_domain_free[domain]; + + /* + * If there are still free domain numbers in this L1, + * put it back on the TAIL of the LRU list. + */ + if (++l1->l1_domain_use_count < PMAP_DOMAINS) + TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); + + mtx_unlock(&l1_lru_lock); + + /* + * Fix up the relevant bits in the pmap structure + */ + pm->pm_l1 = l1; + pm->pm_domain = domain + 1; +} + +/* + * Free an L1 translation table. + * This is called at pmap destruction time. + */ +static void +pmap_free_l1(pmap_t pm) +{ + struct l1_ttable *l1 = pm->pm_l1; + + mtx_lock(&l1_lru_lock); + + /* + * If this L1 is currently on the LRU list, remove it. + */ + if (l1->l1_domain_use_count < PMAP_DOMAINS) + TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); + + /* + * Free up the domain number which was allocated to the pmap + */ + l1->l1_domain_free[pm->pm_domain - 1] = l1->l1_domain_first; + l1->l1_domain_first = pm->pm_domain - 1; + l1->l1_domain_use_count--; + + /* + * The L1 now must have at least 1 free domain, so add + * it back to the LRU list. If the use count is zero, + * put it at the head of the list, otherwise it goes + * to the tail. 
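
A minimal, self-contained model (not from the patch) of the embedded free list that pmap_alloc_l1()/pmap_free_l1() use to hand out per-L1 domain numbers without resorting to ffs(); the PMAP_DOMAINS value below is arbitrary and the struct is simplified. As in pmap_alloc_l1() above, the pmap stores the result as domain + 1 and pmap_free_l1() converts it back before returning the number to the list.

#include <assert.h>
#include <stdint.h>

#define PMAP_DOMAINS    15      /* illustrative value only */

struct l1_domains {
        uint8_t  first;                  /* next free domain number */
        uint8_t  freelist[PMAP_DOMAINS]; /* freelist[d] = domain after d */
        unsigned in_use;
};

static void
domains_init(struct l1_domains *l1)
{
        l1->first = 0;
        l1->in_use = 0;
        for (int i = 0; i < PMAP_DOMAINS; i++)
                l1->freelist[i] = i + 1;        /* 0 -> 1 -> 2 -> ... */
}

/* Pop the head of the free list, as pmap_alloc_l1() does. */
static int
domain_alloc(struct l1_domains *l1)
{
        int d;

        if (l1->in_use == PMAP_DOMAINS)
                return (-1);            /* this L1 is fully allocated */
        d = l1->first;
        l1->first = l1->freelist[d];
        l1->in_use++;
        return (d);
}

/* Push a freed domain back on the head, as pmap_free_l1() does. */
static void
domain_free(struct l1_domains *l1, int d)
{
        l1->freelist[d] = l1->first;
        l1->first = d;
        l1->in_use--;
}

int
main(void)
{
        struct l1_domains l1;
        int a, b;

        domains_init(&l1);
        a = domain_alloc(&l1);          /* 0 */
        b = domain_alloc(&l1);          /* 1 */
        domain_free(&l1, a);
        assert(domain_alloc(&l1) == a); /* freed number is reused first */
        (void)b;
        return (0);
}
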
+ */ + if (l1->l1_domain_use_count == 0) { + TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru); + } else + TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); + + mtx_unlock(&l1_lru_lock); +} + +static PMAP_INLINE void +pmap_use_l1(pmap_t pm) +{ + struct l1_ttable *l1; + + /* + * Do nothing if we're in interrupt context. + * Access to an L1 by the kernel pmap must not affect + * the LRU list. + */ + if (pm == pmap_kernel()) + return; + + l1 = pm->pm_l1; + + /* + * If the L1 is not currently on the LRU list, just return + */ + if (l1->l1_domain_use_count == PMAP_DOMAINS) + return; + + mtx_lock(&l1_lru_lock); + + /* + * Check the use count again, now that we've acquired the lock + */ + if (l1->l1_domain_use_count == PMAP_DOMAINS) { + mtx_unlock(&l1_lru_lock); + return; + } + + /* + * Move the L1 to the back of the LRU list + */ + TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); + TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); + + mtx_unlock(&l1_lru_lock); +} + + +/* + * Returns a pointer to the L2 bucket associated with the specified pmap + * and VA, or NULL if no L2 bucket exists for the address. + */ +static PMAP_INLINE struct l2_bucket * +pmap_get_l2_bucket(pmap_t pm, vm_offset_t va) +{ + struct l2_dtable *l2; + struct l2_bucket *l2b; + u_short l1idx; + + l1idx = L1_IDX(va); + + if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL || + (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL) + return (NULL); + + return (l2b); +} + +/* + * Returns a pointer to the L2 bucket associated with the specified pmap + * and VA. + * + * If no L2 bucket exists, perform the necessary allocations to put an L2 + * bucket/page table in place. + * + * Note that if a new L2 bucket/page was allocated, the caller *must* + * increment the bucket occupancy counter appropriately *before* + * releasing the pmap's lock to ensure no other thread or cpu deallocates + * the bucket/page in the meantime. + */ +static struct l2_bucket * +pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va) +{ + struct l2_dtable *l2; + struct l2_bucket *l2b; + u_short l1idx; + + l1idx = L1_IDX(va); + + PMAP_ASSERT_LOCKED(pm); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) { + /* + * No mapping at this address, as there is + * no entry in the L1 table. + * Need to allocate a new l2_dtable. + */ +again_l2table: + PMAP_UNLOCK(pm); + vm_page_unlock_queues(); + if ((l2 = pmap_alloc_l2_dtable()) == NULL) { + vm_page_lock_queues(); + PMAP_LOCK(pm); + return (NULL); + } + vm_page_lock_queues(); + PMAP_LOCK(pm); + if (pm->pm_l2[L2_IDX(l1idx)] != NULL) { + PMAP_UNLOCK(pm); + vm_page_unlock_queues(); + uma_zfree(l2table_zone, l2); + vm_page_lock_queues(); + PMAP_LOCK(pm); + l2 = pm->pm_l2[L2_IDX(l1idx)]; + if (l2 == NULL) + goto again_l2table; + /* + * Someone already allocated the l2_dtable while + * we were doing the same. + */ + } else { + bzero(l2, sizeof(*l2)); + /* + * Link it into the parent pmap + */ + pm->pm_l2[L2_IDX(l1idx)] = l2; + } + } + + l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; + + /* + * Fetch pointer to the L2 page table associated with the address. + */ + if (l2b->l2b_kva == NULL) { + pt_entry_t *ptep; + + /* + * No L2 page table has been allocated. Chances are, this + * is because we just allocated the l2_dtable, above. + */ +again_ptep: + PMAP_UNLOCK(pm); + vm_page_unlock_queues(); + ptep = (void*)uma_zalloc(l2zone, M_NOWAIT|M_USE_RESERVE); + vm_page_lock_queues(); + PMAP_LOCK(pm); + if (l2b->l2b_kva != 0) { + /* We lost the race. 
*/ + PMAP_UNLOCK(pm); + vm_page_unlock_queues(); + uma_zfree(l2zone, ptep); + vm_page_lock_queues(); + PMAP_LOCK(pm); + if (l2b->l2b_kva == 0) + goto again_ptep; + return (l2b); + } + l2b->l2b_phys = vtophys(ptep); + if (ptep == NULL) { + /* + * Oops, no more L2 page tables available at this + * time. We may need to deallocate the l2_dtable + * if we allocated a new one above. + */ + if (l2->l2_occupancy == 0) { + pm->pm_l2[L2_IDX(l1idx)] = NULL; + pmap_free_l2_dtable(l2); + } + return (NULL); + } + + l2->l2_occupancy++; + l2b->l2b_kva = ptep; + l2b->l2b_l1idx = l1idx; + } + + return (l2b); +} + +static PMAP_INLINE void +#ifndef PMAP_INCLUDE_PTE_SYNC +pmap_free_l2_ptp(pt_entry_t *l2) +#else +pmap_free_l2_ptp(boolean_t need_sync, pt_entry_t *l2) +#endif +{ +#ifdef PMAP_INCLUDE_PTE_SYNC + /* + * Note: With a write-back cache, we may need to sync this + * L2 table before re-using it. + * This is because it may have belonged to a non-current + * pmap, in which case the cache syncs would have been + * skipped when the pages were being unmapped. If the + * L2 table were then to be immediately re-allocated to + * the *current* pmap, it may well contain stale mappings + * which have not yet been cleared by a cache write-back + * and so would still be visible to the mmu. + */ + if (need_sync) + PTE_SYNC_RANGE(l2, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); +#endif + uma_zfree(l2zone, l2); +} +/* + * One or more mappings in the specified L2 descriptor table have just been + * invalidated. + * + * Garbage collect the metadata and descriptor table itself if necessary. + * + * The pmap lock must be acquired when this is called (not necessary + * for the kernel pmap). + */ +static void +pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l2b, u_int count) +{ + struct l2_dtable *l2; + pd_entry_t *pl1pd, l1pd; + pt_entry_t *ptep; + u_short l1idx; + + + /* + * Update the bucket's reference count according to how many + * PTEs the caller has just invalidated. + */ + l2b->l2b_occupancy -= count; + + /* + * Note: + * + * Level 2 page tables allocated to the kernel pmap are never freed + * as that would require checking all Level 1 page tables and + * removing any references to the Level 2 page table. See also the + * comment elsewhere about never freeing bootstrap L2 descriptors. + * + * We make do with just invalidating the mapping in the L2 table. + * + * This isn't really a big deal in practice and, in fact, leads + * to a performance win over time as we don't need to continually + * alloc/free. + */ + if (l2b->l2b_occupancy > 0 || pm == pmap_kernel()) + return; + + /* + * There are no more valid mappings in this level 2 page table. + * Go ahead and NULL-out the pointer in the bucket, then + * free the page table. + */ + l1idx = l2b->l2b_l1idx; + ptep = l2b->l2b_kva; + l2b->l2b_kva = NULL; + + pl1pd = &pm->pm_l1->l1_kva[l1idx]; + + /* + * If the L1 slot matches the pmap's domain + * number, then invalidate it. + */ + l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK); + if (l1pd == (L1_C_DOM(pm->pm_domain) | L1_TYPE_C)) { + *pl1pd = 0; + PTE_SYNC(pl1pd); + } + + /* + * Release the L2 descriptor table back to the pool cache. + */ +#ifndef PMAP_INCLUDE_PTE_SYNC + pmap_free_l2_ptp(ptep); +#else + pmap_free_l2_ptp(!pmap_is_current(pm), ptep); +#endif + + /* + * Update the reference count in the associated l2_dtable + */ + l2 = pm->pm_l2[L2_IDX(l1idx)]; + if (--l2->l2_occupancy > 0) + return; + + /* + * There are no more valid mappings in any of the Level 1 + * slots managed by this l2_dtable. 
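
The allocation path in pmap_alloc_l2_bucket() above has to drop both the pmap lock and the page-queue lock around the zone allocation, re-acquire them, and re-check whether another thread installed the object first, freeing its own copy if it lost the race. A stripped-down sketch of that pattern, using a pthread mutex, a single global slot, and malloc purely as stand-ins for the pmap locks and the UMA zones:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *slot;                      /* stands in for pm->pm_l2[...] */

/* Return the shared object, allocating it on first use. */
static void *
get_or_alloc(void)
{
        void *p, *new;

        pthread_mutex_lock(&lock);
        while (slot == NULL) {
                /* Can't call a sleeping allocator with the lock held. */
                pthread_mutex_unlock(&lock);
                new = calloc(1, 64);
                pthread_mutex_lock(&lock);
                if (new == NULL)
                        break;          /* give up; caller sees NULL */
                if (slot == NULL) {
                        slot = new;     /* we won the race */
                } else {
                        /* Lost the race: drop the lock again to free ours. */
                        pthread_mutex_unlock(&lock);
                        free(new);
                        pthread_mutex_lock(&lock);
                }
        }
        p = slot;
        pthread_mutex_unlock(&lock);
        return (p);
}

int
main(void)
{
        return (get_or_alloc() == NULL);
}
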
Go ahead and NULL-out + * the pointer in the parent pmap and free the l2_dtable. + */ + pm->pm_l2[L2_IDX(l1idx)] = NULL; + pmap_free_l2_dtable(l2); +} + +/* + * Pool cache constructors for L2 descriptor tables, metadata and pmap + * structures. + */ +static int +pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags) +{ + struct l2_bucket *l2b; + pt_entry_t *ptep, pte; + vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK; + + /* + * The mappings for these page tables were initially made using + * pmap_kenter() by the pool subsystem. Therefore, the cache- + * mode will not be right for page table mappings. To avoid + * polluting the pmap_kenter() code with a special case for + * page tables, we simply fix up the cache-mode here if it's not + * correct. + */ + l2b = pmap_get_l2_bucket(pmap_kernel(), va); + ptep = &l2b->l2b_kva[l2pte_index(va)]; + pte = *ptep; + + if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { + /* + * Page tables must have the cache-mode set to + * Write-Thru. + */ + *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; + PTE_SYNC(ptep); + cpu_tlb_flushD_SE(va); + cpu_cpwait(); + } + + memset(mem, 0, L2_TABLE_SIZE_REAL); + PTE_SYNC_RANGE(mem, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); + return (0); +} + +/* + * PTE_SYNC_CURRENT: + * + * Make sure the pte is written out to RAM. + * We need to do this for one of two cases: + * - We're dealing with the kernel pmap + * - There is no pmap active in the cache/tlb. + * - The specified pmap is 'active' in the cache/tlb. + */ +#ifdef PMAP_INCLUDE_PTE_SYNC +#define PTE_SYNC_CURRENT(pm, ptep) \ +do { \ + if (PMAP_NEEDS_PTE_SYNC && \ + pmap_is_current(pm)) \ + PTE_SYNC(ptep); \ +} while (/*CONSTCOND*/0) +#else +#define PTE_SYNC_CURRENT(pm, ptep) /* nothing */ +#endif + +/* + * Modify pte bits for all ptes corresponding to the given physical address. + * We use `maskbits' rather than `clearbits' because we're always passing + * constants and the latter would require an extra inversion at run-time. + */ +static int +pmap_clearbit(struct vm_page *pg, u_int maskbits) +{ + struct l2_bucket *l2b; + struct pv_entry *pv; + pt_entry_t *ptep, npte, opte; + pmap_t pm; + vm_offset_t va; + u_int oflags; + int count = 0; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + + if (maskbits & PVF_WRITE) + maskbits |= PVF_MOD; + /* + * Clear saved attributes (modify, reference) + */ + pg->md.pvh_attrs &= ~(maskbits & (PVF_MOD | PVF_REF)); + + if (TAILQ_EMPTY(&pg->md.pv_list)) { + return (0); + } + + /* + * Loop over all current mappings setting/clearing as appropos + */ + TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) { + va = pv->pv_va; + pm = pv->pv_pmap; + oflags = pv->pv_flags; + pv->pv_flags &= ~maskbits; + + PMAP_LOCK(pm); + + l2b = pmap_get_l2_bucket(pm, va); + + ptep = &l2b->l2b_kva[l2pte_index(va)]; + npte = opte = *ptep; + + if (maskbits & (PVF_WRITE|PVF_MOD)) { + if (opte & L2_S_PROT_W) + vm_page_dirty(pg); + + /* make the pte read only */ + npte &= ~L2_S_PROT_W; + } + + if (maskbits & PVF_REF) { + /* + * Make the PTE invalid so that we will take a + * page fault the next time the mapping is + * referenced. + */ + npte &= ~L2_TYPE_MASK; + npte |= L2_TYPE_INV; + } + + if (npte != opte) { + count++; + *ptep = npte; + PTE_SYNC(ptep); + /* Flush the TLB entry if a current pmap. 
*/ + if (PV_BEEN_EXECD(oflags)) + cpu_tlb_flushID_SE(pv->pv_va); + else if (PV_BEEN_REFD(oflags)) + cpu_tlb_flushD_SE(pv->pv_va); + } + + PMAP_UNLOCK(pm); + + } + + if (maskbits & PVF_WRITE) + vm_page_flag_clear(pg, PG_WRITEABLE); + return (count); +} + +/* + * main pv_entry manipulation functions: + * pmap_enter_pv: enter a mapping onto a vm_page list + * pmap_remove_pv: remove a mappiing from a vm_page list + * + * NOTE: pmap_enter_pv expects to lock the pvh itself + * pmap_remove_pv expects te caller to lock the pvh before calling + */ + +/* + * pmap_enter_pv: enter a mapping onto a vm_page lst + * + * => caller should hold the proper lock on pmap_main_lock + * => caller should have pmap locked + * => we will gain the lock on the vm_page and allocate the new pv_entry + * => caller should adjust ptp's wire_count before calling + * => caller should not adjust pmap's wire_count + */ +static void +pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, pmap_t pm, + vm_offset_t va, u_int flags) +{ + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + + PMAP_ASSERT_LOCKED(pm); + pve->pv_pmap = pm; + pve->pv_va = va; + pve->pv_flags = flags; + + TAILQ_INSERT_HEAD(&pg->md.pv_list, pve, pv_list); + TAILQ_INSERT_HEAD(&pm->pm_pvlist, pve, pv_plist); + pg->md.pvh_attrs |= flags & (PVF_REF | PVF_MOD); + if (pve->pv_flags & PVF_WIRED) + ++pm->pm_stats.wired_count; + vm_page_flag_set(pg, PG_REFERENCED); +} + +/* + * + * pmap_find_pv: Find a pv entry + * + * => caller should hold lock on vm_page + */ +static PMAP_INLINE struct pv_entry * +pmap_find_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va) +{ + struct pv_entry *pv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) + if (pm == pv->pv_pmap && va == pv->pv_va) + break; + return (pv); +} + +/* + * vector_page_setprot: + * + * Manipulate the protection of the vector page. 
+ */ +void +vector_page_setprot(int prot) +{ + struct l2_bucket *l2b; + pt_entry_t *ptep; + + l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page); + + ptep = &l2b->l2b_kva[l2pte_index(vector_page)]; + + *ptep = (*ptep & ~L1_S_PROT_MASK) | L2_S_PROT(PTE_KERNEL, prot); + PTE_SYNC(ptep); + cpu_tlb_flushD_SE(vector_page); + cpu_cpwait(); +} + +/* + * pmap_remove_pv: try to remove a mapping from a pv_list + * + * => caller should hold proper lock on pmap_main_lock + * => pmap should be locked + * => caller should hold lock on vm_page [so that attrs can be adjusted] + * => caller should adjust ptp's wire_count and free PTP if needed + * => caller should NOT adjust pmap's wire_count + * => we return the removed pve + */ + +static void +pmap_nuke_pv(struct vm_page *pg, pmap_t pm, struct pv_entry *pve) +{ + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_ASSERT_LOCKED(pm); + + TAILQ_REMOVE(&pg->md.pv_list, pve, pv_list); + TAILQ_REMOVE(&pm->pm_pvlist, pve, pv_plist); + + if (pve->pv_flags & PVF_WIRED) + --pm->pm_stats.wired_count; + + if (pg->md.pvh_attrs & PVF_MOD) + vm_page_dirty(pg); + + if (TAILQ_FIRST(&pg->md.pv_list) == NULL) + pg->md.pvh_attrs &= ~PVF_REF; + else + vm_page_flag_set(pg, PG_REFERENCED); + + if (pve->pv_flags & PVF_WRITE) { + TAILQ_FOREACH(pve, &pg->md.pv_list, pv_list) + if (pve->pv_flags & PVF_WRITE) + break; + if (!pve) { + pg->md.pvh_attrs &= ~PVF_MOD; + vm_page_flag_clear(pg, PG_WRITEABLE); + } + } +} + +static struct pv_entry * +pmap_remove_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va) +{ + struct pv_entry *pve; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + pve = TAILQ_FIRST(&pg->md.pv_list); + + while (pve) { + if (pve->pv_pmap == pm && pve->pv_va == va) { /* match? */ + pmap_nuke_pv(pg, pm, pve); + break; + } + pve = TAILQ_NEXT(pve, pv_list); + } + + return(pve); /* return removed pve */ +} + +/* + * + * pmap_modify_pv: Update pv flags + * + * => caller should hold lock on vm_page [so that attrs can be adjusted] + * => caller should NOT adjust pmap's wire_count + * => we return the old flags + * + * Modify a physical-virtual mapping in the pv table + */ +static u_int +pmap_modify_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va, + u_int clr_mask, u_int set_mask) +{ + struct pv_entry *npv; + u_int flags, oflags; + + PMAP_ASSERT_LOCKED(pm); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((npv = pmap_find_pv(pg, pm, va)) == NULL) + return (0); + + /* + * There is at least one VA mapping this page. + */ + + if (clr_mask & (PVF_REF | PVF_MOD)) + pg->md.pvh_attrs |= set_mask & (PVF_REF | PVF_MOD); + + oflags = npv->pv_flags; + npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask; + + if ((flags ^ oflags) & PVF_WIRED) { + if (flags & PVF_WIRED) + ++pm->pm_stats.wired_count; + else + --pm->pm_stats.wired_count; + } + + return (oflags); +} + +/* Function to set the debug level of the pmap code */ +#ifdef PMAP_DEBUG +void +pmap_debug(int level) +{ + pmap_debug_level = level; + dprintf("pmap_debug: level=%d\n", pmap_debug_level); +} +#endif /* PMAP_DEBUG */ + +void +pmap_pinit0(struct pmap *pmap) +{ + PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap)); + + dprintf("pmap_pinit0: pmap = %08x, pm_pdir = %08x\n", + (u_int32_t) pmap, (u_int32_t) pmap->pm_pdir); + bcopy(kernel_pmap, pmap, sizeof(*pmap)); + bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx)); + PMAP_LOCK_INIT(pmap); +} + +/* + * Initialize a vm_page's machine-dependent fields. + */ +void +pmap_page_init(vm_page_t m) +{ + + TAILQ_INIT(&m->md.pv_list); +} + +/* + * Initialize the pmap module. 
+ * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. + */ +void +pmap_init(void) +{ + int shpgperproc = PMAP_SHPGPERPROC; + + PDEBUG(1, printf("pmap_init: phys_start = %08x\n")); + + /* + * init the pv free list + */ + pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); + /* + * Now it is safe to enable pv_table recording. + */ + PDEBUG(1, printf("pmap_init: done!\n")); + + TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); + + pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; + pv_entry_high_water = 9 * (pv_entry_max / 10); + l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor, + NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); + l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + UMA_ZONE_VM | UMA_ZONE_NOFREE); + + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); + +} + +int +pmap_fault_fixup(pmap_t pm, vm_offset_t va, vm_prot_t ftype, int user) +{ + struct l2_dtable *l2; + struct l2_bucket *l2b; + pd_entry_t *pl1pd, l1pd; + pt_entry_t *ptep, pte; + vm_paddr_t pa; + u_int l1idx; + int rv = 0; + + l1idx = L1_IDX(va); + vm_page_lock_queues(); + PMAP_LOCK(pm); + + /* + * If there is no l2_dtable for this address, then the process + * has no business accessing it. + * + * Note: This will catch userland processes trying to access + * kernel addresses. + */ + l2 = pm->pm_l2[L2_IDX(l1idx)]; + if (l2 == NULL) + goto out; + + /* + * Likewise if there is no L2 descriptor table + */ + l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; + if (l2b->l2b_kva == NULL) + goto out; + + /* + * Check the PTE itself. + */ + ptep = &l2b->l2b_kva[l2pte_index(va)]; + pte = *ptep; + if (pte == 0) + goto out; + + /* + * Catch a userland access to the vector page mapped at 0x0 + */ + if (user && (pte & L2_S_PROT_U) == 0) + goto out; + if (va == vector_page) + goto out; + + pa = l2pte_pa(pte); + + if ((ftype & VM_PROT_WRITE) && (pte & L2_S_PROT_W) == 0) { + /* + * This looks like a good candidate for "page modified" + * emulation... + */ + struct pv_entry *pv; + struct vm_page *pg; + + /* Extract the physical address of the page */ + if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) { + goto out; + } + /* Get the current flags for this page. */ + + pv = pmap_find_pv(pg, pm, va); + if (pv == NULL) { + goto out; + } + + /* + * Do the flags say this page is writable? If not then it + * is a genuine write fault. If yes then the write fault is + * our fault as we did not reflect the write access in the + * PTE. Now we know a write has occurred we can correct this + * and also set the modified bit + */ + if ((pv->pv_flags & PVF_WRITE) == 0) { + goto out; + } + pg->md.pvh_attrs |= PVF_REF | PVF_MOD; + vm_page_dirty(pg); + pv->pv_flags |= PVF_REF | PVF_MOD; + + /* Re-enable write permissions for the page */ + *ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO | L2_S_PROT_W; + PTE_SYNC(ptep); + rv = 1; + } else + if ((pte & L2_TYPE_MASK) == L2_TYPE_INV) { + /* + * This looks like a good candidate for "page referenced" + * emulation. + */ + struct pv_entry *pv; + struct vm_page *pg; + + /* Extract the physical address of the page */ + if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) + goto out; + /* Get the current flags for this page. 
*/ + + pv = pmap_find_pv(pg, pm, va); + if (pv == NULL) + goto out; + + pg->md.pvh_attrs |= PVF_REF; + pv->pv_flags |= PVF_REF; + + + *ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO; + PTE_SYNC(ptep); + rv = 1; + } + + /* + * We know there is a valid mapping here, so simply + * fix up the L1 if necessary. + */ + pl1pd = &pm->pm_l1->l1_kva[l1idx]; + l1pd = l2b->l2b_phys | L1_C_DOM(pm->pm_domain) | L1_C_PROTO; + if (*pl1pd != l1pd) { + *pl1pd = l1pd; + PTE_SYNC(pl1pd); + rv = 1; + } + +#ifdef DEBUG + /* + * If 'rv == 0' at this point, it generally indicates that there is a + * stale TLB entry for the faulting address. This happens when two or + * more processes are sharing an L1. Since we don't flush the TLB on + * a context switch between such processes, we can take domain faults + * for mappings which exist at the same VA in both processes. EVEN IF + * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for + * example. + * + * This is extremely likely to happen if pmap_enter() updated the L1 + * entry for a recently entered mapping. In this case, the TLB is + * flushed for the new mapping, but there may still be TLB entries for + * other mappings belonging to other processes in the 1MB range + * covered by the L1 entry. + * + * Since 'rv == 0', we know that the L1 already contains the correct + * value, so the fault must be due to a stale TLB entry. + * + * Since we always need to flush the TLB anyway in the case where we + * fixed up the L1, or frobbed the L2 PTE, we effectively deal with + * stale TLB entries dynamically. + * + * However, the above condition can ONLY happen if the current L1 is + * being shared. If it happens when the L1 is unshared, it indicates + * that other parts of the pmap are not doing their job WRT managing + * the TLB. + */ + if (rv == 0 && pm->pm_l1->l1_domain_use_count == 1) { + extern int last_fault_code; + printf("fixup: pm %p, va 0x%lx, ftype %d - nothing to do!\n", + pm, va, ftype); + printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n", + l2, l2b, ptep, pl1pd); + printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n", + pte, l1pd, last_fault_code); +#ifdef DDB + Debugger(); +#endif + } +#endif + + cpu_tlb_flushID_SE(va); + cpu_cpwait(); + + rv = 1; + +out: + vm_page_unlock_queues(); + PMAP_UNLOCK(pm); + return (rv); +} + +void +pmap_postinit(void) +{ + struct l2_bucket *l2b; + struct l1_ttable *l1; + pd_entry_t *pl1pt; + pt_entry_t *ptep, pte; + vm_offset_t va, eva; + u_int loop, needed; + + needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0); + needed -= 1; + l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK); + + for (loop = 0; loop < needed; loop++, l1++) { + /* Allocate a L1 page table */ + va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0, + 0xffffffff, L1_TABLE_SIZE, 0); + + if (va == 0) + panic("Cannot allocate L1 KVM"); + + eva = va + L1_TABLE_SIZE; + pl1pt = (pd_entry_t *)va; + + while (va < eva) { + l2b = pmap_get_l2_bucket(pmap_kernel(), va); + ptep = &l2b->l2b_kva[l2pte_index(va)]; + pte = *ptep; + pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; + *ptep = pte; + PTE_SYNC(ptep); + cpu_tlb_flushD_SE(va); + + va += PAGE_SIZE; + } + pmap_init_l1(l1, pl1pt); + } + + +#ifdef DEBUG + printf("pmap_postinit: Allocated %d static L1 descriptor tables\n", + needed); +#endif +} + +/* + * This is used to stuff certain critical values into the PCB where they + * can be accessed quickly from cpu_switch() et al. 
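
A compact model of the referenced/modified emulation performed by pmap_fault_fixup() above; the flag names are simplified stand-ins, not the real PTE layout. New mappings start out invalid or read-only, and the first access upgrades the PTE while recording the REF/MOD attributes in software.

#include <assert.h>
#include <stdbool.h>

/* Simplified software view of one mapping; field names are illustrative. */
struct soft_pte {
        bool valid;             /* L2_TYPE_INV cleared */
        bool hw_writable;       /* L2_S_PROT_W set */
        bool pv_writable;       /* PVF_WRITE: mapping may write */
        bool referenced;        /* PVF_REF */
        bool modified;          /* PVF_MOD */
};

/* Returns true if the fault was fixed up, false if it is a real fault. */
static bool
fault_fixup(struct soft_pte *pte, bool is_write)
{
        if (is_write && !pte->hw_writable) {
                if (!pte->pv_writable)
                        return (false);         /* genuine protection fault */
                /* "Page modified" emulation. */
                pte->referenced = pte->modified = true;
                pte->valid = pte->hw_writable = true;
                return (true);
        }
        if (!pte->valid) {
                /* "Page referenced" emulation. */
                pte->referenced = true;
                pte->valid = true;
                return (true);
        }
        return (false);
}

int
main(void)
{
        struct soft_pte pte = { .pv_writable = true };

        assert(fault_fixup(&pte, false));       /* first read: REF emulation */
        assert(pte.referenced && !pte.modified);
        assert(fault_fixup(&pte, true));        /* first write: MOD emulation */
        assert(pte.modified && pte.hw_writable);
        return (0);
}
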
+ */ +void +pmap_set_pcb_pagedir(pmap_t pm, struct pcb *pcb) +{ + struct l2_bucket *l2b; + + pcb->pcb_pagedir = pm->pm_l1->l1_physaddr; + pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | + (DOMAIN_CLIENT << (pm->pm_domain * 2)); + + if (vector_page < KERNBASE) { + pcb->pcb_pl1vec = &pm->pm_l1->l1_kva[L1_IDX(vector_page)]; + l2b = pmap_get_l2_bucket(pm, vector_page); + pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO | + L1_C_DOM(pm->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL); + } else + pcb->pcb_pl1vec = NULL; +} + +void +pmap_activate(struct thread *td) +{ + pmap_t pm; + struct pcb *pcb; + + pm = vmspace_pmap(td->td_proc->p_vmspace); + pcb = td->td_pcb; + + critical_enter(); + pmap_set_pcb_pagedir(pm, pcb); + + if (td == curthread) { + u_int cur_dacr, cur_ttb; + + __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb)); + __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr)); + + cur_ttb &= ~(L1_TABLE_SIZE - 1); + + if (cur_ttb == (u_int)pcb->pcb_pagedir && + cur_dacr == pcb->pcb_dacr) { + /* + * No need to switch address spaces. + */ + critical_exit(); + return; + } + + + /* + * We MUST, I repeat, MUST fix up the L1 entry corresponding + * to 'vector_page' in the incoming L1 table before switching + * to it otherwise subsequent interrupts/exceptions (including + * domain faults!) will jump into hyperspace. + */ + if (pcb->pcb_pl1vec) { + + *pcb->pcb_pl1vec = pcb->pcb_l1vec; + /* + * Don't need to PTE_SYNC() at this point since + * cpu_setttb() is about to flush both the cache + * and the TLB. + */ + } + + cpu_domains(pcb->pcb_dacr); + cpu_setttb(pcb->pcb_pagedir); + } + critical_exit(); +} + +static int +pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va) +{ + pd_entry_t *pdep, pde; + pt_entry_t *ptep, pte; + vm_offset_t pa; + int rv = 0; + + /* + * Make sure the descriptor itself has the correct cache mode + */ + pdep = &kl1[L1_IDX(va)]; + pde = *pdep; + + if (l1pte_section_p(pde)) { + if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) { + *pdep = (pde & ~L1_S_CACHE_MASK) | + pte_l1_s_cache_mode_pt; + PTE_SYNC(pdep); + rv = 1; + } + } else { + pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); + ptep = (pt_entry_t *)kernel_pt_lookup(pa); + if (ptep == NULL) + panic("pmap_bootstrap: No L2 for L2 @ va %p\n", ptep); + + ptep = &ptep[l2pte_index(va)]; + pte = *ptep; + if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { + *ptep = (pte & ~L2_S_CACHE_MASK) | + pte_l2_s_cache_mode_pt; + rv = 1; + } + } + + return (rv); +} + +static void +pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap, + pt_entry_t **ptep) +{ + vm_offset_t va = *availp; + struct l2_bucket *l2b; + + if (ptep) { + l2b = pmap_get_l2_bucket(pmap_kernel(), va); + if (l2b == NULL) + panic("pmap_alloc_specials: no l2b for 0x%x", va); + + *ptep = &l2b->l2b_kva[l2pte_index(va)]; + } + + *vap = va; + *availp = va + (PAGE_SIZE * pages); +} + +/* + * Bootstrap the system enough to run with virtual memory. + * + * On the arm this is called after mapping has already been enabled + * and just syncs the pmap module with what has already been done. 
+ * [We can't call it easily with mapping off since the kernel is not + * mapped with PA == VA, hence we would have to relocate every address + * from the linked base (virtual) address "KERNBASE" to the actual + * (physical) address starting relative to 0] + */ +#define PMAP_STATIC_L2_SIZE 16 + +void +pmap_bootstrap(vm_offset_t firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt) +{ + static struct l1_ttable static_l1; + static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE]; + struct l1_ttable *l1 = &static_l1; + struct l2_dtable *l2; + struct l2_bucket *l2b; + pd_entry_t pde; + pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va; + pt_entry_t *ptep; + vm_paddr_t pa; + vm_offset_t va; + vm_size_t size; + int l1idx, l2idx, l2next = 0; + + PDEBUG(1, printf("firstaddr = %08x, loadaddr = %08x\n", + firstaddr, loadaddr)); + + virtual_avail = firstaddr; + kernel_pmap->pm_l1 = l1; + kernel_l1pa = l1pt->pv_pa; + + /* + * Scan the L1 translation table created by initarm() and create + * the required metadata for all valid mappings found in it. + */ + for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) { + pde = kernel_l1pt[l1idx]; + + /* + * We're only interested in Coarse mappings. + * pmap_extract() can deal with section mappings without + * recourse to checking L2 metadata. + */ + if ((pde & L1_TYPE_MASK) != L1_TYPE_C) + continue; + + /* + * Lookup the KVA of this L2 descriptor table + */ + pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); + ptep = (pt_entry_t *)kernel_pt_lookup(pa); + + if (ptep == NULL) { + panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx", + (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa); + } + + /* + * Fetch the associated L2 metadata structure. + * Allocate a new one if necessary. + */ + if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { + if (l2next == PMAP_STATIC_L2_SIZE) + panic("pmap_bootstrap: out of static L2s"); + kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 = + &static_l2[l2next++]; + } + + /* + * One more L1 slot tracked... + */ + l2->l2_occupancy++; + + /* + * Fill in the details of the L2 descriptor in the + * appropriate bucket. + */ + l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; + l2b->l2b_kva = ptep; + l2b->l2b_phys = pa; + l2b->l2b_l1idx = l1idx; + + /* + * Establish an initial occupancy count for this descriptor + */ + for (l2idx = 0; + l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); + l2idx++) { + if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) { + l2b->l2b_occupancy++; + } + } + + /* + * Make sure the descriptor itself has the correct cache mode. + * If not, fix it, but whine about the problem. Port-meisters + * should consider this a clue to fix up their initarm() + * function. :) + */ + if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) { + printf("pmap_bootstrap: WARNING! wrong cache mode for " + "L2 pte @ %p\n", ptep); + } + } + + + /* + * Ensure the primary (kernel) L1 has the correct cache mode for + * a page table. Bitch if it is not correctly set. + */ + for (va = (vm_offset_t)kernel_l1pt; + va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) { + if (pmap_set_pt_cache_mode(kernel_l1pt, va)) + printf("pmap_bootstrap: WARNING! 
wrong cache mode for " + "primary L1 @ 0x%x\n", va); + } + + cpu_dcache_wbinv_all(); + cpu_l2cache_wbinv_all(); + cpu_tlb_flushID(); + cpu_cpwait(); + + PMAP_LOCK_INIT(kernel_pmap); + kernel_pmap->pm_active = -1; + kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; + TAILQ_INIT(&kernel_pmap->pm_pvlist); + + /* + * Reserve some special page table entries/VA space for temporary + * mapping of pages. + */ +#define SYSMAP(c, p, v, n) \ + v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); + + pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte); + pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte); + pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte); + pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte); + size = ((lastaddr - pmap_curmaxkvaddr) + L1_S_OFFSET) / L1_S_SIZE; + pmap_alloc_specials(&virtual_avail, + round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE, + &pmap_kernel_l2ptp_kva, NULL); + + size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE; + pmap_alloc_specials(&virtual_avail, + round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE, + &pmap_kernel_l2dtable_kva, NULL); + + pmap_alloc_specials(&virtual_avail, + 1, (vm_offset_t*)&_tmppt, NULL); + pmap_alloc_specials(&virtual_avail, + MAXDUMPPGS, (vm_offset_t *)&crashdumpmap, NULL); + SLIST_INIT(&l1_list); + TAILQ_INIT(&l1_lru_list); + mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF); + pmap_init_l1(l1, kernel_l1pt); + cpu_dcache_wbinv_all(); + cpu_l2cache_wbinv_all(); + + virtual_avail = round_page(virtual_avail); + virtual_end = lastaddr; + kernel_vm_end = pmap_curmaxkvaddr; + arm_nocache_startaddr = lastaddr; + mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF); + + pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb); +} + +/*************************************************** + * Pmap allocation/deallocation routines. + ***************************************************/ + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +void +pmap_release(pmap_t pmap) +{ + struct pcb *pcb; + + cpu_idcache_wbinv_all(); + cpu_l2cache_wbinv_all(); + cpu_tlb_flushID(); + cpu_cpwait(); + if (vector_page < KERNBASE) { + struct pcb *curpcb = PCPU_GET(curpcb); + pcb = thread0.td_pcb; + if (pmap_is_current(pmap)) { + /* + * Frob the L1 entry corresponding to the vector + * page so that it contains the kernel pmap's domain + * number. This will ensure pmap_remove() does not + * pull the current vector page out from under us. + */ + critical_enter(); + *pcb->pcb_pl1vec = pcb->pcb_l1vec; + cpu_domains(pcb->pcb_dacr); + cpu_setttb(pcb->pcb_pagedir); + critical_exit(); + } + pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE); + /* + * Make sure cpu_switch(), et al, DTRT. This is safe to do + * since this process has no remaining mappings of its own. 
+ */ + curpcb->pcb_pl1vec = pcb->pcb_pl1vec; + curpcb->pcb_l1vec = pcb->pcb_l1vec; + curpcb->pcb_dacr = pcb->pcb_dacr; + curpcb->pcb_pagedir = pcb->pcb_pagedir; + + } + pmap_free_l1(pmap); + PMAP_LOCK_DESTROY(pmap); + + dprintf("pmap_release()\n"); +} + + + +/* + * Helper function for pmap_grow_l2_bucket() + */ +static __inline int +pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap) +{ + struct l2_bucket *l2b; + pt_entry_t *ptep; + vm_paddr_t pa; + struct vm_page *pg; + + pg = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); + if (pg == NULL) + return (1); + pa = VM_PAGE_TO_PHYS(pg); + + if (pap) + *pap = pa; + + l2b = pmap_get_l2_bucket(pmap_kernel(), va); + + ptep = &l2b->l2b_kva[l2pte_index(va)]; + *ptep = L2_S_PROTO | pa | cache_mode | + L2_S_PROT(PTE_KERNEL, VM_PROT_READ | VM_PROT_WRITE); + PTE_SYNC(ptep); + return (0); +} + +/* + * This is the same as pmap_alloc_l2_bucket(), except that it is only + * used by pmap_growkernel(). + */ +static __inline struct l2_bucket * +pmap_grow_l2_bucket(pmap_t pm, vm_offset_t va) +{ + struct l2_dtable *l2; + struct l2_bucket *l2b; + struct l1_ttable *l1; + pd_entry_t *pl1pd; + u_short l1idx; + vm_offset_t nva; + + l1idx = L1_IDX(va); + + if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) { + /* + * No mapping at this address, as there is + * no entry in the L1 table. + * Need to allocate a new l2_dtable. + */ + nva = pmap_kernel_l2dtable_kva; + if ((nva & PAGE_MASK) == 0) { + /* + * Need to allocate a backing page + */ + if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) + return (NULL); + } + + l2 = (struct l2_dtable *)nva; + nva += sizeof(struct l2_dtable); + + if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva & + PAGE_MASK)) { + /* + * The new l2_dtable straddles a page boundary. + * Map in another page to cover it. + */ + if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) + return (NULL); + } + + pmap_kernel_l2dtable_kva = nva; + + /* + * Link it into the parent pmap + */ + pm->pm_l2[L2_IDX(l1idx)] = l2; + memset(l2, 0, sizeof(*l2)); + } + + l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; + + /* + * Fetch pointer to the L2 page table associated with the address. + */ + if (l2b->l2b_kva == NULL) { + pt_entry_t *ptep; + + /* + * No L2 page table has been allocated. Chances are, this + * is because we just allocated the l2_dtable, above. + */ + nva = pmap_kernel_l2ptp_kva; + ptep = (pt_entry_t *)nva; + if ((nva & PAGE_MASK) == 0) { + /* + * Need to allocate a backing page + */ + if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt, + &pmap_kernel_l2ptp_phys)) + return (NULL); + PTE_SYNC_RANGE(ptep, PAGE_SIZE / sizeof(pt_entry_t)); + } + memset(ptep, 0, L2_TABLE_SIZE_REAL); + l2->l2_occupancy++; + l2b->l2b_kva = ptep; + l2b->l2b_l1idx = l1idx; + l2b->l2b_phys = pmap_kernel_l2ptp_phys; + + pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL; + pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL; + } + + /* Distribute new L1 entry to all other L1s */ + SLIST_FOREACH(l1, &l1_list, l1_link) { + pl1pd = &l1->l1_kva[L1_IDX(va)]; + *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) | + L1_C_PROTO; + PTE_SYNC(pl1pd); + } + + return (l2b); +} + + +/* + * grow the number of kernel page table entries, if needed + */ +void +pmap_growkernel(vm_offset_t addr) +{ + pmap_t kpm = pmap_kernel(); + + if (addr <= pmap_curmaxkvaddr) + return; /* we are OK */ + + /* + * whoops! 
we need to add kernel PTPs + */ + + /* Map 1MB at a time */ + for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE) + pmap_grow_l2_bucket(kpm, pmap_curmaxkvaddr); + + /* + * flush out the cache, expensive but growkernel will happen so + * rarely + */ + cpu_dcache_wbinv_all(); + cpu_l2cache_wbinv_all(); + cpu_tlb_flushD(); + cpu_cpwait(); + kernel_vm_end = pmap_curmaxkvaddr; +} + + +/* + * Remove all pages from specified address space + * this aids process exit speeds. Also, this code + * is special cased for current process only, but + * can have the more generic (and slightly slower) + * mode enabled. This is much faster than pmap_remove + * in the case of running down an entire address space. + */ +void +pmap_remove_pages(pmap_t pmap) +{ + struct pv_entry *pv, *npv; + struct l2_bucket *l2b = NULL; + vm_page_t m; + pt_entry_t *pt; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { + if (pv->pv_flags & PVF_WIRED) { + /* Cannot remove wired pages now. */ + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + pmap->pm_stats.resident_count--; + l2b = pmap_get_l2_bucket(pmap, pv->pv_va); + KASSERT(l2b != NULL, ("No L2 bucket in pmap_remove_pages")); + pt = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; + m = PHYS_TO_VM_PAGE(*pt & L2_ADDR_MASK); + KASSERT((vm_offset_t)m >= KERNBASE, ("Trying to access non-existent page va %x pte %x", pv->pv_va, *pt)); + *pt = 0; + PTE_SYNC(pt); + npv = TAILQ_NEXT(pv, pv_plist); + pmap_nuke_pv(m, pmap, pv); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); + pmap_free_pv_entry(pv); + pmap_free_l2_bucket(pmap, l2b, 1); + } + vm_page_unlock_queues(); + cpu_tlb_flushID(); + cpu_cpwait(); + PMAP_UNLOCK(pmap); +} + + +/*************************************************** + * Low level mapping routines..... + ***************************************************/ + +#ifdef ARM_HAVE_SUPERSECTIONS +/* Map a super section into the KVA. */ + +void +pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags) +{ + pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) | + (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL, + VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL); + struct l1_ttable *l1; + vm_offset_t va0, va_end; + + KASSERT(((va | pa) & L1_SUP_OFFSET) == 0, + ("Not a valid super section mapping")); + if (flags & SECTION_CACHE) + pd |= pte_l1_s_cache_mode; + else if (flags & SECTION_PT) + pd |= pte_l1_s_cache_mode_pt; + va0 = va & L1_SUP_FRAME; + va_end = va + L1_SUP_SIZE; + SLIST_FOREACH(l1, &l1_list, l1_link) { + va = va0; + for (; va < va_end; va += L1_S_SIZE) { + l1->l1_kva[L1_IDX(va)] = pd; + PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); + } + } +} +#endif + +/* Map a section into the KVA. */ + +void +pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags) +{ + pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL, + VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL); + struct l1_ttable *l1; + + KASSERT(((va | pa) & L1_S_OFFSET) == 0, + ("Not a valid section mapping")); + if (flags & SECTION_CACHE) + pd |= pte_l1_s_cache_mode; + else if (flags & SECTION_PT) + pd |= pte_l1_s_cache_mode_pt; + SLIST_FOREACH(l1, &l1_list, l1_link) { + l1->l1_kva[L1_IDX(va)] = pd; + PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); + } +} + +/* + * Make a temporary mapping for a physical address. This is only intended + * to be used for panic dumps. 
+ */ +void * +pmap_kenter_temp(vm_paddr_t pa, int i) +{ + vm_offset_t va; + + va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); + pmap_kenter(va, pa); + return ((void *)crashdumpmap); +} + +/* + * add a wired page to the kva + * note that in order for the mapping to take effect -- you + * should do a invltlb after doing the pmap_kenter... + */ +static PMAP_INLINE void +pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags) +{ + struct l2_bucket *l2b; + pt_entry_t *pte; + pt_entry_t opte; + + PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n", + (uint32_t) va, (uint32_t) pa)); + + + l2b = pmap_get_l2_bucket(pmap_kernel(), va); + if (l2b == NULL) + l2b = pmap_grow_l2_bucket(pmap_kernel(), va); + KASSERT(l2b != NULL, ("No L2 Bucket")); + pte = &l2b->l2b_kva[l2pte_index(va)]; + opte = *pte; + PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n", + (uint32_t) pte, opte, *pte)); + if (l2pte_valid(opte)) { + cpu_tlb_flushD_SE(va); + cpu_cpwait(); + } else { + if (opte == 0) + l2b->l2b_occupancy++; + } + *pte = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, + VM_PROT_READ | VM_PROT_WRITE); + if (flags & KENTER_CACHE) + *pte |= pte_l2_s_cache_mode; + if (flags & KENTER_USER) + *pte |= L2_S_PROT_U; + PTE_SYNC(pte); +} + +void +pmap_kenter(vm_offset_t va, vm_paddr_t pa) +{ + pmap_kenter_internal(va, pa, KENTER_CACHE); +} + +void +pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa) +{ + + pmap_kenter_internal(va, pa, 0); +} + +void +pmap_kenter_user(vm_offset_t va, vm_paddr_t pa) +{ + + pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER); + /* + * Call pmap_fault_fixup now, to make sure we'll have no exception + * at the first use of the new address, or bad things will happen, + * as we use one of these addresses in the exception handlers. + */ + pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1); +} + +/* + * remove a page from the kernel pagetables + */ +void +pmap_kremove(vm_offset_t va) +{ + struct l2_bucket *l2b; + pt_entry_t *pte, opte; + + l2b = pmap_get_l2_bucket(pmap_kernel(), va); + if (!l2b) + return; + KASSERT(l2b != NULL, ("No L2 Bucket")); + pte = &l2b->l2b_kva[l2pte_index(va)]; + opte = *pte; + if (l2pte_valid(opte)) { + va = va & ~PAGE_MASK; + cpu_tlb_flushD_SE(va); + cpu_cpwait(); + *pte = 0; + } +} + + +/* + * Used to map a range of physical addresses into kernel + * virtual address space. + * + * The value passed in '*virt' is a suggested virtual address for + * the mapping. Architectures which can support a direct-mapped + * physical to virtual region can return the appropriate address + * within that region, leaving '*virt' unchanged. Other + * architectures should map the pages starting at '*virt' and + * update '*virt' with the first usable address after the mapped + * region. + */ +vm_offset_t +pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) +{ + vm_offset_t sva = *virt; + vm_offset_t va = sva; + + PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, " + "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end, + prot)); + + while (start < end) { + pmap_kenter(va, start); + va += PAGE_SIZE; + start += PAGE_SIZE; + } + *virt = va; + return (sva); +} + +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. 
+ */ +void +pmap_qenter(vm_offset_t va, vm_page_t *m, int count) +{ + int i; + + for (i = 0; i < count; i++) { + pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]), + KENTER_CACHE); + va += PAGE_SIZE; + } +} + + +/* + * this routine jerks page mappings from the + * kernel -- it is meant only for temporary mappings. + */ +void +pmap_qremove(vm_offset_t va, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (vtophys(va)) + pmap_kremove(va); + + va += PAGE_SIZE; + } +} + + +/* + * pmap_object_init_pt preloads the ptes for a given object + * into the specified pmap. This eliminates the blast of soft + * faults on process startup and immediately after an mmap. + */ +void +pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, + vm_pindex_t pindex, vm_size_t size) +{ + + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, + ("pmap_object_init_pt: non-device object")); +} + + +/* + * pmap_is_prefaultable: + * + * Return whether or not the specified virtual address is elgible + * for prefault. + */ +boolean_t +pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + pd_entry_t *pde; + pt_entry_t *pte; + + if (!pmap_get_pde_pte(pmap, addr, &pde, &pte)) + return (FALSE); + KASSERT(pte != NULL, ("Valid mapping but no pte ?")); + if (*pte == 0) + return (TRUE); + return (FALSE); +} + +/* + * Fetch pointers to the PDE/PTE for the given pmap/VA pair. + * Returns TRUE if the mapping exists, else FALSE. + * + * NOTE: This function is only used by a couple of arm-specific modules. + * It is not safe to take any pmap locks here, since we could be right + * in the middle of debugging the pmap anyway... + * + * It is possible for this routine to return FALSE even though a valid + * mapping does exist. This is because we don't lock, so the metadata + * state may be inconsistent. + * + * NOTE: We can return a NULL *ptp in the case where the L1 pde is + * a "section" mapping. + */ +boolean_t +pmap_get_pde_pte(pmap_t pm, vm_offset_t va, pd_entry_t **pdp, pt_entry_t **ptp) +{ + struct l2_dtable *l2; + pd_entry_t *pl1pd, l1pd; + pt_entry_t *ptep; + u_short l1idx; + + if (pm->pm_l1 == NULL) + return (FALSE); + + l1idx = L1_IDX(va); + *pdp = pl1pd = &pm->pm_l1->l1_kva[l1idx]; + l1pd = *pl1pd; + + if (l1pte_section_p(l1pd)) { + *ptp = NULL; + return (TRUE); + } + + if (pm->pm_l2 == NULL) + return (FALSE); + + l2 = pm->pm_l2[L2_IDX(l1idx)]; + + if (l2 == NULL || + (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { + return (FALSE); + } + + *ptp = &ptep[l2pte_index(va)]; + return (TRUE); +} + +/* + * Routine: pmap_remove_all + * Function: + * Removes this physical page from + * all physical maps in which it resides. + * Reflects back modify bits to the pager. + * + * Notes: + * Original versions of this routine were very + * inefficient because they iteratively called + * pmap_remove (slow...) + */ +void +pmap_remove_all(vm_page_t m) +{ + pv_entry_t pv; + pt_entry_t *ptep; + struct l2_bucket *l2b; + boolean_t flush = FALSE; + pmap_t curpm; + int flags = 0; + +#if defined(PMAP_DEBUG) + /* + * XXX This makes pmap_remove_all() illegal for non-managed pages! 
+ */ + if (m->flags & PG_FICTITIOUS) { + panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); + } +#endif + + if (TAILQ_EMPTY(&m->md.pv_list)) + return; + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + pmap_remove_write(m); + curpm = vmspace_pmap(curproc->p_vmspace); + while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + if (flush == FALSE && (pv->pv_pmap == curpm || + pv->pv_pmap == pmap_kernel())) + flush = TRUE; + + PMAP_LOCK(pv->pv_pmap); + l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va); + KASSERT(l2b != NULL, ("No l2 bucket")); + ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; + *ptep = 0; + PTE_SYNC_CURRENT(pv->pv_pmap, ptep); + pmap_free_l2_bucket(pv->pv_pmap, l2b, 1); + if (pv->pv_flags & PVF_WIRED) + pv->pv_pmap->pm_stats.wired_count--; + pv->pv_pmap->pm_stats.resident_count--; + flags |= pv->pv_flags; + pmap_nuke_pv(m, pv->pv_pmap, pv); + PMAP_UNLOCK(pv->pv_pmap); + pmap_free_pv_entry(pv); + } + + if (flush) { + if (PV_BEEN_EXECD(flags)) + cpu_tlb_flushID(); + else + cpu_tlb_flushD(); + } + vm_page_flag_clear(m, PG_WRITEABLE); +} + + +/* + * Set the physical protection on the + * specified range of this map as requested. + */ +void +pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + struct l2_bucket *l2b; + pt_entry_t *ptep, pte; + vm_offset_t next_bucket; + u_int flags; + int flush; + + CTR4(KTR_PMAP, "pmap_protect: pmap %p sva 0x%08x eva 0x%08x prot %x", + pm, sva, eva, prot); + + if ((prot & VM_PROT_READ) == 0) { + pmap_remove(pm, sva, eva); + return; + } + + if (prot & VM_PROT_WRITE) { + /* + * If this is a read->write transition, just ignore it and let + * vm_fault() take care of it later. + */ + return; + } + + vm_page_lock_queues(); + PMAP_LOCK(pm); + + /* + * OK, at this point, we know we're doing write-protect operation. + * If the pmap is active, write-back the range. + */ + + flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1; + flags = 0; + + while (sva < eva) { + next_bucket = L2_NEXT_BUCKET(sva); + if (next_bucket > eva) + next_bucket = eva; + + l2b = pmap_get_l2_bucket(pm, sva); + if (l2b == NULL) { + sva = next_bucket; + continue; + } + + ptep = &l2b->l2b_kva[l2pte_index(sva)]; + + while (sva < next_bucket) { + if ((pte = *ptep) != 0 && (pte & L2_S_PROT_W) != 0) { + struct vm_page *pg; + u_int f; + + pg = PHYS_TO_VM_PAGE(l2pte_pa(pte)); + pte &= ~L2_S_PROT_W; + *ptep = pte; + PTE_SYNC(ptep); + + if (pg != NULL) { + f = pmap_modify_pv(pg, pm, sva, + PVF_WRITE, 0); + vm_page_dirty(pg); + } else + f = PVF_REF | PVF_EXEC; + + if (flush >= 0) { + flush++; + flags |= f; + } else + if (PV_BEEN_EXECD(f)) + cpu_tlb_flushID_SE(sva); + else + if (PV_BEEN_REFD(f)) + cpu_tlb_flushD_SE(sva); + } + + sva += PAGE_SIZE; + ptep++; + } + } + + + if (flush) { + if (PV_BEEN_EXECD(flags)) + cpu_tlb_flushID(); + else + if (PV_BEEN_REFD(flags)) + cpu_tlb_flushD(); + } + vm_page_unlock_queues(); + + PMAP_UNLOCK(pm); +} + + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. + * + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. 
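+ *
+ * Call sketch (pm, va and m are illustrative placeholders):
+ *
+ *	pmap_enter(pm, va, VM_PROT_READ | VM_PROT_WRITE, m,
+ *	    VM_PROT_READ | VM_PROT_WRITE, FALSE);
+ *
+ * pmap_enter() takes the page queues and pmap locks itself and hands
+ * the actual work to pmap_enter_locked() below.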
+ */ + +void +pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, + vm_prot_t prot, boolean_t wired) +{ + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + pmap_enter_locked(pmap, va, m, prot, wired, M_WAITOK); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * The page queues and pmap must be locked. + */ +static void +pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + boolean_t wired, int flags) +{ + struct l2_bucket *l2b = NULL; + struct vm_page *opg; + struct pv_entry *pve = NULL; + pt_entry_t *ptep, npte, opte; + u_int nflags; + u_int oflags; + vm_paddr_t pa; + + PMAP_ASSERT_LOCKED(pmap); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if (va == vector_page) { + pa = systempage.pv_pa; + m = NULL; + } else + pa = VM_PAGE_TO_PHYS(m); + + nflags = 0; + + if (prot & VM_PROT_WRITE) + nflags |= PVF_WRITE; + if (prot & VM_PROT_EXECUTE) + nflags |= PVF_EXEC; + if (wired) + nflags |= PVF_WIRED; + + PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, prot = %x, " + "wired = %x\n", (uint32_t) pmap, va, (uint32_t) m, prot, wired)); + + if (pmap == pmap_kernel()) { + l2b = pmap_get_l2_bucket(pmap, va); + if (l2b == NULL) + l2b = pmap_grow_l2_bucket(pmap, va); + } else { +do_l2b_alloc: + l2b = pmap_alloc_l2_bucket(pmap, va); + if (l2b == NULL) { + if (flags & M_WAITOK) { + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + VM_WAIT; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + goto do_l2b_alloc; + } + return; + } + } + + ptep = &l2b->l2b_kva[l2pte_index(va)]; + + opte = *ptep; + npte = pa; + oflags = 0; + if (opte) { + /* + * There is already a mapping at this address. + * If the physical address is different, lookup the + * vm_page. + */ + if (l2pte_pa(opte) != pa) + opg = PHYS_TO_VM_PAGE(l2pte_pa(opte)); + else + opg = m; + } else + opg = NULL; + + if ((prot & (VM_PROT_ALL)) || + (!m || m->md.pvh_attrs & PVF_REF)) { + /* + * - The access type indicates that we don't need + * to do referenced emulation. + * OR + * - The physical page has already been referenced + * so no need to re-do referenced emulation here. + */ + npte |= L2_S_PROTO; + + nflags |= PVF_REF; + + if (m && ((prot & VM_PROT_WRITE) != 0 || + (m->md.pvh_attrs & PVF_MOD))) { + /* + * This is a writable mapping, and the + * page's mod state indicates it has + * already been modified. Make it + * writable from the outset. + */ + nflags |= PVF_MOD; + if (!(m->md.pvh_attrs & PVF_MOD)) + vm_page_dirty(m); + } + if (m && opte) + vm_page_flag_set(m, PG_REFERENCED); + } else { + /* + * Need to do page referenced emulation. + */ + npte |= L2_TYPE_INV; + } + + if (prot & VM_PROT_WRITE) { + npte |= L2_S_PROT_W; + if (m != NULL) + vm_page_flag_set(m, PG_WRITEABLE); + } + + npte |= pte_l2_s_cache_mode; + + /* + * Make sure userland mappings get the right permissions + */ + if (pmap != pmap_kernel() && va != vector_page) { + npte |= L2_S_PROT_U; + } + + if (m && m == opg) { + /* + * We're changing the attrs of an existing mapping. + */ + oflags = pmap_modify_pv(m, pmap, va, + PVF_WRITE | PVF_EXEC | PVF_WIRED | + PVF_MOD | PVF_REF, nflags); + } else { + /* + * New mapping, or changing the backing page + * of an existing mapping. + */ + if (opg) { + /* + * Replacing an existing mapping with a new one. 
+ * It is part of our managed memory so we + * must remove it from the PV list + */ + if ((pve = pmap_remove_pv(opg, pmap, va))) { + oflags = pve->pv_flags; + + if (m && ((m->flags & (PG_UNMANAGED | PG_FICTITIOUS)))) { + pmap_free_pv_entry(pve); + pve = NULL; + } + } + } + + if ((m && !(m->flags & (PG_UNMANAGED | PG_FICTITIOUS)))) { + if ((!pve) && (pve = pmap_get_pv_entry()) == NULL) + panic("pmap_enter: no pv entries"); + + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, + ("pmap_enter: managed mapping within the clean submap")); + KASSERT(pve != NULL, ("No pv")); + pmap_enter_pv(m, pve, pmap, va, nflags); + } + } + + /* + * Keep the stats up to date + */ + if (opte == 0) { + l2b->l2b_occupancy++; + pmap->pm_stats.resident_count++; + } + + + /* + * If this is just a wiring change, the two PTEs will be + * identical, so there's no need to update the page table. + */ + if (npte != opte) { + boolean_t is_cached = pmap_is_current(pmap); + + *ptep = npte; + if (is_cached) { + /* + * We only need to frob the cache/tlb if this pmap + * is current + */ + PTE_SYNC(ptep); + if (L1_IDX(va) != L1_IDX(vector_page) && + l2pte_valid(npte)) { + /* + * This mapping is likely to be accessed as + * soon as we return to userland. Fix up the + * L1 entry to avoid taking another + * page/domain fault. + */ + pd_entry_t *pl1pd, l1pd; + + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | + L1_C_PROTO; + if (*pl1pd != l1pd) { + *pl1pd = l1pd; + PTE_SYNC(pl1pd); + } + } + } + + if (PV_BEEN_EXECD(oflags)) + cpu_tlb_flushID_SE(va); + else if (PV_BEEN_REFD(oflags)) + cpu_tlb_flushD_SE(va); + } +} + +/* + * Maps a sequence of resident pages belonging to the same object. + * The sequence begins with the given page m_start. This page is + * mapped at the given virtual address start. Each subsequent page is + * mapped at a virtual address that is offset from start by the same + * amount as the page is offset from m_start within the object. The + * last page in the sequence is the page with the largest offset from + * m_start that can be mapped at a virtual address less than the given + * virtual address end. Not every virtual page between start and end + * is mapped; only those for which a resident page exists with the + * corresponding offset from m_start are mapped. + */ +void +pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, + vm_page_t m_start, vm_prot_t prot) +{ + vm_page_t m; + vm_pindex_t diff, psize; + + psize = atop(end - start); + m = m_start; + PMAP_LOCK(pmap); + while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { + pmap_enter_locked(pmap, start + ptoa(diff), m, prot & + (VM_PROT_READ | VM_PROT_EXECUTE), FALSE, M_NOWAIT); + m = TAILQ_NEXT(m, listq); + } + PMAP_UNLOCK(pmap); +} + +/* + * this code makes some *MAJOR* assumptions: + * 1. Current pmap & pmap exists. + * 2. Not wired. + * 3. Read access. + * 4. No page table pages. + * but is *MUCH* faster than pmap_enter... + */ + +void +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + + PMAP_LOCK(pmap); + pmap_enter_locked(pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), + FALSE, M_NOWAIT); + PMAP_UNLOCK(pmap); +} + +/* + * Routine: pmap_change_wiring + * Function: Change the wiring attribute for a map/virtual-address + * pair. + * In/out conditions: + * The mapping must already exist in the pmap. 
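+ *
+ * Illustrative call, e.g. to unwire an existing mapping (pm and va are
+ * placeholder names):
+ *
+ *	pmap_change_wiring(pm, va, FALSE);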
+ */ +void +pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +{ + struct l2_bucket *l2b; + pt_entry_t *ptep, pte; + vm_page_t pg; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + l2b = pmap_get_l2_bucket(pmap, va); + KASSERT(l2b, ("No l2b bucket in pmap_change_wiring")); + ptep = &l2b->l2b_kva[l2pte_index(va)]; + pte = *ptep; + pg = PHYS_TO_VM_PAGE(l2pte_pa(pte)); + if (pg) + pmap_modify_pv(pg, pmap, va, PVF_WIRED, wired); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + */ +void +pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, + vm_size_t len, vm_offset_t src_addr) +{ +} + + +/* + * Routine: pmap_extract + * Function: + * Extract the physical page address associated + * with the given map/virtual_address pair. + */ +vm_paddr_t +pmap_extract(pmap_t pm, vm_offset_t va) +{ + struct l2_dtable *l2; + pd_entry_t l1pd; + pt_entry_t *ptep, pte; + vm_paddr_t pa; + u_int l1idx; + l1idx = L1_IDX(va); + + PMAP_LOCK(pm); + l1pd = pm->pm_l1->l1_kva[l1idx]; + if (l1pte_section_p(l1pd)) { + /* + * These should only happen for pmap_kernel() + */ + KASSERT(pm == pmap_kernel(), ("huh")); + /* XXX: what to do about the bits > 32 ? */ + if (l1pd & L1_S_SUPERSEC) + pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); + else + pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); + } else { + /* + * Note that we can't rely on the validity of the L1 + * descriptor as an indication that a mapping exists. + * We have to look it up in the L2 dtable. + */ + l2 = pm->pm_l2[L2_IDX(l1idx)]; + + if (l2 == NULL || + (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { + PMAP_UNLOCK(pm); + return (0); + } + + ptep = &ptep[l2pte_index(va)]; + pte = *ptep; + + if (pte == 0) { + PMAP_UNLOCK(pm); + return (0); + } + + switch (pte & L2_TYPE_MASK) { + case L2_TYPE_L: + pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); + break; + + default: + pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); + break; + } + } + + PMAP_UNLOCK(pm); + return (pa); +} + +/* + * Atomically extract and hold the physical page with the given + * pmap and virtual address pair if that mapping permits the given + * protection. + * + */ +vm_page_t +pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +{ + struct l2_dtable *l2; + pd_entry_t l1pd; + pt_entry_t *ptep, pte; + vm_paddr_t pa; + vm_page_t m = NULL; + u_int l1idx; + l1idx = L1_IDX(va); + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + l1pd = pmap->pm_l1->l1_kva[l1idx]; + if (l1pte_section_p(l1pd)) { + /* + * These should only happen for pmap_kernel() + */ + KASSERT(pmap == pmap_kernel(), ("huh")); + /* XXX: what to do about the bits > 32 ? */ + if (l1pd & L1_S_SUPERSEC) + pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); + else + pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); + if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { + m = PHYS_TO_VM_PAGE(pa); + vm_page_hold(m); + } + } else { + /* + * Note that we can't rely on the validity of the L1 + * descriptor as an indication that a mapping exists. + * We have to look it up in the L2 dtable. 
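+ * The lookup below mirrors pmap_extract(): L2_IDX(l1idx) selects the
+ * l2_dtable, L2_BUCKET(l1idx) the bucket holding the PTE array, and
+ * l2pte_index(va) the final entry; a NULL or zero result at any step
+ * means there is no mapping to hold.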
+ */ + l2 = pmap->pm_l2[L2_IDX(l1idx)]; + + if (l2 == NULL || + (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + return (NULL); + } + + ptep = &ptep[l2pte_index(va)]; + pte = *ptep; + + if (pte == 0) { + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + return (NULL); + } + if (pte & L2_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { + switch (pte & L2_TYPE_MASK) { + case L2_TYPE_L: + pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); + break; + default: + pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); + break; + } + m = PHYS_TO_VM_PAGE(pa); + vm_page_hold(m); + } + } + + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + return (m); +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. + */ + +int +pmap_pinit(pmap_t pmap) +{ + PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap)); + + PMAP_LOCK_INIT(pmap); + pmap_alloc_l1(pmap); + bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); + + pmap->pm_active = 0; + + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); + pmap->pm_stats.resident_count = 1; + if (vector_page < KERNBASE) { + pmap_enter(pmap, vector_page, + VM_PROT_READ, PHYS_TO_VM_PAGE(systempage.pv_pa), + VM_PROT_READ, 1); + } + return (1); +} + + +/*************************************************** + * page management routines. + ***************************************************/ + + +static void +pmap_free_pv_entry(pv_entry_t pv) +{ + pv_entry_count--; + uma_zfree(pvzone, pv); +} + + +/* + * get a new pv_entry, allocating a block from the system + * when needed. + * the memory allocation is performed bypassing the malloc code + * because of the possibility of allocations at interrupt time. + */ +static pv_entry_t +pmap_get_pv_entry(void) +{ + pv_entry_t ret_value; + + pv_entry_count++; + if (pv_entry_count > pv_entry_high_water) + pagedaemon_wakeup(); + ret_value = uma_zalloc(pvzone, M_NOWAIT); + return ret_value; +} + +/* + * Remove the given range of addresses from the specified map. + * + * It is assumed that the start and end are properly + * rounded to the page size. + */ +#define PMAP_REMOVE_CLEAN_LIST_SIZE 3 +void +pmap_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva) +{ + struct l2_bucket *l2b; + vm_offset_t next_bucket; + pt_entry_t *ptep; + u_int total; + u_int mappings, is_exec, is_refd; + int flushall = 0; + + + /* + * we lock in the pmap => pv_head direction + */ + + vm_page_lock_queues(); + PMAP_LOCK(pm); + total = 0; + while (sva < eva) { + /* + * Do one L2 bucket's worth at a time. + */ + next_bucket = L2_NEXT_BUCKET(sva); + if (next_bucket > eva) + next_bucket = eva; + + l2b = pmap_get_l2_bucket(pm, sva); + if (l2b == NULL) { + sva = next_bucket; + continue; + } + + ptep = &l2b->l2b_kva[l2pte_index(sva)]; + mappings = 0; + + while (sva < next_bucket) { + struct vm_page *pg; + pt_entry_t pte; + vm_paddr_t pa; + + pte = *ptep; + + if (pte == 0) { + /* + * Nothing here, move along + */ + sva += PAGE_SIZE; + ptep++; + continue; + } + + pm->pm_stats.resident_count--; + pa = l2pte_pa(pte); + is_exec = 0; + is_refd = 1; + + /* + * Update flags. In a number of circumstances, + * we could cluster a lot of these and do a + * number of sequential pages in one go. 
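+ * (The loop below already does a limited form of this: TLB entries are
+ * flushed one at a time only for the first few pages, and once
+ * PMAP_REMOVE_CLEAN_LIST_SIZE mappings have been removed a single full
+ * flush is issued after the loop instead.)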
+ */ + if ((pg = PHYS_TO_VM_PAGE(pa)) != NULL) { + struct pv_entry *pve; + + pve = pmap_remove_pv(pg, pm, sva); + if (pve) { + is_exec = PV_BEEN_EXECD(pve->pv_flags); + is_refd = PV_BEEN_REFD(pve->pv_flags); + pmap_free_pv_entry(pve); + } + } + + if (pmap_is_current(pm)) { + total++; + if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) { + if (is_exec) + cpu_tlb_flushID_SE(sva); + else if (is_refd) + cpu_tlb_flushD_SE(sva); + } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE) { + flushall = 1; + } + } + *ptep = 0; + PTE_SYNC(ptep); + + sva += PAGE_SIZE; + ptep++; + mappings++; + } + + pmap_free_l2_bucket(pm, l2b, mappings); + } + + vm_page_unlock_queues(); + if (flushall) + cpu_tlb_flushID(); + PMAP_UNLOCK(pm); +} + +/* + * pmap_zero_page() + * + * Zero a given physical page by mapping it at a page hook point. + * In doing the zero page op, the page we zero is mapped cachable, as with + * StrongARM accesses to non-cached pages are non-burst making writing + * _any_ bulk data very slow. + */ +#if (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_SA1) != 0 || \ + defined(CPU_XSCALE_CORE3) +void +pmap_zero_page_generic(vm_paddr_t phys, int off, int size) +{ + +#ifdef DEBUG + struct vm_page *pg = PHYS_TO_VM_PAGE(phys); + + if (pg->md.pvh_list != NULL) + panic("pmap_zero_page: page has mappings"); +#endif + + if (_arm_bzero && size >= _min_bzero_size && + _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0) + return; + + mtx_lock(&cmtx); + /* + * Hook in the page, zero it, invalidate the TLB as needed. + * + * Note the temporary zero-page mapping must be a non-cached page in + * order to work without corruption when write-allocate is enabled. + */ + *cdst_pte = L2_S_PROTO | phys | L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode; + cpu_tlb_flushD_SE(cdstp); + cpu_cpwait(); + if (off || size != PAGE_SIZE) + bzero((void *)(cdstp + off), size); + else + bzero_page(cdstp); + + mtx_unlock(&cmtx); +} +#endif /* (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_SA1) != 0 */ + +/* + * pmap_zero_page zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + */ +void +pmap_zero_page(vm_page_t m) +{ + pmap_zero_page_func(VM_PAGE_TO_PHYS(m), 0, PAGE_SIZE); +} + + +/* + * pmap_zero_page_area zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + * + * off and size may not cover an area beyond a single hardware page. + */ +void +pmap_zero_page_area(vm_page_t m, int off, int size) +{ + + pmap_zero_page_func(VM_PAGE_TO_PHYS(m), off, size); +} + + +/* + * pmap_zero_page_idle zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. This + * is intended to be called from the vm_pagezero process only and + * outside of Giant. + */ +void +pmap_zero_page_idle(vm_page_t m) +{ + + pmap_zero_page(m); +} + +/* + * pmap_copy_page copies the specified (machine independent) + * page by mapping the page into virtual memory and using + * bcopy to copy the page, one machine dependent page at a + * time. + */ + +/* + * pmap_copy_page() + * + * Copy one physical page into another, by mapping the pages into + * hook points. The same comment regarding cachability as in + * pmap_zero_page also applies here. 
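+ * (As in pmap_zero_page_generic() above, the csrcp/cdstp hook mappings
+ * are only ever installed while cmtx is held, so concurrent copies are
+ * serialized.)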
+ */ +#if (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_SA1) != 0 || \ + defined(CPU_XSCALE_CORE3) +void +pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst) +{ + /* + * Hold the source page's lock for the duration of the copy + * so that no other mappings can be created while we have a + * potentially aliased mapping. + * Map the pages into the page hook points, copy them, and purge + * the cache for the appropriate page. Invalidate the TLB + * as required. + */ + mtx_lock(&cmtx); + + /* For ARMv6 using System bit is deprecated and mapping with AP + * bits set to 0x0 makes page not accessible. csrc_pte is mapped + * read/write until proper mapping defines are created for ARMv6. + */ + *csrc_pte = L2_S_PROTO | src | + L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode; + PTE_SYNC(csrc_pte); + *cdst_pte = L2_S_PROTO | dst | + L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode; + PTE_SYNC(cdst_pte); + cpu_tlb_flushD_SE(csrcp); + cpu_tlb_flushD_SE(cdstp); + cpu_cpwait(); + bcopy_page(csrcp, cdstp); + mtx_unlock(&cmtx); +} +#endif /* (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_SA1) != 0 */ + +void +pmap_copy_page(vm_page_t src, vm_page_t dst) +{ + + if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size && + _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst), + (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0) + return; + + pmap_copy_page_func(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); +} + +/* + * this routine returns true if a physical page resides + * in the given pmap. + */ +boolean_t +pmap_page_exists_quick(pmap_t pmap, vm_page_t m) +{ + pv_entry_t pv; + int loops = 0; + + if (m->flags & PG_FICTITIOUS) + return (FALSE); + + /* + * Not found, check current mappings returning immediately + */ + for (pv = TAILQ_FIRST(&m->md.pv_list); + pv; + pv = TAILQ_NEXT(pv, pv_list)) { + if (pv->pv_pmap == pmap) { + return (TRUE); + } + loops++; + if (loops >= 16) + break; + } + return (FALSE); +} + +/* + * pmap_page_wired_mappings: + * + * Return the number of managed mappings to the given physical page + * that are wired. + */ +int +pmap_page_wired_mappings(vm_page_t m) +{ + pv_entry_t pv; + int count; + + count = 0; + if ((m->flags & PG_FICTITIOUS) != 0) + return (count); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) + if ((pv->pv_flags & PVF_WIRED) != 0) + count++; + return (count); +} + +/* + * pmap_ts_referenced: + * + * Return the count of reference bits for a page, clearing all of them. + */ +int +pmap_ts_referenced(vm_page_t m) +{ + + if (m->flags & PG_FICTITIOUS) + return (0); + return (pmap_clearbit(m, PVF_REF)); +} + + +boolean_t +pmap_is_modified(vm_page_t m) +{ + + if (m->md.pvh_attrs & PVF_MOD) + return (TRUE); + + return(FALSE); +} + + +/* + * Clear the modify bits on the specified physical page. + */ +void +pmap_clear_modify(vm_page_t m) +{ + + if (m->md.pvh_attrs & PVF_MOD) + pmap_clearbit(m, PVF_MOD); +} + + +/* + * pmap_clear_reference: + * + * Clear the reference bit on the specified physical page. + */ +void +pmap_clear_reference(vm_page_t m) +{ + + if (m->md.pvh_attrs & PVF_REF) + pmap_clearbit(m, PVF_REF); +} + + +/* + * Clear the write and modified bits in each of the given page's mappings. 
+ */ +void +pmap_remove_write(vm_page_t m) +{ + + if (m->flags & PG_WRITEABLE) + pmap_clearbit(m, PVF_WRITE); +} + + +/* + * perform the pmap work for mincore + */ +int +pmap_mincore(pmap_t pmap, vm_offset_t addr) +{ + printf("pmap_mincore()\n"); + + return (0); +} + + +/* + * Increase the starting virtual address of the given mapping if a + * different alignment might result in more superpage mappings. + */ +void +pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, + vm_offset_t *addr, vm_size_t size) +{ +} + + +/* + * Map a set of physical memory pages into the kernel virtual + * address space. Return a pointer to where it is mapped. This + * routine is intended to be used for mapping device memory, + * NOT real memory. + */ +void * +pmap_mapdev(vm_offset_t pa, vm_size_t size) +{ + vm_offset_t va, tmpva, offset; + + offset = pa & PAGE_MASK; + size = roundup(size, PAGE_SIZE); + + GIANT_REQUIRED; + + va = kmem_alloc_nofault(kernel_map, size); + if (!va) + panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); + for (tmpva = va; size > 0;) { + pmap_kenter_internal(tmpva, pa, 0); + size -= PAGE_SIZE; + tmpva += PAGE_SIZE; + pa += PAGE_SIZE; + } + + return ((void *)(va + offset)); +} + +#define BOOTSTRAP_DEBUG + +/* + * pmap_map_section: + * + * Create a single section mapping. + */ +void +pmap_map_section(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, + int prot, int cache) +{ + pd_entry_t *pde = (pd_entry_t *) l1pt; + pd_entry_t fl; + + KASSERT(((va | pa) & L1_S_OFFSET) == 0, ("ouin2")); + + switch (cache) { + case PTE_NOCACHE: + default: + fl = 0; + break; + + case PTE_CACHE: + fl = pte_l1_s_cache_mode; + break; + + case PTE_PAGETABLE: + fl = pte_l1_s_cache_mode_pt; + break; + } + + pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | + L1_S_PROT(PTE_KERNEL, prot) | fl | L1_S_DOM(PMAP_DOMAIN_KERNEL); + PTE_SYNC(&pde[va >> L1_S_SHIFT]); + +} + +/* + * pmap_link_l2pt: + * + * Link the L2 page table specified by l2pv.pv_pa into the L1 + * page table at the slot for "va". + */ +void +pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv) +{ + pd_entry_t *pde = (pd_entry_t *) l1pt, proto; + u_int slot = va >> L1_S_SHIFT; + + proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO; + +#ifdef VERBOSE_INIT_ARM + printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va); +#endif + + pde[slot + 0] = proto | (l2pv->pv_pa + 0x000); + + PTE_SYNC(&pde[slot]); + + SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list); + +} + +/* + * pmap_map_entry + * + * Create a single page mapping. + */ +void +pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, + int cache) +{ + pd_entry_t *pde = (pd_entry_t *) l1pt; + pt_entry_t fl; + pt_entry_t *pte; + + KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin")); + + switch (cache) { + case PTE_NOCACHE: + default: + fl = 0; + break; + + case PTE_CACHE: + fl = pte_l2_s_cache_mode; + break; + + case PTE_PAGETABLE: + fl = pte_l2_s_cache_mode_pt; + break; + } + + if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) + panic("pmap_map_entry: no L2 table for VA 0x%08x", va); + + pte = (pt_entry_t *) kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK); + + if (pte == NULL) + panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va); + + pte[l2pte_index(va)] = + L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | fl; + PTE_SYNC(&pte[l2pte_index(va)]); +} + +/* + * pmap_map_chunk: + * + * Map a chunk of memory using the most efficient mappings + * possible (section. 
large page, small page) into the + * provided L1 and L2 tables at the specified virtual address. + */ +vm_size_t +pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, + vm_size_t size, int prot, int cache) +{ + pd_entry_t *pde = (pd_entry_t *) l1pt; + pt_entry_t *pte, f1, f2s, f2l; + vm_size_t resid; + int i; + + resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); + + if (l1pt == 0) + panic("pmap_map_chunk: no L1 table provided"); + +#ifdef VERBOSE_INIT_ARM + printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x " + "prot=0x%x cache=%d\n", pa, va, size, resid, prot, cache); +#endif + + switch (cache) { + case PTE_NOCACHE: + default: + f1 = 0; + f2l = 0; + f2s = 0; + break; + + case PTE_CACHE: + f1 = pte_l1_s_cache_mode; + f2l = pte_l2_l_cache_mode; + f2s = pte_l2_s_cache_mode; + break; + + case PTE_PAGETABLE: + f1 = pte_l1_s_cache_mode_pt; + f2l = pte_l2_l_cache_mode_pt; + f2s = pte_l2_s_cache_mode_pt; + break; + } + + size = resid; + + while (resid > 0) { + /* See if we can use a section mapping. */ + if (L1_S_MAPPABLE_P(va, pa, resid)) { +#ifdef VERBOSE_INIT_ARM + printf("S"); +#endif + pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | + L1_S_PROT(PTE_KERNEL, prot) | f1 | + L1_S_DOM(PMAP_DOMAIN_KERNEL); + PTE_SYNC(&pde[va >> L1_S_SHIFT]); + va += L1_S_SIZE; + pa += L1_S_SIZE; + resid -= L1_S_SIZE; + continue; + } + + /* + * Ok, we're going to use an L2 table. Make sure + * one is actually in the corresponding L1 slot + * for the current VA. + */ + if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) + panic("pmap_map_chunk: no L2 table for VA 0x%08x", va); + + pte = (pt_entry_t *) kernel_pt_lookup( + pde[L1_IDX(va)] & L1_C_ADDR_MASK); + if (pte == NULL) + panic("pmap_map_chunk: can't find L2 table for VA" + "0x%08x", va); + /* See if we can use a L2 large page mapping. */ + if (L2_L_MAPPABLE_P(va, pa, resid)) { +#ifdef VERBOSE_INIT_ARM + printf("L"); +#endif + for (i = 0; i < 16; i++) { + pte[l2pte_index(va) + i] = + L2_L_PROTO | pa | + L2_L_PROT(PTE_KERNEL, prot) | f2l; + PTE_SYNC(&pte[l2pte_index(va) + i]); + } + va += L2_L_SIZE; + pa += L2_L_SIZE; + resid -= L2_L_SIZE; + continue; + } + + /* Use a small page mapping. */ +#ifdef VERBOSE_INIT_ARM + printf("P"); +#endif + pte[l2pte_index(va)] = + L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | f2s; + PTE_SYNC(&pte[l2pte_index(va)]); + va += PAGE_SIZE; + pa += PAGE_SIZE; + resid -= PAGE_SIZE; + } +#ifdef VERBOSE_INIT_ARM + printf("\n"); +#endif + return (size); + +} + +/********************** Static device map routines ***************************/ + +static const struct pmap_devmap *pmap_devmap_table; + +/* + * Register the devmap table. This is provided in case early console + * initialization needs to register mappings created by bootstrap code + * before pmap_devmap_bootstrap() is called. + */ +void +pmap_devmap_register(const struct pmap_devmap *table) +{ + + pmap_devmap_table = table; +} + +/* + * Map all of the static regions in the devmap table, and remember + * the devmap table so other parts of the kernel can look up entries + * later. 
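+ *
+ * The table is an array of pmap_devmap entries terminated by one with
+ * pd_size == 0, as in the pmap_devmap[] added to dove/db88f6781.c later
+ * in this change.  A minimal sketch (addresses and sizes are
+ * illustrative only):
+ *
+ *	static const struct pmap_devmap devmap[] = {
+ *		{ va, pa, size, VM_PROT_READ | VM_PROT_WRITE, PTE_NOCACHE },
+ *		{ 0, 0, 0, 0, 0 }
+ *	};
+ *	pmap_devmap_bootstrap(l1pt, devmap);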
+ */ +void +pmap_devmap_bootstrap(vm_offset_t l1pt, const struct pmap_devmap *table) +{ + int i; + + pmap_devmap_table = table; + + for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) { +#ifdef VERBOSE_INIT_ARM + printf("devmap: %08x -> %08x @ %08x\n", + pmap_devmap_table[i].pd_pa, + pmap_devmap_table[i].pd_pa + + pmap_devmap_table[i].pd_size - 1, + pmap_devmap_table[i].pd_va); +#endif + pmap_map_chunk(l1pt, pmap_devmap_table[i].pd_va, + pmap_devmap_table[i].pd_pa, + pmap_devmap_table[i].pd_size, + pmap_devmap_table[i].pd_prot, + pmap_devmap_table[i].pd_cache); + } +} + +const struct pmap_devmap * +pmap_devmap_find_pa(vm_paddr_t pa, vm_size_t size) +{ + int i; + + if (pmap_devmap_table == NULL) + return (NULL); + + for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) { + if (pa >= pmap_devmap_table[i].pd_pa && + pa + size <= pmap_devmap_table[i].pd_pa + + pmap_devmap_table[i].pd_size) + return (&pmap_devmap_table[i]); + } + + return (NULL); +} + +const struct pmap_devmap * +pmap_devmap_find_va(vm_offset_t va, vm_size_t size) +{ + int i; + + if (pmap_devmap_table == NULL) + return (NULL); + + for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) { + if (va >= pmap_devmap_table[i].pd_va && + va + size <= pmap_devmap_table[i].pd_va + + pmap_devmap_table[i].pd_size) + return (&pmap_devmap_table[i]); + } + + return (NULL); +} + +int +pmap_dmap_iscurrent(pmap_t pmap) +{ + return(pmap_is_current(pmap)); +} + diff --git a/sys/arm/arm/swtch.S b/sys/arm/arm/swtch.S index c0098fa..69240bf 100644 --- a/sys/arm/arm/swtch.S +++ b/sys/arm/arm/swtch.S @@ -141,8 +141,6 @@ ENTRY(cpu_throw) /* Switch to lwp0 context */ ldr r9, .Lcpufuncs - mov lr, pc - ldr pc, [r9, #CF_IDCACHE_WBINV_ALL] ldr r0, [r7, #(PCB_PL1VEC)] ldr r1, [r7, #(PCB_DACR)] /* @@ -333,14 +331,6 @@ ENTRY(cpu_switch) cmpeq r0, r5 /* Same DACR? */ beq .Lcs_context_switched /* yes! */ - /* - * Definately need to flush the cache. - */ - - ldr r1, .Lcpufuncs - mov lr, pc - ldr pc, [r1, #CF_IDCACHE_WBINV_ALL] -.Lcs_cache_purge_skipped: /* rem: r6 = lock */ /* rem: r9 = new PCB */ /* rem: r10 = old L1 */ @@ -363,16 +353,6 @@ ENTRY(cpu_switch) beq .Lcs_same_vector str r0, [r7] /* Otherwise, update it */ - /* - * Need to sync the cache to make sure that last store is - * visible to the MMU. 
- */ - ldr r2, .Lcpufuncs - mov r0, r7 - mov r1, #4 - mov lr, pc - ldr pc, [r2, #CF_DCACHE_WB_RANGE] - .Lcs_same_vector: #endif /* PMAP_INCLUDE_PTE_SYNC */ diff --git a/sys/arm/include/armreg.h b/sys/arm/include/armreg.h index 41e6df0..95defd9 100644 --- a/sys/arm/include/armreg.h +++ b/sys/arm/include/armreg.h @@ -151,6 +151,7 @@ #define CPU_ID_MV88FR131 0x56251310 /* Marvell Feroceon 88FR131 Core */ #define CPU_ID_MV88FR571_VD 0x56155710 /* Marvell Feroceon 88FR571-VD Core (ID from datasheet) */ #define CPU_ID_MV88FR571_41 0x41159260 /* Marvell Feroceon 88FR571-VD Core (actual ID from CPU reg) */ +#define CPU_ID_MV88SV581X 0x560F5810 /* Marvell Sheeva 88SV581x Core */ #define CPU_ID_FA526 0x66015260 #define CPU_ID_SA1110 0x6901b110 #define CPU_ID_IXP1200 0x6901c120 @@ -243,6 +244,7 @@ #define CPU_CONTROL_VECRELOC 0x00002000 /* V: Vector relocation */ #define CPU_CONTROL_ROUNDROBIN 0x00004000 /* RR: Predictable replacement */ #define CPU_CONTROL_V4COMPAT 0x00008000 /* L4: ARMv4 compat LDR R15 etc */ +#define CPU_CONTROL_V6_EXTPAGE 0x00800000 /* XP: ARMv6 extended page tables */ #define CPU_CONTROL_L2_ENABLE 0x04000000 /* L2 Cache enabled */ #define CPU_CONTROL_IDC_ENABLE CPU_CONTROL_DC_ENABLE diff --git a/sys/arm/include/cpuconf.h b/sys/arm/include/cpuconf.h index 0511991..636f9d6 100644 --- a/sys/arm/include/cpuconf.h +++ b/sys/arm/include/cpuconf.h @@ -110,6 +110,10 @@ * * ARM_MMU_GENERIC Generic ARM MMU, compatible with ARM6. * + * ARM_MMU_V6 ARMv6 MMU with disabled compatibility. + * Includes several extensions which require + * different page table layout. + * * ARM_MMU_SA1 StrongARM SA-1 MMU. Compatible with generic * ARM MMU, but has no write-through cache mode. * @@ -125,12 +129,18 @@ #if (defined(CPU_ARM6) || defined(CPU_ARM7) || defined(CPU_ARM7TDMI) || \ defined(CPU_ARM8) || defined(CPU_ARM9) || defined(CPU_ARM9E) || \ - defined(CPU_ARM10) || defined(CPU_ARM11)) + defined(CPU_ARM10)) #define ARM_MMU_GENERIC 1 #else #define ARM_MMU_GENERIC 0 #endif +#if defined(CPU_ARM11) +#define ARM_MMU_V6 1 +#else +#define ARM_MMU_V6 0 +#endif + #if (defined(CPU_SA110) || defined(CPU_SA1100) || defined(CPU_SA1110) ||\ defined(CPU_IXP12X0)) #define ARM_MMU_SA1 1 @@ -146,7 +156,7 @@ #define ARM_MMU_XSCALE 0 #endif -#define ARM_NMMUS (ARM_MMU_MEMC + ARM_MMU_GENERIC + \ +#define ARM_NMMUS (ARM_MMU_MEMC + ARM_MMU_GENERIC + ARM_MMU_V6 + \ ARM_MMU_SA1 + ARM_MMU_XSCALE) #if ARM_NMMUS == 0 && !defined(KLD_MODULE) && defined(_KERNEL) #error ARM_NMMUS is 0 diff --git a/sys/arm/include/cpufunc.h b/sys/arm/include/cpufunc.h index 01862ee..0614beb 100644 --- a/sys/arm/include/cpufunc.h +++ b/sys/arm/include/cpufunc.h @@ -405,9 +405,29 @@ void arm11_tlb_flushD (void); void arm11_tlb_flushD_SE (u_int va); void arm11_drain_writebuf (void); + +void sheeva2_setttb (u_int); + +void sheeva2_icache_sync_range(vm_offset_t, vm_size_t); + +void sheeva2_dcache_wbinv_range(vm_offset_t, vm_size_t); +void sheeva2_dcache_inv_range(vm_offset_t, vm_size_t); +void sheeva2_dcache_wb_range(vm_offset_t, vm_size_t); + +void sheeva2_idcache_wbinv_range(vm_offset_t, vm_size_t); + +void sheeva2_l2cache_wbinv_range (vm_offset_t, vm_size_t); +void sheeva2_l2cache_inv_range (vm_offset_t, vm_size_t); +void sheeva2_l2cache_wb_range (vm_offset_t, vm_size_t); +void sheeva2_l2cache_wbinv_all (void); + +void sheeva2_drain_readbuf (void); +void sheeva2_flush_brnchtgt_all (void); +void sheeva2_flush_brnchtgt_va (u_int); +void sheeva2_sleep (int); #endif -#if defined(CPU_ARM9E) || defined (CPU_ARM10) +#if defined(CPU_ARM9E) || defined 
(CPU_ARM10) || defined(CPU_ARM11) void armv5_ec_setttb(u_int); void armv5_ec_icache_sync_all(void); diff --git a/sys/arm/include/intr.h b/sys/arm/include/intr.h index 5c2923b..3b12983 100644 --- a/sys/arm/include/intr.h +++ b/sys/arm/include/intr.h @@ -48,7 +48,7 @@ #elif defined(SOC_MV_DISCOVERY) #define NIRQ 96 #elif defined(CPU_ARM9) || defined(SOC_MV_KIRKWOOD) || \ - defined(CPU_XSCALE_IXP435) + defined(SOC_MV_DOVE) || defined(CPU_XSCALE_IXP435) #define NIRQ 64 #else #define NIRQ 32 diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h index f474557..b4c1d51 100644 --- a/sys/arm/include/pmap.h +++ b/sys/arm/include/pmap.h @@ -223,6 +223,7 @@ void pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, int cache); int pmap_fault_fixup(pmap_t, vm_offset_t, vm_prot_t, int); +int pmap_dmap_iscurrent(pmap_t pmap); /* * Definitions for MMU domains @@ -332,6 +333,19 @@ extern int pmap_needs_pte_sync; #define L1_C_PROTO L1_C_PROTO_xscale #define L2_S_PROTO L2_S_PROTO_xscale +#elif ARM_MMU_V6 == 1 +#define L2_S_PROT_U L2_S_PROT_U_xscale +#define L2_S_PROT_W L2_S_PROT_W_xscale +#define L2_S_PROT_MASK L2_S_PROT_MASK_xscale + +#define L1_S_CACHE_MASK L1_S_CACHE_MASK_generic +#define L2_L_CACHE_MASK L2_L_CACHE_MASK_generic +#define L2_S_CACHE_MASK L2_S_CACHE_MASK_generic + +#define L1_S_PROTO L1_S_PROTO_xscale +#define L1_C_PROTO L1_C_PROTO_xscale +#define L2_S_PROTO L2_S_PROTO_generic + #endif /* ARM_NMMUS > 1 */ #ifdef SKYEYE_WORKAROUNDS @@ -423,7 +437,7 @@ extern pt_entry_t pte_l2_s_proto; extern void (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t); extern void (*pmap_zero_page_func)(vm_paddr_t, int, int); -#if (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 || defined(CPU_XSCALE_81342) +#if (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_SA1) != 0 || defined(CPU_XSCALE_81342) void pmap_copy_page_generic(vm_paddr_t, vm_paddr_t); void pmap_zero_page_generic(vm_paddr_t, int, int); @@ -437,6 +451,9 @@ void pmap_pte_init_arm9(void); #if defined(CPU_ARM10) void pmap_pte_init_arm10(void); #endif /* CPU_ARM10 */ +#if defined(CPU_ARM11) +void pmap_pte_init_arm11(void); +#endif /* CPU_ARM11 */ #endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 */ #if /* ARM_MMU_SA1 == */1 diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h index aa8af4b..cf741df 100644 --- a/sys/arm/include/vmparam.h +++ b/sys/arm/include/vmparam.h @@ -109,7 +109,9 @@ #endif #define VM_MAXUSER_ADDRESS KERNBASE - ARM_KERN_DIRECTMAP #else /* ARM_USE_SMALL_ALLOC */ +#ifndef VM_MAXUSER_ADDRESS #define VM_MAXUSER_ADDRESS KERNBASE +#endif /* VM_MAXUSER_ADDRESS */ #endif /* ARM_USE_SMALL_ALLOC */ #define VM_MAX_ADDRESS VM_MAXUSER_ADDRESS diff --git a/sys/arm/mv/common.c b/sys/arm/mv/common.c index 76758be..e838a65 100644 --- a/sys/arm/mv/common.c +++ b/sys/arm/mv/common.c @@ -243,9 +382,6 @@ WIN_REG_IDX_WR(win_cpu, br, MV_WIN_CPU_BASE, MV_MBUS_BRIDGE_BASE) WIN_REG_IDX_WR(win_cpu, remap_l, MV_WIN_CPU_REMAP_LO, MV_MBUS_BRIDGE_BASE) WIN_REG_IDX_WR(win_cpu, remap_h, MV_WIN_CPU_REMAP_HI, MV_MBUS_BRIDGE_BASE) -WIN_REG_IDX_RD(ddr, br, MV_WIN_DDR_BASE, MV_DDR_CADR_BASE) -WIN_REG_IDX_RD(ddr, sz, MV_WIN_DDR_SIZE, MV_DDR_CADR_BASE) - WIN_REG_IDX_RD2(win_usb, cr, MV_WIN_USB_CTRL, MV_USB_AWR_BASE) WIN_REG_IDX_RD2(win_usb, br, MV_WIN_USB_BASE, MV_USB_AWR_BASE) WIN_REG_IDX_WR2(win_usb, cr, MV_WIN_USB_CTRL, MV_USB_AWR_BASE) @@ -301,6 +437,44 @@ WIN_REG_IDX_RD(win_sata, br, MV_WIN_SATA_BASE, MV_SATAHC_BASE); WIN_REG_IDX_WR(win_sata, cr, MV_WIN_SATA_CTRL, MV_SATAHC_BASE); WIN_REG_IDX_WR(win_sata, br, MV_WIN_SATA_BASE, MV_SATAHC_BASE); +#ifndef 
SOC_MV_DOVE +WIN_REG_IDX_RD(ddr, br, MV_WIN_DDR_BASE, MV_DDR_CADR_BASE) +WIN_REG_IDX_RD(ddr, sz, MV_WIN_DDR_SIZE, MV_DDR_CADR_BASE) +#else +/* + * On 88F6781 (Dove) SoC DDR Controller is accessed through + * single MBUS <-> AXI bridge. In this case we provide emulated + * ddr_br_read() and ddr_sz_read() functions to keep compatibility + * with common decoding windows setup code. + */ + +static inline uint32_t ddr_br_read(int i) +{ + uint32_t mmap; + + /* Read Memory Address Map Register for CS i */ + mmap = bus_space_read_4(obio_tag, MV_DDR_CADR_BASE + (i * 0x10), 0); + + /* Return CS i base address */ + return (mmap & 0xFF000000); +} + +static inline uint32_t ddr_sz_read(int i) +{ + uint32_t mmap, size; + + /* Read Memory Address Map Register for CS i */ + mmap = bus_space_read_4(obio_tag, MV_DDR_CADR_BASE + (i * 0x10), 0); + + /* Extract size of CS space in 64kB units */ + size = (1 << ((mmap >> 16) & 0x0F)); + + /* Return CS size and enable/disable status */ + return (((size - 1) << 16) | (mmap & 0x01)); +} +#endif + + /************************************************************************** * Decode windows helper routines **************************************************************************/ @@ -366,6 +540,7 @@ win_cpu_can_remap(int i) if ((dev == MV_DEV_88F5182 && i < 2) || (dev == MV_DEV_88F5281 && i < 4) || (dev == MV_DEV_88F6281 && i < 4) || + (dev == MV_DEV_88F6781 && i < 4) || (dev == MV_DEV_MV78100 && i < 8) || (dev == MV_DEV_MV78100_Z0 && i < 8)) return (1); @@ -566,18 +748,21 @@ ddr_size(int i) uint32_t ddr_attr(int i) { - +#ifdef SOC_MV_DOVE + return (0); +#else return (i == 0 ? 0xe : (i == 1 ? 0xd : (i == 2 ? 0xb : (i == 3 ? 0x7 : 0xff)))); +#endif } uint32_t ddr_target(int i) { - /* Mbus unit ID is 0x0 for DDR SDRAM controller */ + /* Mbus unit ID is 0x0 for DDR SDRAM controller / AXI bridge */ return (0); } diff --git a/sys/arm/mv/dove/db88f6781.c b/sys/arm/mv/dove/db88f6781.c new file mode 100644 index 0000000..2e3a198 --- /dev/null +++ b/sys/arm/mv/dove/db88f6781.c @@ -0,0 +1,159 @@ +/*- + * Copyright (C) 2008 MARVELL INTERNATIONAL LTD. + * All rights reserved. + * + * Developed by Semihalf. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of MARVELL nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: src/sys/arm/mv/kirkwood/db88f6xxx.c,v 1.4 2009/06/12 20:00:38 marcel Exp $"); + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000 - 0x7FFF_FFFF : User Process (2 GB) + * 0x8000_0000 - 0x9FFF_FFFF : PCI/PCIE Memory (512 MB) + * 0xA000_0000 - 0xBBFF_FFFF : Unused (448 MB) + * 0xBC00_0000 - 0xBDFF_FFFF : Device Bus: CS1 (32 MB) + * 0xBE00_0000 - 0xBECF_FFFF : Unused (13 MB) + * 0xBED0_0000 - 0xBEDF_FFFF : Device Bus: CS2 (1 MB) + * 0xBEE0_0000 - 0xBEEF_FFFF : Device Bus: CS0 (1 MB) + * 0xBEF0_0000 - 0xBEFF_FFFF : Device Bus: BOOT (1 MB) + * 0xBF00_0000 - 0xBFFF_FFFF : PCI/PCIE I/O (16 MB) + * 0xC000_0000 - virtual_avail : Kernel Reserved (text, data, page tables, + * : stack etc.) + * virtual-avail - 0xEFFF_FFFF : KVA (virtual_avail is typically < 0xc0a0_0000) + * 0xF000_0000 - 0xF0FF_FFFF : No-Cache allocation area (16 MB) + * 0xF100_0000 - 0xF10F_FFFF : SoC Integrated devices registers range (1 MB) + * 0xF110_0000 - 0xF11F_FFFF : CESA SRAM (1 MB) + * 0xF120_0000 - 0xF7FE_FFFF : Unused + * 0xF180_0000 - 0xF1FF_FFFF : AXI Integrated registers range (16 MB) + * 0xF200_0000 - 0xFFFE_FFFF : Unused + * 0xFFFF_0000 - 0xFFFF_0FFF : 'High' vectors page (4 kB) + * 0xFFFF_1000 - 0xFFFF_1FFF : ARM_TP_ADDRESS/RAS page (4 kB) + * 0xFFFF_2000 - 0xFFFF_FFFF : Unused (56 kB) + */ + +/* Static device mappings. */ +const struct pmap_devmap pmap_devmap[] = { + /* + * Map the on-board devices VA == PA so that we can access them + * with the MMU on or off. + */ + { /* SoC integrated peripherals registers range */ + MV_BASE, + MV_PHYS_BASE, + MV_SIZE, + VM_PROT_READ | VM_PROT_WRITE, + PTE_NOCACHE, + }, + { /* AXI integrated peripherals registers range */ + MV_AXI_BASE, + MV_AXI_PHYS_BASE, + MV_AXI_SIZE, + VM_PROT_READ | VM_PROT_WRITE, + PTE_NOCACHE, + }, + { /* PCIE I/O */ + MV_PCI_IO_BASE, + MV_PCI_IO_PHYS_BASE, + MV_PCI_IO_SIZE, + VM_PROT_READ | VM_PROT_WRITE, + PTE_NOCACHE, + }, + { /* PCIE Memory */ + MV_PCI_MEM_BASE, + MV_PCI_MEM_PHYS_BASE, + MV_PCI_MEM_SIZE, + VM_PROT_READ | VM_PROT_WRITE, + PTE_NOCACHE, + }, + { /* CESA SRAM */ + MV_CESA_SRAM_BASE, + MV_CESA_SRAM_PHYS_BASE, + MV_CESA_SRAM_SIZE, + VM_PROT_READ | VM_PROT_WRITE, + PTE_NOCACHE, + }, + { 0, 0, 0, 0, 0, } +}; + +const struct obio_platform_device obio_platform_devices[] = { + { NULL, { 0 }, { 0 } } +}; + +const struct gpio_config mv_gpio_config[] = { + { -1, -1, -1 } +}; + +const struct sdio_signal_config mv_sdio_signal_config[] = { + { -1, -1 } +}; + +/* PCA9555 platform pin configuration. */ +const struct pca9555_config pca9555_platform_config[] = { + { -1 } +}; + +void +platform_mpp_init(void) +{ + +} + +static void +platform_identify(void *dummy) +{ + + soc_identify(); + + /* + * XXX Board identification e.g. 
read out from FPGA or similar should + * go here + */ +} +SYSINIT(platform_identify, SI_SUB_CPU, SI_ORDER_SECOND, platform_identify, NULL); diff --git a/sys/arm/mv/dove/dove.c b/sys/arm/mv/dove/dove.c new file mode 100644 index 0000000..a273b65 --- /dev/null +++ b/sys/arm/mv/dove/dove.c @@ -0,0 +1,178 @@ +/*- + * Copyright (C) 2008 MARVELL INTERNATIONAL LTD. + * All rights reserved. + * + * Developed by Semihalf. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of MARVELL nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD: src/sys/arm/mv/kirkwood/kirkwood.c,v 1.7 2009/06/25 10:03:51 raj Exp $"); + +#include +#include +#include + +#include + +#include +#include +#include + +struct obio_device obio_devices[] = { + { "ic", MV_IC_BASE, MV_IC_SIZE, + { -1 }, + { -1 }, + CPU_PM_CTRL_NONE + }, + { "timer", MV_TIMERS_BASE, MV_TIMERS_SIZE, + { MV_INT_BRIDGE, -1 }, + { -1 }, + CPU_PM_CTRL_NONE + }, + { "rtc", MV_RTC_BASE, MV_RTC_SIZE, + { -1 }, + { -1 }, + CPU_PM_CTRL_NONE + }, +#if 0 + { "gpio", MV_GPIO_BASE, MV_GPIO_SIZE, + { MV_INT_GPIO7_0, MV_INT_GPIO15_8, + MV_INT_GPIO23_16, MV_INT_GPIO31_24, + MV_INT_GPIOHI7_0, MV_INT_GPIOHI15_8, + MV_INT_GPIOHI23_16, -1 }, + { -1 }, + CPU_PM_CTRL_NONE + }, +#endif + { "uart", MV_UART0_BASE, MV_UART_SIZE, + { MV_INT_UART0, -1 }, + { -1 }, + CPU_PM_CTRL_NONE + }, + { "uart", MV_UART1_BASE, MV_UART_SIZE, + { MV_INT_UART1, -1 }, + { -1 }, + CPU_PM_CTRL_NONE + }, + { "ehci", MV_USB0_BASE, MV_USB_SIZE, + { MV_INT_USB_BERR, MV_INT_USB0, -1 }, + { -1 }, + /* TODO: CPU_PM_CTRL_USB0 */ CPU_PM_CTRL_NONE + }, + { "mge", MV_ETH0_BASE, MV_ETH_SIZE, + { MV_INT_GBERX, MV_INT_GBETX, MV_INT_GBEMISC, + MV_INT_GBESUM, MV_INT_GBEERR, -1 }, + { -1 }, + /* CPU_PM_CTRL_GE0 */ CPU_PM_CTRL_NONE + }, + { "sata", MV_SATAHC_BASE, MV_SATAHC_SIZE, + { MV_INT_SATA, -1 }, + { -1 }, + /* CPU_PM_CTRL_SATA0 | CPU_PM_CTRL_SATA1 */ CPU_PM_CTRL_NONE + }, + + { NULL, 0, 0, { 0 }, { 0 }, 0 } +}; + +const struct obio_pci mv_pci_info[] = { + { MV_TYPE_PCIE, + MV_PCIE_BASE, MV_PCIE_SIZE, + MV_PCI_IO_SLICE(0), MV_PCI_IO_SLICE_SIZE, 4, 0xE0, + MV_PCI_MEM_SLICE(0), MV_PCI_MEM_SLICE_SIZE, 4, 0xE8, + NULL, MV_INT_PEX0 + }, + + { 0, 0, 0 } +}; + +struct resource_spec mv_gpio_res[] = { + { SYS_RES_MEMORY, 0, RF_ACTIVE }, + { SYS_RES_IRQ, 0, RF_ACTIVE }, + { SYS_RES_IRQ, 1, RF_ACTIVE }, + { SYS_RES_IRQ, 2, RF_ACTIVE }, + { SYS_RES_IRQ, 3, RF_ACTIVE }, + { SYS_RES_IRQ, 4, RF_ACTIVE }, + { SYS_RES_IRQ, 5, RF_ACTIVE }, + { SYS_RES_IRQ, 6, RF_ACTIVE }, + { -1, 0 } +}; + +struct resource_spec mv_xor_res[] = { + { SYS_RES_MEMORY, 0, RF_ACTIVE }, + { SYS_RES_IRQ, 0, RF_ACTIVE }, + { SYS_RES_IRQ, 1, RF_ACTIVE }, + { SYS_RES_IRQ, 2, RF_ACTIVE }, + { SYS_RES_IRQ, 3, RF_ACTIVE }, + { SYS_RES_IRQ, 4, RF_ACTIVE }, + { SYS_RES_IRQ, 5, RF_ACTIVE }, + { -1, 0 } +}; + +const struct decode_win cpu_win_tbl[] = { + /* Device bus BOOT */ + { 1, 0x0f, MV_DEV_BOOT_PHYS_BASE, MV_DEV_BOOT_SIZE, -1 }, + + /* Device bus CS0 */ + { 1, 0x1e, MV_DEV_CS0_PHYS_BASE, MV_DEV_CS0_SIZE, -1 }, + + /* Device bus CS1 */ + { 1, 0x1d, MV_DEV_CS1_PHYS_BASE, MV_DEV_CS1_SIZE, -1 }, + + /* Device bus CS2 */ + { 1, 0x1b, MV_DEV_CS2_PHYS_BASE, MV_DEV_CS2_SIZE, -1 }, + + /* CESA */ + { 3, 0x01, MV_CESA_SRAM_PHYS_BASE, MV_CESA_SRAM_SIZE, -1 }, + +}; +const struct decode_win *cpu_wins = cpu_win_tbl; +int cpu_wins_no = sizeof(cpu_win_tbl) / sizeof(struct decode_win); + +const struct decode_win xor_win_tbl[] = { + /* PCIE MEM */ + { 4, 0xE8, MV_PCI_MEM_SLICE(0), MV_PCI_MEM_SLICE_SIZE, -1 }, +}; +const struct decode_win *xor_wins = xor_win_tbl; +int xor_wins_no = sizeof(xor_win_tbl) / sizeof(struct decode_win); + +uint32_t +get_tclk(void) +{ + uint32_t dev, rev; + + /* + * On Kirkwood TCLK is not configurable and depends on silicon + * revision: + * - A0 has TCLK hardcoded to 200 MHz. + * - Z0 and others have TCLK hardcoded to 166 MHz. 
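+ *
+ * (This check is inherited from the Kirkwood kirkwood.c this file was
+ * derived from; it only matches MV_DEV_88F6281, so on the Dove 88F6781
+ * the TCLK_166MHZ fallback below is always taken.)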
+ */ + soc_id(&dev, &rev); + if (dev == MV_DEV_88F6281 && rev == 2) + return (TCLK_200MHZ); + + return (TCLK_166MHZ); +} diff --git a/sys/arm/mv/files.mv b/sys/arm/mv/files.mv index 3d9839f..0b38190 100644 --- a/sys/arm/mv/files.mv +++ b/sys/arm/mv/files.mv @@ -14,8 +14,11 @@ # arm/arm/bus_space_generic.c standard arm/arm/cpufunc_asm_arm10.S standard +arm/arm/cpufunc_asm_arm11.S standard +arm/arm/cpufunc_asm_armv5.S standard arm/arm/cpufunc_asm_armv5_ec.S standard arm/arm/cpufunc_asm_sheeva.S standard +arm/arm/cpufunc_asm_sheeva2.S standard arm/arm/irq_dispatch.S standard arm/mv/bus_space.c standard diff --git a/sys/arm/mv/ic.c b/sys/arm/mv/ic.c index 6277db3..6290a8f 100644 --- a/sys/arm/mv/ic.c +++ b/sys/arm/mv/ic.c @@ -98,8 +98,8 @@ mv_ic_attach(device_t dev) sc->ic_high_regs = 0; sc->ic_error_regs = 0; - if (dev_id == MV_DEV_88F6281 || dev_id == MV_DEV_MV78100 || - dev_id == MV_DEV_MV78100_Z0) + if (dev_id == MV_DEV_88F6281 || dev_id == MV_DEV_88F6781 || + dev_id == MV_DEV_MV78100 || dev_id == MV_DEV_MV78100_Z0) sc->ic_high_regs = 1; if (dev_id == MV_DEV_MV78100 || dev_id == MV_DEV_MV78100_Z0) diff --git a/sys/arm/mv/mvreg.h b/sys/arm/mv/mvreg.h index 74bcba6..431731d 100644 --- a/sys/arm/mv/mvreg.h +++ b/sys/arm/mv/mvreg.h @@ -158,6 +159,66 @@ #define MV_INT_XOR_ERR 74 /* XOR engine error */ #define MV_INT_WD 79 /* WD Timer interrupt */ +#elif defined(SOC_MV_DOVE) + +#define MV_INT_BRIDGE 0 /* Downstream Bridge Interrupt */ +#define MV_INT_H2C_DOORBELL 1 /* Host2CPU Doorbell Interrupt */ +#define MV_INT_C2H_DOORBELL 2 /* CPU2Host Doorbell Interrupt */ +#define MV_INT_NAND_FLASH 3 /* NandFlash Interrupt */ +#define MV_INT_PDMA 4 /* Peripheral DMA Interrupt */ +#define MV_INT_SPI1 5 /* SPI1 Ready Interrupt */ +#define MV_INT_SPI0 6 /* SPI0 Ready Interrupt */ +#define MV_INT_UART0 7 /* UART0 Interrupt */ +#define MV_INT_UART1 8 /* UART1 Interrupt */ +#define MV_INT_UART2 9 /* UART2 Interrupt */ +#define MV_INT_UART3 10 /* UART3 Interrupt */ +#define MV_INT_TWSI 11 /* TWSI Interrupt */ +#define MV_INT_GPIO7_0 12 /* GPIO[7:0] Interrupt */ +#define MV_INT_GPIO15_8 13 /* GPIO[15:8] Interrupt */ +#define MV_INT_GPIO23_16 14 /* GPIO[23:16] Interrupt */ +#define MV_INT_PEX0_ERR 15 /* PCI Express 0 Error */ +#define MV_INT_PEX0 16 /* PCI Express 0 Interrupt */ +#define MV_INT_PEX1_ERR 17 /* PCI Express 1 Error */ +#define MV_INT_PEX1 18 /* PCI Express 1 Interrupt */ +#define MV_INT_AUDIO0 19 /* Audio 0 Interrupt */ +#define MV_INT_AUDIO0_ERR 20 /* Audio 0 Error */ +#define MV_INT_AUDIO1 21 /* Audio 1 Interrupt */ +#define MV_INT_AUDIO1_ERR 22 /* Audio 1 Error */ +#define MV_INT_USB_BERR 23 /* USB Bridge Error */ +#define MV_INT_USB0 24 /* USB 0 Interrupt */ +#define MV_INT_USB1 25 /* USB 1 Interrupt */ +#define MV_INT_GBERX 26 /* GbE Receive Interrupt */ +#define MV_INT_GBETX 27 /* GbE Transmit Interrupt */ +#define MV_INT_GBEMISC 28 /* GbE Miscellaneous Interrupt */ +#define MV_INT_GBESUM 29 /* GbE Summary */ +#define MV_INT_GBEERR 30 /* GbE Error */ +#define MV_INT_CESA 31 /* Security Interrupt */ +#define MV_INT_AC97 32 /* AC97 Interrupt */ +#define MV_INT_PMU 33 /* Power Management Unit Interrupt */ +#define MV_INT_CAM 34 /* Cafe Camera Interrupt */ +#define MV_INT_SD0 35 /* SD0 IRQ Interrupt */ +#define MV_INT_SD1 36 /* SD1 IRQ Interrupt */ +#define MV_INT_XOR0_DMA0 39 /* XOR Unit 0 DMA 0 Completion */ +#define MV_INT_XOR0_DMA1 40 /* XOR Unit 0 DMA 1 Completion */ +#define MV_INT_XOR0_ERR 41 /* XOR Unit 0 Error Interrupt */ +#define MV_INT_XOR1_DMA0 42 /* XOR Unit 1 DMA 0 Completion */ 
+#define MV_INT_XOR1_DMA1 43 /* XOR Unit 1 DMA 1 Completion */ +#define MV_INT_XOR1_ERR 44 /* XOR Unit 1 Error Interrupt */ +#define MV_INT_IRE_DCON 45 /* IRE OR DCON Interrupt */ +#define MV_INT_LCD1 46 /* LCD1 Interrupt */ +#define MV_INT_LCD0 47 /* LCD0 Interrupt */ +#define MV_INT_GPU 48 /* Graphics Processing Unit Interrupt */ +#define MV_INT_VMETA 51 /* Video Decode Unit Semaphore Int. */ +#define MV_INT_SSP_TIMER 54 /* SSP Timer Interrupt */ +#define MV_INT_SSP 55 /* SSP Interrupt */ +#define MV_INT_MEM_ERR 56 /* Memory Controller or L2 ECC Error */ +#define MV_INT_DOWNSTR_ERR 57 /* Downstream Bus Error */ +#define MV_INT_UPSTR_ERR 58 /* Upstream Bus Error */ +#define MV_INT_CESA_ERR 59 /* Security Error */ +#define MV_INT_GPIO31_24 60 /* GPIO[31:24] Interrupt */ +#define MV_INT_HIGH_GPIO 61 /* Interrupt from High GPIO[31:0] */ +#define MV_INT_SATA 62 /* SATA Interrupt */ + #endif /* SOC_MV_ORION */ #define BRIDGE_IRQ_CAUSE 0x10 @@ -390,6 +570,11 @@ #define MPP_CONTROL4 0x10 #define MPP_CONTROL5 0x14 #define MPP_CONTROL6 0x18 +#elif defined(SOC_MV_DOVE) +#define MPP_CONTROL0 0x00 +#define MPP_CONTROL1 0x04 +#define MPP_CONTROL2 0x08 +#define MPP_CONTROL4 0x40 /* MPP_CONTROL3 does not exist */ #else #error SOC_MV_XX not defined #endif @@ -401,6 +586,9 @@ #elif defined(SOC_MV_DISCOVERY) #define SAMPLE_AT_RESET_LO 0x30 #define SAMPLE_AT_RESET_HI 0x34 +#elif defined(SOC_MV_DOVE) +#define SAMPLE_AT_RESET_LO 0x14 +#define SAMPLE_AT_RESET_HI 0x18 #else #error SOC_MV_XX not defined #endif @@ -424,12 +612,26 @@ #define TCLK_200MHZ 200000000 /* + * CPU Cache Configuration + */ + +#define CPU_CONFIG 0x00000000 +#define CPU_CONFIG_IC_PREF 0x00010000 +#define CPU_CONFIG_DC_PREF 0x00020000 +#define CPU_CONTROL 0x00000004 +#define CPU_CONTROL_L2_SIZE 0x00200000 /* Only on Discovery */ +#define CPU_CONTROL_L2_MODE 0x00020000 /* Only on Discovery */ +#define CPU_L2_CONFIG 0x00000028 /* Only on Kirkwood */ +#define CPU_L2_CONFIG_MODE 0x00000010 /* Only on Kirkwood */ + +/* * Chip ID */ #define MV_DEV_88F5181 0x5181 #define MV_DEV_88F5182 0x5182 #define MV_DEV_88F5281 0x5281 #define MV_DEV_88F6281 0x6281 +#define MV_DEV_88F6781 0x6781 #define MV_DEV_MV78100_Z0 0x6381 #define MV_DEV_MV78100 0x7810 diff --git a/sys/arm/mv/std-sheeva2.mv b/sys/arm/mv/std-sheeva2.mv new file mode 100644 index 0000000..7fcf7dd --- /dev/null +++ b/sys/arm/mv/std-sheeva2.mv @@ -0,0 +1,7 @@ +# $FreeBSD: src/sys/arm/mv/std.mv,v 1.1 2008/10/13 20:07:13 raj Exp $ + +files "../mv/files.mv" +cpu CPU_ARM11 +makeoptions CONF_CFLAGS="-march=armv6" + +options VM_MAXUSER_ADDRESS="(KERNBASE-(1024*1024*1024))" diff --git a/sys/arm/mv/std.mv b/sys/arm/mv/std.mv index 5e97ba3..447bce0 100644 --- a/sys/arm/mv/std.mv +++ b/sys/arm/mv/std.mv @@ -3,3 +3,5 @@ files "../mv/files.mv" cpu CPU_ARM9E makeoptions CONF_CFLAGS="-march=armv5te" + +options VM_MAXUSER_ADDRESS="(KERNBASE-(1024*1024*1024))" diff --git a/sys/conf/Makefile.arm b/sys/conf/Makefile.arm index 90ef03f..fe11aa9 100644 --- a/sys/conf/Makefile.arm +++ b/sys/conf/Makefile.arm @@ -73,7 +73,7 @@ FILES_CPU_FUNC = $S/$M/$M/cpufunc_asm_arm7tdmi.S \ $S/$M/$M/cpufunc_asm_sa1.S $S/$M/$M/cpufunc_asm_arm10.S \ $S/$M/$M/cpufunc_asm_xscale.S $S/$M/$M/cpufunc_asm.S \ $S/$M/$M/cpufunc_asm_xscale_c3.S $S/$M/$M/cpufunc_asm_armv5_ec.S \ - $S/$M/$M/cpufunc_asm_sheeva.S + $S/$M/$M/cpufunc_asm_sheeva.S $S/$M/$M/cpufunc_asm_sheeva2.S KERNEL_EXTRA=trampoline KERNEL_EXTRA_INSTALL=kernel.gz.tramp trampoline: ${KERNEL_KO}.tramp diff --git a/sys/conf/files.arm b/sys/conf/files.arm index 0c1a77f..8eb0746 
100644 --- a/sys/conf/files.arm +++ b/sys/conf/files.arm @@ -7,7 +7,7 @@ arm/arm/bcopyinout.S standard arm/arm/blockio.S standard arm/arm/bootconfig.c standard arm/arm/bus_space_asm_generic.S standard -arm/arm/busdma_machdep.c standard +arm/arm/busdma_machdep-v6.c standard arm/arm/copystr.S standard arm/arm/cpufunc.c standard arm/arm/cpufunc_asm.S standard @@ -32,7 +32,7 @@ arm/arm/machdep.c standard arm/arm/mem.c optional mem arm/arm/minidump_machdep.c optional mem arm/arm/nexus.c standard -arm/arm/pmap.c standard +arm/arm/pmap-v6.c standard arm/arm/setcpsr.S standard arm/arm/setstack.s standard arm/arm/stack_machdep.c optional ddb | stack diff --git a/sys/conf/options.arm b/sys/conf/options.arm index b384a36..d18eb49 100644 --- a/sys/conf/options.arm +++ b/sys/conf/options.arm @@ -11,6 +11,7 @@ CPU_SA1100 opt_global.h CPU_SA1110 opt_global.h CPU_ARM9 opt_global.h CPU_ARM9E opt_global.h +CPU_ARM11 opt_global.h CPU_XSCALE_80219 opt_global.h CPU_XSCALE_80321 opt_global.h CPU_XSCALE_81342 opt_global.h @@ -24,14 +25,17 @@ KERNVIRTADDR opt_global.h LOADERRAMADDR opt_global.h PHYSADDR opt_global.h PHYSMEM_SIZE opt_global.h +MII_ADDR_BASE opt_global.h SKYEYE_WORKAROUNDS opt_global.h SOC_MV_DISCOVERY opt_global.h +SOC_MV_DOVE opt_global.h SOC_MV_KIRKWOOD opt_global.h SOC_MV_ORION opt_global.h STARTUP_PAGETABLE_ADDR opt_global.h XSCALE_CACHE_READ_WRITE_ALLOCATE opt_global.h XSACLE_DISABLE_CCNT opt_timer.h VERBOSE_INIT_ARM opt_global.h +VM_MAXUSER_ADDRESS opt_global.h AT91_BWCT opt_at91.h AT91_TSC opt_at91.h AT91_KWIKBYTE opt_at91.h