diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
index 76237fb..0d0ef86 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
+++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
@@ -91,7 +91,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 
 static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
index a54598c..1c3953d 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
+++ b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
@@ -90,7 +90,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 
 #define MAX_SCHEDULE_TIMEOUT 300
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_vm.c b/sys/dev/cxgb/ulp/tom/cxgb_vm.c
deleted file mode 100644
index e7a3893..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_vm.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include
-__FBSDID("$FreeBSD$");
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-/*
- * This routine takes a user's map, array of pages, number of pages, and flags
- * and then does the following:
- *  - validate that the user has access to those pages (flags indicates read
- *    or write) - if not fail
- *  - validate that count is enough to hold range number of pages - if not fail
- *  - fault in any non-resident pages
- *  - if the user is doing a read force a write fault for any COWed pages
- *  - if the user is doing a read mark all pages as dirty
- *  - hold all pages
- */
-int
-vm_fault_hold_user_pages(vm_map_t map, vm_offset_t addr, vm_page_t *mp,
-    int count, vm_prot_t prot)
-{
-	vm_offset_t end, va;
-	int faults, rv;
-	pmap_t pmap;
-	vm_page_t m, *pages;
-
-	pmap = vm_map_pmap(map);
-	pages = mp;
-	addr &= ~PAGE_MASK;
-	/*
-	 * Check that virtual address range is legal
-	 * This check is somewhat bogus as on some architectures kernel
-	 * and user do not share VA - however, it appears that all FreeBSD
-	 * architectures define it
-	 */
-	end = addr + (count * PAGE_SIZE);
-	if (end > VM_MAXUSER_ADDRESS) {
-		log(LOG_WARNING, "bad address passed to vm_fault_hold_user_pages");
-		return (EFAULT);
-	}
-
-	/*
-	 * First optimistically assume that all pages are resident
-	 * (and R/W if for write) if so just mark pages as held (and
-	 * dirty if for write) and return
-	 */
-	vm_page_lock_queues();
-	for (pages = mp, faults = 0, va = addr; va < end;
-	    va += PAGE_SIZE, pages++) {
-		/*
-		 * page queue mutex is recursable so this is OK
-		 * it would be really nice if we had an unlocked
-		 * version of this so we were only acquiring the
-		 * pmap lock 1 time as opposed to potentially
-		 * many dozens of times
-		 */
-		*pages = m = pmap_extract_and_hold(pmap, va, prot);
-		if (m == NULL) {
-			faults++;
-			continue;
-		}
-		/*
-		 * Preemptively mark dirty - the pages
-		 * will never have the modified bit set if
-		 * they are only changed via DMA
-		 */
-		if (prot & VM_PROT_WRITE)
-			vm_page_dirty(m);
-
-	}
-	vm_page_unlock_queues();
-
-	if (faults == 0)
-		return (0);
-
-	/*
-	 * Pages either have insufficient permissions or are not present
-	 * trigger a fault where neccessary
-	 *
-	 */
-	rv = 0;
-	for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++) {
-		/*
-		 * Account for a very narrow race where the page may be
-		 * taken away from us before it is held
-		 */
-		while (*pages == NULL) {
-			rv = vm_fault(map, va, prot, (prot & VM_PROT_WRITE) ?
-			    VM_FAULT_DIRTY : VM_FAULT_NORMAL);
-			if (rv)
-				goto error;
-			*pages = pmap_extract_and_hold(pmap, va, prot);
-		}
-	}
-	return (0);
-error:
-	log(LOG_WARNING,
-	    "vm_fault bad return rv=%d va=0x%zx\n", rv, va);
-	vm_page_lock_queues();
-	for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++)
-		if (*pages) {
-			vm_page_unhold(*pages);
-			*pages = NULL;
-		}
-	vm_page_unlock_queues();
-	return (EFAULT);
-}
-
-void
-vm_fault_unhold_pages(vm_page_t *mp, int count)
-{
-
-	KASSERT(count >= 0, ("negative count %d", count));
-	vm_page_lock_queues();
-	while (count--) {
-		vm_page_unhold(*mp);
-		mp++;
-	}
-	vm_page_unlock_queues();
-}
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_vm.h b/sys/dev/cxgb/ulp/tom/cxgb_vm.h
deleted file mode 100644
index 7532e20..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_vm.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-
-$FreeBSD$
-
-***************************************************************************/
-#ifndef CXGB_VM_H_
-#define CXGB_VM_H_
-
-int vm_fault_hold_user_pages(vm_map_t map, vm_offset_t addr,
-    vm_page_t *mp, int count, vm_prot_t prot);
-void vm_fault_unhold_pages(vm_page_t *mp, int count);
-
-#endif
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
index ce1afd2..331712f 100644
--- a/sys/kern/kern_subr.c
+++ b/sys/kern/kern_subr.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #ifdef ZERO_COPY_SOCKETS
 #include
 #include
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 2f085d9..4b19eb2 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -62,6 +62,9 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 
+#include
+#include
+
 #include
 
 static fo_rdwr_t vn_read;
@@ -363,14 +366,43 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
 	int *aresid;
 	struct thread *td;
 {
-	struct uio auio;
-	struct iovec aiov;
+	struct uio auio, auio_clone;
+	struct iovec aiov, aiov_clone;
 	struct mount *mp;
 	struct ucred *cred;
-	int error;
+	vm_page_t *m_hold;
+	int wired_pages, error;
 
 	VFS_ASSERT_GIANT(vp->v_mount);
 
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	aiov.iov_base = base;
+	aiov.iov_len = len;
+	auio.uio_resid = len;
+	auio.uio_offset = offset;
+	auio.uio_segflg = segflg;
+	auio.uio_rw = rw;
+	auio.uio_td = td;
+	error = 0;
+
+	m_hold = NULL;
+	if (segflg == UIO_USERSPACE) {
+		m_hold = malloc(sizeof(vm_page_t) * (btoc(len) + 1), M_IOV,
+		    M_WAITOK);
+		aiov_clone = aiov;
+		auio_clone = auio;
+		auio_clone.uio_iov = &aiov_clone;
+		error = vm_wireuio(&auio, m_hold,
+		    round_page((vm_offset_t)base + len) -
+		    trunc_page((vm_offset_t)base),
+		    &wired_pages);
+		if (error) {
+			free(m_hold, M_IOV);
+			return (error);
+		}
+	}
+
 	if ((ioflg & IO_NODELOCKED) == 0) {
 		mp = NULL;
 		if (rw == UIO_WRITE) {
@@ -384,16 +416,6 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
 	}
 	ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
-	auio.uio_iov = &aiov;
-	auio.uio_iovcnt = 1;
-	aiov.iov_base = base;
-	aiov.iov_len = len;
-	auio.uio_resid = len;
-	auio.uio_offset = offset;
-	auio.uio_segflg = segflg;
-	auio.uio_rw = rw;
-	auio.uio_td = td;
-	error = 0;
 #ifdef MAC
 	if ((ioflg & IO_NOMACCHECK) == 0) {
 		if (rw == UIO_READ)
@@ -424,6 +446,10 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
 		vn_finished_write(mp);
 		VOP_UNLOCK(vp, 0);
 	}
+	if (segflg == UIO_USERSPACE) {
+		vm_unwireuio(&auio_clone, m_hold, wired_pages);
+		free(m_hold, M_IOV);
+	}
 	return (error);
 }
 
@@ -485,31 +511,65 @@ vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
 	return (error);
 }
 
-/*
- * File table vnode read routine.
- */
+static int uio_hold_pages = 12;
+
 static int
-vn_read(fp, uio, active_cred, flags, td)
-	struct file *fp;
-	struct uio *uio;
-	struct ucred *active_cred;
-	struct thread *td;
-	int flags;
+do_vn_rw_chunked(struct file *fp, struct uio *uio, struct ucred *active_cred,
+    int flags, int ioflag, struct thread *td,
+    int (*vn_chunk_func)(struct file *, struct uio *, struct ucred *,
+    int, int, struct thread *))
+{
+	struct uio *uio_clone;
+	vm_page_t *m_hold;
+	int error, wire_bytes, io_chunk, total_cnt, cnt;
+	int first_chunk, wired_pages;
+
+	if (uio->uio_segflg != UIO_USERSPACE || fp->f_vnode->v_type != VREG)
+		return (vn_chunk_func(fp, uio, active_cred, flags, ioflag, td));
+
+	first_chunk = 1;
+	m_hold = malloc(sizeof(vm_page_t) * uio_hold_pages, M_IOV, M_WAITOK);
+	while (uio->uio_resid > 0) {
+		io_chunk = min(uio_hold_pages * PAGE_SIZE, uio->uio_resid); /* XXXKIB */
+		wire_bytes = round_page(io_chunk);
+		error = vm_wireuio(uio, m_hold, wire_bytes, &wired_pages);
+		if (error != 0) {
+			if (!first_chunk)
+				error = 0;
+			break;
+		}
+		uio_clone = cloneuio(uio);
+		total_cnt = uio->uio_resid;
+		uio->uio_resid = io_chunk;
+		error = vn_chunk_func(fp, uio, active_cred, flags, ioflag, td);
+		vm_unwireuio(uio_clone, m_hold, wired_pages);
+		free(uio_clone, M_IOV);
+		cnt = io_chunk - uio->uio_resid;
+		uio->uio_resid = total_cnt - cnt;
+		if (error != 0) {
+			if (!first_chunk)
+				error = 0;
+			break;
+		}
+		if (cnt == 0)
+			break;
+		first_chunk = 0;
+	}
+	free(m_hold, M_IOV);
+	return (error);
+}
+
+static inline int
+vn_read_wired_chunk(struct file *fp, struct uio *uio, struct ucred *active_cred,
+    int flags, int ioflag, struct thread *td)
 {
 	struct vnode *vp;
-	int error, ioflag;
 	struct mtx *mtxp;
-	int vfslocked;
+	int error, vfslocked;
 
-	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
-	    uio->uio_td, td));
 	mtxp = NULL;
 	vp = fp->f_vnode;
-	ioflag = 0;
-	if (fp->f_flag & FNONBLOCK)
-		ioflag |= IO_NDELAY;
-	if (fp->f_flag & O_DIRECT)
-		ioflag |= IO_DIRECT;
+
 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 	/*
 	 * According to McKusick the vn lock was protecting f_offset here.
@@ -552,38 +612,38 @@ vn_read(fp, uio, active_cred, flags, td)
 }
 
 /*
- * File table vnode write routine.
+ * File table vnode read routine.
 */
 static int
-vn_write(fp, uio, active_cred, flags, td)
-	struct file *fp;
-	struct uio *uio;
-	struct ucred *active_cred;
-	struct thread *td;
-	int flags;
+vn_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
+    struct thread *td)
 {
-	struct vnode *vp;
-	struct mount *mp;
-	int error, ioflag;
-	int vfslocked;
+	int ioflag;
 
 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
 	    uio->uio_td, td));
-	vp = fp->f_vnode;
-	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
-	if (vp->v_type == VREG)
-		bwillwrite();
-	ioflag = IO_UNIT;
-	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
-		ioflag |= IO_APPEND;
+	ioflag = 0;
 	if (fp->f_flag & FNONBLOCK)
 		ioflag |= IO_NDELAY;
 	if (fp->f_flag & O_DIRECT)
 		ioflag |= IO_DIRECT;
-	if ((fp->f_flag & O_FSYNC) ||
-	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
-		ioflag |= IO_SYNC;
+	return (do_vn_rw_chunked(fp, uio, active_cred, flags, ioflag, td,
+	    vn_read_wired_chunk));
+}
+
+static inline int
+vn_write_wired_chunk(struct file *fp, struct uio *uio,
+    struct ucred *active_cred, int flags, int ioflag, struct thread *td)
+{
+	struct mount *mp;
+	struct vnode *vp;
+	int error, vfslocked;
 
+	mp = NULL;
+	vp = fp->f_vnode;
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	if (vp->v_type == VREG)
+		bwillwrite();
 	if (vp->v_type != VCHR &&
 	    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		goto unlock;
@@ -608,6 +668,33 @@ unlock:
 }
 
 /*
+ * File table vnode write routine.
+ */
+static int
+vn_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
+    struct thread *td)
+{
+	struct vnode *vp;
+	int ioflag;
+
+	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
+	    uio->uio_td, td));
+	vp = fp->f_vnode;
+	ioflag = IO_UNIT;
+	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
+		ioflag |= IO_APPEND;
+	if (fp->f_flag & FNONBLOCK)
+		ioflag |= IO_NDELAY;
+	if (fp->f_flag & O_DIRECT)
+		ioflag |= IO_DIRECT;
+	if ((fp->f_flag & O_FSYNC) ||
+	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
+		ioflag |= IO_SYNC;
+	return (do_vn_rw_chunked(fp, uio, active_cred, flags, ioflag, td,
+	    vn_write_wired_chunk));
+}
+
+/*
  * File table truncate routine.
  */
 static int
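For illustration only, and not part of the patch: the uio_resid bookkeeping that do_vn_rw_chunked() performs on each pass, reduced to plain integers. The request size, the page size, and the assumption that every pass moves a full chunk are invented for the example.

/* Minimal sketch of the per-chunk resid accounting used above. */
#include <stdio.h>

#define PAGE_SIZE	4096
#define UIO_HOLD_PAGES	12	/* mirrors the uio_hold_pages default */

int
main(void)
{
	int resid = 100000;	/* bytes remaining in the whole request */
	int io_chunk, total_cnt, cnt;

	while (resid > 0) {
		/* Clamp one pass to at most UIO_HOLD_PAGES pages. */
		io_chunk = UIO_HOLD_PAGES * PAGE_SIZE;
		if (io_chunk > resid)
			io_chunk = resid;

		total_cnt = resid;	/* remember total progress ... */
		resid = io_chunk;	/* ... while the callee sees only the chunk */

		resid = 0;		/* pretend the transfer moved it all */
		cnt = io_chunk - resid;	/* bytes moved this pass */

		resid = total_cnt - cnt;	/* fold the chunk back into the total */
		printf("moved %6d bytes, %6d left\n", cnt, resid);
		if (cnt == 0)
			break;
	}
	return (0);
}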
diff --git a/sys/modules/cxgb/tom/Makefile b/sys/modules/cxgb/tom/Makefile
index 2b8750a..d2566fd 100644
--- a/sys/modules/cxgb/tom/Makefile
+++ b/sys/modules/cxgb/tom/Makefile
@@ -5,7 +5,7 @@ CXGB = ${.CURDIR}/../../../dev/cxgb
 KMOD=	tom
 SRCS=	cxgb_tom.c cxgb_cpl_io.c cxgb_listen.c cxgb_tom_sysctl.c cxgb_cpl_socket.c
-SRCS+=	cxgb_ddp.c cxgb_vm.c cxgb_l2t.c cxgb_tcp_offload.c
+SRCS+=	cxgb_ddp.c cxgb_l2t.c cxgb_tcp_offload.c
 SRCS+=	opt_compat.h opt_inet.h opt_inet6.h opt_ipsec.h opt_mac.h
 SRCS+=	opt_tcpdebug.h opt_ddb.h opt_sched.h opt_global.h opt_ktr.h
 SRCS+=	device_if.h bus_if.h pci_if.h
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 475a20e..f2b4d16 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -34,11 +34,13 @@
 #define _VM_EXTERN_H_
 
 struct buf;
+struct iovec;
 struct proc;
 struct vmspace;
 struct vmtotal;
 struct mount;
 struct vnode;
+struct uio;
 
 #ifdef _KERNEL
 
@@ -56,6 +58,9 @@ void swapout_procs(int);
 int useracc(void *, int, int);
 int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
 void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t);
+int vm_fault_hold_user_pages(vm_map_t map, vm_offset_t addr,
+    vm_page_t *mp, int count, vm_prot_t prot);
+void vm_fault_unhold_pages(vm_page_t *mp, int count);
 void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
 int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int);
@@ -84,5 +89,9 @@ int vm_thread_new(struct thread *td, int pages);
 int vm_thread_new_altkstack(struct thread *td, int pages);
 void vm_thread_swapin(struct thread *td);
 void vm_thread_swapout(struct thread *td);
+int vm_wireuio(struct uio *uiop, struct vm_page *m_hold[], int wire_bytes,
+    int *wired_pages);
+void vm_unwireuio(struct uio *, struct vm_page *m_hold[], int wired_pages);
+
 #endif /* _KERNEL */
 #endif /* !_VM_EXTERN_H_ */
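A sketch, for illustration only, of the calling pattern the new vm_wireuio()/vm_unwireuio() prototypes are designed for, modeled on the vn_rdwr() change earlier in this patch. The function name wireuio_example() is hypothetical, and td, base and len are assumed to describe a user-space buffer owned by the calling thread; the usual sys/ and vm/ headers are assumed to be included.

/* Sketch only: wiring a single user buffer around a copy, as vn_rdwr() now does. */
static int
wireuio_example(struct thread *td, void *base, size_t len)
{
	struct uio auio, auio_clone;
	struct iovec aiov, aiov_clone;
	vm_page_t *m_hold;
	int error, wired_pages;

	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = len;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;

	/* One page pointer per page the buffer can touch, plus one for misalignment. */
	m_hold = malloc(sizeof(vm_page_t) * (btoc(len) + 1), M_IOV, M_WAITOK);

	/* Keep an unadvanced copy; uiomove() consumes the original uio/iovec. */
	aiov_clone = aiov;
	auio_clone = auio;
	auio_clone.uio_iov = &aiov_clone;

	error = vm_wireuio(&auio, m_hold,
	    round_page((vm_offset_t)base + len) - trunc_page((vm_offset_t)base),
	    &wired_pages);
	if (error == 0) {
		/* ... uiomove() against auio cannot fault on these pages ... */
		vm_unwireuio(&auio_clone, m_hold, wired_pages);
	}
	free(m_hold, M_IOV);
	return (error);
}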
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 48ed991..1b0dbcf 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1,4 +1,31 @@
+
 /*-
+
+Copyright (c) 2007-2008, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1994 John S. Dyson
@@ -1362,3 +1389,115 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
 	/* return number of pages */
 	return i;
 }
+
+/*
+ * This routine takes a user's map, array of pages, number of pages, and flags
+ * and then does the following:
+ *  - validate that the user has access to those pages (flags indicates read
+ *    or write) - if not fail
+ *  - validate that count is enough to hold range number of pages - if not fail
+ *  - fault in any non-resident pages
+ *  - if the user is doing a read force a write fault for any COWed pages
+ *  - if the user is doing a read mark all pages as dirty
+ *  - hold all pages
+ */
+int
+vm_fault_hold_user_pages(vm_map_t map, vm_offset_t addr, vm_page_t *mp,
+    int count, vm_prot_t prot)
+{
+	vm_offset_t end, va;
+	int faults, rv;
+	pmap_t pmap;
+	vm_page_t m, *pages;
+
+	pmap = vm_map_pmap(map);
+	pages = mp;
+	addr &= ~PAGE_MASK;
+
+	/*
+	 * Check that virtual address range is legal.
+	 * This check is somewhat bogus as on some architectures kernel
+	 * and user do not share VA - however, it appears that all FreeBSD
+	 * architectures define it
+	 */
+	end = addr + (count * PAGE_SIZE);
+	if (end > VM_MAXUSER_ADDRESS)
+		return (EFAULT);
+
+	/*
+	 * First optimistically assume that all pages are resident
+	 * (and R/W if for write) if so just mark pages as held (and
+	 * dirty if for write) and return.
+	 */
+	vm_page_lock_queues();
+	for (pages = mp, faults = 0, va = addr; va < end;
+	    va += PAGE_SIZE, pages++) {
+		/*
+		 * Page queue mutex is recursable so this is OK.
+		 * It would be really nice if we had an unlocked
+		 * version of this so we were only acquiring the
+		 * pmap lock 1 time as opposed to potentially
+		 * many dozens of times.
+		 */
+		*pages = m = pmap_extract_and_hold(pmap, va, prot);
+		if (m == NULL) {
+			faults++;
+			continue;
+		}
+
+		/*
+		 * Preemptively mark dirty - the pages will never have
+		 * the modified bit set if they are only changed via
+		 * DMA.
+		 */
+		if (prot & VM_PROT_WRITE)
+			vm_page_dirty(m);
+	}
+	vm_page_unlock_queues();
+
+	if (faults == 0)
+		return (0);
+
+	/*
+	 * Pages either have insufficient permissions or are not present;
+	 * trigger a fault where necessary.
+	 */
+	rv = 0;
+	for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++) {
+		/*
+		 * Account for a very narrow race where the page may be
+		 * taken away from us before it is held.
+		 */
+		while (*pages == NULL) {
+			rv = vm_fault(map, va, prot, (prot & VM_PROT_WRITE) ?
+			    VM_FAULT_DIRTY : VM_FAULT_NORMAL);
+			if (rv)
+				goto error;
+			*pages = pmap_extract_and_hold(pmap, va, prot);
+		}
+	}
+	return (0);
+
+error:
+	vm_page_lock_queues();
+	for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++)
+		if (*pages) {
+			vm_page_unhold(*pages);
+			*pages = NULL;
+		}
+	vm_page_unlock_queues();
+	return (EFAULT);
+}
+
+void
+vm_fault_unhold_pages(vm_page_t *mp, int count)
+{
+
+	KASSERT(count >= 0, ("negative count %d", count));
+	vm_page_lock_queues();
+	while (count--) {
+		vm_page_unhold(*mp);
+		mp++;
+	}
+	vm_page_unlock_queues();
+}
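A sketch, for illustration only, of how a driver might hold a user buffer for DMA with the relocated helpers, roughly the pattern the cxgb TOE/DDP code used with its private copy. The function name, uaddr and npages are placeholders for a user address and the number of pages backing the buffer; M_DEVBUF is used as a generic malloc type here.

/* Sketch only: hold a user buffer, run DMA against it, then release it. */
static int
hold_user_buffer_example(vm_offset_t uaddr, int npages)
{
	vm_map_t map = &curproc->p_vmspace->vm_map;
	vm_page_t *pages;
	int error;

	pages = malloc(npages * sizeof(vm_page_t), M_DEVBUF, M_WAITOK);
	error = vm_fault_hold_user_pages(map, uaddr, pages, npages,
	    VM_PROT_READ | VM_PROT_WRITE);
	if (error == 0) {
		/* ... program the device to DMA into the held pages ... */
		vm_fault_unhold_pages(pages, npages);
	}
	free(pages, M_DEVBUF);
	return (error);
}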
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 2c5821c..ca73db3 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1374,6 +1374,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 	new_entry->protection = prot;
 	new_entry->max_protection = max;
 	new_entry->wired_count = 0;
+	new_entry->pin_count = 0;
 
 	/*
 	 * Insert the new entry into the list
@@ -2796,7 +2797,8 @@ reclip_start:
 		 */
 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
 		    (vm_map_pmap(map) != kernel_pmap &&
-		    vm_map_entry_system_wired_count(entry) != 0)) {
+		    vm_map_entry_system_wired_count(entry) != 0) ||
+		    (entry->pin_count != 0)) {
 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 			last_timestamp = map->timestamp;
@@ -2816,6 +2818,7 @@ reclip_start:
 			holder_entry->max_protection = VM_PROT_NONE;
 			holder_entry->inheritance = VM_INHERIT_NONE;
 			holder_entry->wired_count = 0;
+			holder_entry->pin_count = 0;
 			vm_map_entry_link(map, entry->prev, holder_entry);
 		}
 		(void) vm_map_unlock_and_wait(map, 0);
@@ -3865,6 +3868,166 @@ vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
 	vm_map_unlock_read(map);
 }
 
+static boolean_t
+vm_map_unpin_entries(vm_map_t map, struct uio *uiop, int pinned_entries)
+{
+	vm_offset_t start;
+	struct iovec *iov;
+	vm_map_entry_t entry;
+	int i, acc, wire;
+	boolean_t need_wakeup;
+
+	iov = uiop->uio_iov;
+	need_wakeup = FALSE;
+
+	for (i = 0, acc = 0; acc < pinned_entries; iov++, i++) {
+		KASSERT(i < uiop->uio_iovcnt, ("wireio: iovcnt overflow %d %d %d",
+		    i, uiop->uio_iovcnt, pinned_entries));
+		wire = round_page(iov->iov_len);
+		if (acc + wire > pinned_entries)
+			wire = pinned_entries - acc;
+		start = trunc_page((vm_offset_t)iov->iov_base);
+		for (;;) {
+			if (!vm_map_lookup_entry(map, start, &entry)) {
+#ifdef INVARIANTS
+				panic("vm_unwireuio: hole");
+#endif
+			}
+			KASSERT(entry->pin_count > 0, ("pin_count %p", entry));
+			if (--entry->pin_count == 0 &&
+			    (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP)) {
+				entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
+				need_wakeup = TRUE;
+			}
+			if (entry->end >= start + wire) {
+				acc += wire;
+				break;
+			} else {
+				acc += entry->end - start;
+				wire -= entry->end - start;
+				start = entry->end;
+			}
+		}
+	}
+	return (need_wakeup);
+}
+
+/*
+ * vm_wireuio
+ *
+ * Given userspace struct uio, we set up vm state such that after the
+ * successful return there will be no page faults during uiomove with
+ * this uio until vm_unwireuio is called.  At most wire_bytes bytes of
+ * the user address space are held.
+ *
+ * Function performs this by first pinning all map entries that will
+ * be referenced.  This guarantees that our ranges of user address
+ * space cannot be remapped during the operation.  Then, all accessed
+ * pages are faulted in and held.
+ */
+int
+vm_wireuio(struct uio *uiop, struct vm_page *m_hold[], int wire_bytes,
+    int *wired_pages)
+{
+	vm_map_t map;
+	vm_offset_t start, start1;
+	struct iovec *iov;
+	vm_map_entry_t entry;
+	struct vm_page **m_hold1;
+	int i, acc, wire, wire_pages, pinned_entries, rv, prot;
+	int error;
+	boolean_t need_wakeup;
+
+	KASSERT(round_page(wire_bytes) == wire_bytes,
+	    ("wireuio: wire_bytes is not page-size aligned"));
+	KASSERT(uiop->uio_segflg == UIO_USERSPACE,
+	    ("wireuio: !UIO_USERSPACE"));
+
+	error = 0;
+	prot = uiop->uio_rw == UIO_READ ? VM_PROT_WRITE : VM_PROT_READ;
+	m_hold1 = m_hold;
+	pinned_entries = 0;
+	*wired_pages = 0;
+	map = &uiop->uio_td->td_proc->p_vmspace->vm_map;
+	iov = uiop->uio_iov;
+
+	/*
+	 * Do the pass over iov.
+	 */
+	for (i = 0, acc = 0; acc < wire_bytes; iov++, i++) {
+		wire = round_page(iov->iov_len);
+		if (acc + wire > wire_bytes)
+			wire = wire_bytes - acc;
+		acc += wire;
+		wire_pages = btoc(wire);
+		start1 = start = trunc_page((vm_offset_t)iov->iov_base);
+		if (start < vm_map_min(map) || start + wire > vm_map_max(map) ||
+		    start > start + wire) {
+			error = EINVAL;
+			goto fault;
+		}
+		vm_map_lock(map);
+
+		/*
+		 * Pin each entry referenced by addresses in iov.
+		 */
+		for (;;) {
+			if (!vm_map_lookup_entry(map, start1, &entry) ||
+			    (entry->eflags & MAP_ENTRY_IS_HOLDER))
+				goto fault;
+			entry->pin_count++;
+			if (entry->end >= start1 + wire) {
+				pinned_entries += wire;
+				break;
+			} else {
+				pinned_entries += entry->end - start1;
+				wire -= entry->end - start1;
+				start1 = entry->end;
+			}
+		}
+		vm_map_unlock(map);
+
+		/*
+		 * If entries are successfully pinned, the
+		 * corresponding pages are faulted in and held.
+		 */
+		rv = vm_fault_hold_user_pages(map, start, m_hold1, wire_pages,
+		    prot);
+		if (rv != KERN_SUCCESS) {
+			error = EFAULT;
+			goto fault;
+		}
+		*wired_pages += wire_pages;
+		m_hold1 += wire_pages;
+	}
+	return (0);
+fault:
+	vm_fault_unhold_pages(m_hold, *wired_pages);
+	vm_map_lock(map);
+	need_wakeup = vm_map_unpin_entries(map, uiop, pinned_entries);
+	vm_map_unlock(map);
+	if (need_wakeup)
+		vm_map_wakeup(map);
+	return (error);
+}
+
+void
+vm_unwireuio(struct uio *uiop, struct vm_page *m_hold[], int wired_pages)
+{
+	vm_map_t map;
+	boolean_t need_wakeup;
+
+	map = &uiop->uio_td->td_proc->p_vmspace->vm_map;
+
+	vm_fault_unhold_pages(m_hold, wired_pages);
+
+	vm_map_lock(map);
+	need_wakeup = vm_map_unpin_entries(map, uiop, ctob(wired_pages));
+	vm_map_unlock(map);
+	if (need_wakeup)
+		vm_map_wakeup(map);
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index f2c4fd3..9310718 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -114,6 +114,7 @@ struct vm_map_entry {
 	vm_inherit_t inheritance;	/* inheritance */
 	int wired_count;		/* can be paged if = 0 */
 	vm_pindex_t lastr;		/* last read */
+	unsigned pin_count;		/* non-exclusive pin count */
 };
 
 #define MAP_ENTRY_NOSYNC	0x0001
@@ -383,5 +384,6 @@ int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
 int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags);
 int vmspace_swap_count (struct vmspace *vmspace);
+
 #endif /* _KERNEL */
 #endif /* _VM_MAP_ */
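A sketch, for illustration only, of why the vn_rdwr() change allocates btoc(len) + 1 page pointers and derives the wire_bytes argument from page-rounded bounds rather than from len: an unaligned buffer can straddle one more page than its length alone suggests. The address and length below are arbitrary example values, assuming 4 KB pages.

	vm_offset_t base = 0x10000800;		/* not page aligned */
	size_t len = 2 * PAGE_SIZE;		/* 8192 bytes */
	size_t wire_bytes;

	wire_bytes = round_page(base + len) - trunc_page(base);
	/* wire_bytes == 3 * PAGE_SIZE: the buffer touches btoc(len) + 1 pages. */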
diff --git a/tools/regression/file/uio/uio.c b/tools/regression/file/uio/uio.c
new file mode 100644
index 0000000..d857605
--- /dev/null
+++ b/tools/regression/file/uio/uio.c
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2009 Konstantin Belousov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+int chunk_cnt = 1024;
+int chunk_size = 1024;
+
+int
+main(int argc, char *argv[])
+{
+	struct iovec *wiov, *riov;
+	char **wdata, **rdata;
+	int fd, i;
+	ssize_t io_error;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: uio file [chunk count [chunk size]]\n");
+		return (2);
+	}
+	fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
+	if (fd == -1) {
+		fprintf(stderr, "Failed to create %s: %s\n",
+		    argv[1], strerror(errno));
+		return (1);
+	}
+
+	if (argc > 2)
+		chunk_cnt = atoi(argv[2]);
+	if (argc > 3)
+		chunk_size = atoi(argv[3]);
+
+	wiov = calloc(chunk_cnt, sizeof(*wiov));
+	wdata = calloc(chunk_cnt, sizeof(*wdata));
+
+	riov = calloc(chunk_cnt, sizeof(*riov));
+	rdata = calloc(chunk_cnt, sizeof(*rdata));
+
+	for (i = 0; i < chunk_cnt; i++) {
+		rdata[i] = malloc(chunk_size);
+		riov[i].iov_base = rdata[i];
+		riov[i].iov_len = chunk_size;
+
+		wdata[i] = malloc(chunk_size);
+		memset(wdata[i], i, chunk_size);
+		wiov[i].iov_base = wdata[i];
+		wiov[i].iov_len = chunk_size;
+	}
+
+	io_error = writev(fd, wiov, chunk_cnt);
+	if (io_error == -1) {
+		fprintf(stderr, "write failed: %s\n", strerror(errno));
+		return (1);
+	} else if (io_error != chunk_cnt * chunk_size) {
+		fprintf(stderr, "truncated write: %zd %d\n",
+		    io_error, chunk_cnt * chunk_size);
+		return (1);
+	}
+
+	if (lseek(fd, 0, SEEK_SET) == -1) {
+		fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+		return (1);
+	}
+
+	io_error = readv(fd, riov, chunk_cnt);
+	if (io_error == -1) {
+		fprintf(stderr, "read failed: %s\n", strerror(errno));
+		return (1);
+	} else if (io_error != chunk_cnt * chunk_size) {
+		fprintf(stderr, "truncated read: %zd %d\n",
+		    io_error, chunk_cnt * chunk_size);
+		return (1);
+	}
+
+	for (i = 0; i < chunk_cnt; i++) {
+		if (memcmp(rdata[i], wdata[i], chunk_size) != 0) {
+			fprintf(stderr, "chunk %d differs\n", i);
+			return (1);
+		}
+	}
+
+	return (0);
+}
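The regression test writes chunk_cnt iovec segments of chunk_size bytes each with writev(2), reads the file back with readv(2) and compares the two buffer sets, which exercises the wired, chunked path added to vn_read()/vn_write(). A typical invocation (the file name and sizes below are arbitrary):

	cc -o uio uio.c
	./uio /tmp/uio.scratch 2048 1500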