diff --git a/sys/conf/files b/sys/conf/files
index f5217db..a9ead10 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2009,6 +2009,7 @@ kern/kern_poll.c	optional device_polling
 kern/kern_priv.c	standard
 kern/kern_proc.c	standard
 kern/kern_prot.c	standard
+kern/kern_rangelock.c	standard
 kern/kern_resource.c	standard
 kern/kern_rmlock.c	standard
 kern/kern_rwlock.c	standard
@@ -2696,6 +2697,7 @@ vm/vm_page.c	standard
 vm/vm_pageout.c	standard
 vm/vm_pager.c	standard
 vm/vm_phys.c	standard
+vm/vm_readwrite.c	standard
 vm/vm_reserv.c	standard
 vm/vm_unix.c	standard
 vm/vm_zeroidle.c	standard
diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c
new file mode 100644
index 0000000..00e5f4a
--- /dev/null
+++ b/sys/kern/kern_rangelock.c
@@ -0,0 +1,166 @@
+/*-
+ * Copyright (c) 2009 Konstantin Belousov
+ * All rights reserved.
+ *
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+
+uma_zone_t rl_entry_zone;
+
+static void
+rangelock_sys_init(void)
+{
+
+	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, rangelock_sys_init, NULL);
+
+void
+rangelock_init(struct rangelock *lock)
+{
+
+	TAILQ_INIT(&lock->rl_waiters);
+	lock->rl_currdep = NULL;
+}
+
+void
+rangelock_destroy(struct rangelock *lock)
+{
+
+	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
+}
+
+static int
+rangelock_incompatible(const struct rl_q_entry *e1,
+    const struct rl_q_entry *e2)
+{
+
+	if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
+	    (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
+		return (0);
+#define	IN_RANGE(a, e) (a >= e->rl_q_start && a < e->rl_q_end)
+	if (IN_RANGE(e1->rl_q_start, e2) || IN_RANGE(e2->rl_q_start, e1) ||
+	    IN_RANGE(e1->rl_q_end, e2) || IN_RANGE(e2->rl_q_end, e1))
+		return (1);
+#undef	IN_RANGE
+	return (0);
+}
+
+static void
+rangelock_calc_block(struct rangelock *lock)
+{
+	struct rl_q_entry *entry, *entry1, *whead;
+
+	if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
+	    lock->rl_currdep != NULL)
+		lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
+	for (entry = lock->rl_currdep; entry;
+	    entry = TAILQ_NEXT(entry, rl_q_link)) {
+		TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
+			if (rangelock_incompatible(entry, entry1))
+				goto out;
+			if (entry1 == entry)
+				break;
+		}
+	}
+out:
+	lock->rl_currdep = entry;
+	TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
+		if (whead == lock->rl_currdep)
+			break;
+		if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
+			whead->rl_q_flags |= RL_LOCK_GRANTED;
+			wakeup(whead);
+		}
+	}
+}
+
+static void
+rangelock_unlock_vp_locked(struct vnode *vp, struct rl_q_entry *entry)
+{
+
+	ASSERT_VI_LOCKED(vp, "rangelock");
+	KASSERT(entry != vp->v_rl.rl_currdep, ("stuck currdep"));
+	TAILQ_REMOVE(&vp->v_rl.rl_waiters, entry, rl_q_link);
+	rangelock_calc_block(&vp->v_rl);
+	VI_UNLOCK(vp);
+	uma_zfree(rl_entry_zone, entry);
+}
+
+void
+rangelock_unlock(struct vnode *vp, void *cookie)
+{
+	struct rl_q_entry *entry;
+
+	entry = cookie;
+	VI_LOCK(vp);
+	rangelock_unlock_vp_locked(vp, entry);
+}
+
+void *
+rangelock_unlock_range(struct vnode *vp, void *cookie, off_t base, size_t len)
+{
+	struct rl_q_entry *entry;
+
+	entry = cookie;
+	VI_LOCK(vp);
+	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED, ("XXX"));
+	KASSERT(entry->rl_q_start == base, ("XXX"));
+	KASSERT(entry->rl_q_end >= base + len, ("XXX"));
+	if (entry->rl_q_end == base + len) {
+		rangelock_unlock_vp_locked(vp, cookie);
+		return (NULL);
+	}
+	entry->rl_q_end = base + len;
+	rangelock_calc_block(&vp->v_rl);
+	VI_UNLOCK(vp);
+	return (cookie);
+}
+
+static void *
+rangelock_enqueue(struct vnode *vp, struct rl_q_entry *entry)
+{
+
+	VI_LOCK(vp);
+	TAILQ_INSERT_TAIL(&vp->v_rl.rl_waiters, entry, rl_q_link);
+	if (vp->v_rl.rl_currdep == NULL)
+		vp->v_rl.rl_currdep = entry;
+	rangelock_calc_block(&vp->v_rl);
+	while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
+		msleep(entry, &vp->v_interlock, 0, "range", 0);
+	VI_UNLOCK(vp);
+	return (entry);
+}
+
+void *
+rangelock_rlock(struct vnode *vp, off_t base, size_t len)
+{
+	struct rl_q_entry *entry;
+
+	entry = uma_zalloc(rl_entry_zone, M_WAITOK);
+	entry->rl_q_flags = RL_LOCK_READ;
+	entry->rl_q_start = base;
+	entry->rl_q_end = base + len;
+	return (rangelock_enqueue(vp, entry));
+}
+
+void *
+rangelock_wlock(struct vnode *vp, off_t base, size_t len)
+{
+	struct rl_q_entry *entry;
+
+	entry = uma_zalloc(rl_entry_zone, M_WAITOK);
+	entry->rl_q_flags = RL_LOCK_WRITE;
+	entry->rl_q_start = base;
+	entry->rl_q_end = base + len;
+	return (rangelock_enqueue(vp, entry));
+}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index d92555f..8aa145f 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -853,6 +853,7 @@ vdestroy(struct vnode *vp)
 	/* XXX Elsewhere we can detect an already freed vnode via NULL v_op. */
 	vp->v_op = NULL;
 #endif
+	rangelock_destroy(&vp->v_rl);
 	lockdestroy(vp->v_vnlock);
 	mtx_destroy(&vp->v_interlock);
 	mtx_destroy(BO_MTX(bo));
@@ -1007,6 +1008,7 @@ alloc:
 		if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0)
 			vp->v_vflag |= VV_NOKNOTE;
 	}
+	rangelock_init(&vp->v_rl);
 	*vpp = vp;
 	return (0);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 03e8d93..6618bc8 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -62,6 +62,9 @@ __FBSDID("$FreeBSD$");
 #include
+#include
+#include
+
 static fo_rdwr_t	vn_read;
 static fo_rdwr_t	vn_write;
 static fo_truncate_t	vn_truncate;
@@ -351,74 +354,69 @@ sequential_heuristic(struct uio *uio, struct file *fp)
 /*
  * Package up an I/O request on a vnode into a uio and do it.
*/ int -vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred, - aresid, td) - enum uio_rw rw; - struct vnode *vp; - void *base; - int len; - off_t offset; - enum uio_seg segflg; - int ioflg; - struct ucred *active_cred; - struct ucred *file_cred; - int *aresid; - struct thread *td; +vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, + enum uio_seg segflg, int ioflg, struct ucred *active_cred, + struct ucred *file_cred, int *aresid, struct thread *td) { struct uio auio; struct iovec aiov; struct mount *mp; struct ucred *cred; + void *rl_cookie; int error, lock_flags; VFS_ASSERT_GIANT(vp->v_mount); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = base; + aiov.iov_len = len; + auio.uio_resid = len; + auio.uio_offset = offset; + auio.uio_segflg = segflg; + auio.uio_rw = rw; + auio.uio_td = td; + error = 0; + + if ((ioflg & IO_NODELOCKED) == 0) { + if (rw == UIO_READ) + rl_cookie = rangelock_rlock(vp, offset, len); + else + rl_cookie = rangelock_wlock(vp, offset, len); + } else + rl_cookie = NULL; + if ((ioflg & IO_NODELOCKED) == 0) { mp = NULL; if (rw == UIO_WRITE) { if (vp->v_type != VCHR && (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) - return (error); + goto out; if (MNT_SHARED_WRITES(mp) || - ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { + ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) lock_flags = LK_SHARED; - } else { + else lock_flags = LK_EXCLUSIVE; - } vn_lock(vp, lock_flags | LK_RETRY); - } else - vn_lock(vp, LK_SHARED | LK_RETRY); - + } } ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = base; - aiov.iov_len = len; - auio.uio_resid = len; - auio.uio_offset = offset; - auio.uio_segflg = segflg; - auio.uio_rw = rw; - auio.uio_td = td; - error = 0; #ifdef MAC if ((ioflg & IO_NOMACCHECK) == 0) { - if (rw == UIO_READ) - error = mac_vnode_check_read(active_cred, file_cred, - vp); - else + if (rw == UIO_WRITE) error = mac_vnode_check_write(active_cred, file_cred, vp); } #endif if (error == 0) { - if (file_cred) + if (file_cred != NULL) cred = file_cred; else cred = active_cred; if (rw == UIO_READ) - error = VOP_READ(vp, &auio, ioflg, cred); + error = vn_read_chunk(vp, &auio, active_cred, cred, + ioflg); else error = VOP_WRITE(vp, &auio, ioflg, cred); } @@ -428,10 +426,15 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred, if (auio.uio_resid && error == 0) error = EIO; if ((ioflg & IO_NODELOCKED) == 0) { - if (rw == UIO_WRITE && vp->v_type != VCHR) - vn_finished_write(mp); - VOP_UNLOCK(vp, 0); + if (rw == UIO_WRITE) { + if (vp->v_type != VCHR) + vn_finished_write(mp); + VOP_UNLOCK(vp, 0); + } } + out: + if (rl_cookie != NULL) + rangelock_unlock(vp, rl_cookie); return (error); } @@ -493,126 +496,149 @@ vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred, return (error); } +static struct mtx * +vn_lock_foffset(struct file *fp) +{ + struct mtx *mtxp; + + mtxp = mtx_pool_find(mtxpool_sleep, fp); + mtx_lock(mtxp); + while (fp->f_vnread_flags & FOFFSET_LOCKED) { + fp->f_vnread_flags |= FOFFSET_LOCK_WAITING; + msleep(&fp->f_vnread_flags, mtxp, PUSER -1, + "vnread offlock", 0); + } + fp->f_vnread_flags |= FOFFSET_LOCKED; + mtx_unlock(mtxp); + return (mtxp); +} + +static void +vn_unlock_foffset(struct file *fp, struct mtx *mtxp) +{ + + mtx_lock(mtxp); + if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING) + wakeup(&fp->f_vnread_flags); + fp->f_vnread_flags = 0; + mtx_unlock(mtxp); +} + +int 
+vn_read_chunk(struct vnode *vp, struct uio *uio, struct ucred *active_cred, + struct ucred *fcred, int ioflag) +{ + int error, vfslocked; + + error = 0; + vfslocked = 0; /* gcc */ + + if ((ioflag & IO_NODELOCKED) == 0) { + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + vn_lock(vp, LK_SHARED | LK_RETRY); + } + +#ifdef MAC + if ((ioflag & IO_NOMACCHECK) == 0) + error = mac_vnode_check_read(active_cred, fcred, vp); +#endif + if (error == 0) { + error = vnode_pager_read(vp, uio, ioflag); + if (error == EOPNOTSUPP) + error = VOP_READ(vp, uio, ioflag, fcred); + } + if ((ioflag & IO_NODELOCKED) == 0) { + VOP_UNLOCK(vp, 0); + VFS_UNLOCK_GIANT(vfslocked); + } + return (error); +} + /* * File table vnode read routine. */ static int -vn_read(fp, uio, active_cred, flags, td) - struct file *fp; - struct uio *uio; - struct ucred *active_cred; - struct thread *td; - int flags; +vn_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, + struct thread *td) { struct vnode *vp; - int error, ioflag; struct mtx *mtxp; - int vfslocked; + void *rl_cookie; + int ioflag; + int error; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); - mtxp = NULL; - vp = fp->f_vnode; ioflag = 0; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; - vfslocked = VFS_LOCK_GIANT(vp->v_mount); + vp = fp->f_vnode; + /* * According to McKusick the vn lock was protecting f_offset here. * It is now protected by the FOFFSET_LOCKED flag. */ if ((flags & FOF_OFFSET) == 0) { - mtxp = mtx_pool_find(mtxpool_sleep, fp); - mtx_lock(mtxp); - while(fp->f_vnread_flags & FOFFSET_LOCKED) { - fp->f_vnread_flags |= FOFFSET_LOCK_WAITING; - msleep(&fp->f_vnread_flags, mtxp, PUSER -1, - "vnread offlock", 0); - } - fp->f_vnread_flags |= FOFFSET_LOCKED; - mtx_unlock(mtxp); - vn_lock(vp, LK_SHARED | LK_RETRY); + mtxp = vn_lock_foffset(fp); uio->uio_offset = fp->f_offset; } else - vn_lock(vp, LK_SHARED | LK_RETRY); - + mtxp = NULL; /* gcc */ + if (vp->v_type == VREG) + rl_cookie = rangelock_rlock(vp, uio->uio_offset, + uio->uio_resid); + else + rl_cookie = NULL; ioflag |= sequential_heuristic(uio, fp); - -#ifdef MAC - error = mac_vnode_check_read(active_cred, fp->f_cred, vp); - if (error == 0) -#endif - error = VOP_READ(vp, uio, ioflag, fp->f_cred); + error = vn_read_chunk(vp, uio, active_cred, fp->f_cred, ioflag); + fp->f_nextoff = uio->uio_offset; + if (rl_cookie != NULL) + rangelock_unlock(vp, rl_cookie); if ((flags & FOF_OFFSET) == 0) { fp->f_offset = uio->uio_offset; - mtx_lock(mtxp); - if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING) - wakeup(&fp->f_vnread_flags); - fp->f_vnread_flags = 0; - mtx_unlock(mtxp); + vn_unlock_foffset(fp, mtxp); } - fp->f_nextoff = uio->uio_offset; - VOP_UNLOCK(vp, 0); - VFS_UNLOCK_GIANT(vfslocked); return (error); } -/* - * File table vnode write routine. 
- */ -static int -vn_write(fp, uio, active_cred, flags, td) - struct file *fp; - struct uio *uio; - struct ucred *active_cred; - struct thread *td; - int flags; +static inline int +vn_write_chunk(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, int ioflag, struct thread *td) { - struct vnode *vp; struct mount *mp; - int error, ioflag, lock_flags; - int vfslocked; + struct vnode *vp; + int error, lock_flags, vfslocked; - KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", - uio->uio_td, td)); + mp = NULL; vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); if (vp->v_type == VREG) bwillwrite(); - ioflag = IO_UNIT; - if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) - ioflag |= IO_APPEND; - if (fp->f_flag & FNONBLOCK) - ioflag |= IO_NDELAY; - if (fp->f_flag & O_DIRECT) - ioflag |= IO_DIRECT; - if ((fp->f_flag & O_FSYNC) || - (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) - ioflag |= IO_SYNC; - mp = NULL; if (vp->v_type != VCHR && (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto unlock; if ((MNT_SHARED_WRITES(mp) || ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) && - (flags & FOF_OFFSET) != 0) { + (flags & FOF_OFFSET) != 0) lock_flags = LK_SHARED; - } else { + else lock_flags = LK_EXCLUSIVE; - } - vn_lock(vp, lock_flags | LK_RETRY); if ((flags & FOF_OFFSET) == 0) uio->uio_offset = fp->f_offset; ioflag |= sequential_heuristic(uio, fp); #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); - if (error == 0) +#else + error = 0; #endif - error = VOP_WRITE(vp, uio, ioflag, fp->f_cred); + if (error == 0) { + error = vnode_pager_write(vp, uio, ioflag); + if (error == EOPNOTSUPP) + error = VOP_WRITE(vp, uio, ioflag, fp->f_cred); + } if ((flags & FOF_OFFSET) == 0) fp->f_offset = uio->uio_offset; fp->f_nextoff = uio->uio_offset; @@ -625,6 +651,48 @@ unlock: } /* + * File table vnode write routine. + */ +static int +vn_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, + struct thread *td) +{ + struct vnode *vp; + void *rl_cookie; + int error, ioflag; + + KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", + uio->uio_td, td)); + vp = fp->f_vnode; + ioflag = IO_UNIT; + if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) + ioflag |= IO_APPEND; + if (fp->f_flag & FNONBLOCK) + ioflag |= IO_NDELAY; + if (fp->f_flag & O_DIRECT) + ioflag |= IO_DIRECT; + if ((fp->f_flag & O_FSYNC) || + (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) + ioflag |= IO_SYNC; + if (vp->v_type == VREG) { + if ((ioflag & IO_APPEND) || !(flags & FOF_OFFSET)) + /* + * For appenders, punt and lock the whole + * range. It also protects f_offset. + */ + rl_cookie = rangelock_wlock(vp, 0, (size_t)-1); + else + rl_cookie = rangelock_wlock(vp, uio->uio_offset, + uio->uio_resid); + } else + rl_cookie = NULL; + error = vn_write_chunk(fp, uio, active_cred, flags, ioflag, td); + if (rl_cookie != NULL) + rangelock_unlock(vp, rl_cookie); + return (error); +} + +/* * File table truncate routine. */ static int diff --git a/sys/sys/rangelock.h b/sys/sys/rangelock.h new file mode 100644 index 0000000..5ec6433 --- /dev/null +++ b/sys/sys/rangelock.h @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 2009 Konstantin Belousov + * All rights reserved. 
+ * + * $FreeBSD$ + */ + +#ifndef _SYS_RANGELOCK_H +#define _SYS_RANGELOCK_H + +#include +#include +#include +#include +#include + +#ifdef _KERNEL + +struct vnode; + +struct rl_q_entry +{ + TAILQ_ENTRY(rl_q_entry) rl_q_link; + size_t rl_q_start, rl_q_end; + int rl_q_flags; +}; + +#define RL_LOCK_READ 0x0001 +#define RL_LOCK_WRITE 0x0002 +#define RL_LOCK_TYPE_MASK 0x0003 +#define RL_LOCK_GRANTED 0x0004 + +struct rangelock +{ + TAILQ_HEAD(, rl_q_entry) rl_waiters; + struct rl_q_entry *rl_currdep; +}; + +void rangelock_init(struct rangelock *lock); +void rangelock_destroy(struct rangelock *lock); +void rangelock_unlock(struct vnode *vp, void *cookie); +void *rangelock_unlock_range(struct vnode *vp, void *cookie, off_t base, + size_t len); +void *rangelock_rlock(struct vnode *vp, off_t base, size_t len); +void *rangelock_wlock(struct vnode *vp, off_t base, size_t len); +#endif + +#endif diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index b38c1d0..2e23522 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -168,7 +169,8 @@ struct vnode { */ struct vpollinfo *v_pollinfo; /* G Poll events, p for *v_pi */ struct label *v_label; /* MAC label for vnode */ - struct lockf *v_lockf; /* Byte-level lock list */ + struct lockf *v_lockf; /* Byte-level adv lock list */ + struct rangelock v_rl; /* Byte-range lock */ }; #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */ @@ -655,6 +657,8 @@ int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base, size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td); +int vn_read_chunk(struct vnode *vp, struct uio *uio, + struct ucred *active_cred, struct ucred *f_cred, int ioflag); int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred, struct ucred *file_cred, struct thread *td); int vn_start_write(struct vnode *vp, struct mount **mpp, int flags); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 464a761..4c173fe 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -463,7 +463,7 @@ ffs_read(ap) return error; } #endif - + KASSERT(uio->uio_segflg != UIO_USERSPACE, ("ffs_read: UIO_USERSPACE")); seqcount = ap->a_ioflag >> IO_SEQSHIFT; ip = VTOI(vp); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 08b77ae..8f537b3 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1900,7 +1900,8 @@ ufs_readdir(ap) uio->uio_iov->iov_len = count; # if (BYTE_ORDER == LITTLE_ENDIAN) if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { - error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); + error = vn_read_chunk(ap->a_vp, uio, ap->a_cred, + ap->a_cred, IO_NODELOCKED); } else { struct dirent *dp, *edp; struct uio auio; @@ -1916,7 +1917,8 @@ ufs_readdir(ap) aiov.iov_len = count; dirbuf = malloc(count, M_TEMP, M_WAITOK); aiov.iov_base = dirbuf; - error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); + error = vn_read_chunk(ap->a_vp, &auio, ap->a_cred, + ap->a_cred, IO_NODELOCKED); if (error == 0) { readcnt = count - auio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; @@ -1938,7 +1940,8 @@ ufs_readdir(ap) free(dirbuf, M_TEMP); } # else - error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); + error = vn_read_chunk(ap->a_vp, uio, ap->a_cred, + ap->a_cred, IO_NODELOCKED); # endif if (!error && ap->a_ncookies != NULL) { struct dirent* dpStart; diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index ff48983..ccfd066 100644 --- 
a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -84,5 +84,8 @@ void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); void vm_thread_swapin(struct thread *td); void vm_thread_swapout(struct thread *td); +int vnode_pager_read(struct vnode *vp, struct uio *uio, int ioflags); +int vnode_pager_write(struct vnode *vp, struct uio *uio, int ioflags); + #endif /* _KERNEL */ #endif /* !_VM_EXTERN_H_ */ diff --git a/sys/vm/vm_readwrite.c b/sys/vm/vm_readwrite.c new file mode 100644 index 0000000..cfb8bef --- /dev/null +++ b/sys/vm/vm_readwrite.c @@ -0,0 +1,341 @@ +/*- + * Copyright (c) 2008 Jeffrey Roberson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * XXXKIB TODO + * + * 1. Backpressure for writes + * 2. VOP_REALLOCBLKS + * + */ + +/* + * Grab a page, waiting until we are woken up due to the page + * changing state. We keep on waiting, if the page continues + * to be in the object. If the page doesn't exist allocate it. + * + * This routine may block. + */ +static vm_page_t +vm_page_grab_next(vm_object_t object, vm_page_t prev, vm_pindex_t pindex) +{ + vm_page_t m; + + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + m = NULL; + if (prev) { + m = TAILQ_NEXT(prev, listq); + if (m && m->pindex != pindex) + m = NULL; + } + for (;;) { + if (m == NULL) + m = vm_page_lookup(object, pindex); + if (m != NULL) { + if (vm_page_sleep_if_busy(m, TRUE, "pgrnbwt") == 0) + break; + m = NULL; + continue; + } + m = vm_page_alloc(object, pindex, + VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); + if (m != NULL) + break; + VM_OBJECT_UNLOCK(object); + VM_WAIT; + VM_OBJECT_LOCK(object); + } + return (m); +} + +/* + * Read a cluster starting at 'm'. 
+ */ +static int +vnode_pager_read_cluster(struct vnode *vp, vm_page_t m, int maxrun) +{ + vm_page_t pa[MAXPHYS / PAGE_SIZE]; + vm_object_t obj; + vm_pindex_t idx; + daddr_t blkno; + int bsize; + int error; + int run; + int i; + + obj = vp->v_object; + idx = m->pindex; + bsize = vp->v_mount->mnt_stat.f_iosize; + VM_OBJECT_UNLOCK(obj); + error = VOP_BMAP(vp, IDX_TO_OFF(idx)/bsize, NULL, &blkno, NULL, &run); + VM_OBJECT_LOCK(obj); + run = MIN(run, maxrun); + if (error || run == 0 || blkno == -1) + return (vm_pager_get_pages(obj, &m, 1, 0)); + run = (run + 1) * bsize / PAGE_SIZE; + run = MIN(run, vp->v_mount->mnt_iosize_max / PAGE_SIZE); + pa[0] = m; + for (i = 1; i < run; i++) { + m = vm_page_grab_next(obj, m, idx + i); + if (m->valid) { + run = i; + break; + } + vm_page_busy(m); + pa[i] = m; + } + return (vm_pager_get_pages(obj, pa, run, 0)); +} + +int +vnode_pager_read(struct vnode *vp, struct uio *uio, int ioflags) +{ + vm_object_t obj; + vm_offset_t off; + vm_pindex_t idx; + vm_page_t m; + ssize_t size; + int error, vn_locked, obj_locked; + struct thread *td; + + if (ioflags & (IO_EXT|IO_DIRECT)) + return (EOPNOTSUPP); + ASSERT_VOP_LOCKED(vp, "vnode_pager_read"); + if (vp->v_iflag & VI_DOOMED) + return (EBADF); + obj = vp->v_object; + if (obj == NULL) + return (EOPNOTSUPP); + error = 0; + m = NULL; + vn_locked = VOP_ISLOCKED(vp); + obj_locked = 0; + td = uio->uio_td; + if (td == NULL) + td = curthread; + while (uio->uio_resid > 0) { + if (!obj_locked) { + VM_OBJECT_LOCK(obj); + obj_locked = 1; + } + size = obj->un_pager.vnp.vnp_size - uio->uio_offset; + if (size <= 0) + break; + idx = OFF_TO_IDX(uio->uio_offset); + off = uio->uio_offset - IDX_TO_OFF(idx); + size = MIN(MIN(PAGE_SIZE - off, uio->uio_resid), size); + m = vm_page_grab_next(obj, m, idx); + if (!vm_page_is_valid(m, off, size)) { + vm_page_busy(m); + error = vnode_pager_read_cluster(vp, m, + howmany((ioflags >> IO_SEQSHIFT), PAGE_SIZE)); + m = vm_page_lookup(obj, idx); + if (m == NULL) { + if (error == VM_PAGER_OK) + continue; + error = EIO; + break; + } + if (m->valid == 0 || error != VM_PAGER_OK) { + vm_page_lock_queues(); + vm_page_free(m); + vm_page_unlock_queues(); + error = EIO; + break; + } + vm_page_wakeup(m); + } + vm_object_pip_add(obj, 1); + vm_page_io_start(m); + VM_OBJECT_UNLOCK(obj); + VOP_UNLOCK(vp, 0); + error = uiomove_fromphys(&m, off, size, uio); + VM_OBJECT_LOCK(obj); + vm_page_io_finish(m); + vm_object_pip_wakeup(obj); + VM_OBJECT_UNLOCK(obj); + obj_locked = 0; + vn_lock(vp, vn_locked | LK_RETRY); + if (error != 0 || (vp->v_iflag & VI_DOOMED)) + break; + } + if (obj_locked) + VM_OBJECT_UNLOCK(obj); + if (error == 0) + vfs_mark_atime(vp, td->td_ucred); + + return (error); +} + +int +vnode_pager_write(struct vnode *vp, struct uio *uio, int ioflags) +{ + vm_object_t obj; + vm_offset_t off; + vm_pindex_t idx; + vm_page_t m; + struct vattr vattr; + ssize_t size, osize, osize1, resid, sresid; + int error, vn_locked, obj_locked, bits; + struct thread *td; + + if (ioflags & (IO_EXT|IO_INVAL|IO_DIRECT)) + return (EOPNOTSUPP); + ASSERT_VOP_LOCKED(vp, "vnode_pager_write"); + if (vp->v_iflag & VI_DOOMED) + return (EBADF); + obj = vp->v_object; + if (obj == NULL) + return (EOPNOTSUPP); + error = 0; + m = NULL; + vn_locked = VOP_ISLOCKED(vp); + obj_locked = 0; + if (ioflags & IO_APPEND) + uio->uio_offset = obj->un_pager.vnp.vnp_size; + td = uio->uio_td; + if (td == NULL) + td = curthread; + if (vp->v_type == VREG) { + PROC_LOCK(td->td_proc); + if (uio->uio_offset + uio->uio_resid > + lim_cur(td->td_proc, RLIMIT_FSIZE)) { 
+ psignal(td->td_proc, SIGXFSZ); + PROC_UNLOCK(td->td_proc); + return (EFBIG); + } + PROC_UNLOCK(td->td_proc); + } + osize = osize1 = obj->un_pager.vnp.vnp_size; + resid = uio->uio_resid; + VATTR_NULL(&vattr); + VI_LOCK(vp); + KASSERT(vp->v_writecount > 0, ("vnode_pager_write: writecount")); + vp->v_writecount++; + VI_UNLOCK(vp); + while (uio->uio_resid > 0) { + size = uio->uio_resid; + idx = OFF_TO_IDX(uio->uio_offset); + off = uio->uio_offset - IDX_TO_OFF(idx); + size = MIN(PAGE_SIZE - off, uio->uio_resid); + osize1 = obj->un_pager.vnp.vnp_size; + if (osize1 < uio->uio_offset + size) { + vattr.va_size = uio->uio_offset + size; + if (obj_locked) { + VM_OBJECT_UNLOCK(obj); + obj_locked = 0; + } + error = VOP_SETATTR(vp, &vattr, td->td_ucred); + if (error != 0) + break; + } + + if (!obj_locked) { + VM_OBJECT_LOCK(obj); + obj_locked = 1; + } + m = vm_page_grab_next(obj, m, idx); + bits = vm_page_bits(off, size); + if ((m->valid & ~bits) != ~bits) { + vm_page_busy(m); + error = vnode_pager_read_cluster(vp, m, 1); + m = vm_page_lookup(obj, idx); + if (m == NULL) { + if (error == VM_PAGER_OK) + continue; + error = EIO; + break; + } + if (m->valid == 0 || error != VM_PAGER_OK) { + vm_page_lock_queues(); + vm_page_free(m); + vm_page_unlock_queues(); + error = EIO; + break; + } + vm_page_wakeup(m); + } + vm_object_pip_add(obj, 1); + vm_page_io_start(m); + VM_OBJECT_UNLOCK(obj); + VOP_UNLOCK(vp, 0); + sresid = uio->uio_resid; + error = uiomove_fromphys(&m, off, size, uio); + VM_OBJECT_LOCK(obj); + if (error == 0) { + vm_page_lock_queues(); + m->valid |= vm_page_bits(off, sresid - uio->uio_resid); + vm_page_dirty(m); + vm_page_unlock_queues(); + vm_object_set_writeable_dirty(obj); + } + vm_page_io_finish(m); + vm_object_pip_wakeup(obj); + if (error == 0 && (ioflags & IO_SYNC)) + vm_object_page_clean(obj, idx, idx + 1, ioflags); + VM_OBJECT_UNLOCK(obj); + obj_locked = 0; + vn_lock(vp, vn_locked | LK_RETRY); + if (error != 0 || (vp->v_iflag & VI_DOOMED)) + break; + } + if (obj_locked) + VM_OBJECT_UNLOCK(obj); + if (error == 0 && (ioflags & IO_SYNC)) + error = VOP_FSYNC(vp, MNT_WAIT, td); + if (error != 0) { + vattr.va_size = (ioflags & IO_UNIT) ? osize : osize1; + VOP_SETATTR(vp, &vattr, td->td_ucred); + if (ioflags & IO_UNIT) { + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + } + } + VI_LOCK(vp); + vp->v_writecount--; + VI_UNLOCK(vp); + + return (error); +} diff --git a/tools/regression/file/uio/uio.c b/tools/regression/file/uio/uio.c new file mode 100644 index 0000000..d857605 --- /dev/null +++ b/tools/regression/file/uio/uio.c @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 2009 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int chunk_cnt = 1024; +int chunk_size = 1024; + +int +main(int argc, char *argv[]) +{ + struct iovec *wiov, *riov; + char **wdata, **rdata; + int fd, i; + ssize_t io_error; + + if (argc < 2) { + fprintf(stderr, "Usage: uio file [chunk count [chunk size]]\n"); + return (2); + } + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + if (fd == -1) { + fprintf(stderr, "Failed to create %s: %s\n", + argv[1], strerror(errno)); + return (1); + } + + if (argc > 2) + chunk_cnt = atoi(argv[2]); + if (argc > 3) + chunk_size = atoi(argv[3]); + + wiov = calloc(chunk_cnt, sizeof(*wiov)); + wdata = calloc(chunk_cnt, sizeof(*wdata)); + + riov = calloc(chunk_cnt, sizeof(*riov)); + rdata = calloc(chunk_cnt, sizeof(*rdata)); + + for (i = 0; i < chunk_cnt; i++) { + rdata[i] = malloc(chunk_size); + riov[i].iov_base = rdata[i]; + riov[i].iov_len = chunk_size; + + wdata[i] = malloc(chunk_size); + memset(wdata[i], i, chunk_size); + wiov[i].iov_base = wdata[i]; + wiov[i].iov_len = chunk_size; + } + + io_error = writev(fd, wiov, chunk_cnt); + if (io_error == -1) { + fprintf(stderr, "write failed: %s\n", strerror(errno)); + return (1); + } else if (io_error != chunk_cnt * chunk_size) { + fprintf(stderr, "truncated write: %d %d\n", + io_error, chunk_cnt * chunk_size); + return (1); + } + + if (lseek(fd, 0, SEEK_SET) == -1) { + fprintf(stderr, "lseek failed: %s\n", strerror(errno)); + return (1); + } + + io_error = readv(fd, riov, chunk_cnt); + if (io_error == -1) { + fprintf(stderr, "read failed: %s\n", strerror(errno)); + return (1); + } else if (io_error != chunk_cnt * chunk_size) { + fprintf(stderr, "truncated read: %d %d\n", + io_error, chunk_cnt * chunk_size); + return (1); + } + + for (i = 0; i < chunk_cnt; i++) { + if (memcmp(rdata[i], wdata[i], chunk_size) != 0) { + fprintf(stderr, "chunk %d differs\n", i); + return (1); + } + } + + return (0); +} diff --git a/tools/regression/ufs/ba_clrbuf/ba_clrbuf.c b/tools/regression/ufs/ba_clrbuf/ba_clrbuf.c new file mode 100644 index 0000000..1b0acbe --- /dev/null +++ b/tools/regression/ufs/ba_clrbuf/ba_clrbuf.c @@ -0,0 +1,129 @@ +/*- + * Copyright (c) 2009 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const int blks = 2; + +static void +flush_buffers(int fd) +{ + struct stat st; + char *addr; + int error; + + printf("Flushing buffers\n"); + error = fstat(fd, &st); + if (error == -1) + err(2, "stat"); + fsync(fd); + addr = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == (char *)-1) + err(2, "mmap"); + error = msync(addr, st.st_size, MS_SYNC | MS_INVALIDATE); + if (error == -1) + err(2, "msync"); + munmap(addr, st.st_size); +} + +int +main(int argc, char *argv[]) +{ + struct statfs fst; + char *data, *vrfy; + size_t sz; + int fd, i, error, ret; + + if (argc < 2) + errx(2, "Usage: ba_clrbuf file"); + + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + if (fd == -1) + err(2, "Failed to create %s", argv[1]); + + if (fstatfs(fd, &fst) == -1) + err(2, "stat"); + + sz = fst.f_iosize * blks; + data = malloc(sz); + if (data == NULL) + err(2, "malloc"); + vrfy = malloc(sz); + if (vrfy == NULL) + err(2, "malloc"); + for (i = 0; i < (int)sz; i++) + data[i] = i; + error = write(fd, data, sz); + if (error == -1) + err(2, "write"); + else if (error != (int)sz) + errx(2, "Short write %d %d", error, sz); + + flush_buffers(fd); + + error = lseek(fd, 0, SEEK_SET); + if (error == -1) + err(2, "lseek 0"); + else if (error != 0) + errx(2, "lseek 0 returned %d", error); + error = write(fd, NULL, fst.f_iosize); + printf("faulty write, error %s\n", strerror(errno)); + + error = lseek(fd, 0, SEEK_SET); + if (error == -1) + err(2, "lseek 0/2"); + else if (error != 0) + errx(2, "lseek 0/2 returned %d", error); + error = read(fd, vrfy, sz); + if (error == -1) + err(2, "read"); + else if (error != (int)sz) + errx(2, "short read %d %d", error, sz); + + if (memcmp(data, vrfy, fst.f_iosize) != 0) { + printf("Zero block corrupted, byte at 0 is %x\n", + (unsigned char)vrfy[0]); + ret = 1; + } else { + printf("No corruption\n"); + ret = 0; + } + + return (ret); +}
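
Usage note: the pattern the patch establishes in vn_read() and vn_write() is to take a shared (read) or exclusive (write) byte-range lock on VREG vnodes before any vnode lock is acquired for the I/O, to lock the whole range for appenders (the final offset is not known up front), and to drop the range lock only after the uio has been processed. Below is a minimal sketch of a kernel consumer following that pattern; it assumes only the declarations added above in sys/rangelock.h and sys/vnode.h, and the helper name read_locked_range is illustrative, not part of the patch.

#include <sys/param.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/rangelock.h>

/*
 * Sketch only: range-lock the region covered by the uio, perform the
 * read via vn_read_chunk() (which takes the vnode lock itself when
 * IO_NODELOCKED is not set), then release the range lock.
 */
static int
read_locked_range(struct vnode *vp, struct uio *uio, struct ucred *cred,
    int ioflag)
{
	void *rl_cookie;
	int error;

	if (vp->v_type == VREG)
		rl_cookie = rangelock_rlock(vp, uio->uio_offset,
		    uio->uio_resid);
	else
		rl_cookie = NULL;	/* Only regular files are range-locked. */
	error = vn_read_chunk(vp, uio, cred, cred, ioflag);
	if (rl_cookie != NULL)
		rangelock_unlock(vp, rl_cookie);
	return (error);
}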