Index: sys/kern/vfs_mount.c
===================================================================
--- sys/kern/vfs_mount.c	(revision 205589)
+++ sys/kern/vfs_mount.c	(working copy)
@@ -505,6 +505,7 @@
 	mac_mount_create(cred, mp);
 #endif
 	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
+	vfs_syncer_init(mp);
 	return (mp);
 }
 
@@ -543,6 +544,7 @@
 	if (mp->mnt_lockref != 0)
 		panic("vfs_mount_destroy: nonzero lock refcount");
 	MNT_IUNLOCK(mp);
+	vfs_syncer_destroy(mp);
#ifdef MAC
 	mac_mount_destroy(mp);
 #endif
@@ -1035,12 +1037,14 @@
 		mp->mnt_kern_flag &= ~MNTK_ASYNC;
 		MNT_IUNLOCK(mp);
 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+			vfs_syncer_attach(mp, vfsp);
 			if (mp->mnt_syncer == NULL)
 				error = vfs_allocate_syncvnode(mp);
 		} else {
 			if (mp->mnt_syncer != NULL)
 				vrele(mp->mnt_syncer);
 			mp->mnt_syncer = NULL;
+			vfs_syncer_detach(mp);
 		}
 		vfs_unbusy(mp);
 		VI_LOCK(vp);
@@ -1077,11 +1081,15 @@
 		VOP_UNLOCK(vp, 0);
 		mountcheckdirs(vp, newdp);
 		vrele(newdp);
-		if ((mp->mnt_flag & MNT_RDONLY) == 0)
+		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+			vfs_syncer_attach(mp, vfsp);
 			error = vfs_allocate_syncvnode(mp);
+		}
 		vfs_unbusy(mp);
-		if (error)
+		if (error) {
 			vrele(vp);
+			vfs_syncer_detach(mp);
+		}
 	} else {
 		vfs_unbusy(mp);
 		vfs_mount_destroy(mp);
@@ -1327,6 +1335,7 @@
 		VOP_UNLOCK(coveredvp, 0);
 		return (error);
 	}
+	vfs_syncer_detach(mp);
 	mtx_lock(&mountlist_mtx);
 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c	(revision 205589)
+++ sys/kern/vfs_subr.c	(working copy)
@@ -89,14 +89,12 @@
 #include <ddb/ddb.h>
 #endif
 
-#define	WI_MPSAFEQ	0
-#define	WI_GIANTQ	1
-
 static MALLOC_DEFINE(M_NETADDR, "subr_export_host",
     "Export host address structure");
 static void	delmntque(struct vnode *vp);
 static int	flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo,
 		    int slpflag, int slptimeo);
+static void	sched_sync(void *arg);
 static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	vbusy(struct vnode *vp);
@@ -216,25 +214,10 @@
  *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
  *
  */
-static int syncer_delayno;
-static long syncer_mask;
-LIST_HEAD(synclist, bufobj);
-static struct synclist *syncer_workitem_pending[2];
-/*
- * The sync_mtx protects:
- *	bo->bo_synclist
- *	sync_vnode_count
- *	syncer_delayno
- *	syncer_state
- *	syncer_workitem_pending
- *	syncer_worklist_len
- *	rushjob
- */
-static struct mtx sync_mtx;
-static struct cv sync_wakeup;
 
 #define SYNCER_MAXDELAY		32
-static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
+#define SYNCER_MASK		(SYNCER_MAXDELAY - 1)
+
 static int syncdelay = 30;		/* max time to delay syncing data */
 static int filedelay = 30;		/* time to delay syncing files */
 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
@@ -242,18 +225,15 @@
 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
 static int metadelay = 28;		/* time to delay syncing metadata */
 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
-static int rushjob;		/* number of slots to run ASAP */
+#if 0
 static int stat_rush_requests;	/* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
+#endif
 
 /*
  * When shutting down the syncer, run it at four times normal speed.
  */
 #define SYNCER_SHUTDOWN_SPEEDUP		4
-static int sync_vnode_count;
-static int syncer_worklist_len;
-static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY }
-    syncer_state;
 
 /*
  * Number of vnodes we want to exist at any one time.  This is mostly used
@@ -279,7 +259,6 @@
 #define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt)
 #define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt)
 
-
 /*
  * Initialize the vnode management data structures.
  */
@@ -313,16 +292,6 @@
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-	/*
-	 * Initialize the filesystem syncer.
-	 */
-	syncer_workitem_pending[WI_MPSAFEQ] = hashinit(syncer_maxdelay, M_VNODE,
-	    &syncer_mask);
-	syncer_workitem_pending[WI_GIANTQ] = hashinit(syncer_maxdelay, M_VNODE,
-	    &syncer_mask);
-	syncer_maxdelay = syncer_mask + 1;
-	mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF);
-	cv_init(&sync_wakeup, "syncer");
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
 
@@ -521,6 +490,89 @@
 	mtx_unlock(&mntid_mtx);
 }
 
+void
+vfs_syncer_init(struct mount *mp)
+{
+
+	MPASS(mp != NULL);
+
+	mp->mnt_sync_delayno = 0;
+	mp->mnt_sync_vnode_count = 0;
+	mp->mnt_sync_worklist_len = 0;
+	mp->mnt_sync_rushjob = 0;
+	mp->mnt_sync_attached = 0;
+	cv_init(&mp->mnt_sync_wakeup, "syncer");
+	mp->mnt_sync_workitem_pending = malloc(SYNCER_MAXDELAY *
+	    sizeof(struct synclist), M_TEMP, M_NOWAIT | M_ZERO);
+	if (mp->mnt_sync_workitem_pending == NULL)
+		panic("%s: ENOMEM for the vnodes pending queue", __func__);
+}
+
+void
+vfs_syncer_attach(struct mount *mp, struct vfsconf *vfsp)
+{
+	int error;
+
+	MPASS(mp != NULL);
+
+	MNT_ILOCK(mp);
+	if (mp->mnt_sync_attached != 0) {
+		MNT_IUNLOCK(mp);
+		return;
+	}
+	if (vfsp == NULL) {
+		vfsp = mp->mnt_vfc;
+		MPASS(vfsp != NULL);
+	}
+	mp->mnt_sync_state = SYNCER_RUNNING;
+	mp->mnt_sync_attached = 1;
+	MNT_IUNLOCK(mp);
+	error = kthread_add(sched_sync, mp, NULL, &mp->mnt_sync_thr, 0,
+	    0, "syncer %s:%d", vfsp->vfc_name, vfsp->vfc_refcount);
+	if (error != 0)
+		panic("%s: kthread_add() failed with %d errno", __func__,
+		    error);
+}
+
+void
+vfs_syncer_destroy(struct mount *mp)
+{
+
+	MPASS(mp != NULL);
+
+	if (mp->mnt_sync_workitem_pending != NULL) {
+		free(mp->mnt_sync_workitem_pending, M_TEMP);
+		mp->mnt_sync_workitem_pending = NULL;
+		cv_destroy(&mp->mnt_sync_wakeup);
+	}
+}
+
+void
+vfs_syncer_detach(struct mount *mp)
+{
+
+	MNT_ILOCK(mp);
+	if (mp->mnt_sync_attached == 0) {
+		MNT_IUNLOCK(mp);
+		return;
+	}
+
+	/* Keep the caller blocked until the syncer thread has fully exited. */
+	MPASS(mp->mnt_sync_state != SYNCER_TO_DIE);
+	while (mp->mnt_sync_state != SYNCER_DIED) {
+		mp->mnt_sync_state = SYNCER_TO_DIE;
+
+		/*
+		 * Wake up the syncer in case it is sleeping, in order to
+		 * speed up its termination.
+		 */
+		cv_broadcast(&mp->mnt_sync_wakeup);
+		msleep(&mp->mnt_sync_thr, MNT_MTX(mp), PVFS, "syncer drain", 0);
+	}
+	mp->mnt_sync_attached = 0;
+	MNT_IUNLOCK(mp);
+}
+
 /*
  * Knob to control the precision of file timestamps:
  *
@@ -1590,6 +1642,7 @@
 brelvp(struct buf *bp)
 {
 	struct bufobj *bo;
+	struct mount *mp;
 	struct vnode *vp;
 
 	CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
@@ -1598,20 +1651,24 @@
 	/*
 	 * Delete from old vnode list, if on one.
 	 */
-	vp = bp->b_vp;	/* XXX */
+	vp = bp->b_vp;
+	mp = vp->v_mount;
 	bo = bp->b_bufobj;
+	MNT_ILOCK(mp);
 	BO_LOCK(bo);
 	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN))
 		buf_vlist_remove(bp);
-	else
+	else {
+		BO_UNLOCK(bo);
+		MNT_IUNLOCK(mp);
 		panic("brelvp: Buffer %p not on queue.", bp);
+	}
 	if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) {
 		bo->bo_flag &= ~BO_ONWORKLST;
-		mtx_lock(&sync_mtx);
 		LIST_REMOVE(bo, bo_synclist);
-		syncer_worklist_len--;
-		mtx_unlock(&sync_mtx);
+		mp->mnt_sync_worklist_len--;
 	}
+	MNT_IUNLOCK(mp);
 	bp->b_flags &= ~B_NEEDSGIANT;
 	bp->b_vp = NULL;
 	bp->b_bufobj = NULL;
@@ -1623,138 +1680,103 @@
  * Add an item to the syncer work queue.
  */
 static void
-vn_syncer_add_to_worklist(struct bufobj *bo, int delay)
+vn_syncer_add_to_worklist(struct mount *mp, struct bufobj *bo, int delay)
 {
-	int queue, slot;
+	int slot;
 
+	ASSERT_MP_ILOCKED(mp);
 	ASSERT_BO_LOCKED(bo);
 
-	mtx_lock(&sync_mtx);
 	if (bo->bo_flag & BO_ONWORKLST)
 		LIST_REMOVE(bo, bo_synclist);
 	else {
 		bo->bo_flag |= BO_ONWORKLST;
-		syncer_worklist_len++;
+		mp->mnt_sync_worklist_len++;
 	}
-
-	if (delay > syncer_maxdelay - 2)
-		delay = syncer_maxdelay - 2;
-	slot = (syncer_delayno + delay) & syncer_mask;
-
-	queue = VFS_NEEDSGIANT(bo->__bo_vnode->v_mount) ? WI_GIANTQ :
-	    WI_MPSAFEQ;
-	LIST_INSERT_HEAD(&syncer_workitem_pending[queue][slot], bo,
-	    bo_synclist);
-	mtx_unlock(&sync_mtx);
+	if (delay > SYNCER_MAXDELAY - 2)
+		delay = SYNCER_MAXDELAY - 2;
+	slot = (mp->mnt_sync_delayno + delay) & SYNCER_MASK;
+	LIST_INSERT_HEAD(&mp->mnt_sync_workitem_pending[slot], bo, bo_synclist);
 }
 
-static int
-sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS)
+/*
+ * Sync a vnode (fed in through its bufobj) to its filesystem.
+ * It expects the mountpoint to be locked and correctly referenced, and
+ * the bufobj to be valid.
+ */
+static void
+sync_vnode(struct mount *mp, struct bufobj *bo)
 {
-	int error, len;
+	struct vnode *vp;
 
-	mtx_lock(&sync_mtx);
-	len = syncer_worklist_len - sync_vnode_count;
-	mtx_unlock(&sync_mtx);
-	error = SYSCTL_OUT(req, &len, sizeof(len));
-	return (error);
-}
+	MPASS(mp != NULL && bo != NULL);
+	ASSERT_MP_ILOCKED(mp);
+	MPASS(mp->mnt_writeopcount > 0);
 
-SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0,
-    sysctl_vfs_worklist_len, "I", "Syncer thread worklist length");
+	vp = bo->__bo_vnode;
 
-static struct proc *updateproc;
-static void sched_sync(void);
-static struct kproc_desc up_kp = {
-	"syncer",
-	sched_sync,
-	&updateproc
-};
-SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp);
 
-static int
-sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
-{
-	struct vnode *vp;
-	struct mount *mp;
-
-	*bo = LIST_FIRST(slp);
-	if (*bo == NULL)
-		return (0);
-	vp = (*bo)->__bo_vnode;	/* XXX */
-	if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0)
-		return (1);
 	/*
-	 * We use vhold in case the vnode does not
-	 * successfully sync.  vhold prevents the vnode from
-	 * going away when we unlock the sync_mtx so that
-	 * we can acquire the vnode interlock.
+	 * vhold() the vnode linked to the bufobj in order to avoid
+	 * possible recycling when releasing the mountpoint interlock.
 	 */
-	vholdl(vp);
-	mtx_unlock(&sync_mtx);
-	VI_UNLOCK(vp);
-	if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
-		vdrop(vp);
-		mtx_lock(&sync_mtx);
-		return (*bo == LIST_FIRST(slp));
-	}
+	vhold(vp);
+	MNT_IUNLOCK(mp);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-	(void) VOP_FSYNC(vp, MNT_LAZY, td);
+	VOP_FSYNC(vp, MNT_LAZY, curthread);
 	VOP_UNLOCK(vp, 0);
-	vn_finished_write(mp);
-	BO_LOCK(*bo);
-	if (((*bo)->bo_flag & BO_ONWORKLST) != 0) {
+	MNT_ILOCK(mp);
+	BO_LOCK(bo);
+	if ((bo->bo_flag & BO_ONWORKLST) != 0) {
+
 		/*
 		 * Put us back on the worklist.  The worklist
 		 * routine will remove us from our current
 		 * position and then add us back in at a later
 		 * position.
		 */
-		vn_syncer_add_to_worklist(*bo, syncdelay);
+		vn_syncer_add_to_worklist(mp, bo, syncdelay);
 	}
-	BO_UNLOCK(*bo);
+	BO_UNLOCK(bo);
 	vdrop(vp);
-	mtx_lock(&sync_mtx);
-	return (0);
 }
 
 /*
  * System filesystem synchronizer daemon.
 */
 static void
-sched_sync(void)
+sched_sync(void *arg)
 {
-	struct synclist *gnext, *next;
-	struct synclist *gslp, *slp;
+	eventhandler_tag shutdown_tag;
+	long starttime;
+	struct mount *mp;
+	struct synclist *next;
+	struct synclist *slp;
 	struct bufobj *bo;
-	long starttime;
-	struct thread *td = curthread;
+	struct thread *td;
 	int last_work_seen;
 	int net_worklist_len;
 	int syncer_final_iter;
 	int first_printf;
-	int error;
+	int vfslocked;
 
+	mp = arg;
 	last_work_seen = 0;
 	syncer_final_iter = 0;
 	first_printf = 1;
-	syncer_state = SYNCER_RUNNING;
 	starttime = time_uptime;
+	td = curthread;
 	td->td_pflags |= TDP_NORUNNINGBUF;
 
-	EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc,
-	    SHUTDOWN_PRI_LAST);
+	shutdown_tag = EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown,
+	    mp, SHUTDOWN_PRI_LAST);
 
-	mtx_lock(&sync_mtx);
-	for (;;) {
-		if (syncer_state == SYNCER_FINAL_DELAY &&
-		    syncer_final_iter == 0) {
-			mtx_unlock(&sync_mtx);
-			kproc_suspend_check(td->td_proc);
-			mtx_lock(&sync_mtx);
-		}
-		net_worklist_len = syncer_worklist_len - sync_vnode_count;
-		if (syncer_state != SYNCER_RUNNING &&
+	vfslocked = VFS_LOCK_GIANT(mp);
+	MNT_ILOCK(mp);
+	while (mp->mnt_sync_state != SYNCER_TO_DIE) {
+		MPASS(mp->mnt_sync_state != SYNCER_DIED);
+		net_worklist_len = mp->mnt_sync_worklist_len -
+		    mp->mnt_sync_vnode_count;
+		if (mp->mnt_sync_state != SYNCER_RUNNING &&
 		    starttime != time_uptime) {
 			if (first_printf) {
 				printf("\nSyncing disks, vnodes remaining...");
@@ -1771,27 +1793,28 @@
 		 * Skip over empty worklist slots when shutting down.
 		 */
 		do {
-			slp = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno];
-			gslp = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno];
-			syncer_delayno += 1;
-			if (syncer_delayno == syncer_maxdelay)
-				syncer_delayno = 0;
-			next = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno];
-			gnext = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno];
+			slp =
+			    &mp->mnt_sync_workitem_pending[mp->mnt_sync_delayno];
+			mp->mnt_sync_delayno += 1;
+			if (mp->mnt_sync_delayno == SYNCER_MAXDELAY)
+				mp->mnt_sync_delayno = 0;
+			next =
+			    &mp->mnt_sync_workitem_pending[mp->mnt_sync_delayno];
+
 			/*
 			 * If the worklist has wrapped since the
 			 * it was emptied of all but syncer vnodes,
 			 * switch to the FINAL_DELAY state and run
 			 * for one more second.
 			 */
-			if (syncer_state == SYNCER_SHUTTING_DOWN &&
+			if (mp->mnt_sync_state == SYNCER_SHUTTING_DOWN &&
 			    net_worklist_len == 0 &&
-			    last_work_seen == syncer_delayno) {
-				syncer_state = SYNCER_FINAL_DELAY;
+			    last_work_seen == mp->mnt_sync_delayno) {
+				mp->mnt_sync_state = SYNCER_FINAL_DELAY;
 				syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP;
 			}
-		} while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) &&
-		    LIST_EMPTY(gslp) && syncer_worklist_len > 0);
+		} while (mp->mnt_sync_state != SYNCER_RUNNING &&
+		    LIST_EMPTY(slp) && mp->mnt_sync_worklist_len > 0);
 
 		/*
 		 * Keep track of the last time there was anything
@@ -1799,35 +1822,69 @@
 		 * Return to the SHUTTING_DOWN state if any
 		 * new work appears.
 		 */
-		if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING)
-			last_work_seen = syncer_delayno;
-		if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY)
-			syncer_state = SYNCER_SHUTTING_DOWN;
-		while (!LIST_EMPTY(slp)) {
-			error = sync_vnode(slp, &bo, td);
-			if (error == 1) {
+		if (net_worklist_len > 0 ||
+		    mp->mnt_sync_state == SYNCER_RUNNING)
+			last_work_seen = mp->mnt_sync_delayno;
+		if (net_worklist_len > 0 &&
+		    mp->mnt_sync_state == SYNCER_FINAL_DELAY)
+			mp->mnt_sync_state = SYNCER_SHUTTING_DOWN;
+
+		/*
+		 * Since the mountpoint interlock is already held and only
+		 * a simple, non-sleeping call to vn_start_write() would
+		 * be needed here, it is simpler to emulate the relevant
+		 * bits of it directly in the code.
+		 * Furthermore, the mountpoint is already known not to be
+		 * a filesystem bypass, so the VOP_GETWRITEMOUNT() lookup
+		 * is unnecessary, and the syncer thread never sets the
+		 * TDP_IGNSUSP flag, so that check is skipped as well.
+		 * Finally, the syncer does not need to acquire a
+		 * reference on the mountpoint because races against
+		 * unmount are handled via the syncer state transitions.
+		 */
+		if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
+
+			/*
+			 * The filesystem is suspending.  Just move the
+			 * bufobjs to the next syncer queue and skip any
+			 * further processing.
+			 */
+			while (!LIST_EMPTY(slp)) {
+				bo = LIST_FIRST(slp);
 				LIST_REMOVE(bo, bo_synclist);
 				LIST_INSERT_HEAD(next, bo, bo_synclist);
+			}
+		} else {
+			mp->mnt_writeopcount++;
+
+			/*
+			 * The mountpoint can be written to now.
+			 * sync_vnode() may drop the mountpoint
+			 * interlock, so re-read the list head on every
+			 * iteration to cope with racing insertions.
+			 */
+			while (!LIST_EMPTY(slp))
+				sync_vnode(mp, LIST_FIRST(slp));
+
+			/* Emulate a vn_finished_write(). */
+			mp->mnt_writeopcount--;
+			MPASS(mp->mnt_writeopcount >= 0);
+			if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
+			    mp->mnt_writeopcount == 0)
+				wakeup(&mp->mnt_writeopcount);
+
+			/*
+			 * If an unmount operation started while the
+			 * mountpoint interlock was dropped inside
+			 * sync_vnode(), quit the syncer thread now.
+			 */
+			if (mp->mnt_sync_state == SYNCER_TO_DIE)
 				continue;
-			}
 		}
-		if (!LIST_EMPTY(gslp)) {
-			mtx_unlock(&sync_mtx);
-			mtx_lock(&Giant);
-			mtx_lock(&sync_mtx);
-			while (!LIST_EMPTY(gslp)) {
-				error = sync_vnode(gslp, &bo, td);
-				if (error == 1) {
-					LIST_REMOVE(bo, bo_synclist);
-					LIST_INSERT_HEAD(gnext, bo,
-					    bo_synclist);
-					continue;
-				}
-			}
-			mtx_unlock(&Giant);
-		}
-		if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0)
+		if (mp->mnt_sync_state == SYNCER_FINAL_DELAY &&
+		    syncer_final_iter > 0)
 			syncer_final_iter--;
+
 		/*
 		 * The variable rushjob allows the kernel to speed up the
 		 * processing of the filesystem syncer process.  A rushjob
@@ -1838,10 +1895,11 @@
 		 * ahead of the disk that the kernel memory pool is being
 		 * threatened with exhaustion.
 		 */
-		if (rushjob > 0) {
-			rushjob -= 1;
+		if (mp->mnt_sync_rushjob > 0) {
+			mp->mnt_sync_rushjob -= 1;
 			continue;
 		}
+
 		/*
 		 * Just sleep for a short period of time between
 		 * iterations when shutting down to allow some I/O
@@ -1854,12 +1912,23 @@
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
-		if (syncer_state != SYNCER_RUNNING)
-			cv_timedwait(&sync_wakeup, &sync_mtx,
+		if (mp->mnt_sync_state != SYNCER_RUNNING)
+			cv_timedwait(&mp->mnt_sync_wakeup, MNT_MTX(mp),
 			    hz / SYNCER_SHUTDOWN_SPEEDUP);
 		else if (time_uptime == starttime)
-			cv_timedwait(&sync_wakeup, &sync_mtx, hz);
+			cv_timedwait(&mp->mnt_sync_wakeup, MNT_MTX(mp), hz);
 	}
+
+	/*
+	 * If this point is reached, an unmount request is in progress.
+	 * Wake up the unmounting thread and terminate the syncer.
+	 */
+	mp->mnt_sync_state = SYNCER_DIED;
+	wakeup(&mp->mnt_sync_thr);
+	MNT_IUNLOCK(mp);
+	VFS_UNLOCK_GIANT(vfslocked);
+	EVENTHANDLER_DEREGISTER(shutdown_pre_sync, shutdown_tag);
+	kthread_exit();
 }
 
 /*
@@ -1870,6 +1939,7 @@
 int
 speedup_syncer(void)
 {
+#if 0
 	int ret = 0;
 
 	mtx_lock(&sync_mtx);
@@ -1881,6 +1951,8 @@
 	mtx_unlock(&sync_mtx);
 	cv_broadcast(&sync_wakeup);
 	return (ret);
+#endif
+	return (0);
 }
 
 /*
@@ -1890,15 +1962,17 @@
 static void
 syncer_shutdown(void *arg, int howto)
 {
+	struct mount *mp;
 
+	mp = arg;
 	if (howto & RB_NOSYNC)
 		return;
-	mtx_lock(&sync_mtx);
-	syncer_state = SYNCER_SHUTTING_DOWN;
-	rushjob = 0;
-	mtx_unlock(&sync_mtx);
-	cv_broadcast(&sync_wakeup);
-	kproc_shutdown(arg, howto);
+	MNT_ILOCK(mp);
+	mp->mnt_sync_state = SYNCER_SHUTTING_DOWN;
+	mp->mnt_sync_rushjob = 0;
+	MNT_IUNLOCK(mp);
+	cv_broadcast(&mp->mnt_sync_wakeup);
+	kthread_shutdown(arg, howto);
 }
 
 /*
@@ -1909,6 +1983,7 @@
 void
 reassignbuf(struct buf *bp)
 {
+	struct mount *mp;
 	struct vnode *vp;
 	struct bufobj *bo;
 	int delay;
@@ -1917,6 +1992,7 @@
 #endif
 
 	vp = bp->b_vp;
+	mp = vp->v_mount;
 	bo = bp->b_bufobj;
 	++reassignbufcalls;
 
@@ -1931,12 +2007,21 @@
 
 	/*
 	 * Delete from old vnode list, if on one.
+	 *
+	 * Lock the mountpoint now in order to avoid a LOR with the bufobj
+	 * lock, as we may need to insert a dirty buffer into the appropriate
+	 * syncer pending worklist.
 	 */
+	MNT_ILOCK(mp);
 	BO_LOCK(bo);
 	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN))
 		buf_vlist_remove(bp);
-	else
+	else {
+		BO_UNLOCK(bo);
+		MNT_IUNLOCK(mp);
 		panic("reassignbuf: Buffer %p not on queue.", bp);
+	}
+
 	/*
 	 * If dirty, put on list of dirty buffers; otherwise insert onto list
 	 * of clean buffers.
@@ -1953,17 +2038,15 @@
 			default:
 				delay = filedelay;
 			}
-			vn_syncer_add_to_worklist(bo, delay);
+			vn_syncer_add_to_worklist(mp, bo, delay);
 		}
 		buf_vlist_add(bp, bo, BX_VNDIRTY);
 	} else {
 		buf_vlist_add(bp, bo, BX_VNCLEAN);
 		if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) {
-			mtx_lock(&sync_mtx);
 			LIST_REMOVE(bo, bo_synclist);
-			syncer_worklist_len--;
-			mtx_unlock(&sync_mtx);
+			mp->mnt_sync_worklist_len--;
 			bo->bo_flag &= ~BO_ONWORKLST;
 		}
 	}
 
@@ -1984,6 +2067,7 @@
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 #endif
 	BO_UNLOCK(bo);
+	MNT_IUNLOCK(mp);
 }
 
 /*
@@ -3369,23 +3453,23 @@
 	 * are mounted at once.
 	 */
 	next += incr;
-	if (next == 0 || next > syncer_maxdelay) {
+	if (next == 0 || next > SYNCER_MAXDELAY) {
 		start /= 2;
 		incr /= 2;
 		if (start == 0) {
-			start = syncer_maxdelay / 2;
-			incr = syncer_maxdelay;
+			start = SYNCER_MAXDELAY / 2;
+			incr = SYNCER_MAXDELAY;
 		}
 		next = start;
 	}
 	bo = &vp->v_bufobj;
+	MNT_ILOCK(mp);
 	BO_LOCK(bo);
-	vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0);
-	/* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */
-	mtx_lock(&sync_mtx);
-	sync_vnode_count++;
-	mtx_unlock(&sync_mtx);
+	vn_syncer_add_to_worklist(mp, bo,
+	    syncdelay > 0 ? next % syncdelay : 0);
+	mp->mnt_sync_vnode_count++;
 	BO_UNLOCK(bo);
+	MNT_IUNLOCK(mp);
 	mp->mnt_syncer = vp;
 	return (0);
 }
@@ -3411,9 +3495,11 @@
 	 * Move ourselves to the back of the sync list.
 	 */
 	bo = &syncvp->v_bufobj;
+	MNT_ILOCK(mp);
 	BO_LOCK(bo);
-	vn_syncer_add_to_worklist(bo, syncdelay);
+	vn_syncer_add_to_worklist(mp, bo, syncdelay);
 	BO_UNLOCK(bo);
+	MNT_IUNLOCK(mp);
 
 	/*
 	 * Walk the list of vnodes pushing all that are dirty and
@@ -3463,21 +3549,24 @@
 static int
 sync_reclaim(struct vop_reclaim_args *ap)
 {
-	struct vnode *vp = ap->a_vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct bufobj *bo;
 
+	vp = ap->a_vp;
+	mp = vp->v_mount;
 	bo = &vp->v_bufobj;
+	MNT_ILOCK(mp);
 	BO_LOCK(bo);
 	vp->v_mount->mnt_syncer = NULL;
 	if (bo->bo_flag & BO_ONWORKLST) {
-		mtx_lock(&sync_mtx);
 		LIST_REMOVE(bo, bo_synclist);
-		syncer_worklist_len--;
-		sync_vnode_count--;
-		mtx_unlock(&sync_mtx);
+		mp->mnt_sync_worklist_len--;
+		mp->mnt_sync_vnode_count--;
 		bo->bo_flag &= ~BO_ONWORKLST;
 	}
 	BO_UNLOCK(bo);
+	MNT_IUNLOCK(mp);
 	return (0);
 }
 
Index: sys/sys/mount.h
===================================================================
--- sys/sys/mount.h	(revision 205589)
+++ sys/sys/mount.h	(working copy)
@@ -36,6 +36,7 @@
 #include <sys/ucred.h>
 #include <sys/queue.h>
 #ifdef _KERNEL
+#include <sys/condvar.h>
 #include <sys/lock.h>
 #include <sys/lockmgr.h>
 #include <sys/_mutex.h>
@@ -128,6 +129,7 @@
 };
 
 TAILQ_HEAD(vnodelst, vnode);
+LIST_HEAD(synclist, bufobj);
 
 /* Mount options list */
 TAILQ_HEAD(vfsoptlist, vfsopt);
@@ -187,6 +189,21 @@
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct lock	mnt_explock;		/* vfs_export walkers lock */
+	struct cv	mnt_sync_wakeup;
+	struct thread	*mnt_sync_thr;
+	struct synclist	*mnt_sync_workitem_pending;
+	int		mnt_sync_delayno;
+	int		mnt_sync_vnode_count;
+	int		mnt_sync_worklist_len;
+	int		mnt_sync_rushjob;
+	enum {
+		SYNCER_RUNNING,
+		SYNCER_SHUTTING_DOWN,
+		SYNCER_FINAL_DELAY,
+		SYNCER_TO_DIE,
+		SYNCER_DIED
+	} mnt_sync_state;
+	int		mnt_sync_attached;
 };
 
 struct vnode *__mnt_vnode_next(struct vnode **mvp, struct mount *mp);
@@ -207,6 +224,7 @@
 	MNT_IUNLOCK(mp);						\
 } while (0)
 
+#define	ASSERT_MP_ILOCKED(mp)	mtx_assert(&(mp)->mnt_mtx, MA_OWNED)
 #define	MNT_ILOCK(mp)	mtx_lock(&(mp)->mnt_mtx)
 #define	MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx)
 #define	MNT_IUNLOCK(mp)	mtx_unlock(&(mp)->mnt_mtx)
@@ -724,6 +742,10 @@
 		    const char *value);
 int	vfs_setpublicfs			    /* set publicly exported fs */
 	    (struct mount *, struct netexport *, struct export_args *);
+void	vfs_syncer_attach(struct mount *mp, struct vfsconf *vfsp);
+void	vfs_syncer_detach(struct mount *mp);
+void	vfs_syncer_destroy(struct mount *mp);
+void	vfs_syncer_init(struct mount *mp);
 void	vfs_msync(struct mount *, int);
 int	vfs_busy(struct mount *, int);
 int	vfs_export			    /* process mount export info */
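
For review convenience, a rough sketch of the intended per-mount syncer lifecycle follows. It is an editor-added illustration, not part of the patch; the function name below is hypothetical and simply strings together the vfs_syncer_*() entry points added above in the order the mount code is expected to call them.

/*
 * Hypothetical illustration only: the per-mount syncer lifecycle as wired
 * up by the hunks above.
 */
static void
example_mount_syncer_lifecycle(struct mount *mp, struct vfsconf *vfsp)
{
	vfs_syncer_init(mp);		/* vfs_mount_alloc(): set up queues and condvar, no thread yet */
	vfs_syncer_attach(mp, vfsp);	/* mount goes read-write: start the "syncer %s:%d" kthread */
	vfs_syncer_detach(mp);		/* unmount or downgrade to read-only: wait for SYNCER_DIED */
	vfs_syncer_destroy(mp);		/* vfs_mount_destroy(): free the work queues and condvar */
}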