Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== --- sys/ufs/ffs/ffs_vfsops.c (revision 206210) +++ sys/ufs/ffs/ffs_vfsops.c (working copy) @@ -1376,10 +1376,12 @@ #endif devvp = ump->um_devvp; bo = &devvp->v_bufobj; + MNT_ILOCK(mp); BO_LOCK(bo); if (waitfor != MNT_LAZY && (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) { BO_UNLOCK(bo); + MNT_IUNLOCK(mp); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0) allerror = error; @@ -1396,12 +1398,14 @@ secondary_writes, secondary_accwrites) != 0) goto loop; /* More work needed */ - mtx_assert(MNT_MTX(mp), MA_OWNED); + ASSERT_MP_ILOCKED(mp); mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED; MNT_IUNLOCK(mp); suspended = 1; - } else + } else { BO_UNLOCK(bo); + MNT_IUNLOCK(mp); + } /* * Write back modified superblock. */ Index: sys/ufs/ffs/ffs_softdep.c =================================================================== --- sys/ufs/ffs/ffs_softdep.c (revision 206210) +++ sys/ufs/ffs/ffs_softdep.c (working copy) @@ -357,19 +357,19 @@ struct bufobj *bo; int error; + ASSERT_MP_ILOCKED(mp); + (void) softdep_deps, (void) softdep_accdeps; bo = &devvp->v_bufobj; ASSERT_BO_LOCKED(bo); - MNT_ILOCK(mp); while (mp->mnt_secondary_writes != 0) { BO_UNLOCK(bo); - msleep(&mp->mnt_secondary_writes, MNT_MTX(mp), - (PUSER - 1) | PDROP, "secwr", 0); + msleep(&mp->mnt_secondary_writes, MNT_MTX(mp), PUSER - 1, + "secwr", 0); BO_LOCK(bo); - MNT_ILOCK(mp); } /* @@ -539,7 +539,7 @@ static int process_worklist_item(struct mount *, int); static void add_to_worklist(struct worklist *); static void softdep_flush(void); -static int softdep_speedup(void); +static int softdep_speedup(struct mount *mp); /* * Exported softdep operations. @@ -771,7 +771,7 @@ } static int -softdep_speedup(void) +softdep_speedup(struct mount *mp) { mtx_assert(&lk, MA_OWNED); @@ -780,7 +780,7 @@ wakeup(&req_pending); } - return speedup_syncer(); + return VFS_SYNCER_SPEEDUP(mp); } /* @@ -5780,7 +5780,7 @@ return (0); } if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps) - softdep_speedup(); + softdep_speedup(vp->v_mount); stat_sync_limit_hit += 1; FREE_LOCK(&lk); return (1); @@ -5882,7 +5882,7 @@ * Next, we attempt to speed up the syncer process. If that * is successful, then we allow the process to continue. */ - if (softdep_speedup() && resource != FLUSH_REMOVE_WAIT) + if (softdep_speedup(mp) && resource != FLUSH_REMOVE_WAIT) return(0); /* * If we are resource constrained on inode dependencies, try @@ -6262,25 +6262,31 @@ struct ufsmount *ump; int error; + ASSERT_MP_ILOCKED(mp); + ump = VFSTOUFS(mp); bo = &devvp->v_bufobj; ASSERT_BO_LOCKED(bo); for (;;) { + + /* + * XXX: That pattern seems bogus, it must be re-evaluated. + */ if (!TRY_ACQUIRE_LOCK(&lk)) { BO_UNLOCK(bo); + MNT_IUNLOCK(mp); ACQUIRE_LOCK(&lk); FREE_LOCK(&lk); + MNT_ILOCK(mp); BO_LOCK(bo); continue; } - MNT_ILOCK(mp); if (mp->mnt_secondary_writes != 0) { FREE_LOCK(&lk); BO_UNLOCK(bo); - msleep(&mp->mnt_secondary_writes, - MNT_MTX(mp), - (PUSER - 1) | PDROP, "secwr", 0); + msleep(&mp->mnt_secondary_writes, MNT_MTX(mp), + PUSER - 1, "secwr", 0); BO_LOCK(bo); continue; } Index: sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c (revision 206210) +++ sys/kern/vfs_default.c (working copy) @@ -846,6 +846,13 @@ * used to fill the vfs function table to get reasonable default return values. 
*/ int +vfs_eopnotsupp(void) +{ + + return (EOPNOTSUPP); +} + +int vfs_stdroot (mp, flags, vpp) struct mount *mp; int flags; Index: sys/kern/vfs_init.c =================================================================== --- sys/kern/vfs_init.c (revision 206210) +++ sys/kern/vfs_init.c (working copy) @@ -229,6 +229,16 @@ vfsops->vfs_extattrctl = vfs_stdextattrctl; if (vfsops->vfs_sysctl == NULL) vfsops->vfs_sysctl = vfs_stdsysctl; + if (vfsops->vfs_syncer_init == NULL) + vfsops->vfs_syncer_init = vfs_stdsyncer_init; + if (vfsops->vfs_syncer_destroy == NULL) + vfsops->vfs_syncer_destroy = vfs_stdsyncer_destroy; + if (vfsops->vfs_syncer_attach == NULL) + vfsops->vfs_syncer_attach = vfs_stdsyncer_attach; + if (vfsops->vfs_syncer_detach == NULL) + vfsops->vfs_syncer_detach = vfs_stdsyncer_detach; + if (vfsops->vfs_syncer_speedup == NULL) + vfsops->vfs_syncer_speedup = vfs_stdsyncer_speedup; /* * Call init function for this VFS... Index: sys/kern/vfs_mount.c =================================================================== --- sys/kern/vfs_mount.c (revision 206210) +++ sys/kern/vfs_mount.c (working copy) @@ -505,6 +505,7 @@ mac_mount_create(cred, mp); #endif arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); + VFS_SYNCER_INIT(mp); return (mp); } @@ -543,6 +544,7 @@ if (mp->mnt_lockref != 0) panic("vfs_mount_destroy: nonzero lock refcount"); MNT_IUNLOCK(mp); + VFS_SYNCER_DESTROY(mp); #ifdef MAC mac_mount_destroy(mp); #endif @@ -1034,14 +1036,10 @@ else mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); - if ((mp->mnt_flag & MNT_RDONLY) == 0) { - if (mp->mnt_syncer == NULL) - error = vfs_allocate_syncvnode(mp); - } else { - if (mp->mnt_syncer != NULL) - vrele(mp->mnt_syncer); - mp->mnt_syncer = NULL; - } + if ((mp->mnt_flag & MNT_RDONLY) == 0) + VFS_SYNCER_ATTACH(mp, vfsp); + else + VFS_SYNCER_DETACH(mp); vfs_unbusy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; @@ -1078,10 +1076,8 @@ mountcheckdirs(vp, newdp); vrele(newdp); if ((mp->mnt_flag & MNT_RDONLY) == 0) - error = vfs_allocate_syncvnode(mp); + VFS_SYNCER_ATTACH(mp, vfsp); vfs_unbusy(mp); - if (error) - vrele(vp); } else { vfs_unbusy(mp); vfs_mount_destroy(mp); @@ -1269,8 +1265,8 @@ mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); cache_purgevfs(mp); /* remove cache entries for this file sys */ - if (mp->mnt_syncer != NULL) - vrele(mp->mnt_syncer); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + VFS_SYNCER_DETACH(mp); /* * For forced unmounts, move process cdir/rdir refs on the fs root * vnode to the covered vnode. For non-forced unmounts we want @@ -1309,9 +1305,9 @@ } MNT_ILOCK(mp); mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ; - if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) { + if ((mp->mnt_flag & MNT_RDONLY) == 0) { MNT_IUNLOCK(mp); - (void) vfs_allocate_syncvnode(mp); + VFS_SYNCER_ATTACH(mp, NULL); MNT_ILOCK(mp); } mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c (revision 206210) +++ sys/kern/vfs_subr.c (working copy) @@ -89,15 +89,47 @@ #include #endif -#define WI_MPSAFEQ 0 -#define WI_GIANTQ 1 +#define SYNCER_MAXDELAY 32 +#define SYNCER_MASK (SYNCER_MAXDELAY - 1) +LIST_HEAD(synclist, bufobj); + +/* + * Private rappresentation of the standard syncer, used to cater the + * buffer cache consumers. 
+ */ +struct mntpriv { + struct synclist sss_workitem_pending[SYNCER_MAXDELAY]; + struct cv sss_wakeup; + struct thread *sss_thr; + volatile int sss_rushjob; + int sss_delayno; + int sss_worklist_len; + int sss_attached; + enum { + SYNCER_RUNNING, + SYNCER_SHUTTING_DOWN, + SYNCER_FINAL_DELAY, + SYNCER_TO_DIE, + SYNCER_DIED + } sss_state; +}; + +#define mnt_sync_wakeup mnt_syncpriv->sss_wakeup +#define mnt_sync_thr mnt_syncpriv->sss_thr +#define mnt_sync_workitem_pending mnt_syncpriv->sss_workitem_pending +#define mnt_sync_delayno mnt_syncpriv->sss_delayno +#define mnt_sync_worklist_len mnt_syncpriv->sss_worklist_len +#define mnt_sync_rushjob mnt_syncpriv->sss_rushjob +#define mnt_sync_attached mnt_syncpriv->sss_attached +#define mnt_sync_state mnt_syncpriv->sss_state + static MALLOC_DEFINE(M_NETADDR, "subr_export_host", "Export host address structure"); static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); -static void syncer_shutdown(void *arg, int howto); +static void sched_sync(void *arg); static int vtryrecycle(struct vnode *vp); static void vbusy(struct vnode *vp); static void vinactive(struct vnode *, struct thread *); @@ -112,6 +144,9 @@ static void vfs_knlunlock(void *arg); static void vfs_knl_assert_locked(void *arg); static void vfs_knl_assert_unlocked(void *arg); +static void vfs_stdsyncer_addentry(struct mount *mp, struct bufobj *bo, + int delay); +static void vfs_stdsyncer_rementry(struct mount *mp, struct bufobj *bo); static void destroy_vpollinfo(struct vpollinfo *vi); /* @@ -191,69 +226,20 @@ /* Set to 1 to print out reclaim of active vnodes */ int prtactive; -/* - * The workitem queue. - * - * It is useful to delay writes of file data and filesystem metadata - * for tens of seconds so that quickly created and deleted files need - * not waste disk bandwidth being created and removed. To realize this, - * we append vnodes to a "workitem" queue. When running with a soft - * updates implementation, most pending metadata dependencies should - * not wait for more than a few seconds. Thus, mounted on block devices - * are delayed only about a half the time that file data is delayed. - * Similarly, directory updates are more critical, so are only delayed - * about a third the time that file data is delayed. Thus, there are - * SYNCER_MAXDELAY queues that are processed round-robin at a rate of - * one each second (driven off the filesystem syncer process). The - * syncer_delayno variable indicates the next queue that is to be processed. - * Items that need to be processed soon are placed in this queue: - * - * syncer_workitem_pending[syncer_delayno] - * - * A delay of fifteen seconds is done by placing the request fifteen - * entries later in the queue: - * - * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] - * - */ -static int syncer_delayno; -static long syncer_mask; -LIST_HEAD(synclist, bufobj); -static struct synclist *syncer_workitem_pending[2]; -/* - * The sync_mtx protects: - * bo->bo_synclist - * sync_vnode_count - * syncer_delayno - * syncer_state - * syncer_workitem_pending - * syncer_worklist_len - * rushjob - */ -static struct mtx sync_mtx; -static struct cv sync_wakeup; +/* Per-type delays, catering the standard syncer. 
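+ * SYNCDELAY takes over the role of the removed global "syncdelay" + * variable as the compile-time default resync delay.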
*/ +#define SYNCDELAY 30 -#define SYNCER_MAXDELAY 32 -static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ -static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); -static int rushjob; /* number of slots to run ASAP */ -static int stat_rush_requests; /* number of times I/O speeded up */ -SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 -static int sync_vnode_count; -static int syncer_worklist_len; -static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } - syncer_state; /* * Number of vnodes we want to exist at any one time. This is mostly used @@ -313,16 +299,6 @@ NULL, NULL, UMA_ALIGN_PTR, 0); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - /* - * Initialize the filesystem syncer. - */ - syncer_workitem_pending[WI_MPSAFEQ] = hashinit(syncer_maxdelay, M_VNODE, - &syncer_mask); - syncer_workitem_pending[WI_GIANTQ] = hashinit(syncer_maxdelay, M_VNODE, - &syncer_mask); - syncer_maxdelay = syncer_mask + 1; - mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); - cv_init(&sync_wakeup, "syncer"); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); @@ -521,6 +497,141 @@ mtx_unlock(&mntid_mtx); } +void +vfs_stdsyncer_init(struct mount *mp) +{ + + MPASS(mp != NULL); + + mp->mnt_syncpriv = malloc(sizeof(struct mntpriv), M_TEMP, + M_NOWAIT); + if (mp->mnt_syncpriv == NULL) + panic("%s: ENOMEM for the mp(%p) syncer private section", + __func__, mp); + cv_init(&mp->mnt_sync_wakeup, "syncer"); + + CTR3(KTR_SPARE2, + "%s: initializing syncer private datas %p for mp %p", __func__, + mp->mnt_syncpriv, mp); +} + +void +vfs_stdsyncer_destroy(struct mount *mp) +{ + + MPASS(mp != NULL); + CTR3(KTR_SPARE2, + "%s: destroying syncer private datas for mp %p with %p section", + __func__, mp, mp->mnt_syncpriv); + + if (mp->mnt_syncpriv != NULL) { + cv_destroy(&mp->mnt_sync_wakeup); + free(mp->mnt_syncpriv, M_TEMP); + mp->mnt_syncpriv = NULL; + } +} + +void +vfs_stdsyncer_attach(struct mount *mp, struct vfsconf *vfsp) +{ + int error; + + MPASS(mp != NULL); + MPASS(mp->mnt_syncpriv != NULL); + + MNT_ILOCK(mp); + if (mp->mnt_sync_attached != 0) { + MNT_IUNLOCK(mp); + return; + } + if (vfsp == NULL) { + vfsp = mp->mnt_vfc; + MPASS(vfsp != NULL); + } + memset(&mp->mnt_sync_workitem_pending, 0, + sizeof(mp->mnt_sync_workitem_pending)); + mp->mnt_sync_delayno = 0; + mp->mnt_sync_worklist_len = 0; + mp->mnt_sync_rushjob = 0; + mp->mnt_sync_state = SYNCER_RUNNING; + mp->mnt_sync_attached = 1; + MNT_IUNLOCK(mp); + error = kthread_add(sched_sync, mp, NULL, &mp->mnt_sync_thr, 0, + 0, "syncer %s:%d", vfsp->vfc_name, vfsp->vfc_refcount); + if (error != 0) + panic("%s: kthread_add() failed with %d errno", __func__, + error); + + CTR3(KTR_SPARE2, + "%s: attaching the syncer for mp %p to private section %p", + __func__, mp, mp->mnt_syncpriv); +} + +void +vfs_stdsyncer_detach(struct mount *mp) +{ + + MPASS(mp != NULL); + MPASS(mp->mnt_syncpriv != NULL); + CTR3(KTR_SPARE2, + "%s: 
detaching the syncer for mp %p to private section %p", + __func__, mp, mp->mnt_syncpriv); + + MNT_ILOCK(mp); + if (mp->mnt_sync_attached == 0) { + MNT_IUNLOCK(mp); + CTR2(KTR_SPARE2, + "%s: the syncer for mp %p is already detached", __func__, + mp); + return; + } + + /* Block the unmount until the syncer thread has completely exited. */ + MPASS(mp->mnt_sync_state != SYNCER_TO_DIE); + while (mp->mnt_sync_state != SYNCER_DIED) { + mp->mnt_sync_state = SYNCER_TO_DIE; + + /* + * Wake up the syncer, which may be sleeping, in order to + * speed up the operation. + */ + cv_broadcast(&mp->mnt_sync_wakeup); + msleep(&mp->mnt_sync_thr, MNT_MTX(mp), PVFS, "syncer drain", 0); + } + mp->mnt_sync_attached = 0; + MNT_IUNLOCK(mp); +} + +int +vfs_stdsyncer_speedup(struct mount *mp) +{ + int crushjob, ret; + + MPASS(mp != NULL); + MPASS(mp->mnt_syncpriv != NULL); + CTR3(KTR_SPARE2, + "%s: speeding up the syncer for mp %p to private section %p", + __func__, mp, mp->mnt_syncpriv); + + /* + * We never push it to speed up more than half of its normal turn + * time, otherwise it could take over the cpu. + * Use a lock-less algorithm in order to avoid any possible lock + * constraint. + */ + do { + crushjob = mp->mnt_sync_rushjob; + if (crushjob >= SYNCDELAY / 2) { + ret = 0; + break; + } + ret = 1; + } while (!atomic_cmpset_int(&mp->mnt_sync_rushjob, crushjob, + crushjob + 1)); + cv_broadcast(&mp->mnt_sync_wakeup); + return (ret); +} + /* * Knob to control the precision of file timestamps: * @@ -768,9 +879,6 @@ int done, vfslocked; struct proc *p = vnlruproc; - EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p, - SHUTDOWN_PRI_FIRST); - for (;;) { kproc_suspend_check(p); mtx_lock(&vnode_free_list_mtx); @@ -1590,6 +1698,7 @@ brelvp(struct buf *bp) { struct bufobj *bo; + struct mount *mp; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); @@ -1598,20 +1707,20 @@ /* * Delete from old vnode list, if on one. */ - vp = bp->b_vp; /* XXX */ + vp = bp->b_vp; + mp = vp->v_mount; bo = bp->b_bufobj; + MNT_ILOCK(mp); BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); - else + else { + BO_UNLOCK(bo); + MNT_IUNLOCK(mp); panic("brelvp: Buffer %p not on queue.", bp); - if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { - bo->bo_flag &= ~BO_ONWORKLST; - mtx_lock(&sync_mtx); - LIST_REMOVE(bo, bo_synclist); - syncer_worklist_len--; - mtx_unlock(&sync_mtx); } + vfs_stdsyncer_rementry(mp, bo); + MNT_IUNLOCK(mp); bp->b_flags &= ~B_NEEDSGIANT; bp->b_vp = NULL; bp->b_bufobj = NULL; @@ -1620,141 +1729,142 @@ } /* - * Add an item to the syncer work queue. + * Remove an item from the syncer work queue. */ static void -vn_syncer_add_to_worklist(struct bufobj *bo, int delay) +vfs_stdsyncer_rementry(struct mount *mp, struct bufobj *bo) { - int queue, slot; + MPASS(mp != NULL && bo != NULL); + MPASS(mp->mnt_syncpriv != NULL); + ASSERT_MP_ILOCKED(mp); ASSERT_BO_LOCKED(bo); - mtx_lock(&sync_mtx); - if (bo->bo_flag & BO_ONWORKLST) + if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { LIST_REMOVE(bo, bo_synclist); - else { - bo->bo_flag |= BO_ONWORKLST; - syncer_worklist_len++; + mp->mnt_sync_worklist_len--; + bo->bo_flag &= ~BO_ONWORKLST; } - - if (delay > syncer_maxdelay - 2) - delay = syncer_maxdelay - 2; - slot = (syncer_delayno + delay) & syncer_mask; - - queue = VFS_NEEDSGIANT(bo->__bo_vnode->v_mount) ? 
WI_GIANTQ : - WI_MPSAFEQ; - LIST_INSERT_HEAD(&syncer_workitem_pending[queue][slot], bo, - bo_synclist); - mtx_unlock(&sync_mtx); } -static int -sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) +/* + * Add an item to the syncer work queue. + */ +static void +vfs_stdsyncer_addentry(struct mount *mp, struct bufobj *bo, int delay) { - int error, len; + int slot; - mtx_lock(&sync_mtx); - len = syncer_worklist_len - sync_vnode_count; - mtx_unlock(&sync_mtx); - error = SYSCTL_OUT(req, &len, sizeof(len)); - return (error); + MPASS(mp != NULL && bo != NULL); + MPASS(mp->mnt_syncpriv != NULL); + ASSERT_MP_ILOCKED(mp); + + if (bo->bo_flag & BO_ONWORKLST) + LIST_REMOVE(bo, bo_synclist); + else { + bo->bo_flag |= BO_ONWORKLST; + mp->mnt_sync_worklist_len++; + } + if (delay > SYNCER_MAXDELAY - 2) + delay = SYNCER_MAXDELAY - 2; + slot = (mp->mnt_sync_delayno + delay) & SYNCER_MASK; + LIST_INSERT_HEAD(&mp->mnt_sync_workitem_pending[slot], bo, bo_synclist); } -SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, - sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); - -static struct proc *updateproc; -static void sched_sync(void); -static struct kproc_desc up_kp = { - "syncer", - sched_sync, - &updateproc -}; -SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); - -static int -sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) +/* + * Syncs on the filesystem a vnode (feeded through its bufobj). + * It does expects a mountpoint locked and correctly referenced, and a + * valid bufobj. + */ +static void +sync_vnode(struct mount *mp, struct bufobj *bo) { struct vnode *vp; - struct mount *mp; - *bo = LIST_FIRST(slp); - if (*bo == NULL) - return (0); - vp = (*bo)->__bo_vnode; /* XXX */ - if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) - return (1); + MPASS(mp != NULL && bo != NULL); + MPASS(mp->mnt_syncpriv != NULL); + ASSERT_MP_ILOCKED(mp); + MPASS(mp->mnt_writeopcount > 0); + + vp = bo->__bo_vnode; + /* - * We use vhold in case the vnode does not - * successfully sync. vhold prevents the vnode from - * going away when we unlock the sync_mtx so that - * we can acquire the vnode interlock. + * vhold() the vnode linked to the bufobj in order to avoid + * possible recycling when releasing the mountpoint interlock. */ - vholdl(vp); - mtx_unlock(&sync_mtx); - VI_UNLOCK(vp); - if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { - vdrop(vp); - mtx_lock(&sync_mtx); - return (*bo == LIST_FIRST(slp)); - } + vhold(vp); + MNT_IUNLOCK(mp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - (void) VOP_FSYNC(vp, MNT_LAZY, td); + VOP_FSYNC(vp, MNT_LAZY, curthread); VOP_UNLOCK(vp, 0); - vn_finished_write(mp); - BO_LOCK(*bo); - if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { + MNT_ILOCK(mp); + if ((bo->bo_flag & BO_ONWORKLST) != 0) { + /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ - vn_syncer_add_to_worklist(*bo, syncdelay); + vfs_stdsyncer_addentry(mp, bo, SYNCDELAY); } - BO_UNLOCK(*bo); vdrop(vp); - mtx_lock(&sync_mtx); - return (0); } /* - * System filesystem synchronizer daemon. + * Private implementation of the standard syncer. + * + * It is useful to delay writes of file data and filesystem metadata + * for tens of seconds so that quickly created and deleted files need + * not waste disk bandwidth being created and removed. To realize this, + * we append vnodes to a "workitem" queue. 
When running with a soft + * updates implementation, most pending metadata dependencies should + * not wait for more than a few seconds. Thus, mounted on block devices + * are delayed only about a half the time that file data is delayed. + * Similarly, directory updates are more critical, so are only delayed + * about a third the time that file data is delayed. Thus, there are + * SYNCER_MAXDELAY queues that are processed round-robin at a rate of + * one each second (driven off the filesystem syncer process). The + * syncer_delayno variable indicates the next queue that is to be processed. + * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & SYNCER_MASK] + * */ static void -sched_sync(void) +sched_sync(void *arg) { - struct synclist *gnext, *next; - struct synclist *gslp, *slp; - struct bufobj *bo; long starttime; - struct thread *td = curthread; - int last_work_seen; - int net_worklist_len; - int syncer_final_iter; + struct mount *mp; + struct synclist *next, *slp; + struct bufobj *bo; + struct thread *td; + int last_work_seen, net_worklist_len, syncer_final_iter; + int crushjob, rushed; int first_printf; - int error; + int vfslocked; + mp = arg; last_work_seen = 0; syncer_final_iter = 0; first_printf = 1; - syncer_state = SYNCER_RUNNING; starttime = time_uptime; + td = curthread; td->td_pflags |= TDP_NORUNNINGBUF; - EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, - SHUTDOWN_PRI_LAST); - - mtx_lock(&sync_mtx); - for (;;) { - if (syncer_state == SYNCER_FINAL_DELAY && - syncer_final_iter == 0) { - mtx_unlock(&sync_mtx); - kproc_suspend_check(td->td_proc); - mtx_lock(&sync_mtx); - } - net_worklist_len = syncer_worklist_len - sync_vnode_count; - if (syncer_state != SYNCER_RUNNING && + vfslocked = VFS_LOCK_GIANT(mp); + MNT_ILOCK(mp); + while (mp->mnt_sync_state != SYNCER_TO_DIE) { + MPASS(mp != NULL); + MPASS(mp->mnt_syncpriv != NULL); + MPASS(mp->mnt_sync_state != SYNCER_DIED); + net_worklist_len = mp->mnt_sync_worklist_len; + if (mp->mnt_sync_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining..."); @@ -1771,27 +1881,26 @@ * Skip over empty worklist slots when shutting down. */ do { - slp = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno]; - gslp = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno]; - syncer_delayno += 1; - if (syncer_delayno == syncer_maxdelay) - syncer_delayno = 0; - next = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno]; - gnext = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno]; + slp = &mp->mnt_sync_workitem_pending[mp->mnt_sync_delayno]; + mp->mnt_sync_delayno += 1; + if (mp->mnt_sync_delayno == SYNCER_MAXDELAY) + mp->mnt_sync_delayno = 0; + next = &mp->mnt_sync_workitem_pending[mp->mnt_sync_delayno]; + /* * If the worklist has wrapped since the * it was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second. 
*/ - if (syncer_state == SYNCER_SHUTTING_DOWN && + if (mp->mnt_sync_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && - last_work_seen == syncer_delayno) { - syncer_state = SYNCER_FINAL_DELAY; + last_work_seen == mp->mnt_sync_delayno) { + mp->mnt_sync_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } - } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && - LIST_EMPTY(gslp) && syncer_worklist_len > 0); + } while (mp->mnt_sync_state != SYNCER_RUNNING && + LIST_EMPTY(slp) && mp->mnt_sync_worklist_len > 0); /* * Keep track of the last time there was anything * @@ -1799,35 +1908,81 @@ * Return to the SHUTTING_DOWN state if any * new work appears. */ - if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) - last_work_seen = syncer_delayno; - if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) - syncer_state = SYNCER_SHUTTING_DOWN; - while (!LIST_EMPTY(slp)) { - error = sync_vnode(slp, &bo, td); - if (error == 1) { + if (net_worklist_len > 0 || + mp->mnt_sync_state == SYNCER_RUNNING) + last_work_seen = mp->mnt_sync_delayno; + if (net_worklist_len > 0 && + mp->mnt_sync_state == SYNCER_FINAL_DELAY) + mp->mnt_sync_state = SYNCER_SHUTTING_DOWN; + + /* + * Since the mountpoint interlock is already held and only a + * simple, non-sleeping call to vn_start_write() would be due + * here, it is convenient to emulate the interesting bits of + * it directly in the code. + * Furthermore, the mountpoint is known not to be a filesystem + * bypass, so no VOP_GETWRITEMOUNT() inquiry is needed, and + * the syncer thread does not set the TDP_IGNSUSP flag, so + * that check is skipped as well. + * Finally, the syncer does not need to acquire a refcount on + * the mountpoint because the races against unmount are + * handled via the syncer state transitions. + */ + if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { + + /* + * The filesystem is already suspending. + * Move the bufobjs into the next syncer queue + * and skip any further magic. + */ + while (!LIST_EMPTY(slp)) { + bo = LIST_FIRST(slp); LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); - continue; } + } else { + mp->mnt_writeopcount++; + mp->mnt_noasync++; + mp->mnt_kern_flag &= ~MNTK_ASYNC; + + /* + * The mountpoint is ready to be written now. + * sync_vnode() may release the mountpoint + * interlock, so use a strict loop to deal with + * lost insertion races. + */ + while (!LIST_EMPTY(slp)) + sync_vnode(mp, LIST_FIRST(slp)); + + /* Perform a full filesystem sync now. */ + MNT_IUNLOCK(mp); + vfs_msync(mp, MNT_NOWAIT); + VFS_SYNC(mp, MNT_LAZY); + MNT_ILOCK(mp); + mp->mnt_noasync--; + if ((mp->mnt_flag & MNT_ASYNC) != 0 && + mp->mnt_noasync == 0) + mp->mnt_kern_flag |= MNTK_ASYNC; + + /* Emulate a vn_finished_write(). */ + mp->mnt_writeopcount--; + MPASS(mp->mnt_writeopcount >= 0); + if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && + mp->mnt_writeopcount == 0) + wakeup(&mp->mnt_writeopcount); + + /* + * If an unmount operation started while the mountpoint + * interlock was released, quit the syncer + * thread now. 
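+ * vfs_stdsyncer_detach() has already set SYNCER_TO_DIE and is + * sleeping until the state reaches SYNCER_DIED, so leaving the + * main loop here lets the exit path below post that wakeup.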
+ */ + if (mp->mnt_sync_state == SYNCER_TO_DIE) + continue; } - if (!LIST_EMPTY(gslp)) { - mtx_unlock(&sync_mtx); - mtx_lock(&Giant); - mtx_lock(&sync_mtx); - while (!LIST_EMPTY(gslp)) { - error = sync_vnode(gslp, &bo, td); - if (error == 1) { - LIST_REMOVE(bo, bo_synclist); - LIST_INSERT_HEAD(gnext, bo, - bo_synclist); - continue; - } - } - mtx_unlock(&Giant); - } - if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) + if (mp->mnt_sync_state == SYNCER_FINAL_DELAY && + syncer_final_iter > 0) syncer_final_iter--; + /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob @@ -1838,10 +1993,18 @@ * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ - if (rushjob > 0) { - rushjob -= 1; + do { + crushjob = mp->mnt_sync_rushjob; + if (crushjob <= 0) { + rushed = 0; + break; + } + rushed = 1; + } while (!atomic_cmpset_int(&mp->mnt_sync_rushjob, crushjob, + crushjob - 1)); + if (rushed != 0) continue; - } + /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O @@ -1854,54 +2017,25 @@ * matter as we are just trying to generally pace the * filesystem activity. */ - if (syncer_state != SYNCER_RUNNING) - cv_timedwait(&sync_wakeup, &sync_mtx, + if (mp->mnt_sync_state != SYNCER_RUNNING) + cv_timedwait(&mp->mnt_sync_wakeup, MNT_MTX(mp), hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) - cv_timedwait(&sync_wakeup, &sync_mtx, hz); + cv_timedwait(&mp->mnt_sync_wakeup, MNT_MTX(mp), hz); } -} -/* - * Request the syncer daemon to speed up its work. - * We never push it to speed up more than half of its - * normal turn time, otherwise it could take over the cpu. - */ -int -speedup_syncer(void) -{ - int ret = 0; - - mtx_lock(&sync_mtx); - if (rushjob < syncdelay / 2) { - rushjob += 1; - stat_rush_requests += 1; - ret = 1; - } - mtx_unlock(&sync_mtx); - cv_broadcast(&sync_wakeup); - return (ret); + /* + * If this point is reached, an unmount request is in progress. + * Wakeup the unmount thread and close the syncer. + */ + mp->mnt_sync_state = SYNCER_DIED; + wakeup(&mp->mnt_sync_thr); + MNT_IUNLOCK(mp); + VFS_UNLOCK_GIANT(vfslocked); + kthread_exit(); } /* - * Tell the syncer to speed up its work and run though its work - * list several times, then tell it to shut down. - */ -static void -syncer_shutdown(void *arg, int howto) -{ - - if (howto & RB_NOSYNC) - return; - mtx_lock(&sync_mtx); - syncer_state = SYNCER_SHUTTING_DOWN; - rushjob = 0; - mtx_unlock(&sync_mtx); - cv_broadcast(&sync_wakeup); - kproc_shutdown(arg, howto); -} - -/* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. @@ -1909,6 +2043,7 @@ void reassignbuf(struct buf *bp) { + struct mount *mp; struct vnode *vp; struct bufobj *bo; int delay; @@ -1917,6 +2052,7 @@ #endif vp = bp->b_vp; + mp = vp->v_mount; bo = bp->b_bufobj; ++reassignbufcalls; @@ -1931,12 +2067,21 @@ /* * Delete from old vnode list, if on one. + * + * Lock the mountpoint now in order to avoid a LOR with the bufobj lock + * as we may be needing to insert a dirty buffer within the appropriate + * syncer pending worklist. 
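+ * This is the same lock order (mountpoint interlock first, then the + * bufobj lock) used by brelvp() before it calls vfs_stdsyncer_rementry().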
*/ + MNT_ILOCK(mp); BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); - else + else { + BO_UNLOCK(bo); + MNT_IUNLOCK(mp); panic("reassignbuf: Buffer %p not on queue.", bp); + } + /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. @@ -1953,19 +2098,12 @@ default: delay = filedelay; } - vn_syncer_add_to_worklist(bo, delay); + vfs_stdsyncer_addentry(mp, bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); - - if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { - mtx_lock(&sync_mtx); - LIST_REMOVE(bo, bo_synclist); - syncer_worklist_len--; - mtx_unlock(&sync_mtx); - bo->bo_flag &= ~BO_ONWORKLST; - } + vfs_stdsyncer_rementry(mp, bo); } #ifdef INVARIANTS bv = &bo->bo_clean; @@ -1984,6 +2122,7 @@ ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif BO_UNLOCK(bo); + MNT_IUNLOCK(mp); } /* @@ -3323,169 +3462,6 @@ } /* - * Routine to create and manage a filesystem syncer vnode. - */ -#define sync_close ((int (*)(struct vop_close_args *))nullop) -static int sync_fsync(struct vop_fsync_args *); -static int sync_inactive(struct vop_inactive_args *); -static int sync_reclaim(struct vop_reclaim_args *); - -static struct vop_vector sync_vnodeops = { - .vop_bypass = VOP_EOPNOTSUPP, - .vop_close = sync_close, /* close */ - .vop_fsync = sync_fsync, /* fsync */ - .vop_inactive = sync_inactive, /* inactive */ - .vop_reclaim = sync_reclaim, /* reclaim */ - .vop_lock1 = vop_stdlock, /* lock */ - .vop_unlock = vop_stdunlock, /* unlock */ - .vop_islocked = vop_stdislocked, /* islocked */ -}; - -/* - * Create a new filesystem syncer vnode for the specified mount point. - */ -int -vfs_allocate_syncvnode(struct mount *mp) -{ - struct vnode *vp; - struct bufobj *bo; - static long start, incr, next; - int error; - - /* Allocate a new vnode */ - if ((error = getnewvnode("syncer", mp, &sync_vnodeops, &vp)) != 0) { - mp->mnt_syncer = NULL; - return (error); - } - vp->v_type = VNON; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - vp->v_vflag |= VV_FORCEINSMQ; - error = insmntque(vp, mp); - if (error != 0) - panic("vfs_allocate_syncvnode: insmntque failed"); - vp->v_vflag &= ~VV_FORCEINSMQ; - VOP_UNLOCK(vp, 0); - /* - * Place the vnode onto the syncer worklist. We attempt to - * scatter them about on the list so that they will go off - * at evenly distributed times even if all the filesystems - * are mounted at once. - */ - next += incr; - if (next == 0 || next > syncer_maxdelay) { - start /= 2; - incr /= 2; - if (start == 0) { - start = syncer_maxdelay / 2; - incr = syncer_maxdelay; - } - next = start; - } - bo = &vp->v_bufobj; - BO_LOCK(bo); - vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0); - /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ - mtx_lock(&sync_mtx); - sync_vnode_count++; - mtx_unlock(&sync_mtx); - BO_UNLOCK(bo); - mp->mnt_syncer = vp; - return (0); -} - -/* - * Do a lazy sync of the filesystem. - */ -static int -sync_fsync(struct vop_fsync_args *ap) -{ - struct vnode *syncvp = ap->a_vp; - struct mount *mp = syncvp->v_mount; - int error; - struct bufobj *bo; - - /* - * We only need to do something if this is a lazy evaluation. - */ - if (ap->a_waitfor != MNT_LAZY) - return (0); - - /* - * Move ourselves to the back of the sync list. 
- */ - bo = &syncvp->v_bufobj; - BO_LOCK(bo); - vn_syncer_add_to_worklist(bo, syncdelay); - BO_UNLOCK(bo); - - /* - * Walk the list of vnodes pushing all that are dirty and - * not already on the sync list. - */ - mtx_lock(&mountlist_mtx); - if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { - mtx_unlock(&mountlist_mtx); - return (0); - } - if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { - vfs_unbusy(mp); - return (0); - } - MNT_ILOCK(mp); - mp->mnt_noasync++; - mp->mnt_kern_flag &= ~MNTK_ASYNC; - MNT_IUNLOCK(mp); - vfs_msync(mp, MNT_NOWAIT); - error = VFS_SYNC(mp, MNT_LAZY); - MNT_ILOCK(mp); - mp->mnt_noasync--; - if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) - mp->mnt_kern_flag |= MNTK_ASYNC; - MNT_IUNLOCK(mp); - vn_finished_write(mp); - vfs_unbusy(mp); - return (error); -} - -/* - * The syncer vnode is no referenced. - */ -static int -sync_inactive(struct vop_inactive_args *ap) -{ - - vgone(ap->a_vp); - return (0); -} - -/* - * The syncer vnode is no longer needed and is being decommissioned. - * - * Modifications to the worklist must be protected by sync_mtx. - */ -static int -sync_reclaim(struct vop_reclaim_args *ap) -{ - struct vnode *vp = ap->a_vp; - struct bufobj *bo; - - bo = &vp->v_bufobj; - BO_LOCK(bo); - vp->v_mount->mnt_syncer = NULL; - if (bo->bo_flag & BO_ONWORKLST) { - mtx_lock(&sync_mtx); - LIST_REMOVE(bo, bo_synclist); - syncer_worklist_len--; - sync_vnode_count--; - mtx_unlock(&sync_mtx); - bo->bo_flag &= ~BO_ONWORKLST; - } - BO_UNLOCK(bo); - - return (0); -} - -/* * Check if vnode represents a disk device */ int Index: sys/fs/unionfs/union_vfsops.c =================================================================== --- sys/fs/unionfs/union_vfsops.c (revision 206210) +++ sys/fs/unionfs/union_vfsops.c (working copy) @@ -519,6 +519,11 @@ .vfs_uninit = unionfs_uninit, .vfs_unmount = unionfs_unmount, .vfs_vget = unionfs_vget, + .vfs_syncer_init = VFS_EOPNOTSUPP, + .vfs_syncer_destroy = VFS_EOPNOTSUPP, + .vfs_syncer_attach = VFS_EOPNOTSUPP, + .vfs_syncer_detach = VFS_EOPNOTSUPP, + .vfs_syncer_speedup = VFS_EOPNOTSUPP }; VFS_SET(unionfs_vfsops, unionfs, VFCF_LOOPBACK); Index: sys/fs/pseudofs/pseudofs.h =================================================================== --- sys/fs/pseudofs/pseudofs.h (revision 206210) +++ sys/fs/pseudofs/pseudofs.h (working copy) @@ -302,6 +302,11 @@ .vfs_statfs = pfs_statfs, \ .vfs_uninit = _##name##_uninit, \ .vfs_unmount = pfs_unmount, \ + .vfs_syncer_init = VFS_EOPNOTSUPP, \ + .vfs_syncer_destroy = VFS_EOPNOTSUPP, \ + .vfs_syncer_attach = VFS_EOPNOTSUPP, \ + .vfs_syncer_detach = VFS_EOPNOTSUPP, \ + .vfs_syncer_speedup = VFS_EOPNOTSUPP \ }; \ VFS_SET(name##_vfsops, name, VFCF_SYNTHETIC); \ MODULE_VERSION(name, version); \ Index: sys/fs/tmpfs/tmpfs_vfsops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vfsops.c (revision 206210) +++ sys/fs/tmpfs/tmpfs_vfsops.c (working copy) @@ -414,5 +414,10 @@ .vfs_root = tmpfs_root, .vfs_statfs = tmpfs_statfs, .vfs_fhtovp = tmpfs_fhtovp, + .vfs_syncer_init = VFS_EOPNOTSUPP, + .vfs_syncer_destroy = VFS_EOPNOTSUPP, + .vfs_syncer_attach = VFS_EOPNOTSUPP, + .vfs_syncer_detach = VFS_EOPNOTSUPP, + .vfs_syncer_speedup = VFS_EOPNOTSUPP }; VFS_SET(tmpfs_vfsops, tmpfs, 0); Index: sys/fs/portalfs/portal_vfsops.c =================================================================== --- sys/fs/portalfs/portal_vfsops.c (revision 206210) +++ sys/fs/portalfs/portal_vfsops.c (working copy) @@ -252,6 +252,11 @@ .vfs_root = portal_root, .vfs_statfs = portal_statfs, 
.vfs_unmount = portal_unmount, + .vfs_syncer_init = VFS_EOPNOTSUPP, + .vfs_syncer_destroy = VFS_EOPNOTSUPP, + .vfs_syncer_attach = VFS_EOPNOTSUPP, + .vfs_syncer_detach = VFS_EOPNOTSUPP, + .vfs_syncer_speedup = VFS_EOPNOTSUPP }; VFS_SET(portal_vfsops, portalfs, VFCF_SYNTHETIC); Index: sys/fs/nullfs/null_vfsops.c =================================================================== --- sys/fs/nullfs/null_vfsops.c (revision 206210) +++ sys/fs/nullfs/null_vfsops.c (working copy) @@ -359,6 +359,11 @@ .vfs_uninit = nullfs_uninit, .vfs_unmount = nullfs_unmount, .vfs_vget = nullfs_vget, + .vfs_syncer_init = VFS_EOPNOTSUPP, + .vfs_syncer_destroy = VFS_EOPNOTSUPP, + .vfs_syncer_attach = VFS_EOPNOTSUPP, + .vfs_syncer_detach = VFS_EOPNOTSUPP, + .vfs_syncer_speedup = VFS_EOPNOTSUPP }; VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK); Index: sys/fs/coda/coda_vfsops.c =================================================================== --- sys/fs/coda/coda_vfsops.c (revision 206210) +++ sys/fs/coda/coda_vfsops.c (working copy) @@ -442,5 +442,10 @@ .vfs_statfs = coda_statfs, .vfs_sync = coda_sync, .vfs_unmount = coda_unmount, + .vfs_syncer_init = VFS_EOPNOTSUPP, + .vfs_syncer_destroy = VFS_EOPNOTSUPP, + .vfs_syncer_attach = VFS_EOPNOTSUPP, + .vfs_syncer_detach = VFS_EOPNOTSUPP, + .vfs_syncer_speedup = VFS_EOPNOTSUPP }; VFS_SET(coda_vfsops, coda, VFCF_NETWORK); Index: sys/fs/fdescfs/fdesc_vfsops.c =================================================================== --- sys/fs/fdescfs/fdesc_vfsops.c (revision 206210) +++ sys/fs/fdescfs/fdesc_vfsops.c (working copy) @@ -233,6 +233,11 @@ .vfs_statfs = fdesc_statfs, .vfs_uninit = fdesc_uninit, .vfs_unmount = fdesc_unmount, + .vfs_syncer_init = VFS_EOPNOTSUPP, + .vfs_syncer_destroy = VFS_EOPNOTSUPP, + .vfs_syncer_attach = VFS_EOPNOTSUPP, + .vfs_syncer_detach = VFS_EOPNOTSUPP, + .vfs_syncer_speedup = VFS_EOPNOTSUPP }; VFS_SET(fdesc_vfsops, fdescfs, VFCF_SYNTHETIC); Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c (revision 206210) +++ sys/vm/vm_pageout.c (working copy) @@ -681,6 +681,7 @@ int page_shortage, maxscan, pcount; int addl_page_shortage, addl_page_shortage_init; vm_object_t object; + struct mount *mp; int actcount; int vnodes_skipped = 0; int maxlaunder; @@ -1159,8 +1160,12 @@ * if we did not get enough free pages. 
*/ if (vm_paging_target() > 0) { - if (vnodes_skipped && vm_page_count_min()) - (void) speedup_syncer(); + if (vnodes_skipped && vm_page_count_min()) { + mtx_lock(&mountlist_mtx); + TAILQ_FOREACH(mp, &mountlist, mnt_list) + VFS_SYNCER_SPEEDUP(mp); + mtx_unlock(&mountlist_mtx); + } #if !defined(NO_SWAPPING) if (vm_swap_enabled && vm_page_count_target()) vm_req_vmdaemon(VM_SWAP_NORMAL); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c (revision 206210) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c (working copy) @@ -112,6 +112,11 @@ .vfs_sync = zfs_sync, .vfs_checkexp = zfs_checkexp, .vfs_fhtovp = zfs_fhtovp, + .vfs_syncer_init = VFS_EOPNOTSUPP, + .vfs_syncer_destroy = VFS_EOPNOTSUPP, + .vfs_syncer_attach = VFS_EOPNOTSUPP, + .vfs_syncer_detach = VFS_EOPNOTSUPP, + .vfs_syncer_speedup = VFS_EOPNOTSUPP }; VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); Index: sys/sys/mount.h =================================================================== --- sys/sys/mount.h (revision 206210) +++ sys/sys/mount.h (working copy) @@ -127,6 +127,7 @@ long f_spare[2]; /* unused spare */ }; +struct mntpriv; TAILQ_HEAD(vnodelst, vnode); /* Mount options list */ @@ -160,7 +161,7 @@ struct vfsops *mnt_op; /* operations on fs */ struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ - struct vnode *mnt_syncer; /* syncer vnode */ + struct mntpriv *mnt_syncpriv; /* syncer private datas */ int mnt_ref; /* (i) Reference count */ struct vnodelst mnt_nvnodelist; /* (i) list of vnodes */ int mnt_nvnodelistsize; /* (i) # of vnodes */ @@ -207,6 +208,7 @@ MNT_IUNLOCK(mp); \ } while (0) +#define ASSERT_MP_ILOCKED(mp) mtx_assert(&(mp)->mnt_mtx, MA_OWNED) #define MNT_ILOCK(mp) mtx_lock(&(mp)->mnt_mtx) #define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx) #define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx) @@ -577,6 +579,11 @@ typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, struct sysctl_req *req); typedef void vfs_susp_clean_t(struct mount *mp); +typedef void vfs_syncer_init_t(struct mount *mp); +typedef void vfs_syncer_destroy_t(struct mount *mp); +typedef void vfs_syncer_attach_t(struct mount *mp, struct vfsconf *vfsp); +typedef void vfs_syncer_detach_t(struct mount *mp); +typedef int vfs_syncer_speedup_t(struct mount *mp); struct vfsops { vfs_mount_t *vfs_mount; @@ -594,10 +601,16 @@ vfs_extattrctl_t *vfs_extattrctl; vfs_sysctl_t *vfs_sysctl; vfs_susp_clean_t *vfs_susp_clean; + vfs_syncer_init_t *vfs_syncer_init; + vfs_syncer_destroy_t *vfs_syncer_destroy; + vfs_syncer_attach_t *vfs_syncer_attach; + vfs_syncer_detach_t *vfs_syncer_detach; + vfs_syncer_speedup_t *vfs_syncer_speedup; }; vfs_statfs_t __vfs_statfs; +#define VFS_EOPNOTSUPP ((void *)(uintptr_t)vfs_eopnotsupp) #define VFS_MOUNT(MP) (*(MP)->mnt_op->vfs_mount)(MP) #define VFS_UNMOUNT(MP, FORCE) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE) #define VFS_ROOT(MP, FLAGS, VPP) \ @@ -619,6 +632,12 @@ #define VFS_SUSP_CLEAN(MP) \ ({if (*(MP)->mnt_op->vfs_susp_clean != NULL) \ (*(MP)->mnt_op->vfs_susp_clean)(MP); }) +#define VFS_SYNCER_INIT(MP) (*(MP)->mnt_op->vfs_syncer_init)(MP) +#define VFS_SYNCER_DESTROY(MP) (*(MP)->mnt_op->vfs_syncer_destroy)(MP) +#define VFS_SYNCER_ATTACH(MP, VFSP) \ + (*(MP)->mnt_op->vfs_syncer_attach)(MP, VFSP) +#define VFS_SYNCER_DETACH(MP) (*(MP)->mnt_op->vfs_syncer_detach)(MP) +#define VFS_SYNCER_SPEEDUP(MP) 
(*(MP)->mnt_op->vfs_syncer_speedup)(MP) #define VFS_NEEDSGIANT_(MP) \ ((MP) != NULL && ((MP)->mnt_kern_flag & MNTK_MPSAFE) == 0) @@ -728,7 +747,6 @@ int vfs_busy(struct mount *, int); int vfs_export /* process mount export info */ (struct mount *, struct export_args *); -int vfs_allocate_syncvnode(struct mount *); int vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions); void vfs_getnewfsid(struct mount *); struct cdev *vfs_getrootfsid(struct mount *); @@ -745,6 +763,7 @@ int vfs_suser(struct mount *, struct thread *); void vfs_unbusy(struct mount *); void vfs_unmountall(void); +int vfs_eopnotsupp(void); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx mountlist_mtx; extern struct nfs_public nfs_pub; @@ -766,6 +785,11 @@ vfs_uninit_t vfs_stduninit; vfs_extattrctl_t vfs_stdextattrctl; vfs_sysctl_t vfs_stdsysctl; +vfs_syncer_init_t vfs_stdsyncer_init; +vfs_syncer_destroy_t vfs_stdsyncer_destroy; +vfs_syncer_attach_t vfs_stdsyncer_attach; +vfs_syncer_detach_t vfs_stdsyncer_detach; +vfs_syncer_speedup_t vfs_stdsyncer_speedup; #else /* !_KERNEL */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h (revision 206210) +++ sys/sys/vnode.h (working copy) @@ -600,7 +600,6 @@ void (*dtr)(struct vnode *, void *), void *dtr_arg); int insmntque(struct vnode *vp, struct mount *mp); u_quad_t init_va_filerev(void); -int speedup_syncer(void); int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen); #define textvp_fullpath(p, rb, rfb) \