commit 1df35d7e8e56f23e445ff6ed8effa47d04678cd1 Author: Mateusz Guzik Date: Sun Mar 6 15:26:15 2022 +0000 vfs: work around stalls in vnode reclaim Reported by: pho diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index afafd02d92b9..079aae54a3a0 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1309,8 +1309,26 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp) TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); mtx_unlock(&vnode_list_mtx); - if (vtryrecycle(vp) == 0) - count--; + /* + * FIXME: ignores the return value, meaning it may be that nothing + * got recycled. + * + * Originally the value started being ignored in 2005 with + * 114a1006a8204aa156e1f9ad6476cdff89cada7f. + * + * Respecting the value can run into significant stalls if most + * vnodes belong to one file system and it has writes suspended. + * In the presence of many threads and millions of vnodes they keep + * contending on the vnode_list_mtx lock only to find a vnode + * they can't recycle. + * + * Fixing this is not easy as finding the mount point requires + * a call to VOP_GETWRITEMOUNT, which in the case of nullfs takes + * the vnode interlock, which means vnode_list_mtx cannot be held + * at the time to avoid a LOR. + */ + vtryrecycle(vp); + count--; mtx_lock(&vnode_list_mtx); vp = mvp; }