/*
 * Patch overview (diff against revision 190561): introduce an "orphaned"
 * mount state for filesystems whose backing GEOM provider goes away.
 *
 * sbin/mount/mount.c
 *   Adds { MNT_ORPHANED, "orphaned" } to the option-name table.
 *
 * sys/ufs/ffs/ffs_vfsops.c, sys/fs/msdosfs/msdosfs_vfsops.c
 *   Each adds a static *_orphan_callback() that casts its user pointer to
 *   struct mount * and calls vfs_orphan().  The mount routine takes
 *   vfs_ref(mp) next to dev_ref(dev) and, when g_vfs_open() returns 0,
 *   registers the callback with g_vfs_register_callback(..., G_CB_ORPHAN).
 *   vfs_rel(mp) is added next to dev_rel() in the hunks labelled "out:" /
 *   "error_exit:" and in the unmount routines.
 *
 * sys/kern/vfs_syscalls.c
 *   kern_statfs(), kern_fstatfs(), kern_getfsstat() and kern_fhstatfs() OR
 *   MNT_ORPHANED into f_flags when MNTK_ORPHANED is set in mnt_kern_flag.
 *
 * sys/kern/vfs_subr.c
 *   insmntque1() treats MNTK_ORPHANED like MNTK_UNMOUNTF in its
 *   MNTK_NOINSMNTQ test and returns ENXIO (instead of EBUSY) for an
 *   orphaned mount.  db_show_mount prints MNTK_ORPHANED.  New vfs_orphan():
 *   vfs_busy(mp, MBF_NOWAIT), set MNTK_ORPHANED | MNTK_NOINSMNTQ under the
 *   mount interlock, then vflush(mp, 0, FORCECLOSE, curthread) only if mp
 *   is found on mountlist; vfs_unbusy() on both paths.
 *
 * sys/geom/geom_vfs.c, sys/geom/geom_vfs.h
 *   Adds struct g_vfs_cb and g_vfs_register_callback().  g_vfs_open()
 *   allocates an array of G_CB_LAST + 1 entries into cp->private;
 *   g_vfs_close() frees it.  g_vfs_orphan() enqueues the registered
 *   callback's task on taskqueue_thread and drains it before the existing
 *   g_access()/g_detach() calls.  G_CB_ORPHAN is 1 and G_CB_LAST equals it.
 *
 * sys/sys/mount.h
 *   Defines MNT_ORPHANED (0x00020000, same value as MNT_DELEXPORT as the
 *   added XXX comment notes) and MNTK_ORPHANED (0x00000080), adds
 *   MNT_ORPHANED to MNT_VISFLAGMASK, and declares vfs_orphan().
 *
 * NOTE(review): in the g_vfs_open() hunk cp->private is allocated before
 *   g_access(); the visible failure branch calls g_wither_geom() with no
 *   g_free(cp->private) -- looks like a leak on that path; confirm against
 *   the full function.
 * NOTE(review): g_vfs_register_callback() validates 'event' only with
 *   KASSERT and accepts 0, for which no event is defined -- presumably
 *   unchecked on kernels built without INVARIANTS; confirm.
 * NOTE(review): insmntque1() tests MNTK_ORPHANED after MNT_IUNLOCK(mp);
 *   confirm the unlocked read is acceptable.
 * NOTE(review): g_vfs_orphan() calls taskqueue_drain() after
 *   g_topology_assert(); verify the callback path (vfs_orphan -> vflush)
 *   can never need the topology lock.
 * NOTE(review): in this copy line breaks inside hunks are collapsed and the
 *   header names on the "#include" lines of the geom_vfs.c hunk are
 *   missing; it probably will not apply with patch(1) as-is -- TODO obtain
 *   the original diff.
 */
Index: sbin/mount/mount.c =================================================================== --- sbin/mount/mount.c (revision 190561) +++ sbin/mount/mount.c (working copy) @@ -112,6 +112,7 @@ static struct opt { { MNT_MULTILABEL, "multilabel" }, { MNT_ACLS, "acls" }, { MNT_GJOURNAL, "gjournal" }, + { MNT_ORPHANED, "orphaned" }, { 0, NULL } }; Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== --- sys/ufs/ffs/ffs_vfsops.c (revision 190561) +++ sys/ufs/ffs/ffs_vfsops.c (working copy) @@ -601,6 +601,15 @@ loop: */ static int sblock_try[] = SBLOCKSEARCH; +static void +ffs_orphan_callback(struct g_consumer *cp, void *user) +{ + struct mount *mp; + + mp = (struct mount *)user; + vfs_orphan(mp); +} + /* * Common code for mount and mountroot */ @@ -629,9 +638,13 @@ ffs_mountfs(devvp, mp, td) dev = devvp->v_rdev; dev_ref(dev); + vfs_ref(mp); DROP_GIANT(); g_topology_lock(); error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1); + if (error == 0) + g_vfs_register_callback(cp, ffs_orphan_callback, + mp, G_CB_ORPHAN); /* * If we are a root mount, drop the E flag so fsck can do its magic. 
@@ -923,6 +936,7 @@ out: free(ump, M_UFSMNT); mp->mnt_data = NULL; } + vfs_rel(mp); dev_rel(dev); return (error); } @@ -1110,6 +1124,7 @@ ffs_unmount(mp, mntflags, td) g_topology_unlock(); PICKUP_GIANT(); vrele(ump->um_devvp); + vfs_rel(mp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { Index: sys/kern/vfs_syscalls.c =================================================================== --- sys/kern/vfs_syscalls.c (revision 190561) +++ sys/kern/vfs_syscalls.c (working copy) @@ -326,6 +326,8 @@ kern_statfs(struct thread *td, char *path, enum ui sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (mp->mnt_kern_flag & MNTK_ORPHANED) + sp->f_flags |= MNT_ORPHANED; error = VFS_STATFS(mp, sp, td); if (error) goto out; @@ -415,6 +417,8 @@ kern_fstatfs(struct thread *td, int fd, struct sta sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (mp->mnt_kern_flag & MNTK_ORPHANED) + sp->f_flags |= MNT_ORPHANED; error = VFS_STATFS(mp, sp, td); if (error) goto out; @@ -515,6 +519,8 @@ kern_getfsstat(struct thread *td, struct statfs ** sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (mp->mnt_kern_flag & MNTK_ORPHANED) + sp->f_flags |= MNT_ORPHANED; /* * If MNT_NOWAIT or MNT_LAZY is specified, do not * refresh the fsstat cache. 
MNT_NOWAIT or MNT_LAZY @@ -4662,6 +4668,8 @@ kern_fhstatfs(struct thread *td, fhandle_t fh, str sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (mp->mnt_kern_flag & MNTK_ORPHANED) + sp->f_flags |= MNT_ORPHANED; error = VFS_STATFS(mp, sp, td); if (error == 0) *buf = *sp; Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c (revision 190561) +++ sys/kern/vfs_subr.c (working copy) @@ -1084,7 +1084,7 @@ insmntque1(struct vnode *vp, struct mount *mp, #endif MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 && - ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || + ((mp->mnt_kern_flag & (MNTK_UNMOUNTF | MNTK_ORPHANED)) != 0 || mp->mnt_nvnodelistsize == 0)) { locked = VOP_ISLOCKED(vp); if (!locked || (locked == LK_EXCLUSIVE && @@ -1092,6 +1092,8 @@ insmntque1(struct vnode *vp, struct mount *mp, MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); + if ((mp->mnt_kern_flag & MNTK_ORPHANED) != 0) + return (ENXIO); return (EBUSY); } } @@ -2875,6 +2877,7 @@ DB_SHOW_COMMAND(mount, db_show_mount) MNT_KERN_FLAG(MNTK_MPSAFE); MNT_KERN_FLAG(MNTK_NOKNOTE); MNT_KERN_FLAG(MNTK_LOOKUP_SHARED); + MNT_KERN_FLAG(MNTK_ORPHANED); #undef MNT_KERN_FLAG if (flags != 0) { if (buf[0] != '\0') @@ -4249,6 +4252,45 @@ vfs_read_dirent(struct vop_readdir_args *ap, struc } /* + * Mark the filesystem as orphaned. Usually called when the device + * that contained the filesystem goes away. + */ +void +vfs_orphan(struct mount *mp) +{ + int error; + struct mount *tmp; + + error = vfs_busy(mp, MBF_NOWAIT); + /* If the filesystem is being unmounted, do nothing. */ + if (error) + return; + + /* Prevent all future vnode operations from succeeding. */ + MNT_ILOCK(mp); + mp->mnt_kern_flag |= (MNTK_ORPHANED | MNTK_NOINSMNTQ); + MNT_IUNLOCK(mp); + + /* + * Don't try to call vflush on a mount structure that is not + * fully initialized yet. 
Assume that the mount is initialized + * if it can be found on the mountlist. + */ + mtx_lock(&mountlist_mtx); + TAILQ_FOREACH(tmp, &mountlist, mnt_list) { + if (tmp == mp) + break; + } + mtx_unlock(&mountlist_mtx); + if (tmp == NULL) { + vfs_unbusy(mp); + return; + } + vflush(mp, 0, FORCECLOSE, curthread); + vfs_unbusy(mp); +} + +/* * Mark for update the access time of the file if the filesystem * supports VOP_MARKATIME. This functionality is used by execve and * mmap, so we want to avoid the I/O implied by directly setting Index: sys/fs/msdosfs/msdosfs_vfsops.c =================================================================== --- sys/fs/msdosfs/msdosfs_vfsops.c (revision 190561) +++ sys/fs/msdosfs/msdosfs_vfsops.c (working copy) @@ -403,6 +403,15 @@ msdosfs_mount(struct mount *mp, struct thread *td) return (0); } +static void +msdosfs_orphan_callback(struct g_consumer *cp, void *user) +{ + struct mount *mp; + + mp = (struct mount *)user; + vfs_orphan(mp); +} + static int mountmsdosfs(struct vnode *devvp, struct mount *mp) { @@ -425,9 +434,13 @@ mountmsdosfs(struct vnode *devvp, struct mount *mp dev = devvp->v_rdev; dev_ref(dev); + vfs_ref(mp); DROP_GIANT(); g_topology_lock(); error = g_vfs_open(devvp, &cp, "msdosfs", ronly ? 
0 : 1); + if (error == 0) + g_vfs_register_callback(cp, msdosfs_orphan_callback, + mp, G_CB_ORPHAN); g_topology_unlock(); PICKUP_GIANT(); VOP_UNLOCK(devvp, 0); @@ -766,6 +779,7 @@ error_exit: free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; } + vfs_rel(mp); dev_rel(dev); return (error); } @@ -831,6 +845,7 @@ msdosfs_unmount(struct mount *mp, int mntflags, st g_topology_unlock(); PICKUP_GIANT(); vrele(pmp->pm_devvp); + vfs_rel(mp); dev_rel(pmp->pm_dev); free(pmp->pm_inusemap, M_MSDOSFSFAT); if (pmp->pm_flags & MSDOSFS_LARGEFS) Index: sys/geom/geom_vfs.c =================================================================== --- sys/geom/geom_vfs.c (revision 190561) +++ sys/geom/geom_vfs.c (working copy) @@ -34,6 +34,8 @@ __FBSDID("$FreeBSD$"); #include #include #include /* XXX Temporary for VFS_LOCK_GIANT */ +#include +#include #include #include @@ -130,17 +132,78 @@ g_vfs_strategy(struct bufobj *bo, struct buf *bp) g_io_request(bip, cp); } +struct g_vfs_cb { + struct g_consumer *cb_consumer; + int cb_event; + void (*cb_callback)(struct g_consumer *, void *); + void *cb_userptr; + struct task cb_task; +}; + +/* + * When registering the callback from the mount routine, the topology lock + * is being taken while holding devvp vnode lock. The callback routine + * would probably try to grab devvp vnode lock, and executing it from + * g_event context, while holding topology lock, would cause LOR. To make + * sure this doesn't happen, we call the callback from taskqueue. 
+ */ static void +g_vfs_cb_func(void *context, int pending) +{ + struct g_vfs_cb *cb; + + cb = context; + + KASSERT(cb->cb_event == G_CB_ORPHAN, + ("found callback for unknown event")); + + (cb->cb_callback)(cb->cb_consumer, cb->cb_userptr); +} + +void +g_vfs_register_callback(struct g_consumer *cp, + void (callback)(struct g_consumer *, void *), void *userptr, int event) +{ + struct g_vfs_cb *cb; + + g_topology_assert(); + + KASSERT(event >= 0 && event <= G_CB_LAST, + ("invalid callback event flag")); + cb = cp->private; + KASSERT(cb[event].cb_callback == NULL, + ("callback already registered")); + + cb[event].cb_callback = callback; + cb[event].cb_userptr = userptr; + cb[event].cb_consumer = cp; + cb[event].cb_event = event; + TASK_INIT(&(cb[event].cb_task), 0, g_vfs_cb_func, &(cb[event])); +} + +static void g_vfs_orphan(struct g_consumer *cp) { struct g_geom *gp; struct bufobj *bo; + struct g_vfs_cb *cb; + int error; g_topology_assert(); gp = cp->geom; bo = gp->softc; + cb = cp->private; + g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name); + + if (cb != NULL && cb[G_CB_ORPHAN].cb_callback != NULL) { + error = taskqueue_enqueue(taskqueue_thread, + &(cb[G_CB_ORPHAN].cb_task)); + KASSERT(error == 0, ("taskqueue_enqueue(9) failed.")); + taskqueue_drain(taskqueue_thread, &(cb[G_CB_ORPHAN].cb_task)); + } + if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); @@ -169,6 +232,8 @@ g_vfs_open(struct vnode *vp, struct g_consumer **c gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name); cp = g_new_consumer(gp); g_attach(cp, pp); + cp->private = g_malloc(sizeof(struct g_vfs_cb[G_CB_LAST + 1]), + M_WAITOK | M_ZERO); error = g_access(cp, 1, wr, 1); if (error) { g_wither_geom(gp, ENXIO); @@ -195,6 +260,8 @@ g_vfs_close(struct g_consumer *cp) g_topology_assert(); + g_free(cp->private); + cp->private = NULL; gp = cp->geom; bo = gp->softc; bufobj_invalbuf(bo, V_SAVE, 0, 0); Index: sys/geom/geom_vfs.h 
=================================================================== --- sys/geom/geom_vfs.h (revision 190561) +++ sys/geom/geom_vfs.h (working copy) @@ -35,8 +35,13 @@ struct buf; extern struct buf_ops *g_vfs_bufops; +#define G_CB_ORPHAN 1 +#define G_CB_LAST G_CB_ORPHAN + void g_vfs_strategy(struct bufobj *bo, struct buf *bp); int g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr); void g_vfs_close(struct g_consumer *cp); +void g_vfs_register_callback(struct g_consumer *cp, + void (callback)(struct g_consumer *, void *), void *user, int event); #endif /* _GEOM_GEOM_VFS_H_ */ Index: sys/sys/mount.h =================================================================== --- sys/sys/mount.h (revision 190561) +++ sys/sys/mount.h (working copy) @@ -250,14 +250,17 @@ void __mnt_vnode_markerfree(struct vnode #define MNT_EXPUBLIC 0x20000000 /* public export (WebNFS) */ /* - * Flags set by internal operations, - * but visible to the user. - * XXX some of these are not quite right.. (I've never seen the root flag set) + * Flags set by internal operations, but visible to the user. + * Note that MNT_ORPHANED flag is never actually set on mnt_flag field + * in struct mount; it's only set on f_flags in struct statfs when + * MNTK_ORPHANED is set. We cannot use MNT_ORPHANED instead of MNTK_ORPHANED + * due to missing locking of mnt_flag. 
*/ #define MNT_LOCAL 0x00001000 /* filesystem is stored locally */ #define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */ #define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */ #define MNT_USER 0x00008000 /* mounted by a user */ +#define MNT_ORPHANED 0x00020000 /* MNTK_ORPHANED is set */ #define MNT_IGNORE 0x00800000 /* do not show entry in df */ /* @@ -273,7 +276,8 @@ void __mnt_vnode_markerfree(struct vnode MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ - MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS) + MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \ + MNT_ORPHANED) /* Mask of flags that can be updated. */ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \ @@ -289,6 +293,8 @@ void __mnt_vnode_markerfree(struct vnode * XXX: These are not STATES and really should be somewhere else. * XXX: MNT_BYFSID collides with MNT_ACLS, but because MNT_ACLS is only used for * mount(2) and MNT_BYFSID is only used for unmount(2) it's harmless. + * XXX: MNT_DELEXPORT collides with MNT_ORPHANED, but MNT_DELEXPORT is never + * used in mnt_flag, only for ex_flags. 
*/ #define MNT_UPDATE 0x00010000 /* not a real mount, just an update */ #define MNT_DELEXPORT 0x00020000 /* delete export host lists */ @@ -325,6 +331,7 @@ void __mnt_vnode_markerfree(struct vnode #define MNTK_DRAINING 0x00000010 /* lock draining is happening */ #define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */ #define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */ +#define MNTK_ORPHANED 0x00000080 /* device is gone */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_SUSPEND 0x08000000 /* request write suspension */ @@ -747,6 +754,7 @@ struct mount *vfs_mount_alloc(struct vnode *, stru int vfs_suser(struct mount *, struct thread *); void vfs_unbusy(struct mount *); void vfs_unmountall(void); +void vfs_orphan(struct mount *); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx mountlist_mtx; extern struct nfs_public nfs_pub;