Index: kern/vfs_syscalls.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/kern/vfs_syscalls.c,v retrieving revision 1.392.2.19 diff -u -r1.392.2.19 vfs_syscalls.c --- kern/vfs_syscalls.c 29 May 2007 10:09:43 -0000 1.392.2.19 +++ kern/vfs_syscalls.c 25 Jun 2007 14:52:45 -0000 @@ -189,7 +189,8 @@ caddr_t arg; } */ *uap; { - struct mount *mp, *vmp; + struct mount *mp; + int vfslocked; int error; struct nameidata nd; @@ -197,23 +198,22 @@ AUDIT_ARG(uid, uap->uid); if (jailed(td->td_ucred) && !prison_quotas) return (EPERM); - mtx_lock(&Giant); - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->path, td); - if ((error = namei(&nd)) != 0) { - mtx_unlock(&Giant); + NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, + UIO_USERSPACE, uap->path, td); + if ((error = namei(&nd)) != 0) return (error); - } + vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); - error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH); mp = nd.ni_vp->v_mount; - vrele(nd.ni_vp); - if (error) { - mtx_unlock(&Giant); + if ((error = vfs_busy(mp, 0, NULL, td))) { + vrele(nd.ni_vp); + VFS_UNLOCK_GIANT(vfslocked); return (error); } + vrele(nd.ni_vp); error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td); - vn_finished_write(vmp); - mtx_unlock(&Giant); + vfs_unbusy(mp, td); + VFS_UNLOCK_GIANT(vfslocked); return (error); } Index: ufs/ffs/ffs_softdep.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ffs/ffs_softdep.c,v retrieving revision 1.181.2.17 diff -u -r1.181.2.17 ffs_softdep.c --- ufs/ffs/ffs_softdep.c 24 Apr 2007 11:06:12 -0000 1.181.2.17 +++ ufs/ffs/ffs_softdep.c 25 Jun 2007 14:52:48 -0000 @@ -729,9 +729,7 @@ for (;;) { kthread_suspend_check(softdepproc); -#ifdef QUOTA - mtx_lock(&Giant); -#endif + vfslocked = VFS_LOCK_GIANT((struct mount *)NULL); ACQUIRE_LOCK(&lk); /* * If requested, try removing inode or removal dependencies. 
@@ -747,9 +745,7 @@ wakeup_one(&proc_waiting); } FREE_LOCK(&lk); -#ifdef QUOTA - mtx_unlock(&Giant); -#endif + VFS_UNLOCK_GIANT(vfslocked); remaining = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { @@ -2602,6 +2598,7 @@ } WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); FREE_LOCK(&lk); + ip->i_flag |= IN_MODIFIED; } /* Index: ufs/ffs/ffs_vfsops.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ffs/ffs_vfsops.c,v retrieving revision 1.290.2.17 diff -u -r1.290.2.17 ffs_vfsops.c --- ufs/ffs/ffs_vfsops.c 11 Jun 2007 10:53:48 -0000 1.290.2.17 +++ ufs/ffs/ffs_vfsops.c 25 Jun 2007 14:52:48 -0000 @@ -815,11 +815,9 @@ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ -#ifndef QUOTA MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_MPSAFE; MNT_IUNLOCK(mp); -#endif return (0); out: if (bp) @@ -1022,8 +1020,6 @@ if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { - if (ump->um_quotas[i] == NULLVP) - continue; quotaoff(td, mp, i); } /* Index: ufs/ufs/quota.h =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ufs/quota.h,v retrieving revision 1.27.2.1 diff -u -r1.27.2.1 quota.h --- ufs/ufs/quota.h 1 Feb 2007 04:45:43 -0000 1.27.2.1 +++ ufs/ufs/quota.h 25 Jun 2007 14:52:48 -0000 @@ -113,15 +113,18 @@ * filesystem. There is one allocated for each quota that exists on any * filesystem for the current user or group. A cache is kept of recently * used entries. 
+ * (h) protected by dqhlock */ struct dquot { - LIST_ENTRY(dquot) dq_hash; /* hash list */ - TAILQ_ENTRY(dquot) dq_freelist; /* free list */ + LIST_ENTRY(dquot) dq_hash; /* (h) hash list */ + TAILQ_ENTRY(dquot) dq_freelist; /* (h) free list */ + struct mtx dq_lock; /* lock for concurrency */ u_int16_t dq_flags; /* flags, see below */ u_int16_t dq_type; /* quota type of this dquot */ - u_int32_t dq_cnt; /* count of active references */ + u_int32_t dq_cnt; /* (h) count of active references */ u_int32_t dq_id; /* identifier this applies to */ - struct ufsmount *dq_ump; /* filesystem that this is taken from */ + struct ufsmount *dq_ump; /* (h) filesystem that this is + taken from */ struct dqblk dq_dqb; /* actual usage & quotas */ }; /* @@ -167,6 +170,23 @@ #define DQREF(dq) (dq)->dq_cnt++ #endif +#define DQI_LOCK(dq) mtx_lock(&(dq)->dq_lock) +#define DQI_UNLOCK(dq) mtx_unlock(&(dq)->dq_lock) + +#define DQI_WAIT(dq, prio, msg) do { \ + while ((dq)->dq_flags & DQ_LOCK) { \ + (dq)->dq_flags |= DQ_WANT; \ + (void) msleep((dq), \ + &(dq)->dq_lock, (prio), (msg), 0); \ + } \ +} while (0) + +#define DQI_WAKEUP(dq) do { \ + if ((dq)->dq_flags & DQ_WANT) \ + wakeup((dq)); \ + (dq)->dq_flags &= ~(DQ_WANT|DQ_LOCK); \ +} while (0) + struct inode; struct mount; struct thread; Index: ufs/ufs/ufs_inode.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ufs/ufs_inode.c,v retrieving revision 1.63.2.3 diff -u -r1.63.2.3 ufs_inode.c --- ufs/ufs/ufs_inode.c 14 May 2006 01:12:56 -0000 1.63.2.3 +++ ufs/ufs/ufs_inode.c 25 Jun 2007 14:52:48 -0000 @@ -188,10 +188,9 @@ * Destroy the vm object and flush associated pages. */ vnode_destroy_vobject(vp); - if (ip->i_flag & IN_LAZYMOD) { + if (ip->i_flag & IN_LAZYMOD) ip->i_flag |= IN_MODIFIED; - UFS_UPDATE(vp, 0); - } + UFS_UPDATE(vp, 0); /* * Remove the inode from its hash chain. 
*/ Index: ufs/ufs/ufs_lookup.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ufs/ufs_lookup.c,v retrieving revision 1.77.2.3 diff -u -r1.77.2.3 ufs_lookup.c --- ufs/ufs/ufs_lookup.c 5 Sep 2006 13:20:41 -0000 1.77.2.3 +++ ufs/ufs/ufs_lookup.c 25 Jun 2007 14:52:48 -0000 @@ -39,6 +39,7 @@ #include "opt_ffs_broken_fixme.h" #include "opt_ufs.h" +#include "opt_quota.h" #include #include @@ -721,6 +722,13 @@ flags = BA_CLRBUF; if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) flags |= IO_SYNC; +#ifdef QUOTA + if ((error = getinoquota(dp)) != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); + } +#endif if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, cr, flags, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) Index: ufs/ufs/ufs_quota.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ufs/ufs_quota.c,v retrieving revision 1.74.2.6 diff -u -r1.74.2.6 ufs_quota.c --- ufs/ufs/ufs_quota.c 1 Feb 2007 04:45:43 -0000 1.74.2.6 +++ ufs/ufs/ufs_quota.c 25 Jun 2007 14:52:48 -0000 @@ -72,12 +72,14 @@ */ static char *quotatypes[] = INITQFNAMES; -static int chkdqchg(struct inode *, ufs2_daddr_t, struct ucred *, int); -static int chkiqchg(struct inode *, int, struct ucred *, int); +static int chkdqchg(struct inode *, ufs2_daddr_t, struct ucred *, int, int *); +static int chkiqchg(struct inode *, int, struct ucred *, int, int *); static int dqget(struct vnode *, - u_long, struct ufsmount *, int, struct dquot **); + u_long, struct ufsmount *, int, struct dquot **); static int dqsync(struct vnode *, struct dquot *); static void dqflush(struct vnode *); +static int quotaoff1(struct thread *td, struct mount *mp, int type); +static int quotaoff_inchange(struct thread *td, struct mount *mp, int type); #ifdef DIAGNOSTIC static void dqref(struct dquot *); @@ -117,8 +119,7 @@ * Set up the user quota based on file uid. 
* EINVAL means that quotas are not enabled. */ - if (ip->i_dquot[USRQUOTA] == NODQUOT && - (error = + if ((error = dqget(vp, ip->i_uid, ump, USRQUOTA, &ip->i_dquot[USRQUOTA])) && error != EINVAL) return (error); @@ -126,8 +127,7 @@ * Set up the group quota based on file gid. * EINVAL means that quotas are not enabled. */ - if (ip->i_dquot[GRPQUOTA] == NODQUOT && - (error = + if ((error = dqget(vp, ip->i_gid, ump, GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && error != EINVAL) return (error); @@ -147,7 +147,7 @@ struct dquot *dq; ufs2_daddr_t ncurblocks; struct vnode *vp = ITOV(ip); - int i, error; + int i, error, warn, do_check; /* * Disk quotas must be turned off for system files. Currently @@ -171,10 +171,8 @@ for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "chkdq1", 0); - } + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkdq1"); ncurblocks = dq->dq_curblocks + change; if (ncurblocks >= 0) dq->dq_curblocks = ncurblocks; @@ -182,24 +180,45 @@ dq->dq_curblocks = 0; dq->dq_flags &= ~DQ_BLKS; dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); } return (0); } - if ((flags & FORCE) == 0 && suser_cred(cred, 0)) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - error = chkdqchg(ip, change, cred, i); - if (error) - return (error); - } - } + if ((flags & FORCE) == 0 && suser_cred(cred, 0)) + do_check = 1; + else + do_check = 0; for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "chkdq2", 0); + warn = 0; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkdq2"); + if (do_check) { + error = chkdqchg(ip, change, cred, i, &warn); + if (error) { + /* + * Roll back user quota changes when + * group quota failed. 
+ */ + while (i > 0) { + --i; + dq = ip->i_dquot[i]; + if (dq == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkdq3"); + ncurblocks = dq->dq_curblocks - change; + if (ncurblocks >= 0) + dq->dq_curblocks = ncurblocks; + else + dq->dq_curblocks = 0; + dq->dq_flags &= ~DQ_BLKS; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } + return (error); + } } /* Reset timer when crossing soft limit */ if (dq->dq_curblocks + change >= dq->dq_bsoftlimit && @@ -208,6 +227,11 @@ VFSTOUFS(ITOV(ip)->v_mount)->um_btime[i]; dq->dq_curblocks += change; dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + if (warn) + uprintf("\n%s: warning, %s %s\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[i], "disk quota exceeded"); } return (0); } @@ -217,11 +241,12 @@ * Issue an error message if appropriate. */ static int -chkdqchg(ip, change, cred, type) +chkdqchg(ip, change, cred, type, warn) struct inode *ip; ufs2_daddr_t change; struct ucred *cred; int type; + int *warn; { struct dquot *dq = ip->i_dquot[type]; ufs2_daddr_t ncurblocks = dq->dq_curblocks + change; @@ -232,11 +257,14 @@ if (ncurblocks >= dq->dq_bhardlimit && dq->dq_bhardlimit) { if ((dq->dq_flags & DQ_BLKS) == 0 && ip->i_uid == cred->cr_uid) { + dq->dq_flags |= DQ_BLKS; + DQI_UNLOCK(dq); uprintf("\n%s: write failed, %s disk limit reached\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type]); - dq->dq_flags |= DQ_BLKS; + return (EDQUOT); } + DQI_UNLOCK(dq); return (EDQUOT); } /* @@ -248,20 +276,21 @@ dq->dq_btime = time_second + VFSTOUFS(ITOV(ip)->v_mount)->um_btime[type]; if (ip->i_uid == cred->cr_uid) - uprintf("\n%s: warning, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - quotatypes[type], "disk quota exceeded"); + *warn = 1; return (0); } if (time_second > dq->dq_btime) { if ((dq->dq_flags & DQ_BLKS) == 0 && ip->i_uid == cred->cr_uid) { + dq->dq_flags |= DQ_BLKS; + DQI_UNLOCK(dq); uprintf("\n%s: write failed, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "disk quota 
exceeded for too long"); - dq->dq_flags |= DQ_BLKS; + return (EDQUOT); } + DQI_UNLOCK(dq); return (EDQUOT); } } @@ -280,7 +309,7 @@ { struct dquot *dq; ino_t ncurinodes; - int i, error; + int i, error, warn, do_check; #ifdef DIAGNOSTIC if ((flags & CHOWN) == 0) @@ -292,10 +321,8 @@ for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "chkiq1", 0); - } + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkiq1"); ncurinodes = dq->dq_curinodes + change; /* XXX: ncurinodes is unsigned */ if (dq->dq_curinodes != 0 && ncurinodes >= 0) @@ -304,24 +331,47 @@ dq->dq_curinodes = 0; dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); } return (0); } - if ((flags & FORCE) == 0 && suser_cred(cred, 0)) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - error = chkiqchg(ip, change, cred, i); - if (error) - return (error); - } - } + if ((flags & FORCE) == 0 && suser_cred(cred, 0)) + do_check = 1; + else + do_check = 0; for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "chkiq2", 0); + warn = 0; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkiq2"); + if (do_check) { + error = chkiqchg(ip, change, cred, i, &warn); + if (error) { + /* + * Roll back user quota changes when + * group quota failed. 
+ */ + while (i > 0) { + --i; + dq = ip->i_dquot[i]; + if (dq == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkiq3"); + ncurinodes = dq->dq_curinodes - change; + /* XXX: ncurinodes is unsigned */ + if (dq->dq_curinodes != 0 && + ncurinodes >= 0) + dq->dq_curinodes = ncurinodes; + else + dq->dq_curinodes = 0; + dq->dq_flags &= ~DQ_INODS; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } + return (error); + } } /* Reset timer when crossing soft limit */ if (dq->dq_curinodes + change >= dq->dq_isoftlimit && @@ -330,6 +380,11 @@ VFSTOUFS(ITOV(ip)->v_mount)->um_itime[i]; dq->dq_curinodes += change; dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + if (warn) + uprintf("\n%s: warning, %s %s\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[i], "inode quota exceeded"); } return (0); } @@ -339,11 +394,12 @@ * Issue an error message if appropriate. */ static int -chkiqchg(ip, change, cred, type) +chkiqchg(ip, change, cred, type, warn) struct inode *ip; int change; struct ucred *cred; int type; + int *warn; { struct dquot *dq = ip->i_dquot[type]; ino_t ncurinodes = dq->dq_curinodes + change; @@ -354,11 +410,14 @@ if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { if ((dq->dq_flags & DQ_INODS) == 0 && ip->i_uid == cred->cr_uid) { + dq->dq_flags |= DQ_INODS; + DQI_UNLOCK(dq); uprintf("\n%s: write failed, %s inode limit reached\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type]); - dq->dq_flags |= DQ_INODS; + return (EDQUOT); } + DQI_UNLOCK(dq); return (EDQUOT); } /* @@ -370,20 +429,21 @@ dq->dq_itime = time_second + VFSTOUFS(ITOV(ip)->v_mount)->um_itime[type]; if (ip->i_uid == cred->cr_uid) - uprintf("\n%s: warning, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - quotatypes[type], "inode quota exceeded"); + *warn = 1; return (0); } if (time_second > dq->dq_itime) { if ((dq->dq_flags & DQ_INODS) == 0 && ip->i_uid == cred->cr_uid) { - uprintf("\n%s: write failed, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - 
quotatypes[type], - "inode quota exceeded for too long"); dq->dq_flags |= DQ_INODS; + DQI_UNLOCK(dq); + uprintf("\n%s: write failed, %s %s\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[type], + "inode quota exceeded for too long"); + return (EDQUOT); } + DQI_UNLOCK(dq); return (EDQUOT); } } @@ -415,15 +475,19 @@ */ if ((int)ip->i_uid < 0 || (int)ip->i_gid < 0) return; + + UFS_LOCK(ump); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP || (ump->um_qflags[i] & (QTF_OPENING|QTF_CLOSING))) continue; if (ip->i_dquot[i] == NODQUOT) { + UFS_UNLOCK(ump); vprint("chkdquot: missing dquot", ITOV(ip)); panic("chkdquot: missing dquot"); } } + UFS_UNLOCK(ump); } #endif @@ -445,36 +509,53 @@ struct vnode *vp, **vpp; struct vnode *mvp; struct dquot *dq; - int error, flags; + int error, flags, vfslocked; struct nameidata nd; error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); if (error) return (error); - vpp = &ump->um_quotas[type]; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td); + ump = VFSTOUFS(mp); + dq = NODQUOT; + + NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, fname, td); flags = FREAD | FWRITE; error = vn_open(&nd, &flags, 0, -1); if (error) return (error); - NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + NDFREE(&nd, NDF_ONLY_PNBUF); VOP_UNLOCK(vp, 0, td); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); + VFS_UNLOCK_GIANT(vfslocked); return (EACCES); } - if (*vpp != vp) - quotaoff(td, mp, type); - ump->um_qflags[type] |= QTF_OPENING; + + UFS_LOCK(ump); + if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) { + UFS_UNLOCK(ump); + (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); + VFS_UNLOCK_GIANT(vfslocked); + return (EALREADY); + } + ump->um_qflags[type] |= QTF_OPENING|QTF_CLOSING; MNT_ILOCK(mp); mp->mnt_flag |= MNT_QUOTA; MNT_IUNLOCK(mp); + UFS_UNLOCK(ump); + + vpp = &ump->um_quotas[type]; + if (*vpp != vp) + quotaoff1(td, mp, type); + 
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); vp->v_vflag |= VV_SYSTEM; VOP_UNLOCK(vp, 0, td); *vpp = vp; + VFS_UNLOCK_GIANT(vfslocked); /* * Save the credential of the process that turned on quotas. * Set up the time limits for this quota. @@ -490,6 +571,13 @@ dqrele(NULLVP, dq); } /* + * Allow the dqget() called from getinoquota() below to read the + * quota from file. + */ + UFS_LOCK(ump); + ump->um_qflags[type] &= ~QTF_CLOSING; + UFS_UNLOCK(ump); + /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. @@ -520,17 +608,32 @@ } } MNT_IUNLOCK(mp); + + if (error) { + /* + * quotaoff1() asserts QTF_CLOSING, which was cleared above to + * let dqget() read the file; set it again before tearing down. + */ + UFS_LOCK(ump); + ump->um_qflags[type] |= QTF_CLOSING; + UFS_UNLOCK(ump); + quotaoff_inchange(td, mp, type); + } + UFS_LOCK(ump); ump->um_qflags[type] &= ~QTF_OPENING; - if (error) - quotaoff(td, mp, type); + KASSERT((ump->um_qflags[type] & QTF_CLOSING) == 0, + ("quotaon: leaking flags")); + UFS_UNLOCK(ump); + return (error); } /* - * Q_QUOTAOFF - turn off disk quotas for a filesystem. + * Main code to turn off disk quotas for a filesystem. Does not change + * flags. */ -int -quotaoff(td, mp, type) +static int +quotaoff1(td, mp, type) struct thread *td; struct mount *mp; int type; @@ -540,15 +643,22 @@ struct ufsmount *ump = VFSTOUFS(mp); struct dquot *dq; struct inode *ip; + struct ucred *cr; + int vfslocked; int error; - error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); - if (error) - return (error); + ump = VFSTOUFS(mp); - if ((qvp = ump->um_quotas[type]) == NULLVP) + UFS_LOCK(ump); + KASSERT((ump->um_qflags[type] & QTF_CLOSING) != 0, + ("quotaoff1: flags are invalid")); + if ((qvp = ump->um_quotas[type]) == NULLVP) { + UFS_UNLOCK(ump); return (0); - ump->um_qflags[type] |= QTF_CLOSING; + } + cr = ump->um_cred[type]; + UFS_UNLOCK(ump); + /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed. 
@@ -577,27 +679,88 @@ MNT_ILOCK(mp); } MNT_IUNLOCK(mp); + dqflush(qvp); + /* Clear um_quotas before closing the quota vnode to prevent + * access to the closed vnode from dqget/dqsync + */ + UFS_LOCK(ump); + ump->um_quotas[type] = NULLVP; + ump->um_cred[type] = NOCRED; + UFS_UNLOCK(ump); + + vfslocked = VFS_LOCK_GIANT(qvp->v_mount); vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY, td); qvp->v_vflag &= ~VV_SYSTEM; VOP_UNLOCK(qvp, 0, td); error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td); - ump->um_quotas[type] = NULLVP; - crfree(ump->um_cred[type]); - ump->um_cred[type] = NOCRED; + VFS_UNLOCK_GIANT(vfslocked); + crfree(cr); + + return (error); +} + +/* + * Turns off quotas, assumes that ump->um_qflags are already checked + * and QTF_CLOSING is set to indicate operation in progress. Fixes + * ump->um_qflags and mp->mnt_flag after. + */ +static int +quotaoff_inchange(td, mp, type) + struct thread *td; + struct mount *mp; + int type; +{ + struct ufsmount *ump; + int i; + int error; + + error = quotaoff1(td, mp, type); + + ump = VFSTOUFS(mp); + UFS_LOCK(ump); ump->um_qflags[type] &= ~QTF_CLOSING; - for (type = 0; type < MAXQUOTAS; type++) - if (ump->um_quotas[type] != NULLVP) + for (i = 0; i < MAXQUOTAS; i++) + if (ump->um_quotas[i] != NULLVP) break; - if (type == MAXQUOTAS) { + if (i == MAXQUOTAS) { MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_QUOTA; MNT_IUNLOCK(mp); } + UFS_UNLOCK(ump); return (error); } /* + * Q_QUOTAOFF - turn off disk quotas for a filesystem. + */ +int +quotaoff(td, mp, type) + struct thread *td; + struct mount *mp; + int type; +{ + struct ufsmount *ump; + int error; + + error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); + if (error) + return (error); + + ump = VFSTOUFS(mp); + UFS_LOCK(ump); + if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) { + UFS_UNLOCK(ump); + return (EALREADY); + } + ump->um_qflags[type] |= QTF_CLOSING; + UFS_UNLOCK(ump); + + return (quotaoff_inchange(td, mp, type)); +} + +/* * Q_GETQUOTA - return current values in a dqblk structure. 
*/ int @@ -632,6 +795,7 @@ return (EINVAL); } + dq = NODQUOT; error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq); if (error) return (error); @@ -664,14 +828,16 @@ error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)); if (error) return (error); + + ndq = NODQUOT; + ump = VFSTOUFS(mp); + error = dqget(NULLVP, id, ump, type, &ndq); if (error) return (error); dq = ndq; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "setqta", 0); - } + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "setqta"); /* * Copy all but the current values. * Reset time limit if previously had no soft limit or were @@ -702,6 +868,7 @@ else dq->dq_flags &= ~DQ_FAKE; dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); dqrele(NULLVP, dq); return (0); } @@ -730,14 +897,16 @@ error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk)); if (error) return (error); + + ump = VFSTOUFS(mp); + ndq = NODQUOT; + error = dqget(NULLVP, id, ump, type, &ndq); if (error) return (error); dq = ndq; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "setuse", 0); - } + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "setuse"); /* * Reset time limit if have a soft limit and were * previously under it, but are now over it. @@ -755,6 +924,7 @@ if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); dqrele(NULLVP, dq); return (0); } @@ -776,9 +946,11 @@ * Check if the mount point has any quotas. * If not, simply return. 
*/ + UFS_LOCK(ump); for (i = 0; i < MAXQUOTAS; i++) if (ump->um_quotas[i] != NULLVP) break; + UFS_UNLOCK(ump); if (i == MAXQUOTAS) return (0); /* @@ -806,7 +978,7 @@ } for (i = 0; i < MAXQUOTAS; i++) { dq = VTOI(vp)->i_dquot[i]; - if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) + if (dq != NODQUOT) dqsync(vp, dq); } vput(vp); @@ -831,6 +1003,18 @@ static TAILQ_HEAD(dqfreelist, dquot) dqfreelist; static long numdquot, desireddquot = DQUOTINC; +/* + * Lock to protect quota hash, dq free list and dq_cnt ref counters of + * _all_ dqs. + */ +struct mtx dqhlock; + +#define DQH_LOCK() mtx_lock(&dqhlock) +#define DQH_UNLOCK() mtx_unlock(&dqhlock) + +static struct dquot *dqhashfind(struct dqhash *dqh, u_long id, + struct vnode *dqvp); + /* * Initialize the quota system. */ @@ -838,6 +1022,7 @@ dqinit() { + mtx_init(&dqhlock, "dqhlock", NULL, MTX_DEF); dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash); TAILQ_INIT(&dqfreelist); } @@ -853,8 +1038,35 @@ hashdestroy(dqhashtbl, M_DQUOT, dqhash); while ((dq = TAILQ_FIRST(&dqfreelist)) != NULL) { TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); + mtx_destroy(&dq->dq_lock); free(dq, M_DQUOT); } + mtx_destroy(&dqhlock); +} + +static struct dquot * +dqhashfind(dqh, id, dqvp) + struct dqhash *dqh; + u_long id; + struct vnode *dqvp; +{ + struct dquot *dq; + + mtx_assert(&dqhlock, MA_OWNED); + LIST_FOREACH(dq, dqh, dq_hash) { + if (dq->dq_id != id || + dq->dq_ump->um_quotas[dq->dq_type] != dqvp) + continue; + /* + * Cache hit with no references. Take + * the structure off the free list. 
+ */ + if (dq->dq_cnt == 0) + TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); + DQREF(dq); + return (dq); + } + return (NODQUOT); } /* @@ -870,55 +1082,122 @@ struct dquot **dqp; { struct thread *td = curthread; /* XXX */ - struct dquot *dq; + struct dquot *dq, *dq1; struct dqhash *dqh; struct vnode *dqvp; struct iovec aiov; struct uio auio; - int error; + int vfslocked, dqvplocked, error; + +#ifdef DEBUG_VFS_LOCKS + if (vp != NULLVP) + ASSERT_VOP_ELOCKED(vp, "dqget"); +#endif + + if (vp != NULLVP && *dqp != NODQUOT) { + return (0); + } /* XXX: Disallow negative id values to prevent the * creation of 100GB+ quota data files. */ if ((int)id < 0) return (EINVAL); + + UFS_LOCK(ump); dqvp = ump->um_quotas[type]; if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) { *dqp = NODQUOT; + UFS_UNLOCK(ump); return (EINVAL); } + vref(dqvp); + UFS_UNLOCK(ump); + error = 0; + dqvplocked = 0; + /* * Check the cache first. */ dqh = DQHASH(dqvp, id); - LIST_FOREACH(dq, dqh, dq_hash) { - if (dq->dq_id != id || - dq->dq_ump->um_quotas[dq->dq_type] != dqvp) - continue; + DQH_LOCK(); + dq = dqhashfind(dqh, id, dqvp); + if (dq != NULL) { + DQH_UNLOCK(); +hfound: DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "dqget"); + DQI_UNLOCK(dq); + if (dq->dq_ump == NULL) { + dqrele(vp, dq); + dq = NODQUOT; + error = EIO; + } + *dqp = dq; + vfslocked = VFS_LOCK_GIANT(dqvp->v_mount); + if (dqvplocked) + vput(dqvp); + else + vrele(dqvp); + VFS_UNLOCK_GIANT(vfslocked); + return (error); + } + + /* + * The quota vnode lock comes before DQ_LOCK. Acquire the dqvp lock + * here, since a new dq will appear on the hash chain DQ_LOCKed. + */ + if (vp != dqvp) { + DQH_UNLOCK(); + vn_lock(dqvp, LK_SHARED | LK_RETRY, td); + dqvplocked = 1; + DQH_LOCK(); /* - * Cache hit with no references. Take - * the structure off the free list. + * Recheck the cache after sleep for quota vnode lock. 
*/ - if (dq->dq_cnt == 0) - TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); - DQREF(dq); - *dqp = dq; - return (0); + dq = dqhashfind(dqh, id, dqvp); + if (dq != NULL) { + DQH_UNLOCK(); + goto hfound; + } } + /* - * Not in cache, allocate a new one. + * Not in cache, allocate a new one or take it from the + * free list. */ if (TAILQ_FIRST(&dqfreelist) == NODQUOT && numdquot < MAXQUOTAS * desiredvnodes) desireddquot += DQUOTINC; if (numdquot < desireddquot) { - dq = (struct dquot *)malloc(sizeof *dq, M_DQUOT, - M_WAITOK | M_ZERO); numdquot++; + DQH_UNLOCK(); + dq1 = (struct dquot *)malloc(sizeof *dq, M_DQUOT, + M_WAITOK | M_ZERO); + mtx_init(&dq1->dq_lock, "dqlock", NULL, MTX_DEF); + DQH_LOCK(); + /* + * Recheck the cache after sleep for memory. + */ + dq = dqhashfind(dqh, id, dqvp); + if (dq != NULL) { + numdquot--; + DQH_UNLOCK(); + mtx_destroy(&dq1->dq_lock); + free(dq1, M_DQUOT); + goto hfound; + } + dq = dq1; } else { if ((dq = TAILQ_FIRST(&dqfreelist)) == NULL) { + DQH_UNLOCK(); tablefull("dquot"); *dqp = NODQUOT; + vfslocked = VFS_LOCK_GIANT(dqvp->v_mount); + if (dqvplocked) + vput(dqvp); + else + vrele(dqvp); + VFS_UNLOCK_GIANT(vfslocked); return (EUSERS); } if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) @@ -927,17 +1206,19 @@ if (dq->dq_ump != NULL) LIST_REMOVE(dq, dq_hash); } + /* - * Initialize the contents of the dquot structure. + * Dq is put into hash already locked to prevent parallel + * usage while it is being read from file. 
*/ - if (vp != dqvp) - vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, td); - LIST_INSERT_HEAD(dqh, dq, dq_hash); - DQREF(dq); dq->dq_flags = DQ_LOCK; dq->dq_id = id; - dq->dq_ump = ump; dq->dq_type = type; + dq->dq_ump = ump; + LIST_INSERT_HEAD(dqh, dq, dq_hash); + DQREF(dq); + DQH_UNLOCK(); + auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = (caddr_t)&dq->dq_dqb; @@ -947,24 +1228,35 @@ auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = (struct thread *)0; + + vfslocked = VFS_LOCK_GIANT(dqvp->v_mount); error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]); if (auio.uio_resid == sizeof(struct dqblk) && error == 0) - bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk)); - if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, td); - if (dq->dq_flags & DQ_WANT) - wakeup(dq); - dq->dq_flags = 0; + bzero(&dq->dq_dqb, sizeof(struct dqblk)); + if (dqvplocked) + vput(dqvp); + else + vrele(dqvp); + VFS_UNLOCK_GIANT(vfslocked); /* * I/O error in reading quota file, release * quota structure and reflect problem to caller. */ if (error) { + DQH_LOCK(); + dq->dq_ump = NULL; LIST_REMOVE(dq, dq_hash); + DQH_UNLOCK(); + DQI_LOCK(dq); + if (dq->dq_flags & DQ_WANT) + wakeup(dq); + dq->dq_flags = 0; + DQI_UNLOCK(dq); dqrele(vp, dq); *dqp = NODQUOT; return (error); } + DQI_LOCK(dq); /* * Check for no limit to enforce. * Initialize time values if necessary. 
@@ -978,6 +1270,8 @@ if (dq->dq_itime == 0) dq->dq_itime = time_second + ump->um_itime[type]; } + DQI_WAKEUP(dq); + DQI_UNLOCK(dq); *dqp = dq; return (0); } @@ -1006,15 +1300,24 @@ if (dq == NODQUOT) return; + DQH_LOCK(); if (dq->dq_cnt > 1) { dq->dq_cnt--; + DQH_UNLOCK(); return; } - if (dq->dq_flags & DQ_MOD) - (void) dqsync(vp, dq); + DQH_UNLOCK(); + + (void) dqsync(vp, dq); + + DQH_LOCK(); if (--dq->dq_cnt > 0) + { + DQH_UNLOCK(); return; + } TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); + DQH_UNLOCK(); } /* @@ -1029,30 +1332,49 @@ struct vnode *dqvp; struct iovec aiov; struct uio auio; - int error; + int vfslocked, error; struct mount *mp; + struct ufsmount *ump; + +#ifdef DEBUG_VFS_LOCKS + if (vp != NULL) + ASSERT_VOP_ELOCKED(vp, "dqsync"); +#endif mp = NULL; + error = 0; if (dq == NODQUOT) panic("dqsync: dquot"); - if ((dq->dq_flags & DQ_MOD) == 0) + if ((ump = dq->dq_ump) == NULL) return (0); - if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) + UFS_LOCK(ump); + if ((dqvp = ump->um_quotas[dq->dq_type]) == NULLVP) panic("dqsync: file"); + vref(dqvp); + UFS_UNLOCK(ump); + + vfslocked = VFS_LOCK_GIANT(dqvp->v_mount); + DQI_LOCK(dq); + if ((dq->dq_flags & DQ_MOD) == 0) { + DQI_UNLOCK(dq); + vrele(dqvp); + VFS_UNLOCK_GIANT(vfslocked); + return (0); + } + DQI_UNLOCK(dq); + (void) vn_start_secondary_write(dqvp, &mp, V_WAIT); if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, td); - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+2, "dqsync", 0); - if ((dq->dq_flags & DQ_MOD) == 0) { - if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, td); - vn_finished_secondary_write(mp); - return (0); - } - } + + VFS_UNLOCK_GIANT(vfslocked); + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+2, "dqsync"); + if ((dq->dq_flags & DQ_MOD) == 0) + goto out; dq->dq_flags |= DQ_LOCK; + DQI_UNLOCK(dq); + auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = (caddr_t)&dq->dq_dqb; @@ -1062,15 +1384,23 @@ auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = 
UIO_WRITE; auio.uio_td = (struct thread *)0; + vfslocked = VFS_LOCK_GIANT(dqvp->v_mount); error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]); + VFS_UNLOCK_GIANT(vfslocked); if (auio.uio_resid && error == 0) error = EIO; - if (dq->dq_flags & DQ_WANT) - wakeup(dq); - dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); + + DQI_LOCK(dq); + DQI_WAKEUP(dq); + dq->dq_flags &= ~DQ_MOD; +out: DQI_UNLOCK(dq); + vfslocked = VFS_LOCK_GIANT(dqvp->v_mount); if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, td); + vput(dqvp); + else + vrele(dqvp); vn_finished_secondary_write(mp); + VFS_UNLOCK_GIANT(vfslocked); return (error); } @@ -1089,6 +1419,7 @@ * file off their hash chains (they will eventually * fall off the head of the free list and be re-used). */ + DQH_LOCK(); for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) { for (dq = LIST_FIRST(dqh); dq; dq = nextdq) { nextdq = LIST_NEXT(dq, dq_hash); @@ -1100,4 +1431,5 @@ dq->dq_ump = (struct ufsmount *)0; } } + DQH_UNLOCK(); } Index: ufs/ufs/ufs_vnops.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ufs/ufs_vnops.c,v retrieving revision 1.271.2.9 diff -u -r1.271.2.9 ufs_vnops.c --- ufs/ufs/ufs_vnops.c 9 Mar 2007 13:54:26 -0000 1.271.2.9 +++ ufs/ufs/ufs_vnops.c 25 Jun 2007 14:52:48 -0000 @@ -316,10 +316,6 @@ case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); -#ifdef QUOTA - if ((error = getinoquota(ip)) != 0) - return (error); -#endif break; default: break;