Index: kern_lockf.c =================================================================== RCS file: /usr/ncvs/src/sys/kern/kern_lockf.c,v retrieving revision 1.57 diff -u -r1.57 kern_lockf.c --- kern_lockf.c 7 Aug 2007 09:04:50 -0000 1.57 +++ kern_lockf.c 5 Jan 2008 19:25:39 -0000 @@ -38,6 +38,7 @@ #include "opt_debug_lockf.h" #include +#include #include #include #include @@ -45,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -57,8 +59,8 @@ */ static int maxlockdepth = MAXDEPTH; +#define LOCKF_DEBUG #ifdef LOCKF_DEBUG -#include #include #include @@ -67,10 +69,13 @@ static int lockf_debug = 0; SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, ""); #endif +extern int maxlocks; +static int cur_lock_cnt; +SYSCTL_INT(_kern, OID_AUTO, maxlocks, CTLFLAG_RW, &maxlocks, + 0, "system wide limit for advisory record locks"); MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures"); -#define NOLOCKF (struct lockf *)0 #define SELF 0x1 #define OTHERS 0x2 static int lf_clearlock(struct lockf *, struct lockf **); @@ -87,27 +92,43 @@ static void lf_printlist(char *, struct lockf *); #endif +static void +lf_free(struct lockf *lock) +{ + /* + * We may have allowed this allocation to occur without tracking it. + * If this is the case, do not adjust the lock count, otherwise we + * can end up with lock counts that are < 0. 
+ */ + if (lock->lf_acct != 0) + (void) chglockcnt(lock->lf_uidinfo, -1, 0); + uifree(lock->lf_uidinfo); + FREE(lock, M_LOCKF); + cur_lock_cnt--; +} + /* * Advisory record locking support + * + * struct vop_advlock_args { + * struct vnode *a_vp; + * caddr_t a_id; + * int a_op; + * struct flock *a_fl; + * int a_flags; + * }; */ int -lf_advlock(ap, head, size) - struct vop_advlock_args /* { - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; - } */ *ap; - struct lockf **head; - u_quad_t size; +lf_advlock(struct vop_advlock_args *ap, struct lockf **head, u_quad_t size) { struct flock *fl = ap->a_fl; struct lockf *lock; struct vnode *vp = ap->a_vp; off_t start, end, oadd; struct lockf *clean, *n; - int error; + int error, canalloc, clearall, mulocks, acct_advlock; + struct proc *p; + struct uidinfo *ui; /* * Convert the flock structure into a start and end. @@ -153,37 +174,94 @@ /* * Avoid the common case of unlocking when inode has no locks. */ - if (*head == (struct lockf *)0) { + if (*head == NULL) { if (ap->a_op != F_SETLK) { fl->l_type = F_UNLCK; return (0); } } /* - * Allocate a spare structure in case we have to split. + * Check to see if this operation will result in the entire file being + * unlocked. If the subject has run up against the upper limit for locks + * and they want to clear the entire file, allow this to happen. It + * should be noted that the clearing of locks on an entire file happens + * when a file descriptor is closed. + */ + clearall = (fl->l_whence == SEEK_SET && fl->l_start == 0 && + fl->l_len == 0 && fl->l_type == F_UNLCK); + /* + * Allocate a spare structure in case we have to split. For security + * reasons, do not allow a subject to allocate more locks than is + * specified by their upper bound. Otherwise it is possible to run the + * system out of memory. 
*/ clean = NULL; + p = curthread->td_proc; if (ap->a_op == F_SETLK || ap->a_op == F_UNLCK) { + PROC_LOCK(p); + mulocks = (int) lim_cur(p, RLIMIT_LOCKS); + /* + * Carefully manage the lock count here. We will not allow user + * to exceed their lock count unless this operation is going to + * result in all locks being cleared. If this is the case, then + * we need to ignore this lock when the resources are reclaimed. + * Failure to do this will result in negative lock counts. + */ + canalloc = chglockcnt(p->p_ucred->cr_ruidinfo, 1, mulocks); + if (canalloc == 0 || cur_lock_cnt == maxlocks) { + if (clearall == 0) { + PROC_UNLOCK(p); + return (EAGAIN); + } + acct_advlock = 0; + } else + acct_advlock = 1; + uihold(p->p_ucred->cr_ruidinfo); + ui = p->p_ucred->cr_ruidinfo; + PROC_UNLOCK(p); MALLOC(clean, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); + clean->lf_acct = acct_advlock; clean->lf_next = NULL; + uihold(p->p_ucred->cr_ruidinfo); + clean->lf_uidinfo = ui; + cur_lock_cnt++; } /* * Create the lockf structure */ + PROC_LOCK(p); + mulocks = (int) lim_cur(p, RLIMIT_LOCKS); + canalloc = chglockcnt(p->p_ucred->cr_ruidinfo, 1, mulocks); + if (canalloc == 0 || cur_lock_cnt == maxlocks) { + if (clearall == 0) { + PROC_UNLOCK(p); + if (clean != NULL) + lf_free(clean); + return (EAGAIN); + } + acct_advlock = 0; + } else + acct_advlock = 1; + uihold(p->p_ucred->cr_ruidinfo); + ui = p->p_ucred->cr_ruidinfo; + PROC_UNLOCK(p); MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); + lock->lf_acct = acct_advlock; lock->lf_start = start; lock->lf_end = end; lock->lf_id = ap->a_id; + lock->lf_uidinfo = ui; + cur_lock_cnt++; /* * XXX The problem is that VTOI is ufs specific, so it will * break LOCKF_DEBUG for all other FS's other than UFS because * it casts the vnode->data ptr to struct inode *. 
*/ /* lock->lf_inode = VTOI(ap->a_vp); */ - lock->lf_inode = (struct inode *)0; + lock->lf_inode = NULL; lock->lf_type = fl->l_type; lock->lf_head = head; - lock->lf_next = (struct lockf *)0; + lock->lf_next = NULL; TAILQ_INIT(&lock->lf_blkhd); lock->lf_flags = ap->a_flags; /* @@ -216,7 +294,7 @@ VI_UNLOCK(vp); for (lock = clean; lock != NULL; ) { n = lock->lf_next; - free(lock, M_LOCKF); + lf_free(lock); lock = n; } return (error); @@ -226,10 +304,7 @@ * Set a byte-range lock. */ static int -lf_setlock(lock, vp, clean) - struct lockf *lock; - struct vnode *vp; - struct lockf **clean; +lf_setlock(struct lockf *lock, struct vnode *vp, struct lockf **clean) { struct lockf *block; struct lockf **head = lock->lf_head; @@ -340,11 +415,11 @@ * ourselves from the blocked list) and/or by another * process releasing a lock (in which case we have * already been removed from the blocked list and our - * lf_next field set to NOLOCKF). + * lf_next field set to NULL). */ if (lock->lf_next) { TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); - lock->lf_next = NOLOCKF; + lock->lf_next = NULL; } if (error) { lock->lf_next = *clean; @@ -491,16 +566,14 @@ * and remove it (or shrink it), then wakeup anyone we can. */ static int -lf_clearlock(unlock, clean) - struct lockf *unlock; - struct lockf **clean; +lf_clearlock(struct lockf *unlock, struct lockf **clean) { struct lockf **head = unlock->lf_head; register struct lockf *lf = *head; struct lockf *overlap, **prev; int ovcase; - if (lf == NOLOCKF) + if (lf == NULL) return (0); #ifdef LOCKF_DEBUG if (unlock->lf_type != F_UNLCK) @@ -563,9 +636,7 @@ * and if so return its process identifier. */ static int -lf_getlock(lock, fl) - register struct lockf *lock; - register struct flock *fl; +lf_getlock(struct lockf *lock, struct flock *fl) { register struct lockf *block; @@ -597,8 +668,7 @@ * return the first blocking lock. 
*/ static struct lockf * -lf_getblock(lock) - register struct lockf *lock; +lf_getblock(struct lockf *lock) { struct lockf **prev, *overlap, *lf = *(lock->lf_head); int ovcase; @@ -616,7 +686,7 @@ */ lf = overlap->lf_next; } - return (NOLOCKF); + return (NULL); } /* @@ -627,17 +697,13 @@ * may be more than one. */ static int -lf_findoverlap(lf, lock, type, prev, overlap) - register struct lockf *lf; - struct lockf *lock; - int type; - struct lockf ***prev; - struct lockf **overlap; +lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, + struct lockf ***prev, struct lockf **overlap) { off_t start, end; *overlap = lf; - if (lf == NOLOCKF) + if (lf == NULL) return (0); #ifdef LOCKF_DEBUG if (lockf_debug & 2) @@ -645,7 +711,7 @@ #endif /* LOCKF_DEBUG */ start = lock->lf_start; end = lock->lf_end; - while (lf != NOLOCKF) { + while (lf != NULL) { if (((type & SELF) && lf->lf_id != lock->lf_id) || ((type & OTHERS) && lf->lf_id == lock->lf_id)) { *prev = &lf->lf_next; @@ -737,10 +803,7 @@ * two or three locks as necessary. */ static void -lf_split(lock1, lock2, split) - struct lockf *lock1; - struct lockf *lock2; - struct lockf **split; +lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **split) { struct lockf *splitlock; @@ -788,15 +851,14 @@ * Wakeup a blocklist */ static void -lf_wakelock(listhead) - struct lockf *listhead; +lf_wakelock(struct lockf *listhead) { register struct lockf *wakelock; while (!TAILQ_EMPTY(&listhead->lf_blkhd)) { wakelock = TAILQ_FIRST(&listhead->lf_blkhd); TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); - wakelock->lf_next = NOLOCKF; + wakelock->lf_next = NULL; #ifdef LOCKF_DEBUG if (lockf_debug & 2) lf_print("lf_wakelock: awakening", wakelock); @@ -810,9 +872,7 @@ * Print out a lock. 
*/ static void -lf_print(tag, lock) - char *tag; - register struct lockf *lock; +lf_print(char *tag, struct lockf *lock) { printf("%s: lock %p for ", tag, (void *)lock); @@ -841,13 +901,11 @@ } static void -lf_printlist(tag, lock) - char *tag; - struct lockf *lock; +lf_printlist(char *tag, struct lockf *lock) { register struct lockf *lf, *blk; - if (lock->lf_inode == (struct inode *)0) + if (lock->lf_inode == NULL) return; printf("%s: Lock list for ino %ju on dev <%s>:\n",