Index: kern_lockf.c =================================================================== RCS file: /usr/ncvs/src/sys/kern/kern_lockf.c,v retrieving revision 1.57 diff -u -r1.57 kern_lockf.c --- kern_lockf.c 7 Aug 2007 09:04:50 -0000 1.57 +++ kern_lockf.c 5 Jan 2008 19:25:39 -0000 @@ -38,6 +38,7 @@ #include "opt_debug_lockf.h" #include +#include #include #include #include @@ -45,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -57,8 +59,8 @@ */ static int maxlockdepth = MAXDEPTH; +#define LOCKF_DEBUG #ifdef LOCKF_DEBUG -#include #include #include @@ -67,10 +69,13 @@ static int lockf_debug = 0; SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, ""); #endif +extern int maxlocks; +static int cur_lock_cnt; +SYSCTL_INT(_kern, OID_AUTO, maxlocks, CTLFLAG_RW, &maxlocks, + 0, "system wide limit for advisory record locks"); MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures"); -#define NOLOCKF (struct lockf *)0 #define SELF 0x1 #define OTHERS 0x2 static int lf_clearlock(struct lockf *, struct lockf **); @@ -87,27 +92,43 @@ static void lf_printlist(char *, struct lockf *); #endif +static void +lf_free(struct lockf *lock) +{ + /* + * We may have allowed this allocation to occur without tracking it. + * If this is the case, do not adjust the lock count, otherwise we + * can end up with lock counts that are < 0. 
+ */ + if (lock->lf_acct != 0) + (void) chglockcnt(lock->lf_uidinfo, -1, 0); + uifree(lock->lf_uidinfo); + FREE(lock, M_LOCKF); + cur_lock_cnt--; +} + /* * Advisory record locking support + * + * struct vop_advlock_args { + * struct vnode *a_vp; + * caddr_t a_id; + * int a_op; + * struct flock *a_fl; + * int a_flags; + * }; */ int -lf_advlock(ap, head, size) - struct vop_advlock_args /* { - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; - } */ *ap; - struct lockf **head; - u_quad_t size; +lf_advlock(struct vop_advlock_args *ap, struct lockf **head, u_quad_t size) { struct flock *fl = ap->a_fl; struct lockf *lock; struct vnode *vp = ap->a_vp; off_t start, end, oadd; struct lockf *clean, *n; - int error; + int error, canalloc, clearall, mulocks, acct_advlock; + struct proc *p; + struct uidinfo *ui; /* * Convert the flock structure into a start and end. @@ -153,37 +174,94 @@ /* * Avoid the common case of unlocking when inode has no locks. */ - if (*head == (struct lockf *)0) { + if (*head == NULL) { if (ap->a_op != F_SETLK) { fl->l_type = F_UNLCK; return (0); } } /* - * Allocate a spare structure in case we have to split. + * Check to see if this operation will result in the entire file being + * unlocked. If the subject has run up against the upper limit for locks + * and they want to clear the entire file, allow this to happen. It + * should be noted that the clearing of locks on an entire file happens + * when a file descriptor is closed. + */ + clearall = (fl->l_whence == SEEK_SET && fl->l_start == 0 && + fl->l_len == 0 && fl->l_type == F_UNLCK); + /* + * Allocate a spare structure in case we have to split. For security + * reasons, do not allow a subject to allocate more locks than is + * specified by their upper bound. Otherwise it is possible to run the + * system out of memory. 
*/ clean = NULL; + p = curthread->td_proc; if (ap->a_op == F_SETLK || ap->a_op == F_UNLCK) { + PROC_LOCK(p); + mulocks = (int) lim_cur(p, RLIMIT_LOCKS); + /* + * Carefully manage the lock count here. We will not allow user + * to exceed their lock count unless this operation is going to + * result in all locks being cleared. If this is the case, then + * we need to ignore this lock when the resources are reclaimed. + * Failure to do this will result in negative lock counts. + */ + canalloc = chglockcnt(p->p_ucred->cr_ruidinfo, 1, mulocks); + if (canalloc == 0 || cur_lock_cnt == maxlocks) { + if (clearall == 0) { + PROC_UNLOCK(p); + return (EAGAIN); + } + acct_advlock = 0; + } else + acct_advlock = 1; + uihold(p->p_ucred->cr_ruidinfo); + ui = p->p_ucred->cr_ruidinfo; + PROC_UNLOCK(p); MALLOC(clean, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); + clean->lf_acct = acct_advlock; clean->lf_next = NULL; + uihold(p->p_ucred->cr_ruidinfo); + clean->lf_uidinfo = ui; + cur_lock_cnt++; } /* * Create the lockf structure */ + PROC_LOCK(p); + mulocks = (int) lim_cur(p, RLIMIT_LOCKS); + canalloc = chglockcnt(p->p_ucred->cr_ruidinfo, 1, mulocks); + if (canalloc == 0 || cur_lock_cnt == maxlocks) { + if (clearall == 0) { + PROC_UNLOCK(p); + if (clean != NULL) + lf_free(clean); + return (EAGAIN); + } + acct_advlock = 0; + } else + acct_advlock = 1; + uihold(p->p_ucred->cr_ruidinfo); + ui = p->p_ucred->cr_ruidinfo; + PROC_UNLOCK(p); MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); + lock->lf_acct = acct_advlock; lock->lf_start = start; lock->lf_end = end; lock->lf_id = ap->a_id; + lock->lf_uidinfo = ui; + cur_lock_cnt++; /* * XXX The problem is that VTOI is ufs specific, so it will * break LOCKF_DEBUG for all other FS's other than UFS because * it casts the vnode->data ptr to struct inode *. 
*/ /* lock->lf_inode = VTOI(ap->a_vp); */ - lock->lf_inode = (struct inode *)0; + lock->lf_inode = NULL; lock->lf_type = fl->l_type; lock->lf_head = head; - lock->lf_next = (struct lockf *)0; + lock->lf_next = NULL; TAILQ_INIT(&lock->lf_blkhd); lock->lf_flags = ap->a_flags; /* @@ -216,7 +294,7 @@ VI_UNLOCK(vp); for (lock = clean; lock != NULL; ) { n = lock->lf_next; - free(lock, M_LOCKF); + lf_free(lock); lock = n; } return (error); @@ -226,10 +304,7 @@ * Set a byte-range lock. */ static int -lf_setlock(lock, vp, clean) - struct lockf *lock; - struct vnode *vp; - struct lockf **clean; +lf_setlock(struct lockf *lock, struct vnode *vp, struct lockf **clean) { struct lockf *block; struct lockf **head = lock->lf_head; @@ -340,11 +415,11 @@ * ourselves from the blocked list) and/or by another * process releasing a lock (in which case we have * already been removed from the blocked list and our - * lf_next field set to NOLOCKF). + * lf_next field set to NULL). */ if (lock->lf_next) { TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); - lock->lf_next = NOLOCKF; + lock->lf_next = NULL; } if (error) { lock->lf_next = *clean; @@ -491,16 +566,14 @@ * and remove it (or shrink it), then wakeup anyone we can. */ static int -lf_clearlock(unlock, clean) - struct lockf *unlock; - struct lockf **clean; +lf_clearlock(struct lockf *unlock, struct lockf **clean) { struct lockf **head = unlock->lf_head; register struct lockf *lf = *head; struct lockf *overlap, **prev; int ovcase; - if (lf == NOLOCKF) + if (lf == NULL) return (0); #ifdef LOCKF_DEBUG if (unlock->lf_type != F_UNLCK) @@ -563,9 +636,7 @@ * and if so return its process identifier. */ static int -lf_getlock(lock, fl) - register struct lockf *lock; - register struct flock *fl; +lf_getlock(struct lockf *lock, struct flock *fl) { register struct lockf *block; @@ -597,8 +668,7 @@ * return the first blocking lock. 
*/ static struct lockf * -lf_getblock(lock) - register struct lockf *lock; +lf_getblock(struct lockf *lock) { struct lockf **prev, *overlap, *lf = *(lock->lf_head); int ovcase; @@ -616,7 +686,7 @@ */ lf = overlap->lf_next; } - return (NOLOCKF); + return (NULL); } /* @@ -627,17 +697,13 @@ * may be more than one. */ static int -lf_findoverlap(lf, lock, type, prev, overlap) - register struct lockf *lf; - struct lockf *lock; - int type; - struct lockf ***prev; - struct lockf **overlap; +lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, + struct lockf ***prev, struct lockf **overlap) { off_t start, end; *overlap = lf; - if (lf == NOLOCKF) + if (lf == NULL) return (0); #ifdef LOCKF_DEBUG if (lockf_debug & 2) @@ -645,7 +711,7 @@ #endif /* LOCKF_DEBUG */ start = lock->lf_start; end = lock->lf_end; - while (lf != NOLOCKF) { + while (lf != NULL) { if (((type & SELF) && lf->lf_id != lock->lf_id) || ((type & OTHERS) && lf->lf_id == lock->lf_id)) { *prev = &lf->lf_next; @@ -737,10 +803,7 @@ * two or three locks as necessary. */ static void -lf_split(lock1, lock2, split) - struct lockf *lock1; - struct lockf *lock2; - struct lockf **split; +lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **split) { struct lockf *splitlock; @@ -788,15 +851,14 @@ * Wakeup a blocklist */ static void -lf_wakelock(listhead) - struct lockf *listhead; +lf_wakelock(struct lockf *listhead) { register struct lockf *wakelock; while (!TAILQ_EMPTY(&listhead->lf_blkhd)) { wakelock = TAILQ_FIRST(&listhead->lf_blkhd); TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); - wakelock->lf_next = NOLOCKF; + wakelock->lf_next = NULL; #ifdef LOCKF_DEBUG if (lockf_debug & 2) lf_print("lf_wakelock: awakening", wakelock); @@ -810,9 +872,7 @@ * Print out a lock. 
*/ static void -lf_print(tag, lock) - char *tag; - register struct lockf *lock; +lf_print(char *tag, struct lockf *lock) { printf("%s: lock %p for ", tag, (void *)lock); @@ -841,13 +901,11 @@ } static void -lf_printlist(tag, lock) - char *tag; - struct lockf *lock; +lf_printlist(char *tag, struct lockf *lock) { register struct lockf *lf, *blk; - if (lock->lf_inode == (struct inode *)0) + if (lock->lf_inode == NULL) return; printf("%s: Lock list for ino %ju on dev <%s>:\n",