Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /head/sys:r178046,178109-178110,178420,182061,183079-183080,183093,183280,183330-183331,183519

Index: conf/options
===================================================================
--- conf/options	(revision 183520)
+++ conf/options	(working copy)
@@ -741,9 +741,6 @@
 #XXXBZ#NI4BING			opt_i4b.h
 #XXXBZ#NI4BISPPP		opt_i4b.h
 
-# VFS options
-LOOKUP_SHARED		opt_vfs.h
-
 # HWPMC options
 HWPMC_HOOKS
 
Index: nfsclient/nfs_vnops.c
===================================================================
--- nfsclient/nfs_vnops.c	(revision 183520)
+++ nfsclient/nfs_vnops.c	(working copy)
@@ -868,7 +868,10 @@
 		*vpp = NULLVP;
 		return (error);
 	}
-	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
+	error = cache_lookup(dvp, vpp, cnp);
+	if (error > 0 && error != ENOENT)
+		return (error);
+	if (error == -1) {
 		struct vattr vattr;
 
 		newvp = *vpp;
Index: ufs/ufs/ufs_dirhash.c
===================================================================
--- ufs/ufs/ufs_dirhash.c	(revision 183520)
+++ ufs/ufs/ufs_dirhash.c	(working copy)
@@ -46,7 +46,9 @@
 #include <sys/buf.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
+#include <sys/refcount.h>
 #include <sys/sysctl.h>
+#include <sys/sx.h>
 #include <vm/uma.h>
 
 #include <ufs/ufs/quota.h>
@@ -88,15 +90,16 @@
 	   doff_t offset);
 static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
 static int ufsdirhash_recycle(int wanted);
+static void ufsdirhash_free_locked(struct inode *ip);
 
 static uma_zone_t	ufsdirhash_zone;
 
 #define DIRHASHLIST_LOCK() 		mtx_lock(&ufsdirhash_mtx)
 #define DIRHASHLIST_UNLOCK() 		mtx_unlock(&ufsdirhash_mtx)
-#define DIRHASH_LOCK(dh)		mtx_lock(&(dh)->dh_mtx)
-#define DIRHASH_UNLOCK(dh) 		mtx_unlock(&(dh)->dh_mtx)
 #define DIRHASH_BLKALLOC_WAITOK() 	uma_zalloc(ufsdirhash_zone, M_WAITOK)
 #define DIRHASH_BLKFREE(ptr) 		uma_zfree(ufsdirhash_zone, (ptr))
+#define	DIRHASH_ASSERT_LOCKED(dh)					\
+    sx_assert(&(dh)->dh_lock, SA_LOCKED)
 
 /* Dirhash list; recently-used entries are near the tail. */
 static TAILQ_HEAD(, dirhash) ufsdirhash_list;
@@ -105,16 +108,201 @@
 static struct mtx	ufsdirhash_mtx;
 
 /*
- * Locking order:
- *	ufsdirhash_mtx
- *	dh_mtx
+ * Locking:
  *
- * The dh_mtx mutex should be acquired either via the inode lock, or via
- * ufsdirhash_mtx. Only the owner of the inode may free the associated
- * dirhash, but anything can steal its memory and set dh_hash to NULL.
+ * The relationship between inode and dirhash is protected either by an
+ * exclusive vnode lock or the vnode interlock where a shared vnode lock
+ * may be used.  The ufsdirhash_mtx is acquired after the dirhash lock.  To
+ * handle teardown races, code wishing to lock the dirhash for an inode
+ * when using a shared vnode lock must obtain a private reference on the
+ * dirhash while holding the vnode interlock.  They can drop it once they
+ * have obtained the dirhash lock and verified that the dirhash wasn't
+ * recycled while they waited for the dirhash lock.
+ *
+ * ufsdirhash_build() acquires a shared lock on the dirhash when it is
+ * successful.  This lock is released by ufsdirhash_lookup() on return.
+ *
+ * Functions requiring exclusive access use ufsdirhash_acquire() which may
+ * free a dirhash structure that was recycled by ufsdirhash_recycle().
+ *
+ * The dirhash lock may be held across io operations.
  */
 
+static void
+ufsdirhash_hold(struct dirhash *dh)
+{
+
+	refcount_acquire(&dh->dh_refcount);
+}
+
+static void
+ufsdirhash_drop(struct dirhash *dh)
+{
+
+	if (refcount_release(&dh->dh_refcount)) {
+		sx_destroy(&dh->dh_lock);
+		free(dh, M_DIRHASH);
+	}
+}
+
 /*
+ * Release the lock on a dirhash.
+ */
+static void
+ufsdirhash_release(struct dirhash *dh)
+{
+
+	sx_unlock(&dh->dh_lock);
+}
+
+/*
+ * Either acquire an existing hash locked shared or create a new hash and
+ * return it exclusively locked.  May return NULL if the allocation fails.
+ *
+ * The vnode interlock is used to protect the i_dirhash pointer from
+ * simultaneous access while only a shared vnode lock is held.
+ */
+static struct dirhash *
+ufsdirhash_create(struct inode *ip)
+{
+	struct dirhash *ndh;
+	struct dirhash *dh;
+	struct vnode *vp;
+	int error;
+
+	error = 0;
+	ndh = dh = NULL;
+	vp = ip->i_vnode;
+	for (;;) {
+		/* Racy check for i_dirhash to prefetch a dirhash structure. */
+		if (ip->i_dirhash == NULL && ndh == NULL) {
+			MALLOC(ndh, struct dirhash *, sizeof *dh, M_DIRHASH,
+			    M_NOWAIT | M_ZERO);
+			if (ndh == NULL)
+				return (NULL);
+			refcount_init(&ndh->dh_refcount, 1);
+			sx_init(&ndh->dh_lock, "dirhash");
+			sx_xlock(&ndh->dh_lock);
+		}
+		/*
+		 * Check i_dirhash.  If it's NULL just try to use a
+		 * preallocated structure.  If none exists loop and try again.
+		 */
+		VI_LOCK(vp);
+		dh = ip->i_dirhash;
+		if (dh == NULL) {
+			ip->i_dirhash = ndh;
+			VI_UNLOCK(vp);
+			if (ndh == NULL)
+				continue;
+			return (ndh);
+		}
+		ufsdirhash_hold(dh);
+		VI_UNLOCK(vp);
+
+		/* Acquire a shared lock on existing hashes. */
+		sx_slock(&dh->dh_lock);
+
+		/* The hash could've been recycled while we were waiting. */
+		VI_LOCK(vp);
+		if (ip->i_dirhash != dh) {
+			VI_UNLOCK(vp);
+			ufsdirhash_release(dh);
+			ufsdirhash_drop(dh);
+			continue;
+		}
+		VI_UNLOCK(vp);
+		ufsdirhash_drop(dh);
+
+		/* If the hash is still valid we've succeeded. */
+		if (dh->dh_hash != NULL)
+			break;
+		/*
+		 * If the hash is NULL it has been recycled.  Try to upgrade
+		 * so we can recreate it.  If we fail the upgrade, drop our
+		 * lock and try again.
+		 */
+		if (sx_try_upgrade(&dh->dh_lock))
+			break;
+		sx_sunlock(&dh->dh_lock);
+	}
+	/* Free the preallocated structure if it was not necessary. */
+	if (ndh) {
+		ufsdirhash_release(ndh);
+		ufsdirhash_drop(ndh);
+	}
+	return (dh);
+}
+
+/*
+ * Acquire an exclusive lock on an existing hash.  Requires an exclusive
+ * vnode lock to protect the i_dirhash pointer.  Hashes that have been
+ * recycled are reclaimed here and NULL is returned.
+ */
+static struct dirhash *
+ufsdirhash_acquire(struct inode *ip)
+{
+	struct dirhash *dh;
+	struct vnode *vp;
+
+	ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);
+
+	vp = ip->i_vnode;
+	dh = ip->i_dirhash;
+	if (dh == NULL)
+		return (NULL);
+	sx_xlock(&dh->dh_lock);
+	if (dh->dh_hash != NULL)
+		return (dh);
+	ufsdirhash_free_locked(ip);
+	return (NULL);
+}
+
+/*
+ * Acquire exclusively and free the hash pointed to by ip.  Works with a
+ * shared or exclusive vnode lock.
+ */
+void
+ufsdirhash_free(struct inode *ip)
+{
+	struct dirhash *dh;
+	struct vnode *vp;
+
+	vp = ip->i_vnode;
+	for (;;) {
+		/* Grab a reference on this inode's dirhash if it has one. */
+		VI_LOCK(vp);
+		dh = ip->i_dirhash;
+		if (dh == NULL) {
+			VI_UNLOCK(vp);
+			return;
+		}
+		ufsdirhash_hold(dh);
+		VI_UNLOCK(vp);
+
+		/* Exclusively lock the dirhash. */
+		sx_xlock(&dh->dh_lock);
+
+		/* If this dirhash still belongs to this inode, then free it. */
+		VI_LOCK(vp);
+		if (ip->i_dirhash == dh) {
+			VI_UNLOCK(vp);
+			ufsdirhash_drop(dh);
+			break;
+		}
+		VI_UNLOCK(vp);
+
+		/*
+		 * This inode's dirhash has changed while we were
+		 * waiting for the dirhash lock, so try again.
+		 */
+		ufsdirhash_release(dh);
+		ufsdirhash_drop(dh);
+	}
+	ufsdirhash_free_locked(ip);
+}
+
+/*
  * Attempt to build up a hash table for the directory contents in
  * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
  */
@@ -128,27 +316,23 @@
 	doff_t bmask, pos;
 	int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
 
+	/* Take care of a decreased sysctl value. */
+	while (ufs_dirhashmem > ufs_dirhashmaxmem)
+		if (ufsdirhash_recycle(0) != 0)
+			return (-1);
+
 	/* Check if we can/should use dirhash. */
-	if (ip->i_dirhash == NULL) {
-		if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode))
-			return (-1);
-	} else {
-		/* Hash exists, but sysctls could have changed. */
-		if (ip->i_size < ufs_mindirhashsize ||
-		    ufs_dirhashmem > ufs_dirhashmaxmem) {
+	if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode) ||
+	    ip->i_effnlink == 0) {
+		if (ip->i_dirhash)
 			ufsdirhash_free(ip);
-			return (-1);
-		}
-		/* Check if hash exists and is intact (note: unlocked read). */
-		if (ip->i_dirhash->dh_hash != NULL)
-			return (0);
-		/* Free the old, recycled hash and build a new one. */
-		ufsdirhash_free(ip);
+		return (-1);
 	}
-
-	/* Don't hash removed directories. */
-	if (ip->i_effnlink == 0)
+	dh = ufsdirhash_create(ip);
+	if (dh == NULL)
 		return (-1);
+	if (dh->dh_hash != NULL)
+		return (0);
 
 	vp = ip->i_vnode;
 	/* Allocate 50% more entries than this dir size could ever need. */
@@ -159,7 +343,6 @@
 	nslots = narrays * DH_NBLKOFF;
 	dirblocks = howmany(ip->i_size, DIRBLKSIZ);
 	nblocks = (dirblocks * 3 + 1) / 2;
-
 	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
 	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
 	    nblocks * sizeof(*dh->dh_blkfree);
@@ -167,33 +350,40 @@
 	if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
 		DIRHASHLIST_UNLOCK();
 		if (memreqd > ufs_dirhashmaxmem / 2)
-			return (-1);
-
+			goto fail;
 		/* Try to free some space. */
 		if (ufsdirhash_recycle(memreqd) != 0)
-			return (-1);
+			goto fail;
 		/* Enough was freed, and list has been locked. */
 	}
 	ufs_dirhashmem += memreqd;
 	DIRHASHLIST_UNLOCK();
 
+	/* Initialise the hash table and block statistics. */
+	dh->dh_memreq = memreqd;
+	dh->dh_narrays = narrays;
+	dh->dh_hlen = nslots;
+	dh->dh_nblk = nblocks;
+	dh->dh_dirblks = dirblocks;
+	for (i = 0; i < DH_NFSTATS; i++)
+		dh->dh_firstfree[i] = -1;
+	dh->dh_firstfree[DH_NFSTATS] = 0;
+	dh->dh_hused = 0;
+	dh->dh_seqopt = 0;
+	dh->dh_seqoff = 0;
+	dh->dh_score = DH_SCOREINIT;
+
 	/*
 	 * Use non-blocking mallocs so that we will revert to a linear
 	 * lookup on failure rather than potentially blocking forever.
 	 */
-	MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO);
-	if (dh == NULL) {
-		DIRHASHLIST_LOCK();
-		ufs_dirhashmem -= memreqd;
-		DIRHASHLIST_UNLOCK();
-		return (-1);
-	}
-	mtx_init(&dh->dh_mtx, "dirhash", NULL, MTX_DEF);
 	MALLOC(dh->dh_hash, doff_t **, narrays * sizeof(dh->dh_hash[0]),
 	    M_DIRHASH, M_NOWAIT | M_ZERO);
+	if (dh->dh_hash == NULL)
+		goto fail;
 	MALLOC(dh->dh_blkfree, u_int8_t *, nblocks * sizeof(dh->dh_blkfree[0]),
 	    M_DIRHASH, M_NOWAIT);
-	if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
+	if (dh->dh_blkfree == NULL)
 		goto fail;
 	for (i = 0; i < narrays; i++) {
 		if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
@@ -201,22 +391,8 @@
 		for (j = 0; j < DH_NBLKOFF; j++)
 			dh->dh_hash[i][j] = DIRHASH_EMPTY;
 	}
-
-	/* Initialise the hash table and block statistics. */
-	dh->dh_narrays = narrays;
-	dh->dh_hlen = nslots;
-	dh->dh_nblk = nblocks;
-	dh->dh_dirblks = dirblocks;
 	for (i = 0; i < dirblocks; i++)
 		dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
-	for (i = 0; i < DH_NFSTATS; i++)
-		dh->dh_firstfree[i] = -1;
-	dh->dh_firstfree[DH_NFSTATS] = 0;
-	dh->dh_seqopt = 0;
-	dh->dh_seqoff = 0;
-	dh->dh_score = DH_SCOREINIT;
-	ip->i_dirhash = dh;
-
 	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
 	pos = 0;
 	while (pos < ip->i_size) {
@@ -254,63 +430,70 @@
 	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
 	dh->dh_onlist = 1;
 	DIRHASHLIST_UNLOCK();
+	sx_downgrade(&dh->dh_lock);
 	return (0);
 
 fail:
-	if (dh->dh_hash != NULL) {
-		for (i = 0; i < narrays; i++)
-			if (dh->dh_hash[i] != NULL)
-				DIRHASH_BLKFREE(dh->dh_hash[i]);
-		FREE(dh->dh_hash, M_DIRHASH);
-	}
-	if (dh->dh_blkfree != NULL)
-		FREE(dh->dh_blkfree, M_DIRHASH);
-	mtx_destroy(&dh->dh_mtx);
-	FREE(dh, M_DIRHASH);
-	ip->i_dirhash = NULL;
-	DIRHASHLIST_LOCK();
-	ufs_dirhashmem -= memreqd;
-	DIRHASHLIST_UNLOCK();
+	ufsdirhash_free_locked(ip);
 	return (-1);
 }
 
 /*
  * Free any hash table associated with inode 'ip'.
  */
-void
-ufsdirhash_free(struct inode *ip)
+static void
+ufsdirhash_free_locked(struct inode *ip)
 {
 	struct dirhash *dh;
-	int i, mem;
+	struct vnode *vp;
+	int i;
 
-	if ((dh = ip->i_dirhash) == NULL)
-		return;
+	DIRHASH_ASSERT_LOCKED(ip->i_dirhash);
+
+	/*
+	 * Clear the pointer in the inode to prevent new threads from
+	 * finding the dead structure.
+	 */
+	vp = ip->i_vnode;
+	VI_LOCK(vp);
+	dh = ip->i_dirhash;
+	ip->i_dirhash = NULL;
+	VI_UNLOCK(vp);
+
+	/*
+	 * Remove the hash from the list since we are going to free its
+	 * memory.
+	 */
 	DIRHASHLIST_LOCK();
-	DIRHASH_LOCK(dh);
 	if (dh->dh_onlist)
 		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
-	DIRHASH_UNLOCK(dh);
+	ufs_dirhashmem -= dh->dh_memreq;
 	DIRHASHLIST_UNLOCK();
 
-	/* The dirhash pointed to by 'dh' is exclusively ours now. */
+	/*
+	 * At this point, any waiters for the lock should hold their
+	 * own reference on the dirhash structure.  They will drop
+	 * that reference once they grab the vnode interlock and see
+	 * that ip->i_dirhash is NULL.
+	 */
+	sx_xunlock(&dh->dh_lock);
 
-	mem = sizeof(*dh);
+	/*
+	 * Handle partially recycled as well as fully constructed hashes.
+	 */
 	if (dh->dh_hash != NULL) {
 		for (i = 0; i < dh->dh_narrays; i++)
-			DIRHASH_BLKFREE(dh->dh_hash[i]);
+			if (dh->dh_hash[i] != NULL)
+				DIRHASH_BLKFREE(dh->dh_hash[i]);
 		FREE(dh->dh_hash, M_DIRHASH);
-		FREE(dh->dh_blkfree, M_DIRHASH);
-		mem += dh->dh_narrays * sizeof(*dh->dh_hash) +
-		    dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
-		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
+		if (dh->dh_blkfree != NULL)
+			FREE(dh->dh_blkfree, M_DIRHASH);
 	}
-	mtx_destroy(&dh->dh_mtx);
-	FREE(dh, M_DIRHASH);
-	ip->i_dirhash = NULL;
 
-	DIRHASHLIST_LOCK();
-	ufs_dirhashmem -= mem;
-	DIRHASHLIST_UNLOCK();
+	/*
+	 * Drop the inode's reference to the data structure.
+	 */
+	ufsdirhash_drop(dh);
 }
 
 /*
@@ -323,6 +506,8 @@
  * prevoffp is non-NULL, the offset of the previous entry within
  * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
  * is the first in a block, the start of the block is used).
+ *
+ * Must be called with the hash locked.  Returns with the hash unlocked.
  */
 int
 ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
@@ -334,48 +519,36 @@
 	struct buf *bp;
 	doff_t blkoff, bmask, offset, prevoff;
 	int i, slot;
+	int error;
 
-	if ((dh = ip->i_dirhash) == NULL)
-		return (EJUSTRETURN);
+	dh = ip->i_dirhash;
+	KASSERT(dh != NULL && dh->dh_hash != NULL,
+	    ("ufsdirhash_lookup: Invalid dirhash %p\n", dh));
+	DIRHASH_ASSERT_LOCKED(dh);
 	/*
 	 * Move this dirhash towards the end of the list if it has a
-	 * score higher than the next entry, and acquire the dh_mtx.
-	 * Optimise the case where it's already the last by performing
-	 * an unlocked read of the TAILQ_NEXT pointer.
-	 *
-	 * In both cases, end up holding just dh_mtx.
+	 * score higher than the next entry; the dh_lock is already held.
 	 */
+	DIRHASHLIST_LOCK();
 	if (TAILQ_NEXT(dh, dh_list) != NULL) {
-		DIRHASHLIST_LOCK();
-		DIRHASH_LOCK(dh);
 		/*
 		 * If the new score will be greater than that of the next
 		 * entry, then move this entry past it. With both mutexes
 		 * held, dh_next won't go away, but its dh_score could
 		 * change; that's not important since it is just a hint.
 		 */
-		if (dh->dh_hash != NULL &&
-		    (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
+		if ((dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
 		    dh->dh_score >= dh_next->dh_score) {
 			KASSERT(dh->dh_onlist, ("dirhash: not on list"));
 			TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
 			TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
 			    dh_list);
 		}
-		DIRHASHLIST_UNLOCK();
-	} else {
-		/* Already the last, though that could change as we wait. */
-		DIRHASH_LOCK(dh);
 	}
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return (EJUSTRETURN);
-	}
-
 	/* Update the score. */
 	if (dh->dh_score < DH_SCOREMAX)
 		dh->dh_score++;
+	DIRHASHLIST_UNLOCK();
 
 	vp = ip->i_vnode;
 	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
@@ -410,23 +583,23 @@
 	    slot = WRAPINCR(slot, dh->dh_hlen)) {
 		if (offset == DIRHASH_DEL)
 			continue;
-		DIRHASH_UNLOCK(dh);
-
 		if (offset < 0 || offset >= ip->i_size)
 			panic("ufsdirhash_lookup: bad offset in hash array");
 		if ((offset & ~bmask) != blkoff) {
 			if (bp != NULL)
 				brelse(bp);
 			blkoff = offset & ~bmask;
-			if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0)
-				return (EJUSTRETURN);
+			if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) {
+				error = EJUSTRETURN;
+				goto fail;
+			}
 		}
 		dp = (struct direct *)(bp->b_data + (offset & bmask));
 		if (dp->d_reclen == 0 || dp->d_reclen >
 		    DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
 			/* Corrupted directory. */
-			brelse(bp);
-			return (EJUSTRETURN);
+			error = EJUSTRETURN;
+			goto fail;
 		}
 		if (dp->d_namlen == namelen &&
 		    bcmp(dp->d_name, name, namelen) == 0) {
@@ -436,8 +609,8 @@
 					prevoff = ufsdirhash_getprev(dp,
 					    offset);
 					if (prevoff == -1) {
-						brelse(bp);
-						return (EJUSTRETURN);
+						error = EJUSTRETURN;
+						goto fail;
 					}
 				} else
 					prevoff = offset;
@@ -448,20 +621,12 @@
 			if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
 				dh->dh_seqopt = 1;
 			dh->dh_seqoff = offset + DIRSIZ(0, dp);
-
 			*bpp = bp;
 			*offp = offset;
+			ufsdirhash_release(dh);
 			return (0);
 		}
 
-		DIRHASH_LOCK(dh);
-		if (dh->dh_hash == NULL) {
-			DIRHASH_UNLOCK(dh);
-			if (bp != NULL)
-				brelse(bp);
-			ufsdirhash_free(ip);
-			return (EJUSTRETURN);
-		}
 		/*
 		 * When the name doesn't match in the seqopt case, go back
 		 * and search normally.
@@ -471,10 +636,12 @@
 			goto restart;
 		}
 	}
-	DIRHASH_UNLOCK(dh);
+	error = ENOENT;
+fail:
+	ufsdirhash_release(dh);
 	if (bp != NULL)
 		brelse(bp);
-	return (ENOENT);
+	return (error);
 }
 
 /*
@@ -502,29 +669,22 @@
 	doff_t pos, slotstart;
 	int dirblock, error, freebytes, i;
 
-	if ((dh = ip->i_dirhash) == NULL)
-		return (-1);
-	DIRHASH_LOCK(dh);
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return (-1);
-	}
+	dh = ip->i_dirhash;
+	KASSERT(dh != NULL && dh->dh_hash != NULL,
+	    ("ufsdirhash_findfree: Invalid dirhash %p\n", dh));
+	DIRHASH_ASSERT_LOCKED(dh);
 
 	/* Find a directory block with the desired free space. */
 	dirblock = -1;
 	for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
 		if ((dirblock = dh->dh_firstfree[i]) != -1)
 			break;
-	if (dirblock == -1) {
-		DIRHASH_UNLOCK(dh);
+	if (dirblock == -1)
 		return (-1);
-	}
 
 	KASSERT(dirblock < dh->dh_nblk &&
 	    dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
 	    ("ufsdirhash_findfree: bad stats"));
-	DIRHASH_UNLOCK(dh);
 	pos = dirblock * DIRBLKSIZ;
 	error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp);
 	if (error)
@@ -582,24 +742,18 @@
 	struct dirhash *dh;
 	int i;
 
-	if ((dh = ip->i_dirhash) == NULL)
-		return (-1);
-	DIRHASH_LOCK(dh);
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return (-1);
-	}
+	dh = ip->i_dirhash;
+	DIRHASH_ASSERT_LOCKED(dh);
+	KASSERT(dh != NULL && dh->dh_hash != NULL,
+	    ("ufsdirhash_enduseful: Invalid dirhash %p\n", dh));
 
-	if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) {
-		DIRHASH_UNLOCK(dh);
+	if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN)
 		return (-1);
-	}
 
 	for (i = dh->dh_dirblks - 1; i >= 0; i--)
 		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
 			break;
-	DIRHASH_UNLOCK(dh);
+
 	return ((doff_t)(i + 1) * DIRBLKSIZ);
 }
 
@@ -614,15 +768,9 @@
 	struct dirhash *dh;
 	int slot;
 
-	if ((dh = ip->i_dirhash) == NULL)
+	if ((dh = ufsdirhash_acquire(ip)) == NULL)
 		return;
-	DIRHASH_LOCK(dh);
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return;
-	}
-
+	
 	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
 	    ("ufsdirhash_add: bad offset"));
 	/*
@@ -630,8 +778,7 @@
 	 * remove the hash entirely and let it be rebuilt later.
 	 */
 	if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
+		ufsdirhash_free_locked(ip);
 		return;
 	}
 
@@ -645,7 +792,7 @@
 
 	/* Update the per-block summary info. */
 	ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp));
-	DIRHASH_UNLOCK(dh);
+	ufsdirhash_release(dh);
 }
 
 /*
@@ -659,14 +806,8 @@
 	struct dirhash *dh;
 	int slot;
 
-	if ((dh = ip->i_dirhash) == NULL)
+	if ((dh = ufsdirhash_acquire(ip)) == NULL)
 		return;
-	DIRHASH_LOCK(dh);
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return;
-	}
 
 	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
 	    ("ufsdirhash_remove: bad offset"));
@@ -678,7 +819,7 @@
 
 	/* Update the per-block summary info. */
 	ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp));
-	DIRHASH_UNLOCK(dh);
+	ufsdirhash_release(dh);
 }
 
 /*
@@ -692,14 +833,8 @@
 	struct dirhash *dh;
 	int slot;
 
-	if ((dh = ip->i_dirhash) == NULL)
+	if ((dh = ufsdirhash_acquire(ip)) == NULL)
 		return;
-	DIRHASH_LOCK(dh);
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return;
-	}
 
 	KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
 	    newoff < dh->dh_dirblks * DIRBLKSIZ,
@@ -707,7 +842,7 @@
 	/* Find the entry, and update the offset. */
 	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
 	DH_ENTRY(dh, slot) = newoff;
-	DIRHASH_UNLOCK(dh);
+	ufsdirhash_release(dh);
 }
 
 /*
@@ -720,22 +855,15 @@
 	struct dirhash *dh;
 	int block;
 
-	if ((dh = ip->i_dirhash) == NULL)
+	if ((dh = ufsdirhash_acquire(ip)) == NULL)
 		return;
-	DIRHASH_LOCK(dh);
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return;
-	}
 
 	KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
 	    ("ufsdirhash_newblk: bad offset"));
 	block = offset / DIRBLKSIZ;
 	if (block >= dh->dh_nblk) {
 		/* Out of space; must rebuild. */
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
+		ufsdirhash_free_locked(ip);
 		return;
 	}
 	dh->dh_dirblks = block + 1;
@@ -744,7 +872,7 @@
 	dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
 	if (dh->dh_firstfree[DH_NFSTATS] == -1)
 		dh->dh_firstfree[DH_NFSTATS] = block;
-	DIRHASH_UNLOCK(dh);
+	ufsdirhash_release(dh);
 }
 
 /*
@@ -756,14 +884,8 @@
 	struct dirhash *dh;
 	int block, i;
 
-	if ((dh = ip->i_dirhash) == NULL)
+	if ((dh = ufsdirhash_acquire(ip)) == NULL)
 		return;
-	DIRHASH_LOCK(dh);
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return;
-	}
 
 	KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
 	    ("ufsdirhash_dirtrunc: bad offset"));
@@ -775,8 +897,7 @@
 	 * if necessary.
 	 */
 	if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
+		ufsdirhash_free_locked(ip);
 		return;
 	}
 
@@ -794,7 +915,7 @@
 		if (dh->dh_firstfree[i] >= block)
 			panic("ufsdirhash_dirtrunc: first free corrupt");
 	dh->dh_dirblks = block;
-	DIRHASH_UNLOCK(dh);
+	ufsdirhash_release(dh);
 }
 
 /*
@@ -815,14 +936,8 @@
 
 	if (!ufs_dirhashcheck)
 		return;
-	if ((dh = ip->i_dirhash) == NULL)
+	if ((dh = ufsdirhash_acquire(ip)) == NULL)
 		return;
-	DIRHASH_LOCK(dh);
-	if (dh->dh_hash == NULL) {
-		DIRHASH_UNLOCK(dh);
-		ufsdirhash_free(ip);
-		return;
-	}
 
 	block = offset / DIRBLKSIZ;
 	if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
@@ -866,7 +981,7 @@
 			panic("ufsdirhash_checkblock: bad first-free");
 	if (dh->dh_firstfree[ffslot] == -1)
 		panic("ufsdirhash_checkblock: missing first-free entry");
-	DIRHASH_UNLOCK(dh);
+	ufsdirhash_release(dh);
 }
 
 /*
@@ -893,7 +1008,7 @@
  * by the value specified by `diff'.
  *
  * The caller must ensure we have exclusive access to `dh'; normally
- * that means that dh_mtx should be held, but this is also called
+ * that means that dh_lock should be held, but this is also called
  * from ufsdirhash_build() where exclusive access can be assumed.
  */
 static void
@@ -937,7 +1052,7 @@
 {
 	int slot;
 
-	mtx_assert(&dh->dh_mtx, MA_OWNED);
+	DIRHASH_ASSERT_LOCKED(dh);
 
 	/* Find the entry. */
 	KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
@@ -961,7 +1076,7 @@
 {
 	int i;
 
-	mtx_assert(&dh->dh_mtx, MA_OWNED);
+	DIRHASH_ASSERT_LOCKED(dh);
 
 	/* Mark the entry as deleted. */
 	DH_ENTRY(dh, slot) = DIRHASH_DEL;
@@ -1026,22 +1141,23 @@
 	int i, mem, narrays;
 
 	DIRHASHLIST_LOCK();
+	dh = TAILQ_FIRST(&ufsdirhash_list);
 	while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
-		/* Find a dirhash, and lock it. */
-		if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) {
+		/* Decrement the score; only recycle if it becomes zero. */
+		if (dh == NULL || --dh->dh_score > 0) {
 			DIRHASHLIST_UNLOCK();
 			return (-1);
 		}
-		DIRHASH_LOCK(dh);
+		/*
+		 * If we can't lock it, it's in use and we don't want to
+		 * recycle it anyway.
+		 */
+		if (!sx_try_xlock(&dh->dh_lock)) {
+			dh = TAILQ_NEXT(dh, dh_list);
+			continue;
+		}
 		KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
 
-		/* Decrement the score; only recycle if it becomes zero. */
-		if (--dh->dh_score > 0) {
-			DIRHASH_UNLOCK(dh);
-			DIRHASHLIST_UNLOCK();
-			return (-1);
-		}
-
 		/* Remove it from the list and detach its memory. */
 		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
 		dh->dh_onlist = 0;
@@ -1050,12 +1166,11 @@
 		blkfree = dh->dh_blkfree;
 		dh->dh_blkfree = NULL;
 		narrays = dh->dh_narrays;
-		mem = narrays * sizeof(*dh->dh_hash) +
-		    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
-		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
+		mem = dh->dh_memreq;
+		dh->dh_memreq = 0;
 
 		/* Unlock everything, free the detached memory. */
-		DIRHASH_UNLOCK(dh);
+		ufsdirhash_release(dh);
 		DIRHASHLIST_UNLOCK();
 		for (i = 0; i < narrays; i++)
 			DIRHASH_BLKFREE(hash[i]);
@@ -1065,6 +1180,7 @@
 		/* Account for the returned memory, and repeat if necessary. */
 		DIRHASHLIST_LOCK();
 		ufs_dirhashmem -= mem;
+		dh = TAILQ_FIRST(&ufsdirhash_list);
 	}
 	/* Success; return with list locked. */
 	return (0);
Index: ufs/ufs/inode.h
===================================================================
--- ufs/ufs/inode.h	(revision 183520)
+++ ufs/ufs/inode.h	(working copy)
@@ -82,8 +82,6 @@
 	doff_t	  i_endoff;	/* End of useful stuff in directory. */
 	doff_t	  i_diroff;	/* Offset in dir, where we found last entry. */
 	doff_t	  i_offset;	/* Offset of free space in directory. */
-	ino_t	  i_ino;	/* Inode number of found directory. */
-	u_int32_t i_reclen;	/* Size of found directory entry. */
 
 	union {
 		struct dirhash *dirhash; /* Hashing for large directories. */
Index: ufs/ufs/ufs_lookup.c
===================================================================
--- ufs/ufs/ufs_lookup.c	(revision 183520)
+++ ufs/ufs/ufs_lookup.c	(working copy)
@@ -137,6 +137,8 @@
 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
 	enum {NONE, COMPACT, FOUND} slotstatus;
 	doff_t slotoffset;		/* offset of area with free space */
+	doff_t i_diroff;		/* cached i_diroff value. */
+	doff_t i_offset;		/* cached i_offset value. */
 	int slotsize;			/* size of area at slotoffset */
 	int slotfreespace;		/* amount of space free in slot */
 	int slotneeded;			/* size of the entry we're seeking */
@@ -154,7 +156,8 @@
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 	struct thread *td = cnp->cn_thread;
-	ino_t saved_ino;
+	ino_t ino;
+	int ltype;
 
 	bp = NULL;
 	slotoffset = -1;
@@ -183,6 +186,8 @@
 	 * we watch for a place to put the new file in
 	 * case it doesn't already exist.
 	 */
+	ino = 0;
+	i_diroff = dp->i_diroff;
 	slotstatus = FOUND;
 	slotfreespace = slotsize = slotneeded = 0;
 	if ((nameiop == CREATE || nameiop == RENAME) &&
@@ -216,13 +221,13 @@
 		numdirpasses = 1;
 		entryoffsetinblock = 0; /* silence compiler warning */
 		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
-		    &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
+		    &i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
 		case 0:
 			ep = (struct direct *)((char *)bp->b_data +
-			    (dp->i_offset & bmask));
+			    (i_offset & bmask));
 			goto foundentry;
 		case ENOENT:
-			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
+			i_offset = roundup2(dp->i_size, DIRBLKSIZ);
 			goto notfound;
 		default:
 			/* Something failed; just do a linear search. */
@@ -241,33 +246,32 @@
 	 * profiling time and hence has been removed in the interest
 	 * of simplicity.
 	 */
-	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
-	    dp->i_diroff >= dp->i_size) {
+	if (nameiop != LOOKUP || i_diroff == 0 || i_diroff >= dp->i_size) {
 		entryoffsetinblock = 0;
-		dp->i_offset = 0;
+		i_offset = 0;
 		numdirpasses = 1;
 	} else {
-		dp->i_offset = dp->i_diroff;
-		if ((entryoffsetinblock = dp->i_offset & bmask) &&
-		    (error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
+		i_offset = i_diroff;
+		if ((entryoffsetinblock = i_offset & bmask) &&
+		    (error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp)))
 			return (error);
 		numdirpasses = 2;
 		nchstats.ncs_2passes++;
 	}
-	prevoff = dp->i_offset;
+	prevoff = i_offset;
 	endsearch = roundup2(dp->i_size, DIRBLKSIZ);
 	enduseful = 0;
 
 searchloop:
-	while (dp->i_offset < endsearch) {
+	while (i_offset < endsearch) {
 		/*
 		 * If necessary, get the next directory block.
 		 */
-		if ((dp->i_offset & bmask) == 0) {
+		if ((i_offset & bmask) == 0) {
 			if (bp != NULL)
 				brelse(bp);
 			error =
-			    UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp);
+			    UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp);
 			if (error)
 				return (error);
 			entryoffsetinblock = 0;
@@ -294,9 +298,9 @@
 		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
 			int i;
 
-			ufs_dirbad(dp, dp->i_offset, "mangled entry");
+			ufs_dirbad(dp, i_offset, "mangled entry");
 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
-			dp->i_offset += i;
+			i_offset += i;
 			entryoffsetinblock += i;
 			continue;
 		}
@@ -315,15 +319,15 @@
 			if (size > 0) {
 				if (size >= slotneeded) {
 					slotstatus = FOUND;
-					slotoffset = dp->i_offset;
+					slotoffset = i_offset;
 					slotsize = ep->d_reclen;
 				} else if (slotstatus == NONE) {
 					slotfreespace += size;
 					if (slotoffset == -1)
-						slotoffset = dp->i_offset;
+						slotoffset = i_offset;
 					if (slotfreespace >= slotneeded) {
 						slotstatus = COMPACT;
-						slotsize = dp->i_offset +
+						slotsize = i_offset +
 						      ep->d_reclen - slotoffset;
 					}
 				}
@@ -357,24 +361,22 @@
 				if (vdp->v_mount->mnt_maxsymlinklen > 0 &&
 				    ep->d_type == DT_WHT) {
 					slotstatus = FOUND;
-					slotoffset = dp->i_offset;
+					slotoffset = i_offset;
 					slotsize = ep->d_reclen;
-					dp->i_reclen = slotsize;
 					enduseful = dp->i_size;
 					ap->a_cnp->cn_flags |= ISWHITEOUT;
 					numdirpasses--;
 					goto notfound;
 				}
-				dp->i_ino = ep->d_ino;
-				dp->i_reclen = ep->d_reclen;
+				ino = ep->d_ino;
 				goto found;
 			}
 		}
-		prevoff = dp->i_offset;
-		dp->i_offset += ep->d_reclen;
+		prevoff = i_offset;
+		i_offset += ep->d_reclen;
 		entryoffsetinblock += ep->d_reclen;
 		if (ep->d_ino)
-			enduseful = dp->i_offset;
+			enduseful = i_offset;
 	}
 notfound:
 	/*
@@ -383,8 +385,8 @@
 	 */
 	if (numdirpasses == 2) {
 		numdirpasses--;
-		dp->i_offset = 0;
-		endsearch = dp->i_diroff;
+		i_offset = 0;
+		endsearch = i_diroff;
 		goto searchloop;
 	}
 	if (bp != NULL)
@@ -462,9 +464,9 @@
 	 * Check that directory length properly reflects presence
 	 * of this entry.
 	 */
-	if (dp->i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) {
-		ufs_dirbad(dp, dp->i_offset, "i_size too small");
-		dp->i_size = dp->i_offset + DIRSIZ(OFSFMT(vdp), ep);
+	if (i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) {
+		ufs_dirbad(dp, i_offset, "i_size too small");
+		dp->i_size = i_offset + DIRSIZ(OFSFMT(vdp), ep);
 		DIP_SET(dp, i_size, dp->i_size);
 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	}
@@ -476,13 +478,15 @@
 	 * in the cache as to where the entry was found.
 	 */
 	if ((flags & ISLASTCN) && nameiop == LOOKUP)
-		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
+		dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1);
 
 	/*
 	 * If deleting, and at end of pathname, return
 	 * parameters which can be used to remove file.
 	 */
 	if (nameiop == DELETE && (flags & ISLASTCN)) {
+		if (flags & LOCKPARENT)
+			ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
 		/*
 		 * Write access to directory required to delete files.
 		 */
@@ -494,17 +498,23 @@
 		 * and distance past previous entry (if there
 		 * is a previous entry in this block) in dp->i_count.
 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
+		 *
+		 * Technically dp->i_offset and dp->i_count should not be
+		 * set in the WANTPARENT case (first lookup in rename()),
+		 * but any lookup that results in a directory change will
+		 * overwrite them.
 		 */
+		dp->i_offset = i_offset;
 		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
 			dp->i_count = 0;
 		else
 			dp->i_count = dp->i_offset - prevoff;
-		if (dp->i_number == dp->i_ino) {
+		if (dp->i_number == ino) {
 			VREF(vdp);
 			*vpp = vdp;
 			return (0);
 		}
-		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino,
+		if ((error = VFS_VGET(vdp->v_mount, ino,
 		    LK_EXCLUSIVE, &tdp)) != 0)
 			return (error);
 		/*
@@ -536,9 +546,10 @@
 		 * Careful about locking second inode.
 		 * This can only occur if the target is ".".
 		 */
-		if (dp->i_number == dp->i_ino)
+		dp->i_offset = i_offset;
+		if (dp->i_number == ino)
 			return (EISDIR);
-		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino,
+		if ((error = VFS_VGET(vdp->v_mount, ino,
 		    LK_EXCLUSIVE, &tdp)) != 0)
 			return (error);
 		*vpp = tdp;
@@ -567,20 +578,29 @@
 	 */
 	pdp = vdp;
 	if (flags & ISDOTDOT) {
-		saved_ino = dp->i_ino;
+		ltype = VOP_ISLOCKED(pdp, td);
 		VOP_UNLOCK(pdp, 0, td);	/* race to get the inode */
-		error = VFS_VGET(pdp->v_mount, saved_ino,
-		    cnp->cn_lkflags, &tdp);
-		vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td);
+		error = VFS_VGET(pdp->v_mount, ino, cnp->cn_lkflags, &tdp);
+		vn_lock(pdp, ltype | LK_RETRY, td);
 		if (error)
 			return (error);
 		*vpp = tdp;
-	} else if (dp->i_number == dp->i_ino) {
+	} else if (dp->i_number == ino) {
 		VREF(vdp);	/* we want ourself, ie "." */
+		/*
+		 * When we look up "." we can still be asked to lock it
+		 * differently.
+		 */
+		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
+		if (ltype != VOP_ISLOCKED(vdp, td)) {
+			if (ltype == LK_EXCLUSIVE)
+				vn_lock(vdp, LK_UPGRADE | LK_RETRY, td);
+			else /* if (ltype == LK_SHARED) */
+				vn_lock(vdp, LK_DOWNGRADE | LK_RETRY, td);
+		}
 		*vpp = vdp;
 	} else {
-		error = VFS_VGET(pdp->v_mount, dp->i_ino,
-		    cnp->cn_lkflags, &tdp);
+		error = VFS_VGET(pdp->v_mount, ino, cnp->cn_lkflags, &tdp);
 		if (error)
 			return (error);
 		*vpp = tdp;
@@ -984,7 +1004,7 @@
 	int isrmdir;
 {
 	struct inode *dp;
-	struct direct *ep;
+	struct direct *ep, *rep;
 	struct buf *bp;
 	int error;
 
@@ -1005,14 +1025,19 @@
 	if ((error = UFS_BLKATOFF(dvp,
 	    (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0)
 		return (error);
+
+	/* Set 'rep' to the entry being removed. */
+	if (dp->i_count == 0)
+		rep = ep;
+	else
+		rep = (struct direct *)((char *)ep + ep->d_reclen);
 #ifdef UFS_DIRHASH
 	/*
 	 * Remove the dirhash entry. This is complicated by the fact
 	 * that `ep' is the previous entry when dp->i_count != 0.
 	 */
 	if (dp->i_dirhash != NULL)
-		ufsdirhash_remove(dp, (dp->i_count == 0) ? ep :
-		   (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset);
+		ufsdirhash_remove(dp, rep, dp->i_offset);
 #endif
 	if (dp->i_count == 0) {
 		/*
@@ -1023,7 +1048,7 @@
 		/*
 		 * Collapse new free space into previous entry.
 		 */
-		ep->d_reclen += dp->i_reclen;
+		ep->d_reclen += rep->d_reclen;
 	}
 #ifdef UFS_DIRHASH
 	if (dp->i_dirhash != NULL)
Index: ufs/ufs/dirhash.h
===================================================================
--- ufs/ufs/dirhash.h	(revision 183520)
+++ ufs/ufs/dirhash.h	(working copy)
@@ -28,6 +28,9 @@
 #ifndef _UFS_UFS_DIRHASH_H_
 #define _UFS_UFS_DIRHASH_H_
 
+#include <sys/_lock.h>
+#include <sys/_sx.h>
+
 /*
  * For fast operations on large directories, we maintain a hash
  * that maps the file name to the offset of the directory entry within
@@ -80,12 +83,14 @@
     ((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK])
 
 struct dirhash {
-	struct mtx dh_mtx;	/* protects all fields except dh_list */
+	struct sx dh_lock;	/* protects all fields except list & score */
+	int	dh_refcount;
 
 	doff_t	**dh_hash;	/* the hash array (2-level) */
 	int	dh_narrays;	/* number of entries in dh_hash */
 	int	dh_hlen;	/* total slots in the 2-level hash array */
 	int	dh_hused;	/* entries in use */
+	int	dh_memreq;	/* Memory used. */
 
 	/* Free space statistics. XXX assumes DIRBLKSIZ is 512. */
 	u_int8_t *dh_blkfree;	/* free DIRALIGN words in each dir block */
Index: ufs/ffs/ffs_vfsops.c
===================================================================
--- ufs/ffs/ffs_vfsops.c	(revision 183520)
+++ ufs/ffs/ffs_vfsops.c	(working copy)
@@ -852,7 +852,7 @@
 	 * Initialize filesystem stat information in mount struct.
 	 */
 	MNT_ILOCK(mp);
-	mp->mnt_kern_flag |= MNTK_MPSAFE;
+	mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED;
 	MNT_IUNLOCK(mp);
 #ifdef UFS_EXTATTR
 #ifdef UFS_EXTATTR_AUTOSTART
Index: kern/vfs_lookup.c
===================================================================
--- kern/vfs_lookup.c	(revision 183520)
+++ kern/vfs_lookup.c	(working copy)
@@ -39,7 +39,6 @@
 
 #include "opt_ktrace.h"
 #include "opt_mac.h"
-#include "opt_vfs.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -88,13 +87,10 @@
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
 
-#ifdef LOOKUP_SHARED
-static int lookup_shared = 1;
-#else
 static int lookup_shared = 0;
-#endif
 SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0,
     "Enables/Disables shared locks for path name translation");
+TUNABLE_INT("vfs.lookup_shared", &lookup_shared);
 
 /*
  * Convert a pathname into a pointer to a locked vnode.
Index: kern/vfs_cache.c
===================================================================
--- kern/vfs_cache.c	(revision 183520)
+++ kern/vfs_cache.c	(working copy)
@@ -300,7 +300,9 @@
  * succeeds, the vnode is returned in *vpp, and a status of -1 is
  * returned. If the lookup determines that the name does not exist
  * (negative cacheing), a status of ENOENT is returned. If the lookup
- * fails, a status of zero is returned.
+ * fails, a status of zero is returned.  If the directory vnode is
+ * recycled out from under us due to a forced unmount, a status of
+ * EBADF is returned.
  *
  * vpp is locked and ref'd on return.  If we're looking up DOTDOT, dvp is
  * unlocked.  If we're looking up . an extra ref is taken, but the lock is
@@ -425,11 +427,19 @@
 		 * When we lookup "." we still can be asked to lock it
 		 * differently...
 		 */
-		ltype = cnp->cn_lkflags & (LK_SHARED | LK_EXCLUSIVE);
-		if (ltype == VOP_ISLOCKED(*vpp, td))
-			return (-1);
-		else if (ltype == LK_EXCLUSIVE)
-			vn_lock(*vpp, LK_UPGRADE | LK_RETRY, td);
+		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
+		if (ltype != VOP_ISLOCKED(*vpp, td)) {
+			if (ltype == LK_EXCLUSIVE) {
+				vn_lock(*vpp, LK_UPGRADE | LK_RETRY, td);
+				if ((*vpp)->v_iflag & VI_DOOMED) {
+					/* forced unmount */
+					vrele(*vpp);
+					*vpp = NULL;
+					return (EBADF);
+				}
+			} else
+				vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY, td);
+		}
 		return (-1);
 	}
 	ltype = 0;	/* silence gcc warning */
@@ -442,12 +452,14 @@
 	error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, td);
 	if (cnp->cn_flags & ISDOTDOT)
 		vn_lock(dvp, ltype | LK_RETRY, td);
-	if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_lkflags & LK_EXCLUSIVE))
-		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
 	if (error) {
 		*vpp = NULL;
 		goto retry;
 	}
+	if ((cnp->cn_flags & ISLASTCN) &&
+	    (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
+		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
+	}
 	return (-1);
 }
 
@@ -496,8 +508,39 @@
 
 	hold = 0;
 	zap = 0;
+
+	/*
+	 * Calculate the hash key and set up as much of the new
+	 * namecache entry as possible before acquiring the lock.
+	 */
 	ncp = cache_alloc(cnp->cn_namelen);
+	ncp->nc_vp = vp;
+	ncp->nc_dvp = dvp;
+	len = ncp->nc_nlen = cnp->cn_namelen;
+	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
+	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
+	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 	CACHE_LOCK();
+
+	/*
+	 * See if this vnode is already in the cache with this name.
+	 * This can happen with concurrent lookups of the same path
+	 * name.
+	 */
+	if (vp) {
+		struct namecache *n2;
+
+		TAILQ_FOREACH(n2, &vp->v_cache_dst, nc_dst) {
+			if (n2->nc_dvp == dvp &&
+			    n2->nc_nlen == cnp->cn_namelen &&
+			    !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
+				CACHE_UNLOCK();
+				cache_free(ncp);
+				return;
+			}
+		}
+	}	
+
 	numcache++;
 	if (!vp) {
 		numneg++;
@@ -509,16 +552,9 @@
 	}
 
 	/*
-	 * Set the rest of the namecache entry elements, calculate it's
-	 * hash key and insert it into the appropriate chain within
-	 * the cache entries table.
+	 * Insert the new namecache entry into the appropriate chain
+	 * within the cache entries table.
 	 */
-	ncp->nc_vp = vp;
-	ncp->nc_dvp = dvp;
-	len = ncp->nc_nlen = cnp->cn_namelen;
-	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
-	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
-	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 	ncpp = NCHHASH(hash);
 	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
 	if (LIST_EMPTY(&dvp->v_cache_src)) {
@@ -643,9 +679,9 @@
 	error = cache_lookup(dvp, vpp, cnp);
 	if (error == 0)
 		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
-	if (error == ENOENT)
-		return (error);
-	return (0);
+	if (error == -1)
+		return (0);
+	return (error);
 }