Drop the ARC hash lock while a new data buffer is being allocated.

arc_get_data_buf() gains a hash_lock argument (NULL when the caller holds no
hash lock).  When it has to call zio_buf_alloc()/zio_data_buf_alloc() it
releases hash_lock around the call, re-acquires it afterwards and returns
non-zero to report that the lock was dropped.  The hash-table walk is split
out of buf_hash_find() into buf_hash_lookup(), which expects the hash lock to
be held by the caller, so the header can be looked up again afterwards.

NOTE(review): the caller in the last hunk re-validates hdr only through
ASSERT(); if ASSERT() is compiled out in production builds nothing re-checks
hdr after the lock was dropped -- confirm this is safe.
NOTE(review): leading indentation in this patch was reconstructed from a
whitespace-collapsed copy; use "patch -l" if the context does not match.

Index: arc.c
===================================================================
RCS file: /zoo/pjd/repo/src/sys/contrib/opensolaris/uts/common/fs/zfs/arc.c,v
retrieving revision 1.11
diff -u -p -r1.11 arc.c
--- arc.c	10 Sep 2007 18:12:27 -0000	1.11
+++ arc.c	10 Nov 2007 13:25:36 -0000
@@ -373,7 +373,7 @@ struct arc_buf_hdr {
 static arc_buf_t *arc_eviction_list;
 static kmutex_t arc_eviction_mtx;
 static arc_buf_hdr_t arc_eviction_hdr;
-static void arc_get_data_buf(arc_buf_t *buf);
+static boolean_t arc_get_data_buf(arc_buf_t *buf, kmutex_t *hash_lock);
 static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
 
 #define GHOST_STATE(state) \
@@ -460,19 +460,37 @@ buf_hash(spa_t *spa, dva_t *dva, uint64_
 	((buf)->b_birth == birth) && ((buf)->b_spa == spa)
 
+/*
+ * Return the header matching (spa, dva, birth) from the hash table, or NULL
+ * if there is none.  The caller must already hold the hash lock for this
+ * bucket.
+ */
 static arc_buf_hdr_t *
-buf_hash_find(spa_t *spa, dva_t *dva, uint64_t birth, kmutex_t **lockp)
+buf_hash_lookup(spa_t *spa, dva_t *dva, uint64_t birth)
 {
 	uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
-	kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
 	arc_buf_hdr_t *buf;
 
-	mutex_enter(hash_lock);
+	ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx)));
 	for (buf = buf_hash_table.ht_table[idx]; buf != NULL;
 	    buf = buf->b_hash_next) {
-		if (BUF_EQUAL(spa, dva, birth, buf)) {
-			*lockp = hash_lock;
+		if (BUF_EQUAL(spa, dva, birth, buf))
 			return (buf);
-		}
+	}
+	return (NULL);
+}
+
+static arc_buf_hdr_t *
+buf_hash_find(spa_t *spa, dva_t *dva, uint64_t birth, kmutex_t **lockp)
+{
+	uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
+	kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
+	arc_buf_hdr_t *buf;
+
+	mutex_enter(hash_lock);
+	buf = buf_hash_lookup(spa, dva, birth);
+	if (buf != NULL) {
+		*lockp = hash_lock;
+		return (buf);
 	}
 	mutex_exit(hash_lock);
 	*lockp = NULL;
@@ -878,7 +896,7 @@ arc_buf_alloc(spa_t *spa, int size, void
 	buf->b_private = NULL;
 	buf->b_next = NULL;
 	hdr->b_buf = buf;
-	arc_get_data_buf(buf);
+	arc_get_data_buf(buf, NULL);
 	hdr->b_datacnt = 1;
 	hdr->b_flags = 0;
 	ASSERT(refcount_is_zero(&hdr->b_refcnt));
@@ -901,7 +919,7 @@ arc_buf_clone(arc_buf_t *from)
 	buf->b_private = NULL;
 	buf->b_next = hdr->b_buf;
 	hdr->b_buf = buf;
-	arc_get_data_buf(buf);
+	arc_get_data_buf(buf, NULL);
 	bcopy(from->b_data, buf->b_data, size);
 	hdr->b_datacnt += 1;
 	return (buf);
@@ -1665,12 +1683,18 @@ arc_evict_needed()
  * MFU's resident set is consuming more space than it has been allotted. In
  * this situation, we must victimize our own cache, the MFU, for this insertion.
+ *
+ * If hash_lock is non-NULL and a new buffer has to be allocated, hash_lock is
+ * dropped around the zio_buf_alloc() or zio_data_buf_alloc() call and
+ * re-acquired afterwards.  In that case the return value is non-zero, telling
+ * the caller that state protected by hash_lock may need to be looked up again.
  */
-static void
-arc_get_data_buf(arc_buf_t *buf)
+static boolean_t
+arc_get_data_buf(arc_buf_t *buf, kmutex_t *hash_lock)
 {
 	arc_state_t *state = buf->b_hdr->b_state;
 	uint64_t size = buf->b_hdr->b_size;
 	arc_buf_contents_t type = buf->b_hdr->b_type;
+	boolean_t relookup = 0;
 
 	arc_adapt(size, state);
 
@@ -1679,12 +1703,18 @@ arc_get_data_buf(arc_buf_t *buf)
 	 * just allocate a new buffer.
 	 */
 	if (!arc_evict_needed()) {
+		if (hash_lock != NULL) {
+			relookup = 1;
+			mutex_exit(hash_lock);
+		}
 		if (type == ARC_BUFC_METADATA) {
 			buf->b_data = zio_buf_alloc(size);
 		} else {
 			ASSERT(type == ARC_BUFC_DATA);
 			buf->b_data = zio_data_buf_alloc(size);
 		}
+		if (hash_lock != NULL)
+			mutex_enter(hash_lock);
 		atomic_add_64(&arc_size, size);
 		goto out;
 	}
@@ -1707,12 +1737,18 @@ arc_get_data_buf(arc_buf_t *buf)
 		state = (mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu;
 	}
 	if ((buf->b_data = arc_evict(state, size, TRUE, type)) == NULL) {
+		if (hash_lock != NULL) {
+			relookup = 1;
+			mutex_exit(hash_lock);
+		}
 		if (type == ARC_BUFC_METADATA) {
 			buf->b_data = zio_buf_alloc(size);
 		} else {
 			ASSERT(type == ARC_BUFC_DATA);
 			buf->b_data = zio_data_buf_alloc(size);
 		}
+		if (hash_lock != NULL)
+			mutex_enter(hash_lock);
 		atomic_add_64(&arc_size, size);
 		ARCSTAT_BUMP(arcstat_recycle_miss);
 	}
@@ -1738,6 +1774,7 @@ out:
 		    arc_anon->arcs_size + arc_mru->arcs_size > arc_p)
 			arc_p = MIN(arc_c, arc_p + size);
 	}
+	return (relookup);
 }
 
 /*
@@ -2151,7 +2188,14 @@ top:
 			buf->b_private = NULL;
 			buf->b_next = NULL;
 			hdr->b_buf = buf;
-			arc_get_data_buf(buf);
+			if (arc_get_data_buf(buf, hash_lock)) {
+				/*
+				 * hash_lock was dropped and re-taken;
+				 * hdr must still be in the hash table.
+				 */
+				ASSERT(hdr == buf_hash_lookup(spa,
+				    BP_IDENTITY(bp), bp->blk_birth));
+			}
 			ASSERT(hdr->b_datacnt == 0);
 			hdr->b_datacnt = 1;
 