/*
 * Reviewer summary (free-form preamble placed ahead of the first file
 * header; the patch text that follows is unchanged).
 *
 * NOTE: in this copy the patch's original line breaks have been collapsed
 * to single spaces, so hunk bodies no longer sit one source line per
 * patch line.
 *
 * What the change does, per file:
 *   sys/fs/nfs/nfsport.h
 *     - drops the NFSCACHEMUTEX, NFSCACHEMUTEXPTR, NFSLOCKCACHE(),
 *       NFSUNLOCKCACHE() and NFSCACHELOCKREQUIRED() macros.
 *   sys/fs/nfsserver/nfs_nfsdport.c
 *     - drops the nfs_cache_mutex definition together with its mtx_init()
 *       and mtx_destroy() calls.
 *   sys/fs/nfsserver/nfs_nfsdcache.c
 *     - removes the nfsrvudplru TAILQ from struct nfsrchash_bucket and
 *       replaces it with one file-static nfsrvudplru list guarded by the
 *       new nfsrvudplru_lock mutex (helpers nfsrvudplru_xlock() and
 *       nfsrvudplru_unlock()).
 *     - mtx_sleep() callers now pass the bucket mutex (&hb->lock) in place
 *       of NFSCACHEMUTEXPTR.
 *     - nfsrc_freecache() gains an lru_locked flag: when it is false the
 *       function takes and releases nfsrvudplru_lock around its own
 *       TAILQ_REMOVE.
 *     - UDP LRU trimming moves out of nfsrc_trimcache_bucket() into the
 *       new nfsrc_trimudplru(), which nfsrc_trimcache() calls before it
 *       walks the buckets.
 *     - adds several commented-out log(LOG_DEBUG, ...) lines.
 *
 * NOTE(review) - items to confirm against the full nfs_nfsdcache.c:
 *   1. nfsrc_trimudplru() calls nfsrc_freecache(rp, TRUE) while holding
 *      only nfsrvudplru_lock, yet nfsrc_freecache() still begins with
 *      nfsrchash_xidbucketlockrequired() (a KASSERT on mtx_owned() of the
 *      bucket lock) and then does LIST_REMOVE(rp, rc_hash).  No bucket
 *      lock is taken on the nfsrc_trimcache() -> nfsrc_trimudplru() path
 *      shown here, so the assertion looks like it would fire when
 *      assertions are compiled in, and the hash-chain removal plus the
 *      rc_flag / rc_refcnt reads look unprotected.
 *   2. Every other path visible here takes a bucket lock first and
 *      nfsrvudplru_lock second.  A fix for (1) that acquires a bucket
 *      lock inside the LRU walk would invert that order unless it uses a
 *      non-blocking acquire or restructures the walk.
 *   3. The flush loop (the hunk starting at old line 843) acquires all
 *      bucket locks and then nfsrvudplru_lock, but releases them in the
 *      same order instead of the reverse.
 *   4. nfsrc_udpcachesize-- in nfsrc_freecache() runs under whichever
 *      bucket lock the caller holds, and outside nfsrvudplru_lock when
 *      lru_locked is false; no single lock visibly covers the counter.
 *   5. nfsrc_trimudplru is defined with an empty parameter list "()"
 *      although its prototype is declared "(void)".
 *   6. The commented-out log line at the top of the nfsrc_trimcache()
 *      loop prints hb before the next statement assigns it; enabling it
 *      as written would read an unassigned pointer.
 *   7. No mtx_destroy() for nfsrvudplru_lock appears in this patch;
 *      presumably one is needed wherever the cache is torn down - verify.
 */
diff -r 223a3d4ccf29 -r bade5b0c49ed sys/fs/nfs/nfsport.h --- a/sys/fs/nfs/nfsport.h Mon Oct 15 13:29:53 2012 +0200 +++ b/sys/fs/nfs/nfsport.h Sat Jan 05 01:55:59 2013 +0100 @@ -546,11 +546,6 @@ #define NFSREQSPINLOCK extern struct mtx nfs_req_mutex #define NFSLOCKREQ() mtx_lock(&nfs_req_mutex) #define NFSUNLOCKREQ() mtx_unlock(&nfs_req_mutex) -#define NFSCACHEMUTEX extern struct mtx nfs_cache_mutex -#define NFSCACHEMUTEXPTR (&nfs_cache_mutex) -#define NFSLOCKCACHE() mtx_lock(&nfs_cache_mutex) -#define NFSUNLOCKCACHE() mtx_unlock(&nfs_cache_mutex) -#define NFSCACHELOCKREQUIRED() mtx_assert(&nfs_cache_mutex, MA_OWNED) #define NFSSOCKMUTEX extern struct mtx nfs_slock_mutex #define NFSSOCKMUTEXPTR (&nfs_slock_mutex) #define NFSLOCKSOCK() mtx_lock(&nfs_slock_mutex) diff -r 223a3d4ccf29 -r bade5b0c49ed sys/fs/nfsserver/nfs_nfsdcache.c --- a/sys/fs/nfsserver/nfs_nfsdcache.c Mon Oct 15 13:29:53 2012 +0200 +++ b/sys/fs/nfsserver/nfs_nfsdcache.c Sat Jan 05 01:55:59 2013 +0100 @@ -160,7 +160,6 @@ #include extern struct nfsstats newnfsstats; -NFSCACHEMUTEX; int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0; #endif /* !APPLEKEXT */ @@ -169,15 +168,18 @@ /* The fine-grained locked cache hash table */ struct nfsrchash_bucket { - struct mtx lock; - char lock_name[8]; - struct nfsrvhashhead nfsrvhashtbl; - struct nfsrvhashhead nfsrvudphashtbl; - TAILQ_HEAD(, nfsrvcache) nfsrvudplru; + struct mtx lock; + char lock_name[8]; + struct nfsrvhashhead nfsrvhashtbl; + struct nfsrvhashhead nfsrvudphashtbl; }; static struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; +static TAILQ_HEAD(, nfsrvcache) nfsrvudplru; +static struct mtx nfsrvudplru_lock; +static char* nfsrvudplru_lock_name = "nfslrul"; + static inline struct nfsrchash_bucket *nfsrchash_getxidbucket(uint32_t xid); static inline struct nfsrchash_bucket *nfsrchash_getbucket(int i); static inline void nfsrchash_lockbucket(struct nfsrchash_bucket *b); @@ -185,11 +187,13 @@ static inline void 
nfsrchash_xidbucketlockrequired(uint32_t xid); static inline void nfsrchash_lockallbuckets(void); static inline void nfsrchash_unlockallbuckets(void); +static inline void nfsrvudplru_xlock(void); +static inline void nfsrvudplru_unlock(void); static inline struct nfsrchash_bucket * nfsrchash_getxidbucket(uint32_t xid) { - return &nfsrchash_table[(xid + (xid >> 24)) % NFSRVCACHE_HASHSIZE]; + return (&nfsrchash_table[(xid + (xid >> 24)) % NFSRVCACHE_HASHSIZE]); } static inline struct nfsrchash_bucket * @@ -197,18 +201,20 @@ { KASSERT(i >= 0 && i < NFSRVCACHE_HASHSIZE, ("Invalid hash bucket %d", i)); - return &nfsrchash_table[i]; + return (&nfsrchash_table[i]); } static inline void nfsrchash_lockbucket(struct nfsrchash_bucket *b) { +// log(LOG_DEBUG, "Locking bucket %p\n", b); mtx_lock(&b->lock); } static inline void nfsrchash_unlockbucket(struct nfsrchash_bucket *b) { +// log(LOG_DEBUG, "Unlocking bucket %p\n", b); mtx_unlock(&b->lock); } @@ -216,7 +222,8 @@ nfsrchash_xidbucketlockrequired(uint32_t xid) { KASSERT(mtx_owned(&(nfsrchash_getxidbucket(xid)->lock)), - ("nfsrchash bucket lock not owned for xid %u", xid)); + ("nfsrchash bucket lock not owned for xid %u, bucket %p", + xid, nfsrchash_getxidbucket(xid))); } static inline void @@ -226,6 +233,7 @@ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) nfsrchash_lockbucket(nfsrchash_getbucket(i)); +// log(LOG_DEBUG, "nfsrchash_lockallbuckets\n"); } static inline void @@ -235,6 +243,19 @@ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) nfsrchash_unlockbucket(nfsrchash_getbucket(i)); +// log(LOG_DEBUG, "nfsrchash_unlockallbuckets\n"); +} + +static inline void +nfsrvudplru_xlock(void) +{ + mtx_lock(&nfsrvudplru_lock); +} + +static inline void +nfsrvudplru_unlock(void) +{ + mtx_unlock(&nfsrvudplru_lock); } /* @@ -307,10 +328,11 @@ static void nfsrc_lock(struct nfsrvcache *rp); static void nfsrc_unlock(struct nfsrvcache *rp); static void nfsrc_wanted(struct nfsrvcache *rp); -static void nfsrc_freecache(struct nfsrvcache *rp); +static 
void nfsrc_freecache(struct nfsrvcache *rp, int lru_locked); static void nfsrc_trimcache_bucket(struct nfsrchash_bucket *hb, u_int64_t, struct socket *so); static void nfsrc_trimcache(u_int64_t, struct socket *); +static void nfsrc_trimudplru(void); static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t, struct socket *); static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum); static void nfsrc_marksametcpconn(u_int64_t); @@ -331,11 +353,12 @@ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { LIST_INIT(&nfsrchash_table[i].nfsrvudphashtbl); LIST_INIT(&nfsrchash_table[i].nfsrvhashtbl); - TAILQ_INIT(&nfsrchash_table[i].nfsrvudplru); sprintf(nfsrchash_table[i].lock_name, "nfsh%d", i); mtx_init(&nfsrchash_table[i].lock, nfsrchash_table[i].lock_name, NULL, MTX_DEF); } + TAILQ_INIT(&nfsrvudplru); + mtx_init(&nfsrvudplru_lock, nfsrvudplru_lock_name, NULL, MTX_DEF); nfsrc_tcpsavedreplies = 0; nfsrc_udpcachesize = 0; newnfsstats.srvcache_tcppeak = 0; @@ -407,15 +430,18 @@ nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, + (void)mtx_sleep(rp, &hb->lock, (PZERO - 1) | PDROP, "nfsrc", 10 * hz); +// log(LOG_DEBUG, "goto loop"); goto loop; } if (rp->rc_flag == 0) panic("nfs udp cache0"); rp->rc_flag |= RC_LOCKED; - TAILQ_REMOVE(&hb->nfsrvudplru, rp, rc_lru); - TAILQ_INSERT_TAIL(&hb->nfsrvudplru, rp, rc_lru); + nfsrvudplru_xlock(); + TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); + TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); + nfsrvudplru_unlock(); if (rp->rc_flag & RC_INPROG) { newnfsstats.srvcache_inproghits++; nfsrchash_unlockbucket(hb); @@ -462,7 +488,9 @@ newrp->rc_flag |= RC_INETIPV6; } LIST_INSERT_HEAD(hp, newrp, rc_hash); - TAILQ_INSERT_TAIL(&hb->nfsrvudplru, newrp, rc_lru); + nfsrvudplru_xlock(); + TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru); + nfsrvudplru_unlock(); nfsrchash_unlockbucket(hb); nd->nd_rp = newrp; ret = RC_DOIT; @@ -494,8 +522,10 @@ 
panic("nfsrvd_updatecache not inprog"); rp->rc_flag &= ~RC_INPROG; if (rp->rc_flag & RC_UDP) { - TAILQ_REMOVE(&hb->nfsrvudplru, rp, rc_lru); - TAILQ_INSERT_TAIL(&hb->nfsrvudplru, rp, rc_lru); + nfsrvudplru_xlock(); + TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); + TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); + nfsrvudplru_unlock(); } /* @@ -566,7 +596,7 @@ retrp = rp; } } else { - nfsrc_freecache(rp); + nfsrc_freecache(rp, FALSE); nfsrchash_unlockbucket(hb); } @@ -591,7 +621,7 @@ nfsrchash_lockbucket(hb); rp->rc_flag &= ~RC_INPROG; if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED)) - nfsrc_freecache(rp); + nfsrc_freecache(rp, FALSE); nfsrchash_unlockbucket(hb); } @@ -642,6 +672,7 @@ hb = nfsrchash_getxidbucket(newrp->rc_xid); hp = &hb->nfsrvhashtbl; newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum); + tryagain: nfsrchash_lockbucket(hb); hit = 1; @@ -701,8 +732,9 @@ rp = hitrp; if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, + (void)mtx_sleep(rp, &hb->lock, (PZERO - 1) | PDROP, "nfsrc", 10 * hz); +// log(LOG_DEBUG, "goto tryagain"); goto tryagain; } if (rp->rc_flag == 0) @@ -770,10 +802,13 @@ static void nfsrc_lock(struct nfsrvcache *rp) { + struct nfsrchash_bucket *hb; + + hb = nfsrchash_getxidbucket(rp->rc_xid); nfsrchash_xidbucketlockrequired(rp->rc_xid); while ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, PZERO - 1, + (void)mtx_sleep(rp, &hb->lock, PZERO - 1, "nfsrc", 0); } rp->rc_flag |= RC_LOCKED; @@ -807,19 +842,24 @@ } /* - * Free up the entry. - * Must not sleep. + * Free up the entry. Must not sleep. + * The lru_locked argument provides context about the state of the + * nfsrvudplru_lock, as this function may be called from contexts where + * it is either convenient or inconvenient to manually lock it beforehand. 
*/ static void -nfsrc_freecache(struct nfsrvcache *rp) +nfsrc_freecache(struct nfsrvcache *rp, int lru_locked) { - struct nfsrchash_bucket *hb; - nfsrchash_xidbucketlockrequired(rp->rc_xid); - hb = nfsrchash_getxidbucket(rp->rc_xid); + /* XXX: do we only have to take care of the LRU lock? */ +// log(LOG_DEBUG, "freecache on %p", rp); LIST_REMOVE(rp, rc_hash); if (rp->rc_flag & RC_UDP) { - TAILQ_REMOVE(&hb->nfsrvudplru, rp, rc_lru); + if (!lru_locked) + nfsrvudplru_xlock(); + TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); + if (!lru_locked) + nfsrvudplru_unlock(); nfsrc_udpcachesize--; } nfsrc_wanted(rp); @@ -843,21 +883,23 @@ int i; nfsrchash_lockallbuckets(); + nfsrvudplru_xlock(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { hb = nfsrchash_getbucket(i); LIST_FOREACH_SAFE(rp, &hb->nfsrvhashtbl, rc_hash, nextrp) { - nfsrc_freecache(rp); + nfsrc_freecache(rp, TRUE); } } for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { hb = nfsrchash_getbucket(i); LIST_FOREACH_SAFE(rp, &hb->nfsrvudphashtbl, rc_hash, nextrp) { - nfsrc_freecache(rp); + nfsrc_freecache(rp, TRUE); } } newnfsstats.srvcache_size = 0; nfsrc_tcpsavedreplies = 0; nfsrchash_unlockallbuckets(); + nfsrvudplru_unlock(); } /* @@ -870,24 +912,38 @@ { struct nfsrvcache *rp, *nextrp; - TAILQ_FOREACH_SAFE(rp, &hb->nfsrvudplru, rc_lru, nextrp) { - if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) - && rp->rc_refcnt == 0 - && ((rp->rc_flag & RC_REFCNT) || - NFSD_MONOSEC > rp->rc_timestamp || - nfsrc_udpcachesize > nfsrc_udphighwater)) - nfsrc_freecache(rp); - } + mtx_assert(&hb->lock, MA_OWNED); LIST_FOREACH_SAFE(rp, &hb->nfsrvhashtbl, rc_hash, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) && rp->rc_refcnt == 0 && ((rp->rc_flag & RC_REFCNT) || NFSD_MONOSEC > rp->rc_timestamp || nfsrc_activesocket(rp, sockref, so))) - nfsrc_freecache(rp); + nfsrc_freecache(rp, FALSE); } } +static void +nfsrc_trimudplru() +{ + struct nfsrvcache *rp, *nextrp; + +// log(LOG_DEBUG, "nfsrc_trimudplru\n"); + + nfsrvudplru_xlock(); + 
TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) { + if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) + && rp->rc_refcnt == 0 + && ((rp->rc_flag & RC_REFCNT) || + NFSD_MONOSEC > rp->rc_timestamp || + nfsrc_udpcachesize > nfsrc_udphighwater)) { + nfsrc_freecache(rp, TRUE); + } + } + nfsrvudplru_unlock(); +// log(LOG_DEBUG, "nfsrc_trimudplru exit\n"); +} + /* * The basic rule is to get rid of entries that are expired. */ @@ -897,12 +953,17 @@ struct nfsrchash_bucket *hb; int i; +// log(LOG_DEBUG, "nfsrc_trimcache\n"); + nfsrc_trimudplru(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { +// log(LOG_DEBUG, "trimcache bucket %d %p\n", i, hb); hb = nfsrchash_getbucket(i); nfsrchash_lockbucket(hb); nfsrc_trimcache_bucket(hb, sockref, so); nfsrchash_unlockbucket(hb); +// log(LOG_DEBUG, "trimcache bucket %d %p OK\n", i, hb); } +// log(LOG_DEBUG, "nfsrc_trimucache exit\n"); } /* @@ -936,7 +997,8 @@ panic("nfs cache derefcnt"); rp->rc_refcnt--; if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG))) - nfsrc_freecache(rp); + nfsrc_freecache(rp, FALSE); + nfsrchash_unlockbucket(hb); } diff -r 223a3d4ccf29 -r bade5b0c49ed sys/fs/nfsserver/nfs_nfsdport.c --- a/sys/fs/nfsserver/nfs_nfsdport.c Mon Oct 15 13:29:53 2012 +0200 +++ b/sys/fs/nfsserver/nfs_nfsdport.c Sat Jan 05 01:55:59 2013 +0100 @@ -59,7 +59,6 @@ extern SVCPOOL *nfsrvd_pool; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; -struct mtx nfs_cache_mutex; struct mtx nfs_v4root_mutex; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, nfs_rootfhset = 0; @@ -3274,7 +3273,6 @@ if (loaded) goto out; newnfs_portinit(); - mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL, MTX_DEF); @@ -3318,7 +3316,6 @@ svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ - mtx_destroy(&nfs_cache_mutex); mtx_destroy(&nfs_v4root_mutex); 
mtx_destroy(&nfsv4root_mnt.mnt_mtx); lockdestroy(&nfsv4root_mnt.mnt_explock);