nfsd: replace the single server reply-cache mutex with per-hash-bucket mutexes.

Every hash bucket (struct nfsrchash_bucket) now owns a mutex, the TCP and UDP
hash chains for that bucket, and its own UDP LRU queue.  The bucket is chosen
from the request xid with the same hash the old NFSRCHASH()/NFSRCUDPHASH()
macros used.  nfsrvd_cleancache() takes every bucket mutex; nfsrc_trimcache()
walks the buckets one at a time, locking only the bucket being trimmed.

Review fixes folded into this revision of the patch:
  * nfsrc_lock() asserted ownership of the bucket mutex but still slept on
    NFSCACHEMUTEXPTR.  mtx_sleep() must be handed the mutex the caller holds,
    so it now sleeps on the bucket mutex.  This adds one line, so the new-side
    start of every later hunk moves down by one.
  * The bucket lock name is formatted with snprintf() bounded by the size of
    lock_name[] instead of an unbounded sprintf().
  * nfsrchash_lockallbuckets()/nfsrchash_unlockallbuckets() are defined with
    (void) to match their prototypes.

Review notes (not changed here):
  * nfsrc_udpcachesize, nfsrc_tcpsavedreplies and the newnfsstats counters are
    still plain globals but are now updated under different bucket mutexes.
  * NOTE(review): any sleep on NFSCACHEMUTEXPTR in code outside these hunks
    needs the same conversion as nfsrc_lock() - confirm against the full file.
  * NOTE(review): this file was recovered from a whitespace-collapsed copy.
    Indentation and line wrapping of context lines are reconstructed; apply
    with "patch -l" or regenerate the diff against the tree.

diff -r b17891499926 -r 223a3d4ccf29 sys/fs/nfsserver/nfs_nfsdcache.c
--- a/sys/fs/nfsserver/nfs_nfsdcache.c	Sat Oct 13 18:04:15 2012 +0200
+++ b/sys/fs/nfsserver/nfs_nfsdcache.c	Mon Oct 15 13:29:53 2012 +0200
@@ -166,9 +166,77 @@
 
 static int nfsrc_tcpnonidempotent = 1;
 static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0;
-static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
-static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE],
-    nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
+
+/* The fine-grained locked cache hash table */
+struct nfsrchash_bucket {
+	struct mtx lock;
+	char lock_name[8];
+	struct nfsrvhashhead nfsrvhashtbl;
+	struct nfsrvhashhead nfsrvudphashtbl;
+	TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
+};
+
+static struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
+
+static inline struct nfsrchash_bucket *nfsrchash_getxidbucket(uint32_t xid);
+static inline struct nfsrchash_bucket *nfsrchash_getbucket(int i);
+static inline void nfsrchash_lockbucket(struct nfsrchash_bucket *b);
+static inline void nfsrchash_unlockbucket(struct nfsrchash_bucket *b);
+static inline void nfsrchash_xidbucketlockrequired(uint32_t xid);
+static inline void nfsrchash_lockallbuckets(void);
+static inline void nfsrchash_unlockallbuckets(void);
+
+static inline struct nfsrchash_bucket *
+nfsrchash_getxidbucket(uint32_t xid)
+{
+	return &nfsrchash_table[(xid + (xid >> 24)) % NFSRVCACHE_HASHSIZE];
+}
+
+static inline struct nfsrchash_bucket *
+nfsrchash_getbucket(int i)
+{
+	KASSERT(i >= 0 && i < NFSRVCACHE_HASHSIZE,
+	    ("Invalid hash bucket %d", i));
+	return &nfsrchash_table[i];
+}
+
+static inline void
+nfsrchash_lockbucket(struct nfsrchash_bucket *b)
+{
+	mtx_lock(&b->lock);
+}
+
+static inline void
+nfsrchash_unlockbucket(struct nfsrchash_bucket *b)
+{
+	mtx_unlock(&b->lock);
+}
+
+static inline void
+nfsrchash_xidbucketlockrequired(uint32_t xid)
+{
+	KASSERT(mtx_owned(&(nfsrchash_getxidbucket(xid)->lock)),
+	    ("nfsrchash bucket lock not owned for xid %u", xid));
+}
+
+static inline void
+nfsrchash_lockallbuckets(void)
+{
+	int i;
+
+	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++)
+		nfsrchash_lockbucket(nfsrchash_getbucket(i));
+}
+
+static inline void
+nfsrchash_unlockallbuckets(void)
+{
+	int i;
+
+	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++)
+		nfsrchash_unlockbucket(nfsrchash_getbucket(i));
+}
+
 /*
  * and the reverse mapping from generic to Version 2 procedure numbers
  */
@@ -197,10 +265,6 @@
 	NFSV2PROC_NOOP,
 };
 
-#define	NFSRCUDPHASH(xid) \
-	(&nfsrvudphashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
-#define	NFSRCHASH(xid) \
-	(&nfsrvhashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
 #define	TRUE	1
 #define	FALSE	0
 #define	NFSRVCACHE_CHECKLEN	100
@@ -244,9 +308,10 @@
 static void nfsrc_unlock(struct nfsrvcache *rp);
 static void nfsrc_wanted(struct nfsrvcache *rp);
 static void nfsrc_freecache(struct nfsrvcache *rp);
+static void nfsrc_trimcache_bucket(struct nfsrchash_bucket *hb, u_int64_t,
+    struct socket *so);
 static void nfsrc_trimcache(u_int64_t, struct socket *);
-static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
-    struct socket *);
+static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t, struct socket *);
 static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
 static void nfsrc_marksametcpconn(u_int64_t);
 
@@ -262,11 +327,15 @@
 	if (inited)
 		return;
 	inited = 1;
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
-		LIST_INIT(&nfsrvudphashtbl[i]);
-		LIST_INIT(&nfsrvhashtbl[i]);
+		LIST_INIT(&nfsrchash_table[i].nfsrvudphashtbl);
+		LIST_INIT(&nfsrchash_table[i].nfsrvhashtbl);
+		TAILQ_INIT(&nfsrchash_table[i].nfsrvudplru);
+		snprintf(nfsrchash_table[i].lock_name,
+		    sizeof(nfsrchash_table[i].lock_name), "nfsh%d", i);
+		mtx_init(&nfsrchash_table[i].lock, nfsrchash_table[i].lock_name,
+		    NULL, MTX_DEF);
 	}
-	TAILQ_INIT(&nfsrvudplru);
 	nfsrc_tcpsavedreplies = 0;
 	nfsrc_udpcachesize = 0;
 	newnfsstats.srvcache_tcppeak = 0;
@@ -323,12 +392,14 @@
 	struct nfsrvcache *rp;
 	struct sockaddr_in *saddr;
 	struct sockaddr_in6 *saddr6;
+	struct nfsrchash_bucket *hb;
 	struct nfsrvhashhead *hp;
 	int ret = 0;
 
-	hp = NFSRCUDPHASH(newrp->rc_xid);
+	hb = nfsrchash_getxidbucket(newrp->rc_xid);
+	hp = &hb->nfsrvudphashtbl;
 loop:
-	NFSLOCKCACHE();
+	nfsrchash_lockbucket(hb);
 	LIST_FOREACH(rp, hp, rc_hash) {
 	    if (newrp->rc_xid == rp->rc_xid &&
 		newrp->rc_proc == rp->rc_proc &&
@@ -343,18 +414,18 @@
 			if (rp->rc_flag == 0)
 				panic("nfs udp cache0");
 			rp->rc_flag |= RC_LOCKED;
-			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
-			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
+			TAILQ_REMOVE(&hb->nfsrvudplru, rp, rc_lru);
+			TAILQ_INSERT_TAIL(&hb->nfsrvudplru, rp, rc_lru);
 			if (rp->rc_flag & RC_INPROG) {
 				newnfsstats.srvcache_inproghits++;
-				NFSUNLOCKCACHE();
+				nfsrchash_unlockbucket(hb);
 				ret = RC_DROPIT;
 			} else if (rp->rc_flag & RC_REPSTATUS) {
 				/*
 				 * V2 only.
 				 */
 				newnfsstats.srvcache_nonidemdonehits++;
-				NFSUNLOCKCACHE();
+				nfsrchash_unlockbucket(hb);
 				nfsrvd_rephead(nd);
 				*(nd->nd_errp) = rp->rc_status;
 				ret = RC_REPLY;
@@ -362,7 +433,7 @@
 					NFSRVCACHE_UDPTIMEOUT;
 			} else if (rp->rc_flag & RC_REPMBUF) {
 				newnfsstats.srvcache_nonidemdonehits++;
-				NFSUNLOCKCACHE();
+				nfsrchash_unlockbucket(hb);
 				nd->nd_mreq = m_copym(rp->rc_reply, 0,
 					M_COPYALL, M_WAIT);
 				ret = RC_REPLY;
@@ -391,8 +462,8 @@
 		newrp->rc_flag |= RC_INETIPV6;
 	}
 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
-	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
-	NFSUNLOCKCACHE();
+	TAILQ_INSERT_TAIL(&hb->nfsrvudplru, newrp, rc_lru);
+	nfsrchash_unlockbucket(hb);
 	nd->nd_rp = newrp;
 	ret = RC_DOIT;
 
@@ -409,20 +480,22 @@
 {
 	struct nfsrvcache *rp;
 	struct nfsrvcache *retrp = NULL;
+	struct nfsrchash_bucket *hb;
 	mbuf_t m;
 
 	rp = nd->nd_rp;
 	if (!rp)
 		panic("nfsrvd_updatecache null rp");
+	hb = nfsrchash_getxidbucket(rp->rc_xid);
 	nd->nd_rp = NULL;
-	NFSLOCKCACHE();
+	nfsrchash_lockbucket(hb);
 	nfsrc_lock(rp);
 	if (!(rp->rc_flag & RC_INPROG))
 		panic("nfsrvd_updatecache not inprog");
 	rp->rc_flag &= ~RC_INPROG;
 	if (rp->rc_flag & RC_UDP) {
-		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
-		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
+		TAILQ_REMOVE(&hb->nfsrvudplru, rp, rc_lru);
+		TAILQ_INSERT_TAIL(&hb->nfsrvudplru, rp, rc_lru);
 	}
 
 	/*
@@ -430,7 +503,7 @@
 	 */
 	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
 		newnfsstats.srvcache_nonidemdonehits++;
-		NFSUNLOCKCACHE();
+		nfsrchash_unlockbucket(hb);
 		nd->nd_repstat = 0;
 		if (nd->nd_mreq)
 			mbuf_freem(nd->nd_mreq);
@@ -463,7 +536,7 @@
 		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
 			rp->rc_status = nd->nd_repstat;
 			rp->rc_flag |= RC_REPSTATUS;
-			NFSUNLOCKCACHE();
+			nfsrchash_unlockbucket(hb);
 		} else {
 			if (!(rp->rc_flag & RC_UDP)) {
 			    nfsrc_tcpsavedreplies++;
@@ -472,12 +545,13 @@
 				newnfsstats.srvcache_tcppeak =
 				    nfsrc_tcpsavedreplies;
 			}
-			NFSUNLOCKCACHE();
+			/* Drop the lock across the M_WAIT copy, as before. */
+			nfsrchash_unlockbucket(hb);
 			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAIT);
-			NFSLOCKCACHE();
+			nfsrchash_lockbucket(hb);
 			rp->rc_reply = m;
 			rp->rc_flag |= RC_REPMBUF;
-			NFSUNLOCKCACHE();
+			nfsrchash_unlockbucket(hb);
 		}
 		if (rp->rc_flag & RC_UDP) {
 			rp->rc_timestamp = NFSD_MONOSEC +
@@ -493,7 +567,7 @@
 		}
 	} else {
 		nfsrc_freecache(rp);
-		NFSUNLOCKCACHE();
+		nfsrchash_unlockbucket(hb);
 	}
 
 out:
@@ -509,14 +583,16 @@
 APPLESTATIC void
 nfsrvd_delcache(struct nfsrvcache *rp)
 {
+	struct nfsrchash_bucket *hb;
 
+	hb = nfsrchash_getxidbucket(rp->rc_xid);
 	if (!(rp->rc_flag & RC_INPROG))
 		panic("nfsrvd_delcache not in prog");
-	NFSLOCKCACHE();
+	nfsrchash_lockbucket(hb);
 	rp->rc_flag &= ~RC_INPROG;
 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
 		nfsrc_freecache(rp);
-	NFSUNLOCKCACHE();
+	nfsrchash_unlockbucket(hb);
 }
 
 /*
@@ -527,20 +603,22 @@
 APPLESTATIC void
 nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
 {
+	struct nfsrchash_bucket *hb;
 	tcp_seq tmp_seq;
 
 	if (!(rp->rc_flag & RC_LOCKED))
 		panic("nfsrvd_sentcache not locked");
+	hb = nfsrchash_getxidbucket(rp->rc_xid);
 	if (!err) {
 		if ((so->so_proto->pr_domain->dom_family != AF_INET &&
 		     so->so_proto->pr_domain->dom_family != AF_INET6) ||
 		     so->so_proto->pr_protocol != IPPROTO_TCP)
 			panic("nfs sent cache");
 		if (nfsrv_getsockseqnum(so, &tmp_seq)) {
-			NFSLOCKCACHE();
+			nfsrchash_lockbucket(hb);
 			rp->rc_tcpseq = tmp_seq;
 			rp->rc_flag |= RC_TCPSEQ;
-			NFSUNLOCKCACHE();
+			nfsrchash_unlockbucket(hb);
 		}
 	}
 	nfsrc_unlock(rp);
@@ -558,12 +636,14 @@
 	int i;
 	struct nfsrvcache *hitrp;
 	struct nfsrvhashhead *hp, nfsrc_templist;
+	struct nfsrchash_bucket *hb;
 	int hit, ret = 0;
 
-	hp = NFSRCHASH(newrp->rc_xid);
+	hb = nfsrchash_getxidbucket(newrp->rc_xid);
+	hp = &hb->nfsrvhashtbl;
 	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
 tryagain:
-	NFSLOCKCACHE();
+	nfsrchash_lockbucket(hb);
 	hit = 1;
 	LIST_INIT(&nfsrc_templist);
 	/*
@@ -630,7 +710,7 @@
 		rp->rc_flag |= RC_LOCKED;
 		if (rp->rc_flag & RC_INPROG) {
 			newnfsstats.srvcache_inproghits++;
-			NFSUNLOCKCACHE();
+			nfsrchash_unlockbucket(hb);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_DROPIT;
@@ -639,7 +719,7 @@
 			 * V2 only.
 			 */
 			newnfsstats.srvcache_nonidemdonehits++;
-			NFSUNLOCKCACHE();
+			nfsrchash_unlockbucket(hb);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_REPLY;
@@ -649,7 +729,7 @@
 				NFSRVCACHE_TCPTIMEOUT;
 		} else if (rp->rc_flag & RC_REPMBUF) {
 			newnfsstats.srvcache_nonidemdonehits++;
-			NFSUNLOCKCACHE();
+			nfsrchash_unlockbucket(hb);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_REPLY;
@@ -674,7 +754,7 @@
 	newrp->rc_cachetime = NFSD_MONOSEC;
 	newrp->rc_flag |= RC_INPROG;
 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
-	NFSUNLOCKCACHE();
+	nfsrchash_unlockbucket(hb);
 	nd->nd_rp = newrp;
 	ret = RC_DOIT;
 
@@ -690,7 +770,8 @@
 static void
 nfsrc_lock(struct nfsrvcache *rp)
 {
-	NFSCACHELOCKREQUIRED();
+	nfsrchash_xidbucketlockrequired(rp->rc_xid);
 	while ((rp->rc_flag & RC_LOCKED) != 0) {
 		rp->rc_flag |= RC_WANTED;
-		(void)mtx_sleep(rp, NFSCACHEMUTEXPTR, PZERO - 1,
+		(void)mtx_sleep(rp,
+		    &nfsrchash_getxidbucket(rp->rc_xid)->lock, PZERO - 1,
@@ -705,11 +786,13 @@
 static void
 nfsrc_unlock(struct nfsrvcache *rp)
 {
+	struct nfsrchash_bucket *hb;
 
-	NFSLOCKCACHE();
+	hb = nfsrchash_getxidbucket(rp->rc_xid);
+	nfsrchash_lockbucket(hb);
 	rp->rc_flag &= ~RC_LOCKED;
 	nfsrc_wanted(rp);
-	NFSUNLOCKCACHE();
+	nfsrchash_unlockbucket(hb);
 }
 
 /*
@@ -731,11 +814,13 @@
 static void
 nfsrc_freecache(struct nfsrvcache *rp)
 {
+	struct nfsrchash_bucket *hb;
 
-	NFSCACHELOCKREQUIRED();
+	nfsrchash_xidbucketlockrequired(rp->rc_xid);
+	hb = nfsrchash_getxidbucket(rp->rc_xid);
 	LIST_REMOVE(rp, rc_hash);
 	if (rp->rc_flag & RC_UDP) {
-		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
+		TAILQ_REMOVE(&hb->nfsrvudplru, rp, rc_lru);
 		nfsrc_udpcachesize--;
 	}
 	nfsrc_wanted(rp);
@@ -755,22 +840,53 @@
 nfsrvd_cleancache(void)
 {
 	struct nfsrvcache *rp, *nextrp;
+	struct nfsrchash_bucket *hb;
 	int i;
 
-	NFSLOCKCACHE();
+	nfsrchash_lockallbuckets();
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
-		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
+		hb = nfsrchash_getbucket(i);
+		LIST_FOREACH_SAFE(rp, &hb->nfsrvhashtbl, rc_hash, nextrp) {
 			nfsrc_freecache(rp);
 		}
 	}
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
-		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
+		hb = nfsrchash_getbucket(i);
+		LIST_FOREACH_SAFE(rp, &hb->nfsrvudphashtbl, rc_hash, nextrp) {
 			nfsrc_freecache(rp);
 		}
 	}
 	newnfsstats.srvcache_size = 0;
 	nfsrc_tcpsavedreplies = 0;
-	NFSUNLOCKCACHE();
+	nfsrchash_unlockallbuckets();
+}
+
+/*
+ * Performs the trim operation per-hash-bucket.
+ * The basic rule is to get rid of entries that are expired.
+ */
+static inline void
+nfsrc_trimcache_bucket(struct nfsrchash_bucket *hb, u_int64_t sockref,
+    struct socket *so)
+{
+	struct nfsrvcache *rp, *nextrp;
+
+	TAILQ_FOREACH_SAFE(rp, &hb->nfsrvudplru, rc_lru, nextrp) {
+		if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
+		     && rp->rc_refcnt == 0
+		     && ((rp->rc_flag & RC_REFCNT) ||
+			 NFSD_MONOSEC > rp->rc_timestamp ||
+			 nfsrc_udpcachesize > nfsrc_udphighwater))
+			nfsrc_freecache(rp);
+	}
+	LIST_FOREACH_SAFE(rp, &hb->nfsrvhashtbl, rc_hash, nextrp) {
+		if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
+		     && rp->rc_refcnt == 0
+		     && ((rp->rc_flag & RC_REFCNT) ||
+			 NFSD_MONOSEC > rp->rc_timestamp ||
+			 nfsrc_activesocket(rp, sockref, so)))
+			nfsrc_freecache(rp);
+	}
 }
 
 /*
@@ -779,29 +895,15 @@
 static void
 nfsrc_trimcache(u_int64_t sockref, struct socket *so)
 {
-	struct nfsrvcache *rp, *nextrp;
+	struct nfsrchash_bucket *hb;
 	int i;
 
-	NFSLOCKCACHE();
-	TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
-		if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
-		     && rp->rc_refcnt == 0
-		     && ((rp->rc_flag & RC_REFCNT) ||
-			 NFSD_MONOSEC > rp->rc_timestamp ||
-			 nfsrc_udpcachesize > nfsrc_udphighwater))
-			nfsrc_freecache(rp);
+	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+		hb = nfsrchash_getbucket(i);
+		nfsrchash_lockbucket(hb);
+		nfsrc_trimcache_bucket(hb, sockref, so);
+		nfsrchash_unlockbucket(hb);
 	}
-	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
-		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
-			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
-			     && rp->rc_refcnt == 0
-			     && ((rp->rc_flag & RC_REFCNT) ||
-				 NFSD_MONOSEC > rp->rc_timestamp ||
-				 nfsrc_activesocket(rp, sockref, so)))
-				nfsrc_freecache(rp);
-		}
-	}
-	NFSUNLOCKCACHE();
 }
 
 /*
@@ -810,12 +912,14 @@
 APPLESTATIC void
 nfsrvd_refcache(struct nfsrvcache *rp)
 {
+	struct nfsrchash_bucket *hb;
 
-	NFSLOCKCACHE();
+	hb = nfsrchash_getxidbucket(rp->rc_xid);
+	nfsrchash_lockbucket(hb);
 	if (rp->rc_refcnt < 0)
 		panic("nfs cache refcnt");
 	rp->rc_refcnt++;
-	NFSUNLOCKCACHE();
+	nfsrchash_unlockbucket(hb);
 }
 
 /*
@@ -824,14 +928,17 @@
 APPLESTATIC void
 nfsrvd_derefcache(struct nfsrvcache *rp)
 {
+	struct nfsrchash_bucket *hb;
 
-	NFSLOCKCACHE();
+	hb = nfsrchash_getxidbucket(rp->rc_xid);
+	nfsrchash_lockbucket(hb);
+
 	if (rp->rc_refcnt <= 0)
 		panic("nfs cache derefcnt");
 	rp->rc_refcnt--;
 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
 		nfsrc_freecache(rp);
-	NFSUNLOCKCACHE();
+	nfsrchash_unlockbucket(hb);
 }
 
 /*