Index: sys/socketvar.h =================================================================== RCS file: /home/ncvs/src/sys/sys/socketvar.h,v retrieving revision 1.102 diff -u -r1.102 socketvar.h --- sys/socketvar.h 2 Mar 2003 16:54:39 -0000 1.102 +++ sys/socketvar.h 19 Mar 2003 10:20:11 -0000 @@ -101,6 +101,7 @@ struct sockbuf { struct selinfo sb_sel; /* process selecting read/write */ struct mbuf *sb_mb; /* the mbuf chain */ + struct mbuf *sb_mb_tail; /* last pkt in chain (if sb_mb != NULL) */ u_int sb_cc; /* actual chars in buffer */ u_int sb_hiwat; /* max actual char count */ u_int sb_mbcnt; /* chars of mbufs used */ Index: kern/uipc_socket.c =================================================================== RCS file: /home/ncvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.148 diff -u -r1.148 uipc_socket.c --- kern/uipc_socket.c 2 Mar 2003 15:56:49 -0000 1.148 +++ kern/uipc_socket.c 19 Mar 2003 10:20:12 -0000 @@ -893,6 +893,12 @@ goto restart; } dontblock: + /* + * On entry here, m points to the first record on the socket buffer. + * While we process the initial mbufs containing address and control + * info we save a copy of m->m_nextpkt into nextrecord. We do not need + * to take care of sb_mb_tail until later. + */ if (uio->uio_td) uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++; nextrecord = m->m_nextpkt; @@ -936,9 +942,17 @@ while (*controlp != NULL); } } + /* + * If m is non-null, we have some data to read. From now on, make + * sure to keep sb_mb_tail consistent when working on the last + * packet on the chain (nextrecord==NULL) and we change m->m_nextpkt. 
+ */ if (m) { - if ((flags & MSG_PEEK) == 0) + if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_mb_tail = m; + } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; @@ -1016,8 +1030,11 @@ so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } - if (m) + if (m) { m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_mb_tail = m; + } } } else { if (flags & MSG_PEEK) @@ -1080,8 +1097,12 @@ (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { - if (m == 0) + if (m == 0) { so->so_rcv.sb_mb = nextrecord; + if (nextrecord == NULL || nextrecord->m_nextpkt == NULL) + so->so_rcv.sb_mb_tail = nextrecord; + } + if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); } Index: kern/uipc_socket2.c =================================================================== RCS file: /home/ncvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.111 diff -u -r1.111 uipc_socket2.c --- kern/uipc_socket2.c 21 Feb 2003 22:23:40 -0000 1.111 +++ kern/uipc_socket2.c 19 Mar 2003 10:20:12 -0000 @@ -62,6 +62,8 @@ void (*aio_swake)(struct socket *, struct sockbuf *); +int fastscan; /* XXX see below */ + /* * Primitive routines for operating on sockets and socket buffers */ @@ -466,6 +468,50 @@ * or sbdroprecord() when the data is acknowledged by the peer. */ +static struct mbuf * +sbgettail(struct sockbuf *sb, char *msg, struct mbuf *m0); +/* + * sbgettail returns a pointer to the last record of the socket buffer. + * If m0 is non-null, it is appended to the chain and returned as the new last record. + */ +static struct mbuf * +sbgettail(struct sockbuf *sb, char *msg, struct mbuf *m0) +{ + struct mbuf *m = sb->sb_mb; + + if (m == NULL) + goto done; + if (sb->sb_mb_tail == NULL) + printf("%s: null tail\n", msg); + if (fastscan && sb->sb_mb_tail != NULL) { + m = sb->sb_mb_tail ; + if (m == NULL) + panic ("sbgettail returns NULL"); + if (m->m_nextpkt == NULL) /* ok ... 
*/ + goto done; + /* otherwise continue scan */ + printf("%s: sbgettail m_nextpkt != NULL\n", msg); + } + while (m->m_nextpkt) + m = m->m_nextpkt ; + if (m != sb->sb_mb_tail) { + if (sb->sb_mb_tail != NULL) + printf("%s: bad tail 0x%p instead of 0x%p\n", + msg, sb->sb_mb_tail, m); + sb->sb_mb_tail = m ; + } +done: + if (m0) { + if (m) + m->m_nextpkt = m0; + else + sb->sb_mb = m0; + sb->sb_mb_tail = m0 ; + m = m0 ; + } + return m ; +} + /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated @@ -481,10 +527,8 @@ if (m == 0) return; - n = sb->sb_mb; + n = sbgettail(sb, "sbappend", NULL); if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord(sb, m); /* XXXXXX!!!! */ @@ -534,19 +578,13 @@ if (m0 == 0) return; - m = sb->sb_mb; - if (m) - while (m->m_nextpkt) - m = m->m_nextpkt; /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); - if (m) - m->m_nextpkt = m0; - else - sb->sb_mb = m0; + m = sbgettail(sb, "sbappendrecord", m0); + if (m0->m_nextpkt != NULL) printf("ouch! 
sbappendrecord nextpkt!=NULL\n"); m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { @@ -592,6 +630,8 @@ */ sballoc(sb, m0); m0->m_nextpkt = *mp; + if (*mp == NULL) /* m0 is actually the new tail */ + sb->sb_mb_tail = m0; *mp = m0; m = m0->m_next; m0->m_next = 0; @@ -638,13 +678,7 @@ m->m_next = control; for (n = m; n; n = n->m_next) sballoc(sb, n); - n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = m; - } else - sb->sb_mb = m; + n = sbgettail(sb, "sbappendaddr", m); return (1); } @@ -664,13 +698,7 @@ n->m_next = m0; /* concatenate data to control */ for (m = control; m; m = m->m_next) sballoc(sb, m); - n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = control; - } else - sb->sb_mb = control; + n = sbgettail(sb, "sbappendcontrol", control); return (1); } @@ -715,7 +743,7 @@ if (n) n->m_next = m; else - sb->sb_mb = m; + sb->sb_mb = sb->sb_mb_tail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; @@ -980,6 +1008,8 @@ xsb->sb_timeo = sb->sb_timeo; } +SYSCTL_INT(_kern_ipc, OID_AUTO, fastscan, CTLFLAG_RW, + &fastscan, 0, "Fast scanning of socket queues for append"); /* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB.