Index: src/sys/kern/uipc_usrreq.c diff -u src/sys/kern/uipc_usrreq.c:1.233.2.1 src/sys/kern/uipc_usrreq.c:1.235 --- src/sys/kern/uipc_usrreq.c:1.233.2.1 Mon Aug 3 10:13:06 2009 +++ src/sys/kern/uipc_usrreq.c Tue Oct 6 00:23:12 2009 @@ -50,13 +50,14 @@ * garbage collector to find and tear down cycles of disconnected sockets. * * TODO: - * SEQPACKET, RDM + * RDM + * distinguish datagram size limits from flow control limits in SEQPACKET * rethink name space problems * need a proper out-of-band */ #include __FBSDID("$FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.233.2.1 2009/08/03 08:13:06 kensmith Exp $"); #include "opt_ddb.h" @@ -112,6 +113,7 @@ static int unp_rights; /* (g) File descriptors in flight. */ static struct unp_head unp_shead; /* (l) List of stream sockets. */ static struct unp_head unp_dhead; /* (l) List of datagram sockets. */ +static struct unp_head unp_sphead; /* (l) List of seqpacket sockets. */ static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; @@ -139,10 +141,14 @@ static u_long unpst_recvspace = PIPSIZ; static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ static u_long unpdg_recvspace = 4*1024; +static u_long unpsp_sendspace = PIPSIZ; /* really max datagram size */ +static u_long unpsp_recvspace = PIPSIZ; SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain"); SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM"); SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM"); +SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW, 0, + "SOCK_SEQPACKET"); SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, &unpst_sendspace, 0, "Default stream send space."); @@ -152,6 +158,10 @@ &unpdg_sendspace, 0, "Default datagram send space."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, &unpdg_recvspace, 0, "Default datagram receive space."); +SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW, + &unpsp_sendspace, 0, "Default seqpacket send space."); +SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW, + &unpsp_recvspace, 0, "Default seqpacket receive space."); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "File descriptors in flight."); @@ -257,6 +267,7 @@ */ static struct domain localdomain; static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream; +static struct pr_usrreqs uipc_usrreqs_seqpacket; static struct protosw localsw[] = { { .pr_type = SOCK_STREAM, @@ -271,6 +282,19 @@ .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, .pr_usrreqs = &uipc_usrreqs_dgram }, +{ + .pr_type = SOCK_SEQPACKET, + .pr_domain = &localdomain, + + /* + * XXXRW: For now, PR_ADDR because soreceive will bump into them + * due to our use of sbappendaddr. A new sbappend variants is needed + * that supports both atomic record writes and control data. + */ + .pr_flags = PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD| + PR_RIGHTS, + .pr_usrreqs = &uipc_usrreqs_seqpacket, +}, }; static struct domain localdomain = { @@ -353,6 +377,11 @@ recvspace = unpdg_recvspace; break; + case SOCK_SEQPACKET: + sendspace = unpsp_sendspace; + recvspace = unpsp_recvspace; + break; + default: panic("uipc_attach"); } @@ -372,8 +401,22 @@ UNP_LIST_LOCK(); unp->unp_gencnt = ++unp_gencnt; unp_count++; - LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead, - unp, unp_link); + switch (so->so_type) { + case SOCK_STREAM: + LIST_INSERT_HEAD(&unp_shead, unp, unp_link); + break; + + case SOCK_DGRAM: + LIST_INSERT_HEAD(&unp_dhead, unp, unp_link); + break; + + case SOCK_SEQPACKET: + LIST_INSERT_HEAD(&unp_sphead, unp, unp_link); + break; + + default: + panic("uipc_attach"); + } UNP_LIST_UNLOCK(); return (0); @@ -705,11 +748,8 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL")); - if (so->so_type == SOCK_DGRAM) - panic("uipc_rcvd DGRAM?"); - - if (so->so_type != SOCK_STREAM) - panic("uipc_rcvd unknown socktype"); + if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET) + panic("uipc_rcvd socktype %d", so->so_type); /* * Adjust backpressure on sender and wakeup any waiting to write. @@ -824,6 +864,7 @@ break; } + case SOCK_SEQPACKET: case SOCK_STREAM: if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { @@ -875,11 +916,33 @@ * Send to paired receive port, and then reduce send buffer * hiwater marks to maintain backpressure. Wake up readers. */ - if (control != NULL) { - if (sbappendcontrol_locked(&so2->so_rcv, m, control)) + switch (so->so_type) { + case SOCK_STREAM: + if (control != NULL) { + if (sbappendcontrol_locked(&so2->so_rcv, m, + control)) + control = NULL; + } else + sbappend_locked(&so2->so_rcv, m); + break; + + case SOCK_SEQPACKET: { + const struct sockaddr *from; + + from = &sun_noname; + if (sbappendaddr_locked(&so2->so_rcv, from, m, + control)) control = NULL; - } else - sbappend_locked(&so2->so_rcv, m); + break; + } + } + + /* + * XXXRW: While fine for SOCK_STREAM, this conflates maximum + * datagram size and back-pressure for SOCK_SEQPACKET, which + * can lead to undesired return of EMSGSIZE on send instead + * of more desirable blocking. + */ mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt; unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt; sbcc = so2->so_rcv.sb_cc; @@ -939,7 +1002,8 @@ UNP_LINK_RLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; - if (so->so_type == SOCK_STREAM && unp2 != NULL) { + if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) && + unp2 != NULL) { so2 = unp2->unp_socket; sb->st_blksize += so2->so_rcv.sb_cc; } @@ -1009,6 +1073,26 @@ .pru_close = uipc_close, }; +static struct pr_usrreqs uipc_usrreqs_seqpacket = { + .pru_abort = uipc_abort, + .pru_accept = uipc_accept, + .pru_attach = uipc_attach, + .pru_bind = uipc_bind, + .pru_connect = uipc_connect, + .pru_connect2 = uipc_connect2, + .pru_detach = uipc_detach, + .pru_disconnect = uipc_disconnect, + .pru_listen = uipc_listen, + .pru_peeraddr = uipc_peeraddr, + .pru_rcvd = uipc_rcvd, + .pru_send = uipc_send, + .pru_sense = uipc_sense, + .pru_shutdown = uipc_shutdown, + .pru_sockaddr = uipc_sockaddr, + .pru_soreceive = soreceive_generic, /* XXX: or...? */ + .pru_close = uipc_close, +}; + static struct pr_usrreqs uipc_usrreqs_stream = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, @@ -1306,6 +1390,7 @@ break; case SOCK_STREAM: + case SOCK_SEQPACKET: unp2->unp_conn = unp; if (req == PRU_CONNECT && ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) @@ -1343,6 +1428,7 @@ break; case SOCK_STREAM: + case SOCK_SEQPACKET: soisdisconnected(unp->unp_socket); unp2->unp_conn = NULL; soisdisconnected(unp2->unp_socket); @@ -1368,7 +1454,22 @@ struct unp_head *head; struct xunpcb *xu; - head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); + switch ((intptr_t)arg1) { + case SOCK_STREAM: + head = &unp_shead; + break; + + case SOCK_DGRAM: + head = &unp_dhead; + break; + + case SOCK_SEQPACKET: + head = &unp_sphead; + break; + + default: + panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1); + } /* * The process of preparing the PCB list is too time-consuming and @@ -1481,6 +1582,9 @@ SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", "List of active local stream sockets"); +SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD, + (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", + "List of active local seqpacket sockets"); static void unp_shutdown(struct unpcb *unp) @@ -1492,7 +1596,8 @@ UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; - if (unp->unp_socket->so_type == SOCK_STREAM && unp2 != NULL) { + if ((unp->unp_socket->so_type == SOCK_STREAM || + (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) { so = unp2->unp_socket; if (so != NULL) socantrcvmore(so); @@ -1658,6 +1763,7 @@ NULL, EVENTHANDLER_PRI_ANY); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); + LIST_INIT(&unp_sphead); TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); UNP_LINK_LOCK_INIT(); UNP_LIST_LOCK_INIT(); @@ -1974,7 +2080,8 @@ static void unp_gc(__unused void *arg, int pending) { - struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL }; + struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead, + NULL }; struct unp_head **head; struct file **unref; struct unpcb *unp;