diff -ur include/linux/tcp.h /usr/src/linux/include/linux/tcp.h
--- include/linux/tcp.h	2005-10-09 23:21:42.000000000 +0200
+++ /usr/src/linux/include/linux/tcp.h	2005-10-10 22:22:55.000000000 +0200
@@ -436,6 +436,8 @@
 		__u32	last_cwnd;	/* the last snd_cwnd */
 		__u32	last_stamp;	/* time when updated last_cwnd */
 	} bictcp;
+
+	int total_rcv;
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
diff -ur include/net/sock.h /usr/src/linux/include/net/sock.h
--- include/net/sock.h	2005-10-09 23:21:42.000000000 +0200
+++ /usr/src/linux/include/net/sock.h	2005-10-11 00:54:32.000000000 +0200
@@ -248,6 +248,7 @@
 	int			(*sk_backlog_rcv)(struct sock *sk,
 						  struct sk_buff *skb);
 	void			(*sk_destruct)(struct sock *sk);
+	struct sock		*sk_parent;
 };
 
 /*
diff -x .o -ur net/ipv4/tcp.c /usr/src/linux/net/ipv4/tcp.c
--- net/ipv4/tcp.c	2005-10-09 23:22:05.000000000 +0200
+++ /usr/src/linux/net/ipv4/tcp.c	2005-10-11 03:13:05.000000000 +0200
@@ -1871,6 +1871,8 @@
 		if (!tp->accept_queue)
 			timeo = schedule_timeout(timeo);
 		lock_sock(sk);
+		printk(KERN_INFO "%s: woke up. sk: %p %p %d\n", __func__, sk,
+		       tp->accept_queue, sk->sk_state);
 		err = 0;
 		if (tp->accept_queue)
 			break;
@@ -2059,6 +2061,7 @@
 
 	case TCP_DEFER_ACCEPT:
 		tp->defer_accept = 0;
+		printk(KERN_INFO "TCP_DEFER_ACCEPT\n");
 		if (val > 0) {
 			/* Translate value in seconds to number of
 			 * retransmits */
diff -x .o -ur net/ipv4/tcp_input.c /usr/src/linux/net/ipv4/tcp_input.c
--- net/ipv4/tcp_input.c	2005-10-09 23:22:05.000000000 +0200
+++ /usr/src/linux/net/ipv4/tcp_input.c	2005-10-11 03:24:58.000000000 +0200
@@ -3541,7 +3541,6 @@
 	    sock_owned_by_user(sk) && !tp->urg_data) {
 		int chunk = min_t(unsigned int, skb->len, tp->ucopy.len);
-
 		__set_current_state(TASK_RUNNING);
 
 		local_bh_enable();
@@ -3589,8 +3588,21 @@
 
 		if (eaten > 0)
 			__kfree_skb(skb);
-		else if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_data_ready(sk, 0);
+		else if (!sock_flag(sk, SOCK_DEAD)) {
+			if (tp->defer_accept) {
+				tp->total_rcv += skb->len;
+				printk(KERN_INFO "%s: data received %d %d\n",
+				       __func__, skb->len, tp->total_rcv);
+				if (tp->total_rcv > 10) {
+					printk(KERN_INFO "%s: waking up parent %p\n",
+					       __func__, sk->sk_parent);
+					tp->defer_accept = 0;
+					sk->sk_state_change(sk->sk_parent);
+				}
+			} else {
+				sk->sk_data_ready(sk, 0);
+			}
+		}
 		return;
 	}
@@ -4126,8 +4138,21 @@
 			if (skb_copy_bits(skb, ptr, &tmp, 1))
 				BUG();
 			tp->urg_data = TCP_URG_VALID | tmp;
-			if (!sock_flag(sk, SOCK_DEAD))
-				sk->sk_data_ready(sk, 0);
+			if (!sock_flag(sk, SOCK_DEAD)) {
+				if (tp->defer_accept) {
+					tp->total_rcv += skb->len;
+					printk(KERN_INFO "%s: data received %d %d\n",
+					       __func__, skb->len, tp->total_rcv);
+					if (tp->total_rcv > 10) {
+						printk(KERN_INFO "%s: waking up parent %p\n",
+						       __func__, sk->sk_parent);
+						tp->defer_accept = 0;
+						sk->sk_state_change(sk->sk_parent);
+					}
+				} else {
+					sk->sk_data_ready(sk, 0);
+				}
+			}
 		}
 	}
 }
@@ -4359,8 +4384,21 @@
 no_ack:
 			if (eaten)
 				__kfree_skb(skb);
-			else
-				sk->sk_data_ready(sk, 0);
+			else {
+				if (tp->defer_accept) {
+					tp->total_rcv += skb->len;
+					printk(KERN_INFO "%s: data received %d %d\n",
+					       __func__, skb->len, tp->total_rcv);
+					if (tp->total_rcv > 10) {
+						printk(KERN_INFO "%s: waking up parent %p\n",
+						       __func__, sk->sk_parent);
+						tp->defer_accept = 0;
+						sk->sk_state_change(sk->sk_parent);
+					}
+				} else {
+					sk->sk_data_ready(sk, 0);
+				}
+			}
 			return 0;
 		}
 	}
@@ -4573,6 +4611,7 @@
 		}
 		if (sk->sk_write_pending ||
 		    tp->defer_accept || tp->ack.pingpong) {
+			printk(KERN_INFO "%s: deferred\n", __func__);
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
 			 *
diff -x .o -ur net/ipv4/tcp_minisocks.c /usr/src/linux/net/ipv4/tcp_minisocks.c
--- net/ipv4/tcp_minisocks.c	2005-10-09 23:22:05.000000000 +0200
+++ /usr/src/linux/net/ipv4/tcp_minisocks.c	2005-10-11 02:58:32.000000000 +0200
@@ -724,6 +724,7 @@
 		rwlock_init(&newsk->sk_callback_lock);
 		skb_queue_head_init(&newsk->sk_error_queue);
 		newsk->sk_write_space = sk_stream_write_space;
+		newsk->sk_parent = sk;
 
 		if ((filter = newsk->sk_filter) != NULL)
 			sk_filter_charge(newsk, filter);
@@ -1004,7 +1005,9 @@
 		/* If TCP_DEFER_ACCEPT is set, drop bare ACK.
 		 */
 		if (tp->defer_accept &&
 		    TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
+			printk(KERN_INFO "%s: deferred ACK\n", __func__);
 			req->acked = 1;
+			tp->total_rcv = 0;
 			return NULL;
 		}
@@ -1048,15 +1051,27 @@
 int tcp_child_process(struct sock *parent, struct sock *child,
 		      struct sk_buff *skb)
 {
+	struct tcp_sock *ctp;
 	int ret = 0;
 	int state = child->sk_state;
+	int defer;
 
 	if (!sock_owned_by_user(child)) {
+		ctp = tcp_sk(parent);
+		defer = ctp->defer_accept;
 		ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
 		/* Wakeup parent, send SIGIO */
-		if (state == TCP_SYN_RECV && child->sk_state != state)
+		if (!defer && state == TCP_SYN_RECV && child->sk_state != state) {
 			parent->sk_data_ready(parent, 0);
+		} else if (defer) {
+			printk(KERN_INFO "%s: parent %p\n", __func__,
+			       parent);
+			if (ctp->defer_accept == 0) {
+				printk(KERN_INFO "%s: waking up parent\n", __func__);
+				parent->sk_data_ready(parent, 0);
+			}
+		}
 	} else {
 		/* Alas, it is possible again, because we do lookup
 		 * in main socket hash table and lock on listening