- Fix TCP delayed ACK stall (Andrea Arcangeli)
[davej-history.git] net/ipv4/tcp_output.c (blob 2bdbb5d2347e1f98d9c44573cdefc9f6abda53d6)
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_output.c,v 1.58 1998/03/11 07:12:49 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:	Pedro Roque	:	Retransmit queue handled by TCP.
 *				:	Fragmentation on mtu decrease
 *				:	Segment collapse on retransmit
 *				:	AF independence
 *
 *		Linus Torvalds	:	send_delayed_ack
 *		David S. Miller	:	Charge memory using the right skb
 *					during syn/ack processing.
 */
#include <net/tcp.h>

extern int sysctl_tcp_sack;
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
/* Get rid of any delayed acks, we sent one already.. */
static __inline__ void clear_delayed_acks(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        tp->delayed_acks = 0;
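        /* In quickack mode, fall back to a short ack timeout: (HZ/100)*2
         * jiffies is roughly 20ms (assuming HZ is a multiple of 100).
         */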
        if (tcp_in_quickack_mode(tp))
                tp->ato = ((HZ/100)*2);
        tcp_clear_xmit_timer(sk, TIME_DACK);
}
static __inline__ void update_send_head(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        tp->send_head = tp->send_head->next;
        if (tp->send_head == (struct sk_buff *) &sk->write_queue)
                tp->send_head = NULL;
}
/*
 * This is the main buffer sending routine. We queue the buffer
 * having checked it is sane seeming.
 */
void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue)
{
        struct tcphdr *th = skb->h.th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int size;

        /* Length of packet (not counting length of pre-tcp headers). */
        size = skb->len - ((unsigned char *) th - skb->data);

        /* Sanity check it.. */
        if (size < tp->tcp_header_len || size > skb->len) {
                printk(KERN_DEBUG "tcp_send_skb: bad skb "
                       "(skb = %p, data = %p, th = %p, len = %u)\n",
                       skb, skb->data, th, skb->len);
                kfree_skb(skb);
                return;
        }

        /* If we have queued a header size packet.. (these crash a few
         * tcp stacks if ack is not set)
         */
        if (size == tp->tcp_header_len) {
                /* If it's got a syn or fin discard. */
                if (!th->syn && !th->fin) {
                        printk(KERN_DEBUG "tcp_send_skb: attempt to queue a bogon.\n");
                        kfree_skb(skb);
                        return;
                }
        }

        /* Actual processing. */
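        /* skb->end_seq covers the data portion only: size still includes the
         * TCP header, hence the 4*th->doff (header length in bytes) subtraction.
         */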
        skb->seq = ntohl(th->seq);
        skb->end_seq = skb->seq + size - 4*th->doff;
        skb_queue_tail(&sk->write_queue, skb);

        if (!force_queue && tp->send_head == NULL && tcp_snd_test(sk, skb)) {
                struct sk_buff *buff;

                /* This is going straight out. */
                tp->last_ack_sent = tp->rcv_nxt;
                th->ack_seq = htonl(tp->rcv_nxt);
                th->window = htons(tcp_select_window(sk));
                tcp_update_options((__u32 *)(th + 1), tp);

                tp->af_specific->send_check(sk, th, size, skb);

                buff = skb_clone(skb, GFP_KERNEL);
                if (buff == NULL)
                        goto queue;

                clear_delayed_acks(sk);
                skb_set_owner_w(buff, sk);

                tp->snd_nxt = skb->end_seq;
                tp->packets_out++;

                skb->when = jiffies;

                tcp_statistics.TcpOutSegs++;
                tp->af_specific->queue_xmit(buff);

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

                return;
        }

queue:
        /* Remember where we must start sending. */
        if (tp->send_head == NULL)
                tp->send_head = skb;
        if (!force_queue && tp->packets_out == 0 && !tp->pending) {
                tp->pending = TIME_PROBE0;
                tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
        }
}
/*
 * Function to create two new tcp segments.
 * Shrinks the given segment to the specified size and appends a new
 * segment with the rest of the packet to the list.
 * This won't be called frequently, I hope...
 */
static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *buff;
        struct tcphdr *th, *nth;
        int nsize;
        int tmp;

        th = skb->h.th;

        /* Size of new segment. */
        nsize = skb->tail - ((unsigned char *)(th) + tp->tcp_header_len) - len;
        if (nsize <= 0) {
                printk(KERN_DEBUG "tcp_fragment: bug size <= 0\n");
                return -1;
        }

        /* Get a new skb... force flag on. */
        buff = sock_wmalloc(sk, nsize + 128 + sk->prot->max_header + 15, 1,
                            GFP_ATOMIC);
        if (buff == NULL)
                return -1;

        /* Put headers on the new packet. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                kfree_skb(buff);
                return -1;
        }

        /* Move the TCP header over. */
        nth = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        buff->h.th = nth;
        memcpy(nth, th, tp->tcp_header_len);

        /* FIXME: Make sure this gets tcp options right. */

        /* Correct the new header. */
        buff->seq = skb->seq + len;
        buff->end_seq = skb->end_seq;
        nth->seq = htonl(buff->seq);
        nth->check = 0;
        nth->doff = th->doff;

        /* urg data is always a headache */
        if (th->urg) {
                if (th->urg_ptr > len) {
                        th->urg = 0;
                        nth->urg_ptr -= len;
                } else {
                        nth->urg = 0;
                }
        }

        /* Copy data tail to our new buffer. */
        buff->csum = csum_partial_copy(((u8 *)(th) + tp->tcp_header_len) + len,
                                       skb_put(buff, nsize),
                                       nsize, 0);

        skb->end_seq -= nsize;
        skb_trim(skb, skb->len - nsize);

        /* Remember to checksum this packet afterwards. */
        th->check = 0;
        skb->csum = csum_partial((u8 *)(th) + tp->tcp_header_len,
                                 skb->tail - ((u8 *)(th) + tp->tcp_header_len),
                                 0);

        skb_append(skb, buff);

        return 0;
}
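/* tcp_write_xmit() found a segment on the write queue that has already
 * been acked (this should not happen): complain, drop it from the queue,
 * and wake up anybody waiting for write space.
 */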
static void tcp_wrxmit_prob(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        /* This is acked data. We can discard it. This cannot currently occur. */
        tp->retransmits = 0;

        printk(KERN_DEBUG "tcp_write_xmit: bug skb in write queue\n");

        update_send_head(sk);

        skb_unlink(skb);
        kfree_skb(skb);

        if (!sk->dead)
                sk->write_space(sk);
}
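/* Split a segment that is larger than the current MSS before sending it.
 * On failure, restore the send head to this skb and return -1 so that
 * tcp_write_xmit() stops transmitting for now.
 */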
static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        SOCK_DEBUG(sk, "tcp_write_xmit: frag needed size=%d mss=%d\n",
                   size, sk->mss);

        if (tcp_fragment(sk, skb, sk->mss)) {
                /* tcp_fragment() failed! */
                tp->send_head = skb;
                tp->packets_out--;
                return -1;
        }
        return 0;
}
/*
 * This routine writes packets to the network.
 * It advances the send_head.
 * This happens as incoming acks open up the remote window for us.
 */
void tcp_write_xmit(struct sock *sk)
{
        struct sk_buff *skb;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        u16 rcv_wnd;
        int sent_pkts = 0;

        /* The bytes will have to remain here. In time closedown will
         * empty the write queue and all will be happy.
         */
        if (sk->zapped)
                return;

        /* Anything on the transmit queue that fits the window can
         * be added providing we are:
         *
         * a) following SWS avoidance [and Nagle algorithm]
         * b) not exceeding our congestion window.
         * c) not retransmitting [Nagle]
         */
        rcv_wnd = htons(tcp_select_window(sk));
        while ((skb = tp->send_head) && tcp_snd_test(sk, skb)) {
                struct tcphdr *th;
                struct sk_buff *buff;
                int size;

                /* See if we really need to send the packet. (debugging code) */
                if (!after(skb->end_seq, tp->snd_una)) {
                        tcp_wrxmit_prob(sk, skb);
                        continue;
                }

                /* Put in the ack seq and window at this point rather
                 * than earlier, in order to keep them monotonic.
                 * We really want to avoid taking back window allocations.
                 * That's legal, but RFC1122 says it's frowned on.
                 * Ack and window will in general have changed since
                 * this packet was put on the write queue.
                 */
                th = skb->h.th;
                size = skb->len - (((unsigned char *) th) - skb->data);
                if (size - (th->doff << 2) > sk->mss) {
                        if (tcp_wrxmit_frag(sk, skb, size))
                                break;
                        size = skb->len - (((unsigned char *) th) - skb->data);
                }

                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = rcv_wnd;
                tcp_update_options((__u32 *)(th + 1), tp);

                tp->af_specific->send_check(sk, th, size, skb);

#ifdef TCP_DEBUG
                if (before(skb->end_seq, tp->snd_nxt))
                        printk(KERN_DEBUG "tcp_write_xmit:"
                               " sending already sent seq\n");
#endif

                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        break;

                /* Advance the send_head. This one is going out. */
                update_send_head(sk);
                clear_delayed_acks(sk);

                tp->packets_out++;
                skb_set_owner_w(buff, sk);

                tp->snd_nxt = skb->end_seq;

                skb->when = jiffies;

                sent_pkts = 1;
                tp->af_specific->queue_xmit(buff);
        }

        if (sent_pkts && !tcp_timer_is_set(sk, TIME_RETRANS))
                tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
}
/* This function returns the amount that we can raise the
 * usable window based on the following constraints
 *
 * 1. The window can never be shrunk once it is offered (RFC 793)
 * 2. We limit memory per socket
 *
 * RFC 1122:
 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
 *  RECV.NEXT + RCV.WIN fixed until:
 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 *
 * i.e. don't raise the right edge of the window until you can raise
 * it at least MSS bytes.
 *
 * Unfortunately, the recommended algorithm breaks header prediction,
 * since header prediction assumes th->window stays fixed.
 *
 * Strictly speaking, keeping th->window fixed violates the receiver
 * side SWS prevention criteria. The problem is that under this rule
 * a stream of single byte packets will cause the right side of the
 * window to always advance by a single byte.
 *
 * Of course, if the sender implements sender side SWS prevention
 * then this will not be a problem.
 *
 * BSD seems to make the following compromise:
 *
 *	If the free space is less than the 1/4 of the maximum
 *	space available and the free space is less than 1/2 mss,
 *	then set the window to 0.
 *	Otherwise, just prevent the window from shrinking
 *	and from being larger than the largest representable value.
 *
 * This prevents incremental opening of the window in the regime
 * where TCP is limited by the speed of the reader side taking
 * data out of the TCP receive queue. It does nothing about
 * those cases where the window is constrained on the sender side
 * because the pipeline is full.
 *
 * BSD also seems to "accidentally" limit itself to windows that are a
 * multiple of MSS, at least until the free space gets quite small.
 * This would appear to be a side effect of the mbuf implementation.
 * Combining these two algorithms results in the observed behavior
 * of having a fixed window size at almost all times.
 *
 * Below we obtain similar behavior by forcing the offered window to
 * a multiple of the mss when it is feasible to do so.
 *
 * FIXME: In our current implementation the value returned by sock_rspace(sk)
 * is the total space we have allocated to the socket to store skbuf's.
 * The current design assumes that up to half of that space will be
 * taken by headers, and the remaining space will be available for TCP data.
 * This should be accounted for correctly instead.
 */
u32 __tcp_select_window(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        unsigned int mss = sk->mss;
        unsigned int free_space;
        u32 window, cur_win;

        free_space = (sk->rcvbuf - atomic_read(&sk->rmem_alloc)) / 2;
        if (tp->window_clamp) {
                free_space = min(tp->window_clamp, free_space);
                mss = min(tp->window_clamp, mss);
        } else {
                printk("tcp_select_window: tp->window_clamp == 0.\n");
        }

        if (mss < 1) {
                mss = 1;
                printk("tcp_select_window: sk->mss fell to 0.\n");
        }

        cur_win = tcp_receive_window(tp);
        if (free_space < sk->rcvbuf/4 && free_space < mss/2) {
                window = 0;
        } else {
                /* Get the largest window that is a nice multiple of mss.
                 * Window clamp already applied above.
                 * If our current window offering is within 1 mss of the
                 * free space we just keep it. This prevents the divide
                 * and multiply from happening most of the time.
                 * We also don't do any window rounding when the free space
                 * is too small.
                 */
                window = tp->rcv_wnd;
                if ((window <= (free_space - mss)) || (window > free_space))
                        window = (free_space/mss)*mss;
        }
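        /* Worked example (illustrative numbers only): with mss = 1460 and
         * free_space = 8000, a current window of 7000 is within one mss of
         * free_space and is kept; a window of 3000 would be rounded to
         * (8000/1460)*1460 = 7300.
         */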
        return window;
}
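/* Try to merge a segment being retransmitted with the next segment on the
 * write queue, when both are small enough to fit into one MSS-sized packet,
 * so the retransmission carries more useful data.
 */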
static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *th1, *th2;
        int size1, size2, avail;
        struct sk_buff *buff = skb->next;

        th1 = skb->h.th;

        if (th1->urg)
                return -1;

        avail = skb_tailroom(skb);

        /* Size of TCP payload. */
        size1 = skb->tail - ((u8 *) (th1) + (th1->doff << 2));

        th2 = buff->h.th;
        size2 = buff->tail - ((u8 *) (th2) + (th2->doff << 2));

        if (size2 > avail || size1 + size2 > sk->mss)
                return -1;

        /* Ok. We will be able to collapse the packet. */
        skb_unlink(buff);

        memcpy(skb_put(skb, size2), ((char *) th2) + (th2->doff << 2), size2);

        /* Update sizes on original skb, both TCP and IP. */
        skb->end_seq += buff->end_seq - buff->seq;
        if (th2->urg) {
                th1->urg = 1;
                th1->urg_ptr = th2->urg_ptr + size1;
        }
        if (th2->fin)
                th1->fin = 1;

        /* ... and off you go. */
        kfree_skb(buff);
        tp->packets_out--;

        /* Header checksum will be set by the retransmit procedure
         * after calling rebuild header.
         */
        th1->check = 0;
        skb->csum = csum_partial((u8 *)(th1) + (th1->doff << 2), size1 + size2, 0);
        return 0;
}
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used to speed up path mtu recovery. Note that
 * these simple retransmits aren't counted in the usual tcp retransmit
 * backoff counters.
 * The socket is already locked here.
 */
void tcp_simple_retransmit(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        /* Clear delay ack timer. */
        tcp_clear_xmit_timer(sk, TIME_DACK);

        tp->retrans_head = NULL;
        /* Don't muck with the congestion window here. */
        tp->dup_acks = 0;
        tp->high_seq = tp->snd_nxt;
        /* FIXME: make the current rtt sample invalid */
        tcp_do_retransmit(sk, 0);
}
/*
 * A socket has timed out on its send queue and wants to do a
 * little retransmitting.
 * retrans_head can be different from the head of the write_queue
 * if we are doing fast retransmit.
 */
void tcp_do_retransmit(struct sock *sk, int all)
{
        struct sk_buff *skb;
        int ct = 0;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        if (tp->retrans_head == NULL)
                tp->retrans_head = skb_peek(&sk->write_queue);

        if (tp->retrans_head == tp->send_head)
                tp->retrans_head = NULL;

        while ((skb = tp->retrans_head) != NULL) {
                struct sk_buff *buff;
                struct tcphdr *th;
                int tcp_size;
                int size;

                /* In general it's OK just to use the old packet. However we
                 * need to use the current ack and window fields. Urg and
                 * urg_ptr could possibly stand to be updated as well, but we
                 * don't keep the necessary data. That shouldn't be a problem,
                 * if the other end is doing the right thing. Since we're
                 * changing the packet, we have to issue a new IP identifier.
                 */

                th = skb->h.th;

                tcp_size = skb->tail - ((unsigned char *)(th) + tp->tcp_header_len);

                if (tcp_size > sk->mss) {
                        if (tcp_fragment(sk, skb, sk->mss)) {
                                printk(KERN_DEBUG "tcp_fragment failed\n");
                                return;
                        }
                        tp->packets_out++;
                }
                if (!th->syn &&
                    tcp_size < (sk->mss >> 1) &&
                    skb->next != tp->send_head &&
                    skb->next != (struct sk_buff *) &sk->write_queue)
                        tcp_retrans_try_collapse(sk, skb);

                if (tp->af_specific->rebuild_header(sk, skb)) {
#ifdef TCP_DEBUG
                        printk(KERN_DEBUG "tcp_do_rebuild_header failed\n");
#endif
                        break;
                }

                SOCK_DEBUG(sk, "retransmit sending seq=%x\n", skb->seq);

                /* Update ack and window. */
                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = ntohs(tcp_select_window(sk));
                tcp_update_options((__u32 *)(th + 1), tp);

                size = skb->tail - (unsigned char *) th;
                tp->af_specific->send_check(sk, th, size, skb);

                skb->when = jiffies;

                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        break;

                skb_set_owner_w(buff, sk);

                clear_delayed_acks(sk);
                tp->af_specific->queue_xmit(buff);

                /* Count retransmissions. */
                ct++;
                sk->prot->retransmits++;
                tcp_statistics.TcpRetransSegs++;

                /* Only one retransmit requested. */
                if (!all)
                        break;

                /* This should cut it off before we send too many packets. */
                if (ct >= tp->snd_cwnd)
                        break;

                /* Advance the pointer. */
                tp->retrans_head = skb->next;
                if ((tp->retrans_head == tp->send_head) ||
                    (tp->retrans_head == (struct sk_buff *) &sk->write_queue))
                        tp->retrans_head = NULL;
        }
}
/*
 * Send a fin.
 */
void tcp_send_fin(struct sock *sk)
{
        struct tcphdr *th = (struct tcphdr *) &sk->dummy_th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *t1;
        struct sk_buff *buff;
        int tmp;

        buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_KERNEL);
        if (buff == NULL) {
                /* FIXME: This is a disaster if it occurs. */
                printk(KERN_INFO "tcp_send_fin: Impossible malloc failure");
                return;
        }

        /* Administrivia. */
        buff->csum = 0;

        /* Put in the IP header and routing stuff. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                int t;

                /* FIXME: We must not throw this out. Eventually we must
                 * put a FIN into the queue, otherwise it never gets queued.
                 */
                kfree_skb(buff);
                tp->write_seq++;
                t = del_timer(&sk->timer);
                if (t)
                        add_timer(&sk->timer);
                else
                        tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
                return;
        }
        /* We ought to check if the end of the queue is a buffer and
         * if so simply add the fin to that buffer, not send it ahead.
         */
        t1 = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        buff->h.th = t1;
        tcp_build_options((__u32 *)(t1 + 1), tp);

        memcpy(t1, th, sizeof(*t1));
        buff->seq = tp->write_seq;
        tp->write_seq++;
        buff->end_seq = tp->write_seq;
        t1->seq = htonl(buff->seq);
        t1->ack_seq = htonl(tp->rcv_nxt);
        t1->window = htons(tcp_select_window(sk));
        t1->fin = 1;

        tp->af_specific->send_check(sk, t1, tp->tcp_header_len, buff);

        /* The fin can only be transmitted after the data. */
        skb_queue_tail(&sk->write_queue, buff);
        if (tp->send_head == NULL) {
                /* FIXME: BUG! we need to check if the fin fits into the window
                 * here. If not we need to do window probing (sick, but true)
                 */
                struct sk_buff *skb1;

                tp->packets_out++;
                tp->snd_nxt = tp->write_seq;
                buff->when = jiffies;

                skb1 = skb_clone(buff, GFP_KERNEL);
                if (skb1) {
                        skb_set_owner_w(skb1, sk);
                        tp->af_specific->queue_xmit(skb1);
                }

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
        }
}
/* WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called. If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */
int tcp_send_synack(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;
        struct sk_buff *buff;
        struct tcphdr *th;
        int tmp;

        skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
        if (skb == NULL)
                return -ENOMEM;

        tmp = tp->af_specific->build_net_header(sk, skb);
        if (tmp < 0) {
                kfree_skb(skb);
                return tmp;
        }

        th = (struct tcphdr *) skb_put(skb, sizeof(struct tcphdr));
        skb->h.th = th;
        memset(th, 0, sizeof(struct tcphdr));

        th->syn = 1;
        th->ack = 1;

        th->source = sk->dummy_th.source;
        th->dest = sk->dummy_th.dest;

        skb->seq = tp->snd_una;
        skb->end_seq = skb->seq + 1 /* th->syn */;
        th->seq = ntohl(skb->seq);

        /* This is a resend of a previous SYN, now with an ACK.
         * we must reuse the previously offered window.
         */
        th->window = htons(tp->rcv_wnd);

        tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);

        tmp = tcp_syn_build_options(skb, sk->mss,
                                    tp->sack_ok, tp->tstamp_ok,
                                    tp->wscale_ok, tp->rcv_wscale);
        skb->csum = 0;
        th->doff = (sizeof(*th) + tmp) >> 2;

        tp->af_specific->send_check(sk, th, sizeof(*th) + tmp, skb);

        skb_queue_tail(&sk->write_queue, skb);

        buff = skb_clone(skb, GFP_ATOMIC);
        if (buff) {
                skb_set_owner_w(buff, sk);

                tp->packets_out++;
                skb->when = jiffies;

                tp->af_specific->queue_xmit(buff);
                tcp_statistics.TcpOutSegs++;
        }

        tcp_reset_xmit_timer(sk, TIME_RETRANS, TCP_TIMEOUT_INIT);

        return 0;
}
/*
 * Send out a delayed ack, the caller does the policy checking
 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
 * for details.
 */
void tcp_send_delayed_ack(struct tcp_opt *tp, int max_timeout)
{
        unsigned long timeout;

        /* Stay within the limit we were given */
        timeout = tp->ato;
        if (timeout > max_timeout)
                timeout = max_timeout;
        timeout += jiffies;

        /* Use new timeout only if there wasn't an older one earlier. */
        if (!del_timer(&tp->delack_timer) || timeout < tp->delack_timer.expires)
                tp->delack_timer.expires = timeout;

        add_timer(&tp->delack_timer);
}
/*
 * This routine sends an ack and also updates the window.
 */
void tcp_send_ack(struct sock *sk)
{
        struct sk_buff *buff;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *th;
        int tmp;

        if (sk->zapped)
                return;        /* We have been reset, we may not send again. */

        /* We need to grab some memory, and put together an ack,
         * and then put it into the queue to be sent.
         * FIXME: is it better to waste memory here and use a
         * constant sized ACK?
         */
        buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_ATOMIC);
        if (buff == NULL) {
                /* Force it to send an ack. We don't have to do this
                 * (ACK is unreliable) but it's much better use of
                 * bandwidth on slow links to send a spare ack than
                 * resend packets.
                 */
                tcp_send_delayed_ack(tp, HZ/2);
                return;
        }

        clear_delayed_acks(sk);

        /* Assemble a suitable TCP frame. */
        buff->csum = 0;

        /* Put in the IP header and routing stuff. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                kfree_skb(buff);
                return;
        }

        th = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        memcpy(th, &sk->dummy_th, sizeof(struct tcphdr));
        tcp_build_options((__u32 *)(th + 1), tp);

        /* Swap the send and the receive. */
        th->window = ntohs(tcp_select_window(sk));
        th->seq = ntohl(tp->snd_nxt);
        tp->last_ack_sent = tp->rcv_nxt;
        th->ack_seq = htonl(tp->rcv_nxt);

        /* Fill in the packet and send it. */
        tp->af_specific->send_check(sk, th, tp->tcp_header_len, buff);
        tp->af_specific->queue_xmit(buff);
        tcp_statistics.TcpOutSegs++;
}
/*
 * This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 */
void tcp_write_wakeup(struct sock *sk)
{
        struct sk_buff *buff, *skb;
        struct tcphdr *t1;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int tmp;

        if (sk->zapped)
                return;        /* After a valid reset we can send no more. */

        /* Write data can still be transmitted/retransmitted in the
         * following states. If any other state is encountered, return.
         * [listen/close will never occur here anyway]
         */
        if ((1 << sk->state) &
            ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1|TCPF_LAST_ACK|TCPF_CLOSING))
                return;

        if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && (skb = tp->send_head)) {
                struct tcphdr *th;
                unsigned long win_size;

                /* We are probing the opening of a window
                 * but the window size is != 0;
                 * this must be the result of sender-side SWS avoidance.
                 */
                win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
                if (win_size < skb->end_seq - skb->seq) {
                        if (tcp_fragment(sk, skb, win_size)) {
                                printk(KERN_DEBUG "tcp_write_wakeup: "
                                       "fragment failed\n");
                                return;
                        }
                }

                th = skb->h.th;
                tp->af_specific->send_check(sk, th, th->doff * 4 + win_size, skb);
                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        return;

                skb_set_owner_w(buff, sk);
                tp->packets_out++;

                clear_delayed_acks(sk);

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

                skb->when = jiffies;
                update_send_head(sk);
                tp->snd_nxt = skb->end_seq;
        } else {
                buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
                if (buff == NULL)
                        return;

                buff->csum = 0;

                /* Put in the IP header and routing stuff. */
                tmp = tp->af_specific->build_net_header(sk, buff);
                if (tmp < 0) {
                        kfree_skb(buff);
                        return;
                }

                t1 = (struct tcphdr *) skb_put(buff, sizeof(struct tcphdr));
                memcpy(t1, (void *) &sk->dummy_th, sizeof(*t1));

                /* FIXME: should zero window probes have SACK and/or TIMESTAMP data?
                 * If so we have to tack them on here.
                 */

                /* Use a previous sequence.
                 * This should cause the other end to send an ack.
                 */
                t1->seq = htonl(tp->snd_nxt - 1);
                t1->ack_seq = htonl(tp->rcv_nxt);
                t1->window = htons(tcp_select_window(sk));

                /* Value from dummy_th may be larger. */
                t1->doff = sizeof(struct tcphdr)/4;

                tp->af_specific->send_check(sk, t1, sizeof(*t1), buff);
        }

        /* Send it. */
        tp->af_specific->queue_xmit(buff);
        tcp_statistics.TcpOutSegs++;
}
/*
 * A window probe timeout has occurred.
 * If window is not closed send a partial packet
 * else a zero probe.
 */
void tcp_send_probe0(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        tcp_write_wakeup(sk);
        tp->pending = TIME_PROBE0;
        tp->backoff++;
        tp->probes_out++;
        tcp_reset_xmit_timer(sk, TIME_PROBE0,
                             min(tp->rto << tp->backoff, 120*HZ));
}