/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:     $Id: tcp_output.c,v 1.42 1997/04/22 01:06:33 davem Exp $
 *
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Mark Evans, <evansmp@uhura.aston.ac.uk>
 *              Corey Minyard <wf-rch!minyard@relay.EU.net>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *              Linus Torvalds, <torvalds@cs.helsinki.fi>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *              Matthew Dillon, <dillon@apollo.west.oic.com>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:     Pedro Roque     :       Retransmit queue handled by TCP.
 *                              :       Fragmentation on mtu decrease
 *                              :       Segment collapse on retransmit
 *                              :       AF independence
 *
 *              Linus Torvalds  :       send_delayed_ack
 *              David S. Miller :       Charge memory using the right skb
 *                                      during syn/ack processing.
 */

#include <net/tcp.h>

extern int sysctl_tcp_sack;
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;

/* Get rid of any delayed acks, we sent one already.. */
static __inline__ void clear_delayed_acks(struct sock * sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        tp->delayed_acks = 0;
        sk->ack_backlog = 0;
        tcp_clear_xmit_timer(sk, TIME_DACK);
}

static __inline__ void update_send_head(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        tp->send_head = tp->send_head->next;
        if (tp->send_head == (struct sk_buff *) &sk->write_queue)
                tp->send_head = NULL;
}

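/* Note: sk->write_queue is a circular list whose head structure doubles as
 * the list sentinel, so comparing send_head against &sk->write_queue is how
 * we detect that the queue holds no more unsent data.
 */
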
static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int nagle_check = 1;
        int len;

        /* RFC 1122 - section 4.2.3.4
         *
         * We must queue if
         *
         * a) The right edge of this frame exceeds the window
         * b) There are packets in flight and we have a small segment
         *    [SWS avoidance and Nagle algorithm]
         *    (part of SWS is done on packetization)
         * c) We are retransmitting [Nagle]
         * d) We have too many packets 'in flight'
         */
        len = skb->end_seq - skb->seq;
        if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out)
                nagle_check = 0;

        return (nagle_check && tp->packets_out < tp->snd_cwnd &&
                !after(skb->end_seq, tp->snd_una + tp->snd_wnd) &&
                tp->retransmits == 0);
}

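/* Illustration (numbers are examples, not from this file): with sk->mss of
 * 1460, a queued 100 byte segment fails the Nagle check while earlier data
 * is still unacknowledged (100 < 730 and packets_out != 0) and so must
 * wait; a full sized segment goes out at once provided it fits inside both
 * the offered window and the congestion window and nothing is being
 * retransmitted.
 */
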
static __inline__ void tcp_build_options(__u32 *ptr, struct tcp_opt *tp)
{
        /* FIXME: We will still need to do SACK here. */
        if (tp->tstamp_ok) {
                *ptr++ = ntohl((TCPOPT_NOP << 24)
                        | (TCPOPT_NOP << 16)
                        | (TCPOPT_TIMESTAMP << 8)
                        | TCPOLEN_TIMESTAMP);
                /* WARNING: If HZ is ever larger than 1000 on some system,
                 * then we will be violating RFC1323 here because our timestamps
                 * will be moving too fast.
                 * FIXME: code TCP so it uses at most ~ 1000 ticks a second?
                 * (I notice alpha is 1024 ticks now). -- erics
                 */
                *ptr++ = htonl(jiffies);
                *ptr = htonl(tp->ts_recent);
        }
}

static __inline__ void tcp_update_options(__u32 *ptr, struct tcp_opt *tp)
{
        /* FIXME: We will still need to do SACK here. */
        if (tp->tstamp_ok) {
                *++ptr = htonl(jiffies);
                *++ptr = htonl(tp->ts_recent);
        }
}

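/* Note: tcp_build_options() lays the timestamp option out as one aligned
 * word of NOP, NOP, kind, length followed by the two timestamp words, so
 * tcp_update_options() can pre-increment past that first word and rewrite
 * just the TSval (jiffies) and TSecr (ts_recent) words in place.
 */
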
/*
 *      This is the main buffer sending routine. We queue the buffer
 *      having checked it is sane seeming.
 */
int tcp_send_skb(struct sock *sk, struct sk_buff *skb)
{
        struct tcphdr * th = skb->h.th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int size;

        /* Length of packet (not counting length of pre-tcp headers). */
        size = skb->len - ((unsigned char *) th - skb->data);

        /* Sanity check it.. */
        if (size < sizeof(struct tcphdr) || size > skb->len) {
                printk(KERN_DEBUG "tcp_send_skb: bad skb "
                       "(skb = %p, data = %p, th = %p, len = %u)\n",
                       skb, skb->data, th, skb->len);
                kfree_skb(skb, FREE_WRITE);
                return 0;
        }

        /* If we have queued a header size packet.. (these crash a few
         * tcp stacks if ack is not set)
         * FIXME: What is the equivalent below when we have options?
         */
        if (size == sizeof(struct tcphdr)) {
                /* If it's got a syn or fin discard. */
                if (!th->syn && !th->fin) {
                        printk(KERN_DEBUG "tcp_send_skb: attempt to queue a bogon.\n");
                        kfree_skb(skb, FREE_WRITE);
                        return 0;
                }
        }

        /* Actual processing. */
        skb->seq = ntohl(th->seq);
        skb->end_seq = skb->seq + size - 4*th->doff;

        skb_queue_tail(&sk->write_queue, skb);

        if (tp->send_head == NULL && tcp_snd_test(sk, skb)) {
                struct sk_buff * buff;

                /* This is going straight out. */
                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = htons(tcp_select_window(sk));
                tcp_update_options((__u32 *)(th + 1), tp);

                tp->af_specific->send_check(sk, th, size, skb);

                buff = skb_clone(skb, GFP_KERNEL);
                if (buff == NULL)
                        goto queue;

                clear_delayed_acks(sk);
                skb_set_owner_w(buff, sk);

                tp->snd_nxt = skb->end_seq;
                tp->packets_out++;

                skb->when = jiffies;

                tcp_statistics.TcpOutSegs++;
                tp->af_specific->queue_xmit(buff);

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

                return 0;
        }

queue:
        /* Remember where we must start sending. */
        if (tp->send_head == NULL)
                tp->send_head = skb;
        if (tp->packets_out == 0 && !tp->pending) {
                tp->pending = TIME_PROBE0;
                tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
        }
        return 0;
}

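/* Note: tcp_send_skb() therefore has two exits. If nothing is queued ahead
 * of this skb and tcp_snd_test() passes, a clone goes straight out and the
 * retransmit timer covers it; otherwise the skb simply waits on
 * sk->write_queue, and with nothing in flight a zero window probe timer is
 * armed in its place.
 */
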
/*
 * Function to create two new tcp segments.
 * Shrinks the given segment to the specified size and appends a new
 * segment with the rest of the packet to the list.
 * This won't be called frequently, I hope...
 */
static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *buff;
        struct tcphdr *th, *nth;
        int nsize;
        int tmp;

        th = skb->h.th;

        /* Size of new segment. */
        nsize = skb->tail - ((unsigned char *)(th) + tp->tcp_header_len) - len;
        if (nsize <= 0) {
                printk(KERN_DEBUG "tcp_fragment: bug size <= 0\n");
                return -1;
        }

        /* Get a new skb... force flag on. */
        buff = sock_wmalloc(sk, nsize + 128 + sk->prot->max_header + 15, 1,
                            GFP_ATOMIC);
        if (buff == NULL)
                return -1;

        /* Put headers on the new packet. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                kfree_skb(buff, FREE_WRITE);
                return -1;
        }

        /* Move the TCP header over. */
        nth = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        buff->h.th = nth;
        memcpy(nth, th, tp->tcp_header_len);

        /* FIXME: Make sure this gets tcp options right. */

        /* Correct the new header. */
        buff->seq = skb->seq + len;
        buff->end_seq = skb->end_seq;
        nth->seq = htonl(buff->seq);
        nth->check = 0;
        nth->doff = th->doff;

        /* urg data is always a headache */
        if (th->urg) {
                if (th->urg_ptr > len) {
                        th->urg = 0;
                        nth->urg_ptr -= len;
                } else {
                        nth->urg = 0;
                }
        }

        /* Copy data tail to our new buffer. */
        buff->csum = csum_partial_copy(((u8 *)(th) + tp->tcp_header_len) + len,
                                       skb_put(buff, nsize),
                                       nsize, 0);

        skb->end_seq -= nsize;
        skb_trim(skb, skb->len - nsize);

        /* Remember to checksum this packet afterwards. */
        th->check = 0;
        skb->csum = csum_partial((u8 *)(th) + tp->tcp_header_len,
                                 skb->tail - ((u8 *)(th) + tp->tcp_header_len), 0);

        skb_append(skb, buff);

        return 0;
}

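/* Note: after a successful tcp_fragment(sk, skb, len) the original skb
 * covers sequence space [seq, seq+len) and the new buffer carries
 * [seq+len, end_seq), queued directly behind it by skb_append().
 */
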
static void tcp_wrxmit_prob(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        /* This is acked data. We can discard it. This cannot currently occur. */
        tp->retransmits = 0;

        printk(KERN_DEBUG "tcp_write_xmit: bug skb in write queue\n");

        update_send_head(sk);

        skb_unlink(skb);
        kfree_skb(skb, FREE_WRITE);

        if (!sk->dead)
                sk->write_space(sk);
}

static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        SOCK_DEBUG(sk, "tcp_write_xmit: frag needed size=%d mss=%d\n",
                   size, sk->mss);

        if (tcp_fragment(sk, skb, sk->mss)) {
                /* tcp_fragment failed! */
                tp->send_head = skb;
                tp->packets_out--;
                return -1;
        } else {
                /* If tcp_fragment succeeded then
                 * the send head is the resulting
                 * fragment
                 */
                tp->send_head = skb->next;

                return 0;
        }
}

/*
 * This routine writes packets to the network.
 * It advances the send_head.
 * This happens as incoming acks open up the remote window for us.
 */
void tcp_write_xmit(struct sock *sk)
{
        struct sk_buff *skb;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        u16 rcv_wnd;
        int sent_pkts = 0;

        /* The bytes will have to remain here. In time closedown will
         * empty the write queue and all will be happy.
         */
        if (sk->zapped)
                return;

        /* Anything on the transmit queue that fits the window can
         * be added providing we are:
         *
         * a) following SWS avoidance [and Nagle algorithm]
         * b) not exceeding our congestion window.
         * c) not retransmitting [Nagle]
         */
        rcv_wnd = htons(tcp_select_window(sk));
        while ((skb = tp->send_head) && tcp_snd_test(sk, skb)) {
                struct tcphdr *th;
                struct sk_buff *buff;
                int size;

                /* See if we really need to send the packet. (debugging code) */
                if (!after(skb->end_seq, tp->snd_una)) {
                        tcp_wrxmit_prob(sk, skb);
                        continue;
                }

                /* Put in the ack seq and window at this point rather
                 * than earlier, in order to keep them monotonic.
                 * We really want to avoid taking back window allocations.
                 * That's legal, but RFC1122 says it's frowned on.
                 * Ack and window will in general have changed since
                 * this packet was put on the write queue.
                 */
                th = skb->h.th;
                size = skb->len - (((unsigned char *) th) - skb->data);
                if (size - (th->doff << 2) > sk->mss) {
                        if (tcp_wrxmit_frag(sk, skb, size))
                                break;
                }

                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = rcv_wnd;
                tcp_update_options((__u32 *)(th + 1), tp);

                tp->af_specific->send_check(sk, th, size, skb);

#ifdef TCP_DEBUG
                if (before(skb->end_seq, tp->snd_nxt))
                        printk(KERN_DEBUG "tcp_write_xmit:"
                               " sending already sent seq\n");
#endif

                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        break;

                /* Advance the send_head. This one is going out. */
                update_send_head(sk);
                clear_delayed_acks(sk);

                tp->packets_out++;
                skb_set_owner_w(buff, sk);

                tp->snd_nxt = skb->end_seq;

                skb->when = jiffies;

                sent_pkts = 1;
                tp->af_specific->queue_xmit(buff);
        }

        if (sent_pkts && !tcp_timer_is_set(sk, TIME_RETRANS))
                tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
}

/*
 * This function returns the amount that we can raise the
 * usable window based on the following constraints
 *
 * 1. The window can never be shrunk once it is offered (RFC 793)
 * 2. We limit memory per socket
 */
unsigned short tcp_select_window(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        int mss = sk->mss;
        long free_space = sock_rspace(sk);
        long window, cur_win, usable;

        if (tp->window_clamp) {
                free_space = min(tp->window_clamp, free_space);
                mss = min(tp->window_clamp, mss);
        }

        /* compute the actual window i.e.
         * old_window - received_bytes_on_that_win
         */
        cur_win = tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd);
        window = tp->rcv_wnd;

        if (cur_win < 0) {
                cur_win = 0;
                printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
                       tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
        }

        /*
         * RFC 1122:
         * "the suggested [SWS] avoidance algorithm for the receiver is to keep
         *  RCV.NEXT + RCV.WIN fixed until:
         *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
         *
         * i.e. don't raise the right edge of the window until you can't raise
         * it MSS bytes
         */

        /* It would be a good idea if it didn't break header prediction.
         * and BSD made the header prediction standard...
         * It expects the same value in the header i.e. th->window to be
         * constant
         */
        usable = free_space - cur_win;
        if (usable < 0)
                usable = 0;

        if (window < usable) {
                /* Window is not blocking the sender
                 * and we have enough free space for it
                 */
                if (cur_win > (sk->mss << 1))
                        goto out;
        }

        if (window >= usable) {
                /* We are offering too much, cut it down...
                 * but don't shrink the window
                 */
                window = max(usable, cur_win);
        } else {
                if ((usable - window) >= mss)
                        window += mss;
        }

out:
        tp->rcv_wnd = window;
        tp->rcv_wup = tp->rcv_nxt;
        return window;
}

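/* Worked example (illustrative numbers, not from this file): with
 * free_space = 8192, mss = 1460 and nothing yet advertised, usable starts
 * at 8192 but successive calls raise the window one mss at a time: 1460,
 * 2920, 4380. Once the sender already holds more than two segments of open
 * window (cur_win > sk->mss << 1) we jump to "out" and stop raising it;
 * the right edge only ever moves in MSS sized steps, which is the receiver
 * side SWS avoidance described above.
 */
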
static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *th1, *th2;
        int size1, size2, avail;
        struct sk_buff *buff = skb->next;

        th1 = skb->h.th;

        if (th1->urg)
                return -1;

        avail = skb_tailroom(skb);

        /* Size of TCP payload. */
        size1 = skb->tail - ((u8 *) (th1) + (th1->doff << 2));

        th2 = buff->h.th;
        size2 = buff->tail - ((u8 *) (th2) + (th2->doff << 2));

        if (size2 > avail || size1 + size2 > sk->mss)
                return -1;

        /* Ok. We will be able to collapse the packet. */
        skb_unlink(buff);
        memcpy(skb_put(skb, size2), ((char *) th2) + (th2->doff << 2), size2);

        /* Update sizes on original skb, both TCP and IP. */
        skb->end_seq += size2;
        if (th2->urg) {
                th1->urg = 1;
                th1->urg_ptr = th2->urg_ptr + size1;
        }

        /* ... and off you go. */
        kfree_skb(buff, FREE_WRITE);
        tp->packets_out--;

        /* Header checksum will be set by the retransmit procedure
         * after calling rebuild header.
         */
        th1->check = 0;
        skb->csum = csum_partial((u8 *)(th1) + (th1->doff << 2), size1 + size2, 0);
        return 0;
}

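/* Note: this collapse is what keeps retransmission cheap after an mtu
 * decrease (the "Segment collapse on retransmit" entry in the Changes list
 * above): neighbouring small segments are merged up to sk->mss so fewer,
 * fuller packets get resent.
 */
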
/*
 * A socket has timed out on its send queue and wants to do a
 * little retransmitting.
 * retransmit_head can be different from the head of the write_queue
 * if we are doing fast retransmit.
 */
void tcp_do_retransmit(struct sock *sk, int all)
{
        struct sk_buff * skb;
        int ct = 0;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        if (tp->retrans_head == NULL)
                tp->retrans_head = skb_peek(&sk->write_queue);

        if (tp->retrans_head == tp->send_head)
                tp->retrans_head = NULL;

        while ((skb = tp->retrans_head) != NULL) {
                struct sk_buff *buff;
                struct tcphdr *th;
                int tcp_size;
                int size;

                /* In general it's OK just to use the old packet.  However we
                 * need to use the current ack and window fields.  Urg and
                 * urg_ptr could possibly stand to be updated as well, but we
                 * don't keep the necessary data.  That shouldn't be a problem,
                 * if the other end is doing the right thing.  Since we're
                 * changing the packet, we have to issue a new IP identifier.
                 */
                th = skb->h.th;

                tcp_size = skb->tail - ((unsigned char *)(th) + tp->tcp_header_len);

                if (tcp_size > sk->mss) {
                        if (tcp_fragment(sk, skb, sk->mss)) {
                                printk(KERN_DEBUG "tcp_fragment failed\n");
                                return;
                        }
                        tp->packets_out++;
                }

                if (!th->syn &&
                    tcp_size < (sk->mss >> 1) &&
                    skb->next != tp->send_head &&
                    skb->next != (struct sk_buff *) &sk->write_queue)
                        tcp_retrans_try_collapse(sk, skb);

                if (tp->af_specific->rebuild_header(sk, skb)) {
#ifdef TCP_DEBUG
                        printk(KERN_DEBUG "tcp_do_rebuild_header failed\n");
#endif
                        break;
                }

                SOCK_DEBUG(sk, "retransmit sending\n");

                /* Update ack and window. */
                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = ntohs(tcp_select_window(sk));
                tcp_update_options((__u32 *)(th + 1), tp);

                size = skb->tail - (unsigned char *) th;
                tp->af_specific->send_check(sk, th, size, skb);

                skb->when = jiffies;

                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        break;

                skb_set_owner_w(buff, sk);

                clear_delayed_acks(sk);
                tp->af_specific->queue_xmit(buff);

                /* Count retransmissions. */
                ct++;
                sk->prot->retransmits++;
                tcp_statistics.TcpRetransSegs++;

                /* Only one retransmit requested. */
                if (!all)
                        break;

                /* This should cut it off before we send too many packets. */
                if (ct >= tp->snd_cwnd)
                        break;

                /* Advance the pointer. */
                tp->retrans_head = skb->next;
                if ((tp->retrans_head == tp->send_head) ||
                    (tp->retrans_head == (struct sk_buff *) &sk->write_queue))
                        tp->retrans_head = NULL;
        }
}

/*
 * Send a fin.
 */
void tcp_send_fin(struct sock *sk)
{
        struct tcphdr *th = (struct tcphdr *) &sk->dummy_th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *t1;
        struct sk_buff *buff;
        int tmp;

        buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_KERNEL);
        if (buff == NULL) {
                /* FIXME: This is a disaster if it occurs. */
                printk(KERN_INFO "tcp_send_fin: Impossible malloc failure");
                return;
        }

        /* Administrivia. */
        buff->csum = 0;

        /* Put in the IP header and routing stuff. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                int t;

                /* FIXME: We must not throw this out. Eventually we must
                 * put a FIN into the queue, otherwise it never gets queued.
                 */
                kfree_skb(buff, FREE_WRITE);
                sk->write_seq++;
                t = del_timer(&sk->timer);
                if (t)
                        add_timer(&sk->timer);
                else
                        tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
                return;
        }

        /* We ought to check if the end of the queue is a buffer and
         * if so simply add the fin to that buffer, not send it ahead.
         */
        t1 = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        buff->h.th = t1;
        tcp_build_options((__u32 *)(t1 + 1), tp);

        memcpy(t1, th, sizeof(*t1));
        buff->seq = sk->write_seq;
        sk->write_seq++;
        buff->end_seq = sk->write_seq;
        t1->seq = htonl(buff->seq);
        t1->ack_seq = htonl(tp->rcv_nxt);
        t1->window = htons(tcp_select_window(sk));
        t1->fin = 1;

        tp->af_specific->send_check(sk, t1, tp->tcp_header_len, buff);

        /* The fin can only be transmitted after the data. */
        skb_queue_tail(&sk->write_queue, buff);
        if (tp->send_head == NULL) {
                struct sk_buff *skb1;

                tp->packets_out++;
                tp->snd_nxt = sk->write_seq;
                buff->when = jiffies;

                skb1 = skb_clone(buff, GFP_KERNEL);
                if (skb1) {
                        skb_set_owner_w(skb1, sk);
                        tp->af_specific->queue_xmit(skb1);
                }

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
        }
}

int tcp_send_synack(struct sock *sk)
{
        struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff * skb;
        struct sk_buff * buff;
        struct tcphdr *th;
        int tmp;

        skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
        if (skb == NULL)
                return -ENOMEM;

        tmp = tp->af_specific->build_net_header(sk, skb);
        if (tmp < 0) {
                kfree_skb(skb, FREE_WRITE);
                return tmp;
        }

        th = (struct tcphdr *) skb_put(skb, sizeof(struct tcphdr));
        skb->h.th = th;
        memset(th, 0, sizeof(struct tcphdr));

        th->syn = 1;
        th->ack = 1;

        th->source = sk->dummy_th.source;
        th->dest = sk->dummy_th.dest;

        skb->seq = tp->snd_una;
        skb->end_seq = skb->seq + 1 /* th->syn */;
        th->seq = ntohl(skb->seq);

        th->window = ntohs(tp->rcv_wnd);

        tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);

        tmp = tcp_syn_build_options(skb, sk->mss,
                                    tp->sack_ok, tp->tstamp_ok,
                                    tp->snd_wscale ? tp->rcv_wscale : 0);
        skb->csum = 0;
        th->doff = (sizeof(*th) + tmp) >> 2;

        tp->af_specific->send_check(sk, th, sizeof(*th) + tmp, skb);

        skb_queue_tail(&sk->write_queue, skb);

        buff = skb_clone(skb, GFP_ATOMIC);
        if (buff) {
                skb_set_owner_w(buff, sk);

                tp->packets_out++;
                skb->when = jiffies;

                tp->af_specific->queue_xmit(buff);
                tcp_statistics.TcpOutSegs++;
        }

        tcp_reset_xmit_timer(sk, TIME_RETRANS, TCP_TIMEOUT_INIT);

        return 0;
}

/*
 * Set up the timers for sending a delayed ack..
 *
 * rules for delaying an ack:
 * - delay time <= 0.5 HZ
 * - we must send at least every 2 full sized packets
 * - we don't have a window update to send
 */
void tcp_send_delayed_ack(struct sock * sk, int max_timeout)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        unsigned long timeout, now;

        /* Calculate new timeout. */
        now = jiffies;
        timeout = tp->ato;

        if (timeout > max_timeout ||
            ((tp->rcv_nxt - tp->rcv_wup) > (sk->mss << 2)))
                timeout = now;
        else
                timeout += now;

        /* Use new timeout only if there wasn't an older one earlier. */
        if (!del_timer(&tp->delack_timer) || timeout < tp->delack_timer.expires)
                tp->delack_timer.expires = timeout;

        add_timer(&tp->delack_timer);
}

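/* Illustration (values are examples, not from this file): with tp->ato at
 * 40 jiffies and a max_timeout of HZ/2, the delayed ack fires 40 ticks
 * from now; but once more than four full segments (sk->mss << 2 bytes)
 * have arrived since the last window update, timeout collapses to "now"
 * and the ack goes out on the next timer run.
 */
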
/*
 * This routine sends an ack and also updates the window.
 */
void tcp_send_ack(struct sock *sk)
{
        struct sk_buff *buff;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *th;
        int tmp;

        if (sk->zapped)
                return;         /* We have been reset, we may not send again. */

        /* We need to grab some memory, and put together an ack,
         * and then put it into the queue to be sent.
         * FIXME: is it better to waste memory here and use a
         * constant sized ACK?
         */
        buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_ATOMIC);
        if (buff == NULL) {
                /* Force it to send an ack. We don't have to do this
                 * (ACK is unreliable) but it's much better use of
                 * bandwidth on slow links to send a spare ack than
                 * resend packets.
                 */
                tcp_send_delayed_ack(sk, HZ/2);
                return;
        }

        clear_delayed_acks(sk);

        /* Assemble a suitable TCP frame. */
        buff->csum = 0;

        /* Put in the IP header and routing stuff. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                kfree_skb(buff, FREE_WRITE);
                return;
        }

        th = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        memcpy(th, &sk->dummy_th, sizeof(struct tcphdr));
        tcp_build_options((__u32 *)(th + 1), tp);

        /* Swap the send and the receive. */
        th->window = ntohs(tcp_select_window(sk));
        th->seq = ntohl(tp->snd_nxt);
        tp->last_ack_sent = th->ack_seq = ntohl(tp->rcv_nxt);

        /* Fill in the packet and send it. */
        tp->af_specific->send_check(sk, th, tp->tcp_header_len, buff);

        SOCK_DEBUG(sk, "\rtcp_send_ack: seq %x ack %x\n",
                   tp->snd_nxt, tp->rcv_nxt);

        tp->af_specific->queue_xmit(buff);
        tcp_statistics.TcpOutSegs++;
}

/*
 * This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 */
void tcp_write_wakeup(struct sock *sk)
{
        struct sk_buff *buff, *skb;
        struct tcphdr *t1;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int tmp;

        if (sk->zapped)
                return;         /* After a valid reset we can send no more. */

        /* Write data can still be transmitted/retransmitted in the
         * following states.  If any other state is encountered, return.
         * [listen/close will never occur here anyway]
         */
        if (sk->state != TCP_ESTABLISHED &&
            sk->state != TCP_CLOSE_WAIT &&
            sk->state != TCP_FIN_WAIT1 &&
            sk->state != TCP_LAST_ACK &&
            sk->state != TCP_CLOSING)
                return;

        if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && (skb = tp->send_head)) {
                struct tcphdr *th;
                unsigned long win_size;

                /* We are probing the opening of a window
                 * but the window size is != 0
                 * must have been a result of SWS avoidance ( sender )
                 */
                win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
                if (win_size < skb->end_seq - skb->seq) {
                        if (tcp_fragment(sk, skb, win_size)) {
                                printk(KERN_DEBUG "tcp_write_wakeup: "
                                       "fragment failed\n");
                                return;
                        }
                }

                th = skb->h.th;
                tp->af_specific->send_check(sk, th, th->doff * 4 + win_size, skb);
                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        return;

                skb_set_owner_w(buff, sk);
                tp->packets_out++;

                clear_delayed_acks(sk);

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

                skb->when = jiffies;
                update_send_head(sk);
                tp->snd_nxt = skb->end_seq;
        } else {
                buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
                if (buff == NULL)
                        return;

                buff->csum = 0;

                /* Put in the IP header and routing stuff. */
                tmp = tp->af_specific->build_net_header(sk, buff);
                if (tmp < 0) {
                        kfree_skb(buff, FREE_WRITE);
                        return;
                }

                t1 = (struct tcphdr *) skb_put(buff, sizeof(struct tcphdr));
                memcpy(t1, (void *) &sk->dummy_th, sizeof(*t1));
                /* FIXME: should zero window probes have SACK and/or TIMESTAMP data?
                 * If so we have to tack them on here.
                 */

                /* Use a previous sequence.
                 * This should cause the other end to send an ack.
                 */
                t1->seq = htonl(tp->snd_nxt - 1);
                /* t1->fin = 0;  -- We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
                t1->ack_seq = htonl(tp->rcv_nxt);
                t1->window = htons(tcp_select_window(sk));

                /* Value from dummy_th may be larger. */
                t1->doff = sizeof(struct tcphdr) / 4;

                tp->af_specific->send_check(sk, t1, sizeof(*t1), buff);
        }

        /* Send it. */
        tp->af_specific->queue_xmit(buff);
        tcp_statistics.TcpOutSegs++;
}

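/* Note: tcp_write_wakeup() probes in one of two ways. If some window is
 * open and data is queued, it pushes real data (fragmented down to the
 * window if necessary); otherwise it sends a bare segment carrying the
 * already acknowledged sequence number snd_nxt - 1 and no data, purely to
 * provoke an ack that reports the peer's current window.
 */
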
/*
 * A window probe timeout has occurred.
 * If window is not closed send a partial packet
 * else a zero probe.
 */
void tcp_send_probe0(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        if (sk->zapped)
                return;         /* After a valid reset we can send no more. */

        tcp_write_wakeup(sk);

        tp->pending = TIME_PROBE0;
        tp->backoff++;
        tp->probes_out++;

        tcp_reset_xmit_timer(sk, TIME_PROBE0,
                             min(tp->rto << tp->backoff, 120*HZ));
}
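
/* Note: the probe interval backs off exponentially (rto, 2*rto, 4*rto,
 * ...) but the min() above clamps it at 120*HZ, so a persistent zero
 * window is still re-probed at least once every two minutes.
 */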