/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	@(#)tcp_output.c	1.0.16	05/25/93
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *
 * Fixes:	Eric Schenk	: avoid multiple retransmissions in one
 *				: round trip timeout.
 */
#include <linux/config.h>
#include <net/tcp.h>

#include <linux/interrupt.h>
/*
 * RFC 1122 says:
 *
 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
 *  RECV.NEXT + RCV.WIN fixed until:
 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 *
 * Experiments against BSD and Solaris machines show that following
 * these rules results in the BSD and Solaris machines making very
 * bad guesses about how much data they can have in flight.
 *
 * Instead we follow the BSD lead and offer a window that gives
 * the size of the current free space, truncated to a multiple
 * of 1024 bytes. If the window is smaller than
 *	min(sk->mss, MAX_WINDOW/2)
 * then we advertise the window as having size 0, unless this
 * would shrink the window we offered last time.
 * This results in as much as double the throughput of the original
 * implementation.
 *
 * We do BSD style SWS avoidance -- note that RFC 1122 only says we
 * must do silly window avoidance, it does not require that we use
 * the suggested algorithm.
 *
 * The "rcvbuf" and "rmem_alloc" values are shifted by 1, because
 * they also contain buffer handling overhead etc, so the window
 * we actually use is essentially based on only half those values.
 */
int tcp_new_window(struct sock * sk)
{
	unsigned long window;
	unsigned long minwin, maxwin;

	/* Get minimum and maximum window values.. */
	minwin = sk->mss;
	if (!minwin)
		minwin = sk->mtu;
	maxwin = sk->window_clamp;
	if (!maxwin)
		maxwin = MAX_WINDOW;
	if (minwin > maxwin/2)
		minwin = maxwin/2;

	/* Get current rcvbuf size.. */
	window = sk->rcvbuf/2;
	if (window < minwin) {
		sk->rcvbuf = minwin*2;
		window = minwin;
	}

	/* Check rcvbuf against used and minimum window */
	window -= sk->rmem_alloc/2;
	if ((long)(window - minwin) < 0)	/* SWS avoidance */
		window = 0;

	if (window > 1023)
		window &= ~1023;
	if (window > maxwin)
		window = maxwin;
	return window;
}
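
/*
 *	Illustrative walk-through of the above (hypothetical numbers,
 *	not from the original source): with mss = 1460, window_clamp = 0
 *	and rcvbuf = 16384, we get minwin = 1460, maxwin = MAX_WINDOW
 *	and window = 8192.  With rmem_alloc = 4096 we subtract half of
 *	it, leaving window = 6144; since 6144 >= minwin the SWS clamp
 *	does not zero it, and 6144 is already a multiple of 1024, so
 *	6144 is advertised.  Had window fallen below 1460 we would
 *	have advertised 0 instead.
 */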
/*
 *	Get rid of any delayed acks, we sent one already..
 */
static __inline__ void clear_delayed_acks(struct sock * sk)
{
	sk->ack_timed = 0;
	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;
	del_timer(&sk->delack_timer);
}
/*
 *	This is the main buffer sending routine. We queue the buffer
 *	having checked it is sane seeming.
 */

void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
{
	int size;
	struct tcphdr * th = skb->h.th;

	/*
	 *	length of packet (not counting length of pre-tcp headers)
	 */

	size = skb->len - ((unsigned char *) th - skb->data);

	/*
	 *	Sanity check it..
	 */

	if (size < sizeof(struct tcphdr) || size > skb->len)
	{
		printk(KERN_ERR "tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
			skb, skb->data, th, skb->len);
		kfree_skb(skb, FREE_WRITE);
		return;
	}

	/*
	 *	If we have queued a header size packet.. (these crash a few
	 *	tcp stacks if ack is not set)
	 */

	if (size == sizeof(struct tcphdr))
	{
		/* If it's got a syn or fin it's notionally included in the size..*/
		if (!th->syn && !th->fin)
		{
			printk(KERN_ERR "tcp_send_skb: attempt to queue a bogon.\n");
			kfree_skb(skb, FREE_WRITE);
			return;
		}
	}

	/*
	 *	Actual processing.
	 */

	tcp_statistics.TcpOutSegs++;
	skb->seq = ntohl(th->seq);
	skb->end_seq = skb->seq + size - 4*th->doff;

	/*
	 *	We must queue if
	 *
	 *	a) The right edge of this frame exceeds the window
	 *	b) We are retransmitting (Nagle's rule)
	 *	c) We have too many packets 'in flight'
	 */

	if (after(skb->end_seq, sk->window_seq) ||
	    (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) ||
	    sk->packets_out >= sk->cong_window)
	{
		/* checksum will be supplied by tcp_write_xmit. So
		 * we shouldn't need to set it at all. I'm being paranoid */
		th->check = 0;
		if (skb->next != NULL)
		{
			printk(KERN_ERR "tcp_send_partial: next != NULL\n");
			skb_unlink(skb);
		}
		skb_queue_tail(&sk->write_queue, skb);

		if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
		    sk->send_head == NULL && sk->ack_backlog == 0)
			tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
	}
	else
	{
		/*
		 *	This is going straight out
		 */
		clear_delayed_acks(sk);
		th->ack_seq = htonl(sk->acked_seq);
		th->window = htons(tcp_select_window(sk));

		tcp_send_check(th, sk->saddr, sk->daddr, size, skb);

		sk->sent_seq = sk->write_seq;

		/*
		 *	This is mad. The tcp retransmit queue is put together
		 *	by the ip layer. This causes half the problems with
		 *	unroutable FIN's and other things.
		 */

		sk->prot->queue_xmit(sk, skb->dev, skb, 0);

		/*
		 *	Set for next retransmit based on expected ACK time
		 *	of the first packet in the resend queue.
		 *	This is no longer a window behind.
		 */

		tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
	}
}
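
/*
 *	Informal example of the queue-or-send test in tcp_send_skb
 *	(hypothetical numbers): with window_seq = 5000, a frame whose
 *	end_seq is 6000 overruns the offered window and is queued, as
 *	is any frame while packets_out >= cong_window (say 4 unacked
 *	segments against a congestion window of 4) or while a
 *	retransmission timer is pending.  Only a frame that passes all
 *	three tests goes straight to queue_xmit().
 */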
/*
 *	Locking problems lead us to a messy situation where we can have
 *	multiple partially complete buffers queued up. This is really bad
 *	as we don't want to be sending partial buffers. Fix this with
 *	a semaphore or similar to lock tcp_write per socket.
 *
 *	These routines are pretty self descriptive.
 */

struct sk_buff * tcp_dequeue_partial(struct sock * sk)
{
	struct sk_buff * skb;
	unsigned long flags;

	save_flags(flags);
	cli();
	skb = sk->partial;
	if (skb) {
		sk->partial = NULL;
		del_timer(&sk->partial_timer);
	}
	restore_flags(flags);
	return skb;
}
/*
 *	Empty the partial queue
 */

void tcp_send_partial(struct sock *sk)
{
	struct sk_buff *skb;

	if (sk == NULL)
		return;
	while ((skb = tcp_dequeue_partial(sk)) != NULL)
		tcp_send_skb(sk, skb);
}
/*
 *	Queue a partial frame
 */

void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
{
	struct sk_buff * tmp;
	unsigned long flags;

	save_flags(flags);
	cli();
	tmp = sk->partial;
	if (tmp)
		del_timer(&sk->partial_timer);
	sk->partial = skb;
	init_timer(&sk->partial_timer);
	/*
	 *	Wait up to 1 second for the buffer to fill.
	 */
	sk->partial_timer.expires = jiffies+HZ;
	sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
	sk->partial_timer.data = (unsigned long) sk;
	add_timer(&sk->partial_timer);
	restore_flags(flags);
	if (tmp)
		tcp_send_skb(sk, tmp);
}
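
/*
 *	Usage sketch (informal): the write path parks an under-full
 *	frame with tcp_enqueue_partial(skb, sk); if more user data
 *	arrives within the one second window the frame is pulled back
 *	with tcp_dequeue_partial() and extended, otherwise the timer
 *	fires and tcp_send_partial() flushes whatever is there.
 */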
/*
 *	This routine takes stuff off of the write queue,
 *	and puts it in the xmit queue. This happens as incoming acks
 *	open up the remote window for us.
 */

void tcp_write_xmit(struct sock *sk)
{
	struct sk_buff *skb;

	/*
	 *	The bytes will have to remain here. In time closedown will
	 *	empty the write queue and all will be happy
	 */

	if (sk->zapped)
		return;

	/*
	 *	Anything on the transmit queue that fits the window can
	 *	be added providing we are not
	 *
	 *	a) retransmitting (Nagle's rule)
	 *	b) exceeding our congestion window.
	 */

	while ((skb = skb_peek(&sk->write_queue)) != NULL &&
	       !after(skb->end_seq, sk->window_seq) &&
	       (sk->retransmits == 0 ||
		sk->ip_xmit_timeout != TIME_WRITE ||
		!after(skb->end_seq, sk->rcv_ack_seq))
	       && sk->packets_out < sk->cong_window)
	{
		IS_SKB(skb);
		skb_unlink(skb);

		/*
		 *	See if we really need to send the packet.
		 */

		if (before(skb->end_seq, sk->rcv_ack_seq + 1))
		{
			/*
			 *	This is acked data. We can discard it. This
			 *	cannot currently occur.
			 */

			sk->retransmits = 0;
			kfree_skb(skb, FREE_WRITE);
			if (!sk->dead)
				sk->write_space(sk);
		}
		else
		{
			struct tcphdr *th;
			struct iphdr *iph;
			int size;

			/*
			 * put in the ack seq and window at this point rather than earlier,
			 * in order to keep them monotonic. We really want to avoid taking
			 * back window allocations. That's legal, but RFC 1122 says it's frowned on.
			 * Ack and window will in general have changed since this packet was put
			 * on the write queue.
			 */
			iph = skb->ip_hdr;
			th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
			size = skb->len - (((unsigned char *) th) - skb->data);
#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
			if (size > sk->mtu - sizeof(struct iphdr))
			{
				iph->frag_off &= ~htons(IP_DF);
				ip_send_check(iph);
			}
#endif

			th->ack_seq = htonl(sk->acked_seq);
			th->window = htons(tcp_select_window(sk));

			tcp_send_check(th, sk->saddr, sk->daddr, size, skb);

			sk->sent_seq = skb->end_seq;

			/*
			 *	IP manages our queue for some crazy reason
			 */

			sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);

			clear_delayed_acks(sk);

			tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
		}
	}
}
/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 */

void tcp_do_retransmit(struct sock *sk, int all)
{
	struct sk_buff * skb;
	struct proto *prot;
	struct device *dev;
	struct rtable *rt;

	prot = sk->prot;
	if (!all) {
		/*
		 * If we are just retransmitting one packet reset
		 * to the start of the queue.
		 */
		sk->send_next = sk->send_head;
		sk->packets_out = 0;
	}
	skb = sk->send_head;

	while (skb != NULL)
	{
		struct tcphdr *th;
		struct iphdr *iph;
		int size;

		dev = skb->dev;
		IS_SKB(skb);
		skb->when = jiffies;

		/* dl1bke 960201 - @%$$! Hope this cures strange race conditions */
		/* with AX.25 mode VC. (esp. DAMA) */
		/* if the buffer is locked we should not retransmit */
		/* anyway, so we don't need all the fuss to prepare */
		/* the buffer in this case. */
		/* (the skb_pull() changes skb->data while we may */
		/* actually try to send the data. Ouch. A side */
		/* effect is that we'll send some unnecessary data, */
		/* but the alternative is disastrous...) */

		if (skb_device_locked(skb))
			break;

		/*
		 *	Discard the surplus MAC header
		 */

		skb_pull(skb, ((unsigned char *)skb->ip_hdr) - skb->data);

		/*
		 * In general it's OK just to use the old packet.  However we
		 * need to use the current ack and window fields.  Urg and
		 * urg_ptr could possibly stand to be updated as well, but we
		 * don't keep the necessary data.  That shouldn't be a problem,
		 * if the other end is doing the right thing.  Since we're
		 * changing the packet, we have to issue a new IP identifier.
		 */

		iph = (struct iphdr *)skb->data;
		th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
		size = ntohs(iph->tot_len) - (iph->ihl << 2);

		/*
		 *	Note: We ought to check for window limits here but
		 *	currently this is done (less efficiently) elsewhere.
		 */

		/*
		 *	Put a MAC header back on (may cause ARPing)
		 */

		/* ANK: UGLY, but the bug, that was here, should be fixed.
		 */
		{
			struct options * opt = (struct options *)skb->proto_priv;
			rt = ip_check_route(&sk->ip_route_cache, opt->srr ? opt->faddr : iph->daddr, skb->localroute);
		}

		iph->id = htons(ip_id_count++);
#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
		if (rt && ntohs(iph->tot_len) > rt->rt_mtu)
			iph->frag_off &= ~htons(IP_DF);
#endif
		ip_send_check(iph);

		if (rt == NULL)	/* Deep poo */
		{
			if (skb->sk)
			{
				skb->sk->err_soft = ENETUNREACH;
				skb->sk->error_report(skb->sk);
			}
		}
		else
		{
			dev = rt->rt_dev;
			skb->raddr = rt->rt_gateway;
			skb->dev = dev;
			skb->arp = 1;
			if (rt->rt_hh)
			{
				memcpy(skb_push(skb, dev->hard_header_len), rt->rt_hh->hh_data, dev->hard_header_len);
				if (!rt->rt_hh->hh_uptodate)
				{
					skb->arp = 0;
#if RT_CACHE_DEBUG >= 2
					printk("tcp_do_retransmit: hh miss %08x via %08x\n", iph->daddr, rt->rt_gateway);
#endif
				}
			}
			else if (dev->hard_header)
			{
				if (dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, skb->len) < 0)
					skb->arp = 0;
			}

			/*
			 *	This is not the right way to handle this. We have to
			 *	issue an up to date window and ack report with this
			 *	retransmit to keep the odd buggy tcp that relies on
			 *	the fact BSD does this happy.
			 *	We don't however need to recalculate the entire
			 *	checksum, so someone wanting a small problem to play
			 *	with might like to implement RFC1141/RFC1624 and speed
			 *	this up by avoiding a full checksum (a sketch follows
			 *	this function).
			 */

			th->ack_seq = htonl(sk->acked_seq);
			clear_delayed_acks(sk);
			th->window = ntohs(tcp_select_window(sk));
			tcp_send_check(th, sk->saddr, sk->daddr, size, skb);

			/*
			 *	If the interface is (still) up and running, kick it.
			 */

			if (dev->flags & IFF_UP)
			{
				/*
				 *	If the packet is still being sent by the device/protocol
				 *	below then don't retransmit. This is both needed, and good -
				 *	especially with connected mode AX.25 where it stops resends
				 *	occurring of an as yet unsent anyway frame!
				 *	We still add up the counts as the round trip time wants
				 *	adjusting.
				 */
				if (sk && !skb_device_locked(skb))
				{
					/* Remove it from any existing driver queue first! */
					skb_unlink(skb);
					/* Now queue it */
					ip_statistics.IpOutRequests++;
					dev_queue_xmit(skb, dev, sk->priority);
					sk->packets_out++;
				}
			}
		}

		/*
		 *	Count retransmissions
		 */

		sk->prot->retransmits++;
		tcp_statistics.TcpRetransSegs++;

		/*
		 * Record the high sequence number to help avoid doing
		 * too much fast retransmission.
		 */
		if (sk->retransmits)
			sk->high_seq = sk->sent_seq;

		/*
		 * Advance the send_next pointer so we don't keep
		 * retransmitting the same stuff every time we get an ACK.
		 */
		sk->send_next = skb->link3;

		/*
		 *	Only one retransmit requested.
		 */

		if (!all)
			break;

		/*
		 *	This should cut it off before we send too many packets.
		 */

		if (sk->packets_out >= sk->cong_window)
			break;

		skb = skb->link3;
	}
}
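
/*
 *	Sketch of the RFC 1141/RFC 1624 incremental update suggested
 *	above (illustrative only, not compiled in; the helper name is
 *	hypothetical).  When a 16-bit header word changes from 'old'
 *	to 'new', the one's complement checksum HC can be updated as
 *	HC' = ~(~HC + ~m + m') instead of being recomputed in full.
 */
#if 0
static __inline__ unsigned short incr_check(unsigned short check,
	unsigned short old, unsigned short new)
{
	unsigned long sum;

	/* ~HC + ~m + m', then fold the end-around carries back in */
	sum = (~check & 0xffff) + (~old & 0xffff) + new;
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return ~sum & 0xffff;
}
#endif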
/*
 *	This routine will send an RST to the other tcp.
 */

void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
	  struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	int tmp;
	struct device *ndev = NULL;

	/*
	 *	Cannot reset a reset (Think about it).
	 */

	if (th->rst)
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = sock_wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
		return;

	buff->sk = NULL;
	buff->dev = dev;
	buff->localroute = 0;
	buff->csum = 0;

	/*
	 *	Put in the IP header and routing stuff.
	 */

	tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
				 sizeof(struct tcphdr), tos, ttl, NULL);
	if (tmp < 0)
	{
		buff->free = 1;
		sock_wfree(NULL, buff);
		return;
	}

	t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));
	memset(t1, 0, sizeof(*t1));

	/*
	 *	Swap the send and the receive.
	 */

	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if (th->ack)
	{
		t1->seq = th->ack_seq;
	}
	else
	{
		t1->ack = 1;
		if (!th->syn)
			t1->ack_seq = th->seq;
		else
			t1->ack_seq = htonl(ntohl(th->seq)+1);
	}

	tcp_send_check(t1, saddr, daddr, sizeof(*t1), buff);
	prot->queue_xmit(NULL, ndev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}
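
/*
 *	Worked example of the sequence selection above (informal): in
 *	reply to a bare SYN with seq = 100 the RST goes out with
 *	seq = 0, ack = 1 and ack_seq = 101 (the SYN itself occupies one
 *	sequence number); in reply to a segment carrying ack_seq = 500
 *	the RST uses seq = 500 and no ACK bit, as RFC 793 prescribes.
 */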
/*
 *	Send a fin.
 */

void tcp_send_fin(struct sock *sk)
{
	struct proto *prot = (struct proto *)sk->prot;
	struct tcphdr *th = (struct tcphdr *)&sk->dummy_th;
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct device *dev = NULL;
	int tmp;

	buff = sock_wmalloc(sk, MAX_RESET_SIZE, 1, GFP_KERNEL);

	if (buff == NULL)
	{
		/* This is a disaster if it occurs */
		printk(KERN_CRIT "tcp_send_fin: Impossible malloc failure\n");
		return;
	}

	/*
	 *	Administrivia
	 */

	buff->sk = sk;
	buff->localroute = sk->localroute;
	buff->csum = 0;

	/*
	 *	Put in the IP header and routing stuff.
	 */

	tmp = prot->build_header(buff, sk->saddr, sk->daddr, &dev,
				 IPPROTO_TCP, sk->opt,
				 sizeof(struct tcphdr), sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
	if (tmp < 0)
	{
		int t;
		/*
		 *	Finish anyway, treat this as a send that got lost.
		 *	(Not good).
		 */

		buff->free = 1;
		sock_wfree(sk, buff);
		sk->write_seq++;
		t = del_timer(&sk->timer);
		if (t)
			add_timer(&sk->timer);
		else
			tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
		return;
	}

	/*
	 *	We ought to check if the end of the queue is a buffer and
	 *	if so simply add the fin to that buffer, not send it ahead.
	 */

	t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));
	buff->dev = dev;
	memcpy(t1, th, sizeof(*t1));
	buff->seq = sk->write_seq;
	sk->write_seq++;
	buff->end_seq = sk->write_seq;
	t1->seq = htonl(buff->seq);
	t1->ack_seq = htonl(sk->acked_seq);
	t1->window = htons(tcp_select_window(sk));
	t1->fin = 1;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);

	/*
	 * If there is data in the write queue, the fin must be appended to
	 * the write queue.
	 */

	if (skb_peek(&sk->write_queue) != NULL)
	{
		buff->free = 0;
		if (buff->next != NULL)
		{
			printk(KERN_ERR "tcp_send_fin: next != NULL\n");
			skb_unlink(buff);
		}
		skb_queue_tail(&sk->write_queue, buff);
	}
	else
	{
		sk->sent_seq = sk->write_seq;
		sk->prot->queue_xmit(sk, dev, buff, 0);
		tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
	}
}
void tcp_send_synack(struct sock * newsk, struct sock * sk, struct sk_buff * skb)
{
	struct tcphdr *t1;
	unsigned char *ptr;
	struct sk_buff * buff;
	struct device *ndev = NULL;
	int tmp;

	buff = sock_wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
	{
		sk->err = ENOMEM;
		destroy_sock(newsk);
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	buff->sk = newsk;
	buff->localroute = newsk->localroute;

	/*
	 *	Put in the IP header and routing stuff.
	 */

	tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
				     IPPROTO_TCP, newsk->opt, MAX_SYN_SIZE, sk->ip_tos, sk->ip_ttl, &newsk->ip_route_cache);

	/*
	 *	Something went wrong.
	 */

	if (tmp < 0)
	{
		sk->err = tmp;
		buff->free = 1;
		kfree_skb(buff, FREE_WRITE);
		destroy_sock(newsk);
		skb->sk = sk;
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));

	memcpy(t1, skb->h.th, sizeof(*t1));
	buff->seq = newsk->write_seq++;
	buff->end_seq = newsk->write_seq;
	/*
	 *	Swap the send and the receive.
	 */
	t1->dest = skb->h.th->source;
	t1->source = newsk->dummy_th.source;
	t1->seq = ntohl(buff->seq);
	newsk->sent_seq = newsk->write_seq;
	t1->window = ntohs(tcp_select_window(newsk));
	t1->syn = 1;
	t1->ack = 1;
	t1->urg = 0;
	t1->rst = 0;
	t1->psh = 0;
	t1->ack_seq = htonl(newsk->acked_seq);
	t1->doff = sizeof(*t1)/4+1;
	ptr = skb_put(buff, 4);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = ((newsk->mtu) >> 8) & 0xff;
	ptr[3] = (newsk->mtu) & 0xff;
	buff->csum = csum_partial(ptr, 4, 0);
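	/*
	 *	The four bytes above form the TCP MSS option: kind 2,
	 *	length 4, then the 16-bit MSS in network byte order;
	 *	e.g. an advertised mtu of 1460 is encoded as 0x05 0xb4.
	 */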
	tcp_send_check(t1, newsk->saddr, newsk->daddr, sizeof(*t1)+4, buff);
	newsk->prot->queue_xmit(newsk, ndev, buff, 0);
	tcp_reset_xmit_timer(newsk, TIME_WRITE, TCP_TIMEOUT_INIT);
	skb->sk = newsk;

	/*
	 *	Charge the sock_buff to newsk.
	 */

	atomic_sub(skb->truesize, &sk->rmem_alloc);
	atomic_add(skb->truesize, &newsk->rmem_alloc);

	skb_queue_tail(&sk->receive_queue, skb);
	sk->ack_backlog++;
	tcp_statistics.TcpOutSegs++;
}
/*
 *	Set up the timers for sending a delayed ack..
 *
 *	rules for delaying an ack:
 *	- delay time <= 0.5 HZ
 *	- must send at least every 2 full sized packets
 *	- we don't have a window update to send
 *
 *	additional thoughts:
 *	- we should not delay sending an ACK if we have ato > 0.5 HZ.
 *	  My thinking about this is that in this case we will just be
 *	  systematically skewing the RTT calculation. (The rule about
 *	  sending every two full sized packets will never need to be
 *	  invoked, the delayed ack will be sent before the ATO timeout
 *	  every time. Of course, this relies on our having a good estimate
 *	  for packet interarrival times.)
 */
void tcp_send_delayed_ack(struct sock * sk, int max_timeout, unsigned long timeout)
{
	unsigned long now;

	/* Calculate new timeout */
	now = jiffies;
	if (timeout > max_timeout || sk->bytes_rcv >= sk->max_unacked) {
		timeout = now;
		mark_bh(TIMER_BH);
	} else {
		timeout += now;
	}

	/* Use new timeout only if there wasn't an older one earlier */
	if (!del_timer(&sk->delack_timer) || timeout < sk->delack_timer.expires)
		sk->delack_timer.expires = timeout;

	sk->ack_backlog++;
	add_timer(&sk->delack_timer);
}
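
/*
 *	Example: tcp_send_delayed_ack(sk, HZ/2, HZ/2), as used in
 *	tcp_send_ack() below, schedules an ACK half a second out; if
 *	bytes_rcv has already reached max_unacked the timeout collapses
 *	to "now" and the timer bottom half runs immediately.  Note that
 *	an earlier pending expiry is never pushed back by a later request.
 */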
/*
 *	This routine sends an ack and also updates the window.
 */

void tcp_send_ack(struct sock *sk)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct device *dev = NULL;
	int tmp;

	if (sk->zapped)
		return;	/* We have been reset, we may not send again */

	/*
	 *	If we have nothing queued for transmit and the transmit timer
	 *	is on we are just doing an ACK timeout and need to switch
	 *	to a keepalive.
	 */

	clear_delayed_acks(sk);

	if (sk->send_head == NULL
	    && skb_queue_empty(&sk->write_queue)
	    && sk->ip_xmit_timeout == TIME_WRITE)
	{
		if (sk->keepopen)
			tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
		else
			del_timer(&sk->retransmit_timer);
	}

	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */

	buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
	{
		/*
		 *	Force it to send an ack. We don't have to do this
		 *	(ACK is unreliable) but it's much better use of
		 *	bandwidth on slow links to send a spare ack than
		 *	resend packets.
		 */

		tcp_send_delayed_ack(sk, HZ/2, HZ/2);
		return;
	}

	/*
	 *	Assemble a suitable TCP frame
	 */

	buff->sk = sk;
	buff->localroute = sk->localroute;
	buff->csum = 0;

	/*
	 *	Put in the IP header and routing stuff.
	 */

	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
				     IPPROTO_TCP, sk->opt, MAX_ACK_SIZE, sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
	if (tmp < 0)
	{
		buff->free = 1;
		sock_wfree(sk, buff);
		return;
	}
	t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));

	/*
	 *	Fill in the packet and send it
	 */

	memcpy(t1, &sk->dummy_th, sizeof(*t1));
	t1->seq = htonl(sk->sent_seq);
	t1->ack_seq = htonl(sk->acked_seq);
	t1->window = htons(tcp_select_window(sk));

	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);
	if (sk->debug)
		printk(KERN_ERR "\rtcp_ack: seq %x ack %x\n", sk->sent_seq, sk->acked_seq);
	sk->prot->queue_xmit(sk, dev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}
/*
 *	This routine sends a packet with an out of date sequence
 *	number. It assumes the other end will try to ack it.
 */

void tcp_write_wakeup(struct sock *sk)
{
	struct sk_buff *buff, *skb;
	struct tcphdr *t1;
	struct device *dev = NULL;
	int tmp;

	if (sk->zapped)
		return;	/* After a valid reset we can send no more */

	/*
	 *	Write data can still be transmitted/retransmitted in the
	 *	following states. If any other state is encountered, return.
	 *	[listen/close will never occur here anyway]
	 */

	if (sk->state != TCP_ESTABLISHED &&
	    sk->state != TCP_CLOSE_WAIT &&
	    sk->state != TCP_FIN_WAIT1 &&
	    sk->state != TCP_LAST_ACK &&
	    sk->state != TCP_CLOSING
	)
		return;

	if (before(sk->sent_seq, sk->window_seq) &&
	    (skb = skb_peek(&sk->write_queue)))
	{
		/*
		 * We are probing the opening of a window
		 * but the window size is != 0, so this must
		 * have been a result of sender-side SWS avoidance.
		 */

		struct iphdr *iph;
		struct tcphdr *th;
		struct tcphdr *nth;
		unsigned long win_size;
#if 0
		unsigned long ow_size;
#endif

		/*
		 *	How many bytes can we send ?
		 */

		win_size = sk->window_seq - sk->sent_seq;

		/*
		 *	Recover the buffer pointers
		 */

		iph = (struct iphdr *)skb->ip_hdr;
		th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));

		/*
		 *	Grab the data for a temporary frame
		 */

		buff = sock_wmalloc(sk, win_size + th->doff * 4 +
				    (iph->ihl << 2) +
				    sk->prot->max_header + 15,
				    1, GFP_ATOMIC);
		if (buff == NULL)
			return;

		/*
		 *	If we strip the packet on the write queue we must
		 *	be ready to retransmit this one
		 */

		buff->free = /*0*/1;

		buff->sk = sk;
		buff->localroute = sk->localroute;

		/*
		 *	Put headers on the new packet
		 */

		tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
					     IPPROTO_TCP, sk->opt, buff->truesize,
					     sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
		if (tmp < 0)
		{
			sock_wfree(sk, buff);
			return;
		}

		/*
		 *	Move the TCP header over
		 */

		buff->dev = dev;

		nth = (struct tcphdr *)skb_put(buff, sizeof(*th));

		memcpy(nth, th, sizeof(*th));

		/*
		 *	Correct the new header
		 */

		nth->ack = 1;
		nth->ack_seq = htonl(sk->acked_seq);
		nth->window = htons(tcp_select_window(sk));
		nth->check = 0;

		/*
		 *	Copy TCP options and data start to our new buffer
		 */

		buff->csum = csum_partial_copy((void *)(th + 1), skb_put(buff, win_size),
					       win_size + th->doff*4 - sizeof(*th), 0);

		/*
		 *	Remember our right edge sequence number.
		 */

		buff->end_seq = sk->sent_seq + win_size;
		sk->sent_seq = buff->end_seq;	/* Hack */
		if (th->urg && ntohs(th->urg_ptr) < win_size)
			nth->urg = 0;

		/*
		 *	Checksum the split buffer
		 */

		tcp_send_check(nth, sk->saddr, sk->daddr,
			       nth->doff * 4 + win_size, buff);
	}
	else
	{
		buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
		if (buff == NULL)
			return;

		buff->free = 1;
		buff->sk = sk;
		buff->localroute = sk->localroute;
		buff->csum = 0;

		/*
		 *	Put in the IP header and routing stuff.
		 */

		tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
					     IPPROTO_TCP, sk->opt, MAX_ACK_SIZE, sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
		if (tmp < 0)
		{
			sock_wfree(sk, buff);
			return;
		}

		t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));
		memcpy(t1, (void *) &sk->dummy_th, sizeof(*t1));

		/*
		 *	Use a previous sequence.
		 *	This should cause the other end to send an ack.
		 */

		t1->seq = htonl(sk->sent_seq-1);
/*		t1->fin = 0;	-- We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
		t1->ack_seq = htonl(sk->acked_seq);
		t1->window = htons(tcp_select_window(sk));
		tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);
	}

	/*
	 *	Send it.
	 */

	sk->prot->queue_xmit(sk, dev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}
/*
 *	A window probe timeout has occurred.
 */

void tcp_send_probe0(struct sock *sk)
{
	if (sk->zapped)
		return;	/* After a valid reset we can send no more */

	tcp_write_wakeup(sk);

	sk->backoff++;
	sk->rto = min(sk->rto << 1, 120*HZ);
	sk->retransmits++;
	sk->prot->retransmits++;
	tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
}
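
/*
 *	Backoff illustration (hypothetical starting rto of 3*HZ):
 *	successive unanswered probes are rescheduled at 6*HZ, 12*HZ,
 *	24*HZ and so on, until the min() above pins the interval
 *	at 120*HZ.
 */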