/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	@(#)tcp_output.c	1.0.16	05/25/93
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *
 * Fixes:	Eric Schenk	: avoid multiple retransmissions in one
 *				: round trip timeout.
 */
#include <linux/config.h>
#include <net/tcp.h>

#include <linux/interrupt.h>
/*
 * RFC 1122 says:
 *
 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
 *  RECV.NEXT + RCV.WIN fixed until:
 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 *
 * Experiments against BSD and Solaris machines show that following
 * these rules results in the BSD and Solaris machines making very
 * bad guesses about how much data they can have in flight.
 *
 * Instead we follow the BSD lead and offer a window that gives
 * the size of the current free space, truncated to a multiple
 * of 1024 bytes. If the window is smaller than
 *	min(sk->mss, MAX_WINDOW/2)
 * then we advertise the window as having size 0, unless this
 * would shrink the window we offered last time.
 * This results in as much as double the throughput of the original
 * implementation.
 *
 * We do BSD style SWS avoidance -- note that RFC 1122 only says we
 * must do silly window avoidance, it does not require that we use
 * the suggested algorithm.
 *
 * The "rcvbuf" and "rmem_alloc" values are shifted by 1, because
 * they also contain buffer handling overhead etc, so the window
 * we actually use is essentially based on only half those values.
 */
int tcp_new_window(struct sock * sk)
{
	unsigned long window;
	unsigned long minwin, maxwin;

	/* Get minimum and maximum window values.. */
	minwin = sk->mss;
	if (!minwin)
		minwin = sk->mtu;
	maxwin = sk->window_clamp;
	if (!maxwin)
		maxwin = MAX_WINDOW;
	if (minwin > maxwin/2)
		minwin = maxwin/2;

	/* Get current rcvbuf size.. */
	window = sk->rcvbuf/2;
	if (window < minwin) {
		sk->rcvbuf = minwin*2;
		window = minwin;
	}

	/* Check rcvbuf against used and minimum window */
	window -= sk->rmem_alloc/2;
	if ((long)(window - minwin) < 0)	/* SWS avoidance */
		window = 0;

	if (window > 1023)
		window &= ~1023;
	if (window > maxwin)
		window = maxwin;
	return window;
}
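
/*
 *	Illustrative walk-through of the above (hypothetical numbers,
 *	not from the original source): with mss = 1460, window_clamp = 0
 *	and rcvbuf = 16384, we get minwin = 1460, maxwin = MAX_WINDOW
 *	and window = 8192.  With rmem_alloc = 4096 we subtract half of
 *	it, leaving window = 6144; since 6144 >= minwin the SWS clamp
 *	does not zero it, and 6144 is already a multiple of 1024, so
 *	6144 is advertised.  Had window fallen below 1460 we would
 *	have advertised 0 instead.
 */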
/*
 *	Get rid of any delayed acks, we sent one already..
 */
static __inline__ void clear_delayed_acks(struct sock * sk)
{
	sk->ack_timed = 0;
	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;
	del_timer(&sk->delack_timer);
}
/*
 *	This is the main buffer sending routine. We queue the buffer
 *	having checked it is sane seeming.
 */

void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
{
	int size;
	struct tcphdr * th = skb->h.th;

	/*
	 *	length of packet (not counting length of pre-tcp headers)
	 */

	size = skb->len - ((unsigned char *) th - skb->data);

	/*
	 *	Sanity check it..
	 */

	if (size < sizeof(struct tcphdr) || size > skb->len)
	{
		printk(KERN_ERR "tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
			skb, skb->data, th, skb->len);
		kfree_skb(skb, FREE_WRITE);
		return;
	}

	/*
	 *	If we have queued a header size packet.. (these crash a few
	 *	tcp stacks if ack is not set)
	 */

	if (size == sizeof(struct tcphdr))
	{
		/* If it's got a syn or fin it's notionally included in the size..*/
		if (!th->syn && !th->fin)
		{
			printk(KERN_ERR "tcp_send_skb: attempt to queue a bogon.\n");
			kfree_skb(skb, FREE_WRITE);
			return;
		}
	}

	/*
	 *	Actual processing.
	 */

	tcp_statistics.TcpOutSegs++;
	skb->seq = ntohl(th->seq);
	skb->end_seq = skb->seq + size - 4*th->doff;

	/*
	 *	We must queue if
	 *
	 *	a) The right edge of this frame exceeds the window
	 *	b) We are retransmitting (Nagle's rule)
	 *	c) We have too many packets 'in flight'
	 */

	if (after(skb->end_seq, sk->window_seq) ||
	    (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) ||
	    sk->packets_out >= sk->cong_window)
	{
		/* checksum will be supplied by tcp_write_xmit. So
		 * we shouldn't need to set it at all. I'm being paranoid */
		th->check = 0;
		if (skb->next != NULL)
		{
			printk(KERN_ERR "tcp_send_partial: next != NULL\n");
			skb_unlink(skb);
		}
		skb_queue_tail(&sk->write_queue, skb);

		if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
		    sk->send_head == NULL && sk->ack_backlog == 0)
			tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
	}
	else
	{
		/*
		 *	This is going straight out
		 */
		clear_delayed_acks(sk);
		th->ack_seq = htonl(sk->acked_seq);
		th->window = htons(tcp_select_window(sk));

		tcp_send_check(th, sk->saddr, sk->daddr, size, skb);

		sk->sent_seq = sk->write_seq;

		/*
		 *	This is mad. The tcp retransmit queue is put together
		 *	by the ip layer. This causes half the problems with
		 *	unroutable FIN's and other things.
		 */

		sk->prot->queue_xmit(sk, skb->dev, skb, 0);

		/*
		 *	Set for next retransmit based on expected ACK time
		 *	of the first packet in the resend queue.
		 *	This is no longer a window behind.
		 */

		tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
	}
}
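
/*
 *	Informal example of the queue-or-send test in tcp_send_skb
 *	(hypothetical numbers): with window_seq = 5000, a frame whose
 *	end_seq is 6000 overruns the offered window and is queued, as
 *	is any frame while packets_out >= cong_window (say 4 unacked
 *	segments against a congestion window of 4) or while a
 *	retransmission timer is pending.  Only a frame that passes all
 *	three tests goes straight to queue_xmit().
 */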
/*
 *	Locking problems lead us to a messy situation where we can have
 *	multiple partially complete buffers queued up. This is really bad
 *	as we don't want to be sending partial buffers. Fix this with
 *	a semaphore or similar to lock tcp_write per socket.
 *
 *	These routines are pretty self descriptive.
 */

struct sk_buff * tcp_dequeue_partial(struct sock * sk)
{
	struct sk_buff * skb;
	unsigned long flags;

	save_flags(flags);
	cli();
	skb = sk->partial;
	if (skb) {
		sk->partial = NULL;
		del_timer(&sk->partial_timer);
	}
	restore_flags(flags);
	return skb;
}
/*
 *	Empty the partial queue
 */

void tcp_send_partial(struct sock *sk)
{
	struct sk_buff *skb;

	if (sk == NULL)
		return;
	while ((skb = tcp_dequeue_partial(sk)) != NULL)
		tcp_send_skb(sk, skb);
}
/*
 *	Queue a partial frame
 */

void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
{
	struct sk_buff * tmp;
	unsigned long flags;

	save_flags(flags);
	cli();
	tmp = sk->partial;
	if (tmp)
		del_timer(&sk->partial_timer);
	sk->partial = skb;
	init_timer(&sk->partial_timer);
	/*
	 *	Wait up to 1 second for the buffer to fill.
	 */
	sk->partial_timer.expires = jiffies+HZ;
	sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
	sk->partial_timer.data = (unsigned long) sk;
	add_timer(&sk->partial_timer);
	restore_flags(flags);
	if (tmp)
		tcp_send_skb(sk, tmp);
}
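
/*
 *	Usage sketch (informal): the write path parks an under-full
 *	frame with tcp_enqueue_partial(skb, sk); if more user data
 *	arrives within the one second window the frame is pulled back
 *	with tcp_dequeue_partial() and extended, otherwise the timer
 *	fires and tcp_send_partial() flushes whatever is there.
 */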
/*
 *	This routine takes stuff off of the write queue,
 *	and puts it in the xmit queue. This happens as incoming acks
 *	open up the remote window for us.
 */

void tcp_write_xmit(struct sock *sk)
{
	struct sk_buff *skb;

	/*
	 *	The bytes will have to remain here. In time closedown will
	 *	empty the write queue and all will be happy
	 */

	if (sk->zapped)
		return;

	/*
	 *	Anything on the transmit queue that fits the window can
	 *	be added providing we are not
	 *
	 *	a) retransmitting (Nagle's rule)
	 *	b) exceeding our congestion window.
	 */

	while ((skb = skb_peek(&sk->write_queue)) != NULL &&
	       !after(skb->end_seq, sk->window_seq) &&
	       (sk->retransmits == 0 ||
		sk->ip_xmit_timeout != TIME_WRITE ||
		!after(skb->end_seq, sk->rcv_ack_seq))
	       && sk->packets_out < sk->cong_window)
	{
		IS_SKB(skb);
		skb_unlink(skb);

		/*
		 *	See if we really need to send the packet.
		 */

		if (before(skb->end_seq, sk->rcv_ack_seq + 1))
		{
			/*
			 *	This is acked data. We can discard it. This
			 *	cannot currently occur.
			 */

			sk->retransmits = 0;
			kfree_skb(skb, FREE_WRITE);
			if (!sk->dead)
				sk->write_space(sk);
		}
		else
		{
			struct tcphdr *th;
			struct iphdr *iph;
			int size;

			/*
			 * put in the ack seq and window at this point rather than earlier,
			 * in order to keep them monotonic. We really want to avoid taking
			 * back window allocations. That's legal, but RFC 1122 says it's frowned on.
			 * Ack and window will in general have changed since this packet was put
			 * on the write queue.
			 */
			iph = skb->ip_hdr;
			th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
			size = skb->len - (((unsigned char *) th) - skb->data);
#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
			if (size > sk->mtu - sizeof(struct iphdr))
			{
				iph->frag_off &= ~htons(IP_DF);
				ip_send_check(iph);
			}
#endif

			th->ack_seq = htonl(sk->acked_seq);
			th->window = htons(tcp_select_window(sk));

			tcp_send_check(th, sk->saddr, sk->daddr, size, skb);

			sk->sent_seq = skb->end_seq;

			/*
			 *	IP manages our queue for some crazy reason
			 */

			sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);

			clear_delayed_acks(sk);

			tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
		}
	}
}
/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 */

void tcp_do_retransmit(struct sock *sk, int all)
{
	struct sk_buff * skb;
	struct proto *prot;
	struct device *dev;
	struct rtable *rt;

	prot = sk->prot;
	if (!all) {
		/*
		 * If we are just retransmitting one packet reset
		 * to the start of the queue.
		 */
		sk->send_next = sk->send_head;
		sk->packets_out = 0;
	}
	skb = sk->send_head;

	while (skb != NULL)
	{
		struct tcphdr *th;
		struct iphdr *iph;
		int size;

		dev = skb->dev;
		IS_SKB(skb);
		skb->when = jiffies;

		/* dl1bke 960201 - @%$$! Hope this cures strange race conditions */
		/* with AX.25 mode VC. (esp. DAMA) */
		/* if the buffer is locked we should not retransmit */
		/* anyway, so we don't need all the fuss to prepare */
		/* the buffer in this case. */
		/* (the skb_pull() changes skb->data while we may */
		/* actually try to send the data. Ouch. A side */
		/* effect is that we'll send some unnecessary data, */
		/* but the alternative is disastrous...) */

		if (skb_device_locked(skb))
			break;

		/*
		 *	Discard the surplus MAC header
		 */

		skb_pull(skb, ((unsigned char *)skb->ip_hdr) - skb->data);

		/*
		 * In general it's OK just to use the old packet.  However we
		 * need to use the current ack and window fields.  Urg and
		 * urg_ptr could possibly stand to be updated as well, but we
		 * don't keep the necessary data.  That shouldn't be a problem,
		 * if the other end is doing the right thing.  Since we're
		 * changing the packet, we have to issue a new IP identifier.
		 */

		iph = (struct iphdr *)skb->data;
		th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
		size = ntohs(iph->tot_len) - (iph->ihl << 2);

		/*
		 *	Note: We ought to check for window limits here but
		 *	currently this is done (less efficiently) elsewhere.
		 */

		/*
		 *	Put a MAC header back on (may cause ARPing)
		 */

		/* ANK: UGLY, but the bug, that was here, should be fixed.
		 */
		{
			struct options * opt = (struct options *)skb->proto_priv;
			rt = ip_check_route(&sk->ip_route_cache, opt->srr ? opt->faddr : iph->daddr, skb->localroute);
		}

		iph->id = htons(ip_id_count++);
#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
		if (rt && ntohs(iph->tot_len) > rt->rt_mtu)
			iph->frag_off &= ~htons(IP_DF);
#endif
		ip_send_check(iph);

		if (rt == NULL)	/* Deep poo */
		{
			if (skb->sk)
			{
				skb->sk->err_soft = ENETUNREACH;
				skb->sk->error_report(skb->sk);
			}
		}
		else
		{
			dev = rt->rt_dev;
			skb->raddr = rt->rt_gateway;
			skb->dev = dev;
			skb->arp = 1;
			if (rt->rt_hh)
			{
				memcpy(skb_push(skb, dev->hard_header_len), rt->rt_hh->hh_data, dev->hard_header_len);
				if (!rt->rt_hh->hh_uptodate)
				{
					skb->arp = 0;
#if RT_CACHE_DEBUG >= 2
					printk("tcp_do_retransmit: hh miss %08x via %08x\n", iph->daddr, rt->rt_gateway);
#endif
				}
			}
			else if (dev->hard_header)
			{
				if (dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, skb->len) < 0)
					skb->arp = 0;
			}

			/*
			 *	This is not the right way to handle this. We have to
			 *	issue an up to date window and ack report with this
			 *	retransmit to keep the odd buggy tcp that relies on
			 *	the fact BSD does this happy.
			 *	We don't however need to recalculate the entire
			 *	checksum, so someone wanting a small problem to play
			 *	with might like to implement RFC1141/RFC1624 and speed
			 *	this up by avoiding a full checksum (a sketch follows
			 *	this function).
			 */

			th->ack_seq = htonl(sk->acked_seq);
			clear_delayed_acks(sk);
			th->window = ntohs(tcp_select_window(sk));
			tcp_send_check(th, sk->saddr, sk->daddr, size, skb);

			/*
			 *	If the interface is (still) up and running, kick it.
			 */

			if (dev->flags & IFF_UP)
			{
				/*
				 *	If the packet is still being sent by the device/protocol
				 *	below then don't retransmit. This is both needed, and good -
				 *	especially with connected mode AX.25 where it stops resends
				 *	occurring of an as yet unsent anyway frame!
				 *	We still add up the counts as the round trip time wants
				 *	adjusting.
				 */
				if (sk && !skb_device_locked(skb))
				{
					/* Remove it from any existing driver queue first! */
					skb_unlink(skb);
					/* Now queue it */
					ip_statistics.IpOutRequests++;
					dev_queue_xmit(skb, dev, sk->priority);
					sk->packets_out++;
				}
			}
		}

		/*
		 *	Count retransmissions
		 */

		sk->prot->retransmits++;
		tcp_statistics.TcpRetransSegs++;

		/*
		 * Record the high sequence number to help avoid doing
		 * too much fast retransmission.
		 */
		if (sk->retransmits)
			sk->high_seq = sk->sent_seq;

		/*
		 * Advance the send_next pointer so we don't keep
		 * retransmitting the same stuff every time we get an ACK.
		 */
		sk->send_next = skb->link3;

		/*
		 *	Only one retransmit requested.
		 */

		if (!all)
			break;

		/*
		 *	This should cut it off before we send too many packets.
		 */

		if (sk->packets_out >= sk->cong_window)
			break;

		skb = skb->link3;
	}
}
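
/*
 *	Sketch of the RFC 1141/RFC 1624 incremental update suggested
 *	above (illustrative only, not compiled in; the helper name is
 *	hypothetical).  When a 16-bit header word changes from 'old'
 *	to 'new', the one's complement checksum HC can be updated as
 *	HC' = ~(~HC + ~m + m') instead of being recomputed in full.
 */
#if 0
static __inline__ unsigned short incr_check(unsigned short check,
	unsigned short old, unsigned short new)
{
	unsigned long sum;

	/* ~HC + ~m + m', then fold the end-around carries back in */
	sum = (~check & 0xffff) + (~old & 0xffff) + new;
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return ~sum & 0xffff;
}
#endif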
/*
 *	This routine will send an RST to the other tcp.
 */

void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
	  struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	int tmp;
	struct device *ndev = NULL;

	/*
	 *	Cannot reset a reset (Think about it).
	 */

	if (th->rst)
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = sock_wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
		return;

	buff->sk = NULL;
	buff->dev = dev;
	buff->localroute = 0;
	buff->csum = 0;

	/*
	 *	Put in the IP header and routing stuff.
	 */

	tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
				 sizeof(struct tcphdr), tos, ttl, NULL);
	if (tmp < 0)
	{
		buff->free = 1;
		sock_wfree(NULL, buff);
		return;
	}

	t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));
	memset(t1, 0, sizeof(*t1));

	/*
	 *	Swap the send and the receive.
	 */

	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if (th->ack)
	{
		t1->seq = th->ack_seq;
	}
	else
	{
		t1->ack = 1;
		if (!th->syn)
			t1->ack_seq = th->seq;
		else
			t1->ack_seq = htonl(ntohl(th->seq)+1);
	}

	tcp_send_check(t1, saddr, daddr, sizeof(*t1), buff);
	prot->queue_xmit(NULL, ndev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}
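
/*
 *	Worked example of the sequence selection above (informal): in
 *	reply to a bare SYN with seq = 100 the RST goes out with
 *	seq = 0, ack = 1 and ack_seq = 101 (the SYN itself occupies one
 *	sequence number); in reply to a segment carrying ack_seq = 500
 *	the RST uses seq = 500 and no ACK bit, as RFC 793 prescribes.
 */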
/*
 *	Send a fin.
 */

void tcp_send_fin(struct sock *sk)
{
	struct proto *prot = (struct proto *)sk->prot;
	struct tcphdr *th = (struct tcphdr *)&sk->dummy_th;
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct device *dev = NULL;
	int tmp;

	buff = sock_wmalloc(sk, MAX_RESET_SIZE, 1, GFP_KERNEL);

	if (buff == NULL)
	{
		/* This is a disaster if it occurs */
		printk(KERN_CRIT "tcp_send_fin: Impossible malloc failure\n");
		return;
	}

	/*
	 *	Administrivia
	 */

	buff->sk = sk;
	buff->localroute = sk->localroute;
	buff->csum = 0;

	/*
	 *	Put in the IP header and routing stuff.
	 */

	tmp = prot->build_header(buff, sk->saddr, sk->daddr, &dev,
				 IPPROTO_TCP, sk->opt,
				 sizeof(struct tcphdr), sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
	if (tmp < 0)
	{
		int t;
		/*
		 *	Finish anyway, treat this as a send that got lost.
		 *	(Not good).
		 */

		buff->free = 1;
		sock_wfree(sk, buff);
		sk->write_seq++;
		t = del_timer(&sk->timer);
		if (t)
			add_timer(&sk->timer);
		else
			tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
		return;
	}

	/*
	 *	We ought to check if the end of the queue is a buffer and
	 *	if so simply add the fin to that buffer, not send it ahead.
	 */

	t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));
	buff->dev = dev;
	memcpy(t1, th, sizeof(*t1));
	buff->seq = sk->write_seq;
	sk->write_seq++;
	buff->end_seq = sk->write_seq;
	t1->seq = htonl(buff->seq);
	t1->ack_seq = htonl(sk->acked_seq);
	t1->window = htons(tcp_select_window(sk));
	t1->fin = 1;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);

	/*
	 * If there is data in the write queue, the fin must be appended to
	 * the write queue.
	 */

	if (skb_peek(&sk->write_queue) != NULL)
	{
		buff->free = 0;
		if (buff->next != NULL)
		{
			printk(KERN_ERR "tcp_send_fin: next != NULL\n");
			skb_unlink(buff);
		}
		skb_queue_tail(&sk->write_queue, buff);
	}
	else
	{
		sk->sent_seq = sk->write_seq;
		sk->prot->queue_xmit(sk, dev, buff, 0);
		tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
	}
}
void tcp_send_synack(struct sock * newsk, struct sock * sk, struct sk_buff * skb)
{
	struct tcphdr *t1;
	unsigned char *ptr;
	struct sk_buff * buff;
	struct device *ndev = NULL;
	int tmp;

	buff = sock_wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
	{
		sk->err = ENOMEM;
		destroy_sock(newsk);
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	buff->sk = newsk;
	buff->localroute = newsk->localroute;

	/*
	 *	Put in the IP header and routing stuff.
	 */

	tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
				     IPPROTO_TCP, newsk->opt, MAX_SYN_SIZE, sk->ip_tos, sk->ip_ttl, &newsk->ip_route_cache);

	/*
	 *	Something went wrong.
	 */

	if (tmp < 0)
	{
		sk->err = tmp;
		buff->free = 1;
		kfree_skb(buff, FREE_WRITE);
		destroy_sock(newsk);
		skb->sk = sk;
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));

	memcpy(t1, skb->h.th, sizeof(*t1));
	buff->seq = newsk->write_seq++;
	buff->end_seq = newsk->write_seq;
	/*
	 *	Swap the send and the receive.
	 */
	t1->dest = skb->h.th->source;
	t1->source = newsk->dummy_th.source;
	t1->seq = ntohl(buff->seq);
	newsk->sent_seq = newsk->write_seq;
	t1->window = ntohs(tcp_select_window(newsk));
	t1->syn = 1;
	t1->ack = 1;
	t1->urg = 0;
	t1->rst = 0;
	t1->psh = 0;
	t1->ack_seq = htonl(newsk->acked_seq);
	t1->doff = sizeof(*t1)/4+1;
	ptr = skb_put(buff, 4);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = ((newsk->mtu) >> 8) & 0xff;
	ptr[3] = (newsk->mtu) & 0xff;
	buff->csum = csum_partial(ptr, 4, 0);
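	/*
	 *	The four bytes above form the TCP MSS option: kind 2,
	 *	length 4, then the 16-bit MSS in network byte order;
	 *	e.g. an advertised mtu of 1460 is encoded as 0x05 0xb4.
	 */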
	tcp_send_check(t1, newsk->saddr, newsk->daddr, sizeof(*t1)+4, buff);
	newsk->prot->queue_xmit(newsk, ndev, buff, 0);
	tcp_reset_xmit_timer(newsk, TIME_WRITE, TCP_TIMEOUT_INIT);
	skb->sk = newsk;

	/*
	 *	Charge the sock_buff to newsk.
	 */

	atomic_sub(skb->truesize, &sk->rmem_alloc);
	atomic_add(skb->truesize, &newsk->rmem_alloc);

	skb_queue_tail(&sk->receive_queue, skb);
	sk->ack_backlog++;
	tcp_statistics.TcpOutSegs++;
}
/*
 *	Set up the timers for sending a delayed ack..
 *
 *	rules for delaying an ack:
 *	- delay time <= 0.5 HZ
 *	- must send at least every 2 full sized packets
 *	- we don't have a window update to send
 *
 *	additional thoughts:
 *	- we should not delay sending an ACK if we have ato > 0.5 HZ.
 *	  My thinking about this is that in this case we will just be
 *	  systematically skewing the RTT calculation. (The rule about
 *	  sending every two full sized packets will never need to be
 *	  invoked, the delayed ack will be sent before the ATO timeout
 *	  every time. Of course, this relies on our having a good estimate
 *	  for packet interarrival times.)
 */
void tcp_send_delayed_ack(struct sock * sk, int max_timeout, unsigned long timeout)
{
	unsigned long now;

	/* Calculate new timeout */
	now = jiffies;
	if (timeout > max_timeout || sk->bytes_rcv >= sk->max_unacked) {
		timeout = now;
		mark_bh(TIMER_BH);
	} else {
		timeout += now;
	}

	/* Use new timeout only if there wasn't an older one earlier */
	if (!del_timer(&sk->delack_timer) || timeout < sk->delack_timer.expires)
		sk->delack_timer.expires = timeout;

	sk->ack_backlog++;
	add_timer(&sk->delack_timer);
}
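
/*
 *	Example: tcp_send_delayed_ack(sk, HZ/2, HZ/2), as used in
 *	tcp_send_ack() below, schedules an ACK half a second out; if
 *	bytes_rcv has already reached max_unacked the timeout collapses
 *	to "now" and the timer bottom half runs immediately.  Note that
 *	an earlier pending expiry is never pushed back by a later request.
 */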
/*
 *	This routine sends an ack and also updates the window.
 */

void tcp_send_ack(struct sock *sk)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct device *dev = NULL;
	int tmp;

	if (sk->zapped)
		return;	/* We have been reset, we may not send again */

	/*
	 *	If we have nothing queued for transmit and the transmit timer
	 *	is on we are just doing an ACK timeout and need to switch
	 *	to a keepalive.
	 */

	clear_delayed_acks(sk);

	if (sk->send_head == NULL
	    && skb_queue_empty(&sk->write_queue)
	    && sk->ip_xmit_timeout == TIME_WRITE)
	{
		if (sk->keepopen)
			tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
		else
			del_timer(&sk->retransmit_timer);
	}

	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */

	buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
	{
		/*
		 *	Force it to send an ack. We don't have to do this
		 *	(ACK is unreliable) but it's much better use of
		 *	bandwidth on slow links to send a spare ack than
		 *	resend packets.
		 */

		tcp_send_delayed_ack(sk, HZ/2, HZ/2);
		return;
	}

	/*
	 *	Assemble a suitable TCP frame
	 */

	buff->sk = sk;
	buff->localroute = sk->localroute;
	buff->csum = 0;

	/*
	 *	Put in the IP header and routing stuff.
	 */

	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
				     IPPROTO_TCP, sk->opt, MAX_ACK_SIZE, sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
	if (tmp < 0)
	{
		buff->free = 1;
		sock_wfree(sk, buff);
		return;
	}
	t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));

	/*
	 *	Fill in the packet and send it
	 */

	memcpy(t1, &sk->dummy_th, sizeof(*t1));
	t1->seq = htonl(sk->sent_seq);
	t1->ack_seq = htonl(sk->acked_seq);
	t1->window = htons(tcp_select_window(sk));

	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);
	if (sk->debug)
		printk(KERN_ERR "\rtcp_ack: seq %x ack %x\n", sk->sent_seq, sk->acked_seq);
	sk->prot->queue_xmit(sk, dev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}
/*
 *	This routine sends a packet with an out of date sequence
 *	number. It assumes the other end will try to ack it.
 */

void tcp_write_wakeup(struct sock *sk)
{
	struct sk_buff *buff, *skb;
	struct tcphdr *t1;
	struct device *dev = NULL;
	int tmp;

	if (sk->zapped)
		return;	/* After a valid reset we can send no more */

	/*
	 *	Write data can still be transmitted/retransmitted in the
	 *	following states. If any other state is encountered, return.
	 *	[listen/close will never occur here anyway]
	 */

	if (sk->state != TCP_ESTABLISHED &&
	    sk->state != TCP_CLOSE_WAIT &&
	    sk->state != TCP_FIN_WAIT1 &&
	    sk->state != TCP_LAST_ACK &&
	    sk->state != TCP_CLOSING
	)
		return;

	if (before(sk->sent_seq, sk->window_seq) &&
	    (skb = skb_peek(&sk->write_queue)))
	{
		/*
		 * We are probing the opening of a window
		 * but the window size is != 0, so this must
		 * have been a result of sender-side SWS avoidance.
		 */

		struct iphdr *iph;
		struct tcphdr *th;
		struct tcphdr *nth;
		unsigned long win_size;
#if 0
		unsigned long ow_size;
#endif

		/*
		 *	How many bytes can we send ?
		 */

		win_size = sk->window_seq - sk->sent_seq;

		/*
		 *	Recover the buffer pointers
		 */

		iph = (struct iphdr *)skb->ip_hdr;
		th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));

		/*
		 *	Grab the data for a temporary frame
		 */

		buff = sock_wmalloc(sk, win_size + th->doff * 4 +
				    (iph->ihl << 2) +
				    sk->prot->max_header + 15,
				    1, GFP_ATOMIC);
		if (buff == NULL)
			return;

		/*
		 *	If we strip the packet on the write queue we must
		 *	be ready to retransmit this one
		 */

		buff->free = /*0*/1;

		buff->sk = sk;
		buff->localroute = sk->localroute;

		/*
		 *	Put headers on the new packet
		 */

		tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
					     IPPROTO_TCP, sk->opt, buff->truesize,
					     sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
		if (tmp < 0)
		{
			sock_wfree(sk, buff);
			return;
		}

		/*
		 *	Move the TCP header over
		 */

		buff->dev = dev;

		nth = (struct tcphdr *)skb_put(buff, sizeof(*th));

		memcpy(nth, th, sizeof(*th));

		/*
		 *	Correct the new header
		 */

		nth->ack = 1;
		nth->ack_seq = htonl(sk->acked_seq);
		nth->window = htons(tcp_select_window(sk));
		nth->check = 0;

		/*
		 *	Copy TCP options and data start to our new buffer
		 */

		buff->csum = csum_partial_copy((void *)(th + 1), skb_put(buff, win_size),
					       win_size + th->doff*4 - sizeof(*th), 0);

		/*
		 *	Remember our right edge sequence number.
		 */

		buff->end_seq = sk->sent_seq + win_size;
		sk->sent_seq = buff->end_seq;	/* Hack */
		if (th->urg && ntohs(th->urg_ptr) < win_size)
			nth->urg = 0;

		/*
		 *	Checksum the split buffer
		 */

		tcp_send_check(nth, sk->saddr, sk->daddr,
			       nth->doff * 4 + win_size, buff);
	}
	else
	{
		buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
		if (buff == NULL)
			return;

		buff->free = 1;
		buff->sk = sk;
		buff->localroute = sk->localroute;
		buff->csum = 0;

		/*
		 *	Put in the IP header and routing stuff.
		 */

		tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
					     IPPROTO_TCP, sk->opt, MAX_ACK_SIZE, sk->ip_tos, sk->ip_ttl, &sk->ip_route_cache);
		if (tmp < 0)
		{
			sock_wfree(sk, buff);
			return;
		}

		t1 = (struct tcphdr *)skb_put(buff, sizeof(struct tcphdr));
		memcpy(t1, (void *) &sk->dummy_th, sizeof(*t1));

		/*
		 *	Use a previous sequence.
		 *	This should cause the other end to send an ack.
		 */

		t1->seq = htonl(sk->sent_seq-1);
/*		t1->fin = 0;	-- We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
		t1->ack_seq = htonl(sk->acked_seq);
		t1->window = htons(tcp_select_window(sk));
		tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);
	}

	/*
	 *	Send it.
	 */

	sk->prot->queue_xmit(sk, dev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}
/*
 *	A window probe timeout has occurred.
 */

void tcp_send_probe0(struct sock *sk)
{
	if (sk->zapped)
		return;	/* After a valid reset we can send no more */

	tcp_write_wakeup(sk);

	sk->backoff++;
	sk->rto = min(sk->rto << 1, 120*HZ);
	sk->retransmits++;
	sk->prot->retransmits++;
	tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
}
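
/*
 *	Backoff illustration (hypothetical starting rto of 3*HZ):
 *	successive unanswered probes are rescheduled at 6*HZ, 12*HZ,
 *	24*HZ and so on, until the min() above pins the interval
 *	at 120*HZ.
 */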