/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:     $Id: tcp_output.c,v 1.42 1997/04/22 01:06:33 davem Exp $
 *
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Mark Evans, <evansmp@uhura.aston.ac.uk>
 *              Corey Minyard <wf-rch!minyard@relay.EU.net>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *              Linus Torvalds, <torvalds@cs.helsinki.fi>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *              Matthew Dillon, <dillon@apollo.west.oic.com>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:     Pedro Roque     :       Retransmit queue handled by TCP.
 *                              :       Fragmentation on mtu decrease
 *                              :       Segment collapse on retransmit
 *                              :       AF independence
 *
 *              Linus Torvalds  :       send_delayed_ack
 *              David S. Miller :       Charge memory using the right skb
 *                                      during syn/ack processing.
 */

#include <net/tcp.h>

extern int sysctl_tcp_sack;
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;

/* Get rid of any delayed acks, we sent one already.. */
static __inline__ void clear_delayed_acks(struct sock * sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        tp->delayed_acks = 0;
        sk->ack_backlog = 0;
        tcp_clear_xmit_timer(sk, TIME_DACK);
}

static __inline__ void update_send_head(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        tp->send_head = tp->send_head->next;
        if (tp->send_head == (struct sk_buff *) &sk->write_queue)
                tp->send_head = NULL;
}

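/* Note: sk->write_queue is a circular list whose head structure doubles as
 * the list sentinel, so comparing send_head against &sk->write_queue is how
 * we detect that the queue holds no more unsent data.
 */
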
static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int nagle_check = 1;
        int len;

        /* RFC 1122 - section 4.2.3.4
         *
         * We must queue if
         *
         * a) The right edge of this frame exceeds the window
         * b) There are packets in flight and we have a small segment
         *    [SWS avoidance and Nagle algorithm]
         *    (part of SWS is done on packetization)
         * c) We are retransmitting [Nagle]
         * d) We have too many packets 'in flight'
         */
        len = skb->end_seq - skb->seq;
        if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out)
                nagle_check = 0;

        return (nagle_check && tp->packets_out < tp->snd_cwnd &&
                !after(skb->end_seq, tp->snd_una + tp->snd_wnd) &&
                tp->retransmits == 0);
}

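/* Illustration (numbers are examples, not from this file): with sk->mss of
 * 1460, a queued 100 byte segment fails the Nagle check while earlier data
 * is still unacknowledged (100 < 730 and packets_out != 0) and so must
 * wait; a full sized segment goes out at once provided it fits inside both
 * the offered window and the congestion window and nothing is being
 * retransmitted.
 */
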
static __inline__ void tcp_build_options(__u32 *ptr, struct tcp_opt *tp)
{
        /* FIXME: We will still need to do SACK here. */
        if (tp->tstamp_ok) {
                *ptr++ = ntohl((TCPOPT_NOP << 24)
                        | (TCPOPT_NOP << 16)
                        | (TCPOPT_TIMESTAMP << 8)
                        | TCPOLEN_TIMESTAMP);
                /* WARNING: If HZ is ever larger than 1000 on some system,
                 * then we will be violating RFC1323 here because our timestamps
                 * will be moving too fast.
                 * FIXME: code TCP so it uses at most ~ 1000 ticks a second?
                 * (I notice alpha is 1024 ticks now). -- erics
                 */
                *ptr++ = htonl(jiffies);
                *ptr = htonl(tp->ts_recent);
        }
}

static __inline__ void tcp_update_options(__u32 *ptr, struct tcp_opt *tp)
{
        /* FIXME: We will still need to do SACK here. */
        if (tp->tstamp_ok) {
                *++ptr = htonl(jiffies);
                *++ptr = htonl(tp->ts_recent);
        }
}

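/* Note: tcp_build_options() lays the timestamp option out as one aligned
 * word of NOP, NOP, kind, length followed by the two timestamp words, so
 * tcp_update_options() can pre-increment past that first word and rewrite
 * just the TSval (jiffies) and TSecr (ts_recent) words in place.
 */
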
/*
 *      This is the main buffer sending routine. We queue the buffer
 *      having checked it is sane seeming.
 */
int tcp_send_skb(struct sock *sk, struct sk_buff *skb)
{
        struct tcphdr * th = skb->h.th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int size;

        /* Length of packet (not counting length of pre-tcp headers). */
        size = skb->len - ((unsigned char *) th - skb->data);

        /* Sanity check it.. */
        if (size < sizeof(struct tcphdr) || size > skb->len) {
                printk(KERN_DEBUG "tcp_send_skb: bad skb "
                       "(skb = %p, data = %p, th = %p, len = %u)\n",
                       skb, skb->data, th, skb->len);
                kfree_skb(skb, FREE_WRITE);
                return 0;
        }

        /* If we have queued a header size packet.. (these crash a few
         * tcp stacks if ack is not set)
         * FIXME: What is the equivalent below when we have options?
         */
        if (size == sizeof(struct tcphdr)) {
                /* If it's got a syn or fin discard. */
                if (!th->syn && !th->fin) {
                        printk(KERN_DEBUG "tcp_send_skb: attempt to queue a bogon.\n");
                        kfree_skb(skb, FREE_WRITE);
                        return 0;
                }
        }

        /* Actual processing. */
        skb->seq = ntohl(th->seq);
        skb->end_seq = skb->seq + size - 4*th->doff;

        skb_queue_tail(&sk->write_queue, skb);

        if (tp->send_head == NULL && tcp_snd_test(sk, skb)) {
                struct sk_buff * buff;

                /* This is going straight out. */
                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = htons(tcp_select_window(sk));
                tcp_update_options((__u32 *)(th + 1), tp);

                tp->af_specific->send_check(sk, th, size, skb);

                buff = skb_clone(skb, GFP_KERNEL);
                if (buff == NULL)
                        goto queue;

                clear_delayed_acks(sk);
                skb_set_owner_w(buff, sk);

                tp->snd_nxt = skb->end_seq;
                tp->packets_out++;

                skb->when = jiffies;

                tcp_statistics.TcpOutSegs++;
                tp->af_specific->queue_xmit(buff);

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

                return 0;
        }

queue:
        /* Remember where we must start sending. */
        if (tp->send_head == NULL)
                tp->send_head = skb;
        if (tp->packets_out == 0 && !tp->pending) {
                tp->pending = TIME_PROBE0;
                tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
        }
        return 0;
}

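/* Note: tcp_send_skb() therefore has two exits. If nothing is queued ahead
 * of this skb and tcp_snd_test() passes, a clone goes straight out and the
 * retransmit timer covers it; otherwise the skb simply waits on
 * sk->write_queue, and with nothing in flight a zero window probe timer is
 * armed in its place.
 */
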
/*
 * Function to create two new tcp segments.
 * Shrinks the given segment to the specified size and appends a new
 * segment with the rest of the packet to the list.
 * This won't be called frequently, I hope...
 */
static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *buff;
        struct tcphdr *th, *nth;
        int nsize;
        int tmp;

        th = skb->h.th;

        /* Size of new segment. */
        nsize = skb->tail - ((unsigned char *)(th) + tp->tcp_header_len) - len;
        if (nsize <= 0) {
                printk(KERN_DEBUG "tcp_fragment: bug size <= 0\n");
                return -1;
        }

        /* Get a new skb... force flag on. */
        buff = sock_wmalloc(sk, nsize + 128 + sk->prot->max_header + 15, 1,
                            GFP_ATOMIC);
        if (buff == NULL)
                return -1;

        /* Put headers on the new packet. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                kfree_skb(buff, FREE_WRITE);
                return -1;
        }

        /* Move the TCP header over. */
        nth = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        buff->h.th = nth;
        memcpy(nth, th, tp->tcp_header_len);

        /* FIXME: Make sure this gets tcp options right. */

        /* Correct the new header. */
        buff->seq = skb->seq + len;
        buff->end_seq = skb->end_seq;
        nth->seq = htonl(buff->seq);
        nth->check = 0;
        nth->doff = th->doff;

        /* urg data is always a headache */
        if (th->urg) {
                if (th->urg_ptr > len) {
                        th->urg = 0;
                        nth->urg_ptr -= len;
                } else {
                        nth->urg = 0;
                }
        }

        /* Copy data tail to our new buffer. */
        buff->csum = csum_partial_copy(((u8 *)(th) + tp->tcp_header_len) + len,
                                       skb_put(buff, nsize),
                                       nsize, 0);

        skb->end_seq -= nsize;
        skb_trim(skb, skb->len - nsize);

        /* Remember to checksum this packet afterwards. */
        th->check = 0;
        skb->csum = csum_partial((u8 *)(th) + tp->tcp_header_len,
                                 skb->tail - ((u8 *)(th) + tp->tcp_header_len), 0);

        skb_append(skb, buff);

        return 0;
}

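/* Note: after a successful tcp_fragment(sk, skb, len) the original skb
 * covers sequence space [seq, seq+len) and the new buffer carries
 * [seq+len, end_seq), queued directly behind it by skb_append().
 */
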
static void tcp_wrxmit_prob(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        /* This is acked data. We can discard it. This cannot currently occur. */
        tp->retransmits = 0;

        printk(KERN_DEBUG "tcp_write_xmit: bug skb in write queue\n");

        update_send_head(sk);

        skb_unlink(skb);
        kfree_skb(skb, FREE_WRITE);

        if (!sk->dead)
                sk->write_space(sk);
}

static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        SOCK_DEBUG(sk, "tcp_write_xmit: frag needed size=%d mss=%d\n",
                   size, sk->mss);

        if (tcp_fragment(sk, skb, sk->mss)) {
                /* tcp_fragment failed! */
                tp->send_head = skb;
                tp->packets_out--;
                return -1;
        } else {
                /* If tcp_fragment succeeded then
                 * the send head is the resulting
                 * fragment
                 */
                tp->send_head = skb->next;

                return 0;
        }
}

/*
 * This routine writes packets to the network.
 * It advances the send_head.
 * This happens as incoming acks open up the remote window for us.
 */
void tcp_write_xmit(struct sock *sk)
{
        struct sk_buff *skb;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        u16 rcv_wnd;
        int sent_pkts = 0;

        /* The bytes will have to remain here. In time closedown will
         * empty the write queue and all will be happy.
         */
        if (sk->zapped)
                return;

        /* Anything on the transmit queue that fits the window can
         * be added providing we are:
         *
         * a) following SWS avoidance [and Nagle algorithm]
         * b) not exceeding our congestion window.
         * c) not retransmitting [Nagle]
         */
        rcv_wnd = htons(tcp_select_window(sk));
        while ((skb = tp->send_head) && tcp_snd_test(sk, skb)) {
                struct tcphdr *th;
                struct sk_buff *buff;
                int size;

                /* See if we really need to send the packet. (debugging code) */
                if (!after(skb->end_seq, tp->snd_una)) {
                        tcp_wrxmit_prob(sk, skb);
                        continue;
                }

                /* Put in the ack seq and window at this point rather
                 * than earlier, in order to keep them monotonic.
                 * We really want to avoid taking back window allocations.
                 * That's legal, but RFC1122 says it's frowned on.
                 * Ack and window will in general have changed since
                 * this packet was put on the write queue.
                 */
                th = skb->h.th;
                size = skb->len - (((unsigned char *) th) - skb->data);
                if (size - (th->doff << 2) > sk->mss) {
                        if (tcp_wrxmit_frag(sk, skb, size))
                                break;
                }

                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = rcv_wnd;
                tcp_update_options((__u32 *)(th + 1), tp);

                tp->af_specific->send_check(sk, th, size, skb);

#ifdef TCP_DEBUG
                if (before(skb->end_seq, tp->snd_nxt))
                        printk(KERN_DEBUG "tcp_write_xmit:"
                               " sending already sent seq\n");
#endif

                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        break;

                /* Advance the send_head. This one is going out. */
                update_send_head(sk);
                clear_delayed_acks(sk);

                tp->packets_out++;
                skb_set_owner_w(buff, sk);

                tp->snd_nxt = skb->end_seq;

                skb->when = jiffies;

                sent_pkts = 1;
                tp->af_specific->queue_xmit(buff);
        }

        if (sent_pkts && !tcp_timer_is_set(sk, TIME_RETRANS))
                tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
}

/*
 * This function returns the amount that we can raise the
 * usable window based on the following constraints
 *
 * 1. The window can never be shrunk once it is offered (RFC 793)
 * 2. We limit memory per socket
 */
unsigned short tcp_select_window(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        int mss = sk->mss;
        long free_space = sock_rspace(sk);
        long window, cur_win, usable;

        if (tp->window_clamp) {
                free_space = min(tp->window_clamp, free_space);
                mss = min(tp->window_clamp, mss);
        }

        /* compute the actual window i.e.
         * old_window - received_bytes_on_that_win
         */
        cur_win = tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd);
        window = tp->rcv_wnd;

        if (cur_win < 0) {
                cur_win = 0;
                printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
                       tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
        }

        /*
         * RFC 1122:
         * "the suggested [SWS] avoidance algorithm for the receiver is to keep
         *  RCV.NEXT + RCV.WIN fixed until:
         *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
         *
         * i.e. don't raise the right edge of the window until you can't raise
         * it MSS bytes
         */

        /* It would be a good idea if it didn't break header prediction.
         * and BSD made the header prediction standard...
         * It expects the same value in the header i.e. th->window to be
         * constant
         */
        usable = free_space - cur_win;
        if (usable < 0)
                usable = 0;

        if (window < usable) {
                /* Window is not blocking the sender
                 * and we have enough free space for it
                 */
                if (cur_win > (sk->mss << 1))
                        goto out;
        }

        if (window >= usable) {
                /* We are offering too much, cut it down...
                 * but don't shrink the window
                 */
                window = max(usable, cur_win);
        } else {
                if ((usable - window) >= mss)
                        window += mss;
        }

out:
        tp->rcv_wnd = window;
        tp->rcv_wup = tp->rcv_nxt;
        return window;
}

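/* Worked example (illustrative numbers, not from this file): with
 * free_space = 8192, mss = 1460 and nothing yet advertised, usable starts
 * at 8192 but successive calls raise the window one mss at a time: 1460,
 * 2920, 4380. Once the sender already holds more than two segments of open
 * window (cur_win > sk->mss << 1) we jump to "out" and stop raising it;
 * the right edge only ever moves in MSS sized steps, which is the receiver
 * side SWS avoidance described above.
 */
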
static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *th1, *th2;
        int size1, size2, avail;
        struct sk_buff *buff = skb->next;

        th1 = skb->h.th;

        if (th1->urg)
                return -1;

        avail = skb_tailroom(skb);

        /* Size of TCP payload. */
        size1 = skb->tail - ((u8 *) (th1) + (th1->doff << 2));

        th2 = buff->h.th;
        size2 = buff->tail - ((u8 *) (th2) + (th2->doff << 2));

        if (size2 > avail || size1 + size2 > sk->mss)
                return -1;

        /* Ok. We will be able to collapse the packet. */
        skb_unlink(buff);
        memcpy(skb_put(skb, size2), ((char *) th2) + (th2->doff << 2), size2);

        /* Update sizes on original skb, both TCP and IP. */
        skb->end_seq += size2;
        if (th2->urg) {
                th1->urg = 1;
                th1->urg_ptr = th2->urg_ptr + size1;
        }

        /* ... and off you go. */
        kfree_skb(buff, FREE_WRITE);
        tp->packets_out--;

        /* Header checksum will be set by the retransmit procedure
         * after calling rebuild header.
         */
        th1->check = 0;
        skb->csum = csum_partial((u8 *)(th1) + (th1->doff << 2), size1 + size2, 0);
        return 0;
}

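/* Note: this collapse is what keeps retransmission cheap after an mtu
 * decrease (the "Segment collapse on retransmit" entry in the Changes list
 * above): neighbouring small segments are merged up to sk->mss so fewer,
 * fuller packets get resent.
 */
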
/*
 * A socket has timed out on its send queue and wants to do a
 * little retransmitting.
 * retransmit_head can be different from the head of the write_queue
 * if we are doing fast retransmit.
 */
void tcp_do_retransmit(struct sock *sk, int all)
{
        struct sk_buff * skb;
        int ct = 0;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        if (tp->retrans_head == NULL)
                tp->retrans_head = skb_peek(&sk->write_queue);

        if (tp->retrans_head == tp->send_head)
                tp->retrans_head = NULL;

        while ((skb = tp->retrans_head) != NULL) {
                struct sk_buff *buff;
                struct tcphdr *th;
                int tcp_size;
                int size;

                /* In general it's OK just to use the old packet.  However we
                 * need to use the current ack and window fields.  Urg and
                 * urg_ptr could possibly stand to be updated as well, but we
                 * don't keep the necessary data.  That shouldn't be a problem,
                 * if the other end is doing the right thing.  Since we're
                 * changing the packet, we have to issue a new IP identifier.
                 */
                th = skb->h.th;

                tcp_size = skb->tail - ((unsigned char *)(th) + tp->tcp_header_len);

                if (tcp_size > sk->mss) {
                        if (tcp_fragment(sk, skb, sk->mss)) {
                                printk(KERN_DEBUG "tcp_fragment failed\n");
                                return;
                        }
                        tp->packets_out++;
                }

                if (!th->syn &&
                    tcp_size < (sk->mss >> 1) &&
                    skb->next != tp->send_head &&
                    skb->next != (struct sk_buff *) &sk->write_queue)
                        tcp_retrans_try_collapse(sk, skb);

                if (tp->af_specific->rebuild_header(sk, skb)) {
#ifdef TCP_DEBUG
                        printk(KERN_DEBUG "tcp_do_rebuild_header failed\n");
#endif
                        break;
                }

                SOCK_DEBUG(sk, "retransmit sending\n");

                /* Update ack and window. */
                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = ntohs(tcp_select_window(sk));
                tcp_update_options((__u32 *)(th + 1), tp);

                size = skb->tail - (unsigned char *) th;
                tp->af_specific->send_check(sk, th, size, skb);

                skb->when = jiffies;

                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        break;

                skb_set_owner_w(buff, sk);

                clear_delayed_acks(sk);
                tp->af_specific->queue_xmit(buff);

                /* Count retransmissions. */
                ct++;
                sk->prot->retransmits++;
                tcp_statistics.TcpRetransSegs++;

                /* Only one retransmit requested. */
                if (!all)
                        break;

                /* This should cut it off before we send too many packets. */
                if (ct >= tp->snd_cwnd)
                        break;

                /* Advance the pointer. */
                tp->retrans_head = skb->next;
                if ((tp->retrans_head == tp->send_head) ||
                    (tp->retrans_head == (struct sk_buff *) &sk->write_queue))
                        tp->retrans_head = NULL;
        }
}

/*
 * Send a fin.
 */
void tcp_send_fin(struct sock *sk)
{
        struct tcphdr *th = (struct tcphdr *) &sk->dummy_th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *t1;
        struct sk_buff *buff;
        int tmp;

        buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_KERNEL);
        if (buff == NULL) {
                /* FIXME: This is a disaster if it occurs. */
                printk(KERN_INFO "tcp_send_fin: Impossible malloc failure");
                return;
        }

        /* Administrivia. */
        buff->csum = 0;

        /* Put in the IP header and routing stuff. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                int t;

                /* FIXME: We must not throw this out. Eventually we must
                 * put a FIN into the queue, otherwise it never gets queued.
                 */
                kfree_skb(buff, FREE_WRITE);
                sk->write_seq++;
                t = del_timer(&sk->timer);
                if (t)
                        add_timer(&sk->timer);
                else
                        tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
                return;
        }

        /* We ought to check if the end of the queue is a buffer and
         * if so simply add the fin to that buffer, not send it ahead.
         */
        t1 = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        buff->h.th = t1;
        tcp_build_options((__u32 *)(t1 + 1), tp);

        memcpy(t1, th, sizeof(*t1));
        buff->seq = sk->write_seq;
        sk->write_seq++;
        buff->end_seq = sk->write_seq;
        t1->seq = htonl(buff->seq);
        t1->ack_seq = htonl(tp->rcv_nxt);
        t1->window = htons(tcp_select_window(sk));
        t1->fin = 1;

        tp->af_specific->send_check(sk, t1, tp->tcp_header_len, buff);

        /* The fin can only be transmitted after the data. */
        skb_queue_tail(&sk->write_queue, buff);
        if (tp->send_head == NULL) {
                struct sk_buff *skb1;

                tp->packets_out++;
                tp->snd_nxt = sk->write_seq;
                buff->when = jiffies;

                skb1 = skb_clone(buff, GFP_KERNEL);
                if (skb1) {
                        skb_set_owner_w(skb1, sk);
                        tp->af_specific->queue_xmit(skb1);
                }

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
        }
}

int tcp_send_synack(struct sock *sk)
{
        struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff * skb;
        struct sk_buff * buff;
        struct tcphdr *th;
        int tmp;

        skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
        if (skb == NULL)
                return -ENOMEM;

        tmp = tp->af_specific->build_net_header(sk, skb);
        if (tmp < 0) {
                kfree_skb(skb, FREE_WRITE);
                return tmp;
        }

        th = (struct tcphdr *) skb_put(skb, sizeof(struct tcphdr));
        skb->h.th = th;
        memset(th, 0, sizeof(struct tcphdr));

        th->syn = 1;
        th->ack = 1;

        th->source = sk->dummy_th.source;
        th->dest = sk->dummy_th.dest;

        skb->seq = tp->snd_una;
        skb->end_seq = skb->seq + 1 /* th->syn */;
        th->seq = ntohl(skb->seq);

        th->window = ntohs(tp->rcv_wnd);

        tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);

        tmp = tcp_syn_build_options(skb, sk->mss,
                                    tp->sack_ok, tp->tstamp_ok,
                                    tp->snd_wscale ? tp->rcv_wscale : 0);
        skb->csum = 0;
        th->doff = (sizeof(*th) + tmp) >> 2;

        tp->af_specific->send_check(sk, th, sizeof(*th) + tmp, skb);

        skb_queue_tail(&sk->write_queue, skb);

        buff = skb_clone(skb, GFP_ATOMIC);
        if (buff) {
                skb_set_owner_w(buff, sk);

                tp->packets_out++;
                skb->when = jiffies;

                tp->af_specific->queue_xmit(buff);
                tcp_statistics.TcpOutSegs++;
        }

        tcp_reset_xmit_timer(sk, TIME_RETRANS, TCP_TIMEOUT_INIT);

        return 0;
}

/*
 * Set up the timers for sending a delayed ack..
 *
 * rules for delaying an ack:
 * - delay time <= 0.5 HZ
 * - we must send at least every 2 full sized packets
 * - we don't have a window update to send
 */
void tcp_send_delayed_ack(struct sock * sk, int max_timeout)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        unsigned long timeout, now;

        /* Calculate new timeout. */
        now = jiffies;
        timeout = tp->ato;

        if (timeout > max_timeout ||
            ((tp->rcv_nxt - tp->rcv_wup) > (sk->mss << 2)))
                timeout = now;
        else
                timeout += now;

        /* Use new timeout only if there wasn't an older one earlier. */
        if (!del_timer(&tp->delack_timer) || timeout < tp->delack_timer.expires)
                tp->delack_timer.expires = timeout;

        add_timer(&tp->delack_timer);
}

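/* Illustration (values are examples, not from this file): with tp->ato at
 * 40 jiffies and a max_timeout of HZ/2, the delayed ack fires 40 ticks
 * from now; but once more than four full segments (sk->mss << 2 bytes)
 * have arrived since the last window update, timeout collapses to "now"
 * and the ack goes out on the next timer run.
 */
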
/*
 * This routine sends an ack and also updates the window.
 */
void tcp_send_ack(struct sock *sk)
{
        struct sk_buff *buff;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *th;
        int tmp;

        if (sk->zapped)
                return;         /* We have been reset, we may not send again. */

        /* We need to grab some memory, and put together an ack,
         * and then put it into the queue to be sent.
         * FIXME: is it better to waste memory here and use a
         * constant sized ACK?
         */
        buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_ATOMIC);
        if (buff == NULL) {
                /* Force it to send an ack. We don't have to do this
                 * (ACK is unreliable) but it's much better use of
                 * bandwidth on slow links to send a spare ack than
                 * resend packets.
                 */
                tcp_send_delayed_ack(sk, HZ/2);
                return;
        }

        clear_delayed_acks(sk);

        /* Assemble a suitable TCP frame. */
        buff->csum = 0;

        /* Put in the IP header and routing stuff. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                kfree_skb(buff, FREE_WRITE);
                return;
        }

        th = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        memcpy(th, &sk->dummy_th, sizeof(struct tcphdr));
        tcp_build_options((__u32 *)(th + 1), tp);

        /* Swap the send and the receive. */
        th->window = ntohs(tcp_select_window(sk));
        th->seq = ntohl(tp->snd_nxt);
        tp->last_ack_sent = th->ack_seq = ntohl(tp->rcv_nxt);

        /* Fill in the packet and send it. */
        tp->af_specific->send_check(sk, th, tp->tcp_header_len, buff);

        SOCK_DEBUG(sk, "\rtcp_send_ack: seq %x ack %x\n",
                   tp->snd_nxt, tp->rcv_nxt);

        tp->af_specific->queue_xmit(buff);
        tcp_statistics.TcpOutSegs++;
}

/*
 * This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 */
void tcp_write_wakeup(struct sock *sk)
{
        struct sk_buff *buff, *skb;
        struct tcphdr *t1;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int tmp;

        if (sk->zapped)
                return;         /* After a valid reset we can send no more. */

        /* Write data can still be transmitted/retransmitted in the
         * following states.  If any other state is encountered, return.
         * [listen/close will never occur here anyway]
         */
        if (sk->state != TCP_ESTABLISHED &&
            sk->state != TCP_CLOSE_WAIT &&
            sk->state != TCP_FIN_WAIT1 &&
            sk->state != TCP_LAST_ACK &&
            sk->state != TCP_CLOSING)
                return;

        if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && (skb = tp->send_head)) {
                struct tcphdr *th;
                unsigned long win_size;

                /* We are probing the opening of a window
                 * but the window size is != 0
                 * must have been a result of SWS avoidance ( sender )
                 */
                win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
                if (win_size < skb->end_seq - skb->seq) {
                        if (tcp_fragment(sk, skb, win_size)) {
                                printk(KERN_DEBUG "tcp_write_wakeup: "
                                       "fragment failed\n");
                                return;
                        }
                }

                th = skb->h.th;
                tp->af_specific->send_check(sk, th, th->doff * 4 + win_size, skb);
                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        return;

                skb_set_owner_w(buff, sk);
                tp->packets_out++;

                clear_delayed_acks(sk);

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

                skb->when = jiffies;
                update_send_head(sk);
                tp->snd_nxt = skb->end_seq;
        } else {
                buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
                if (buff == NULL)
                        return;

                buff->csum = 0;

                /* Put in the IP header and routing stuff. */
                tmp = tp->af_specific->build_net_header(sk, buff);
                if (tmp < 0) {
                        kfree_skb(buff, FREE_WRITE);
                        return;
                }

                t1 = (struct tcphdr *) skb_put(buff, sizeof(struct tcphdr));
                memcpy(t1, (void *) &sk->dummy_th, sizeof(*t1));
                /* FIXME: should zero window probes have SACK and/or TIMESTAMP data?
                 * If so we have to tack them on here.
                 */

                /* Use a previous sequence.
                 * This should cause the other end to send an ack.
                 */
                t1->seq = htonl(tp->snd_nxt - 1);
                /* t1->fin = 0;  -- We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
                t1->ack_seq = htonl(tp->rcv_nxt);
                t1->window = htons(tcp_select_window(sk));

                /* Value from dummy_th may be larger. */
                t1->doff = sizeof(struct tcphdr) / 4;

                tp->af_specific->send_check(sk, t1, sizeof(*t1), buff);
        }

        /* Send it. */
        tp->af_specific->queue_xmit(buff);
        tcp_statistics.TcpOutSegs++;
}

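/* Note: tcp_write_wakeup() probes in one of two ways. If some window is
 * open and data is queued, it pushes real data (fragmented down to the
 * window if necessary); otherwise it sends a bare segment carrying the
 * already acknowledged sequence number snd_nxt - 1 and no data, purely to
 * provoke an ack that reports the peer's current window.
 */
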
/*
 * A window probe timeout has occurred.
 * If window is not closed send a partial packet
 * else a zero probe.
 */
void tcp_send_probe0(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        if (sk->zapped)
                return;         /* After a valid reset we can send no more. */

        tcp_write_wakeup(sk);

        tp->pending = TIME_PROBE0;
        tp->backoff++;
        tp->probes_out++;

        tcp_reset_xmit_timer(sk, TIME_PROBE0,
                             min(tp->rto << tp->backoff, 120*HZ));
}
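
/* Note: the probe interval backs off exponentially (rto, 2*rto, 4*rto,
 * ...) but the min() above clamps it at 120*HZ, so a persistent zero
 * window is still re-probed at least once every two minutes.
 */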