- Fix TCP delayed ACK stall (Andrea Arcangeli)
[davej-history.git] net/ipv4/tcp_output.c (blob 2bdbb5d2347e1f98d9c44573cdefc9f6abda53d6)
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_output.c,v 1.58 1998/03/11 07:12:49 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:	Pedro Roque	:	Retransmit queue handled by TCP.
 *				:	Fragmentation on mtu decrease
 *				:	Segment collapse on retransmit
 *				:	AF independence
 *
 *		Linus Torvalds	:	send_delayed_ack
 *		David S. Miller	:	Charge memory using the right skb
 *					during syn/ack processing.
 */
#include <net/tcp.h>

extern int sysctl_tcp_sack;
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
/* Get rid of any delayed acks, we sent one already.. */
static __inline__ void clear_delayed_acks(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        tp->delayed_acks = 0;
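        /* In quickack mode, fall back to a short ack timeout: (HZ/100)*2
         * jiffies is roughly 20ms (assuming HZ is a multiple of 100).
         */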
        if (tcp_in_quickack_mode(tp))
                tp->ato = ((HZ/100)*2);
        tcp_clear_xmit_timer(sk, TIME_DACK);
}
static __inline__ void update_send_head(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        tp->send_head = tp->send_head->next;
        if (tp->send_head == (struct sk_buff *) &sk->write_queue)
                tp->send_head = NULL;
}
/*
 * This is the main buffer sending routine. We queue the buffer
 * having checked it is sane seeming.
 */
void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue)
{
        struct tcphdr *th = skb->h.th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int size;

        /* Length of packet (not counting length of pre-tcp headers). */
        size = skb->len - ((unsigned char *) th - skb->data);

        /* Sanity check it.. */
        if (size < tp->tcp_header_len || size > skb->len) {
                printk(KERN_DEBUG "tcp_send_skb: bad skb "
                       "(skb = %p, data = %p, th = %p, len = %u)\n",
                       skb, skb->data, th, skb->len);
                kfree_skb(skb);
                return;
        }

        /* If we have queued a header size packet.. (these crash a few
         * tcp stacks if ack is not set)
         */
        if (size == tp->tcp_header_len) {
                /* If it's got a syn or fin discard. */
                if (!th->syn && !th->fin) {
                        printk(KERN_DEBUG "tcp_send_skb: attempt to queue a bogon.\n");
                        kfree_skb(skb);
                        return;
                }
        }

        /* Actual processing. */
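        /* skb->end_seq covers the data portion only: size still includes the
         * TCP header, hence the 4*th->doff (header length in bytes) subtraction.
         */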
        skb->seq = ntohl(th->seq);
        skb->end_seq = skb->seq + size - 4*th->doff;
        skb_queue_tail(&sk->write_queue, skb);

        if (!force_queue && tp->send_head == NULL && tcp_snd_test(sk, skb)) {
                struct sk_buff *buff;

                /* This is going straight out. */
                tp->last_ack_sent = tp->rcv_nxt;
                th->ack_seq = htonl(tp->rcv_nxt);
                th->window = htons(tcp_select_window(sk));
                tcp_update_options((__u32 *)(th + 1), tp);

                tp->af_specific->send_check(sk, th, size, skb);

                buff = skb_clone(skb, GFP_KERNEL);
                if (buff == NULL)
                        goto queue;

                clear_delayed_acks(sk);
                skb_set_owner_w(buff, sk);

                tp->snd_nxt = skb->end_seq;
                tp->packets_out++;

                skb->when = jiffies;

                tcp_statistics.TcpOutSegs++;
                tp->af_specific->queue_xmit(buff);

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

                return;
        }

queue:
        /* Remember where we must start sending. */
        if (tp->send_head == NULL)
                tp->send_head = skb;
        if (!force_queue && tp->packets_out == 0 && !tp->pending) {
                tp->pending = TIME_PROBE0;
                tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
        }
}
/*
 * Function to create two new tcp segments.
 * Shrinks the given segment to the specified size and appends a new
 * segment with the rest of the packet to the list.
 * This won't be called frequently, I hope...
 */
static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *buff;
        struct tcphdr *th, *nth;
        int nsize;
        int tmp;

        th = skb->h.th;

        /* Size of new segment. */
        nsize = skb->tail - ((unsigned char *)(th) + tp->tcp_header_len) - len;
        if (nsize <= 0) {
                printk(KERN_DEBUG "tcp_fragment: bug size <= 0\n");
                return -1;
        }

        /* Get a new skb... force flag on. */
        buff = sock_wmalloc(sk, nsize + 128 + sk->prot->max_header + 15, 1,
                            GFP_ATOMIC);
        if (buff == NULL)
                return -1;

        /* Put headers on the new packet. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                kfree_skb(buff);
                return -1;
        }

        /* Move the TCP header over. */
        nth = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        buff->h.th = nth;
        memcpy(nth, th, tp->tcp_header_len);

        /* FIXME: Make sure this gets tcp options right. */

        /* Correct the new header. */
        buff->seq = skb->seq + len;
        buff->end_seq = skb->end_seq;
        nth->seq = htonl(buff->seq);
        nth->check = 0;
        nth->doff = th->doff;

        /* urg data is always a headache */
        if (th->urg) {
                if (th->urg_ptr > len) {
                        th->urg = 0;
                        nth->urg_ptr -= len;
                } else {
                        nth->urg = 0;
                }
        }

        /* Copy data tail to our new buffer. */
        buff->csum = csum_partial_copy(((u8 *)(th) + tp->tcp_header_len) + len,
                                       skb_put(buff, nsize),
                                       nsize, 0);

        skb->end_seq -= nsize;
        skb_trim(skb, skb->len - nsize);

        /* Remember to checksum this packet afterwards. */
        th->check = 0;
        skb->csum = csum_partial((u8 *)(th) + tp->tcp_header_len,
                                 skb->tail - ((u8 *)(th) + tp->tcp_header_len),
                                 0);

        skb_append(skb, buff);

        return 0;
}
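/* tcp_write_xmit() found a segment on the write queue that has already
 * been acked (this should not happen): complain, drop it from the queue,
 * and wake up anybody waiting for write space.
 */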
static void tcp_wrxmit_prob(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        /* This is acked data. We can discard it. This cannot currently occur. */
        tp->retransmits = 0;

        printk(KERN_DEBUG "tcp_write_xmit: bug skb in write queue\n");

        update_send_head(sk);

        skb_unlink(skb);
        kfree_skb(skb);

        if (!sk->dead)
                sk->write_space(sk);
}
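/* Split a segment that is larger than the current MSS before sending it.
 * On failure, restore the send head to this skb and return -1 so that
 * tcp_write_xmit() stops transmitting for now.
 */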
static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        SOCK_DEBUG(sk, "tcp_write_xmit: frag needed size=%d mss=%d\n",
                   size, sk->mss);

        if (tcp_fragment(sk, skb, sk->mss)) {
                /* tcp_fragment() failed! */
                tp->send_head = skb;
                tp->packets_out--;
                return -1;
        }
        return 0;
}
/*
 * This routine writes packets to the network.
 * It advances the send_head.
 * This happens as incoming acks open up the remote window for us.
 */
void tcp_write_xmit(struct sock *sk)
{
        struct sk_buff *skb;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        u16 rcv_wnd;
        int sent_pkts = 0;

        /* The bytes will have to remain here. In time closedown will
         * empty the write queue and all will be happy.
         */
        if (sk->zapped)
                return;

        /* Anything on the transmit queue that fits the window can
         * be added providing we are:
         *
         * a) following SWS avoidance [and Nagle algorithm]
         * b) not exceeding our congestion window.
         * c) not retransmitting [Nagle]
         */
        rcv_wnd = htons(tcp_select_window(sk));
        while ((skb = tp->send_head) && tcp_snd_test(sk, skb)) {
                struct tcphdr *th;
                struct sk_buff *buff;
                int size;

                /* See if we really need to send the packet. (debugging code) */
                if (!after(skb->end_seq, tp->snd_una)) {
                        tcp_wrxmit_prob(sk, skb);
                        continue;
                }

                /* Put in the ack seq and window at this point rather
                 * than earlier, in order to keep them monotonic.
                 * We really want to avoid taking back window allocations.
                 * That's legal, but RFC1122 says it's frowned on.
                 * Ack and window will in general have changed since
                 * this packet was put on the write queue.
                 */
                th = skb->h.th;
                size = skb->len - (((unsigned char *) th) - skb->data);
                if (size - (th->doff << 2) > sk->mss) {
                        if (tcp_wrxmit_frag(sk, skb, size))
                                break;
                        size = skb->len - (((unsigned char *) th) - skb->data);
                }

                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = rcv_wnd;
                tcp_update_options((__u32 *)(th + 1), tp);

                tp->af_specific->send_check(sk, th, size, skb);

#ifdef TCP_DEBUG
                if (before(skb->end_seq, tp->snd_nxt))
                        printk(KERN_DEBUG "tcp_write_xmit:"
                               " sending already sent seq\n");
#endif

                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        break;

                /* Advance the send_head. This one is going out. */
                update_send_head(sk);
                clear_delayed_acks(sk);

                tp->packets_out++;
                skb_set_owner_w(buff, sk);

                tp->snd_nxt = skb->end_seq;

                skb->when = jiffies;

                sent_pkts = 1;
                tp->af_specific->queue_xmit(buff);
        }

        if (sent_pkts && !tcp_timer_is_set(sk, TIME_RETRANS))
                tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
}
/* This function returns the amount that we can raise the
 * usable window based on the following constraints
 *
 * 1. The window can never be shrunk once it is offered (RFC 793)
 * 2. We limit memory per socket
 *
 * RFC 1122:
 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
 *  RECV.NEXT + RCV.WIN fixed until:
 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 *
 * i.e. don't raise the right edge of the window until you can raise
 * it at least MSS bytes.
 *
 * Unfortunately, the recommended algorithm breaks header prediction,
 * since header prediction assumes th->window stays fixed.
 *
 * Strictly speaking, keeping th->window fixed violates the receiver
 * side SWS prevention criteria. The problem is that under this rule
 * a stream of single byte packets will cause the right side of the
 * window to always advance by a single byte.
 *
 * Of course, if the sender implements sender side SWS prevention
 * then this will not be a problem.
 *
 * BSD seems to make the following compromise:
 *
 *	If the free space is less than the 1/4 of the maximum
 *	space available and the free space is less than 1/2 mss,
 *	then set the window to 0.
 *	Otherwise, just prevent the window from shrinking
 *	and from being larger than the largest representable value.
 *
 * This prevents incremental opening of the window in the regime
 * where TCP is limited by the speed of the reader side taking
 * data out of the TCP receive queue. It does nothing about
 * those cases where the window is constrained on the sender side
 * because the pipeline is full.
 *
 * BSD also seems to "accidentally" limit itself to windows that are a
 * multiple of MSS, at least until the free space gets quite small.
 * This would appear to be a side effect of the mbuf implementation.
 * Combining these two algorithms results in the observed behavior
 * of having a fixed window size at almost all times.
 *
 * Below we obtain similar behavior by forcing the offered window to
 * a multiple of the mss when it is feasible to do so.
 *
 * FIXME: In our current implementation the value returned by sock_rspace(sk)
 * is the total space we have allocated to the socket to store skbuf's.
 * The current design assumes that up to half of that space will be
 * taken by headers, and the remaining space will be available for TCP data.
 * This should be accounted for correctly instead.
 */
u32 __tcp_select_window(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        unsigned int mss = sk->mss;
        unsigned int free_space;
        u32 window, cur_win;

        free_space = (sk->rcvbuf - atomic_read(&sk->rmem_alloc)) / 2;
        if (tp->window_clamp) {
                free_space = min(tp->window_clamp, free_space);
                mss = min(tp->window_clamp, mss);
        } else {
                printk("tcp_select_window: tp->window_clamp == 0.\n");
        }

        if (mss < 1) {
                mss = 1;
                printk("tcp_select_window: sk->mss fell to 0.\n");
        }

        cur_win = tcp_receive_window(tp);
        if (free_space < sk->rcvbuf/4 && free_space < mss/2) {
                window = 0;
        } else {
                /* Get the largest window that is a nice multiple of mss.
                 * Window clamp already applied above.
                 * If our current window offering is within 1 mss of the
                 * free space we just keep it. This prevents the divide
                 * and multiply from happening most of the time.
                 * We also don't do any window rounding when the free space
                 * is too small.
                 */
                window = tp->rcv_wnd;
                if ((window <= (free_space - mss)) || (window > free_space))
                        window = (free_space/mss)*mss;
        }
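        /* Worked example (illustrative numbers only): with mss = 1460 and
         * free_space = 8000, a current window of 7000 is within one mss of
         * free_space and is kept; a window of 3000 would be rounded to
         * (8000/1460)*1460 = 7300.
         */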
        return window;
}
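/* Try to merge a segment being retransmitted with the next segment on the
 * write queue, when both are small enough to fit into one MSS-sized packet,
 * so the retransmission carries more useful data.
 */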
static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *th1, *th2;
        int size1, size2, avail;
        struct sk_buff *buff = skb->next;

        th1 = skb->h.th;

        if (th1->urg)
                return -1;

        avail = skb_tailroom(skb);

        /* Size of TCP payload. */
        size1 = skb->tail - ((u8 *) (th1) + (th1->doff << 2));

        th2 = buff->h.th;
        size2 = buff->tail - ((u8 *) (th2) + (th2->doff << 2));

        if (size2 > avail || size1 + size2 > sk->mss)
                return -1;

        /* Ok. We will be able to collapse the packet. */
        skb_unlink(buff);

        memcpy(skb_put(skb, size2), ((char *) th2) + (th2->doff << 2), size2);

        /* Update sizes on original skb, both TCP and IP. */
        skb->end_seq += buff->end_seq - buff->seq;
        if (th2->urg) {
                th1->urg = 1;
                th1->urg_ptr = th2->urg_ptr + size1;
        }
        if (th2->fin)
                th1->fin = 1;

        /* ... and off you go. */
        kfree_skb(buff);
        tp->packets_out--;

        /* Header checksum will be set by the retransmit procedure
         * after calling rebuild header.
         */
        th1->check = 0;
        skb->csum = csum_partial((u8 *)(th1) + (th1->doff << 2), size1 + size2, 0);
        return 0;
}
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used to speed up path mtu recovery. Note that
 * these simple retransmits aren't counted in the usual tcp retransmit
 * backoff counters.
 * The socket is already locked here.
 */
void tcp_simple_retransmit(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        /* Clear delay ack timer. */
        tcp_clear_xmit_timer(sk, TIME_DACK);

        tp->retrans_head = NULL;
        /* Don't muck with the congestion window here. */
        tp->dup_acks = 0;
        tp->high_seq = tp->snd_nxt;
        /* FIXME: make the current rtt sample invalid */
        tcp_do_retransmit(sk, 0);
}
/*
 * A socket has timed out on its send queue and wants to do a
 * little retransmitting.
 * retrans_head can be different from the head of the write_queue
 * if we are doing fast retransmit.
 */
void tcp_do_retransmit(struct sock *sk, int all)
{
        struct sk_buff *skb;
        int ct = 0;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        if (tp->retrans_head == NULL)
                tp->retrans_head = skb_peek(&sk->write_queue);

        if (tp->retrans_head == tp->send_head)
                tp->retrans_head = NULL;

        while ((skb = tp->retrans_head) != NULL) {
                struct sk_buff *buff;
                struct tcphdr *th;
                int tcp_size;
                int size;

                /* In general it's OK just to use the old packet. However we
                 * need to use the current ack and window fields. Urg and
                 * urg_ptr could possibly stand to be updated as well, but we
                 * don't keep the necessary data. That shouldn't be a problem,
                 * if the other end is doing the right thing. Since we're
                 * changing the packet, we have to issue a new IP identifier.
                 */

                th = skb->h.th;

                tcp_size = skb->tail - ((unsigned char *)(th) + tp->tcp_header_len);

                if (tcp_size > sk->mss) {
                        if (tcp_fragment(sk, skb, sk->mss)) {
                                printk(KERN_DEBUG "tcp_fragment failed\n");
                                return;
                        }
                        tp->packets_out++;
                }
                if (!th->syn &&
                    tcp_size < (sk->mss >> 1) &&
                    skb->next != tp->send_head &&
                    skb->next != (struct sk_buff *) &sk->write_queue)
                        tcp_retrans_try_collapse(sk, skb);

                if (tp->af_specific->rebuild_header(sk, skb)) {
#ifdef TCP_DEBUG
                        printk(KERN_DEBUG "tcp_do_rebuild_header failed\n");
#endif
                        break;
                }

                SOCK_DEBUG(sk, "retransmit sending seq=%x\n", skb->seq);

                /* Update ack and window. */
                tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
                th->window = ntohs(tcp_select_window(sk));
                tcp_update_options((__u32 *)(th + 1), tp);

                size = skb->tail - (unsigned char *) th;
                tp->af_specific->send_check(sk, th, size, skb);

                skb->when = jiffies;

                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        break;

                skb_set_owner_w(buff, sk);

                clear_delayed_acks(sk);
                tp->af_specific->queue_xmit(buff);

                /* Count retransmissions. */
                ct++;
                sk->prot->retransmits++;
                tcp_statistics.TcpRetransSegs++;

                /* Only one retransmit requested. */
                if (!all)
                        break;

                /* This should cut it off before we send too many packets. */
                if (ct >= tp->snd_cwnd)
                        break;

                /* Advance the pointer. */
                tp->retrans_head = skb->next;
                if ((tp->retrans_head == tp->send_head) ||
                    (tp->retrans_head == (struct sk_buff *) &sk->write_queue))
                        tp->retrans_head = NULL;
        }
}
/*
 * Send a fin.
 */
void tcp_send_fin(struct sock *sk)
{
        struct tcphdr *th = (struct tcphdr *) &sk->dummy_th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *t1;
        struct sk_buff *buff;
        int tmp;

        buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_KERNEL);
        if (buff == NULL) {
                /* FIXME: This is a disaster if it occurs. */
                printk(KERN_INFO "tcp_send_fin: Impossible malloc failure");
                return;
        }

        /* Administrivia. */
        buff->csum = 0;

        /* Put in the IP header and routing stuff. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                int t;

                /* FIXME: We must not throw this out. Eventually we must
                 * put a FIN into the queue, otherwise it never gets queued.
                 */
                kfree_skb(buff);
                tp->write_seq++;
                t = del_timer(&sk->timer);
                if (t)
                        add_timer(&sk->timer);
                else
                        tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
                return;
        }
        /* We ought to check if the end of the queue is a buffer and
         * if so simply add the fin to that buffer, not send it ahead.
         */
        t1 = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        buff->h.th = t1;
        tcp_build_options((__u32 *)(t1 + 1), tp);

        memcpy(t1, th, sizeof(*t1));
        buff->seq = tp->write_seq;
        tp->write_seq++;
        buff->end_seq = tp->write_seq;
        t1->seq = htonl(buff->seq);
        t1->ack_seq = htonl(tp->rcv_nxt);
        t1->window = htons(tcp_select_window(sk));
        t1->fin = 1;

        tp->af_specific->send_check(sk, t1, tp->tcp_header_len, buff);

        /* The fin can only be transmitted after the data. */
        skb_queue_tail(&sk->write_queue, buff);
        if (tp->send_head == NULL) {
                /* FIXME: BUG! we need to check if the fin fits into the window
                 * here. If not we need to do window probing (sick, but true)
                 */
                struct sk_buff *skb1;

                tp->packets_out++;
                tp->snd_nxt = tp->write_seq;
                buff->when = jiffies;

                skb1 = skb_clone(buff, GFP_KERNEL);
                if (skb1) {
                        skb_set_owner_w(skb1, sk);
                        tp->af_specific->queue_xmit(skb1);
                }

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
        }
}
/* WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called. If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */
int tcp_send_synack(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;
        struct sk_buff *buff;
        struct tcphdr *th;
        int tmp;

        skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
        if (skb == NULL)
                return -ENOMEM;

        tmp = tp->af_specific->build_net_header(sk, skb);
        if (tmp < 0) {
                kfree_skb(skb);
                return tmp;
        }

        th = (struct tcphdr *) skb_put(skb, sizeof(struct tcphdr));
        skb->h.th = th;
        memset(th, 0, sizeof(struct tcphdr));

        th->syn = 1;
        th->ack = 1;

        th->source = sk->dummy_th.source;
        th->dest = sk->dummy_th.dest;

        skb->seq = tp->snd_una;
        skb->end_seq = skb->seq + 1 /* th->syn */;
        th->seq = ntohl(skb->seq);

        /* This is a resend of a previous SYN, now with an ACK.
         * we must reuse the previously offered window.
         */
        th->window = htons(tp->rcv_wnd);

        tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);

        tmp = tcp_syn_build_options(skb, sk->mss,
                                    tp->sack_ok, tp->tstamp_ok,
                                    tp->wscale_ok, tp->rcv_wscale);
        skb->csum = 0;
        th->doff = (sizeof(*th) + tmp) >> 2;

        tp->af_specific->send_check(sk, th, sizeof(*th) + tmp, skb);

        skb_queue_tail(&sk->write_queue, skb);

        buff = skb_clone(skb, GFP_ATOMIC);
        if (buff) {
                skb_set_owner_w(buff, sk);

                tp->packets_out++;
                skb->when = jiffies;

                tp->af_specific->queue_xmit(buff);
                tcp_statistics.TcpOutSegs++;
        }

        tcp_reset_xmit_timer(sk, TIME_RETRANS, TCP_TIMEOUT_INIT);

        return 0;
}
/*
 * Send out a delayed ack, the caller does the policy checking
 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
 * for details.
 */
void tcp_send_delayed_ack(struct tcp_opt *tp, int max_timeout)
{
        unsigned long timeout;

        /* Stay within the limit we were given */
        timeout = tp->ato;
        if (timeout > max_timeout)
                timeout = max_timeout;
        timeout += jiffies;

        /* Use new timeout only if there wasn't an older one earlier. */
        if (!del_timer(&tp->delack_timer) || timeout < tp->delack_timer.expires)
                tp->delack_timer.expires = timeout;

        add_timer(&tp->delack_timer);
}
/*
 * This routine sends an ack and also updates the window.
 */
void tcp_send_ack(struct sock *sk)
{
        struct sk_buff *buff;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcphdr *th;
        int tmp;

        if (sk->zapped)
                return;        /* We have been reset, we may not send again. */

        /* We need to grab some memory, and put together an ack,
         * and then put it into the queue to be sent.
         * FIXME: is it better to waste memory here and use a
         * constant sized ACK?
         */
        buff = sock_wmalloc(sk, BASE_ACK_SIZE + tp->tcp_header_len, 1, GFP_ATOMIC);
        if (buff == NULL) {
                /* Force it to send an ack. We don't have to do this
                 * (ACK is unreliable) but it's much better use of
                 * bandwidth on slow links to send a spare ack than
                 * resend packets.
                 */
                tcp_send_delayed_ack(tp, HZ/2);
                return;
        }

        clear_delayed_acks(sk);

        /* Assemble a suitable TCP frame. */
        buff->csum = 0;

        /* Put in the IP header and routing stuff. */
        tmp = tp->af_specific->build_net_header(sk, buff);
        if (tmp < 0) {
                kfree_skb(buff);
                return;
        }

        th = (struct tcphdr *) skb_put(buff, tp->tcp_header_len);
        memcpy(th, &sk->dummy_th, sizeof(struct tcphdr));
        tcp_build_options((__u32 *)(th + 1), tp);

        /* Swap the send and the receive. */
        th->window = ntohs(tcp_select_window(sk));
        th->seq = ntohl(tp->snd_nxt);
        tp->last_ack_sent = tp->rcv_nxt;
        th->ack_seq = htonl(tp->rcv_nxt);

        /* Fill in the packet and send it. */
        tp->af_specific->send_check(sk, th, tp->tcp_header_len, buff);
        tp->af_specific->queue_xmit(buff);
        tcp_statistics.TcpOutSegs++;
}
/*
 * This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 */
void tcp_write_wakeup(struct sock *sk)
{
        struct sk_buff *buff, *skb;
        struct tcphdr *t1;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int tmp;

        if (sk->zapped)
                return;        /* After a valid reset we can send no more. */

        /* Write data can still be transmitted/retransmitted in the
         * following states. If any other state is encountered, return.
         * [listen/close will never occur here anyway]
         */
        if ((1 << sk->state) &
            ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1|TCPF_LAST_ACK|TCPF_CLOSING))
                return;

        if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && (skb = tp->send_head)) {
                struct tcphdr *th;
                unsigned long win_size;

                /* We are probing the opening of a window
                 * but the window size is != 0;
                 * this must be the result of sender-side SWS avoidance.
                 */
                win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
                if (win_size < skb->end_seq - skb->seq) {
                        if (tcp_fragment(sk, skb, win_size)) {
                                printk(KERN_DEBUG "tcp_write_wakeup: "
                                       "fragment failed\n");
                                return;
                        }
                }

                th = skb->h.th;
                tp->af_specific->send_check(sk, th, th->doff * 4 + win_size, skb);
                buff = skb_clone(skb, GFP_ATOMIC);
                if (buff == NULL)
                        return;

                skb_set_owner_w(buff, sk);
                tp->packets_out++;

                clear_delayed_acks(sk);

                if (!tcp_timer_is_set(sk, TIME_RETRANS))
                        tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

                skb->when = jiffies;
                update_send_head(sk);
                tp->snd_nxt = skb->end_seq;
        } else {
                buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
                if (buff == NULL)
                        return;

                buff->csum = 0;

                /* Put in the IP header and routing stuff. */
                tmp = tp->af_specific->build_net_header(sk, buff);
                if (tmp < 0) {
                        kfree_skb(buff);
                        return;
                }

                t1 = (struct tcphdr *) skb_put(buff, sizeof(struct tcphdr));
                memcpy(t1, (void *) &sk->dummy_th, sizeof(*t1));

                /* FIXME: should zero window probes have SACK and/or TIMESTAMP data?
                 * If so we have to tack them on here.
                 */

                /* Use a previous sequence.
                 * This should cause the other end to send an ack.
                 */
                t1->seq = htonl(tp->snd_nxt - 1);
                t1->ack_seq = htonl(tp->rcv_nxt);
                t1->window = htons(tcp_select_window(sk));

                /* Value from dummy_th may be larger. */
                t1->doff = sizeof(struct tcphdr)/4;

                tp->af_specific->send_check(sk, t1, sizeof(*t1), buff);
        }

        /* Send it. */
        tp->af_specific->queue_xmit(buff);
        tcp_statistics.TcpOutSegs++;
}
/*
 * A window probe timeout has occurred.
 * If window is not closed send a partial packet
 * else a zero probe.
 */
void tcp_send_probe0(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        tcp_write_wakeup(sk);
        tp->pending = TIME_PROBE0;
        tp->backoff++;
        tp->probes_out++;
        tcp_reset_xmit_timer(sk, TIME_PROBE0,
                             min(tp->rto << tp->backoff, 120*HZ));
}