/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_output.c,v 1.129 2000/11/28 17:04:10 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:	Pedro Roque	:	Retransmit queue handled by TCP.
 *				:	Fragmentation on mtu decrease
 *				:	Segment collapse on retransmit
 *				:	AF independence
 *
 *		Linus Torvalds	:	send_delayed_ack
 *		David S. Miller	:	Charge memory using the right skb
 *					during syn/ack processing.
 *		David S. Miller :	Output engine completely rewritten.
 *		Andrea Arcangeli:	SYNACK carry ts_recent in tsecr.
 *		Cacophonix Gaul :	draft-minshall-nagle-01
 *		J Hadi Salim	:	ECN support
 */

#include <net/tcp.h>

#include <linux/smp_lock.h>

/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse = 1;

static __inline__
void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
{
	tp->send_head = skb->next;
	if (tp->send_head == (struct sk_buff *) &sk->write_queue)
		tp->send_head = NULL;
	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
	if (tp->packets_out++ == 0)
		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
}

/* SND.NXT, if window was not shrunk.
 * If window has been shrunk, what should we make? It is not clear at all.
 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
 * invalid. OK, let's make this for now:
 */
static __inline__ __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_opt *tp)
{
	if (!before(tp->snd_una + tp->snd_wnd, tp->snd_nxt))
		return tp->snd_nxt;
	else
		return tp->snd_una + tp->snd_wnd;
}

/* Calculate mss to advertise in SYN segment.
 * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that:
 *
 * 1. It is independent of path mtu.
 * 2. Ideally, it is maximal possible segment size i.e. 65535-40.
 * 3. For IPv4 it is reasonable to calculate it from maximal MTU of
 *    attached devices, because some buggy hosts are confused by
 *    large MTUs.
 * 4. We do not make 3, we advertise MSS, calculated from first
 *    hop device mtu, but allow to raise it to ip_rt_min_advmss.
 *    This may be overridden via information stored in routing table.
 * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible,
 *    probably even Jumbo".
 */
static __u16 tcp_advertise_mss(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct dst_entry *dst = __sk_dst_get(sk);
	int mss = tp->advmss;

	if (dst && dst->advmss < mss) {
		mss = dst->advmss;
		tp->advmss = mss;
	}

	return (__u16)mss;
}

/* RFC2861. Reset CWND after idle period longer than RTO to "restart window".
 * This is the first part of cwnd validation mechanism. */
static void tcp_cwnd_restart(struct tcp_opt *tp)
{
	s32 delta = tcp_time_stamp - tp->lsndtime;
	u32 restart_cwnd = tcp_init_cwnd(tp);
	u32 cwnd = tp->snd_cwnd;

	tp->snd_ssthresh = tcp_current_ssthresh(tp);
	restart_cwnd = min(restart_cwnd, cwnd);

	while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	tp->snd_cwnd = max(cwnd, restart_cwnd);
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->snd_cwnd_used = 0;
}
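
/* Worked example of the restart logic above (illustrative figures, not
 * from the original source): with snd_cwnd == 10, tcp_init_cwnd() == 2
 * and an idle time a little over two RTOs, the while loop halves cwnd
 * twice (10 -> 5 -> 2) and snd_cwnd ends up at max(2, 2) == 2, while
 * the pre-idle ssthresh has already been saved above.
 */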

static __inline__
void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb)
{
	u32 now = tcp_time_stamp;

	if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
		tcp_cwnd_restart(tp);

	tp->lsndtime = now;

	/* If it is a reply for ato after last received
	 * packet, enter pingpong mode.
	 */
	if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato)
		tp->ack.pingpong = 1;
}

static __inline__
void tcp_event_ack_sent(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	tcp_dec_quickack_mode(tp);
	tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
}

/* Choose a new window to advertise, update state in tcp_opt for the
 * socket, and return result with RFC1323 scaling applied.  The return
 * value can be stuffed directly into th->window for an outgoing
 * frame.
 */
static __inline__ u16 tcp_select_window(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	u32 cur_win = tcp_receive_window(tp);
	u32 new_win = __tcp_select_window(sk);

	/* Never shrink the offered window */
	if (new_win < cur_win) {
		/* Danger Will Robinson!
		 * Don't update rcv_wup/rcv_wnd here or else
		 * we will not be able to advertise a zero
		 * window in time.  --DaveM
		 *
		 * Relax Will Robinson.
		 */
		new_win = cur_win;
	}
	tp->rcv_wnd = new_win;
	tp->rcv_wup = tp->rcv_nxt;

	/* RFC1323 scaling applied */
	new_win >>= tp->rcv_wscale;

#ifdef TCP_FORMAL_WINDOW
	if (new_win == 0) {
		/* If we advertise zero window, disable fast path. */
		tp->pred_flags = 0;
	} else if (cur_win == 0 && tp->pred_flags == 0 &&
		   skb_queue_len(&tp->out_of_order_queue) == 0 &&
		   !tp->urg_data) {
		/* If we open zero window, enable fast path.
		   Without this it will be open by the first data packet,
		   it is too late to merge checksumming to copy.
		 */
		tcp_fast_path_on(tp);
	}
#endif

	return new_win;
}

/* This routine actually transmits TCP packets queued in by
 * tcp_do_sendmsg().  This is used by both the initial
 * transmission and possible later retransmissions.
 * All SKB's seen here are completely headerless.  It is our
 * job to build the TCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 */
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (skb != NULL) {
		struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
		int tcp_header_size = tp->tcp_header_len;
		struct tcphdr *th;
		int sysctl_flags;
		int err;

#define SYSCTL_FLAG_TSTAMPS	0x1
#define SYSCTL_FLAG_WSCALE	0x2
#define SYSCTL_FLAG_SACK	0x4

		sysctl_flags = 0;
		if (tcb->flags & TCPCB_FLAG_SYN) {
			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
			if (sysctl_tcp_timestamps) {
				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
				sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
			}
			if (sysctl_tcp_window_scaling) {
				tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
				sysctl_flags |= SYSCTL_FLAG_WSCALE;
			}
			if (sysctl_tcp_sack) {
				sysctl_flags |= SYSCTL_FLAG_SACK;
				if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
			}
		} else if (tp->eff_sacks) {
			/* A SACK is 2 pad bytes, a 2 byte header, plus
			 * 2 32-bit sequence numbers for each SACK block.
			 */
			tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
					    (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
		}
		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
		skb->h.th = th;
		skb_set_owner_w(skb, sk);

		/* Build TCP header and checksum it. */
		th->source	= sk->sport;
		th->dest	= sk->dport;
		th->seq		= htonl(tcb->seq);
		th->ack_seq	= htonl(tp->rcv_nxt);
		*(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
		if (tcb->flags & TCPCB_FLAG_SYN) {
			/* RFC1323: The window in SYN & SYN/ACK segments
			 * is never scaled.
			 */
			th->window	= htons(tp->rcv_wnd);
		} else {
			th->window	= htons(tcp_select_window(sk));
		}
		th->check	= 0;
		th->urg_ptr	= 0;

		if (tp->urg_mode &&
		    between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
			th->urg_ptr	= htons(tp->snd_up-tcb->seq);
			th->urg		= 1;
		}

		if (tcb->flags & TCPCB_FLAG_SYN) {
			tcp_syn_build_options((__u32 *)(th + 1),
					      tcp_advertise_mss(sk),
					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
					      (sysctl_flags & SYSCTL_FLAG_SACK),
					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
					      tp->rcv_wscale,
					      tcb->when,
					      tp->ts_recent);
		} else {
			tcp_build_and_update_options((__u32 *)(th + 1),
						     tp, tcb->when);

			TCP_ECN_send(sk, tp, skb, tcp_header_size);
		}
		tp->af_specific->send_check(sk, th, skb->len, skb);

		if (tcb->flags & TCPCB_FLAG_ACK)
			tcp_event_ack_sent(sk);

		if (skb->len != tcp_header_size)
			tcp_event_data_sent(tp, skb);

		TCP_INC_STATS(TcpOutSegs);

		err = tp->af_specific->queue_xmit(skb);
		if (err <= 0)
			return err;

		tcp_enter_cwr(tp);

		/* NET_XMIT_CN is special. It does not guarantee,
		 * that this packet is lost. It tells that device
		 * is about to start to drop packets or already
		 * drops some packets of the same priority and
		 * invokes us to send less aggressively.
		 */
		return err == NET_XMIT_CN ? 0 : err;
	}
	return -ENOBUFS;
#undef SYSCTL_FLAG_TSTAMPS
#undef SYSCTL_FLAG_WSCALE
#undef SYSCTL_FLAG_SACK
}
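
/* Note on the header word written at offset 12 above: the data offset
 * and flag bits share one 16-bit field.  As an illustration (assuming,
 * as in this tree, that the TCPCB_FLAG_* values mirror the on-the-wire
 * flag bits, e.g. TCPCB_FLAG_ACK == 0x10): a 32 byte header carrying
 * only an ACK yields htons(((32 >> 2) << 12) | 0x10) == htons(0x8010).
 */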

/* This is the main buffer sending routine. We queue the buffer
 * and decide whether to queue or transmit now.
 *
 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
 * otherwise socket can stall.
 */
void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigned cur_mss)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* Advance write_seq and place onto the write_queue. */
	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
	__skb_queue_tail(&sk->write_queue, skb);
	tcp_charge_skb(sk, skb);

	if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, tp->nonagle)) {
		/* Send it out now. */
		TCP_SKB_CB(skb)->when = tcp_time_stamp;
		if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) {
			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
			tcp_minshall_update(tp, cur_mss, skb);
			if (tp->packets_out++ == 0)
				tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
			return;
		}
	}
	/* Queue it, remembering where we must start sending. */
	if (tp->send_head == NULL)
		tp->send_head = skb;
}

/* Function to create two new TCP segments.  Shrinks the given segment
 * to the specified size and appends a new segment with the rest of the
 * packet to the list.  This won't be called frequently, I hope.
 * Remember, these are still headerless SKBs at this point.
 */
static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct sk_buff *buff;
	int nsize = skb->len - len;
	u16 flags;

	/* Get a new skb... force flag on. */
	buff = tcp_alloc_skb(sk, nsize + MAX_TCP_HEADER, GFP_ATOMIC);
	if (buff == NULL)
		return -ENOMEM; /* We'll just try again later. */
	tcp_charge_skb(sk, buff);

	/* Reserve space for headers. */
	skb_reserve(buff, MAX_TCP_HEADER);

	/* Correct the sequence numbers. */
	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;

	/* PSH and FIN should only be set in the second packet. */
	flags = TCP_SKB_CB(skb)->flags;
	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
	TCP_SKB_CB(buff)->flags = flags;
	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
		tp->lost_out++;
		tp->left_out++;
	}
	TCP_SKB_CB(buff)->sacked &= ~TCPCB_AT_TAIL;

	/* Copy and checksum data tail into the new buffer. */
	buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
					       nsize, 0);

	/* This takes care of the FIN sequence number too. */
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
	skb_trim(skb, len);

	/* Rechecksum original buffer. */
	skb->csum = csum_partial(skb->data, skb->len, 0);

	/* Looks stupid, but our code really uses when of
	 * skbs, which it never sent before. --ANK
	 */
	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;

	/* Link BUFF into the send queue. */
	__skb_append(skb, buff);

	return 0;
}

/* This function synchronizes snd mss to current pmtu/exthdr set.

   tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT account
   for TCP options, but includes only bare TCP header.

   tp->mss_clamp is mss negotiated at connection setup.
   It is the minimum of user_mss and mss received with SYN.
   It also does not include TCP options.

   tp->pmtu_cookie is last pmtu, seen by this function.

   tp->mss_cache is current effective sending mss, including
   all tcp options except for SACKs. It is evaluated,
   taking into account current pmtu, but never exceeds
   tp->mss_clamp.

   NOTE1. rfc1122 clearly states that advertised MSS
   DOES NOT include either tcp or ip options.

   NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
   this function.			--ANK (980731)
 */

int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	int mss_now;

	/* Calculate base mss without TCP options:
	   It is MMS_S - sizeof(tcphdr) of rfc1122
	 */

	mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);

	/* Clamp it (mss_clamp does not include tcp options) */
	if (mss_now > tp->mss_clamp)
		mss_now = tp->mss_clamp;

	/* Now subtract optional transport overhead */
	mss_now -= tp->ext_header_len;

	/* Then reserve room for full set of TCP options and 8 bytes of data */
	if (mss_now < 48)
		mss_now = 48;

	/* Now subtract TCP options size, not including SACKs */
	mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);

	/* Bound mss with half of window */
	if (tp->max_window && mss_now > (tp->max_window>>1))
		mss_now = max((tp->max_window>>1), 68 - tp->tcp_header_len);

	/* And store cached results */
	tp->pmtu_cookie = pmtu;
	tp->mss_cache = mss_now;
	return mss_now;
}
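
/* Illustrative numbers for the calculation above (IPv4 over Ethernet,
 * not from the original source): pmtu 1500 - 20 (IP) - 20 (TCP) gives a
 * base of 1460; with timestamps enabled tcp_header_len is 20 + 12, so
 * 12 bytes of options are subtracted and mss_cache becomes 1448.
 */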

/* This routine writes packets to the network.  It advances the
 * send_head.  This happens as incoming acks open up the remote
 * window for us.
 *
 * Returns 1, if no segments are in flight and we have queued segments, but
 * cannot send anything now because of SWS or another problem.
 */
int tcp_write_xmit(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	unsigned int mss_now;

	/* If we are closed, the bytes will have to remain here.
	 * In time closedown will finish, we empty the write queue and all
	 * will be happy.
	 */
	if (sk->state != TCP_CLOSE) {
		struct sk_buff *skb;
		int sent_pkts = 0;

		/* Account for SACKS, we may need to fragment due to this.
		 * It is just like the real MSS changing on us midstream.
		 * We also handle things correctly when the user adds some
		 * IP options mid-stream.  Silly to do, but cover it.
		 */
		mss_now = tcp_current_mss(sk);

		while ((skb = tp->send_head) &&
		       tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? tp->nonagle : 1)) {
			if (skb->len > mss_now) {
				if (tcp_fragment(sk, skb, mss_now))
					break;
			}

			TCP_SKB_CB(skb)->when = tcp_time_stamp;
			if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
				break;
			/* Advance the send_head.  This one is sent out. */
			update_send_head(sk, tp, skb);
			tcp_minshall_update(tp, mss_now, skb);
			sent_pkts = 1;
		}

		if (sent_pkts) {
			tcp_cwnd_validate(sk, tp);
			return 0;
		}

		return !tp->packets_out && tp->send_head;
	}
	return 0;
}

/* This function returns the amount that we can raise the
 * usable window based on the following constraints
 *
 * 1. The window can never be shrunk once it is offered (RFC 793)
 * 2. We limit memory per socket
 *
 * RFC 1122:
 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
 *  RECV.NEXT + RCV.WIN fixed until:
 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 *
 * i.e. don't raise the right edge of the window until you can raise
 * it at least MSS bytes.
 *
 * Unfortunately, the recommended algorithm breaks header prediction,
 * since header prediction assumes th->window stays fixed.
 *
 * Strictly speaking, keeping th->window fixed violates the receiver
 * side SWS prevention criteria. The problem is that under this rule
 * a stream of single byte packets will cause the right side of the
 * window to always advance by a single byte.
 *
 * Of course, if the sender implements sender side SWS prevention
 * then this will not be a problem.
 *
 * BSD seems to make the following compromise:
 *
 *	If the free space is less than the 1/4 of the maximum
 *	space available and the free space is less than 1/2 mss,
 *	then set the window to 0.
 *	[ Actually, bsd uses MSS and 1/4 of maximal _window_ ]
 *	Otherwise, just prevent the window from shrinking
 *	and from being larger than the largest representable value.
 *
 * This prevents incremental opening of the window in the regime
 * where TCP is limited by the speed of the reader side taking
 * data out of the TCP receive queue. It does nothing about
 * those cases where the window is constrained on the sender side
 * because the pipeline is full.
 *
 * BSD also seems to "accidentally" limit itself to windows that are a
 * multiple of MSS, at least until the free space gets quite small.
 * This would appear to be a side effect of the mbuf implementation.
 * Combining these two algorithms results in the observed behavior
 * of having a fixed window size at almost all times.
 *
 * Below we obtain similar behavior by forcing the offered window to
 * a multiple of the mss when it is feasible to do so.
 *
 * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
 * Regular options like TIMESTAMP are taken into account.
 */
u32 __tcp_select_window(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	/* MSS for the peer's data.  Previous versions used mss_clamp
	 * here.  I don't know if the value based on our guesses
	 * of peer's MSS is better for the performance.  It's more correct
	 * but may be worse for the performance because of rcv_mss
	 * fluctuations.  --SAW  1998/11/1
	 */
	unsigned int mss = tp->ack.rcv_mss;
	int free_space;
	u32 window;

	/* Sometimes free_space can be < 0. */
	free_space = tcp_space(sk);
	if (tp->window_clamp < mss)
		mss = tp->window_clamp;

	if (free_space < (int)min(tp->window_clamp, tcp_full_space(sk)) / 2) {
		tp->ack.quick = 0;

		if (tcp_memory_pressure)
			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4*tp->advmss);

		if (free_space < ((int)mss))
			return 0;
	}

	if (free_space > tp->rcv_ssthresh)
		free_space = tp->rcv_ssthresh;

	/* Get the largest window that is a nice multiple of mss.
	 * Window clamp already applied above.
	 * If our current window offering is within 1 mss of the
	 * free space we just keep it. This prevents the divide
	 * and multiply from happening most of the time.
	 * We also don't do any window rounding when the free space
	 * is too small.
	 */
	window = tp->rcv_wnd;
	if ((((int) window) <= (free_space - ((int) mss))) ||
	    (((int) window) > free_space))
		window = (((unsigned int) free_space)/mss)*mss;

	return window;
}
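
/* Example of the rounding above (illustrative figures): with mss 1460,
 * free_space 17000 and a current offer of 8760, the offer is more than
 * one mss below free_space, so the window is recomputed as
 * (17000/1460)*1460 == 11*1460 == 16060, a multiple of the mss.
 */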

/* Attempt to collapse two adjacent SKB's during retransmission. */
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct sk_buff *next_skb = skb->next;

	/* The first test we must make is that neither of these two
	 * SKB's are still referenced by someone else.
	 */
	if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
		int skb_size = skb->len, next_skb_size = next_skb->len;
		u16 flags = TCP_SKB_CB(skb)->flags;

		/* Also punt if next skb has been SACK'd. */
		if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
			return;

		/* Next skb is out of window. */
		if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
			return;

		/* Punt if not enough space exists in the first SKB for
		 * the data in the second, or the total combined payload
		 * would exceed the MSS.
		 */
		if ((next_skb_size > skb_tailroom(skb)) ||
		    ((skb_size + next_skb_size) > mss_now))
			return;

		/* Ok.	We will be able to collapse the packet. */
		__skb_unlink(next_skb, next_skb->list);

		if (skb->len % 4) {
			/* Must copy and rechecksum all data. */
			memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
			skb->csum = csum_partial(skb->data, skb->len, 0);
		} else {
			/* Optimize, actually we could also combine next_skb->csum
			 * to skb->csum using a single add w/carry operation too.
			 */
			skb->csum = csum_partial_copy_nocheck(next_skb->data,
							      skb_put(skb, next_skb_size),
							      next_skb_size, skb->csum);
		}

		/* Update sequence range on original skb. */
		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;

		/* Merge over control information. */
		flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
		TCP_SKB_CB(skb)->flags = flags;

		/* All done, get rid of second SKB and account for it so
		 * packet counting does not break.
		 */
		TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
			tp->retrans_out--;
		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
			tp->lost_out--;
			tp->left_out--;
		}
		if (!tp->sack_ok && tp->sacked_out) {
			/* Reno case is special. Sigh... */
			tp->sacked_out--;
			tp->left_out--;
		}

		/* Not quite right: it can be > snd.fack, but
		 * it is better to underestimate fackets.
		 */
		if (tp->fackets_out)
			tp->fackets_out--;
		tcp_free_skb(sk, next_skb);
		tp->packets_out--;
	}
}

/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */
void tcp_simple_retransmit(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb;
	unsigned int mss = tcp_current_mss(sk);
	int lost = 0;

	for_retrans_queue(skb, sk, tp) {
		if (skb->len > mss &&
		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out--;
			}
			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
				tp->lost_out++;
				lost = 1;
			}
		}
	}

	if (!lost)
		return;

	tp->left_out = tp->sacked_out + tp->lost_out;

	/* Don't muck with the congestion window here.
	 * Reason is that we do not increase amount of _data_
	 * in network, but units changed and effective
	 * cwnd/ssthresh really reduced now.
	 */
	if (tp->ca_state != TCP_CA_Loss) {
		tp->high_seq = tp->snd_nxt;
		tp->snd_ssthresh = tcp_current_ssthresh(tp);
		tp->prior_ssthresh = 0;
		tp->undo_marker = 0;
		tp->ca_state = TCP_CA_Loss;
	}
	tcp_xmit_retransmit_queue(sk);
}

/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.  Returns non-zero if an
 * error occurred which prevented the send.
 */
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	unsigned int cur_mss = tcp_current_mss(sk);
	int err;

	/* Do not send more than we queued. 1/4 is reserved for possible
	 * copying overhead: fragmentation, tunneling, mangling etc.
	 */
	if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf))
		return -EAGAIN;

	if (skb->len > cur_mss) {
		if (tcp_fragment(sk, skb, cur_mss))
			return -ENOMEM; /* We'll try again later. */

		/* New SKB created, account for it. */
		tp->packets_out++;
	}

	/* Collapse two adjacent packets if worthwhile and we can. */
	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
	    (skb->len < (cur_mss >> 1)) &&
	    (skb->next != tp->send_head) &&
	    (skb->next != (struct sk_buff *) &sk->write_queue) &&
	    (sysctl_tcp_retrans_collapse != 0))
		tcp_retrans_try_collapse(sk, skb, cur_mss);

	if (tp->af_specific->rebuild_header(sk))
		return -EHOSTUNREACH; /* Routing failure or similar. */

	/* Some Solaris stacks overoptimize and ignore the FIN on a
	 * retransmit when old data is attached.  So strip it off
	 * since it is cheap to do so and saves bytes on the network.
	 */
	if (skb->len > 0 &&
	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
		TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
		skb_trim(skb, 0);
		skb->csum = 0;
	}

	/* Make a copy, if the first transmission SKB clone we made
	 * is still in somebody's hands, else make a clone.
	 */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;

	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
				    skb_copy(skb, GFP_ATOMIC):
				    skb_clone(skb, GFP_ATOMIC)));

	if (err == 0) {
		/* Update global TCP statistics. */
		TCP_INC_STATS(TcpRetransSegs);

#if FASTRETRANS_DEBUG > 0
		if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
			if (net_ratelimit())
				printk(KERN_DEBUG "retrans_out leaked.\n");
		}
#endif
		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
		tp->retrans_out++;

		/* Save stamp of the first retransmit. */
		if (!tp->retrans_stamp)
			tp->retrans_stamp = TCP_SKB_CB(skb)->when;

		tp->undo_retrans++;

		/* snd_nxt is stored to detect loss of retransmitted segment,
		 * see tcp_input.c tcp_sacktag_write_queue().
		 */
		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
	}
	return err;
}
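
/* The queueing limit above allows at most a quarter of wmem_queued as
 * copying overhead, but never more than sndbuf.  For illustration: with
 * wmem_queued == 65536 the cap is min(81920, sk->sndbuf), so with the
 * common 64K sndbuf the retransmit is deferred once wmem_alloc exceeds
 * 65536.
 */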

/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged.  It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 * If doing SACK, the first ACK which comes back for a timeout
 * based retransmit packet might feed us FACK information again.
 * If so, we use it to avoid unnecessary retransmissions.
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb;
	int packet_cnt = tp->lost_out;

	/* First pass: retransmit lost packets. */
	if (packet_cnt) {
		for_retrans_queue(skb, sk, tp) {
			__u8 sacked = TCP_SKB_CB(skb)->sacked;

			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
				return;

			if (sacked&TCPCB_LOST) {
				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
					if (tcp_retransmit_skb(sk, skb))
						return;
					if (tp->ca_state != TCP_CA_Loss)
						NET_INC_STATS_BH(TCPFastRetrans);
					else
						NET_INC_STATS_BH(TCPSlowStartRetrans);

					if (skb == skb_peek(&sk->write_queue))
						tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
				}

				if (--packet_cnt <= 0)
					break;
			}
		}
	}

	/* OK, demanded retransmission is finished. */

	/* Forward retransmissions are possible only during Recovery. */
	if (tp->ca_state != TCP_CA_Recovery)
		return;

	/* No forward retransmissions in Reno are possible. */
	if (!tp->sack_ok)
		return;

	/* Yeah, we have to make difficult choice between forward transmission
	 * and retransmission... Both ways have their merits...
	 *
	 * For now we do not retransmit anything, while we have some new
	 * segments to send.
	 */

	if (tcp_may_send_now(sk, tp))
		return;

	packet_cnt = 0;

	for_retrans_queue(skb, sk, tp) {
		if (++packet_cnt > tp->fackets_out)
			break;

		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			break;

		if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
			continue;

		/* Ok, retransmit it. */
		if (tcp_retransmit_skb(sk, skb))
			break;

		if (skb == skb_peek(&sk->write_queue))
			tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);

		NET_INC_STATS_BH(TCPForwardRetrans);
	}
}

/* Send a fin.  The caller locks the socket for us.  This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 */
void tcp_send_fin(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb = skb_peek_tail(&sk->write_queue);
	unsigned int mss_now;

	/* Optimization, tack on the FIN if we have a queue of
	 * unsent frames.  But be careful about outgoing SACKS
	 * and IP options.
	 */
	mss_now = tcp_current_mss(sk);

	/* Please, find seven differences of 2.3.33 and look
	 * what I broke here. 8) --ANK
	 */

	if (tp->send_head != NULL) {
		/* tcp_write_xmit() takes care of the rest. */
		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
		TCP_SKB_CB(skb)->end_seq++;
		tp->write_seq++;

		/* Special case to avoid Nagle bogosity.  If this
		 * segment is the last segment, and it was queued
		 * due to Nagle/SWS-avoidance, send it out now.
		 */
		if (tp->send_head == skb &&
		    !after(tp->write_seq, tp->snd_una + tp->snd_wnd)) {
			TCP_SKB_CB(skb)->when = tcp_time_stamp;
			if (!tcp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)))
				update_send_head(sk, tp, skb);
			else
				tcp_check_probe_timer(sk, tp);
		}
	} else {
		/* Socket is locked, keep trying until memory is available. */
		for (;;) {
			skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
			if (skb)
				break;
			current->policy |= SCHED_YIELD;
			schedule();
		}

		/* Reserve space for headers and prepare control bits. */
		skb_reserve(skb, MAX_TCP_HEADER);
		skb->csum = 0;
		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
		TCP_SKB_CB(skb)->sacked = 0;

		/* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). */
		TCP_SKB_CB(skb)->seq = tp->write_seq;
		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
		tcp_send_skb(sk, skb, 0, mss_now);
		__tcp_push_pending_frames(sk, tp, mss_now, 1);
	}
}

/* We get here when a process closes a file descriptor (either due to
 * an explicit close() or as a byproduct of exit()'ing) and there
 * was unread data in the receive queue.  This behavior is recommended
 * by draft-ietf-tcpimpl-prob-03.txt section 3.10.  -DaveM
 */
void tcp_send_active_reset(struct sock *sk, int priority)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb;

	/* NOTE: No TCP options attached and we never retransmit this. */
	skb = alloc_skb(MAX_TCP_HEADER, priority);
	if (!skb) {
		NET_INC_STATS(TCPAbortFailed);
		return;
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, MAX_TCP_HEADER);
	skb->csum = 0;
	TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
	TCP_SKB_CB(skb)->sacked = 0;

	/* Send it off. */
	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	if (tcp_transmit_skb(sk, skb))
		NET_INC_STATS(TCPAbortFailed);
}

/* WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called. If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */
int tcp_send_synack(struct sock *sk)
{
	struct sk_buff *skb;

	skb = skb_peek(&sk->write_queue);
	if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
		return -EFAULT;
	}
	if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
		if (skb_cloned(skb)) {
			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
			if (nskb == NULL)
				return -ENOMEM;
			__skb_unlink(skb, &sk->write_queue);
			__skb_queue_head(&sk->write_queue, nskb);
			tcp_free_skb(sk, skb);
			tcp_charge_skb(sk, nskb);
			skb = nskb;
		}

		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
		TCP_ECN_send_synack(&sk->tp_pinfo.af_tcp, skb);
	}
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
}

/*
 * Prepare a SYN-ACK.
 */
struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				 struct open_request *req)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct tcphdr *th;
	int tcp_header_size;
	struct sk_buff *skb;

	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_TCP_HEADER);

	skb->dst = dst_clone(dst);

	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
			   (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
			   (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
			   /* SACK_PERM is in the place of NOP NOP of TS */
			   ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);

	memset(th, 0, sizeof(struct tcphdr));
	th->syn = 1;
	th->ack = 1;
	TCP_ECN_make_synack(req, th);
	th->source = sk->sport;
	th->dest = req->rmt_port;
	TCP_SKB_CB(skb)->seq = req->snt_isn;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
	th->seq = htonl(TCP_SKB_CB(skb)->seq);
	th->ack_seq = htonl(req->rcv_isn + 1);
	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
		__u8 rcv_wscale;
		/* Set this up on the first call only */
		req->window_clamp = tp->window_clamp ? : dst->window;
		/* tcp_full_space because it is guaranteed to be the first packet */
		tcp_select_initial_window(tcp_full_space(sk),
			dst->advmss - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
			&req->rcv_wnd,
			&req->window_clamp,
			req->wscale_ok,
			&rcv_wscale);
		req->rcv_wscale = rcv_wscale;
	}

	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
	th->window = htons(req->rcv_wnd);

	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	tcp_syn_build_options((__u32 *)(th + 1), dst->advmss, req->tstamp_ok,
			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
			      TCP_SKB_CB(skb)->when,
			      req->ts_recent);

	skb->csum = 0;
	th->doff = (tcp_header_size >> 2);
	TCP_INC_STATS(TcpOutSegs);
	return skb;
}

int tcp_connect(struct sock *sk, struct sk_buff *buff)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* Reserve space for headers. */
	skb_reserve(buff, MAX_TCP_HEADER);

	/* We'll fix this up when we get a response from the other end.
	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
	 */
	tp->tcp_header_len = sizeof(struct tcphdr) +
		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);

	/* If user gave his TCP_MAXSEG, record it to clamp */
	if (tp->user_mss)
		tp->mss_clamp = tp->user_mss;
	tp->max_window = 0;
	tcp_sync_mss(sk, dst->pmtu);

	if (!tp->window_clamp)
		tp->window_clamp = dst->window;
	tp->advmss = dst->advmss;
	tcp_initialize_rcv_mss(sk);

	tcp_select_initial_window(tcp_full_space(sk),
		tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
		&tp->rcv_wnd,
		&tp->window_clamp,
		sysctl_tcp_window_scaling,
		&tp->rcv_wscale);

	tp->rcv_ssthresh = tp->rcv_wnd;

	/* Socket identity change complete, no longer
	 * in TCP_CLOSE, so enter ourselves into the
	 * hash tables.
	 */
	tcp_set_state(sk,TCP_SYN_SENT);
	if (tp->af_specific->hash_connecting(sk))
		goto err_out;

	sk->err = 0;
	sk->done = 0;
	tp->snd_wnd = 0;
	tcp_init_wl(tp, tp->write_seq, 0);
	tp->snd_una = tp->write_seq;
	tp->snd_sml = tp->write_seq;
	tp->rcv_nxt = 0;
	tp->rcv_wup = 0;
	tp->copied_seq = 0;

	tp->rto = TCP_TIMEOUT_INIT;
	tp->retransmits = 0;
	tcp_clear_retrans(tp);

	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
	TCP_ECN_send_syn(tp, buff);
	TCP_SKB_CB(buff)->sacked = 0;
	buff->csum = 0;
	TCP_SKB_CB(buff)->seq = tp->write_seq++;
	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
	tp->snd_nxt = tp->write_seq;
	tp->pushed_seq = tp->write_seq;

	/* Send it off. */
	TCP_SKB_CB(buff)->when = tcp_time_stamp;
	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
	__skb_queue_tail(&sk->write_queue, buff);
	tcp_charge_skb(sk, buff);
	tp->packets_out++;
	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
	TCP_INC_STATS(TcpActiveOpens);

	/* Timer for repeating the SYN until an answer. */
	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
	return 0;

err_out:
	tcp_set_state(sk,TCP_CLOSE);
	kfree_skb(buff);
	return -EADDRNOTAVAIL;
}

/* Send out a delayed ack, the caller does the policy checking
 * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
 * for details.
 */
void tcp_send_delayed_ack(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	int ato = tp->ack.ato;
	unsigned long timeout;

	if (ato > TCP_DELACK_MIN) {
		int max_ato = HZ/2;

		if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED))
			max_ato = TCP_DELACK_MAX;

		/* Slow path, intersegment interval is "high". */

		/* If some rtt estimate is known, use it to bound delayed ack.
		 * Do not use tp->rto here, use results of rtt measurements
		 * directly.
		 */
		if (tp->srtt) {
			int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);

			if (rtt < max_ato)
				max_ato = rtt;
		}

		ato = min(ato, max_ato);
	}

	/* Stay within the limit we were given */
	timeout = jiffies + ato;

	/* Use new timeout only if there wasn't a older one earlier. */
	if (tp->ack.pending&TCP_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 */
		if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) {
			tcp_send_ack(sk);
			return;
		}

		if (!time_before(timeout, tp->ack.timeout))
			timeout = tp->ack.timeout;
	}
	tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER;
	tp->ack.timeout = timeout;
	if (!mod_timer(&tp->delack_timer, timeout))
		sock_hold(sk);

#ifdef TCP_FORMAL_WINDOW
	/* Explanation. Header prediction path does not handle
	 * case of zero window. If we send ACK immediately, pred_flags
	 * are reset when sending ACK. If rcv_nxt is advanced and
	 * ack is not sent, then delayed ack is scheduled.
	 * Hence, it is the best place to check for zero window.
	 */
	if (tp->pred_flags) {
		if (tcp_receive_window(tp) == 0)
			tp->pred_flags = 0;
	} else {
		if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
		    !tp->urg_data)
			tcp_fast_path_on(tp);
	}
#endif
}
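
/* Illustration of the clamping above (assuming HZ == 100): for an
 * interactive (pingpong) flow with a smoothed rtt of 12 jiffies,
 * max_ato starts at TCP_DELACK_MAX, is pulled down to the 12 jiffy rtt
 * estimate, and ato is then bounded to roughly 120 ms before the
 * delack timer is armed.
 */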

/* This routine sends an ack and also updates the window. */
void tcp_send_ack(struct sock *sk)
{
	/* If we have been reset, we may not send again. */
	if (sk->state != TCP_CLOSE) {
		struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
		struct sk_buff *buff;

		/* We are not putting this on the write queue, so
		 * tcp_transmit_skb() will set the ownership to this
		 * sock.
		 */
		buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
		if (buff == NULL) {
			tcp_schedule_ack(tp);
			tp->ack.ato = TCP_ATO_MIN;
			tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
			return;
		}

		/* Reserve space for headers and prepare control bits. */
		skb_reserve(buff, MAX_TCP_HEADER);
		buff->csum = 0;
		TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
		TCP_SKB_CB(buff)->sacked = 0;

		/* Send it off, this clears delayed acks for us. */
		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
		TCP_SKB_CB(buff)->when = tcp_time_stamp;
		tcp_transmit_skb(sk, buff);
	}
}

/* This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 *
 * Question: what should we make while urgent mode?
 * 4.4BSD forces sending single byte of data. We cannot send
 * out of window data, because we have SND.NXT==SND.MAX...
 *
 * Current solution: to send TWO zero-length segments in urgent mode:
 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
 * out-of-date with SND.UNA-1 to probe window.
 */
static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb;

	/* We don't queue it, tcp_transmit_skb() sets ownership. */
	skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (skb == NULL)
		return -1;

	/* Reserve space for headers and set control bits. */
	skb_reserve(skb, MAX_TCP_HEADER);
	skb->csum = 0;
	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
	TCP_SKB_CB(skb)->sacked = urgent;

	/* Use a previous sequence.  This should cause the other
	 * end to send an ack.  Don't queue or clone SKB, just
	 * send it.
	 */
	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	return tcp_transmit_skb(sk, skb);
}

int tcp_write_wakeup(struct sock *sk)
{
	if (sk->state != TCP_CLOSE) {
		struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
		struct sk_buff *skb;

		if ((skb = tp->send_head) != NULL &&
		    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
			int err;
			int mss = tcp_current_mss(sk);
			int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;

			if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
				tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;

			/* We are probing the opening of a window
			 * but the window size is != 0
			 * must have been a result SWS avoidance ( sender )
			 */
			if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
			    skb->len > mss) {
				seg_size = min(seg_size, mss);
				TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
				if (tcp_fragment(sk, skb, seg_size))
					return -1;
			}
			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
			TCP_SKB_CB(skb)->when = tcp_time_stamp;
			err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
			if (!err) {
				update_send_head(sk, tp, skb);
			}
			return err;
		} else {
			if (tp->urg_mode &&
			    between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
				tcp_xmit_probe_skb(sk, TCPCB_URG);
			return tcp_xmit_probe_skb(sk, 0);
		}
	}
	return -1;
}

/* A window probe timeout has occurred.  If window is not closed send
 * a partial packet else a zero probe.
 */
void tcp_send_probe0(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int err;

	err = tcp_write_wakeup(sk);

	if (tp->packets_out || !tp->send_head) {
		/* Cancel probe timer, if it is not required. */
		tp->probes_out = 0;
		tp->backoff = 0;
		return;
	}

	if (err <= 0) {
		tp->backoff++;
		tp->probes_out++;
		tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
				     min(tp->rto << tp->backoff, TCP_RTO_MAX));
	} else {
		/* If packet was not sent due to local congestion,
		 * do not backoff and do not remember probes_out.
		 * Let local senders fight for local resources.
		 *
		 * Use accumulated backoff yet.
		 */
		if (!tp->probes_out)
			tp->probes_out = 1;
		tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
				     min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL));
	}
}