Import 2.1.92pre2
[davej-history.git] / net / ipv6 / tcp_ipv6.c
blob5e3446b49df7ba39b3f7ce5057c074ce90fee3aa
1 /*
2 * TCP over IPv6
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.69 1998/03/28 00:55:36 davem Exp $
10 * Based on:
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version.
21 #include <linux/errno.h>
22 #include <linux/types.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/net.h>
26 #include <linux/sched.h>
27 #include <linux/in.h>
28 #include <linux/in6.h>
29 #include <linux/netdevice.h>
30 #include <linux/init.h>
32 #include <linux/ipv6.h>
33 #include <linux/icmpv6.h>
34 #include <linux/random.h>
36 #include <net/tcp.h>
37 #include <net/ndisc.h>
38 #include <net/ipv6.h>
39 #include <net/transp_v6.h>
40 #include <net/addrconf.h>
41 #include <net/ip6_route.h>
43 #include <asm/uaccess.h>
45 externint sysctl_tcp_timestamps;
46 externint sysctl_tcp_window_scaling;
48 static voidtcp_v6_send_reset(struct sk_buff *skb);
49 static voidtcp_v6_send_check(struct sock *sk,struct tcphdr *th,int len,
50 struct sk_buff *skb);
52 static inttcp_v6_backlog_rcv(struct sock *sk,struct sk_buff *skb);
53 static voidtcp_v6_xmit(struct sk_buff *skb);
54 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
55 struct ipv6hdr *ip6h,
56 struct tcphdr *th,
57 struct open_request **prevp);
59 static struct tcp_func ipv6_mapped;
60 static struct tcp_func ipv6_specific;
/* Hash a v6 (local addr/port, foreign addr/port) 4-tuple into an index
 * covering the first half of the TCP established hash table; the second
 * half is used for TIME_WAIT buckets (see __tcp_v6_lookup below).
 */
62 /* I have no idea if this is a good hash for v6 or not. -DaveM */
63 static __inline__ inttcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
64 struct in6_addr *faddr, u16 fport)
66 int hashent = (lport ^ fport);
/* XOR in the address words; no multiplication, so hash quality is
 * whatever the address entropy provides (see author's caveat above).
 */
68 hashent ^= (laddr->s6_addr32[0] ^ laddr->s6_addr32[1]);
69 hashent ^= (faddr->s6_addr32[0] ^ faddr->s6_addr32[1]);
70 hashent ^= (faddr->s6_addr32[2] ^ faddr->s6_addr32[3]);
71 return(hashent & ((TCP_HTABLE_SIZE/2) -1));
/* Hash a socket by its own v6 4-tuple (rcv_saddr:num, daddr:dport). */
74 static __inline__ inttcp_v6_sk_hashfn(struct sock *sk)
76 struct in6_addr *laddr = &sk->net_pinfo.af_inet6.rcv_saddr;
77 struct in6_addr *faddr = &sk->net_pinfo.af_inet6.daddr;
78 __u16 lport = sk->num;
79 __u16 fport = sk->dport;
80 returntcp_v6_hashfn(laddr, lport, faddr, fport);
83 /* Grrr, addr_type already calculated by caller, but I don't want
84 * to add some silly "cookie" argument to this method just for that.
85 * But it doesn't matter, the recalculation is in the rarest path
86 * this function ever takes.
/* Decide whether sk may bind local port snum.  Returns 0 when the bind
 * is acceptable, 1 when it must be refused.  Runs with SOCKHASH_LOCK
 * held so the bind-hash chains cannot change under us.
 */
88 static inttcp_v6_verify_bind(struct sock *sk,unsigned short snum)
90 struct tcp_bind_bucket *tb;
91 int result =0;
93 SOCKHASH_LOCK();
/* Locate the bind bucket for this port, if one already exists. */
94 for(tb = tcp_bound_hash[tcp_bhashfn(snum)];
95 (tb && (tb->port != snum));
96 tb = tb->next)
98 if(tb && tb->owners) {
99 /* Fast path for reuse ports, see include/net/tcp.h for a very
100 * detailed description of why this works, and why it is worth
101 * the effort at all. -DaveM
103 if((tb->flags & TCPB_FLAG_FASTREUSE) &&
104 (sk->reuse !=0)) {
105 goto go_like_smoke;
106 }else{
107 struct sock *sk2;
108 int sk_reuse = sk->reuse;
109 int addr_type =ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
111 /* We must walk the whole port owner list in this case. -DaveM */
112 for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
113 if(sk->bound_dev_if == sk2->bound_dev_if) {
/* Conflict unless both sides set SO_REUSEADDR and the
 * existing owner is not a listener.
 */
114 if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
/* Wildcard on either side, or equal bound addresses,
 * confirms the conflict; break with sk2 != NULL.
 */
115 if(addr_type == IPV6_ADDR_ANY ||
116 !sk2->rcv_saddr ||
117 !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
118 &sk2->net_pinfo.af_inet6.rcv_saddr))
119 break;
123 if(sk2 != NULL)
124 result =1;
/* No bucket yet: create one; an allocation failure also refuses
 * the bind.
 */
127 if((result ==0) &&
128 (tb == NULL) &&
129 (tcp_bucket_create(snum) == NULL))
130 result =1;
131 go_like_smoke:
132 SOCKHASH_UNLOCK();
133 return result;
/* Insert a non-closed socket at the head of its established-hash chain
 * and register it in the bind-hash bookkeeping.
 */
136 static voidtcp_v6_hash(struct sock *sk)
138 if(sk->state != TCP_CLOSE) {
139 struct sock **skp;
141 SOCKHASH_LOCK();
/* Head insertion; hashent is cached on the socket for later unhash. */
142 skp = &tcp_established_hash[(sk->hashent =tcp_v6_sk_hashfn(sk))];
143 if((sk->next = *skp) != NULL)
144 (*skp)->pprev = &sk->next;
145 *skp = sk;
146 sk->pprev = skp;
147 tcp_sk_bindify(sk);
148 SOCKHASH_UNLOCK();
/* Unlink the socket from its hash chain (if hashed), drop it from the
 * bind hash, and purge it from the TCP_RHASH register quick cache.
 */
152 static voidtcp_v6_unhash(struct sock *sk)
154 SOCKHASH_LOCK();
155 if(sk->pprev) {
156 if(sk->next)
157 sk->next->pprev = sk->pprev;
158 *sk->pprev = sk->next;
159 sk->pprev = NULL;
160 tcp_sk_unbindify(sk);
161 tcp_reg_zap(sk);
163 SOCKHASH_UNLOCK();
/* Move a socket to the hash chain appropriate for its current state:
 * listening hash for TCP_LISTEN, established hash otherwise.  A closed
 * socket is simply unlinked.
 */
166 static voidtcp_v6_rehash(struct sock *sk)
168 unsigned char state;
170 SOCKHASH_LOCK();
171 state = sk->state;
/* First remove from the old chain, and zap the register cache entry. */
172 if(sk->pprev != NULL) {
173 if(sk->next)
174 sk->next->pprev = sk->pprev;
175 *sk->pprev = sk->next;
176 sk->pprev = NULL;
177 tcp_reg_zap(sk);
179 if(state != TCP_CLOSE) {
180 struct sock **skp;
182 if(state == TCP_LISTEN)
183 skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
184 else
185 skp = &tcp_established_hash[(sk->hashent =tcp_v6_sk_hashfn(sk))];
187 if((sk->next = *skp) != NULL)
188 (*skp)->pprev = &sk->next;
189 *skp = sk;
190 sk->pprev = skp;
/* Only listeners are (re)registered in the bind hash here. */
191 if(state == TCP_LISTEN)
192 tcp_sk_bindify(sk);
194 SOCKHASH_UNLOCK();
/* Find the best listening socket for (daddr, hnum, dif).  Scoring:
 * base 1, +1 for an exact rcv_saddr match, +1 for a bound-device match;
 * a perfect score of 3 short-circuits the walk.  Sockets whose bound
 * address or device mismatch are skipped entirely.
 */
197 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr,unsigned short hnum,int dif)
199 struct sock *sk;
200 struct sock *result = NULL;
201 int score, hiscore;
203 hiscore=0;
204 sk = tcp_listening_hash[tcp_lhashfn(hnum)];
205 for(; sk; sk = sk->next) {
206 if((sk->num == hnum) && (sk->family == AF_INET6)) {
207 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
209 score =1;
210 if(!ipv6_addr_any(&np->rcv_saddr)) {
211 if(ipv6_addr_cmp(&np->rcv_saddr, daddr))
212 continue;
213 score++;
215 if(sk->bound_dev_if) {
216 if(sk->bound_dev_if != dif)
217 continue;
218 score++;
220 if(score ==3)
221 return sk;
222 if(score > hiscore) {
223 hiscore = score;
224 result = sk;
228 return result;
231 /* Until this is verified... -DaveM */
232 /* #define USE_QUICKSYNS */
234 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
235 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
236 * It is assumed that this code only gets called from within NET_BH.
/* Demultiplex an incoming segment to a socket.  Lookup order:
 * 1) TCP_RHASH register quick cache, 2) established hash, 3) TIME_WAIT
 * half of the established table, 4) listening hash (wildcard match).
 */
238 staticinlinestruct sock *__tcp_v6_lookup(struct tcphdr *th,
239 struct in6_addr *saddr, u16 sport,
240 struct in6_addr *daddr, u16 dport,
241 int dif)
243 unsigned short hnum =ntohs(dport);
244 struct sock *sk;
245 int hash;
247 #ifdef USE_QUICKSYNS
248 /* Incoming connection short-cut. */
249 if(th && th->syn ==1&& th->ack ==0)
250 goto listener_shortcut;
251 #endif
253 /* Check TCP register quick cache first. */
254 sk =TCP_RHASH(sport);
255 if(sk &&
256 sk->num == hnum &&/* local port */
257 sk->family == AF_INET6 &&/* address family */
258 sk->dport == sport &&/* remote port */
259 !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.daddr, saddr) &&
260 !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, daddr) &&
261 (!sk->bound_dev_if || sk->bound_dev_if == dif))
262 goto hit;
264 /* Optimize here for direct hit, only listening connections can
265 * have wildcards anyways.
267 hash =tcp_v6_hashfn(daddr, hnum, saddr, sport);
268 for(sk = tcp_established_hash[hash]; sk; sk = sk->next) {
269 /* For IPV6 do the cheaper port and family tests first. */
270 if(sk->num == hnum &&/* local port */
271 sk->family == AF_INET6 &&/* address family */
272 sk->dport == sport &&/* remote port */
273 !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.daddr, saddr) &&
274 !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, daddr) &&
275 (!sk->bound_dev_if || sk->bound_dev_if == dif)) {
/* Refill the quick cache only for fully established sockets. */
276 if(sk->state == TCP_ESTABLISHED)
277 TCP_RHASH(sport) = sk;
278 goto hit;/* You sunk my battleship! */
281 /* Must check for a TIME_WAIT'er before going to listener hash. */
282 for(sk = tcp_established_hash[hash+(TCP_HTABLE_SIZE/2)]; sk; sk = sk->next)
283 if(sk->num == hnum &&/* local port */
284 sk->family == AF_INET6 &&/* address family */
285 sk->dport == sport) {/* remote port */
286 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
287 if(!ipv6_addr_cmp(&tw->v6_daddr, saddr) &&
288 !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) &&
289 (!sk->bound_dev_if || sk->bound_dev_if == dif))
290 goto hit;
292 #ifdef USE_QUICKSYNS
293 listener_shortcut:
294 #endif
295 sk =tcp_v6_lookup_listener(daddr, hnum, dif);
296 hit:
297 return sk;
300 #define tcp_v6_lookup(sa, sp, da, dp, dif) __tcp_v6_lookup((0),(sa),(sp),(da),(dp),(dif))
/* Compute the TCP-over-IPv6 pseudo-header checksum; `base` is the
 * partial checksum already accumulated over the TCP segment itself.
 */
302 static __inline__ u16 tcp_v6_check(struct tcphdr *th,int len,
303 struct in6_addr *saddr,
304 struct in6_addr *daddr,
305 unsigned long base)
307 returncsum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/* Pick an initial sequence number for skb's flow.  For a native v6
 * packet only the low 32 bits of each address feed the generator; for
 * a (mapped) v4 packet the full v4 addresses are used.
 */
310 static __u32 tcp_v6_init_sequence(struct sock *sk,struct sk_buff *skb)
312 __u32 si;
313 __u32 di;
315 if(skb->protocol ==__constant_htons(ETH_P_IPV6)) {
316 si = skb->nh.ipv6h->saddr.s6_addr32[3];
317 di = skb->nh.ipv6h->daddr.s6_addr32[3];
318 }else{
319 si = skb->nh.iph->saddr;
320 di = skb->nh.iph->daddr;
/* Note the (di, si) / (dest, source) ordering: the ISN is derived
 * from the local side first.
 */
323 returnsecure_tcp_sequence_number(di, si,
324 skb->h.th->dest,
325 skb->h.th->source);
/* Return 1 if sk's 4-tuple is unique (no other non-listening socket
 * already owns the same connection identity), 0 otherwise.  Used by
 * connect() after the local port is chosen.
 */
328 static inttcp_v6_unique_address(struct sock *sk)
330 struct tcp_bind_bucket *tb;
331 unsigned short snum = sk->num;
332 int retval =1;
334 /* Freeze the hash while we snoop around. */
335 SOCKHASH_LOCK();
336 tb = tcp_bound_hash[tcp_bhashfn(snum)];
337 for(; tb; tb = tb->next) {
338 if(tb->port == snum && tb->owners != NULL) {
339 /* Almost certainly the re-use port case, search the real hashes
340 * so it actually scales. (we hope that all ipv6 ftp servers will
341 * use passive ftp, I just cover this case for completeness)
/* Note: `sk` is reused here as the lookup result; the original
 * socket pointer is no longer needed past this point.
 */
343 sk =__tcp_v6_lookup(NULL, &sk->net_pinfo.af_inet6.daddr,
344 sk->dport,
345 &sk->net_pinfo.af_inet6.rcv_saddr, snum,
346 sk->bound_dev_if);
347 if((sk != NULL) && (sk->state != TCP_LISTEN))
348 retval =0;
349 break;
352 SOCKHASH_UNLOCK();
353 return retval;
/* Active open: validate the destination, route it, choose a source
 * address, then hand off to tcp_connect() to send the SYN.  A
 * v4-mapped destination is redirected to tcp_v4_connect() with this
 * socket switched to the ipv6_mapped operations vector.
 * Returns 0 or a negative errno.
 */
356 static inttcp_v6_connect(struct sock *sk,struct sockaddr *uaddr,
357 int addr_len)
359 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
360 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
361 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
362 struct inet6_ifaddr *ifa;
363 struct in6_addr *saddr = NULL;
364 struct flowi fl;
365 struct dst_entry *dst;
366 struct sk_buff *buff;
367 int addr_type;
368 int mss;
370 if(sk->state != TCP_CLOSE)
371 return(-EISCONN);
374 * Don't allow a double connect.
377 if(!ipv6_addr_any(&np->daddr))
378 return-EINVAL;
380 if(addr_len <sizeof(struct sockaddr_in6))
381 return(-EINVAL);
383 if(usin->sin6_family && usin->sin6_family != AF_INET6)
384 return(-EAFNOSUPPORT);
387 * connect() to INADDR_ANY means loopback (BSD'ism).
390 if(ipv6_addr_any(&usin->sin6_addr))
391 usin->sin6_addr.s6_addr[15] =0x1;
393 addr_type =ipv6_addr_type(&usin->sin6_addr);
395 if(addr_type & IPV6_ADDR_MULTICAST)
396 return-ENETUNREACH;
399 * connect to self not allowed
402 if(ipv6_addr_cmp(&usin->sin6_addr, &np->saddr) ==0&&
403 usin->sin6_port == sk->sport)
404 return(-EINVAL);
406 memcpy(&np->daddr, &usin->sin6_addr,sizeof(struct in6_addr));
409 * TCP over IPv4
412 if(addr_type == IPV6_ADDR_MAPPED) {
413 struct sockaddr_in sin;
414 int err;
416 SOCK_DEBUG(sk,"connect: ipv4 mapped\n");
418 sin.sin_family = AF_INET;
419 sin.sin_port = usin->sin6_port;
420 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
/* Switch to the v4-over-v6-API operations before delegating, and
 * restore the v6 vector if the v4 connect fails.
 */
422 sk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
423 sk->backlog_rcv = tcp_v4_do_rcv;
425 err =tcp_v4_connect(sk, (struct sockaddr *)&sin,sizeof(sin));
427 if(err) {
428 sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
429 sk->backlog_rcv = tcp_v6_backlog_rcv;
432 return err;
435 if(!ipv6_addr_any(&np->rcv_saddr))
436 saddr = &np->rcv_saddr;
438 fl.proto = IPPROTO_TCP;
439 fl.nl_u.ip6_u.daddr = &np->daddr;
440 fl.nl_u.ip6_u.saddr = saddr;
441 fl.oif = sk->bound_dev_if;
442 fl.uli_u.ports.dport = usin->sin6_port;
443 fl.uli_u.ports.sport = sk->sport;
445 dst =ip6_route_output(sk, &fl);
/* NOTE(review): dst->error is read after dst_release(dst); if the
 * release frees the entry this is a use-after-free read — confirm
 * dst refcounting semantics for error routes.
 */
447 if(dst->error) {
448 dst_release(dst);
449 return dst->error;
452 if(dst->pmtu <576) {
453 dst_release(dst);
454 return-EINVAL;
457 if(fl.oif ==0&& addr_type&IPV6_ADDR_LINKLOCAL) {
458 /* Ough! This guy tries to connect to link local
459 * address and did not specify interface.
460 * Actually we should kick him out, but
461 * we will be patient :) --ANK
463 sk->bound_dev_if = dst->dev->ifindex;
/* The route is stored on the socket; ownership of dst passes to the
 * socket's dst cache from here on.
 */
466 ip6_dst_store(sk, dst);
468 if(saddr == NULL) {
469 ifa =ipv6_get_saddr(dst, &np->daddr);
471 if(ifa == NULL)
472 return-ENETUNREACH;
474 saddr = &ifa->addr;
476 /* set the source address */
477 ipv6_addr_copy(&np->rcv_saddr, saddr);
478 ipv6_addr_copy(&np->saddr, saddr);
481 buff =sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header),
482 0, GFP_KERNEL);
484 if(buff == NULL)
485 return-ENOBUFS;
487 sk->dport = usin->sin6_port;
489 if(!tcp_v6_unique_address(sk)) {
490 kfree_skb(buff);
491 return-EADDRNOTAVAIL;
495 * Init variables
498 tp->write_seq =secure_tcp_sequence_number(np->saddr.s6_addr32[3],
499 np->daddr.s6_addr32[3],
500 sk->sport, sk->dport);
502 sk->mtu = dst->pmtu;
/* MSS here accounts only for the IPv6 header; TCP header/options are
 * handled further down the stack (extension-header adjustment is the
 * disabled #if 0 block below).
 */
503 mss = sk->mtu -sizeof(struct ipv6hdr);
504 #if 0
505 if(np->opt) {
506 /* Adjust mss */
508 #endif
510 tcp_connect(sk, buff, mss);
512 return0;
/* sendmsg entry point: validate flags and (optional) destination
 * address against the connected peer, then hand the iovec to
 * tcp_do_sendmsg under the socket lock.  Returns bytes queued or a
 * negative errno.
 */
515 static inttcp_v6_sendmsg(struct sock *sk,struct msghdr *msg,int len)
517 struct tcp_opt *tp;
518 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
519 int retval = -EINVAL;
522 * Do sanity checking for sendmsg/sendto/send
525 if(msg->msg_flags & ~(MSG_OOB|MSG_DONTROUTE|MSG_DONTWAIT|MSG_NOSIGNAL))
526 goto out;
527 if(msg->msg_name) {
528 struct sockaddr_in6 *addr=(struct sockaddr_in6 *)msg->msg_name;
530 if(msg->msg_namelen <sizeof(*addr))
531 goto out;
533 if(addr->sin6_family && addr->sin6_family != AF_INET6)
534 goto out;
535 retval = -ENOTCONN;
537 if(sk->state == TCP_CLOSE)
538 goto out;
/* TCP is connection-oriented: a supplied address must match the
 * connected peer exactly, otherwise EISCONN.
 */
539 retval = -EISCONN;
540 if(addr->sin6_port != sk->dport)
541 goto out;
542 if(ipv6_addr_cmp(&addr->sin6_addr, &np->daddr))
543 goto out;
546 lock_sock(sk);
547 retval =tcp_do_sendmsg(sk, msg->msg_iovlen, msg->msg_iov,
548 msg->msg_flags);
549 /* Push out partial tail frames if needed. */
550 tp = &(sk->tp_pinfo.af_tcp);
551 if(tp->send_head &&tcp_snd_test(sk, tp->send_head))
552 tcp_write_xmit(sk);
553 release_sock(sk);
555 out:
556 return retval;
/* ICMPv6 error handler for TCP.  Locates the affected socket, performs
 * PMTU discovery handling for ICMPV6_PKT_TOOBIG, matches errors against
 * pending open_requests on listeners, and otherwise converts the ICMP
 * error to an errno on the socket (hard error for opening connections,
 * soft error for established ones).
 */
559 voidtcp_v6_err(struct sk_buff *skb,int type,int code,unsigned char*header, __u32 info,
560 struct in6_addr *saddr,struct in6_addr *daddr,
561 struct inet6_protocol *protocol)
563 struct tcphdr *th = (struct tcphdr *)header;
564 struct ipv6_pinfo *np;
565 struct sock *sk;
566 int err;
567 int opening;
568 struct tcp_opt *tp;
569 __u32 seq;
571 /* XXX: length check for tcphdr missing here */
573 sk =tcp_v6_lookup(daddr, th->dest, saddr, th->source, skb->dev->ifindex);
575 if(sk == NULL || sk->state == TCP_TIME_WAIT) {
576 /* XXX: Update ICMP error count */
577 return;
/* Ignore errors whose quoted sequence number falls outside the send
 * window — likely a stale or forged ICMP.
 */
580 tp = &sk->tp_pinfo.af_tcp;
581 seq =ntohl(th->seq);
582 if(sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
583 if(net_ratelimit())
584 printk(KERN_DEBUG "icmp packet outside the tcp window:"
585 " s:%d %u,%u,%u\n",
586 (int)sk->state, seq, tp->snd_una, tp->snd_nxt);
587 return;
590 np = &sk->net_pinfo.af_inet6;
591 if(type == ICMPV6_PKT_TOOBIG && sk->state != TCP_LISTEN) {
592 /* icmp should have updated the destination cache entry */
594 if(sk->dst_cache)
595 dst_check(&sk->dst_cache, np->dst_cookie);
/* Re-route if the cached dst was invalidated by the ICMP. */
597 if(sk->dst_cache == NULL) {
598 struct flowi fl;
599 struct dst_entry *dst;
601 fl.proto = IPPROTO_TCP;
602 fl.nl_u.ip6_u.daddr = &np->daddr;
603 fl.nl_u.ip6_u.saddr = &np->saddr;
604 fl.oif = sk->bound_dev_if;
605 fl.uli_u.ports.dport = sk->dport;
606 fl.uli_u.ports.sport = sk->sport;
608 dst =ip6_route_output(sk, &fl);
610 ip6_dst_store(sk, dst);
613 if(sk->dst_cache->error) {
614 sk->err_soft = sk->dst_cache->error;
615 }else{
616 /* FIXME: Reset sk->mss, taking into account TCP option
617 * bytes for timestamps. -DaveM
619 sk->mtu = sk->dst_cache->pmtu;
621 if(sk->sock_readers) {/* remove later */
622 printk(KERN_DEBUG "tcp_v6_err: pmtu disc: socket locked.\n");
623 return;
625 tcp_simple_retransmit(sk);
626 return;
629 opening =0;
630 /* Might be for an open_request */
631 switch(sk->state) {
632 struct open_request *req, *prev;
633 struct ipv6hdr hd;
634 case TCP_LISTEN:
635 if(sk->sock_readers)
636 return;
638 /* Grrrr - fix this later. */
/* Build a throwaway ipv6hdr just so tcp_v6_search_req can match the
 * addresses of the pending request.
 */
639 ipv6_addr_copy(&hd.saddr, saddr);
640 ipv6_addr_copy(&hd.daddr, daddr);
641 req =tcp_v6_search_req(tp, &hd,th, &prev);
642 if(!req)
643 return;
644 if(seq != req->snt_isn) {
645 if(net_ratelimit())
646 printk(KERN_DEBUG "icmp packet for openreq "
647 "with wrong seq number:%d:%d\n",
648 seq, req->snt_isn);
649 return;
651 if(req->sk) {
652 sk = req->sk;/* report error in accept */
653 }else{
/* Embryonic request with no socket yet: just drop it. */
654 tcp_synq_unlink(tp, req, prev);
655 req->class->destructor(req);
656 tcp_openreq_free(req);
658 /* FALL THROUGH */
659 case TCP_SYN_SENT:
660 case TCP_SYN_RECV:
661 opening =1;
662 break;
/* Hard error while opening kills the connection; otherwise record a
 * soft error for the application to observe.
 */
665 if(icmpv6_err_convert(type, code, &err) || opening) {
666 sk->err = err;
668 if(opening) {
669 tcp_statistics.TcpAttemptFails++;
670 tcp_set_state(sk,TCP_CLOSE);
671 sk->error_report(sk);
673 }else{
674 sk->err_soft = err;
679 /* FIXME: this is substantially similar to the ipv4 code.
680 * Can some kind of merge be done? -- erics
/* Build and transmit a SYN+ACK for a pending open_request: route to
 * the peer, compute the advertised MSS from the path MTU, checksum,
 * and hand off to ip6_xmit.  Silently drops on routing failure.
 */
682 static voidtcp_v6_send_synack(struct sock *sk,struct open_request *req)
684 struct sk_buff * skb;
685 struct dst_entry *dst;
686 struct flowi fl;
687 int mss;
689 fl.proto = IPPROTO_TCP;
690 fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
691 fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
692 fl.oif = req->af.v6_req.iif;
693 fl.uli_u.ports.dport = req->rmt_port;
694 fl.uli_u.ports.sport = sk->sport;
696 dst =ip6_route_output(sk, &fl);
697 if(dst->error) {
698 dst_release(dst);
699 return;
702 mss = dst->pmtu -sizeof(struct ipv6hdr) -sizeof(struct tcphdr);
703 #if 0
704 /* Subtract option length... */
705 if(opt) {
706 mss -= opt->optlen;
708 #endif
710 skb =tcp_make_synack(sk, dst, req, mss);
711 if(skb) {
712 struct tcphdr *th = skb->h.th;
714 th->check =tcp_v6_check(th, skb->len,
715 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
716 csum_partial((char*)th, skb->len, skb->csum));
718 ip6_xmit(sk, skb, &fl, req->af.v6_req.opt);
720 dst_release(dst);
/* No per-request state to free for v6 open_requests (empty on purpose). */
723 static voidtcp_v6_or_free(struct open_request *req)
/* open_request operations vector: synack sender, destructor, reset. */
727 static struct or_calltable or_ipv6 = {
728 tcp_v6_send_synack,
729 tcp_v6_or_free,
730 tcp_v6_send_reset
733 /* FIXME: this is substantially similar to the ipv4 code.
734 * Can some kind of merge be done? -- erics
/* Handle an incoming SYN on a listening socket: allocate an
 * open_request, parse the peer's TCP options, queue the request on the
 * SYN queue and send the SYN+ACK.  Mapped-v4 SYNs are delegated to
 * tcp_v4_conn_request.  Always returns 0 except for a dead socket.
 */
736 static inttcp_v6_conn_request(struct sock *sk,struct sk_buff *skb,void*ptr,
737 __u32 isn)
739 struct tcp_opt tp;
740 struct open_request *req;
742 /* If the socket is dead, don't accept the connection. */
743 if(sk->dead) {
744 SOCK_DEBUG(sk,"Reset on %p: Connect on dead socket.\n", sk);
745 tcp_statistics.TcpAttemptFails++;
746 return-ENOTCONN;
749 if(skb->protocol ==__constant_htons(ETH_P_IP))
750 returntcp_v4_conn_request(sk, skb, ptr, isn);
752 if(isn ==0)
753 isn =tcp_v6_init_sequence(sk,skb);
756 * There are no SYN attacks on IPv6, yet...
758 if(sk->ack_backlog >= sk->max_ack_backlog) {
759 printk(KERN_DEBUG "droping syn ack:%d max:%d\n",
760 sk->ack_backlog, sk->max_ack_backlog);
761 tcp_statistics.TcpAttemptFails++;
762 goto exit;
765 req =tcp_openreq_alloc();
/* NOTE(review): the allocation-failure branch appears truncated in
 * this rendering — confirm against the original that it bails out
 * before the req dereferences below.
 */
766 if(req == NULL) {
769 sk->ack_backlog++;
771 req->rcv_wnd =0;/* So that tcp_send_synack() knows! */
773 req->rcv_isn =TCP_SKB_CB(skb)->seq;
774 req->snt_isn = isn;
/* Parse the SYN's options into a scratch tcp_opt (536 = default MSS). */
775 tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale =0;
776 tp.in_mss =536;
777 tcp_parse_options(NULL, skb->h.th, &tp,0);
778 req->mss = tp.in_mss;
779 if(tp.saw_tstamp) {
780 req->mss -= TCPOLEN_TSTAMP_ALIGNED;
781 req->ts_recent = tp.rcv_tsval;
783 req->tstamp_ok = tp.tstamp_ok;
784 req->sack_ok = tp.sack_ok;
785 req->snd_wscale = tp.snd_wscale;
786 req->wscale_ok = tp.wscale_ok;
787 req->rmt_port = skb->h.th->source;
788 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
789 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
790 req->af.v6_req.opt = NULL;/* FIXME: options */
791 req->af.v6_req.iif = sk->bound_dev_if;
793 /* So that link locals have meaning */
794 if(!sk->bound_dev_if &&ipv6_addr_type(&req->af.v6_req.rmt_addr)&IPV6_ADDR_LINKLOCAL)
795 req->af.v6_req.iif = skb->dev->ifindex;
797 req->class= &or_ipv6;
798 req->retrans =0;
799 req->sk = NULL;
801 tcp_v6_send_synack(sk, req);
803 req->expires = jiffies + TCP_TIMEOUT_INIT;
804 tcp_inc_slow_timer(TCP_SLT_SYNACK);
805 tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
807 sk->data_ready(sk,0);
809 exit:
810 return0;
/* Fill in th->check for an outgoing segment on a connected socket,
 * using the socket's cached saddr/daddr for the pseudo-header.
 */
813 static voidtcp_v6_send_check(struct sock *sk,struct tcphdr *th,int len,
814 struct sk_buff *skb)
816 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
/* Zero the field first so it does not pollute the checksum. */
817 th->check =0;
819 th->check =csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
820 csum_partial((char*)th, th->doff<<2,
821 skb->csum));
/* Create the child socket for a completed handshake.  Mapped-v4
 * requests are delegated to tcp_v4_syn_recv_sock and the child is
 * given v4-mapped v6 addresses plus the ipv6_mapped ops vector.
 * Returns the new socket or NULL on failure.
 */
824 static struct sock *tcp_v6_syn_recv_sock(struct sock *sk,struct sk_buff *skb,
825 struct open_request *req,
826 struct dst_entry *dst)
828 struct ipv6_pinfo *np;
829 struct flowi fl;
830 struct tcp_opt *newtp;
831 struct sock *newsk;
832 int mss;
834 if(skb->protocol ==__constant_htons(ETH_P_IP)) {
836 * v6 mapped
839 newsk =tcp_v4_syn_recv_sock(sk, skb, req, dst);
841 if(newsk == NULL)
842 return NULL;
844 np = &newsk->net_pinfo.af_inet6;
/* Synthesize ::ffff:a.b.c.d mapped addresses from the v4 child. */
846 ipv6_addr_set(&np->daddr,0,0,__constant_htonl(0x0000FFFF),
847 newsk->daddr);
849 ipv6_addr_set(&np->saddr,0,0,__constant_htonl(0x0000FFFF),
850 newsk->saddr);
852 ipv6_addr_copy(&np->rcv_saddr, &np->saddr);
854 newsk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
855 newsk->backlog_rcv = tcp_v4_do_rcv;
857 return newsk;
/* Native v6 path: route if the caller did not supply a dst. */
861 if(dst == NULL) {
863 * options / mss / route cache
866 fl.proto = IPPROTO_TCP;
867 fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
868 fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
869 fl.oif = sk->bound_dev_if;
870 fl.uli_u.ports.dport = req->rmt_port;
871 fl.uli_u.ports.sport = sk->sport;
873 dst =ip6_route_output(sk, &fl);
876 if(dst->error || dst->pmtu <576)
877 goto out;
880 mss = dst->pmtu -sizeof(struct ipv6hdr);
881 #if 0
882 /* Adjust mss by option size */
883 #endif
885 newsk =tcp_create_openreq_child(sk, req, skb, mss);
886 if(newsk == NULL)
887 goto out;
889 ip6_dst_store(newsk, dst);
891 newtp = &(newsk->tp_pinfo.af_tcp);
893 np = &newsk->net_pinfo.af_inet6;
894 ipv6_addr_copy(&np->daddr, &req->af.v6_req.rmt_addr);
895 ipv6_addr_copy(&np->saddr, &req->af.v6_req.loc_addr);
896 ipv6_addr_copy(&np->rcv_saddr, &req->af.v6_req.loc_addr);
897 newsk->bound_dev_if = req->af.v6_req.iif;
898 newsk->mtu = dst->pmtu;
899 newsk->opt = NULL;
/* The v4 address fields are parked on loopback for a pure v6 socket. */
901 newsk->daddr = LOOPBACK4_IPV6;
902 newsk->saddr = LOOPBACK4_IPV6;
903 newsk->rcv_saddr= LOOPBACK4_IPV6;
905 newsk->prot->hash(newsk);
906 add_to_prot_sklist(newsk);
907 return newsk;
909 out:
910 dst_release(dst);
911 return NULL;
/* Send a RST in response to skb (which matched no socket).  Never
 * resets a RST (RFC 793).  The reply swaps ports/addresses and derives
 * its SEQ/ACK from the offending segment.
 */
914 static voidtcp_v6_send_reset(struct sk_buff *skb)
916 struct tcphdr *th = skb->h.th, *t1;
917 struct sk_buff *buff;
918 struct flowi fl;
920 if(th->rst)
921 return;
924 * We need to grab some memory, and put together an RST,
925 * and then put it into the queue to be sent.
928 buff =alloc_skb(MAX_HEADER +sizeof(struct ipv6hdr), GFP_ATOMIC);
929 if(buff == NULL)
930 return;
932 skb_reserve(buff, MAX_HEADER +sizeof(struct ipv6hdr));
934 t1 = (struct tcphdr *)skb_push(buff,sizeof(struct tcphdr));
936 /* Swap the send and the receive. */
937 memset(t1,0,sizeof(*t1));
938 t1->dest = th->source;
939 t1->source = th->dest;
940 t1->doff =sizeof(*t1)/4;
941 t1->rst =1;
/* RFC 793: if the incoming segment had an ACK, the RST carries that
 * ACK value as its SEQ; otherwise we ACK the incoming SEQ (+1 for SYN).
 */
943 if(th->ack) {
944 t1->seq = th->ack_seq;
945 }else{
946 t1->ack =1;
947 if(!th->syn)
948 t1->ack_seq = th->seq;
949 else
950 t1->ack_seq =htonl(ntohl(th->seq)+1);
953 buff->csum =csum_partial((char*)t1,sizeof(*t1),0);
/* Reply flow: swap the packet's source and destination addresses. */
955 fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
956 fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
958 t1->check =csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
959 fl.nl_u.ip6_u.daddr,
960 sizeof(*t1), IPPROTO_TCP,
961 buff->csum);
963 fl.proto = IPPROTO_TCP;
964 fl.oif = skb->dev->ifindex;
965 fl.uli_u.ports.dport = t1->dest;
966 fl.uli_u.ports.sport = t1->source;
968 /* sk = NULL, but it is safe for now. RST socket required. */
969 buff->dst =ip6_route_output(NULL, &fl);
971 if(buff->dst->error ==0) {
972 ip6_xmit(NULL, buff, &fl, NULL);
973 tcp_statistics.TcpOutSegs++;
974 tcp_statistics.TcpOutRsts++;
975 return;
978 kfree_skb(buff);
/* Walk the listener's SYN queue for an open_request matching the
 * packet's (rmt_addr, loc_addr, source port).  On a hit, *prevp is set
 * to the predecessor so the caller can unlink.  Returns NULL on miss.
 */
981 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
982 struct ipv6hdr *ip6h,
983 struct tcphdr *th,
984 struct open_request **prevp)
986 struct open_request *req, *prev;
987 __u16 rport = th->source;
989 /* assumption: the socket is not in use.
990 * as we checked the user count on tcp_rcv and we're
991 * running from a soft interrupt.
/* The queue head is cast to an open_request so unlink works uniformly. */
993 prev = (struct open_request *) (&tp->syn_wait_queue);
994 for(req = prev->dl_next; req; req = req->dl_next) {
995 if(!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) &&
996 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) &&
997 req->rmt_port == rport) {
998 *prevp = prev;
999 return req;
1001 prev = req;
1003 return NULL;
/* Handle a RST aimed at a pending open_request on a listener: if the
 * RST's sequence number is acceptable, drop the request from the SYN
 * queue and free it.
 */
1006 static voidtcp_v6_rst_req(struct sock *sk,struct sk_buff *skb)
1008 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1009 struct open_request *req, *prev;
1011 req =tcp_v6_search_req(tp,skb->nh.ipv6h,skb->h.th,&prev);
1012 if(!req)
1013 return;
1014 /* Sequence number check required by RFC793 */
1015 if(before(TCP_SKB_CB(skb)->seq, req->snt_isn) ||
1016 after(TCP_SKB_CB(skb)->seq, req->snt_isn+1))
1017 return;
1018 tcp_synq_unlink(tp, req, prev);
1019 req->class->destructor(req);
1020 tcp_openreq_free(req);
/* Main TCP-over-IPv6 receive path.  Verifies the checksum (unless the
 * device did), demultiplexes to a socket, and dispatches by socket
 * state (established fast path, listener SYN handling, TIME_WAIT,
 * generic state machine).  Packets for locked sockets go to the
 * backlog; unmatched packets draw a RST.
 */
1023 inttcp_v6_rcv(struct sk_buff *skb,struct device *dev,
1024 struct in6_addr *saddr,struct in6_addr *daddr,
1025 struct ipv6_options *opt,unsigned short len,
1026 int redo,struct inet6_protocol *protocol)
1028 struct tcphdr *th;
1029 struct sock *sk;
1032 * "redo" is 1 if we have already seen this skb but couldn't
1033 * use it at that time (the socket was locked). In that case
1034 * we have already done a lot of the work (looked up the socket
1035 * etc).
1038 th = skb->h.th;
1040 sk = skb->sk;
1042 if(!redo) {
1043 if(skb->pkt_type != PACKET_HOST)
1044 goto discard_it;
1047 * Pull up the IP header.
1050 __skb_pull(skb, skb->h.raw - skb->data);
1053 * Count it even if it's bad.
1056 tcp_statistics.TcpInSegs++;
1059 * Try to use the device checksum if provided.
1062 switch(skb->ip_summed) {
/* CHECKSUM_NONE computes the sum in software, then deliberately
 * falls through to the HW-verify case below.
 */
1063 case CHECKSUM_NONE:
1064 skb->csum =csum_partial((char*)th, len,0);
1065 case CHECKSUM_HW:
1066 if(tcp_v6_check(th,len,saddr,daddr,skb->csum)) {
1067 printk(KERN_DEBUG "tcp csum failed\n");
1068 tcp_statistics.TcpInErrs++;
1069 goto discard_it;
1071 default:
1072 /* CHECKSUM_UNNECESSARY */
1075 sk =__tcp_v6_lookup(th, saddr, th->source, daddr, th->dest, dev->ifindex);
1077 if(!sk) {
1078 printk(KERN_DEBUG "socket not found\n");
1079 goto no_tcp_socket;
/* Precompute the sequence bookkeeping used by the state machine;
 * end_seq counts SYN and FIN as one sequence unit each.
 */
1082 TCP_SKB_CB(skb)->seq =ntohl(th->seq);
1083 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1084 len - th->doff*4);
1085 TCP_SKB_CB(skb)->ack_seq =ntohl(th->ack_seq);
1086 skb->used =0;
1087 if(sk->state == TCP_TIME_WAIT)
1088 goto do_time_wait;
1090 skb->sk = sk;
1094 * We may need to add it to the backlog here.
1097 if(sk->sock_readers) {
1098 __skb_queue_tail(&sk->back_log, skb);
1099 return(0);
1102 skb_set_owner_r(skb, sk);
1104 if(sk->state == TCP_ESTABLISHED) {
1105 if(tcp_rcv_established(sk, skb, th, len))
1106 goto no_tcp_socket;
1107 return0;
1110 if(sk->state == TCP_LISTEN) {
/* Grab the flags word of the TCP header directly (word 3). */
1111 __u32 flg = ((u32 *)th)[3];
1113 /* Check for RST */
1114 if(flg &__constant_htonl(0x00040000)) {
1115 tcp_v6_rst_req(sk, skb);
1118 /* Check SYN|ACK */
1119 if(flg &__constant_htonl(0x00120000)) {
1120 struct open_request *req, *prev;
1121 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1123 req =tcp_v6_search_req(tp, skb->nh.ipv6h,th,&prev);
1124 if(req) {
1125 sk =tcp_check_req(sk, skb, req);
1127 /* else do syncookies (add them here) */
1128 if(sk == NULL)
1129 goto discard_it;
1133 if(tcp_rcv_state_process(sk, skb, th, opt, len) ==0)
1134 return0;
1136 no_tcp_socket:
1139 * No such TCB. If th->rst is 0 send a reset
1140 * (checked in tcp_v6_send_reset)
1143 tcp_v6_send_reset(skb);
1145 discard_it:
1148 * Discard frame
1151 kfree_skb(skb);
1152 return0;
1154 do_time_wait:
1155 if(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1156 skb, th, &(IPCB(skb)->opt), skb->len))
1157 goto no_tcp_socket;
1158 goto discard_it;
/* Ensure the socket has a valid cached route, re-routing if the cached
 * dst has been invalidated.  Returns 0 on success or the dst error.
 */
1161 static inttcp_v6_rebuild_header(struct sock *sk)
1163 struct dst_entry *dst = NULL;
1164 struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
1166 if(sk->dst_cache)
1167 dst =dst_check(&sk->dst_cache, np->dst_cookie);
1169 if(dst == NULL) {
1170 struct flowi fl;
1172 fl.proto = IPPROTO_TCP;
1173 fl.nl_u.ip6_u.daddr = &np->daddr;
1174 fl.nl_u.ip6_u.saddr = &np->saddr;
1175 fl.oif = sk->bound_dev_if;
1176 fl.uli_u.ports.dport = sk->dport;
1177 fl.uli_u.ports.sport = sk->sport;
1179 dst =ip6_route_output(sk, &fl);
/* NOTE(review): dst->error is read after dst_release(dst), same
 * pattern as tcp_v6_connect — confirm this is safe for error dsts.
 */
1181 if(dst->error) {
1182 dst_release(dst);
1183 return dst->error;
1186 ip6_dst_store(sk, dst);
1189 return dst->error;
/* Re-run tcp_v6_rcv (with redo=1) for a packet that was queued on the
 * socket backlog while the socket was locked.
 */
1192 static inttcp_v6_backlog_rcv(struct sock *sk,struct sk_buff *skb)
1194 int res;
1196 res =tcp_v6_rcv(skb, skb->dev,
1197 &skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
1198 (struct ipv6_options *) skb->cb,
1199 skb->len,1,
1200 (struct inet6_protocol *) sk->pair);
1201 return res;
/* Resolve the owning socket for an skb using its IPv6 addresses and
 * TCP ports (listener-capable lookup, th arg of the macro is NULL).
 */
1204 static struct sock *tcp_v6_get_sock(struct sk_buff *skb,struct tcphdr *th)
1206 struct in6_addr *saddr;
1207 struct in6_addr *daddr;
1209 saddr = &skb->nh.ipv6h->saddr;
1210 daddr = &skb->nh.ipv6h->daddr;
1211 returntcp_v6_lookup(saddr, th->source, daddr, th->dest, skb->dev->ifindex);
/* Transmit a queued segment on its connected socket: validate (or
 * re-acquire) the cached route, attach it to the skb and hand off to
 * ip6_xmit.  A routing failure is recorded as a soft error and the
 * packet is silently dropped.
 */
1214 static voidtcp_v6_xmit(struct sk_buff *skb)
1216 struct sock *sk = skb->sk;
1217 struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
1218 struct flowi fl;
1219 struct dst_entry *dst = sk->dst_cache;
1221 fl.proto = IPPROTO_TCP;
1222 fl.nl_u.ip6_u.daddr = &np->daddr;
1223 fl.nl_u.ip6_u.saddr = &np->saddr;
1224 fl.oif = sk->bound_dev_if;
1225 fl.uli_u.ports.sport = sk->sport;
1226 fl.uli_u.ports.dport = sk->dport;
1228 if(sk->dst_cache)
1229 dst =dst_check(&sk->dst_cache, np->dst_cookie);
1231 if(dst == NULL) {
1232 dst =ip6_route_output(sk, &fl);
1234 if(dst->error) {
1235 sk->err_soft = dst->error;
1236 dst_release(dst);
1237 return;
1240 ip6_dst_store(sk, dst);
/* Each skb holds its own reference to the route. */
1243 skb->dst =dst_clone(dst);
1245 ip6_xmit(sk, skb, &fl, np->opt);
/* Fill a sockaddr_in6 with the socket's peer address and port (used
 * by getpeername-style callers through the af_specific vector).
 */
1248 static voidv6_addr2sockaddr(struct sock *sk,struct sockaddr * uaddr)
1250 struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
1251 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1253 sin6->sin6_family = AF_INET6;
1254 memcpy(&sin6->sin6_addr, &np->daddr,sizeof(struct in6_addr));
1255 sin6->sin6_port = sk->dport;
/* af_specific operations for native TCP-over-IPv6 sockets:
 * xmit, send_check, rebuild_header, conn_request, syn_recv_sock,
 * get_sock, setsockopt, getsockopt, addr2sockaddr, sockaddr length.
 */
1258 static struct tcp_func ipv6_specific = {
1259 tcp_v6_xmit,
1260 tcp_v6_send_check,
1261 tcp_v6_rebuild_header,
1262 tcp_v6_conn_request,
1263 tcp_v6_syn_recv_sock,
1264 tcp_v6_get_sock,
1265 ipv6_setsockopt,
1266 ipv6_getsockopt,
1267 v6_addr2sockaddr,
1268 sizeof(struct sockaddr_in6)
1272 * TCP over IPv4 via INET6 API
/* Same slot layout as ipv6_specific, but the transmit/checksum/header
 * entries delegate to the v4 implementations for mapped sockets.
 */
1275 static struct tcp_func ipv6_mapped = {
1276 ip_queue_xmit,
1277 tcp_v4_send_check,
1278 tcp_v4_rebuild_header,
1279 tcp_v6_conn_request,
1280 tcp_v6_syn_recv_sock,
1281 tcp_v6_get_sock,
1282 ipv6_setsockopt,
1283 ipv6_getsockopt,
1284 v6_addr2sockaddr,
1285 sizeof(struct sockaddr_in6)
1288 /* NOTE: A lot of things set to zero explicitly by call to
1289 * sk_alloc() so need not be done here.
/* Per-socket initializer for the tcpv6 protocol: timers, RTO defaults,
 * congestion-control initial values, SYN queue, and the v6 af_specific
 * operations vector.  Always returns 0.
 */
1291 static inttcp_v6_init_sock(struct sock *sk)
1293 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1295 skb_queue_head_init(&tp->out_of_order_queue);
1296 tcp_init_xmit_timers(sk);
1298 tp->rto = TCP_TIMEOUT_INIT;/*TCP_WRITE_TIME*/
1299 tp->mdev = TCP_TIMEOUT_INIT;
/* 536 is the classic default MSS before options are negotiated. */
1300 tp->in_mss =536;
1302 /* See draft-stevens-tcpca-spec-01 for discussion of the
1303 * initialization of these values.
1305 tp->snd_cwnd =1;
1306 tp->snd_ssthresh =0x7fffffff;
1308 sk->priority =1;
1309 sk->state = TCP_CLOSE;
1310 sk->max_ack_backlog = SOMAXCONN;
1311 sk->mtu =576;
1312 sk->mss =536;
1314 /* Init SYN queue. */
1315 tcp_synq_init(tp);
1317 sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
1319 return0;
/* Tear down a tcpv6 socket: stop timers, drain the write and
 * out-of-order queues, release the cached route, and release a
 * half-bound port's bind bucket.  Always returns 0.
 */
1322 static inttcp_v6_destroy_sock(struct sock *sk)
1324 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1325 struct sk_buff *skb;
1327 tcp_clear_xmit_timers(sk);
1329 if(sk->keepopen)
1330 tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
1333 * Cleanup up the write buffer.
1336 while((skb =skb_dequeue(&sk->write_queue)) != NULL)
1337 kfree_skb(skb);
1340 * Cleans up our, hopefuly empty, out_of_order_queue
1343 while((skb =skb_dequeue(&tp->out_of_order_queue)) != NULL)
1344 kfree_skb(skb);
1347 * Release destination entry
1350 dst_release(xchg(&sk->dst_cache,NULL));
1352 /* Clean up a locked TCP bind bucket, this only happens if a
1353 * port is allocated for a socket, but it never fully connects.
1354 * In which case we will find num to be non-zero and daddr to
1355 * be zero.
1357 if(ipv6_addr_any(&(sk->net_pinfo.af_inet6.daddr)) && sk->num !=0)
1358 tcp_bucket_unlock(sk);
1360 return0;
/* The tcpv6 proto operations table; generic TCP entry points plus the
 * v6-specific connect/init/destroy/sendmsg/hash handlers defined above.
 */
1363 struct proto tcpv6_prot = {
1364 (struct sock *)&tcpv6_prot,/* sklist_next */
1365 (struct sock *)&tcpv6_prot,/* sklist_prev */
1366 tcp_close,/* close */
1367 tcp_v6_connect,/* connect */
1368 tcp_accept,/* accept */
1369 NULL,/* retransmit */
1370 tcp_write_wakeup,/* write_wakeup */
1371 tcp_read_wakeup,/* read_wakeup */
1372 tcp_poll,/* poll */
1373 tcp_ioctl,/* ioctl */
1374 tcp_v6_init_sock,/* init */
1375 tcp_v6_destroy_sock,/* destroy */
1376 tcp_shutdown,/* shutdown */
1377 tcp_setsockopt,/* setsockopt */
1378 tcp_getsockopt,/* getsockopt */
1379 tcp_v6_sendmsg,/* sendmsg */
1380 tcp_recvmsg,/* recvmsg */
1381 NULL,/* bind */
1382 tcp_v6_backlog_rcv,/* backlog_rcv */
1383 tcp_v6_hash,/* hash */
1384 tcp_v6_unhash,/* unhash */
1385 tcp_v6_rehash,/* rehash */
1386 tcp_good_socknum,/* good_socknum */
1387 tcp_v6_verify_bind,/* verify_bind */
1388 128,/* max_header */
1389 0,/* retransmits */
1390 "TCPv6",/* name */
1391 0,/* inuse */
1392 0/* highestinuse */
/* inet6 protocol descriptor registering the receive and ICMP error
 * handlers for IPPROTO_TCP.
 */
1395 static struct inet6_protocol tcpv6_protocol =
1397 tcp_v6_rcv,/* TCP handler */
1398 tcp_v6_err,/* TCP error control */
1399 NULL,/* next */
1400 IPPROTO_TCP,/* protocol ID */
1401 0,/* copy */
1402 NULL,/* data */
1403 "TCPv6"/* name */
/* Boot-time hook: register the TCPv6 handler with the inet6 stack. */
1406 __initfunc(voidtcpv6_init(void))
1408 /* register inet6 protocol */
1409 inet6_add_protocol(&tcpv6_protocol);
close