b5fcaa09cf23a5eb66d09721c66355641da4e76c
[davej-history.git] / net / core / sock.c
blobb5fcaa09cf23a5eb66d09721c66355641da4e76c
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
10 * Version: @(#)sock.c 1.0.17 06/02/93
12 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Alan Cox, <A.Cox@swansea.ac.uk>
17 * Fixes:
18 * Alan Cox : Numerous verify_area() problems
19 * Alan Cox : Connecting on a connecting socket
20 * now returns an error for tcp.
21 * Alan Cox : sock->protocol is set correctly.
22 * and is not sometimes left as 0.
23 * Alan Cox : connect handles icmp errors on a
24 * connect properly. Unfortunately there
25 * is a restart syscall nasty there. I
26 * can't match BSD without hacking the C
27 * library. Ideas urgently sought!
28 * Alan Cox : Disallow bind() to addresses that are
29 * not ours - especially broadcast ones!!
30 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
31 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
32 * instead they leave that for the DESTROY timer.
33 * Alan Cox : Clean up error flag in accept
34 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
35 * was buggy. Put a remove_sock() in the handler
36 * for memory when we hit 0. Also altered the timer
37 * code. The ACK stuff can wait and needs major
38 * TCP layer surgery.
39 * Alan Cox : Fixed TCP ack bug, removed remove sock
40 * and fixed timer/inet_bh race.
41 * Alan Cox : Added zapped flag for TCP
42 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
43 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
45 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 * Rick Sladkey : Relaxed UDP rules for matching packets.
48 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
49 * Pauline Middelink : identd support
50 * Alan Cox : Fixed connect() taking signals I think.
51 * Alan Cox : SO_LINGER supported
52 * Alan Cox : Error reporting fixes
53 * Anonymous : inet_create tidied up (sk->reuse setting)
54 * Alan Cox : inet sockets don't set sk->type!
55 * Alan Cox : Split socket option code
56 * Alan Cox : Callbacks
57 * Alan Cox : Nagle flag for Charles & Johannes stuff
58 * Alex : Removed restriction on inet fioctl
59 * Alan Cox : Splitting INET from NET core
60 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
61 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
62 * Alan Cox : Split IP from generic code
63 * Alan Cox : New kfree_skbmem()
64 * Alan Cox : Make SO_DEBUG superuser only.
65 * Alan Cox : Allow anyone to clear SO_DEBUG
66 * (compatibility fix)
67 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
68 * Alan Cox : Allocator for a socket is settable.
69 * Alan Cox : SO_ERROR includes soft errors.
70 * Alan Cox : Allow NULL arguments on some SO_ opts
71 * Alan Cox : Generic socket allocation to make hooks
72 * easier (suggested by Craig Metz).
73 * Michael Pall : SO_ERROR returns positive errno again
75 * To Fix:
78 * This program is free software; you can redistribute it and/or
79 * modify it under the terms of the GNU General Public License
80 * as published by the Free Software Foundation; either version
81 * 2 of the License, or (at your option) any later version.
84 #include <linux/config.h>
85 #include <linux/errno.h>
86 #include <linux/types.h>
87 #include <linux/socket.h>
88 #include <linux/in.h>
89 #include <linux/kernel.h>
90 #include <linux/major.h>
91 #include <linux/sched.h>
92 #include <linux/timer.h>
93 #include <linux/string.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
96 #include <linux/fcntl.h>
97 #include <linux/mm.h>
98 #include <linux/interrupt.h>
100 #include <asm/uaccess.h>
101 #include <asm/system.h>
103 #include <linux/inet.h>
104 #include <linux/netdevice.h>
105 #include <net/ip.h>
106 #include <net/protocol.h>
107 #include <net/arp.h>
108 #include <net/rarp.h>
109 #include <net/route.h>
110 #include <net/tcp.h>
111 #include <net/udp.h>
112 #include <linux/skbuff.h>
113 #include <net/sock.h>
114 #include <net/raw.h>
115 #include <net/icmp.h>
/*
 *	min(a,b) expands to the smaller of its two arguments (b when neither
 *	compares smaller). Each argument appears twice in the expansion, so
 *	an argument with side effects may be evaluated more than once.
 */
117 #define min(a,b) ((a)<(b)?(a):(b))
/*
 *	This is meant for all protocols to use and covers goings-on
 *	at the socket level. Everything here is generic.
 */
124 intsock_setsockopt(struct sock *sk,int level,int optname,
125 char*optval,int optlen)
127 int val;
128 int valbool;
129 int err;
130 struct linger ling;
133 * Options without arguments
136 #ifdef SO_DONTLINGER/* Compatibility item... */
137 switch(optname)
139 case SO_DONTLINGER:
140 sk->linger=0;
141 return0;
143 #endif
145 if(optval == NULL)
146 return(-EINVAL);
148 err=verify_area(VERIFY_READ, optval,sizeof(int));
149 if(err)
150 return err;
152 get_user(val, (int*)optval);
153 valbool = val?1:0;
155 switch(optname)
157 case SO_DEBUG:
158 if(val && !suser())
159 return(-EPERM);
160 sk->debug=valbool;
161 return0;
162 case SO_REUSEADDR:
163 sk->reuse = valbool;
164 return(0);
165 case SO_TYPE:
166 case SO_ERROR:
167 return(-ENOPROTOOPT);
168 case SO_DONTROUTE:
169 sk->localroute=valbool;
170 return0;
171 case SO_BROADCAST:
172 sk->broadcast=valbool;
173 return0;
174 case SO_SNDBUF:
175 if(val > SK_WMEM_MAX*2)
176 val = SK_WMEM_MAX*2;
177 if(val <256)
178 val =256;
179 if(val >65535)
180 val =65535;
181 sk->sndbuf = val;
182 return0;
184 case SO_RCVBUF:
185 if(val > SK_RMEM_MAX*2)
186 val = SK_RMEM_MAX*2;
187 if(val <256)
188 val =256;
189 if(val >65535)
190 val =65535;
191 sk->rcvbuf = val;
192 return(0);
194 case SO_KEEPALIVE:
195 if(sk->protocol == IPPROTO_TCP)
197 tcp_set_keepalive(sk, valbool);
199 sk->keepopen = valbool;
200 return(0);
202 case SO_OOBINLINE:
203 sk->urginline = valbool;
204 return(0);
206 case SO_NO_CHECK:
207 sk->no_check = valbool;
208 return(0);
210 case SO_PRIORITY:
211 if(val >=0&& val < DEV_NUMBUFFS)
213 sk->priority = val;
215 else
217 return(-EINVAL);
219 return(0);
222 case SO_LINGER:
223 err=verify_area(VERIFY_READ,optval,sizeof(ling));
224 if(err)
225 return err;
226 copy_from_user(&ling,optval,sizeof(ling));
227 if(ling.l_onoff==0)
228 sk->linger=0;
229 else
231 sk->lingertime=ling.l_linger;
232 sk->linger=1;
234 return0;
236 case SO_BSDCOMPAT:
237 sk->bsdism = valbool;
238 return0;
240 default:
241 return(-ENOPROTOOPT);
246 intsock_getsockopt(struct sock *sk,int level,int optname,
247 char*optval,int*optlen)
249 int val;
250 int err;
251 struct linger ling;
253 switch(optname)
255 case SO_DEBUG:
256 val = sk->debug;
257 break;
259 case SO_DONTROUTE:
260 val = sk->localroute;
261 break;
263 case SO_BROADCAST:
264 val= sk->broadcast;
265 break;
267 case SO_SNDBUF:
268 val=sk->sndbuf;
269 break;
271 case SO_RCVBUF:
272 val =sk->rcvbuf;
273 break;
275 case SO_REUSEADDR:
276 val = sk->reuse;
277 break;
279 case SO_KEEPALIVE:
280 val = sk->keepopen;
281 break;
283 case SO_TYPE:
284 val = sk->type;
285 break;
287 case SO_ERROR:
288 val = -sock_error(sk);
289 if(val==0)
290 val=xchg(&sk->err_soft,0);
291 break;
293 case SO_OOBINLINE:
294 val = sk->urginline;
295 break;
297 case SO_NO_CHECK:
298 val = sk->no_check;
299 break;
301 case SO_PRIORITY:
302 val = sk->priority;
303 break;
305 case SO_LINGER:
306 err=verify_area(VERIFY_WRITE,optval,sizeof(ling));
307 if(err)
308 return err;
309 err=verify_area(VERIFY_WRITE,optlen,sizeof(int));
310 if(err)
311 return err;
312 put_user(sizeof(ling), optlen);
313 ling.l_onoff=sk->linger;
314 ling.l_linger=sk->lingertime;
315 copy_to_user(optval,&ling,sizeof(ling));
316 return0;
318 case SO_BSDCOMPAT:
319 val = sk->bsdism;
320 break;
322 default:
323 return(-ENOPROTOOPT);
325 err=verify_area(VERIFY_WRITE, optlen,sizeof(int));
326 if(err)
327 return err;
328 put_user(sizeof(int), optlen);
330 err=verify_area(VERIFY_WRITE, optval,sizeof(int));
331 if(err)
332 return err;
333 put_user(val,(unsigned int*)optval);
335 return(0);
338 struct sock *sk_alloc(int priority)
340 struct sock *sk=(struct sock *)kmalloc(sizeof(*sk), priority);
341 if(!sk)
342 return NULL;
343 memset(sk,0,sizeof(*sk));
344 return sk;
347 voidsk_free(struct sock *sk)
349 kfree_s(sk,sizeof(*sk));
353 struct sk_buff *sock_wmalloc(struct sock *sk,unsigned long size,int force,int priority)
355 if(sk) {
356 if(force || sk->wmem_alloc < sk->sndbuf) {
357 struct sk_buff * skb =alloc_skb(size, priority);
358 if(skb)
359 atomic_add(skb->truesize, &sk->wmem_alloc);
360 return skb;
362 return NULL;
364 returnalloc_skb(size, priority);
367 struct sk_buff *sock_rmalloc(struct sock *sk,unsigned long size,int force,int priority)
369 if(sk) {
370 if(force || sk->rmem_alloc < sk->rcvbuf) {
371 struct sk_buff *skb =alloc_skb(size, priority);
372 if(skb)
373 atomic_add(skb->truesize, &sk->rmem_alloc);
374 return skb;
376 return NULL;
378 returnalloc_skb(size, priority);
382 unsigned longsock_rspace(struct sock *sk)
384 int amt;
386 if(sk != NULL)
388 if(sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW)
389 return(0);
390 amt =min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW);
391 if(amt <0)
392 return(0);
393 return(amt);
395 return(0);
399 unsigned longsock_wspace(struct sock *sk)
401 if(sk != NULL)
403 if(sk->shutdown & SEND_SHUTDOWN)
404 return(0);
405 if(sk->wmem_alloc >= sk->sndbuf)
406 return(0);
407 return sk->sndbuf - sk->wmem_alloc;
409 return(0);
413 voidsock_wfree(struct sock *sk,struct sk_buff *skb)
415 int s=skb->truesize;
416 #if CONFIG_SKB_CHECK
417 IS_SKB(skb);
418 #endif
419 kfree_skbmem(skb);
420 if(sk)
422 /* In case it might be waiting for more memory. */
423 sk->write_space(sk);
424 atomic_sub(s, &sk->wmem_alloc);
429 voidsock_rfree(struct sock *sk,struct sk_buff *skb)
431 int s=skb->truesize;
432 #if CONFIG_SKB_CHECK
433 IS_SKB(skb);
434 #endif
435 kfree_skbmem(skb);
436 if(sk)
438 atomic_sub(s, &sk->rmem_alloc);
/*
 *	Generic send/receive buffer handlers
 */
/*
 *	Obtain a send buffer charged to sk, waiting for space unless
 *	'noblock' is set.
 *
 *	sk	  socket the buffer is allocated against
 *	size	  preferred buffer size
 *	fallback  if non-zero, a second size tried when 'size' fails
 *	noblock	  non-zero: fail with -EAGAIN rather than sleep
 *	errcode	  receives a negative errno whenever NULL is returned
 *
 *	Each attempt first reports and clears a pending sk->err, then fails
 *	with -EPIPE if SEND_SHUTDOWN is set, then calls sock_wmalloc()
 *	without 'force'. With fallback == 0 that is one request for 'size'
 *	using sk->allocation; otherwise 'size' is requested with GFP_BUFFER
 *	and, if that fails, 'fallback' bytes with GFP_KERNEL.
 *
 *	When no buffer was obtained, SO_NOSPACE is set in sk->socket->flags.
 *	A non-blocking caller gets -EAGAIN. Otherwise shutdown is re-checked
 *	(once more after cli()), and if wmem_alloc has not fallen below the
 *	value sampled just before cli() the task sleeps on sk->sleep via
 *	interruptible_sleep_on(); a pending unblocked signal afterwards ends
 *	the call with -ERESTARTSYS.
 *
 *	NOTE(review): the trailing "while(skb==NULL);" implies the body is a
 *	do/while retry loop; the opening "do" and the brace-only lines are
 *	not present in this copy of the file - confirm against a clean copy.
 */
446 struct sk_buff *sock_alloc_send_skb(struct sock *sk,unsigned long size,unsigned long fallback,int noblock,int*errcode)
448 struct sk_buff *skb;
449 int err;
/* A pending socket error is fetched and cleared with interrupts off, then reported. */
453 if(sk->err!=0)
455 cli();
456 err= -sk->err;
457 sk->err=0;
458 sti();
459 *errcode=err;
460 return NULL;
/* No buffer is handed out once sending has been shut down. */
463 if(sk->shutdown&SEND_SHUTDOWN)
465 *errcode=-EPIPE;
466 return NULL;
/* fallback == 0: a single request of 'size' using the socket's own allocation priority. */
469 if(!fallback)
470 skb =sock_wmalloc(sk, size,0, sk->allocation);
471 else
473 /* The buffer get won't block, or use the atomic queue. It does
474 produce annoying no free page messages still.... */
475 skb =sock_wmalloc(sk, size,0, GFP_BUFFER);
476 if(!skb)
477 skb=sock_wmalloc(sk, fallback,0, GFP_KERNEL);
481 * This means we have too many buffers for this socket already.
484 if(skb==NULL)
486 unsigned long tmp;
488 sk->socket->flags |= SO_NOSPACE;
489 if(noblock)
491 *errcode=-EAGAIN;
492 return NULL;
494 if(sk->shutdown&SEND_SHUTDOWN)
496 *errcode=-EPIPE;
497 return NULL;
/* Sample wmem_alloc before interrupts go off; it is compared against the live value below. */
499 tmp = sk->wmem_alloc;
500 cli();
501 if(sk->shutdown&SEND_SHUTDOWN)
503 sti();
504 *errcode=-EPIPE;
505 return NULL;
508 #if 1
509 if( tmp <= sk->wmem_alloc)
510 #else
511 /* ANK: Line above seems either incorrect
512 * or useless. sk->wmem_alloc has a tiny chance to change
513 * between tmp = sk->w... and cli(),
514 * but it might(?) change earlier. In real life
515 * it does not (I never seen the message).
516 * In any case I'd delete this check at all, or
517 * change it to:
519 if(sk->wmem_alloc + size >= sk->sndbuf)
520 #endif
522 sk->socket->flags &= ~SO_NOSPACE;
523 interruptible_sleep_on(sk->sleep);
524 if(current->signal & ~current->blocked)
526 sti();
527 *errcode = -ERESTARTSYS;
528 return NULL;
531 sti();
534 while(skb==NULL);
536 return skb;
/*
 *	Drain the socket's backlog queue.
 *
 *	Compiled to an empty function without CONFIG_INET. Otherwise, if
 *	the socket has both a protocol and a backlog_rcv handler, each
 *	buffer on sk->back_log is unlinked from the head of the queue and
 *	passed to sk->backlog_rcv(), all between start_bh_atomic() and
 *	end_bh_atomic().
 */
void __release_sock(struct sock *sk)
{
#ifdef CONFIG_INET
	struct sk_buff *skb;

	if (sk->prot == NULL || sk->backlog_rcv == NULL)
		return;

	/* See if we have any packets built up. */
	start_bh_atomic();
	for (;;) {
		if (skb_queue_empty(&sk->back_log))
			break;
		skb = sk->back_log.next;
		__skb_unlink(skb, &sk->back_log);
		sk->backlog_rcv(sk, skb);
	}
	end_bh_atomic();
#endif
}
close