Import 2.3.4pre2
[davej-history.git] / net / unix / af_unix.c
blob1d12037dabd31c5fabd52befd76f17f45a5f6148
1 /*
2 * NET3: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan.cox@linux.org>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Version: $Id: af_unix.c,v 1.78 1999/05/27 00:38:41 davem Exp $
13 * Fixes:
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
23 * Mike Shaver's work.
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
30 * reference counting
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
33 * Lots of bug fixes.
34 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
35 * by above two patches.
36 * Andrea Arcangeli : If possible we block in connect(2)
37 * if the max backlog of the listen socket
38 * has been reached. This won't break
39 * old apps and it will avoid a huge number
40 * of socks being hashed (this is for unix_gc()
41 * performance reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
45 * dgram receiver.
47 * Known differences from reference BSD that was tested:
49 * [TO FIX]
50 * ECONNREFUSED is not returned from one end of a connected() socket to the
51 * other the moment one end closes.
52 * fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark
53 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
54 * [NOT TO FIX]
55 * accept() returns a path name even if the connecting socket has closed
56 * in the meantime (BSD loses the path and gives up).
57 * accept() returns 0 length path for an unbound connector. BSD returns 16
58 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
59 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
60 * BSD af_unix apparently has connect forgetting to block properly.
61 * (need to check this with the POSIX spec in detail)
63 * Differences from 2.0.0-11-... (ANK)
64 * Bug fixes and improvements.
65 * - client shutdown killed server socket.
66 * - removed all useless cli/sti pairs.
68 * Semantic changes/extensions.
69 * - generic control message passing.
70 * - SCM_CREDENTIALS control message.
71 * - "Abstract" (not FS based) socket bindings.
72 * Abstract names are sequences of bytes (not zero terminated)
73 * started by 0, so that this name space does not intersect
74 * with BSD names.
77 #include <linux/module.h>
78 #include <linux/config.h>
79 #include <linux/kernel.h>
80 #include <linux/major.h>
81 #include <linux/signal.h>
82 #include <linux/sched.h>
83 #include <linux/errno.h>
84 #include <linux/string.h>
85 #include <linux/stat.h>
86 #include <linux/socket.h>
87 #include <linux/un.h>
88 #include <linux/fcntl.h>
89 #include <linux/termios.h>
90 #include <linux/socket.h>
91 #include <linux/sockios.h>
92 #include <linux/net.h>
93 #include <linux/in.h>
94 #include <linux/fs.h>
95 #include <linux/malloc.h>
96 #include <asm/uaccess.h>
97 #include <linux/skbuff.h>
98 #include <linux/netdevice.h>
99 #include <net/sock.h>
100 #include <net/tcp.h>
101 #include <net/af_unix.h>
102 #include <linux/proc_fs.h>
103 #include <net/scm.h>
104 #include <linux/init.h>
105 #include <linux/poll.h>
107 #include <asm/checksum.h>
109 #define min(a,b) (((a)<(b))?(a):(b))
111 int sysctl_unix_delete_delay = HZ;
112 int sysctl_unix_destroy_delay =10*HZ;
113 int sysctl_unix_max_dgram_qlen =10;
115 unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
116 static atomic_t unix_nr_socks =ATOMIC_INIT(0);
117 staticDECLARE_WAIT_QUEUE_HEAD(unix_ack_wqueue);
118 staticDECLARE_WAIT_QUEUE_HEAD(unix_dgram_wqueue);
120 #define unix_sockets_unbound (unix_socket_table[UNIX_HASH_SIZE])
122 #define UNIX_ABSTRACT(sk) ((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE)
124 static voidunix_destroy_socket(unix_socket *sk);
125 static voidunix_stream_write_space(struct sock *sk);
/*
 *	Fold a 32-bit name checksum down to a hash bucket index.
 *
 *	The result is masked to the low four bits, the same 0xF mask every
 *	caller in this file applies when indexing unix_socket_table. Masking
 *	here as well keeps the value that gets stored in addr->hash strictly
 *	below the table size, so an abstract address can never be mistaken
 *	for the UNIX_HASH_SIZE marker that UNIX_ABSTRACT() tests for
 *	(assuming UNIX_HASH_SIZE is 16, as the 0xF mask implies).
 *
 *	Returns a value in the range 0..15.
 */
extern __inline__ unsigned unix_hash_fold(unsigned hash)
{
	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= hash >> 4;
	return hash & 0xF;
}
135 #define unix_peer(sk) ((sk)->pair)
137 extern __inline__ intunix_our_peer(unix_socket *sk, unix_socket *osk)
139 returnunix_peer(osk) == sk;
142 extern __inline__ intunix_may_send(unix_socket *sk, unix_socket *osk)
144 return(unix_peer(osk) == NULL ||unix_our_peer(sk, osk));
147 #define ulock(sk) (&(sk->protinfo.af_unix.user_count))
149 extern __inline__ voidunix_lock(unix_socket *sk)
151 atomic_inc(ulock(sk));
154 extern __inline__ voidunix_unlock(unix_socket *sk)
156 atomic_dec(ulock(sk));
159 extern __inline__ intunix_locked(unix_socket *sk)
161 return(atomic_read(ulock(sk)) !=0);
164 extern __inline__ voidunix_release_addr(struct unix_address *addr)
166 if(addr)
168 if(atomic_dec_and_test(&addr->refcnt))
169 kfree(addr);
173 static voidunix_destruct_addr(struct sock *sk)
175 struct unix_address *addr = sk->protinfo.af_unix.addr;
177 unix_release_addr(addr);
181 * Check unix socket name:
182 * - should be not zero length.
183 * - if started by not zero, should be NULL terminated (FS object)
184 * - if started by zero, it is abstract name.
187 static intunix_mkname(struct sockaddr_un * sunaddr,int len,unsigned*hashp)
189 if(len <=sizeof(short) || len >sizeof(*sunaddr))
190 return-EINVAL;
191 if(!sunaddr || sunaddr->sun_family != AF_UNIX)
192 return-EINVAL;
193 if(sunaddr->sun_path[0])
196 * This may look like an off by one error but it is
197 * a bit more subtle. 108 is the longest valid AF_UNIX
198 * path for a binding. sun_path[108] doesnt as such
199 * exist. However in kernel space we are guaranteed that
200 * it is a valid memory location in our kernel
201 * address buffer.
203 if(len >sizeof(*sunaddr))
204 len =sizeof(*sunaddr);
205 ((char*)sunaddr)[len]=0;
206 len =strlen(sunaddr->sun_path)+1+sizeof(short);
207 return len;
210 *hashp =unix_hash_fold(csum_partial((char*)sunaddr, len,0));
211 return len;
214 static voidunix_remove_socket(unix_socket *sk)
216 unix_socket **list = sk->protinfo.af_unix.list;
217 if(sk->next)
218 sk->next->prev = sk->prev;
219 if(sk->prev)
220 sk->prev->next = sk->next;
221 if(*list == sk)
222 *list = sk->next;
223 sk->protinfo.af_unix.list = NULL;
224 sk->prev = NULL;
225 sk->next = NULL;
228 static voidunix_insert_socket(unix_socket *sk)
230 unix_socket **list = sk->protinfo.af_unix.list;
231 sk->prev = NULL;
232 sk->next = *list;
233 if(*list)
234 (*list)->prev = sk;
235 *list=sk;
238 static unix_socket *unix_find_socket_byname(struct sockaddr_un *sunname,
239 int len,int type,unsigned hash)
241 unix_socket *s;
243 for(s=unix_socket_table[(hash^type)&0xF]; s; s=s->next)
245 if(s->protinfo.af_unix.addr->len==len &&
246 memcmp(s->protinfo.af_unix.addr->name, sunname, len) ==0&&
247 s->type == type)
249 unix_lock(s);
250 return(s);
253 return(NULL);
256 static unix_socket *unix_find_socket_byinode(struct inode *i)
258 unix_socket *s;
260 for(s=unix_socket_table[i->i_ino &0xF]; s; s=s->next)
262 struct dentry *dentry = s->protinfo.af_unix.dentry;
264 if(dentry && dentry->d_inode == i)
266 unix_lock(s);
267 return(s);
270 return(NULL);
274 * Delete a unix socket. We have to allow for deferring this on a timer.
277 static voidunix_destroy_timer(unsigned long data)
279 unix_socket *sk=(unix_socket *)data;
280 if(!unix_locked(sk) &&atomic_read(&sk->wmem_alloc) ==0)
282 atomic_dec(&unix_nr_socks);
284 sk_free(sk);
286 /* socket destroyed, decrement count */
287 MOD_DEC_USE_COUNT;
288 return;
292 * Retry;
295 sk->timer.expires=jiffies+sysctl_unix_destroy_delay;/* No real hurry try it every 10 seconds or so */
296 add_timer(&sk->timer);
300 static voidunix_delayed_delete(unix_socket *sk)
302 sk->timer.data=(unsigned long)sk;
303 sk->timer.expires=jiffies+sysctl_unix_delete_delay;/* Normally 1 second after will clean up. After that we try every 10 */
304 sk->timer.function=unix_destroy_timer;
305 add_timer(&sk->timer);
308 static intunix_release_sock(unix_socket *sk)
310 unix_socket *skpair;
312 sk->state_change(sk);
313 sk->dead=1;
314 sk->socket = NULL;
316 if(sk->state == TCP_LISTEN)
317 wake_up_interruptible(&unix_ack_wqueue);
318 if(sk->type == SOCK_DGRAM)
319 wake_up_interruptible(&unix_dgram_wqueue);
321 skpair=unix_peer(sk);
323 if(skpair!=NULL)
325 if(sk->type==SOCK_STREAM &&unix_our_peer(sk, skpair))
327 skpair->data_ready(skpair,0);
328 skpair->shutdown=SHUTDOWN_MASK;/* No more writes*/
330 unix_unlock(skpair);/* It may now die */
333 /* Try to flush out this socket. Throw out buffers at least */
334 unix_destroy_socket(sk);
337 * Fixme: BSD difference: In BSD all sockets connected to use get
338 * ECONNRESET and we die on the spot. In Linux we behave
339 * like files and pipes do and wait for the last
340 * dereference.
342 * Can't we simply set sock->err?
344 * What the above comment does talk about? --ANK(980817)
347 unix_gc();/* Garbage collect fds */
348 return0;
351 static voidunix_destroy_socket(unix_socket *sk)
353 struct sk_buff *skb;
355 unix_remove_socket(sk);
357 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
359 if(sk->state==TCP_LISTEN)
360 unix_release_sock(skb->sk);
361 /* passed fds are erased in the kfree_skb hook */
362 kfree_skb(skb);
365 if(sk->protinfo.af_unix.dentry!=NULL)
367 dput(sk->protinfo.af_unix.dentry);
368 sk->protinfo.af_unix.dentry=NULL;
371 if(!unix_locked(sk) &&atomic_read(&sk->wmem_alloc) ==0)
373 atomic_dec(&unix_nr_socks);
375 sk_free(sk);
377 /* socket destroyed, decrement count */
378 MOD_DEC_USE_COUNT;
380 else
382 sk->state=TCP_CLOSE;
383 sk->dead=1;
384 unix_delayed_delete(sk);/* Try every so often until buffers are all freed */
389 static intunix_listen(struct socket *sock,int backlog)
391 struct sock *sk = sock->sk;
393 if(sock->state != SS_UNCONNECTED)
394 return(-EINVAL);
395 if(sock->type!=SOCK_STREAM)
396 return-EOPNOTSUPP;/* Only stream sockets accept */
397 if(!sk->protinfo.af_unix.addr)
398 return-EINVAL;/* No listens on an unbound socket */
399 if((unsigned) backlog > SOMAXCONN)
400 backlog = SOMAXCONN;
401 sk->max_ack_backlog=backlog;
402 sk->state=TCP_LISTEN;
403 sock->flags |= SO_ACCEPTCON;
404 /* set credentials so connect can copy them */
405 sk->peercred.pid = current->pid;
406 sk->peercred.uid = current->euid;
407 sk->peercred.gid = current->egid;
408 return0;
411 externstruct proto_ops unix_stream_ops;
412 externstruct proto_ops unix_dgram_ops;
414 static struct sock *unix_create1(struct socket *sock,int stream)
416 struct sock *sk;
418 if(atomic_read(&unix_nr_socks) >=2*max_files)
419 return NULL;
421 MOD_INC_USE_COUNT;
422 sk =sk_alloc(PF_UNIX, GFP_KERNEL,1);
423 if(!sk) {
424 MOD_DEC_USE_COUNT;
425 return NULL;
428 atomic_inc(&unix_nr_socks);
430 sock_init_data(sock,sk);
432 if(stream)
433 sk->write_space = unix_stream_write_space;
435 sk->destruct = unix_destruct_addr;
436 sk->protinfo.af_unix.family=PF_UNIX;
437 sk->protinfo.af_unix.dentry=NULL;
438 init_MUTEX(&sk->protinfo.af_unix.readsem);/* single task reading lock */
439 sk->protinfo.af_unix.list=&unix_sockets_unbound;
440 unix_insert_socket(sk);
442 return sk;
445 static intunix_create(struct socket *sock,int protocol)
447 int stream =0;
449 if(protocol && protocol != PF_UNIX)
450 return-EPROTONOSUPPORT;
452 sock->state = SS_UNCONNECTED;
454 switch(sock->type) {
455 case SOCK_STREAM:
456 sock->ops = &unix_stream_ops;
457 stream =1;
458 break;
460 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
461 * nothing uses it.
463 case SOCK_RAW:
464 sock->type=SOCK_DGRAM;
465 case SOCK_DGRAM:
466 sock->ops = &unix_dgram_ops;
467 break;
468 default:
469 return-ESOCKTNOSUPPORT;
472 returnunix_create1(sock, stream) ?0: -ENOMEM;
475 static intunix_release(struct socket *sock,struct socket *peer)
477 unix_socket *sk = sock->sk;
479 if(!sk)
480 return0;
482 sock->sk = NULL;
483 if(sock->state != SS_UNCONNECTED)
484 sock->state = SS_DISCONNECTING;
486 returnunix_release_sock(sk);
489 static intunix_autobind(struct socket *sock)
491 struct sock *sk = sock->sk;
492 static u32 ordernum =1;
493 struct unix_address * addr;
494 unix_socket *osk;
496 addr =kmalloc(sizeof(*addr) +sizeof(short) +16, GFP_KERNEL);
497 if(!addr)
498 return-ENOMEM;
499 if(sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry)
501 kfree(addr);
502 return-EINVAL;
504 memset(addr,0,sizeof(*addr) +sizeof(short) +16);
505 addr->name->sun_family = AF_UNIX;
506 atomic_set(&addr->refcnt,1);
508 retry:
509 addr->len =sprintf(addr->name->sun_path+1,"%08x", ordernum) +1+sizeof(short);
510 addr->hash =unix_hash_fold(csum_partial((void*)addr->name, addr->len,0));
511 ordernum++;
513 if((osk=unix_find_socket_byname(addr->name, addr->len, sock->type,
514 addr->hash)) != NULL)
516 unix_unlock(osk);
517 goto retry;
520 sk->protinfo.af_unix.addr = addr;
521 unix_remove_socket(sk);
522 sk->protinfo.af_unix.list = &unix_socket_table[(addr->hash ^ sk->type)&0xF];
523 unix_insert_socket(sk);
524 return0;
/*
 *	Resolve a destination address to a socket. A name starting with
 *	a non-zero byte is looked up through the filesystem, anything
 *	else in the abstract name hash.
 *
 *	Returns the socket with one use count taken, or NULL with *error
 *	set (a lookup error, -EPROTOTYPE on a type mismatch, or
 *	-ECONNREFUSED when no socket is bound there).
 */
static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	unix_socket *u;

	if (!sunname->sun_path[0]) {
		u = unix_find_socket_byname(sunname, len, type, hash);
	} else {
		struct dentry *dentry;

		dentry = open_namei(sunname->sun_path, 2, S_IFSOCK);
		if (IS_ERR(dentry)) {
			*error = PTR_ERR(dentry);
			return NULL;
		}
		u = unix_find_socket_byinode(dentry->d_inode);
		dput(dentry);
		if (u && u->type != type) {
			*error = -EPROTOTYPE;
			unix_unlock(u);
			return NULL;
		}
	}

	if (u == NULL) {
		*error = -ECONNREFUSED;
		return NULL;
	}
	return u;
}
561 static intunix_bind(struct socket *sock,struct sockaddr *uaddr,int addr_len)
563 struct sock *sk = sock->sk;
564 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
565 struct dentry * dentry;
566 int err;
567 unsigned hash;
568 struct unix_address *addr;
570 if(sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry ||
571 sunaddr->sun_family != AF_UNIX)
572 return-EINVAL;
574 if(addr_len==sizeof(short))
575 returnunix_autobind(sock);
577 addr_len =unix_mkname(sunaddr, addr_len, &hash);
578 if(addr_len <0)
579 return addr_len;
581 addr =kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
582 if(!addr)
583 return-ENOMEM;
585 /* We slept; recheck ... */
587 if(sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry)
589 kfree(addr);
590 return-EINVAL;/* Already bound */
593 memcpy(addr->name, sunaddr, addr_len);
594 addr->len = addr_len;
595 addr->hash = hash;
596 atomic_set(&addr->refcnt,1);
598 if(!sunaddr->sun_path[0])
600 unix_socket *osk =unix_find_socket_byname(sunaddr, addr_len,
601 sk->type, hash);
602 if(osk)
604 unix_unlock(osk);
605 kfree(addr);
606 return-EADDRINUSE;
608 unix_remove_socket(sk);
609 sk->protinfo.af_unix.addr = addr;
610 sk->protinfo.af_unix.list = &unix_socket_table[(hash^sk->type)&0xF];
611 unix_insert_socket(sk);
612 return0;
615 addr->hash = UNIX_HASH_SIZE;
616 sk->protinfo.af_unix.addr = addr;
619 dentry =do_mknod(sunaddr->sun_path, S_IFSOCK|sock->inode->i_mode,0);
620 if(IS_ERR(dentry))
622 err =PTR_ERR(dentry);
623 unix_release_addr(addr);
624 sk->protinfo.af_unix.addr = NULL;
625 if(err==-EEXIST)
626 return-EADDRINUSE;
627 else
628 return err;
630 unix_remove_socket(sk);
631 sk->protinfo.af_unix.list = &unix_socket_table[dentry->d_inode->i_ino &0xF];
632 sk->protinfo.af_unix.dentry = dentry;
633 unix_insert_socket(sk);
635 return0;
638 static intunix_dgram_connect(struct socket *sock,struct sockaddr *addr,
639 int alen,int flags)
641 struct sock *sk = sock->sk;
642 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
643 struct sock *other;
644 unsigned hash;
645 int err;
648 * 1003.1g breaking connected state with AF_UNSPEC
651 if(addr->sa_family==AF_UNSPEC)
653 if(unix_peer(sk))
655 unix_unlock(unix_peer(sk));
656 unix_peer(sk) = NULL;
657 sock->state=SS_UNCONNECTED;
659 return0;
662 alen =unix_mkname(sunaddr, alen, &hash);
663 if(alen <0)
664 return alen;
666 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
667 if(!other)
668 return err;
669 if(!unix_may_send(sk, other))
671 unix_unlock(other);
672 return-EINVAL;
676 * If it was connected, reconnect.
678 if(unix_peer(sk))
680 unix_unlock(unix_peer(sk));
681 unix_peer(sk)=NULL;
683 unix_peer(sk)=other;
684 if(sock->passcred && !sk->protinfo.af_unix.addr)
685 unix_autobind(sock);
686 return0;
/*
 *	connect(2) for stream sockets.
 *
 *	Finds the listening socket, waits (unless O_NONBLOCK is set in
 *	flags) while its backlog is full, then creates the server-side
 *	sock, links it with ours and queues a MSG_SYN skb on the listener
 *	for unix_accept() to pick up.
 *
 *	Returns 0 on success. Errors: a name or lookup error from
 *	unix_mkname()/unix_find_other() (reported as is, in the same way
 *	unix_dgram_connect() does), -ECONNREFUSED, -EAGAIN, -ERESTARTSYS,
 *	-EISCONN, -EINVAL, -ENOMEM.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk, *newsk;
	unix_socket *other;
	struct sk_buff *skb;
	int err;
	unsigned hash;

	addr_len = unix_mkname(sunaddr, addr_len, &hash);
	if (addr_len < 0)
		return addr_len;

	/* First of all allocate resources.
	   If we will make it after state checks,
	   we will have to recheck all again in any case.
	 */

restart:
	/* Find listening sock */
	other = unix_find_other(sunaddr, addr_len, sk->type, hash, &err);

	/* unix_find_other() sets err on every NULL return; pass it on. */
	if (!other)
		return err;

	if (other->ack_backlog >= other->max_ack_backlog) {
		/*
		 * Sample the listener's state while we still hold our
		 * use count; once it is dropped the sock may be freed.
		 */
		int gone = other->dead || other->state != TCP_LISTEN;

		unix_unlock(other);
		if (gone)
			return -ECONNREFUSED;
		if (flags & O_NONBLOCK)
			return -EAGAIN;
		interruptible_sleep_on(&unix_ack_wqueue);
		if (signal_pending(current))
			return -ERESTARTSYS;
		goto restart;
	}

	/* create new sock for complete connection */
	newsk = unix_create1(NULL, 1);

	/* Allocate skb for sending to listening sock */
	skb = NULL;
	if (newsk)
		skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);

	switch (sock->state) {
	case SS_UNCONNECTED:
		/* This is ok... continue with connect */
		break;
	case SS_CONNECTED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out;
	default:
		err = -EINVAL;
		goto out;
	}

	err = -EINVAL;
	if (sk->state != TCP_CLOSE)
		goto out;

	/* Check that listener is in valid state. */
	err = -ECONNREFUSED;
	if (other->dead || other->state != TCP_LISTEN)
		goto out;

	err = -ENOMEM;
	if (newsk == NULL || skb == NULL)
		goto out;

	UNIXCB(skb).attr = MSG_SYN;

	/* set up connecting socket */
	sock->state = SS_CONNECTED;
	if (!sk->protinfo.af_unix.addr)
		unix_autobind(sock);
	unix_peer(sk) = newsk;
	unix_lock(sk);
	sk->state = TCP_ESTABLISHED;
	/* Set credentials */
	sk->peercred = other->peercred;

	/* set up newly created sock */
	unix_peer(newsk) = sk;
	unix_lock(newsk);
	newsk->state = TCP_ESTABLISHED;
	newsk->type = SOCK_STREAM;
	newsk->peercred.pid = current->pid;
	newsk->peercred.uid = current->euid;
	newsk->peercred.gid = current->egid;

	/* copy address information from listening to new sock*/
	if (other->protinfo.af_unix.addr) {
		atomic_inc(&other->protinfo.af_unix.addr->refcnt);
		newsk->protinfo.af_unix.addr = other->protinfo.af_unix.addr;
	}
	if (other->protinfo.af_unix.dentry)
		newsk->protinfo.af_unix.dentry = dget(other->protinfo.af_unix.dentry);

	/* send info to listening sock */
	other->ack_backlog++;
	skb_queue_tail(&other->receive_queue, skb);
	other->data_ready(other, 0);		/* Wake up ! */
	unix_unlock(other);
	return 0;

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_destroy_socket(newsk);
	if (other)
		unix_unlock(other);
	return err;
}
809 static intunix_socketpair(struct socket *socka,struct socket *sockb)
811 struct sock *ska=socka->sk, *skb = sockb->sk;
813 /* Join our sockets back to back */
814 unix_lock(ska);
815 unix_lock(skb);
816 unix_peer(ska)=skb;
817 unix_peer(skb)=ska;
819 if(ska->type != SOCK_DGRAM)
821 ska->state=TCP_ESTABLISHED;
822 skb->state=TCP_ESTABLISHED;
823 socka->state=SS_CONNECTED;
824 sockb->state=SS_CONNECTED;
826 return0;
829 static intunix_accept(struct socket *sock,struct socket *newsock,int flags)
831 unix_socket *sk = sock->sk;
832 unix_socket *newsk = newsock->sk;
833 unix_socket *tsk;
834 struct sk_buff *skb;
836 if(sock->state != SS_UNCONNECTED)
837 return(-EINVAL);
838 if(!(sock->flags & SO_ACCEPTCON))
839 return(-EINVAL);
841 if(sock->type!=SOCK_STREAM)
842 return-EOPNOTSUPP;
843 if(sk->state!=TCP_LISTEN)
844 return-EINVAL;
846 for(;;)
848 skb=skb_dequeue(&sk->receive_queue);
849 if(skb==NULL)
851 if(flags&O_NONBLOCK)
852 return-EAGAIN;
853 interruptible_sleep_on(sk->sleep);
854 if(signal_pending(current))
855 return-ERESTARTSYS;
856 continue;
858 if(!(UNIXCB(skb).attr & MSG_SYN))
860 tsk=skb->sk;
861 tsk->state_change(tsk);
862 kfree_skb(skb);
863 continue;
865 tsk = skb->sk;
866 if(sk->max_ack_backlog == sk->ack_backlog--)
867 wake_up_interruptible(&unix_ack_wqueue);
868 kfree_skb(skb);
869 break;
873 /* attach accepted sock to socket */
874 newsock->state=SS_CONNECTED;
875 newsock->sk=tsk;
876 tsk->sleep=newsk->sleep;
877 tsk->socket=newsock;
879 /* destroy handed sock */
880 newsk->socket = NULL;
881 unix_destroy_socket(newsk);
883 return0;
887 static intunix_getname(struct socket *sock,struct sockaddr *uaddr,int*uaddr_len,int peer)
889 struct sock *sk = sock->sk;
890 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
892 if(peer)
894 if(!unix_peer(sk))
895 return-ENOTCONN;
896 sk=unix_peer(sk);
898 if(!sk->protinfo.af_unix.addr)
900 sunaddr->sun_family = AF_UNIX;
901 sunaddr->sun_path[0] =0;
902 *uaddr_len =sizeof(short);
903 return0;/* Not bound */
905 *uaddr_len = sk->protinfo.af_unix.addr->len;
906 memcpy(sunaddr, sk->protinfo.af_unix.addr->name, *uaddr_len);
907 return0;
910 static voidunix_detach_fds(struct scm_cookie *scm,struct sk_buff *skb)
912 int i;
914 scm->fp =UNIXCB(skb).fp;
915 skb->destructor = sock_wfree;
916 UNIXCB(skb).fp = NULL;
918 for(i=scm->fp->count-1; i>=0; i--)
919 unix_notinflight(scm->fp->fp[i]);
922 static voidunix_destruct_fds(struct sk_buff *skb)
924 struct scm_cookie scm;
925 memset(&scm,0,sizeof(scm));
926 unix_detach_fds(&scm, skb);
927 scm_destroy(&scm);
928 sock_wfree(skb);
931 static voidunix_attach_fds(struct scm_cookie *scm,struct sk_buff *skb)
933 int i;
934 for(i=scm->fp->count-1; i>=0; i--)
935 unix_inflight(scm->fp->fp[i]);
936 UNIXCB(skb).fp = scm->fp;
937 skb->destructor = unix_destruct_fds;
938 scm->fp = NULL;
943 * Send AF_UNIX data.
946 static intunix_dgram_sendmsg(struct socket *sock,struct msghdr *msg,int len,
947 struct scm_cookie *scm)
949 struct sock *sk = sock->sk;
950 struct sockaddr_un *sunaddr=msg->msg_name;
951 unix_socket *other;
952 int namelen =0;/* fake GCC */
953 int err;
954 unsigned hash;
955 struct sk_buff *skb;
957 if(msg->msg_flags&MSG_OOB)
958 return-EOPNOTSUPP;
960 if(msg->msg_flags&~(MSG_DONTWAIT|MSG_NOSIGNAL))
961 return-EINVAL;
963 if(msg->msg_namelen) {
964 namelen =unix_mkname(sunaddr, msg->msg_namelen, &hash);
965 if(namelen <0)
966 return namelen;
967 }else{
968 sunaddr = NULL;
969 if(!unix_peer(sk))
970 return-ENOTCONN;
973 if(sock->passcred && !sk->protinfo.af_unix.addr)
974 unix_autobind(sock);
976 skb =sock_alloc_send_skb(sk, len,0, msg->msg_flags&MSG_DONTWAIT, &err);
977 if(skb==NULL)
978 goto out;
980 memcpy(UNIXCREDS(skb), &scm->creds,sizeof(struct ucred));
981 UNIXCB(skb).attr = msg->msg_flags;
982 if(scm->fp)
983 unix_attach_fds(scm, skb);
985 skb->h.raw = skb->data;
986 err =memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
987 if(err)
988 goto out_free;
990 other =unix_peer(sk);
991 if(other && other->dead)
994 * Check with 1003.1g - what should
995 * datagram error
997 dead:
998 unix_unlock(other);
999 unix_peer(sk)=NULL;
1000 other = NULL;
1001 err = -ECONNRESET;
1002 if(sunaddr == NULL)
1003 goto out_free;
1005 if(!other)
1007 other =unix_find_other(sunaddr, namelen, sk->type, hash, &err);
1008 if(other==NULL)
1009 goto out_free;
1010 err = -EINVAL;
1011 if(!unix_may_send(sk, other))
1012 goto out_unlock;
1015 while(skb_queue_len(&other->receive_queue) >=
1016 sysctl_unix_max_dgram_qlen)
1018 if(sock->file->f_flags & O_NONBLOCK)
1020 err = -EAGAIN;
1021 goto out_unlock;
1023 interruptible_sleep_on(&unix_dgram_wqueue);
1024 if(other->dead)
1025 goto dead;
1026 if(sk->shutdown & SEND_SHUTDOWN)
1028 err = -EPIPE;
1029 goto out_unlock;
1031 if(signal_pending(current))
1033 err = -ERESTARTSYS;
1034 goto out_unlock;
1038 skb_queue_tail(&other->receive_queue, skb);
1039 other->data_ready(other,len);
1041 if(!unix_peer(sk))
1042 unix_unlock(other);
1043 return len;
1045 out_unlock:
1046 unix_unlock(other);
1047 out_free:
1048 kfree_skb(skb);
1049 out:
1050 return err;
1054 static intunix_stream_sendmsg(struct socket *sock,struct msghdr *msg,int len,
1055 struct scm_cookie *scm)
1057 struct sock *sk = sock->sk;
1058 unix_socket *other;
1059 struct sockaddr_un *sunaddr=msg->msg_name;
1060 int err,size;
1061 struct sk_buff *skb;
1062 int limit=0;
1063 int sent=0;
1065 if(sock->flags & SO_ACCEPTCON)
1066 return(-EINVAL);
1068 if(msg->msg_flags&MSG_OOB)
1069 return-EOPNOTSUPP;
1071 if(msg->msg_flags&~(MSG_DONTWAIT|MSG_NOSIGNAL))
1072 return-EINVAL;
1074 if(msg->msg_namelen) {
1075 if(sk->state==TCP_ESTABLISHED)
1076 return-EISCONN;
1077 else
1078 return-EOPNOTSUPP;
1079 }else{
1080 sunaddr = NULL;
1081 if(!unix_peer(sk))
1082 return-ENOTCONN;
1085 if(sk->shutdown&SEND_SHUTDOWN) {
1086 if(!(msg->msg_flags&MSG_NOSIGNAL))
1087 send_sig(SIGPIPE,current,0);
1088 return-EPIPE;
1091 while(sent < len)
1094 * Optimisation for the fact that under 0.01% of X messages typically
1095 * need breaking up.
1098 size=len-sent;
1100 /* Keep two messages in the pipe so it schedules better */
1101 if(size > sk->sndbuf/2-16)
1102 size = sk->sndbuf/2-16;
1105 * Keep to page sized kmalloc()'s as various people
1106 * have suggested. Big mallocs stress the vm too
1107 * much.
1110 if(size >4096-16)
1111 limit =4096-16;/* Fall back to a page if we can't grab a big buffer this instant */
1112 else
1113 limit =0;/* Otherwise just grab and wait */
1116 * Grab a buffer
1119 skb=sock_alloc_send_skb(sk,size,limit,msg->msg_flags&MSG_DONTWAIT, &err);
1121 if(skb==NULL)
1123 if(sent)
1124 goto out;
1125 return err;
1129 * If you pass two values to the sock_alloc_send_skb
1130 * it tries to grab the large buffer with GFP_BUFFER
1131 * (which can fail easily), and if it fails grab the
1132 * fallback size buffer which is under a page and will
1133 * succeed. [Alan]
1135 size =min(size,skb_tailroom(skb));
1137 memcpy(UNIXCREDS(skb), &scm->creds,sizeof(struct ucred));
1138 UNIXCB(skb).attr = msg->msg_flags;
1139 if(scm->fp)
1140 unix_attach_fds(scm, skb);
1142 if(memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) {
1143 kfree_skb(skb);
1144 if(sent)
1145 goto out;
1146 return-EFAULT;
1149 other=unix_peer(sk);
1151 if(other->dead || (sk->shutdown & SEND_SHUTDOWN))
1153 kfree_skb(skb);
1154 if(sent)
1155 goto out;
1156 if(!(msg->msg_flags&MSG_NOSIGNAL))
1157 send_sig(SIGPIPE,current,0);
1158 return-EPIPE;
1161 skb_queue_tail(&other->receive_queue, skb);
1162 other->data_ready(other,size);
1163 sent+=size;
1165 out:
1166 return sent;
1170 * Sleep until data has arrive. But check for races..
1173 static voidunix_data_wait(unix_socket * sk)
1175 if(!skb_peek(&sk->receive_queue))
1177 sk->socket->flags |= SO_WAITDATA;
1178 interruptible_sleep_on(sk->sleep);
1179 sk->socket->flags &= ~SO_WAITDATA;
1183 static intunix_dgram_recvmsg(struct socket *sock,struct msghdr *msg,int size,
1184 int flags,struct scm_cookie *scm)
1186 struct sock *sk = sock->sk;
1187 int noblock = flags & MSG_DONTWAIT;
1188 struct sk_buff *skb;
1189 int err;
1191 if(flags&MSG_OOB)
1192 return-EOPNOTSUPP;
1194 msg->msg_namelen =0;
1196 skb =skb_recv_datagram(sk, flags, noblock, &err);
1197 if(!skb)
1198 goto out;
1201 * sysctl_unix_max_dgram_qlen may change over the time we blocked
1202 * in the waitqueue so we must wakeup every time we shrink the
1203 * receiver queue. -arca
1205 wake_up_interruptible(&unix_dgram_wqueue);
1207 if(msg->msg_name)
1209 msg->msg_namelen =sizeof(short);
1210 if(skb->sk->protinfo.af_unix.addr)
1212 msg->msg_namelen=skb->sk->protinfo.af_unix.addr->len;
1213 memcpy(msg->msg_name,
1214 skb->sk->protinfo.af_unix.addr->name,
1215 skb->sk->protinfo.af_unix.addr->len);
1219 if(size > skb->len)
1220 size = skb->len;
1221 else if(size < skb->len)
1222 msg->msg_flags |= MSG_TRUNC;
1224 err =skb_copy_datagram_iovec(skb,0, msg->msg_iov, size);
1225 if(err)
1226 goto out_free;
1228 scm->creds = *UNIXCREDS(skb);
1230 if(!(flags & MSG_PEEK))
1232 if(UNIXCB(skb).fp)
1233 unix_detach_fds(scm, skb);
1235 else
1237 /* It is questionable: on PEEK we could:
1238 - do not return fds - good, but too simple 8)
1239 - return fds, and do not return them on read (old strategy,
1240 apparently wrong)
1241 - clone fds (I choosed it for now, it is the most universal
1242 solution)
1244 POSIX 1003.1g does not actually define this clearly
1245 at all. POSIX 1003.1g doesn't define a lot of things
1246 clearly however!
1249 if(UNIXCB(skb).fp)
1250 scm->fp =scm_fp_dup(UNIXCB(skb).fp);
1252 err = size;
1254 out_free:
1255 skb_free_datagram(sk,skb);
1256 out:
1257 return err;
1261 static intunix_stream_recvmsg(struct socket *sock,struct msghdr *msg,int size,
1262 int flags,struct scm_cookie *scm)
1264 struct sock *sk = sock->sk;
1265 int noblock = flags & MSG_DONTWAIT;
1266 struct sockaddr_un *sunaddr=msg->msg_name;
1267 int copied =0;
1268 int check_creds =0;
1269 int target =1;
1271 if(sock->flags & SO_ACCEPTCON)
1272 return(-EINVAL);
1274 if(flags&MSG_OOB)
1275 return-EOPNOTSUPP;
1276 if(flags&MSG_WAITALL)
1277 target = size;
1280 msg->msg_namelen =0;
1282 /* Lock the socket to prevent queue disordering
1283 * while sleeps in memcpy_tomsg
1286 down(&sk->protinfo.af_unix.readsem);
1290 int chunk;
1291 struct sk_buff *skb;
1293 skb=skb_dequeue(&sk->receive_queue);
1294 if(skb==NULL)
1296 if(copied >= target)
1297 break;
1300 * POSIX 1003.1g mandates this order.
1303 if(sk->err)
1305 up(&sk->protinfo.af_unix.readsem);
1306 returnsock_error(sk);
1309 if(sk->shutdown & RCV_SHUTDOWN)
1310 break;
1311 up(&sk->protinfo.af_unix.readsem);
1312 if(noblock)
1313 return-EAGAIN;
1314 unix_data_wait(sk);
1315 if(signal_pending(current))
1316 return-ERESTARTSYS;
1317 down(&sk->protinfo.af_unix.readsem);
1318 continue;
1321 /* Never glue messages from different writers */
1322 if(check_creds &&
1323 memcmp(UNIXCREDS(skb), &scm->creds,sizeof(scm->creds)) !=0)
1325 skb_queue_head(&sk->receive_queue, skb);
1326 break;
1329 /* Copy address just once */
1330 if(sunaddr)
1332 msg->msg_namelen =sizeof(short);
1333 if(skb->sk->protinfo.af_unix.addr)
1335 msg->msg_namelen=skb->sk->protinfo.af_unix.addr->len;
1336 memcpy(sunaddr,
1337 skb->sk->protinfo.af_unix.addr->name,
1338 skb->sk->protinfo.af_unix.addr->len);
1340 sunaddr = NULL;
1343 chunk =min(skb->len, size);
1344 if(memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1345 skb_queue_head(&sk->receive_queue, skb);
1346 if(copied ==0)
1347 copied = -EFAULT;
1348 break;
1350 copied += chunk;
1351 size -= chunk;
1353 /* Copy credentials */
1354 scm->creds = *UNIXCREDS(skb);
1355 check_creds =1;
1357 /* Mark read part of skb as used */
1358 if(!(flags & MSG_PEEK))
1360 skb_pull(skb, chunk);
1362 if(UNIXCB(skb).fp)
1363 unix_detach_fds(scm, skb);
1365 /* put the skb back if we didn't use it up.. */
1366 if(skb->len)
1368 skb_queue_head(&sk->receive_queue, skb);
1369 break;
1372 kfree_skb(skb);
1374 if(scm->fp)
1375 break;
1377 else
1379 /* It is questionable, see note in unix_dgram_recvmsg.
1382 if(UNIXCB(skb).fp)
1383 scm->fp =scm_fp_dup(UNIXCB(skb).fp);
1385 /* put message back and return */
1386 skb_queue_head(&sk->receive_queue, skb);
1387 break;
1389 }while(size);
1391 up(&sk->protinfo.af_unix.readsem);
1392 return copied;
1395 static intunix_shutdown(struct socket *sock,int mode)
1397 struct sock *sk = sock->sk;
1398 unix_socket *other=unix_peer(sk);
1400 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1402 if(mode) {
1403 sk->shutdown |= mode;
1404 sk->state_change(sk);
1405 if(other && sk->type == SOCK_STREAM &&
1406 unix_our_peer(sk, other)) {
1407 int peer_mode =0;
1409 if(mode&RCV_SHUTDOWN)
1410 peer_mode |= SEND_SHUTDOWN;
1411 if(mode&SEND_SHUTDOWN)
1412 peer_mode |= RCV_SHUTDOWN;
1413 other->shutdown |= peer_mode;
1414 if(peer_mode&RCV_SHUTDOWN)
1415 other->data_ready(other,0);
1416 else
1417 other->state_change(other);
1420 return0;
1424 static intunix_ioctl(struct socket *sock,unsigned int cmd,unsigned long arg)
1426 struct sock *sk = sock->sk;
1427 long amount=0;
1429 switch(cmd)
1432 case TIOCOUTQ:
1433 amount = sk->sndbuf -atomic_read(&sk->wmem_alloc);
1434 if(amount<0)
1435 amount=0;
1436 returnput_user(amount, (int*)arg);
1437 case TIOCINQ:
1439 struct sk_buff *skb;
1440 if(sk->state==TCP_LISTEN)
1441 return-EINVAL;
1443 * These two are safe on current systems as
1444 * only user tasks fiddle here
1446 if((skb=skb_peek(&sk->receive_queue))!=NULL)
1447 amount=skb->len;
1448 returnput_user(amount, (int*)arg);
1451 default:
1452 return-EINVAL;
1454 /*NOTREACHED*/
1455 return(0);
1458 static unsigned intunix_poll(struct file * file,struct socket *sock, poll_table *wait)
1460 struct sock *sk = sock->sk;
1461 unsigned int mask;
1463 poll_wait(file, sk->sleep, wait);
1464 mask =0;
1466 /* exceptional events? */
1467 if(sk->err)
1468 mask |= POLLERR;
1469 if(sk->shutdown & RCV_SHUTDOWN)
1470 mask |= POLLHUP;
1472 /* readable? */
1473 if(!skb_queue_empty(&sk->receive_queue))
1474 mask |= POLLIN | POLLRDNORM;
1476 /* Connection-based need to check for termination and startup */
1477 if(sk->type == SOCK_STREAM && sk->state==TCP_CLOSE)
1478 mask |= POLLHUP;
1481 * we set writable also when the other side has shut down the
1482 * connection. This prevents stuck sockets.
1484 if(sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE)
1485 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1487 return mask;
1490 static voidunix_stream_write_space(struct sock *sk)
1492 if(sk->dead)
1493 return;
1494 wake_up_interruptible(sk->sleep);
1495 if(sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE)
1496 sock_wake_async(sk->socket,2);
#ifdef CONFIG_PROC_FS
/*
 *	read_proc callback: format one header line followed by one line per
 *	socket visited by forall_unix_sockets() into buffer.
 *
 *	offset/length select the window of the generated text the caller
 *	wants. Text that lies entirely before offset is discarded as we go
 *	(len is reset and begin advanced), and generation stops once the
 *	window has been passed. *eof is set only when every socket was
 *	visited. *start is pointed at the first wanted byte and the number
 *	of valid bytes from there (clamped to 0..length) is returned.
 *
 *	NOTE(review): nothing here bounds the writes against the size of
 *	buffer; the caller is presumably expected to supply enough slack -
 *	confirm.
 */
static int unix_read_proc(char *buffer, char **start, off_t offset,
			  int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int len = 0;
	int i;
	unix_socket *s;

	len += sprintf(buffer, "Num RefCount Protocol Flags Type St "
		       "Inode Path\n");

	forall_unix_sockets (i, s) {
		len += sprintf(buffer + len, "%p: %08X %08X %08lX %04X %02X %5ld",
			       s,
			       atomic_read(ulock(s)),
			       0,
			       s->socket ? s->socket->flags : 0,
			       s->type,
			       s->socket ? s->socket->state :
					   (s->state == TCP_ESTABLISHED ?
					    SS_CONNECTING : SS_DISCONNECTING),
			       s->socket ? s->socket->inode->i_ino : 0);

		if (s->protinfo.af_unix.addr) {
			/* Bytes of sun_path covered by the bound address. */
			size_t path_len = s->protinfo.af_unix.addr->len - sizeof(short);

			buffer[len++] = ' ';
			memcpy(buffer + len,
			       s->protinfo.af_unix.addr->name->sun_path,
			       path_len);
			if (UNIX_ABSTRACT(s)) {
				/* Overwrite the first path byte with '@'. */
				buffer[len] = '@';
			} else {
				/*
				 * Count one byte less, so the last copied byte
				 * (presumably the trailing NUL) is replaced by
				 * the newline below.
				 */
				len--;
			}
			len += path_len;
		}
		buffer[len++] = '\n';

		pos = begin + len;
		if (pos < offset) {
			/* Everything so far precedes the window: drop it. */
			len = 0;
			begin = pos;
		}
		if (pos > offset + length)
			goto done;
	}
	*eof = 1;

done:
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
#endif
1559 struct proto_ops unix_stream_ops = {
1560 PF_UNIX,
1562 sock_no_dup,
1563 unix_release,
1564 unix_bind,
1565 unix_stream_connect,
1566 unix_socketpair,
1567 unix_accept,
1568 unix_getname,
1569 unix_poll,
1570 unix_ioctl,
1571 unix_listen,
1572 unix_shutdown,
1573 sock_no_setsockopt,
1574 sock_no_getsockopt,
1575 sock_no_fcntl,
1576 unix_stream_sendmsg,
1577 unix_stream_recvmsg
1580 struct proto_ops unix_dgram_ops = {
1581 PF_UNIX,
1583 sock_no_dup,
1584 unix_release,
1585 unix_bind,
1586 unix_dgram_connect,
1587 unix_socketpair,
1588 sock_no_accept,
1589 unix_getname,
1590 datagram_poll,
1591 unix_ioctl,
1592 sock_no_listen,
1593 unix_shutdown,
1594 sock_no_setsockopt,
1595 sock_no_getsockopt,
1596 sock_no_fcntl,
1597 unix_dgram_sendmsg,
1598 unix_dgram_recvmsg
1601 struct net_proto_family unix_family_ops = {
1602 PF_UNIX,
1603 unix_create
1606 #ifdef MODULE
1607 #ifdef CONFIG_SYSCTL
1608 externvoidunix_sysctl_register(void);
1609 externvoidunix_sysctl_unregister(void);
1610 #endif
1612 intinit_module(void)
1613 #else
1614 __initfunc(voidunix_proto_init(struct net_proto *pro))
1615 #endif
1617 struct sk_buff *dummy_skb;
1618 struct proc_dir_entry *ent;
1620 printk(KERN_INFO "NET4: Unix domain sockets 1.0 for Linux NET4.0.\n");
1621 if(sizeof(struct unix_skb_parms) >sizeof(dummy_skb->cb))
1623 printk(KERN_CRIT "unix_proto_init: panic\n");
1624 #ifdef MODULE
1625 return-1;
1626 #else
1627 return;
1628 #endif
1630 sock_register(&unix_family_ops);
1631 #ifdef CONFIG_PROC_FS
1632 ent =create_proc_entry("net/unix",0,0);
1633 ent->read_proc = unix_read_proc;
1634 #endif
1636 #ifdef MODULE
1637 #ifdef CONFIG_SYSCTL
1638 unix_sysctl_register();
1639 #endif
1641 return0;
1642 #endif
#ifdef MODULE
/*
 *	Module unload hook: unregister the PF_UNIX family, then drop the
 *	sysctl entries (CONFIG_SYSCTL) and the net/unix proc entry
 *	(CONFIG_PROC_FS) when those were configured.
 */
void cleanup_module(void)
{
	sock_unregister(PF_UNIX);

#ifdef CONFIG_SYSCTL
	unix_sysctl_unregister();
#endif

#ifdef CONFIG_PROC_FS
	remove_proc_entry("net/unix", 0);
#endif
}
#endif
/*
 * Local variables:
 *  compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c"
 * End:
 */
close