Import 1.3.99
[davej-history.git] / net / unix / af_unix.c
blob4ed43a090d846a5fc4d2e4317f4e7001259f6e49
/*
 * NET3:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@cymru.net>
 *
 *		Currently this contains all but the file descriptor passing code.
 *		Before that goes in the odd bugs in the iovec handlers need
 *		fixing, and this bit testing. BSD fd passing is not a trivial part
 *		of the exercise it turns out. Anyone like writing garbage collectors?
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 */

48 #include <linux/config.h>
49 #include <linux/kernel.h>
50 #include <linux/major.h>
51 #include <linux/signal.h>
52 #include <linux/sched.h>
53 #include <linux/errno.h>
54 #include <linux/string.h>
55 #include <linux/stat.h>
56 #include <linux/socket.h>
57 #include <linux/un.h>
58 #include <linux/fcntl.h>
59 #include <linux/termios.h>
60 #include <linux/socket.h>
61 #include <linux/sockios.h>
62 #include <linux/net.h>
63 #include <linux/in.h>
64 #include <linux/fs.h>
65 #include <linux/malloc.h>
66 #include <asm/segment.h>
67 #include <linux/skbuff.h>
68 #include <linux/netdevice.h>
69 #include <net/sock.h>
70 #include <net/tcp.h>
71 #include <net/af_unix.h>
72 #include <linux/proc_fs.h>
74 unix_socket *unix_socket_list=NULL;
76 #define min(a,b) (((a)<(b))?(a):(b))
/*
 *	Make sure the unix name is null-terminated.
 *
 *	A NUL byte is stored at byte offset 'len' of the address structure.
 *	When 'len' would fall outside the structure, the very last byte of
 *	the structure is zeroed instead.
 */
static inline void unix_mkname(struct sockaddr_un *sunaddr, unsigned long len)
{
	char *raw = (char *)sunaddr;
	unsigned long last = sizeof(*sunaddr) - 1;

	raw[(len < sizeof(*sunaddr)) ? len : last] = 0;
}

90 * Note: Sockets may not be removed _during_ an interrupt or net_bh
91 * handler using this technique. They can be added although we do not
92 * use this facility.
95 static voidunix_remove_socket(unix_socket *sk)
97 unix_socket **s;
99 cli();
100 s=&unix_socket_list;
102 while(*s!=NULL)
104 if(*s==sk)
106 *s=sk->next;
107 sti();
108 return;
110 s=&((*s)->next);
112 sti();
115 static voidunix_insert_socket(unix_socket *sk)
117 cli();
118 sk->next=unix_socket_list;
119 unix_socket_list=sk;
120 sti();
123 static unix_socket *unix_find_socket(struct inode *i)
125 unix_socket *s;
126 cli();
127 s=unix_socket_list;
128 while(s)
130 if(s->protinfo.af_unix.inode==i)
132 sti();
133 return(s);
135 s=s->next;
137 sti();
138 return(NULL);
142 * Delete a unix socket. We have to allow for deferring this on a timer.
145 static voidunix_destroy_timer(unsigned long data)
147 unix_socket *sk=(unix_socket *)data;
148 if(sk->protinfo.af_unix.locks==0&& sk->wmem_alloc==0)
150 if(sk->protinfo.af_unix.name)
151 kfree(sk->protinfo.af_unix.name);
152 sk_free(sk);
153 return;
157 * Retry;
160 sk->timer.expires=jiffies+10*HZ;/* No real hurry try it every 10 seconds or so */
161 add_timer(&sk->timer);
165 static voidunix_delayed_delete(unix_socket *sk)
167 sk->timer.data=(unsigned long)sk;
168 sk->timer.expires=jiffies+HZ;/* Normally 1 second after will clean up. After that we try every 10 */
169 sk->timer.function=unix_destroy_timer;
170 add_timer(&sk->timer);
173 static voidunix_destroy_socket(unix_socket *sk)
175 struct sk_buff *skb;
177 unix_remove_socket(sk);
179 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
181 if(sk->state==TCP_LISTEN)
183 unix_socket *osk=skb->sk;
184 osk->state=TCP_CLOSE;
185 kfree_skb(skb, FREE_WRITE);/* Now surplus - free the skb first before the socket */
186 osk->state_change(osk);/* So the connect wakes and cleans up (if any) */
187 /* osk will be destroyed when it gets to close or the timer fires */
189 else
191 /* passed fds are erased where?? */
192 kfree_skb(skb,FREE_WRITE);
196 if(sk->protinfo.af_unix.inode!=NULL)
198 iput(sk->protinfo.af_unix.inode);
199 sk->protinfo.af_unix.inode=NULL;
202 if(--sk->protinfo.af_unix.locks==0&& sk->wmem_alloc==0)
204 if(sk->protinfo.af_unix.name)
205 kfree(sk->protinfo.af_unix.name);
206 sk_free(sk);
208 else
210 sk->dead=1;
211 unix_delayed_delete(sk);/* Try every so often until buffers are all freed */
216 * Fixme: We need async I/O on AF_UNIX doing next.
219 static intunix_fcntl(struct socket *sock,unsigned int cmd,unsigned long arg)
221 return-EINVAL;
225 * Yes socket options work with the new unix domain socketry!!!!!!!
228 static intunix_setsockopt(struct socket *sock,int level,int optname,char*optval,int optlen)
230 unix_socket *sk=sock->data;
231 if(level!=SOL_SOCKET)
232 return-EOPNOTSUPP;
233 returnsock_setsockopt(sk,level,optname,optval,optlen);
236 static intunix_getsockopt(struct socket *sock,int level,int optname,char*optval,int*optlen)
238 unix_socket *sk=sock->data;
239 if(level!=SOL_SOCKET)
240 return-EOPNOTSUPP;
241 returnsock_getsockopt(sk,level,optname,optval,optlen);
244 static intunix_listen(struct socket *sock,int backlog)
246 unix_socket *sk=sock->data;
247 if(sk->type!=SOCK_STREAM)
248 return-EOPNOTSUPP;/* Only stream sockets accept */
249 if(sk->protinfo.af_unix.name==NULL)
250 return-EINVAL;/* No listens on an unbound socket */
251 sk->max_ack_backlog=backlog;
252 sk->state=TCP_LISTEN;
253 return0;
256 static voiddef_callback1(struct sock *sk)
258 if(!sk->dead)
259 wake_up_interruptible(sk->sleep);
262 static voiddef_callback2(struct sock *sk,int len)
264 if(!sk->dead)
266 wake_up_interruptible(sk->sleep);
267 sock_wake_async(sk->socket,1);
271 static voiddef_callback3(struct sock *sk)
273 if(!sk->dead)
275 wake_up_interruptible(sk->sleep);
276 sock_wake_async(sk->socket,2);
280 static intunix_create(struct socket *sock,int protocol)
282 unix_socket *sk;
283 if(protocol && protocol != PF_UNIX)
284 return-EPROTONOSUPPORT;
285 sk=(unix_socket *)sk_alloc(GFP_KERNEL);
286 if(sk==NULL)
287 return-ENOMEM;
288 switch(sock->type)
290 case SOCK_STREAM:
291 break;
293 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
294 * nothing uses it.
296 case SOCK_RAW:
297 sock->type=SOCK_DGRAM;
298 case SOCK_DGRAM:
299 break;
300 default:
301 sk_free(sk);
302 return-ESOCKTNOSUPPORT;
304 sk->type=sock->type;
305 init_timer(&sk->timer);
306 skb_queue_head_init(&sk->write_queue);
307 skb_queue_head_init(&sk->receive_queue);
308 skb_queue_head_init(&sk->back_log);
309 sk->protinfo.af_unix.family=AF_UNIX;
310 sk->protinfo.af_unix.inode=NULL;
311 sk->protinfo.af_unix.locks=1;/* Us */
312 sk->protinfo.af_unix.readsem=MUTEX;/* single task reading lock */
313 sk->rcvbuf=SK_RMEM_MAX;
314 sk->sndbuf=SK_WMEM_MAX;
315 sk->allocation=GFP_KERNEL;
316 sk->state=TCP_CLOSE;
317 sk->priority=SOPRI_NORMAL;
318 sk->state_change=def_callback1;
319 sk->data_ready=def_callback2;
320 sk->write_space=def_callback3;
321 sk->error_report=def_callback1;
322 sk->mtu=4096;
323 sk->socket=sock;
324 sock->data=(void*)sk;
325 sk->sleep=sock->wait;
326 unix_insert_socket(sk);
327 return0;
/*
 *	Set up 'newsock' as a fresh AF_UNIX socket with protocol 0.
 *	'oldsock' is not consulted.
 */
static int unix_dup(struct socket *newsock, struct socket *oldsock)
{
	return unix_create(newsock, 0);
}

335 static intunix_release(struct socket *sock,struct socket *peer)
337 unix_socket *sk=sock->data;
338 unix_socket *skpair;
340 /* May not have data attached */
342 if(sk==NULL)
343 return0;
345 sk->state_change(sk);
346 sk->dead=1;
347 skpair=(unix_socket *)sk->protinfo.af_unix.other;/* Person we send to (default) */
348 if(sk->type==SOCK_STREAM && skpair!=NULL && skpair->state!=TCP_LISTEN)
350 skpair->shutdown=SHUTDOWN_MASK;/* No more writes */
351 skpair->state_change(skpair);/* Wake any blocked writes */
353 if(skpair!=NULL)
354 skpair->protinfo.af_unix.locks--;/* It may now die */
355 sk->protinfo.af_unix.other=NULL;/* No pair */
356 unix_destroy_socket(sk);/* Try to flush out this socket. Throw out buffers at least */
359 * FIXME: BSD difference: In BSD all sockets connected to use get ECONNRESET and we die on the spot. In
360 * Linux we behave like files and pipes do and wait for the last dereference.
363 return0;
/*
 *	Resolve a kernel-space path name to the unix socket bound to it.
 *
 *	On success the socket is returned. On failure NULL is returned and
 *	*error holds the open_namei() error, or -ECONNREFUSED when the inode
 *	exists but no listed socket is bound to it.
 */
static unix_socket *unix_find_other(char *path, int *error)
{
	struct inode *inode;
	unix_socket *found;
	int saved_fs;
	int rc;

	/* 'path' lives in kernel space, so switch the user segment for the lookup */
	saved_fs = get_fs();
	set_fs(get_ds());
	rc = open_namei(path, 2, S_IFSOCK, &inode, NULL);
	set_fs(saved_fs);

	if (rc < 0) {
		*error = rc;
		return NULL;
	}

	found = unix_find_socket(inode);
	iput(inode);
	if (found == NULL)
		*error = -ECONNREFUSED;
	return found;
}

394 static intunix_bind(struct socket *sock,struct sockaddr *uaddr,int addr_len)
396 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
397 unix_socket *sk=sock->data;
398 int old_fs;
399 int err;
401 if(sk->protinfo.af_unix.name)
402 return-EINVAL;/* Already bound */
404 if(addr_len>sizeof(struct sockaddr_un) || addr_len<3|| sunaddr->sun_family!=AF_UNIX)
405 return-EINVAL;
406 unix_mkname(sunaddr, addr_len);
408 * Put ourselves in the filesystem
410 if(sk->protinfo.af_unix.inode!=NULL)
411 return-EINVAL;
413 sk->protinfo.af_unix.name=kmalloc(addr_len+1, GFP_KERNEL);
414 if(sk->protinfo.af_unix.name==NULL)
415 return-ENOMEM;
416 memcpy(sk->protinfo.af_unix.name, sunaddr->sun_path, addr_len+1);
418 old_fs=get_fs();
419 set_fs(get_ds());
421 err=do_mknod(sk->protinfo.af_unix.name,S_IFSOCK|S_IRWXUGO,0);
422 if(err==0)
423 err=open_namei(sk->protinfo.af_unix.name,2, S_IFSOCK, &sk->protinfo.af_unix.inode, NULL);
425 set_fs(old_fs);
427 if(err<0)
429 kfree_s(sk->protinfo.af_unix.name,addr_len+1);
430 sk->protinfo.af_unix.name=NULL;
431 if(err==-EEXIST)
432 return-EADDRINUSE;
433 else
434 return err;
437 return0;
/*
 *	Connect a socket to the socket bound at the given address.
 *
 *	Datagram sockets simply record the peer, replacing any previous one.
 *	Stream sockets queue a zero-length marker skb on the target's receive
 *	queue and then wait until the state leaves TCP_SYN_SENT.
 *
 *	Returns 0 or a negative errno: -EISCONN, -EALREADY, -ECONNREFUSED,
 *	-EINVAL, -EPROTOTYPE, -EINPROGRESS, -ERESTARTSYS, or whatever
 *	unix_find_other() / sock_alloc_send_skb() reported.
 */
static int unix_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
{
	unix_socket *sk = sock->data;
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	unix_socket *peer;
	struct sk_buff *marker;
	int err;

	/* A stream socket that already has a peer: finish or refuse */
	if (sk->type == SOCK_STREAM && sk->protinfo.af_unix.other) {
		if (sock->state != SS_CONNECTING)
			return -EISCONN;
		if (sk->state == TCP_ESTABLISHED) {
			sock->state = SS_CONNECTED;
			return 0;
		}
		if (sk->state == TCP_CLOSE) {
			sock->state = SS_UNCONNECTED;
			return -ECONNREFUSED;
		}
		if (flags & O_NONBLOCK)
			return -EALREADY;
		/*
		 *	Drop through the connect up logic to the wait.
		 */
	}

	if (addr_len < sizeof(sunaddr->sun_family)+1 || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	unix_mkname(sunaddr, addr_len);

	if (sk->type == SOCK_DGRAM) {
		/* Let go of any previous default destination first */
		if (sk->protinfo.af_unix.other) {
			sk->protinfo.af_unix.other->protinfo.af_unix.locks--;
			sk->protinfo.af_unix.other = NULL;
			sock->state = SS_UNCONNECTED;
		}
		peer = unix_find_other(sunaddr->sun_path, &err);
		if (peer == NULL)
			return err;
		if (peer->type != sk->type)
			return -EPROTOTYPE;
		peer->protinfo.af_unix.locks++;
		sk->protinfo.af_unix.other = peer;
		sock->state = SS_CONNECTED;
		sk->state = TCP_ESTABLISHED;
		return 0;			/* Done */
	}

	if (sock->state == SS_UNCONNECTED) {
		/*
		 *	Now ready to connect
		 */
		marker = sock_alloc_send_skb(sk, 0, 0, 0, &err);	/* Marker object */
		if (marker == NULL)
			return err;
		marker->sk = sk;		/* So they know it is us */
		marker->free = 1;
		sk->state = TCP_CLOSE;
		unix_mkname(sunaddr, addr_len);

		peer = unix_find_other(sunaddr->sun_path, &err);
		if (peer != NULL && peer->type != sk->type) {
			err = -EPROTOTYPE;
			peer = NULL;
		}
		if (peer == NULL) {
			kfree_skb(marker, FREE_WRITE);
			return err;
		}

		/* Lock the other socket so it doesn't run off for a moment */
		peer->protinfo.af_unix.locks++;
		peer->ack_backlog++;
		sk->protinfo.af_unix.other = peer;
		skb_queue_tail(&peer->receive_queue, marker);
		sk->state = TCP_SYN_SENT;
		sock->state = SS_CONNECTING;
		sti();
		peer->data_ready(peer, 0);	/* Wake up ! */
	}

	/* Wait for an accept */
	cli();
	while (sk->state == TCP_SYN_SENT) {
		if (flags & O_NONBLOCK) {
			err = -EINPROGRESS;
			goto out;
		}
		interruptible_sleep_on(sk->sleep);
		if (current->signal & ~current->blocked) {
			err = -ERESTARTSYS;
			goto out;
		}
	}

	/*
	 *	Has the other end closed on us ?
	 */
	if (sk->state == TCP_CLOSE) {
		sk->protinfo.af_unix.other->protinfo.af_unix.locks--;
		sk->protinfo.af_unix.other = NULL;
		sock->state = SS_UNCONNECTED;
		err = -ECONNREFUSED;
		goto out;
	}

	/*
	 *	Amazingly it has worked
	 */
	sock->state = SS_CONNECTED;
	err = 0;
out:
	sti();
	return err;
}

572 static intunix_socketpair(struct socket *a,struct socket *b)
574 unix_socket *ska,*skb;
576 ska=a->data;
577 skb=b->data;
579 /* Join our sockets back to back */
580 ska->protinfo.af_unix.locks++;
581 skb->protinfo.af_unix.locks++;
582 ska->protinfo.af_unix.other=skb;
583 skb->protinfo.af_unix.other=ska;
584 ska->state=TCP_ESTABLISHED;
585 skb->state=TCP_ESTABLISHED;
586 return0;
589 static intunix_accept(struct socket *sock,struct socket *newsock,int flags)
591 unix_socket *sk=sock->data;
592 unix_socket *newsk, *tsk;
593 struct sk_buff *skb;
595 if(sk->type!=SOCK_STREAM)
597 return-EOPNOTSUPP;
599 if(sk->state!=TCP_LISTEN)
601 return-EINVAL;
604 newsk=newsock->data;
605 if(sk->protinfo.af_unix.name!=NULL)
607 newsk->protinfo.af_unix.name=kmalloc(strlen(sk->protinfo.af_unix.name)+1, GFP_KERNEL);
608 if(newsk->protinfo.af_unix.name==NULL)
609 return-ENOMEM;
610 strcpy(newsk->protinfo.af_unix.name, sk->protinfo.af_unix.name);
615 cli();
616 skb=skb_dequeue(&sk->receive_queue);
617 if(skb==NULL)
619 if(flags&O_NONBLOCK)
621 sti();
622 return-EAGAIN;
624 interruptible_sleep_on(sk->sleep);
625 if(current->signal & ~current->blocked)
627 sti();
628 return-ERESTARTSYS;
630 sti();
633 while(skb==NULL);
634 tsk=skb->sk;
635 kfree_skb(skb, FREE_WRITE);/* The buffer is just used as a tag */
636 sk->ack_backlog--;
637 newsk->protinfo.af_unix.other=tsk;
638 tsk->protinfo.af_unix.other=newsk;
639 tsk->state=TCP_ESTABLISHED;
640 newsk->state=TCP_ESTABLISHED;
641 newsk->protinfo.af_unix.locks++;/* Swap lock over */
642 sk->protinfo.af_unix.locks--;/* Locked to child socket not master */
643 tsk->protinfo.af_unix.locks++;/* Back lock */
644 sti();
645 tsk->state_change(tsk);/* Wake up any sleeping connect */
646 sock_wake_async(tsk->socket,0);
647 return0;
650 static intunix_getname(struct socket *sock,struct sockaddr *uaddr,int*uaddr_len,int peer)
652 unix_socket *sk=sock->data;
653 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
655 if(peer)
657 if(sk->protinfo.af_unix.other==NULL)
658 return-ENOTCONN;
659 sk=sk->protinfo.af_unix.other;
661 sunaddr->sun_family=AF_UNIX;
662 if(sk->protinfo.af_unix.name==NULL)
664 *sunaddr->sun_path=0;
665 *uaddr_len=sizeof(sunaddr->sun_family)+1;
666 return0;/* Not bound */
668 *uaddr_len=sizeof(sunaddr->sun_family)+strlen(sk->protinfo.af_unix.name)+1;
669 strcpy(sunaddr->sun_path,sk->protinfo.af_unix.name);/* 108 byte limited */
670 return0;
674 * Support routines for struct cmsghdr handling
677 static struct cmsghdr *unix_copyrights(void*userp,int len)
679 struct cmsghdr *cm;
681 if(len>256|| len <=0)
682 return NULL;
683 cm=kmalloc(len, GFP_KERNEL);
684 memcpy_fromfs(cm, userp, len);
685 return cm;
/*
 *	Return a header block
 *
 *	Copy 'len' bytes of the kernel copy 'cm' out to the user address
 *	'userp', then free the kernel copy.
 */
static void unix_returnrights(void *userp, int len, struct cmsghdr *cm)
{
	memcpy_tofs(userp, cm, len);
	kfree(cm);
}

699 * Copy file descriptors into system space.
700 * Return number copied or negative error code
703 static intunix_fd_copy(struct sock *sk,struct cmsghdr *cmsg,struct file **fp)
705 int num=cmsg->cmsg_len-sizeof(struct cmsghdr);
706 int i;
707 int*fdp=(int*)cmsg->cmsg_data;
708 num/=4;/* Odd bytes are forgotten in BSD not errored */
711 if(num>=UNIX_MAX_FD)
712 return-EINVAL;
715 * Verify the descriptors.
718 for(i=0; i< num; i++)
720 int fd;
722 fd = fdp[i];
723 #if 0
724 printk("testing fd %d\n", fd);
725 #endif
726 if(fd <0|| fd >=NR_OPEN)
727 return-EBADF;
728 if(current->files->fd[fd]==NULL)
729 return-EBADF;
732 /* add another reference to these files */
733 for(i=0; i< num; i++)
735 fp[i]=current->files->fd[fdp[i]];
736 fp[i]->f_count++;
737 unix_inflight(fp[i]);
740 return num;
/*
 *	Free the descriptors in the array
 *
 *	Each of the 'num' files is passed to close_fp() and then to
 *	unix_notinflight(). 'sk' is not used.
 *
 *	NOTE(review): close_fp() runs before unix_notinflight() looks at the
 *	same file - confirm this can never be the file's last reference.
 */
static void unix_fd_free(struct sock *sk, struct file **fp, int num)
{
	struct file **cur = fp;
	struct file **end = fp + num;

	while (cur < end) {
		close_fp(*cur);
		unix_notinflight(*cur);
		cur++;
	}
}

758 * Count the free descriptors available to a process.
759 * Interpretation issue: Is the limit the highest descriptor (buggy
760 * allowing passed fd's higher up to cause a limit to be exceeded) -
761 * but how the old code did it - or like this...
764 static intunix_files_free(void)
766 int i;
767 int n=0;
768 for(i=0;i<NR_OPEN;i++)
770 if(current->files->fd[i])
771 n++;
774 i=NR_OPEN;
775 if(i>current->rlim[RLIMIT_NOFILE].rlim_cur)
776 i=current->rlim[RLIMIT_NOFILE].rlim_cur;
777 if(n>=i)
778 return0;
779 return i-n;
783 * Perform the AF_UNIX file descriptor pass out functionality. This
784 * is nasty and messy as is the whole design of BSD file passing.
787 static voidunix_detach_fds(struct sk_buff *skb,struct cmsghdr *cmsg)
789 int i;
790 /* count of space in parent for fds */
791 int cmnum;
792 struct file **fp;
793 struct file **ufp;
794 int*cmfptr=NULL;/* =NULL To keep gcc happy */
795 /* number of fds actually passed */
796 int fdnum;
797 int ffree;
798 int ufn=0;
800 if(cmsg==NULL)
801 cmnum=0;
802 else
804 cmnum=cmsg->cmsg_len-sizeof(struct cmsghdr);
805 cmnum/=sizeof(int);
806 cmfptr=(int*)&cmsg->cmsg_data;
809 memcpy(&fdnum,skb->h.filp,sizeof(int));
810 fp=(struct file **)(skb->h.filp+sizeof(int));
811 if(cmnum>fdnum)
812 cmnum=fdnum;
813 ffree=unix_files_free();
814 if(cmnum>ffree)
815 cmnum=ffree;
816 ufp=&current->files->fd[0];
819 * Copy those that fit
821 for(i=0;i<cmnum;i++)
824 * Insert the fd
826 while(ufp[ufn]!=NULL)
827 ufn++;
828 ufp[ufn]=fp[i];
829 *cmfptr++=ufn;
830 FD_CLR(ufn,&current->files->close_on_exec);
831 unix_notinflight(fp[i]);
834 * Dump those that don't
836 for(;i<fdnum;i++)
838 close_fp(fp[i]);
839 unix_notinflight(fp[i]);
841 kfree(skb->h.filp);
842 skb->h.filp=NULL;
844 /* no need to use destructor */
845 skb->destructor = NULL;
848 static voidunix_destruct_fds(struct sk_buff *skb)
850 unix_detach_fds(skb,NULL);
854 * Attach the file descriptor array to an sk_buff
856 static voidunix_attach_fds(int fpnum,struct file **fp,struct sk_buff *skb)
859 skb->h.filp=kmalloc(sizeof(int)+fpnum*sizeof(struct file *),
860 GFP_KERNEL);
861 /* number of descriptors starts block */
862 memcpy(skb->h.filp,&fpnum,sizeof(int));
863 /* actual descriptors */
864 memcpy(skb->h.filp+sizeof(int),fp,fpnum*sizeof(struct file *));
865 skb->destructor = unix_destruct_fds;
869 * Send AF_UNIX data.
872 static intunix_sendmsg(struct socket *sock,struct msghdr *msg,int len,int nonblock,int flags)
874 unix_socket *sk=sock->data;
875 unix_socket *other;
876 struct sockaddr_un *sunaddr=msg->msg_name;
877 int err,size;
878 struct sk_buff *skb;
879 int limit=0;
880 int sent=0;
881 struct file *fp[UNIX_MAX_FD];
882 /* number of fds waiting to be passed, 0 means either
883 * no fds to pass or they've already been passed
885 int fpnum=0;
887 if(sk->err)
888 returnsock_error(sk);
890 if(flags&MSG_OOB)
891 return-EOPNOTSUPP;
893 if(flags)/* For now */{
894 return-EINVAL;
897 if(sunaddr!=NULL)
899 if(sock->type==SOCK_STREAM)
901 if(sk->state==TCP_ESTABLISHED)
902 return-EISCONN;
903 else
904 return-EOPNOTSUPP;
908 if(sunaddr==NULL)
910 if(sk->protinfo.af_unix.other==NULL)
911 return-ENOTCONN;
915 * A control message has been attached.
917 if(msg->msg_accrights)
919 struct cmsghdr *cm=unix_copyrights(msg->msg_accrights,
920 msg->msg_accrightslen);
921 if(cm==NULL || msg->msg_accrightslen<sizeof(struct cmsghdr) ||
922 cm->cmsg_type!=SCM_RIGHTS ||
923 cm->cmsg_level!=SOL_SOCKET ||
924 msg->msg_accrightslen!=cm->cmsg_len)
926 kfree(cm);
927 return-EINVAL;
929 fpnum=unix_fd_copy(sk,cm,fp);
930 kfree(cm);
931 if(fpnum<0) {
932 return fpnum;
936 while(sent < len)
939 * Optimisation for the fact that under 0.01% of X messages typically
940 * need breaking up.
943 size=len-sent;
945 if(size>(sk->sndbuf-sizeof(struct sk_buff))/2)/* Keep two messages in the pipe so it schedules better */
947 if(sock->type==SOCK_DGRAM)
949 unix_fd_free(sk,fp,fpnum);
950 return-EMSGSIZE;
952 size=(sk->sndbuf-sizeof(struct sk_buff))/2;
955 * Keep to page sized kmalloc()'s as various people
956 * have suggested. Big mallocs stress the vm too
957 * much.
960 if(size >4000&& sock->type!=SOCK_DGRAM)
961 limit =4000;/* Fall back to 4K if we can't grab a big buffer this instant */
962 else
963 limit =0;/* Otherwise just grab and wait */
966 * Grab a buffer
969 skb=sock_alloc_send_skb(sk,size,limit,nonblock, &err);
971 if(skb==NULL)
973 unix_fd_free(sk,fp,fpnum);
974 if(sent)
976 sk->err=-err;
977 return sent;
979 return err;
981 size=skb_tailroom(skb);/* If we dropped back on a limit then our skb is smaller */
983 skb->sk=sk;
984 skb->free=1;
986 if(fpnum)
988 unix_attach_fds(fpnum,fp,skb);
989 fpnum=0;
991 else
992 skb->h.filp=NULL;
994 memcpy_fromiovec(skb_put(skb,size),msg->msg_iov, size);
996 cli();
997 if(sunaddr==NULL)
999 other=sk->protinfo.af_unix.other;
1000 if(sock->type==SOCK_DGRAM && other->dead)
1002 other->protinfo.af_unix.locks--;
1003 sk->protinfo.af_unix.other=NULL;
1004 sock->state=SS_UNCONNECTED;
1005 sti();
1006 kfree_skb(skb, FREE_WRITE);
1007 if(!sent)
1008 return-ECONNRESET;
1009 else
1010 return sent;
1013 else
1015 unix_mkname(sunaddr, msg->msg_namelen);
1016 other=unix_find_other(sunaddr->sun_path, &err);
1017 if(other==NULL)
1019 sti();
1020 kfree_skb(skb, FREE_WRITE);
1021 if(sent)
1022 return sent;
1023 else
1024 return err;
1027 skb_queue_tail(&other->receive_queue, skb);
1028 sti();
1029 /* if we sent an fd, only do it once */
1030 other->data_ready(other,size);
1031 sent+=size;
1033 return sent;
1037 * Sleep until data has arrive. But check for races..
1040 static voidunix_data_wait(unix_socket * sk)
1042 cli();
1043 if(!skb_peek(&sk->receive_queue)) {
1044 sk->socket->flags |= SO_WAITDATA;
1045 interruptible_sleep_on(sk->sleep);
1046 sk->socket->flags &= ~SO_WAITDATA;
1048 sti();
1051 static intunix_recvmsg(struct socket *sock,struct msghdr *msg,int size,int noblock,int flags,int*addr_len)
1053 unix_socket *sk=sock->data;
1054 struct sockaddr_un *sunaddr=msg->msg_name;
1055 struct sk_buff *skb;
1056 int copied=0;
1057 unsigned char*sp;
1058 int len;
1059 int num;
1060 struct iovec *iov=msg->msg_iov;
1061 struct cmsghdr *cm=NULL;
1062 int ct=msg->msg_iovlen;
1064 if(flags&MSG_OOB)
1065 return-EOPNOTSUPP;
1067 if(addr_len)
1068 *addr_len=0;
1070 if(sk->err)
1071 returnsock_error(sk);
1073 if(msg->msg_accrights)
1075 cm=unix_copyrights(msg->msg_accrights,
1076 msg->msg_accrightslen);
1077 if(msg->msg_accrightslen<sizeof(struct cmsghdr)
1078 #if 0
1079 /* investigate this further -- Stevens example doesn't seem to care */
1081 cm->cmsg_type!=SCM_RIGHTS ||
1082 cm->cmsg_level!=SOL_SOCKET ||
1083 msg->msg_accrightslen!=cm->cmsg_len
1084 #endif
1087 kfree(cm);
1088 printk("recvmsg: Bad msg_accrights\n");
1089 return-EINVAL;
1093 down(&sk->protinfo.af_unix.readsem);/* Lock the socket */
1094 while(ct--)
1096 int done=0;
1097 sp=iov->iov_base;
1098 len=iov->iov_len;
1099 iov++;
1101 while(done<len)
1103 if(copied && (flags & MSG_PEEK))
1104 goto out;
1105 if(copied == size)
1106 goto out;
1107 skb=skb_dequeue(&sk->receive_queue);
1108 if(skb==NULL)
1110 up(&sk->protinfo.af_unix.readsem);
1111 if(sk->shutdown & RCV_SHUTDOWN)
1112 return copied;
1113 if(copied)
1114 return copied;
1115 if(noblock)
1116 return-EAGAIN;
1117 if(current->signal & ~current->blocked)
1118 return-ERESTARTSYS;
1119 unix_data_wait(sk);
1120 down(&sk->protinfo.af_unix.readsem);
1121 continue;
1123 if(msg->msg_name!=NULL)
1125 sunaddr->sun_family=AF_UNIX;
1126 if(skb->sk->protinfo.af_unix.name)
1128 memcpy(sunaddr->sun_path, skb->sk->protinfo.af_unix.name,108);
1129 if(addr_len)
1130 *addr_len=strlen(sunaddr->sun_path)+sizeof(short);
1132 else
1133 if(addr_len)
1134 *addr_len=sizeof(short);
1137 num=min(skb->len,len-done);
1138 memcpy_tofs(sp, skb->data, num);
1140 if(skb->h.filp!=NULL)
1141 unix_detach_fds(skb,cm);
1143 copied+=num;
1144 done+=num;
1145 sp+=num;
1146 if(!(flags & MSG_PEEK))
1147 skb_pull(skb, num);
1148 /* put the skb back if we didn't use it up.. */
1149 if(skb->len) {
1150 skb_queue_head(&sk->receive_queue, skb);
1151 continue;
1153 kfree_skb(skb, FREE_WRITE);
1154 if(sock->type==SOCK_DGRAM || cm)
1155 goto out;
1158 out:
1159 up(&sk->protinfo.af_unix.readsem);
1160 if(cm)
1161 unix_returnrights(msg->msg_accrights,msg->msg_accrightslen,cm);
1162 return copied;
1165 static intunix_shutdown(struct socket *sock,int mode)
1167 unix_socket *sk=(unix_socket *)sock->data;
1168 unix_socket *other=sk->protinfo.af_unix.other;
1169 if(mode&SEND_SHUTDOWN)
1171 sk->shutdown|=SEND_SHUTDOWN;
1172 sk->state_change(sk);
1173 if(other)
1175 other->shutdown|=RCV_SHUTDOWN;
1176 other->state_change(other);
1179 other=sk->protinfo.af_unix.other;
1180 if(mode&RCV_SHUTDOWN)
1182 sk->shutdown|=RCV_SHUTDOWN;
1183 sk->state_change(sk);
1184 if(other)
1186 other->shutdown|=SEND_SHUTDOWN;
1187 other->state_change(other);
1190 return0;
1194 static intunix_select(struct socket *sock,int sel_type, select_table *wait)
1196 returndatagram_select(sock->data,sel_type,wait);
1199 static intunix_ioctl(struct socket *sock,unsigned int cmd,unsigned long arg)
1201 unix_socket *sk=sock->data;
1202 int err;
1203 long amount=0;
1205 switch(cmd)
1208 case TIOCOUTQ:
1209 err=verify_area(VERIFY_WRITE,(void*)arg,sizeof(unsigned long));
1210 if(err)
1211 return err;
1212 amount=sk->sndbuf-sk->wmem_alloc;
1213 if(amount<0)
1214 amount=0;
1215 put_fs_long(amount,(unsigned long*)arg);
1216 return0;
1217 case TIOCINQ:
1219 struct sk_buff *skb;
1220 if(sk->state==TCP_LISTEN)
1221 return-EINVAL;
1222 /* These two are safe on a single CPU system as only user tasks fiddle here */
1223 if((skb=skb_peek(&sk->receive_queue))!=NULL)
1224 amount=skb->len;
1225 err=verify_area(VERIFY_WRITE,(void*)arg,sizeof(unsigned long));
1226 if(err)
1227 return err;
1228 put_fs_long(amount,(unsigned long*)arg);
1229 return0;
1232 default:
1233 return-EINVAL;
1235 /*NOTREACHED*/
1236 return(0);
#ifdef CONFIG_PROC_FS
/*
 *	Produce the text of the unix socket table: a heading line, then one
 *	line per socket on unix_socket_list.
 *
 *	buffer:	where the text is built
 *	start:	receives the address inside 'buffer' of the data for 'offset'
 *	offset:	position in the whole listing the reader wants
 *	length:	maximum number of bytes the reader wants
 *	dummy:	not used
 *
 *	Returns the number of valid bytes at *start, at most 'length'.
 */
static int unix_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	unix_socket *s;
	off_t begin = 0;
	off_t pos = 0;
	int len = 0;

	len += sprintf(buffer, "Num RefCount Protocol Flags Type St "
			"Inode Path\n");

	for (s = unix_socket_list; s != NULL; s = s->next) {
		len += sprintf(buffer+len, "%p: %08X %08X %08lX %04X %02X %5ld",
			s,
			s->protinfo.af_unix.locks,
			0,
			s->socket->flags,
			s->socket->type,
			s->socket->state,
			s->socket->inode ? s->socket->inode->i_ino : 0);
		if (s->protinfo.af_unix.name != NULL)
			len += sprintf(buffer+len, " %s\n", s->protinfo.af_unix.name);
		else
			buffer[len++] = '\n';

		pos = begin+len;
		if (pos < offset) {
			/* Everything so far lies before the wanted window: start over */
			len = 0;
			begin = pos;
		}
		if (pos > offset+length)
			break;
	}

	*start = buffer+(offset-begin);
	len -= (offset-begin);
	if (len > length)
		len = length;
	return len;
}
#endif

1283 struct proto_ops unix_proto_ops = {
1284 AF_UNIX,
1286 unix_create,
1287 unix_dup,
1288 unix_release,
1289 unix_bind,
1290 unix_connect,
1291 unix_socketpair,
1292 unix_accept,
1293 unix_getname,
1294 unix_select,
1295 unix_ioctl,
1296 unix_listen,
1297 unix_shutdown,
1298 unix_setsockopt,
1299 unix_getsockopt,
1300 unix_fcntl,
1301 unix_sendmsg,
1302 unix_recvmsg
1306 voidunix_proto_init(struct net_proto *pro)
1308 printk("NET3: Unix domain sockets 0.12 for Linux NET3.033.\n");
1309 sock_register(unix_proto_ops.family, &unix_proto_ops);
1310 #ifdef CONFIG_PROC_FS
1311 proc_net_register(&(struct proc_dir_entry) {
1312 PROC_NET_UNIX,4,"unix",
1313 S_IFREG | S_IRUGO,1,0,0,
1314 0, &proc_net_inode_operations,
1315 unix_get_info
1317 #endif
/*
 * Local variables:
 *  compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c"
 * End:
 */
close