Import 2.0.2
[davej-history.git] / net / unix / af_unix.c
blobff86725b010cf4452b8957ebdaeb536a85277407
1 /*
2 * NET3: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan@cymru.net>
6 * Currently this contains all but the file descriptor passing code.
7 * Before that goes in, the odd bugs in the iovec handlers need
8 * fixing, and this bit needs testing. BSD fd passing is not a trivial part
9 * of the exercise, it turns out. Anyone like writing garbage collectors?
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
16 * Fixes:
17 * Linus Torvalds : Assorted bug cures.
18 * Niibe Yutaka : async I/O support.
19 * Carsten Paeth : PF_UNIX check, address fixes.
20 * Alan Cox : Limit size of allocated blocks.
21 * Alan Cox : Fixed the stupid socketpair bug.
22 * Alan Cox : BSD compatibility fine tuning.
23 * Alan Cox : Fixed a bug in connect when interrupted.
24 * Alan Cox : Sorted out a proper draft version of
25 * file descriptor passing hacked up from
26 * Mike Shaver's work.
27 * Marty Leisner : Fixes to fd passing
28 * Nick Nevin : recvmsg bugfix.
29 * Alan Cox : Started proper garbage collector
30 * Heiko EiBfeldt : Missing verify_area check
32 * Known differences from reference BSD that was tested:
34 * [TO FIX]
35 * ECONNREFUSED is not returned from one end of a connected() socket to the
36 * other the moment one end closes.
37 * fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark
38 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
39 * [NOT TO FIX]
40 * accept() returns a path name even if the connecting socket has closed
41 * in the meantime (BSD loses the path and gives up).
42 * accept() returns 0 length path for an unbound connector. BSD returns 16
43 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
44 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
45 * BSD af_unix apparently has connect forgetting to block properly.
48 #include <linux/config.h>
49 #include <linux/kernel.h>
50 #include <linux/major.h>
51 #include <linux/signal.h>
52 #include <linux/sched.h>
53 #include <linux/errno.h>
54 #include <linux/string.h>
55 #include <linux/stat.h>
56 #include <linux/socket.h>
57 #include <linux/un.h>
58 #include <linux/fcntl.h>
59 #include <linux/termios.h>
60 #include <linux/socket.h>
61 #include <linux/sockios.h>
62 #include <linux/net.h>
63 #include <linux/in.h>
64 #include <linux/fs.h>
65 #include <linux/malloc.h>
66 #include <asm/segment.h>
67 #include <linux/skbuff.h>
68 #include <linux/netdevice.h>
69 #include <net/sock.h>
70 #include <net/tcp.h>
71 #include <net/af_unix.h>
72 #include <linux/proc_fs.h>
74 unix_socket *unix_socket_list=NULL;
76 #define min(a,b) (((a)<(b))?(a):(b))
/*
 *	Make sure the unix name is null-terminated.
 *
 *	A zero byte is stored at offset len of the address structure, or at
 *	its final byte when len reaches or exceeds the structure size.
 */
static inline void unix_mkname(struct sockaddr_un *sunaddr, unsigned long len)
{
	char *raw = (char *)sunaddr;
	unsigned long last = sizeof(*sunaddr) - 1;

	raw[len < sizeof(*sunaddr) ? len : last] = 0;
}
90 * Note: Sockets may not be removed _during_ an interrupt or net_bh
91 * handler using this technique. They can be added although we do not
92 * use this facility.
95 static voidunix_remove_socket(unix_socket *sk)
97 unix_socket **s;
99 cli();
100 s=&unix_socket_list;
102 while(*s!=NULL)
104 if(*s==sk)
106 *s=sk->next;
107 sti();
108 return;
110 s=&((*s)->next);
112 sti();
115 static voidunix_insert_socket(unix_socket *sk)
117 cli();
118 sk->next=unix_socket_list;
119 unix_socket_list=sk;
120 sti();
123 static unix_socket *unix_find_socket(struct inode *i)
125 unix_socket *s;
126 cli();
127 s=unix_socket_list;
128 while(s)
130 if(s->protinfo.af_unix.inode==i)
132 sti();
133 return(s);
135 s=s->next;
137 sti();
138 return(NULL);
142 * Delete a unix socket. We have to allow for deferring this on a timer.
145 static voidunix_destroy_timer(unsigned long data)
147 unix_socket *sk=(unix_socket *)data;
148 if(sk->protinfo.af_unix.locks==0&& sk->wmem_alloc==0)
150 if(sk->protinfo.af_unix.name)
151 kfree(sk->protinfo.af_unix.name);
152 sk_free(sk);
153 return;
157 * Retry;
160 sk->timer.expires=jiffies+10*HZ;/* No real hurry try it every 10 seconds or so */
161 add_timer(&sk->timer);
165 static voidunix_delayed_delete(unix_socket *sk)
167 sk->timer.data=(unsigned long)sk;
168 sk->timer.expires=jiffies+HZ;/* Normally 1 second after will clean up. After that we try every 10 */
169 sk->timer.function=unix_destroy_timer;
170 add_timer(&sk->timer);
173 static voidunix_destroy_socket(unix_socket *sk)
175 struct sk_buff *skb;
177 unix_remove_socket(sk);
179 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
181 if(sk->state==TCP_LISTEN)
183 unix_socket *osk=skb->sk;
184 osk->state=TCP_CLOSE;
185 kfree_skb(skb, FREE_WRITE);/* Now surplus - free the skb first before the socket */
186 osk->state_change(osk);/* So the connect wakes and cleans up (if any) */
187 /* osk will be destroyed when it gets to close or the timer fires */
189 else
191 /* passed fds are erased in the kfree_skb hook */
192 kfree_skb(skb,FREE_WRITE);
196 if(sk->protinfo.af_unix.inode!=NULL)
198 iput(sk->protinfo.af_unix.inode);
199 sk->protinfo.af_unix.inode=NULL;
202 if(--sk->protinfo.af_unix.locks==0&& sk->wmem_alloc==0)
204 if(sk->protinfo.af_unix.name)
205 kfree(sk->protinfo.af_unix.name);
206 sk_free(sk);
208 else
210 sk->dead=1;
211 unix_delayed_delete(sk);/* Try every so often until buffers are all freed */
/*
 *	Fixme: We need async I/O on AF_UNIX doing next.
 *
 *	No fcntl command is supported: every request is rejected with -EINVAL.
 */
static int unix_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	int result = -EINVAL;

	return result;
}
225 * Yes socket options work with the new unix domain socketry!!!!!!!
228 static intunix_setsockopt(struct socket *sock,int level,int optname,char*optval,int optlen)
230 unix_socket *sk=sock->data;
231 if(level!=SOL_SOCKET)
232 return-EOPNOTSUPP;
233 returnsock_setsockopt(sk,level,optname,optval,optlen);
236 static intunix_getsockopt(struct socket *sock,int level,int optname,char*optval,int*optlen)
238 unix_socket *sk=sock->data;
239 if(level!=SOL_SOCKET)
240 return-EOPNOTSUPP;
241 returnsock_getsockopt(sk,level,optname,optval,optlen);
244 static intunix_listen(struct socket *sock,int backlog)
246 unix_socket *sk=sock->data;
247 if(sk->type!=SOCK_STREAM)
248 return-EOPNOTSUPP;/* Only stream sockets accept */
249 if(sk->protinfo.af_unix.name==NULL)
250 return-EINVAL;/* No listens on an unbound socket */
251 sk->max_ack_backlog=backlog;
252 sk->state=TCP_LISTEN;
253 return0;
256 static voiddef_callback1(struct sock *sk)
258 if(!sk->dead)
259 wake_up_interruptible(sk->sleep);
262 static voiddef_callback2(struct sock *sk,int len)
264 if(!sk->dead)
266 wake_up_interruptible(sk->sleep);
267 sock_wake_async(sk->socket,1);
271 static voiddef_callback3(struct sock *sk)
273 if(!sk->dead)
275 wake_up_interruptible(sk->sleep);
276 sock_wake_async(sk->socket,2);
280 static intunix_create(struct socket *sock,int protocol)
282 unix_socket *sk;
283 if(protocol && protocol != PF_UNIX)
284 return-EPROTONOSUPPORT;
285 sk=(unix_socket *)sk_alloc(GFP_KERNEL);
286 if(sk==NULL)
287 return-ENOMEM;
288 switch(sock->type)
290 case SOCK_STREAM:
291 break;
293 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
294 * nothing uses it.
296 case SOCK_RAW:
297 sock->type=SOCK_DGRAM;
298 case SOCK_DGRAM:
299 break;
300 default:
301 sk_free(sk);
302 return-ESOCKTNOSUPPORT;
304 sk->type=sock->type;
305 init_timer(&sk->timer);
306 skb_queue_head_init(&sk->write_queue);
307 skb_queue_head_init(&sk->receive_queue);
308 skb_queue_head_init(&sk->back_log);
309 sk->protinfo.af_unix.family=AF_UNIX;
310 sk->protinfo.af_unix.inode=NULL;
311 sk->protinfo.af_unix.locks=1;/* Us */
312 sk->protinfo.af_unix.readsem=MUTEX;/* single task reading lock */
313 sk->rcvbuf=SK_RMEM_MAX;
314 sk->sndbuf=SK_WMEM_MAX;
315 sk->allocation=GFP_KERNEL;
316 sk->state=TCP_CLOSE;
317 sk->priority=SOPRI_NORMAL;
318 sk->state_change=def_callback1;
319 sk->data_ready=def_callback2;
320 sk->write_space=def_callback3;
321 sk->error_report=def_callback1;
322 sk->mtu=4096;
323 sk->socket=sock;
324 sock->data=(void*)sk;
325 sk->sleep=sock->wait;
326 unix_insert_socket(sk);
327 return0;
/*
 *	Set up newsock as a fresh AF_UNIX socket. oldsock is not consulted;
 *	the work is done by unix_create() with protocol 0.
 */
static int unix_dup(struct socket *newsock, struct socket *oldsock)
{
	int rc = unix_create(newsock, 0);

	return rc;
}
335 static intunix_release(struct socket *sock,struct socket *peer)
337 unix_socket *sk=sock->data;
338 unix_socket *skpair;
340 /* May not have data attached */
342 if(sk==NULL)
343 return0;
345 sk->state_change(sk);
346 sk->dead=1;
347 skpair=(unix_socket *)sk->protinfo.af_unix.other;/* Person we send to (default) */
348 if(sk->type==SOCK_STREAM && skpair!=NULL && skpair->state!=TCP_LISTEN)
350 skpair->shutdown=SHUTDOWN_MASK;/* No more writes */
351 skpair->state_change(skpair);/* Wake any blocked writes */
353 if(skpair!=NULL)
354 skpair->protinfo.af_unix.locks--;/* It may now die */
355 sk->protinfo.af_unix.other=NULL;/* No pair */
356 unix_destroy_socket(sk);/* Try to flush out this socket. Throw out buffers at least */
357 unix_gc();/* Garbage collect fds */
360 * FIXME: BSD difference: In BSD all sockets connected to use get ECONNRESET and we die on the spot. In
361 * Linux we behave like files and pipes do and wait for the last dereference.
364 sock->data = NULL;
365 sk->socket = NULL;
367 return0;
/*
 *	Find the socket bound to a filesystem path.
 *
 *	path is a kernel-space string, so the lookup runs with the fs segment
 *	temporarily switched to the kernel data segment. Returns the socket,
 *	or NULL with *error set to the open_namei() error or to -ECONNREFUSED
 *	when the inode exists but no socket is bound to it.
 */
static unix_socket *unix_find_other(char *path, int *error)
{
	int saved_fs;
	int rc;
	struct inode *inode;
	unix_socket *match;

	saved_fs = get_fs();
	set_fs(get_ds());
	rc = open_namei(path, 2, S_IFSOCK, &inode, NULL);
	set_fs(saved_fs);
	if (rc < 0) {
		*error = rc;
		return NULL;
	}

	match = unix_find_socket(inode);
	iput(inode);
	if (match == NULL)
		*error = -ECONNREFUSED;
	return match;
}
398 static intunix_bind(struct socket *sock,struct sockaddr *uaddr,int addr_len)
400 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
401 unix_socket *sk=sock->data;
402 int old_fs;
403 int err;
405 if(sk->protinfo.af_unix.name)
406 return-EINVAL;/* Already bound */
408 if(addr_len>sizeof(struct sockaddr_un) || addr_len<3|| sunaddr->sun_family!=AF_UNIX)
409 return-EINVAL;
410 unix_mkname(sunaddr, addr_len);
412 * Put ourselves in the filesystem
414 if(sk->protinfo.af_unix.inode!=NULL)
415 return-EINVAL;
417 sk->protinfo.af_unix.name=kmalloc(addr_len+1, GFP_KERNEL);
418 if(sk->protinfo.af_unix.name==NULL)
419 return-ENOMEM;
420 memcpy(sk->protinfo.af_unix.name, sunaddr->sun_path, addr_len+1);
422 old_fs=get_fs();
423 set_fs(get_ds());
425 err=do_mknod(sk->protinfo.af_unix.name,S_IFSOCK|S_IRWXUGO,0);
426 if(err==0)
427 err=open_namei(sk->protinfo.af_unix.name,2, S_IFSOCK, &sk->protinfo.af_unix.inode, NULL);
429 set_fs(old_fs);
431 if(err<0)
433 kfree_s(sk->protinfo.af_unix.name,addr_len+1);
434 sk->protinfo.af_unix.name=NULL;
435 if(err==-EEXIST)
436 return-EADDRINUSE;
437 else
438 return err;
441 return0;
/*
 *	Connect a socket to the socket bound at the given address.
 *
 *	Datagram sockets just swap their default peer. Stream sockets queue a
 *	zero-length marker buffer on the target's receive queue and then wait,
 *	unless O_NONBLOCK is set, until the state leaves TCP_SYN_SENT.
 *
 *	Returns 0 when connected, or one of -EISCONN, -EALREADY, -EINVAL,
 *	-EPROTOTYPE, -EINPROGRESS, -ERESTARTSYS, -ECONNREFUSED, or the error
 *	reported by the buffer allocation or the path lookup.
 */
static int unix_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
{
	unix_socket *sk = sock->data;
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	unix_socket *target;
	struct sk_buff *marker;
	int err;

	/* A stream socket that already has a peer is mid-connect or connected. */
	if (sk->type == SOCK_STREAM && sk->protinfo.af_unix.other) {
		if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED) {
			sock->state = SS_CONNECTED;
			return 0;
		}
		if (sock->state == SS_CONNECTING && sk->state == TCP_CLOSE) {
			sock->state = SS_UNCONNECTED;
			return -ECONNREFUSED;
		}
		if (sock->state != SS_CONNECTING)
			return -EISCONN;
		if (flags & O_NONBLOCK)
			return -EALREADY;
		/*
		 *	Drop through the connect up logic to the wait.
		 */
	}

	if (addr_len < sizeof(sunaddr->sun_family) + 1 || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	unix_mkname(sunaddr, addr_len);

	if (sk->type == SOCK_DGRAM) {
		/* Let go of any previous default destination first. */
		if (sk->protinfo.af_unix.other) {
			sk->protinfo.af_unix.other->protinfo.af_unix.locks--;
			sk->protinfo.af_unix.other = NULL;
			sock->state = SS_UNCONNECTED;
		}
		target = unix_find_other(sunaddr->sun_path, &err);
		if (target == NULL)
			return err;
		if (target->type != sk->type)
			return -EPROTOTYPE;
		target->protinfo.af_unix.locks++;
		sk->protinfo.af_unix.other = target;
		sock->state = SS_CONNECTED;
		sk->state = TCP_ESTABLISHED;
		return 0;			/* Done */
	}

	if (sock->state == SS_UNCONNECTED) {
		/*
		 *	Now ready to connect
		 */
		marker = sock_alloc_send_skb(sk, 0, 0, 0, &err);	/* Marker object */
		if (marker == NULL)
			return err;
		marker->sk = sk;			/* So they know it is us */
		marker->free = 1;
		marker->h.filp = NULL;
		sk->state = TCP_CLOSE;
		unix_mkname(sunaddr, addr_len);
		target = unix_find_other(sunaddr->sun_path, &err);
		if (target == NULL) {
			kfree_skb(marker, FREE_WRITE);
			return err;
		}
		if (target->type != sk->type) {
			kfree_skb(marker, FREE_WRITE);
			return -EPROTOTYPE;
		}
		target->protinfo.af_unix.locks++;	/* Lock the other socket so it doesn't run off for a moment */
		target->ack_backlog++;
		sk->protinfo.af_unix.other = target;
		skb_queue_tail(&target->receive_queue, marker);
		sk->state = TCP_SYN_SENT;
		sock->state = SS_CONNECTING;
		sti();
		target->data_ready(target, 0);		/* Wake up ! */
	}

	/* Wait for an accept */

	cli();
	while (sk->state == TCP_SYN_SENT) {
		if (flags & O_NONBLOCK) {
			sti();
			return -EINPROGRESS;
		}
		interruptible_sleep_on(sk->sleep);
		if (current->signal & ~current->blocked) {
			sti();
			return -ERESTARTSYS;
		}
	}

	/*
	 *	Has the other end closed on us ?
	 */
	if (sk->state == TCP_CLOSE) {
		sk->protinfo.af_unix.other->protinfo.af_unix.locks--;
		sk->protinfo.af_unix.other = NULL;
		sock->state = SS_UNCONNECTED;
		sti();
		return -ECONNREFUSED;
	}

	/*
	 *	Amazingly it has worked
	 */
	sock->state = SS_CONNECTED;
	sti();
	return 0;
}
577 static intunix_socketpair(struct socket *a,struct socket *b)
579 unix_socket *ska,*skb;
581 ska=a->data;
582 skb=b->data;
584 /* Join our sockets back to back */
585 ska->protinfo.af_unix.locks++;
586 skb->protinfo.af_unix.locks++;
587 ska->protinfo.af_unix.other=skb;
588 skb->protinfo.af_unix.other=ska;
589 ska->state=TCP_ESTABLISHED;
590 skb->state=TCP_ESTABLISHED;
591 return0;
594 static intunix_accept(struct socket *sock,struct socket *newsock,int flags)
596 unix_socket *sk=sock->data;
597 unix_socket *newsk, *tsk;
598 struct sk_buff *skb;
600 if(sk->type!=SOCK_STREAM)
602 return-EOPNOTSUPP;
604 if(sk->state!=TCP_LISTEN)
606 return-EINVAL;
609 newsk=newsock->data;
610 if(sk->protinfo.af_unix.name!=NULL)
612 newsk->protinfo.af_unix.name=kmalloc(strlen(sk->protinfo.af_unix.name)+1, GFP_KERNEL);
613 if(newsk->protinfo.af_unix.name==NULL)
614 return-ENOMEM;
615 strcpy(newsk->protinfo.af_unix.name, sk->protinfo.af_unix.name);
620 cli();
621 skb=skb_dequeue(&sk->receive_queue);
622 if(skb==NULL)
624 if(flags&O_NONBLOCK)
626 sti();
627 return-EAGAIN;
629 interruptible_sleep_on(sk->sleep);
630 if(current->signal & ~current->blocked)
632 sti();
633 return-ERESTARTSYS;
635 sti();
638 while(skb==NULL);
639 tsk=skb->sk;
640 kfree_skb(skb, FREE_WRITE);/* The buffer is just used as a tag */
641 sk->ack_backlog--;
642 newsk->protinfo.af_unix.other=tsk;
643 tsk->protinfo.af_unix.other=newsk;
644 tsk->state=TCP_ESTABLISHED;
645 newsk->state=TCP_ESTABLISHED;
646 newsk->protinfo.af_unix.locks++;/* Swap lock over */
647 sk->protinfo.af_unix.locks--;/* Locked to child socket not master */
648 tsk->protinfo.af_unix.locks++;/* Back lock */
649 sti();
650 tsk->state_change(tsk);/* Wake up any sleeping connect */
651 sock_wake_async(tsk->socket,0);
652 return0;
655 static intunix_getname(struct socket *sock,struct sockaddr *uaddr,int*uaddr_len,int peer)
657 unix_socket *sk=sock->data;
658 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
660 if(peer)
662 if(sk->protinfo.af_unix.other==NULL)
663 return-ENOTCONN;
664 sk=sk->protinfo.af_unix.other;
666 sunaddr->sun_family=AF_UNIX;
667 if(sk->protinfo.af_unix.name==NULL)
669 *sunaddr->sun_path=0;
670 *uaddr_len=sizeof(sunaddr->sun_family)+1;
671 return0;/* Not bound */
673 *uaddr_len=sizeof(sunaddr->sun_family)+strlen(sk->protinfo.af_unix.name)+1;
674 strcpy(sunaddr->sun_path,sk->protinfo.af_unix.name);/* 108 byte limited */
675 return0;
679 * Support routines for struct cmsghdr handling
682 static struct cmsghdr *unix_copyrights(void*userp,int len)
684 struct cmsghdr *cm;
686 if(len>256|| len <=0)
687 return NULL;
688 cm=kmalloc(len, GFP_KERNEL);
689 memcpy_fromfs(cm, userp, len);
690 return cm;
/*
 *	Return a header block
 *
 *	Copies len bytes of the kernel control message cm back to user
 *	space at userp and frees cm.
 */
static void unix_returnrights(void *userp, int len, struct cmsghdr *cm)
{
	memcpy_tofs(userp, cm, len);
	kfree(cm);
}
704 * Copy file descriptors into system space.
705 * Return number copied or negative error code
708 static intunix_fd_copy(struct sock *sk,struct cmsghdr *cmsg,struct file **fp)
710 int num=cmsg->cmsg_len-sizeof(struct cmsghdr);
711 int i;
712 int*fdp=(int*)cmsg->cmsg_data;
714 num /=sizeof(int);/* Odd bytes are forgotten in BSD not errored */
715 if(num >= UNIX_MAX_FD)
716 return-EINVAL;
719 * Verify the descriptors.
722 for(i=0; i< num; i++)
724 int fd;
726 fd = fdp[i];
727 #if 0
728 printk("testing fd %d\n", fd);
729 #endif
730 if(fd <0|| fd >= NR_OPEN)
731 return-EBADF;
732 if(current->files->fd[fd]==NULL)
733 return-EBADF;
736 /* add another reference to these files */
737 for(i=0; i< num; i++)
739 fp[i]=current->files->fd[fdp[i]];
740 fp[i]->f_count++;
741 unix_inflight(fp[i]);
744 return num;
/*
 *	Free the descriptors in the array
 *
 *	Each of the num files is closed with close_fp() and then reported to
 *	unix_notinflight(). sk is unused.
 */
static void unix_fd_free(struct sock *sk, struct file **fp, int num)
{
	struct file **end = fp + num;

	while (fp < end) {
		close_fp(*fp);
		unix_notinflight(*fp);
		fp++;
	}
}
763 * Perform the AF_UNIX file descriptor pass out functionality. This
764 * is nasty and messy as is the whole design of BSD file passing.
767 static voidunix_detach_fds(struct sk_buff *skb,struct cmsghdr *cmsg)
769 int i;
770 /* count of space in parent for fds */
771 int cmnum;
772 struct file **fp;
773 int*cmfptr;
774 int fdnum;
776 cmfptr = NULL;
777 cmnum =0;
778 if(cmsg)
780 cmnum = (cmsg->cmsg_len-sizeof(struct cmsghdr)) /sizeof(int);
781 cmfptr = (int*)&cmsg->cmsg_data;
784 fdnum = *(int*)skb->h.filp;
785 fp = (struct file **)(skb->h.filp+sizeof(long));
787 if(cmnum > fdnum)
788 cmnum = fdnum;
791 * Copy those that fit
793 for(i =0; i < cmnum ; i++)
795 int new_fd =get_unused_fd();
796 if(new_fd <0)
797 break;
798 current->files->fd[new_fd]=fp[i];
799 *cmfptr++ = new_fd;
800 unix_notinflight(fp[i]);
803 * Dump those that don't
805 for( ; i < fdnum ; i++)
807 close_fp(fp[i]);
808 unix_notinflight(fp[i]);
810 kfree(skb->h.filp);
811 skb->h.filp=NULL;
813 /* no need to use destructor */
814 skb->destructor = NULL;
817 static voidunix_destruct_fds(struct sk_buff *skb)
819 unix_detach_fds(skb,NULL);
823 * Attach the file descriptor array to an sk_buff
825 static voidunix_attach_fds(int fpnum,struct file **fp,struct sk_buff *skb)
828 skb->h.filp =kmalloc(sizeof(long)+fpnum*sizeof(struct file *),
829 GFP_KERNEL);
830 /* number of descriptors starts block */
831 *(int*)skb->h.filp = fpnum;
832 /* actual descriptors */
833 memcpy(skb->h.filp+sizeof(long),fp,fpnum*sizeof(struct file *));
834 skb->destructor = unix_destruct_fds;
838 * Send AF_UNIX data.
841 static intunix_sendmsg(struct socket *sock,struct msghdr *msg,int len,int nonblock,int flags)
843 unix_socket *sk=sock->data;
844 unix_socket *other;
845 struct sockaddr_un *sunaddr=msg->msg_name;
846 int err,size;
847 struct sk_buff *skb;
848 int limit=0;
849 int sent=0;
850 struct file *fp[UNIX_MAX_FD];
851 /* number of fds waiting to be passed, 0 means either
852 * no fds to pass or they've already been passed
854 int fpnum=0;
856 if(sk->err)
857 returnsock_error(sk);
859 if(flags&MSG_OOB)
860 return-EOPNOTSUPP;
862 if(flags)/* For now */{
863 return-EINVAL;
866 if(sunaddr!=NULL)
868 if(sock->type==SOCK_STREAM)
870 if(sk->state==TCP_ESTABLISHED)
871 return-EISCONN;
872 else
873 return-EOPNOTSUPP;
877 if(sunaddr==NULL)
879 if(sk->protinfo.af_unix.other==NULL)
880 return-ENOTCONN;
884 * A control message has been attached.
886 if(msg->msg_control)
888 struct cmsghdr *cm=unix_copyrights(msg->msg_control,
889 msg->msg_controllen);
890 if(cm==NULL || msg->msg_controllen<sizeof(struct cmsghdr) ||
891 cm->cmsg_type!=SCM_RIGHTS ||
892 cm->cmsg_level!=SOL_SOCKET ||
893 msg->msg_controllen!=cm->cmsg_len)
895 kfree(cm);
896 return-EINVAL;
898 fpnum=unix_fd_copy(sk,cm,fp);
899 kfree(cm);
900 if(fpnum<0) {
901 return fpnum;
905 while(sent < len)
908 * Optimisation for the fact that under 0.01% of X messages typically
909 * need breaking up.
912 size=len-sent;
914 if(size>(sk->sndbuf-sizeof(struct sk_buff))/2)/* Keep two messages in the pipe so it schedules better */
916 if(sock->type==SOCK_DGRAM)
918 unix_fd_free(sk,fp,fpnum);
919 return-EMSGSIZE;
921 size=(sk->sndbuf-sizeof(struct sk_buff))/2;
924 * Keep to page sized kmalloc()'s as various people
925 * have suggested. Big mallocs stress the vm too
926 * much.
929 if(size >4000&& sock->type!=SOCK_DGRAM)
930 limit =4000;/* Fall back to 4K if we can't grab a big buffer this instant */
931 else
932 limit =0;/* Otherwise just grab and wait */
935 * Grab a buffer
938 skb=sock_alloc_send_skb(sk,size,limit,nonblock, &err);
940 if(skb==NULL)
942 unix_fd_free(sk,fp,fpnum);
943 if(sent)
945 sk->err=-err;
946 return sent;
948 return err;
950 size=skb_tailroom(skb);/* If we dropped back on a limit then our skb is smaller */
952 skb->sk=sk;
953 skb->free=1;
955 if(fpnum)
957 unix_attach_fds(fpnum,fp,skb);
958 fpnum=0;
960 else
961 skb->h.filp=NULL;
963 memcpy_fromiovec(skb_put(skb,size),msg->msg_iov, size);
965 cli();
966 if(sunaddr==NULL)
968 other=sk->protinfo.af_unix.other;
969 if(sock->type==SOCK_DGRAM && other->dead)
971 other->protinfo.af_unix.locks--;
972 sk->protinfo.af_unix.other=NULL;
973 sock->state=SS_UNCONNECTED;
974 sti();
975 kfree_skb(skb, FREE_WRITE);
976 if(!sent)
977 return-ECONNRESET;
978 else
979 return sent;
982 else
984 unix_mkname(sunaddr, msg->msg_namelen);
985 other=unix_find_other(sunaddr->sun_path, &err);
986 if(other==NULL)
988 sti();
989 kfree_skb(skb, FREE_WRITE);
990 if(sent)
991 return sent;
992 else
993 return err;
996 skb_queue_tail(&other->receive_queue, skb);
997 sti();
998 /* if we sent an fd, only do it once */
999 other->data_ready(other,size);
1000 sent+=size;
1002 return sent;
1006 * Sleep until data has arrive. But check for races..
1009 static voidunix_data_wait(unix_socket * sk)
1011 cli();
1012 if(!skb_peek(&sk->receive_queue)) {
1013 sk->socket->flags |= SO_WAITDATA;
1014 interruptible_sleep_on(sk->sleep);
1015 sk->socket->flags &= ~SO_WAITDATA;
1017 sti();
1020 static intunix_recvmsg(struct socket *sock,struct msghdr *msg,int size,int noblock,int flags,int*addr_len)
1022 unix_socket *sk=sock->data;
1023 struct sockaddr_un *sunaddr=msg->msg_name;
1024 struct sk_buff *skb;
1025 int copied=0;
1026 unsigned char*sp;
1027 int len;
1028 int num;
1029 struct iovec *iov=msg->msg_iov;
1030 struct cmsghdr *cm=NULL;
1031 int ct=msg->msg_iovlen;
1033 if(flags&MSG_OOB)
1034 return-EOPNOTSUPP;
1036 if(addr_len)
1037 *addr_len=0;
1039 if(sk->err)
1040 returnsock_error(sk);
1042 if(msg->msg_control)
1044 cm=unix_copyrights(msg->msg_control,
1045 msg->msg_controllen);
1046 if(msg->msg_controllen<sizeof(struct cmsghdr)
1047 #if 0
1048 /* investigate this further -- Stevens example doesn't seem to care */
1050 cm->cmsg_type!=SCM_RIGHTS ||
1051 cm->cmsg_level!=SOL_SOCKET ||
1052 msg->msg_controllen!=cm->cmsg_len
1053 #endif
1056 kfree(cm);
1057 /* printk("recvmsg: Bad msg_control\n");*/
1058 return-EINVAL;
1062 down(&sk->protinfo.af_unix.readsem);/* Lock the socket */
1063 while(ct--)
1065 int done=0;
1066 sp=iov->iov_base;
1067 len=iov->iov_len;
1068 iov++;
1070 while(done<len)
1072 if(copied && (flags & MSG_PEEK))
1073 goto out;
1074 if(copied == size)
1075 goto out;
1076 skb=skb_dequeue(&sk->receive_queue);
1077 if(skb==NULL)
1079 up(&sk->protinfo.af_unix.readsem);
1080 if(sk->shutdown & RCV_SHUTDOWN)
1081 return copied;
1082 if(copied)
1083 return copied;
1084 if(noblock)
1085 return-EAGAIN;
1086 if(current->signal & ~current->blocked)
1087 return-ERESTARTSYS;
1088 unix_data_wait(sk);
1089 down(&sk->protinfo.af_unix.readsem);
1090 continue;
1092 if(msg->msg_name!=NULL)
1094 sunaddr->sun_family=AF_UNIX;
1095 if(skb->sk->protinfo.af_unix.name)
1097 memcpy(sunaddr->sun_path, skb->sk->protinfo.af_unix.name,108);
1098 if(addr_len)
1099 *addr_len=strlen(sunaddr->sun_path)+sizeof(short);
1101 else
1102 if(addr_len)
1103 *addr_len=sizeof(short);
1106 num=min(skb->len,len-done);
1107 memcpy_tofs(sp, skb->data, num);
1109 if(skb->h.filp!=NULL)
1110 unix_detach_fds(skb,cm);
1112 copied+=num;
1113 done+=num;
1114 sp+=num;
1115 if(!(flags & MSG_PEEK))
1116 skb_pull(skb, num);
1117 /* put the skb back if we didn't use it up.. */
1118 if(skb->len) {
1119 skb_queue_head(&sk->receive_queue, skb);
1120 continue;
1122 kfree_skb(skb, FREE_WRITE);
1123 if(sock->type==SOCK_DGRAM || cm)
1124 goto out;
1127 out:
1128 up(&sk->protinfo.af_unix.readsem);
1129 if(cm)
1130 unix_returnrights(msg->msg_control,msg->msg_controllen,cm);
1131 return copied;
1134 static intunix_shutdown(struct socket *sock,int mode)
1136 unix_socket *sk=(unix_socket *)sock->data;
1137 unix_socket *other=sk->protinfo.af_unix.other;
1138 if(mode&SEND_SHUTDOWN)
1140 sk->shutdown|=SEND_SHUTDOWN;
1141 sk->state_change(sk);
1142 if(other)
1144 other->shutdown|=RCV_SHUTDOWN;
1145 other->state_change(other);
1148 other=sk->protinfo.af_unix.other;
1149 if(mode&RCV_SHUTDOWN)
1151 sk->shutdown|=RCV_SHUTDOWN;
1152 sk->state_change(sk);
1153 if(other)
1155 other->shutdown|=SEND_SHUTDOWN;
1156 other->state_change(other);
1159 return0;
1163 static intunix_select(struct socket *sock,int sel_type, select_table *wait)
1165 returndatagram_select(sock->data,sel_type,wait);
1168 static intunix_ioctl(struct socket *sock,unsigned int cmd,unsigned long arg)
1170 unix_socket *sk=sock->data;
1171 int err;
1172 long amount=0;
1174 switch(cmd)
1177 case TIOCOUTQ:
1178 err=verify_area(VERIFY_WRITE,(void*)arg,sizeof(unsigned long));
1179 if(err)
1180 return err;
1181 amount=sk->sndbuf-sk->wmem_alloc;
1182 if(amount<0)
1183 amount=0;
1184 put_fs_long(amount,(unsigned long*)arg);
1185 return0;
1186 case TIOCINQ:
1188 struct sk_buff *skb;
1189 if(sk->state==TCP_LISTEN)
1190 return-EINVAL;
1191 /* These two are safe on a single CPU system as only user tasks fiddle here */
1192 if((skb=skb_peek(&sk->receive_queue))!=NULL)
1193 amount=skb->len;
1194 err=verify_area(VERIFY_WRITE,(void*)arg,sizeof(unsigned long));
1195 if(err)
1196 return err;
1197 put_fs_long(amount,(unsigned long*)arg);
1198 return0;
1201 default:
1202 return-EINVAL;
1204 /*NOTREACHED*/
1205 return(0);
#ifdef CONFIG_PROC_FS
/*
 *	Produce the text of the "unix" proc net entry: a header line followed
 *	by one line per socket on the global list.
 *
 *	Output is written to buffer; only the window starting at offset and
 *	at most length bytes long is reported to the caller through *start
 *	and the return value. dummy is unused.
 *
 *	The per-socket format has seven conversions, so seven values are
 *	passed: the socket pointer for the leading %p and a constant 0 for
 *	the Protocol column complete the list.
 */
static int unix_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	off_t pos = 0;
	off_t begin = 0;
	int len = 0;
	unix_socket *s = unix_socket_list;

	len += sprintf(buffer, "Num RefCount Protocol Flags Type St "
		       "Inode Path\n");

	while (s != NULL) {
		len += sprintf(buffer + len, "%p: %08X %08X %08lX %04X %02X %5ld",
			       s,
			       s->protinfo.af_unix.locks,
			       0,
			       s->socket->flags,
			       s->socket->type,
			       s->socket->state,
			       s->socket->inode ? s->socket->inode->i_ino : 0);
		if (s->protinfo.af_unix.name != NULL)
			len += sprintf(buffer + len, " %s\n", s->protinfo.af_unix.name);
		else
			buffer[len++] = '\n';

		pos = begin + len;
		if (pos < offset) {
			/* Everything so far lies before the requested window: discard it. */
			len = 0;
			begin = pos;
		}
		if (pos > offset + length)
			break;
		s = s->next;
	}
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if (len > length)
		len = length;
	return len;
}
#endif
1252 struct proto_ops unix_proto_ops = {
1253 AF_UNIX,
1255 unix_create,
1256 unix_dup,
1257 unix_release,
1258 unix_bind,
1259 unix_connect,
1260 unix_socketpair,
1261 unix_accept,
1262 unix_getname,
1263 unix_select,
1264 unix_ioctl,
1265 unix_listen,
1266 unix_shutdown,
1267 unix_setsockopt,
1268 unix_getsockopt,
1269 unix_fcntl,
1270 unix_sendmsg,
1271 unix_recvmsg
1275 voidunix_proto_init(struct net_proto *pro)
1277 printk(KERN_INFO "NET3: Unix domain sockets 0.12 for Linux NET3.035.\n");
1278 sock_register(unix_proto_ops.family, &unix_proto_ops);
1279 #ifdef CONFIG_PROC_FS
1280 proc_net_register(&(struct proc_dir_entry) {
1281 PROC_NET_UNIX,4,"unix",
1282 S_IFREG | S_IRUGO,1,0,0,
1283 0, &proc_net_inode_operations,
1284 unix_get_info
1286 #endif
1289 * Local variables:
1290 * compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c"
1291 * End:
close