2 * NET3: Implementation of BSD Unix domain sockets. 4 * Authors: Alan Cox, <alan.cox@linux.org> 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 12 * Linus Torvalds : Assorted bug cures. 13 * Niibe Yutaka : async I/O support. 14 * Carsten Paeth : PF_UNIX check, address fixes. 15 * Alan Cox : Limit size of allocated blocks. 16 * Alan Cox : Fixed the stupid socketpair bug. 17 * Alan Cox : BSD compatibility fine tuning. 18 * Alan Cox : Fixed a bug in connect when interrupted. 19 * Alan Cox : Sorted out a proper draft version of 20 * file descriptor passing hacked up from 22 * Marty Leisner : Fixes to fd passing 23 * Nick Nevin : recvmsg bugfix. 24 * Alan Cox : Started proper garbage collector 25 * Heiko EiBfeldt : Missing verify_area check 26 * Alan Cox : Started POSIXisms 27 * Andreas Schwab : Replace inode by dentry for proper 29 * Kirk Petersen : Made this a module 31 * Known differences from reference BSD that was tested: 34 * ECONNREFUSED is not returned from one end of a connected() socket to the 35 * other the moment one end closes. 36 * fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark 37 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 39 * accept() returns a path name even if the connecting socket has closed 40 * in the meantime (BSD loses the path and gives up). 41 * accept() returns 0 length path for an unbound connector. BSD returns 16 42 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 43 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 44 * BSD af_unix apparently has connect forgetting to block properly. 45 * (need to check this with the POSIX spec in detail) 47 * Differences from 2.0.0-11-... (ANK) 48 * Bug fixes and improvements. 49 * - client shutdown killed server socket. 50 * - removed all useless cli/sti pairs. 52 * Semantic changes/extensions. 53 * - generic control message passing. 54 * - SCM_CREDENTIALS control message. 55 * - "Abstract" (not FS based) socket bindings. 56 * Abstract names are sequences of bytes (not zero terminated) 57 * started by 0, so that this name space does not intersect 61 #include <linux/module.h> 62 #include <linux/config.h> 63 #include <linux/kernel.h> 64 #include <linux/major.h> 65 #include <linux/signal.h> 66 #include <linux/sched.h> 67 #include <linux/errno.h> 68 #include <linux/string.h> 69 #include <linux/stat.h> 70 #include <linux/socket.h> 72 #include <linux/fcntl.h> 73 #include <linux/termios.h> 74 #include <linux/socket.h> 75 #include <linux/sockios.h> 76 #include <linux/net.h> 79 #include <linux/malloc.h> 80 #include <asm/uaccess.h> 81 #include <linux/skbuff.h> 82 #include <linux/netdevice.h> 85 #include <net/af_unix.h> 86 #include <linux/proc_fs.h> 88 #include <linux/init.h> 90 #include <asm/checksum.h> 92 #define min(a,b) (((a)<(b))?(a):(b)) 94 int sysctl_unix_delete_delay
= HZ
; 95 int sysctl_unix_destroy_delay
=10*HZ
; 97 unix_socket
*unix_socket_table
[UNIX_HASH_SIZE
+1]; 99 #define unix_sockets_unbound (unix_socket_table[UNIX_HASH_SIZE]) 101 #define UNIX_ABSTRACT(sk) ((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE) 103 extern __inline__
unsignedunix_hash_fold(unsigned hash
) 111 #define unix_peer(sk) ((sk)->pair) 113 extern __inline__
intunix_our_peer(unix_socket
*sk
, unix_socket
*osk
) 115 returnunix_peer(osk
) == sk
; 118 extern __inline__
intunix_may_send(unix_socket
*sk
, unix_socket
*osk
) 120 return(sk
->type
==osk
->type
); 123 extern __inline__
voidunix_lock(unix_socket
*sk
) 128 extern __inline__
intunix_unlock(unix_socket
*sk
) 130 return sk
->sock_readers
--; 133 extern __inline__
intunix_locked(unix_socket
*sk
) 135 return sk
->sock_readers
; 138 extern __inline__
voidunix_release_addr(struct unix_address
*addr
) 142 if(atomic_dec_and_test(&addr
->refcnt
)) 147 static voidunix_destruct_addr(struct sock
*sk
) 149 struct unix_address
*addr
= sk
->protinfo
.af_unix
.addr
; 151 unix_release_addr(addr
); 155 * Check unix socket name: 156 * - should be not zero length. 157 * - if started by not zero, should be NULL terminated (FS object) 158 * - if started by zero, it is abstract name. 161 static intunix_mkname(struct sockaddr_un
* sunaddr
,int len
,unsigned*hashp
) 163 if(len
<=sizeof(short) || len
>sizeof(*sunaddr
)) 165 if(!sunaddr
|| sunaddr
->sun_family
!= AF_UNIX
) 167 if(sunaddr
->sun_path
[0]) 170 * This may look like an off by one error but it is 171 * a bit more subtle. 108 is the longest valid AF_UNIX 172 * path for a binding. sun_path[108] doesnt as such 173 * exist. However in kernel space we are guaranteed that 174 * it is a valid memory location in our kernel 177 if(len
>sizeof(*sunaddr
)) 178 len
=sizeof(*sunaddr
); 179 ((char*)sunaddr
)[len
]=0; 180 len
=strlen(sunaddr
->sun_path
)+1+sizeof(short); 184 *hashp
=unix_hash_fold(csum_partial((char*)sunaddr
, len
,0)); 188 static voidunix_remove_socket(unix_socket
*sk
) 190 unix_socket
**list
= sk
->protinfo
.af_unix
.list
; 192 sk
->next
->prev
= sk
->prev
; 194 sk
->prev
->next
= sk
->next
; 197 sk
->protinfo
.af_unix
.list
= NULL
; 202 static voidunix_insert_socket(unix_socket
*sk
) 204 unix_socket
**list
= sk
->protinfo
.af_unix
.list
; 212 static unix_socket
*unix_find_socket_byname(struct sockaddr_un
*sunname
, 213 int len
,int type
,unsigned hash
) 217 for(s
=unix_socket_table
[(hash
^type
)&0xF]; s
; s
=s
->next
) 219 if(s
->protinfo
.af_unix
.addr
->len
==len
&& 220 memcmp(s
->protinfo
.af_unix
.addr
->name
, sunname
, len
) ==0&& 230 static unix_socket
*unix_find_socket_byinode(struct inode
*i
) 234 for(s
=unix_socket_table
[i
->i_ino
&0xF]; s
; s
=s
->next
) 236 struct dentry
*dentry
= s
->protinfo
.af_unix
.dentry
; 238 if(dentry
&& dentry
->d_inode
== i
) 248 * Delete a unix socket. We have to allow for deferring this on a timer. 251 static voidunix_destroy_timer(unsigned long data
) 253 unix_socket
*sk
=(unix_socket
*)data
; 254 if(!unix_locked(sk
) &&atomic_read(&sk
->wmem_alloc
) ==0) 264 sk
->timer
.expires
=jiffies
+sysctl_unix_destroy_delay
;/* No real hurry try it every 10 seconds or so */ 265 add_timer(&sk
->timer
); 269 static voidunix_delayed_delete(unix_socket
*sk
) 271 sk
->timer
.data
=(unsigned long)sk
; 272 sk
->timer
.expires
=jiffies
+sysctl_unix_delete_delay
;/* Normally 1 second after will clean up. After that we try every 10 */ 273 sk
->timer
.function
=unix_destroy_timer
; 274 add_timer(&sk
->timer
); 277 static voidunix_destroy_socket(unix_socket
*sk
) 281 unix_remove_socket(sk
); 283 while((skb
=skb_dequeue(&sk
->receive_queue
))!=NULL
) 285 if(sk
->state
==TCP_LISTEN
) 287 unix_socket
*osk
=skb
->sk
; 288 osk
->state
=TCP_CLOSE
; 289 kfree_skb(skb
);/* Now surplus - free the skb first before the socket */ 290 osk
->state_change(osk
);/* So the connect wakes and cleans up (if any) */ 291 /* osk will be destroyed when it gets to close or the timer fires */ 295 /* passed fds are erased in the kfree_skb hook */ 300 if(sk
->protinfo
.af_unix
.dentry
!=NULL
) 302 dput(sk
->protinfo
.af_unix
.dentry
); 303 sk
->protinfo
.af_unix
.dentry
=NULL
; 306 if(!unix_unlock(sk
) &&atomic_read(&sk
->wmem_alloc
) ==0) 313 unix_delayed_delete(sk
);/* Try every so often until buffers are all freed */ 316 /* socket destroyed, decrement count */ 320 static intunix_listen(struct socket
*sock
,int backlog
) 322 struct sock
*sk
= sock
->sk
; 324 if(sock
->state
!= SS_UNCONNECTED
) 326 if(sock
->type
!=SOCK_STREAM
) 327 return-EOPNOTSUPP
;/* Only stream sockets accept */ 328 if(!sk
->protinfo
.af_unix
.addr
) 329 return-EINVAL
;/* No listens on an unbound socket */ 330 sk
->max_ack_backlog
=backlog
; 331 if(sk
->ack_backlog
< backlog
) 332 sk
->state_change(sk
); 333 sk
->state
=TCP_LISTEN
; 334 sock
->flags
|= SO_ACCEPTCON
; 338 externstruct proto_ops unix_stream_ops
; 339 externstruct proto_ops unix_dgram_ops
; 341 static intunix_create(struct socket
*sock
,int protocol
) 345 sock
->state
= SS_UNCONNECTED
; 347 if(protocol
&& protocol
!= PF_UNIX
) 348 return-EPROTONOSUPPORT
; 353 sock
->ops
= &unix_stream_ops
; 356 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 360 sock
->type
=SOCK_DGRAM
; 362 sock
->ops
= &unix_dgram_ops
; 365 return-ESOCKTNOSUPPORT
; 367 sk
=sk_alloc(AF_UNIX
, GFP_KERNEL
); 371 sock_init_data(sock
,sk
); 373 sk
->destruct
= unix_destruct_addr
; 374 sk
->protinfo
.af_unix
.family
=AF_UNIX
; 375 sk
->protinfo
.af_unix
.dentry
=NULL
; 376 sk
->sock_readers
=1;/* Us */ 377 sk
->protinfo
.af_unix
.readsem
=MUTEX
;/* single task reading lock */ 379 sk
->protinfo
.af_unix
.list
=&unix_sockets_unbound
; 380 unix_insert_socket(sk
); 382 /* socket created, increment count */ 388 static intunix_release(struct socket
*sock
,struct socket
*peer
) 390 unix_socket
*sk
= sock
->sk
; 396 if(sock
->state
!= SS_UNCONNECTED
) 397 sock
->state
= SS_DISCONNECTING
; 399 sk
->state_change(sk
); 401 skpair
=unix_peer(sk
); 402 if(sock
->type
==SOCK_STREAM
&& skpair
) 404 if(unix_our_peer(sk
, skpair
)) 405 skpair
->shutdown
=SHUTDOWN_MASK
;/* No more writes */ 406 if(skpair
->state
!=TCP_LISTEN
) 407 skpair
->state_change(skpair
);/* Wake any blocked writes */ 410 unix_unlock(skpair
);/* It may now die */ 411 unix_peer(sk
)=NULL
;/* No pair */ 412 unix_destroy_socket(sk
);/* Try to flush out this socket. Throw out buffers at least */ 413 unix_gc();/* Garbage collect fds */ 416 * FIXME: BSD difference: In BSD all sockets connected to use get ECONNRESET and we die on the spot. In 417 * Linux we behave like files and pipes do and wait for the last dereference. 428 static intunix_autobind(struct socket
*sock
) 430 struct sock
*sk
= sock
->sk
; 431 static u32 ordernum
=1; 432 struct unix_address
* addr
; 435 addr
=kmalloc(sizeof(*addr
) +sizeof(short) +16, GFP_KERNEL
); 438 if(sk
->protinfo
.af_unix
.addr
|| sk
->protinfo
.af_unix
.dentry
) 443 memset(addr
,0,sizeof(*addr
) +sizeof(short) +16); 444 addr
->name
->sun_family
= AF_UNIX
; 445 atomic_set(&addr
->refcnt
,1); 448 addr
->len
=sprintf(addr
->name
->sun_path
+1,"%08x", ordernum
) +1+sizeof(short); 449 addr
->hash
=unix_hash_fold(csum_partial((void*)addr
->name
, addr
->len
,0)); 452 if((osk
=unix_find_socket_byname(addr
->name
, addr
->len
, sock
->type
, 453 addr
->hash
)) != NULL
) 459 sk
->protinfo
.af_unix
.addr
= addr
; 460 unix_remove_socket(sk
); 461 sk
->protinfo
.af_unix
.list
= &unix_socket_table
[(addr
->hash
^ sk
->type
)&0xF]; 462 unix_insert_socket(sk
); 466 static unix_socket
*unix_find_other(struct sockaddr_un
*sunname
,int len
, 467 int type
,unsigned hash
,int*error
) 471 if(sunname
->sun_path
[0]) 473 struct dentry
*dentry
; 474 dentry
=open_namei(sunname
->sun_path
,2, S_IFSOCK
); 476 *error
=PTR_ERR(dentry
); 479 u
=unix_find_socket_byinode(dentry
->d_inode
); 481 if(u
&& u
->type
!= type
) 489 u
=unix_find_socket_byname(sunname
, len
, type
, hash
); 493 *error
=-ECONNREFUSED
; 500 static intunix_bind(struct socket
*sock
,struct sockaddr
*uaddr
,int addr_len
) 502 struct sock
*sk
= sock
->sk
; 503 struct sockaddr_un
*sunaddr
=(struct sockaddr_un
*)uaddr
; 504 struct dentry
* dentry
; 507 struct unix_address
*addr
; 509 if(sk
->protinfo
.af_unix
.addr
|| sk
->protinfo
.af_unix
.dentry
|| 510 sunaddr
->sun_family
!= AF_UNIX
) 513 if(addr_len
==sizeof(short)) 514 returnunix_autobind(sock
); 516 addr_len
=unix_mkname(sunaddr
, addr_len
, &hash
); 520 addr
=kmalloc(sizeof(*addr
)+addr_len
, GFP_KERNEL
); 524 /* We slept; recheck ... */ 526 if(sk
->protinfo
.af_unix
.addr
|| sk
->protinfo
.af_unix
.dentry
) 529 return-EINVAL
;/* Already bound */ 532 memcpy(addr
->name
, sunaddr
, addr_len
); 533 addr
->len
= addr_len
; 535 atomic_set(&addr
->refcnt
,1); 537 if(!sunaddr
->sun_path
[0]) 539 unix_socket
*osk
=unix_find_socket_byname(sunaddr
, addr_len
, 547 unix_remove_socket(sk
); 548 sk
->protinfo
.af_unix
.addr
= addr
; 549 sk
->protinfo
.af_unix
.list
= &unix_socket_table
[(hash
^sk
->type
)&0xF]; 550 unix_insert_socket(sk
); 554 addr
->hash
= UNIX_HASH_SIZE
; 555 sk
->protinfo
.af_unix
.addr
= addr
; 558 dentry
=do_mknod(sunaddr
->sun_path
, S_IFSOCK
|S_IRWXUGO
,0); 561 err
=PTR_ERR(dentry
); 562 unix_release_addr(addr
); 563 sk
->protinfo
.af_unix
.addr
= NULL
; 569 unix_remove_socket(sk
); 570 sk
->protinfo
.af_unix
.list
= &unix_socket_table
[dentry
->d_inode
->i_ino
&0xF]; 571 sk
->protinfo
.af_unix
.dentry
= dentry
; 572 unix_insert_socket(sk
); 577 static intunix_dgram_connect(struct socket
*sock
,struct sockaddr
*addr
, 580 struct sock
*sk
= sock
->sk
; 581 struct sockaddr_un
*sunaddr
=(struct sockaddr_un
*)addr
; 587 * 1003.1g breaking connected state with AF_UNSPEC 590 if(addr
->sa_family
==AF_UNSPEC
) 594 unix_unlock(unix_peer(sk
)); 595 unix_peer(sk
) = NULL
; 596 sock
->state
=SS_UNCONNECTED
; 601 alen
=unix_mkname(sunaddr
, alen
, &hash
); 605 other
=unix_find_other(sunaddr
, alen
, sock
->type
, hash
, &err
); 608 if(!unix_may_send(sk
, other
)) 615 * If it was connected, reconnect. 619 unix_unlock(unix_peer(sk
)); 623 if(sock
->passcred
&& !sk
->protinfo
.af_unix
.addr
) 628 static intunix_stream_connect1(struct socket
*sock
,struct msghdr
*msg
, 629 int len
,struct unix_skb_parms
*cmsg
,int nonblock
) 631 struct sockaddr_un
*sunaddr
=(struct sockaddr_un
*)msg
->msg_name
; 632 struct sock
*sk
= sock
->sk
; 639 addr_len
=unix_mkname(sunaddr
, msg
->msg_namelen
, &hash
); 646 /* This is ok... continue with connect */ 649 /* Socket is already connected */ 652 /* Not yet connected... we will check this. */ 661 if(sock
->state
==SS_CONNECTING
&& sk
->state
==TCP_ESTABLISHED
) 663 sock
->state
=SS_CONNECTED
; 664 if(!sk
->protinfo
.af_unix
.addr
) 668 if(sock
->state
==SS_CONNECTING
&& sk
->state
== TCP_CLOSE
) 670 sock
->state
=SS_UNCONNECTED
; 673 if(sock
->state
!=SS_CONNECTING
) 678 * Drop through the connect up logic to the wait. 682 if(sock
->state
==SS_UNCONNECTED
) 685 * Now ready to connect 688 skb
=sock_alloc_send_skb(sk
, len
,0, nonblock
, &err
);/* Marker object */ 691 memcpy(&UNIXCB(skb
), cmsg
,sizeof(*cmsg
)); 693 memcpy_fromiovec(skb_put(skb
,len
), msg
->msg_iov
, len
); 695 other
=unix_find_other(sunaddr
, addr_len
, sk
->type
, hash
, &err
); 701 other
->ack_backlog
++; 703 skb_queue_tail(&other
->receive_queue
,skb
); 704 sk
->state
=TCP_SYN_SENT
; 705 sock
->state
=SS_CONNECTING
; 706 other
->data_ready(other
,0);/* Wake up ! */ 710 /* Wait for an accept */ 712 while(sk
->state
==TCP_SYN_SENT
) 716 interruptible_sleep_on(sk
->sleep
); 717 if(signal_pending(current
)) 722 * Has the other end closed on us ? 725 if(sk
->state
==TCP_CLOSE
) 727 unix_unlock(unix_peer(sk
)); 729 sock
->state
=SS_UNCONNECTED
; 734 * Amazingly it has worked 737 sock
->state
=SS_CONNECTED
; 738 if(!sk
->protinfo
.af_unix
.addr
) 744 static intunix_stream_connect(struct socket
*sock
,struct sockaddr
*uaddr
, 745 int addr_len
,int flags
) 748 struct unix_skb_parms cmsg
; 750 msg
.msg_name
= uaddr
; 751 msg
.msg_namelen
= addr_len
; 754 cmsg
.creds
.pid
= current
->pid
; 755 cmsg
.creds
.uid
= current
->euid
; 756 cmsg
.creds
.gid
= current
->egid
; 758 returnunix_stream_connect1(sock
, &msg
,0, &cmsg
, flags
&O_NONBLOCK
); 761 static intunix_socketpair(struct socket
*socka
,struct socket
*sockb
) 763 struct sock
*ska
=socka
->sk
, *skb
= sockb
->sk
; 765 /* Join our sockets back to back */ 771 if(ska
->type
!= SOCK_DGRAM
) 773 ska
->state
=TCP_ESTABLISHED
; 774 skb
->state
=TCP_ESTABLISHED
; 775 socka
->state
=SS_CONNECTED
; 776 sockb
->state
=SS_CONNECTED
; 781 static intunix_accept(struct socket
*sock
,struct socket
*newsock
,int flags
) 783 unix_socket
*sk
= sock
->sk
; 784 unix_socket
*newsk
= newsock
->sk
; 788 if(sock
->state
!= SS_UNCONNECTED
) 790 if(!(sock
->flags
& SO_ACCEPTCON
)) 793 if(sock
->type
!=SOCK_STREAM
) 795 if(sk
->state
!=TCP_LISTEN
) 798 if(sk
->protinfo
.af_unix
.addr
) 800 atomic_inc(&sk
->protinfo
.af_unix
.addr
->refcnt
); 801 newsk
->protinfo
.af_unix
.addr
=sk
->protinfo
.af_unix
.addr
; 803 if(sk
->protinfo
.af_unix
.dentry
) 804 newsk
->protinfo
.af_unix
.dentry
=dget(sk
->protinfo
.af_unix
.dentry
); 808 skb
=skb_dequeue(&sk
->receive_queue
); 813 interruptible_sleep_on(sk
->sleep
); 814 if(signal_pending(current
)) 818 if(!(UNIXCB(skb
).attr
& MSG_SYN
)) 821 tsk
->state_change(tsk
); 830 unix_peer(newsk
)=tsk
; 831 unix_peer(tsk
)=newsk
; 832 tsk
->state
=TCP_ESTABLISHED
; 833 newsk
->state
=TCP_ESTABLISHED
; 834 memcpy(&newsk
->peercred
,UNIXCREDS(skb
),sizeof(struct ucred
)); 835 tsk
->peercred
.pid
= current
->pid
; 836 tsk
->peercred
.uid
= current
->euid
; 837 tsk
->peercred
.gid
= current
->egid
; 838 unix_lock(newsk
);/* Swap lock over */ 839 unix_unlock(sk
);/* Locked to child socket not master */ 840 unix_lock(tsk
);/* Back lock */ 841 kfree_skb(skb
);/* The buffer is just used as a tag */ 842 tsk
->state_change(tsk
);/* Wake up any sleeping connect */ 843 sock_wake_async(tsk
->socket
,0); 848 static intunix_getname(struct socket
*sock
,struct sockaddr
*uaddr
,int*uaddr_len
,int peer
) 850 struct sock
*sk
= sock
->sk
; 851 struct sockaddr_un
*sunaddr
=(struct sockaddr_un
*)uaddr
; 859 if(!sk
->protinfo
.af_unix
.addr
) 861 sunaddr
->sun_family
= AF_UNIX
; 862 sunaddr
->sun_path
[0] =0; 863 *uaddr_len
=sizeof(short); 864 return0;/* Not bound */ 866 *uaddr_len
= sk
->protinfo
.af_unix
.addr
->len
; 867 memcpy(sunaddr
, sk
->protinfo
.af_unix
.addr
->name
, *uaddr_len
); 871 static voidunix_detach_fds(struct scm_cookie
*scm
,struct sk_buff
*skb
) 875 scm
->fp
=UNIXCB(skb
).fp
; 876 skb
->destructor
= sock_wfree
; 877 UNIXCB(skb
).fp
= NULL
; 879 for(i
=scm
->fp
->count
-1; i
>=0; i
--) 880 unix_notinflight(scm
->fp
->fp
[i
]); 883 static voidunix_destruct_fds(struct sk_buff
*skb
) 885 struct scm_cookie scm
; 886 memset(&scm
,0,sizeof(scm
)); 887 unix_detach_fds(&scm
, skb
); 892 static voidunix_attach_fds(struct scm_cookie
*scm
,struct sk_buff
*skb
) 895 for(i
=scm
->fp
->count
-1; i
>=0; i
--) 896 unix_inflight(scm
->fp
->fp
[i
]); 897 UNIXCB(skb
).fp
= scm
->fp
; 898 skb
->destructor
= unix_destruct_fds
; 907 static intunix_dgram_sendmsg(struct socket
*sock
,struct msghdr
*msg
,int len
, 908 struct scm_cookie
*scm
) 910 struct sock
*sk
= sock
->sk
; 912 struct sockaddr_un
*sunaddr
=msg
->msg_name
; 913 int namelen
=0;/* fake GCC */ 918 if(msg
->msg_flags
&MSG_OOB
) 921 if(msg
->msg_flags
&~MSG_DONTWAIT
) 924 if(msg
->msg_namelen
) { 925 namelen
=unix_mkname(sunaddr
, msg
->msg_namelen
, &hash
); 934 if(sock
->passcred
&& !sk
->protinfo
.af_unix
.addr
) 937 skb
=sock_alloc_send_skb(sk
, len
,0, msg
->msg_flags
&MSG_DONTWAIT
, &err
); 942 memcpy(UNIXCREDS(skb
), &scm
->creds
,sizeof(struct ucred
)); 943 UNIXCB(skb
).attr
= msg
->msg_flags
; 945 unix_attach_fds(scm
, skb
); 947 skb
->h
.raw
= skb
->data
; 948 memcpy_fromiovec(skb_put(skb
,len
), msg
->msg_iov
, len
); 950 other
=unix_peer(sk
); 951 if(other
&& other
->dead
) 954 * Check with 1003.1g - what should 960 if(sunaddr
== NULL
) { 967 other
=unix_find_other(sunaddr
, namelen
, sk
->type
, hash
, &err
); 974 if(!unix_may_send(sk
, other
)) 982 skb_queue_tail(&other
->receive_queue
, skb
); 983 other
->data_ready(other
,len
); 991 static intunix_stream_sendmsg(struct socket
*sock
,struct msghdr
*msg
,int len
, 992 struct scm_cookie
*scm
) 994 struct sock
*sk
= sock
->sk
; 996 struct sockaddr_un
*sunaddr
=msg
->msg_name
; 1002 if(sock
->flags
& SO_ACCEPTCON
) 1005 if(msg
->msg_flags
&MSG_OOB
) 1008 if(msg
->msg_flags
&~MSG_DONTWAIT
) 1011 if(msg
->msg_namelen
) { 1012 if(sk
->state
==TCP_ESTABLISHED
) 1022 if(sk
->shutdown
&SEND_SHUTDOWN
) { 1023 send_sig(SIGPIPE
,current
,0); 1030 * Optimisation for the fact that under 0.01% of X messages typically 1036 /* Keep two messages in the pipe so it schedules better */ 1037 if(size
> (sk
->sndbuf
-sizeof(struct sk_buff
)) /2) 1038 size
= (sk
->sndbuf
-sizeof(struct sk_buff
)) /2; 1041 * Keep to page sized kmalloc()'s as various people 1042 * have suggested. Big mallocs stress the vm too 1047 limit
=3500;/* Fall back to a page if we can't grab a big buffer this instant */ 1049 limit
=0;/* Otherwise just grab and wait */ 1055 skb
=sock_alloc_send_skb(sk
,size
,limit
,msg
->msg_flags
&MSG_DONTWAIT
, &err
); 1065 * If you pass two values to the sock_alloc_send_skb 1066 * it tries to grab the large buffer with GFP_BUFFER 1067 * (which can fail easily), and if it fails grab the 1068 * fallback size buffer which is under a page and will 1071 size
=min(size
,skb_tailroom(skb
)); 1073 memcpy(UNIXCREDS(skb
), &scm
->creds
,sizeof(struct ucred
)); 1074 UNIXCB(skb
).attr
= msg
->msg_flags
; 1076 unix_attach_fds(scm
, skb
); 1078 /* N.B. this could fail with -EFAULT */ 1079 memcpy_fromiovec(skb_put(skb
,size
), msg
->msg_iov
, size
); 1081 other
=unix_peer(sk
); 1083 if(other
->dead
|| (sk
->shutdown
& SEND_SHUTDOWN
)) 1088 send_sig(SIGPIPE
,current
,0); 1092 skb_queue_tail(&other
->receive_queue
, skb
); 1093 other
->data_ready(other
,size
); 1101 * Sleep until data has arrive. But check for races.. 1104 static voidunix_data_wait(unix_socket
* sk
) 1106 if(!skb_peek(&sk
->receive_queue
)) 1108 sk
->socket
->flags
|= SO_WAITDATA
; 1109 interruptible_sleep_on(sk
->sleep
); 1110 sk
->socket
->flags
&= ~SO_WAITDATA
; 1114 static intunix_dgram_recvmsg(struct socket
*sock
,struct msghdr
*msg
,int size
, 1115 int flags
,struct scm_cookie
*scm
) 1117 struct sock
*sk
= sock
->sk
; 1118 int noblock
= flags
& MSG_DONTWAIT
; 1119 struct sk_buff
*skb
; 1125 msg
->msg_namelen
=0; 1127 skb
=skb_recv_datagram(sk
, flags
, noblock
, &err
); 1133 msg
->msg_namelen
=sizeof(short); 1134 if(skb
->sk
->protinfo
.af_unix
.addr
) 1136 msg
->msg_namelen
=skb
->sk
->protinfo
.af_unix
.addr
->len
; 1137 memcpy(msg
->msg_name
, 1138 skb
->sk
->protinfo
.af_unix
.addr
->name
, 1139 skb
->sk
->protinfo
.af_unix
.addr
->len
); 1145 else if(size
< skb
->len
) 1146 msg
->msg_flags
|= MSG_TRUNC
; 1148 err
=skb_copy_datagram_iovec(skb
,0, msg
->msg_iov
, size
); 1152 scm
->creds
= *UNIXCREDS(skb
); 1154 if(!(flags
& MSG_PEEK
)) 1157 unix_detach_fds(scm
, skb
); 1161 /* It is questionable: on PEEK we could: 1162 - do not return fds - good, but too simple 8) 1163 - return fds, and do not return them on read (old strategy, 1165 - clone fds (I choosed it for now, it is the most universal 1168 POSIX 1003.1g does not actually define this clearly 1169 at all. POSIX 1003.1g doesn't define a lot of things 1174 scm
->fp
=scm_fp_dup(UNIXCB(skb
).fp
); 1179 skb_free_datagram(sk
,skb
); 1185 static intunix_stream_recvmsg(struct socket
*sock
,struct msghdr
*msg
,int size
, 1186 int flags
,struct scm_cookie
*scm
) 1188 struct sock
*sk
= sock
->sk
; 1189 int noblock
= flags
& MSG_DONTWAIT
; 1190 struct sockaddr_un
*sunaddr
=msg
->msg_name
; 1195 if(sock
->flags
& SO_ACCEPTCON
) 1200 if(flags
&MSG_WAITALL
) 1204 msg
->msg_namelen
=0; 1206 /* Lock the socket to prevent queue disordering 1207 * while sleeps in memcpy_tomsg 1210 down(&sk
->protinfo
.af_unix
.readsem
); 1215 struct sk_buff
*skb
; 1217 skb
=skb_dequeue(&sk
->receive_queue
); 1220 if(copied
>= target
) 1224 * POSIX 1003.1g mandates this order. 1229 up(&sk
->protinfo
.af_unix
.readsem
); 1230 returnsock_error(sk
); 1233 if(sk
->shutdown
& RCV_SHUTDOWN
) 1235 up(&sk
->protinfo
.af_unix
.readsem
); 1239 if(signal_pending(current
)) 1241 down(&sk
->protinfo
.af_unix
.readsem
); 1245 /* Never glue messages from different writers */ 1247 memcmp(UNIXCREDS(skb
), &scm
->creds
,sizeof(scm
->creds
)) !=0) 1249 skb_queue_head(&sk
->receive_queue
, skb
); 1253 /* Copy address just once */ 1256 msg
->msg_namelen
=sizeof(short); 1257 if(skb
->sk
->protinfo
.af_unix
.addr
) 1259 msg
->msg_namelen
=skb
->sk
->protinfo
.af_unix
.addr
->len
; 1261 skb
->sk
->protinfo
.af_unix
.addr
->name
, 1262 skb
->sk
->protinfo
.af_unix
.addr
->len
); 1267 chunk
=min(skb
->len
, size
); 1268 /* N.B. This could fail with -EFAULT */ 1269 memcpy_toiovec(msg
->msg_iov
, skb
->data
, chunk
); 1273 /* Copy credentials */ 1274 scm
->creds
= *UNIXCREDS(skb
); 1277 /* Mark read part of skb as used */ 1278 if(!(flags
& MSG_PEEK
)) 1280 skb_pull(skb
, chunk
); 1283 unix_detach_fds(scm
, skb
); 1285 /* put the skb back if we didn't use it up.. */ 1288 skb_queue_head(&sk
->receive_queue
, skb
); 1299 /* It is questionable, see note in unix_dgram_recvmsg. 1303 scm
->fp
=scm_fp_dup(UNIXCB(skb
).fp
); 1305 /* put message back and return */ 1306 skb_queue_head(&sk
->receive_queue
, skb
); 1311 up(&sk
->protinfo
.af_unix
.readsem
); 1315 static intunix_shutdown(struct socket
*sock
,int mode
) 1317 struct sock
*sk
= sock
->sk
; 1318 unix_socket
*other
=unix_peer(sk
); 1322 if(mode
&SEND_SHUTDOWN
) 1324 sk
->shutdown
|=SEND_SHUTDOWN
; 1325 sk
->state_change(sk
); 1326 if(other
&& sk
->type
== SOCK_STREAM
&& other
->state
!= TCP_LISTEN
) 1328 if(unix_our_peer(sk
, other
)) 1329 other
->shutdown
|=RCV_SHUTDOWN
; 1330 other
->state_change(other
); 1333 other
=unix_peer(sk
); 1334 if(mode
&RCV_SHUTDOWN
) 1336 sk
->shutdown
|=RCV_SHUTDOWN
; 1337 sk
->state_change(sk
); 1338 if(other
&& sk
->type
!= SOCK_DGRAM
&& other
->state
!= TCP_LISTEN
) 1340 if(unix_our_peer(sk
, other
)) 1341 other
->shutdown
|=SEND_SHUTDOWN
; 1342 other
->state_change(other
); 1349 static intunix_ioctl(struct socket
*sock
,unsigned int cmd
,unsigned long arg
) 1351 struct sock
*sk
= sock
->sk
; 1358 amount
= sk
->sndbuf
-atomic_read(&sk
->wmem_alloc
); 1361 returnput_user(amount
, (int*)arg
); 1364 struct sk_buff
*skb
; 1365 if(sk
->state
==TCP_LISTEN
) 1368 * These two are safe on current systems as 1369 * only user tasks fiddle here 1371 if((skb
=skb_peek(&sk
->receive_queue
))!=NULL
) 1373 returnput_user(amount
, (int*)arg
); 1383 #ifdef CONFIG_PROC_FS 1384 static intunix_read_proc(char*buffer
,char**start
, off_t offset
, 1385 int length
,int*eof
,void*data
) 1393 len
+=sprintf(buffer
,"Num RefCount Protocol Flags Type St " 1396 forall_unix_sockets(i
,s
) 1398 len
+=sprintf(buffer
+len
,"%p: %08X %08X %08lX %04X %02X %5ld", 1402 s
->socket
? s
->socket
->flags
:0, 1404 s
->socket
? s
->socket
->state
:0, 1405 s
->socket
? s
->socket
->inode
->i_ino
:0); 1407 if(s
->protinfo
.af_unix
.addr
) 1410 memcpy(buffer
+len
, s
->protinfo
.af_unix
.addr
->name
->sun_path
, 1411 s
->protinfo
.af_unix
.addr
->len
-sizeof(short)); 1412 if(!UNIX_ABSTRACT(s
)) 1416 len
+= s
->protinfo
.af_unix
.addr
->len
-sizeof(short); 1426 if(pos
>offset
+length
) 1431 *start
=buffer
+(offset
-begin
); 1432 len
-=(offset
-begin
); 1439 struct proto_ops unix_stream_ops
= { 1445 unix_stream_connect
, 1456 unix_stream_sendmsg
, 1460 struct proto_ops unix_dgram_ops
= { 1481 struct net_proto_family unix_family_ops
= { 1487 #ifdef CONFIG_SYSCTL 1488 externvoidunix_sysctl_register(void); 1489 externvoidunix_sysctl_unregister(void); 1492 intinit_module(void) 1494 __initfunc(voidunix_proto_init(struct net_proto
*pro
)) 1497 struct sk_buff
*dummy_skb
; 1498 struct proc_dir_entry
*ent
; 1500 printk(KERN_INFO
"NET3: Unix domain sockets 0.16 for Linux NET3.038.\n"); 1501 if(sizeof(struct unix_skb_parms
) >sizeof(dummy_skb
->cb
)) 1503 printk(KERN_CRIT
"unix_proto_init: panic\n"); 1510 sock_register(&unix_family_ops
); 1511 #ifdef CONFIG_PROC_FS 1512 ent
=create_proc_entry("net/unix",0,0); 1513 ent
->read_proc
= unix_read_proc
; 1517 #ifdef CONFIG_SYSCTL 1518 unix_sysctl_register(); 1526 voidcleanup_module(void) 1528 sock_unregister(AF_UNIX
); 1529 #ifdef CONFIG_SYSCTL 1530 unix_sysctl_unregister(); 1537 * compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c"