Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
[linux-2.6] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge number
40  *                                      of socks hashed (this for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
84
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/sock.h>
107 #include <net/tcp_states.h>
108 #include <net/af_unix.h>
109 #include <linux/proc_fs.h>
110 #include <linux/seq_file.h>
111 #include <net/scm.h>
112 #include <linux/init.h>
113 #include <linux/poll.h>
114 #include <linux/smp_lock.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119
/* Value copied into sk_max_ack_backlog of every socket made by unix_create1(). */
int sysctl_unix_max_dgram_qlen = 10;

/*
 * Hash chains holding every AF_UNIX socket: UNIX_HASH_SIZE regular chains
 * plus one extra chain at index UNIX_HASH_SIZE (unix_sockets_unbound).
 */
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Taken around every lookup/insert/remove on unix_socket_table in this file. */
DEFINE_SPINLOCK(unix_table_lock);
/*
 * Number of unix socks: incremented in unix_create1(), decremented in
 * unix_sock_destructor(), and capped at 2*get_max_files() on creation.
 */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Chain on which unix_create1() places a new socket until it gets an address. */
#define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])

/*
 * Non-zero when the bound address is an abstract name: unix_bind() stores
 * UNIX_HASH_SIZE in addr->hash for filesystem names only.  Dereferences
 * addr, so the socket must already have an address.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
129
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Ask the security layer to fill in the security data/length slots carried
 * by @skb; if that fails, the data slot is reset to NULL.
 */
static void unix_get_peersec_dgram(struct sk_buff *skb)
{
	if (security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb),
					     UNIXSECLEN(skb)) != 0)
		*(UNIXSECDATA(skb)) = NULL;
}

/* Copy the security data pointer and length stored in @skb into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secdata = *UNIXSECDATA(skb);
	scm->seclen  = *UNIXSECLEN(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
static void unix_get_peersec_dgram(struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
153
154 /*
155  *  SMP locking strategy:
156  *    hash table is protected with spinlock unix_table_lock
157  *    each socket state is protected by separate rwlock.
158  */
159
160 static inline unsigned unix_hash_fold(unsigned hash)
161 {
162         hash ^= hash>>16;
163         hash ^= hash>>8;
164         return hash&(UNIX_HASH_SIZE-1);
165 }
166
167 #define unix_peer(sk) (unix_sk(sk)->peer)
168
169 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
170 {
171         return unix_peer(osk) == sk;
172 }
173
174 static inline int unix_may_send(struct sock *sk, struct sock *osk)
175 {
176         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
177 }
178
179 static struct sock *unix_peer_get(struct sock *s)
180 {
181         struct sock *peer;
182
183         unix_state_rlock(s);
184         peer = unix_peer(s);
185         if (peer)
186                 sock_hold(peer);
187         unix_state_runlock(s);
188         return peer;
189 }
190
191 static inline void unix_release_addr(struct unix_address *addr)
192 {
193         if (atomic_dec_and_test(&addr->refcnt))
194                 kfree(addr);
195 }
196
197 /*
198  *      Check unix socket name:
199  *              - should be not zero length.
200  *              - if started by not zero, should be NULL terminated (FS object)
201  *              - if started by zero, it is abstract name.
202  */
203  
204 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
205 {
206         if (len <= sizeof(short) || len > sizeof(*sunaddr))
207                 return -EINVAL;
208         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
209                 return -EINVAL;
210         if (sunaddr->sun_path[0]) {
211                 /*
212                  * This may look like an off by one error but it is a bit more
213                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
214                  * sun_path[108] doesnt as such exist.  However in kernel space
215                  * we are guaranteed that it is a valid memory location in our
216                  * kernel address buffer.
217                  */
218                 ((char *)sunaddr)[len]=0;
219                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
220                 return len;
221         }
222
223         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
224         return len;
225 }
226
/*
 * Unlink @sk from whatever chain it is on.  No locking is done here;
 * callers in this file hold unix_table_lock around the call.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
231
/*
 * Add @sk to the chain @list.  No locking is done here; callers in this
 * file hold unix_table_lock around the call.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        /* The socket is expected to be off every chain at this point. */
        BUG_TRAP(sk_unhashed(sk));
        sk_add_node(sk, list);
}
237
/* Locked variant of __unix_remove_socket(): takes unix_table_lock itself. */
static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}
244
/* Locked variant of __unix_insert_socket(): takes unix_table_lock itself. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}
251
252 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
253                                               int len, int type, unsigned hash)
254 {
255         struct sock *s;
256         struct hlist_node *node;
257
258         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
259                 struct unix_sock *u = unix_sk(s);
260
261                 if (u->addr->len == len &&
262                     !memcmp(u->addr->name, sunname, len))
263                         goto found;
264         }
265         s = NULL;
266 found:
267         return s;
268 }
269
270 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
271                                                    int len, int type,
272                                                    unsigned hash)
273 {
274         struct sock *s;
275
276         spin_lock(&unix_table_lock);
277         s = __unix_find_socket_byname(sunname, len, type, hash);
278         if (s)
279                 sock_hold(s);
280         spin_unlock(&unix_table_lock);
281         return s;
282 }
283
284 static struct sock *unix_find_socket_byinode(struct inode *i)
285 {
286         struct sock *s;
287         struct hlist_node *node;
288
289         spin_lock(&unix_table_lock);
290         sk_for_each(s, node,
291                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
292                 struct dentry *dentry = unix_sk(s)->dentry;
293
294                 if(dentry && dentry->d_inode == i)
295                 {
296                         sock_hold(s);
297                         goto found;
298                 }
299         }
300         s = NULL;
301 found:
302         spin_unlock(&unix_table_lock);
303         return s;
304 }
305
306 static inline int unix_writable(struct sock *sk)
307 {
308         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
309 }
310
311 static void unix_write_space(struct sock *sk)
312 {
313         read_lock(&sk->sk_callback_lock);
314         if (unix_writable(sk)) {
315                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
316                         wake_up_interruptible(sk->sk_sleep);
317                 sk_wake_async(sk, 2, POLL_OUT);
318         }
319         read_unlock(&sk->sk_callback_lock);
320 }
321
322 /* When dgram socket disconnects (or changes its peer), we clear its receive
323  * queue of packets arrived from previous peer. First, it allows to do
324  * flow control based only on wmem_alloc; second, sk connected to peer
325  * may receive messages only from that peer. */
326 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
327 {
328         if (!skb_queue_empty(&sk->sk_receive_queue)) {
329                 skb_queue_purge(&sk->sk_receive_queue);
330                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
331
332                 /* If one link of bidirectional dgram pipe is disconnected,
333                  * we signal error. Messages are lost. Do not make this,
334                  * when peer was not connected to us.
335                  */
336                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
337                         other->sk_err = ECONNRESET;
338                         other->sk_error_report(other);
339                 }
340         }
341 }
342
343 static void unix_sock_destructor(struct sock *sk)
344 {
345         struct unix_sock *u = unix_sk(sk);
346
347         skb_queue_purge(&sk->sk_receive_queue);
348
349         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
350         BUG_TRAP(sk_unhashed(sk));
351         BUG_TRAP(!sk->sk_socket);
352         if (!sock_flag(sk, SOCK_DEAD)) {
353                 printk("Attempt to release alive unix socket: %p\n", sk);
354                 return;
355         }
356
357         if (u->addr)
358                 unix_release_addr(u->addr);
359
360         atomic_dec(&unix_nr_socks);
361 #ifdef UNIX_REFCNT_DEBUG
362         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
363 #endif
364 }
365
366 static int unix_release_sock (struct sock *sk, int embrion)
367 {
368         struct unix_sock *u = unix_sk(sk);
369         struct dentry *dentry;
370         struct vfsmount *mnt;
371         struct sock *skpair;
372         struct sk_buff *skb;
373         int state;
374
375         unix_remove_socket(sk);
376
377         /* Clear state */
378         unix_state_wlock(sk);
379         sock_orphan(sk);
380         sk->sk_shutdown = SHUTDOWN_MASK;
381         dentry       = u->dentry;
382         u->dentry    = NULL;
383         mnt          = u->mnt;
384         u->mnt       = NULL;
385         state = sk->sk_state;
386         sk->sk_state = TCP_CLOSE;
387         unix_state_wunlock(sk);
388
389         wake_up_interruptible_all(&u->peer_wait);
390
391         skpair=unix_peer(sk);
392
393         if (skpair!=NULL) {
394                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
395                         unix_state_wlock(skpair);
396                         /* No more writes */
397                         skpair->sk_shutdown = SHUTDOWN_MASK;
398                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
399                                 skpair->sk_err = ECONNRESET;
400                         unix_state_wunlock(skpair);
401                         skpair->sk_state_change(skpair);
402                         read_lock(&skpair->sk_callback_lock);
403                         sk_wake_async(skpair,1,POLL_HUP);
404                         read_unlock(&skpair->sk_callback_lock);
405                 }
406                 sock_put(skpair); /* It may now die */
407                 unix_peer(sk) = NULL;
408         }
409
410         /* Try to flush out this socket. Throw out buffers at least */
411
412         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
413                 if (state==TCP_LISTEN)
414                         unix_release_sock(skb->sk, 1);
415                 /* passed fds are erased in the kfree_skb hook        */
416                 kfree_skb(skb);
417         }
418
419         if (dentry) {
420                 dput(dentry);
421                 mntput(mnt);
422         }
423
424         sock_put(sk);
425
426         /* ---- Socket is dead now and most probably destroyed ---- */
427
428         /*
429          * Fixme: BSD difference: In BSD all sockets connected to use get
430          *        ECONNRESET and we die on the spot. In Linux we behave
431          *        like files and pipes do and wait for the last
432          *        dereference.
433          *
434          * Can't we simply set sock->err?
435          *
436          *        What the above comment does talk about? --ANK(980817)
437          */
438
439         if (atomic_read(&unix_tot_inflight))
440                 unix_gc();              /* Garbage collect fds */       
441
442         return 0;
443 }
444
445 static int unix_listen(struct socket *sock, int backlog)
446 {
447         int err;
448         struct sock *sk = sock->sk;
449         struct unix_sock *u = unix_sk(sk);
450
451         err = -EOPNOTSUPP;
452         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
453                 goto out;                       /* Only stream/seqpacket sockets accept */
454         err = -EINVAL;
455         if (!u->addr)
456                 goto out;                       /* No listens on an unbound socket */
457         unix_state_wlock(sk);
458         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
459                 goto out_unlock;
460         if (backlog > sk->sk_max_ack_backlog)
461                 wake_up_interruptible_all(&u->peer_wait);
462         sk->sk_max_ack_backlog  = backlog;
463         sk->sk_state            = TCP_LISTEN;
464         /* set credentials so connect can copy them */
465         sk->sk_peercred.pid     = current->tgid;
466         sk->sk_peercred.uid     = current->euid;
467         sk->sk_peercred.gid     = current->egid;
468         err = 0;
469
470 out_unlock:
471         unix_state_wunlock(sk);
472 out:
473         return err;
474 }
475
476 static int unix_release(struct socket *);
477 static int unix_bind(struct socket *, struct sockaddr *, int);
478 static int unix_stream_connect(struct socket *, struct sockaddr *,
479                                int addr_len, int flags);
480 static int unix_socketpair(struct socket *, struct socket *);
481 static int unix_accept(struct socket *, struct socket *, int);
482 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
483 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
484 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
485 static int unix_shutdown(struct socket *, int);
486 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
487                                struct msghdr *, size_t);
488 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
489                                struct msghdr *, size_t, int);
490 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
491                               struct msghdr *, size_t);
492 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
493                               struct msghdr *, size_t, int);
494 static int unix_dgram_connect(struct socket *, struct sockaddr *,
495                               int, int);
496 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
497                                   struct msghdr *, size_t);
498
499 static const struct proto_ops unix_stream_ops = {
500         .family =       PF_UNIX,
501         .owner =        THIS_MODULE,
502         .release =      unix_release,
503         .bind =         unix_bind,
504         .connect =      unix_stream_connect,
505         .socketpair =   unix_socketpair,
506         .accept =       unix_accept,
507         .getname =      unix_getname,
508         .poll =         unix_poll,
509         .ioctl =        unix_ioctl,
510         .listen =       unix_listen,
511         .shutdown =     unix_shutdown,
512         .setsockopt =   sock_no_setsockopt,
513         .getsockopt =   sock_no_getsockopt,
514         .sendmsg =      unix_stream_sendmsg,
515         .recvmsg =      unix_stream_recvmsg,
516         .mmap =         sock_no_mmap,
517         .sendpage =     sock_no_sendpage,
518 };
519
520 static const struct proto_ops unix_dgram_ops = {
521         .family =       PF_UNIX,
522         .owner =        THIS_MODULE,
523         .release =      unix_release,
524         .bind =         unix_bind,
525         .connect =      unix_dgram_connect,
526         .socketpair =   unix_socketpair,
527         .accept =       sock_no_accept,
528         .getname =      unix_getname,
529         .poll =         datagram_poll,
530         .ioctl =        unix_ioctl,
531         .listen =       sock_no_listen,
532         .shutdown =     unix_shutdown,
533         .setsockopt =   sock_no_setsockopt,
534         .getsockopt =   sock_no_getsockopt,
535         .sendmsg =      unix_dgram_sendmsg,
536         .recvmsg =      unix_dgram_recvmsg,
537         .mmap =         sock_no_mmap,
538         .sendpage =     sock_no_sendpage,
539 };
540
541 static const struct proto_ops unix_seqpacket_ops = {
542         .family =       PF_UNIX,
543         .owner =        THIS_MODULE,
544         .release =      unix_release,
545         .bind =         unix_bind,
546         .connect =      unix_stream_connect,
547         .socketpair =   unix_socketpair,
548         .accept =       unix_accept,
549         .getname =      unix_getname,
550         .poll =         datagram_poll,
551         .ioctl =        unix_ioctl,
552         .listen =       unix_listen,
553         .shutdown =     unix_shutdown,
554         .setsockopt =   sock_no_setsockopt,
555         .getsockopt =   sock_no_getsockopt,
556         .sendmsg =      unix_seqpacket_sendmsg,
557         .recvmsg =      unix_dgram_recvmsg,
558         .mmap =         sock_no_mmap,
559         .sendpage =     sock_no_sendpage,
560 };
561
562 static struct proto unix_proto = {
563         .name     = "UNIX",
564         .owner    = THIS_MODULE,
565         .obj_size = sizeof(struct unix_sock),
566 };
567
568 static struct sock * unix_create1(struct socket *sock)
569 {
570         struct sock *sk = NULL;
571         struct unix_sock *u;
572
573         if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
574                 goto out;
575
576         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
577         if (!sk)
578                 goto out;
579
580         atomic_inc(&unix_nr_socks);
581
582         sock_init_data(sock,sk);
583
584         sk->sk_write_space      = unix_write_space;
585         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
586         sk->sk_destruct         = unix_sock_destructor;
587         u         = unix_sk(sk);
588         u->dentry = NULL;
589         u->mnt    = NULL;
590         spin_lock_init(&u->lock);
591         atomic_set(&u->inflight, sock ? 0 : -1);
592         mutex_init(&u->readlock); /* single task reading lock */
593         init_waitqueue_head(&u->peer_wait);
594         unix_insert_socket(unix_sockets_unbound, sk);
595 out:
596         return sk;
597 }
598
599 static int unix_create(struct socket *sock, int protocol)
600 {
601         if (protocol && protocol != PF_UNIX)
602                 return -EPROTONOSUPPORT;
603
604         sock->state = SS_UNCONNECTED;
605
606         switch (sock->type) {
607         case SOCK_STREAM:
608                 sock->ops = &unix_stream_ops;
609                 break;
610                 /*
611                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
612                  *      nothing uses it.
613                  */
614         case SOCK_RAW:
615                 sock->type=SOCK_DGRAM;
616         case SOCK_DGRAM:
617                 sock->ops = &unix_dgram_ops;
618                 break;
619         case SOCK_SEQPACKET:
620                 sock->ops = &unix_seqpacket_ops;
621                 break;
622         default:
623                 return -ESOCKTNOSUPPORT;
624         }
625
626         return unix_create1(sock) ? 0 : -ENOMEM;
627 }
628
629 static int unix_release(struct socket *sock)
630 {
631         struct sock *sk = sock->sk;
632
633         if (!sk)
634                 return 0;
635
636         sock->sk = NULL;
637
638         return unix_release_sock (sk, 0);
639 }
640
641 static int unix_autobind(struct socket *sock)
642 {
643         struct sock *sk = sock->sk;
644         struct unix_sock *u = unix_sk(sk);
645         static u32 ordernum = 1;
646         struct unix_address * addr;
647         int err;
648
649         mutex_lock(&u->readlock);
650
651         err = 0;
652         if (u->addr)
653                 goto out;
654
655         err = -ENOMEM;
656         addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
657         if (!addr)
658                 goto out;
659
660         memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
661         addr->name->sun_family = AF_UNIX;
662         atomic_set(&addr->refcnt, 1);
663
664 retry:
665         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
666         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
667
668         spin_lock(&unix_table_lock);
669         ordernum = (ordernum+1)&0xFFFFF;
670
671         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
672                                       addr->hash)) {
673                 spin_unlock(&unix_table_lock);
674                 /* Sanity yield. It is unusual case, but yet... */
675                 if (!(ordernum&0xFF))
676                         yield();
677                 goto retry;
678         }
679         addr->hash ^= sk->sk_type;
680
681         __unix_remove_socket(sk);
682         u->addr = addr;
683         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
684         spin_unlock(&unix_table_lock);
685         err = 0;
686
687 out:    mutex_unlock(&u->readlock);
688         return err;
689 }
690
/*
 * Look up the socket a caller wants to talk to.
 *
 * Filesystem names are resolved through the VFS: the path must be
 * writable by the caller and refer to a socket inode, and the socket
 * bound to that inode must be of type @type.  Abstract names are looked
 * up in the hash table using @hash.
 *
 * Returns the socket with a reference held.  On failure returns NULL and
 * stores the error in *@error: the VFS error, -ECONNREFUSED when nothing
 * suitable is bound, or -EPROTOTYPE on a type mismatch.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *peer;
	struct nameidata nd;
	int err = 0;

	if (!sunname->sun_path[0]) {
		/* Abstract name. */
		struct dentry *dentry;

		peer = unix_find_socket_byname(sunname, len, type, hash);
		if (!peer) {
			err = -ECONNREFUSED;
			goto fail;
		}

		dentry = unix_sk(peer)->dentry;
		if (dentry)
			touch_atime(unix_sk(peer)->mnt, dentry);

		return peer;
	}

	/* Filesystem name. */
	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;

	err = vfs_permission(&nd, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
		goto put_fail;

	peer = unix_find_socket_byinode(nd.dentry->d_inode);
	if (!peer)
		goto put_fail;

	/* The access time is only updated when the type matches. */
	if (peer->sk_type == type)
		touch_atime(nd.mnt, nd.dentry);

	path_release(&nd);

	if (peer->sk_type != type) {
		err = -EPROTOTYPE;
		sock_put(peer);
		goto fail;
	}

	return peer;

put_fail:
	path_release(&nd);
fail:
	*error = err;
	return NULL;
}
742
743
744 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
745 {
746         struct sock *sk = sock->sk;
747         struct unix_sock *u = unix_sk(sk);
748         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
749         struct dentry * dentry = NULL;
750         struct nameidata nd;
751         int err;
752         unsigned hash;
753         struct unix_address *addr;
754         struct hlist_head *list;
755
756         err = -EINVAL;
757         if (sunaddr->sun_family != AF_UNIX)
758                 goto out;
759
760         if (addr_len==sizeof(short)) {
761                 err = unix_autobind(sock);
762                 goto out;
763         }
764
765         err = unix_mkname(sunaddr, addr_len, &hash);
766         if (err < 0)
767                 goto out;
768         addr_len = err;
769
770         mutex_lock(&u->readlock);
771
772         err = -EINVAL;
773         if (u->addr)
774                 goto out_up;
775
776         err = -ENOMEM;
777         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
778         if (!addr)
779                 goto out_up;
780
781         memcpy(addr->name, sunaddr, addr_len);
782         addr->len = addr_len;
783         addr->hash = hash ^ sk->sk_type;
784         atomic_set(&addr->refcnt, 1);
785
786         if (sunaddr->sun_path[0]) {
787                 unsigned int mode;
788                 err = 0;
789                 /*
790                  * Get the parent directory, calculate the hash for last
791                  * component.
792                  */
793                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
794                 if (err)
795                         goto out_mknod_parent;
796
797                 dentry = lookup_create(&nd, 0);
798                 err = PTR_ERR(dentry);
799                 if (IS_ERR(dentry))
800                         goto out_mknod_unlock;
801
802                 /*
803                  * All right, let's create it.
804                  */
805                 mode = S_IFSOCK |
806                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
807                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
808                 if (err)
809                         goto out_mknod_dput;
810                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
811                 dput(nd.dentry);
812                 nd.dentry = dentry;
813
814                 addr->hash = UNIX_HASH_SIZE;
815         }
816
817         spin_lock(&unix_table_lock);
818
819         if (!sunaddr->sun_path[0]) {
820                 err = -EADDRINUSE;
821                 if (__unix_find_socket_byname(sunaddr, addr_len,
822                                               sk->sk_type, hash)) {
823                         unix_release_addr(addr);
824                         goto out_unlock;
825                 }
826
827                 list = &unix_socket_table[addr->hash];
828         } else {
829                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
830                 u->dentry = nd.dentry;
831                 u->mnt    = nd.mnt;
832         }
833
834         err = 0;
835         __unix_remove_socket(sk);
836         u->addr = addr;
837         __unix_insert_socket(list, sk);
838
839 out_unlock:
840         spin_unlock(&unix_table_lock);
841 out_up:
842         mutex_unlock(&u->readlock);
843 out:
844         return err;
845
846 out_mknod_dput:
847         dput(dentry);
848 out_mknod_unlock:
849         mutex_unlock(&nd.dentry->d_inode->i_mutex);
850         path_release(&nd);
851 out_mknod_parent:
852         if (err==-EEXIST)
853                 err=-EADDRINUSE;
854         unix_release_addr(addr);
855         goto out_up;
856 }
857
858 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
859                               int alen, int flags)
860 {
861         struct sock *sk = sock->sk;
862         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
863         struct sock *other;
864         unsigned hash;
865         int err;
866
867         if (addr->sa_family != AF_UNSPEC) {
868                 err = unix_mkname(sunaddr, alen, &hash);
869                 if (err < 0)
870                         goto out;
871                 alen = err;
872
873                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
874                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
875                         goto out;
876
877                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
878                 if (!other)
879                         goto out;
880
881                 unix_state_wlock(sk);
882
883                 err = -EPERM;
884                 if (!unix_may_send(sk, other))
885                         goto out_unlock;
886
887                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
888                 if (err)
889                         goto out_unlock;
890
891         } else {
892                 /*
893                  *      1003.1g breaking connected state with AF_UNSPEC
894                  */
895                 other = NULL;
896                 unix_state_wlock(sk);
897         }
898
899         /*
900          * If it was connected, reconnect.
901          */
902         if (unix_peer(sk)) {
903                 struct sock *old_peer = unix_peer(sk);
904                 unix_peer(sk)=other;
905                 unix_state_wunlock(sk);
906
907                 if (other != old_peer)
908                         unix_dgram_disconnected(sk, old_peer);
909                 sock_put(old_peer);
910         } else {
911                 unix_peer(sk)=other;
912                 unix_state_wunlock(sk);
913         }
914         return 0;
915
916 out_unlock:
917         unix_state_wunlock(sk);
918         sock_put(other);
919 out:
920         return err;
921 }
922
923 static long unix_wait_for_peer(struct sock *other, long timeo)
924 {
925         struct unix_sock *u = unix_sk(other);
926         int sched;
927         DEFINE_WAIT(wait);
928
929         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
930
931         sched = !sock_flag(other, SOCK_DEAD) &&
932                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
933                 (skb_queue_len(&other->sk_receive_queue) >
934                  other->sk_max_ack_backlog);
935
936         unix_state_runlock(other);
937
938         if (sched)
939                 timeo = schedule_timeout(timeo);
940
941         finish_wait(&u->peer_wait, &wait);
942         return timeo;
943 }
944
/*
 * unix_stream_connect - connect sk to the listening socket named by uaddr.
 *
 * The address is normalised with unix_mkname(), and the socket is
 * autobound first when SOCK_PASSCRED is set and it has no address yet.
 * A new sock (newsk) and an skb are allocated up front, before any state
 * lock is taken. The listener is then looked up and checked under its
 * read lock, the caller's own state is checked under its write lock, and
 * finally newsk becomes the peer of sk while the skb is queued on the
 * listener's receive queue (unix_accept() later reads the new sock from
 * skb->sk).
 *
 * Returns 0 on success or a negative errno:
 *   -ENOMEM        newsk or the skb could not be allocated
 *   -ECONNREFUSED  the target is not in TCP_LISTEN
 *   -EAGAIN        listener queue above its backlog and the timeout is zero
 *   -EISCONN       sk is already TCP_ESTABLISHED
 *   -EINVAL        sk is in any state other than TCP_CLOSE/TCP_ESTABLISHED
 *   or the error reported by unix_mkname(), unix_autobind(),
 *   unix_find_other(), security_unix_stream_connect() or sock_intr_errno().
 */
945 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
946                                int addr_len, int flags)
947 {
948         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
949         struct sock *sk = sock->sk;
950         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
951         struct sock *newsk = NULL;
952         struct sock *other = NULL;
953         struct sk_buff *skb = NULL;
954         unsigned hash;
955         int st;
956         int err;
957         long timeo;
958
            /* On success unix_mkname() returns the length to use from here on. */
959         err = unix_mkname(sunaddr, addr_len, &hash);
960         if (err < 0)
961                 goto out;
962         addr_len = err;
963
964         if (test_bit(SOCK_PASSCRED, &sock->flags)
965                 && !u->addr && (err = unix_autobind(sock)) != 0)
966                 goto out;
967
968         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
969
970         /* First of all allocate resources.
971            If we will make it after state is locked,
972            we will have to recheck all again in any case.
973          */
974
975         err = -ENOMEM;
976
977         /* create new sock for complete connection */
978         newsk = unix_create1(NULL);
979         if (newsk == NULL)
980                 goto out;
981
982         /* Allocate skb for sending to listening sock */
983         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
984         if (skb == NULL)
985                 goto out;
986
            /*
             * Every jump back to restart has already dropped the reference
             * (and any lock) held on the previous "other".
             */
987 restart:
988         /*  Find listening sock. */
989         other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
990         if (!other)
991                 goto out;
992
993         /* Latch state of peer */
994         unix_state_rlock(other);
995
996         /* Apparently VFS overslept socket death. Retry. */
997         if (sock_flag(other, SOCK_DEAD)) {
998                 unix_state_runlock(other);
999                 sock_put(other);
1000                 goto restart;
1001         }
1002
1003         err = -ECONNREFUSED;
1004         if (other->sk_state != TCP_LISTEN)
1005                 goto out_unlock;
1006
1007         if (skb_queue_len(&other->sk_receive_queue) >
1008             other->sk_max_ack_backlog) {
1009                 err = -EAGAIN;
1010                 if (!timeo)
1011                         goto out_unlock;
1012
                     /*
                      * unix_wait_for_peer() releases the read lock on other,
                      * so from here only the reference is still held; the
                      * "out" label drops it on the signal path.
                      */
1013                 timeo = unix_wait_for_peer(other, timeo);
1014
1015                 err = sock_intr_errno(timeo);
1016                 if (signal_pending(current))
1017                         goto out;
1018                 sock_put(other);
1019                 goto restart;
1020         }
1021
1022         /* Latch our state.
1023
1024            It is tricky place. We need to grab write lock and cannot
1025            drop lock on peer. It is dangerous because deadlock is
1026            possible. Connect to self case and simultaneous
1027            attempt to connect are eliminated by checking socket
1028            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1029            check this before attempt to grab lock.
1030
1031            Well, and we have to recheck the state after socket locked.
1032          */
1033         st = sk->sk_state;
1034
1035         switch (st) {
1036         case TCP_CLOSE:
1037                 /* This is ok... continue with connect */
1038                 break;
1039         case TCP_ESTABLISHED:
1040                 /* Socket is already connected */
1041                 err = -EISCONN;
1042                 goto out_unlock;
1043         default:
1044                 err = -EINVAL;
1045                 goto out_unlock;
1046         }
1047
1048         unix_state_wlock(sk);
1049
             /* State changed between the unlocked read and the lock: start over. */
1050         if (sk->sk_state != st) {
1051                 unix_state_wunlock(sk);
1052                 unix_state_runlock(other);
1053                 sock_put(other);
1054                 goto restart;
1055         }
1056
1057         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1058         if (err) {
1059                 unix_state_wunlock(sk);
1060                 goto out_unlock;
1061         }
1062
1063         /* The way is open! Fastly set all the necessary fields... */
1064
1065         sock_hold(sk);
1066         unix_peer(newsk)        = sk;
1067         newsk->sk_state         = TCP_ESTABLISHED;
1068         newsk->sk_type          = sk->sk_type;
1069         newsk->sk_peercred.pid  = current->tgid;
1070         newsk->sk_peercred.uid  = current->euid;
1071         newsk->sk_peercred.gid  = current->egid;
1072         newu = unix_sk(newsk);
1073         newsk->sk_sleep         = &newu->peer_wait;
1074         otheru = unix_sk(other);
1075
1076         /* copy address information from listening to new sock*/
1077         if (otheru->addr) {
1078                 atomic_inc(&otheru->addr->refcnt);
1079                 newu->addr = otheru->addr;
1080         }
1081         if (otheru->dentry) {
1082                 newu->dentry    = dget(otheru->dentry);
1083                 newu->mnt       = mntget(otheru->mnt);
1084         }
1085
1086         /* Set credentials */
1087         sk->sk_peercred = other->sk_peercred;
1088
1089         sock->state     = SS_CONNECTED;
1090         sk->sk_state    = TCP_ESTABLISHED;
1091         sock_hold(newsk);
1092
1093         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1094         unix_peer(sk)   = newsk;
1095
1096         unix_state_wunlock(sk);
1097
1098         /* take ten and send info to listening sock */
1099         spin_lock(&other->sk_receive_queue.lock);
1100         __skb_queue_tail(&other->sk_receive_queue, skb);
1101         /* Undo artificially decreased inflight after embrion
1102          * is installed to listening socket. */
1103         atomic_inc(&newu->inflight);
1104         spin_unlock(&other->sk_receive_queue.lock);
1105         unix_state_runlock(other);
1106         other->sk_data_ready(other, 0);
1107         sock_put(other);
1108         return 0;
1109
             /* Error exit while the read lock on other is still held. */
1110 out_unlock:
1111         if (other)
1112                 unix_state_runlock(other);
1113
             /* Common error exit: release whatever was allocated or referenced. */
1114 out:
1115         if (skb)
1116                 kfree_skb(skb);
1117         if (newsk)
1118                 unix_release_sock(newsk, 0);
1119         if (other)
1120                 sock_put(other);
1121         return err;
1122 }
1123
1124 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1125 {
1126         struct sock *ska=socka->sk, *skb = sockb->sk;
1127
1128         /* Join our sockets back to back */
1129         sock_hold(ska);
1130         sock_hold(skb);
1131         unix_peer(ska)=skb;
1132         unix_peer(skb)=ska;
1133         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1134         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1135         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1136
1137         if (ska->sk_type != SOCK_DGRAM) {
1138                 ska->sk_state = TCP_ESTABLISHED;
1139                 skb->sk_state = TCP_ESTABLISHED;
1140                 socka->state  = SS_CONNECTED;
1141                 sockb->state  = SS_CONNECTED;
1142         }
1143         return 0;
1144 }
1145
1146 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1147 {
1148         struct sock *sk = sock->sk;
1149         struct sock *tsk;
1150         struct sk_buff *skb;
1151         int err;
1152
1153         err = -EOPNOTSUPP;
1154         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1155                 goto out;
1156
1157         err = -EINVAL;
1158         if (sk->sk_state != TCP_LISTEN)
1159                 goto out;
1160
1161         /* If socket state is TCP_LISTEN it cannot change (for now...),
1162          * so that no locks are necessary.
1163          */
1164
1165         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1166         if (!skb) {
1167                 /* This means receive shutdown. */
1168                 if (err == 0)
1169                         err = -EINVAL;
1170                 goto out;
1171         }
1172
1173         tsk = skb->sk;
1174         skb_free_datagram(sk, skb);
1175         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1176
1177         /* attach accepted sock to socket */
1178         unix_state_wlock(tsk);
1179         newsock->state = SS_CONNECTED;
1180         sock_graft(tsk, newsock);
1181         unix_state_wunlock(tsk);
1182         return 0;
1183
1184 out:
1185         return err;
1186 }
1187
1188
1189 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1190 {
1191         struct sock *sk = sock->sk;
1192         struct unix_sock *u;
1193         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1194         int err = 0;
1195
1196         if (peer) {
1197                 sk = unix_peer_get(sk);
1198
1199                 err = -ENOTCONN;
1200                 if (!sk)
1201                         goto out;
1202                 err = 0;
1203         } else {
1204                 sock_hold(sk);
1205         }
1206
1207         u = unix_sk(sk);
1208         unix_state_rlock(sk);
1209         if (!u->addr) {
1210                 sunaddr->sun_family = AF_UNIX;
1211                 sunaddr->sun_path[0] = 0;
1212                 *uaddr_len = sizeof(short);
1213         } else {
1214                 struct unix_address *addr = u->addr;
1215
1216                 *uaddr_len = addr->len;
1217                 memcpy(sunaddr, addr->name, *uaddr_len);
1218         }
1219         unix_state_runlock(sk);
1220         sock_put(sk);
1221 out:
1222         return err;
1223 }
1224
1225 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1226 {
1227         int i;
1228
1229         scm->fp = UNIXCB(skb).fp;
1230         skb->destructor = sock_wfree;
1231         UNIXCB(skb).fp = NULL;
1232
1233         for (i=scm->fp->count-1; i>=0; i--)
1234                 unix_notinflight(scm->fp->fp[i]);
1235 }
1236
1237 static void unix_destruct_fds(struct sk_buff *skb)
1238 {
1239         struct scm_cookie scm;
1240         memset(&scm, 0, sizeof(scm));
1241         unix_detach_fds(&scm, skb);
1242
1243         /* Alas, it calls VFS */
1244         /* So fscking what? fput() had been SMP-safe since the last Summer */
1245         scm_destroy(&scm);
1246         sock_wfree(skb);
1247 }
1248
1249 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1250 {
1251         int i;
1252         for (i=scm->fp->count-1; i>=0; i--)
1253                 unix_inflight(scm->fp->fp[i]);
1254         UNIXCB(skb).fp = scm->fp;
1255         skb->destructor = unix_destruct_fds;
1256         scm->fp = NULL;
1257 }
1258
1259 /*
1260  *      Send AF_UNIX data.
1261  */
1262
/*
 * unix_dgram_sendmsg - send one datagram of len bytes.
 *
 * The destination is the address in msg->msg_name when msg_namelen is
 * non-zero, otherwise the socket's current peer. unix_seqpacket_sendmsg()
 * also funnels into this function after clearing msg_namelen.
 *
 * Returns len on success or a negative errno:
 *   -EOPNOTSUPP    MSG_OOB requested
 *   -ENOTCONN      no address given and no peer
 *   -EMSGSIZE      len exceeds sk_sndbuf - 32
 *   -EPERM         unix_may_send() refused
 *   -ECONNREFUSED  the connected peer turned out to be dead
 *   -ECONNRESET    a retry has neither a peer reference nor an address
 *   -EPIPE         receiver has RCV_SHUTDOWN set
 *   -EAGAIN        receiver queue over its backlog and the timeout is zero
 *   or the error from scm_send(), unix_mkname(), unix_autobind(),
 *   sock_alloc_send_skb(), memcpy_fromiovec(), unix_find_other(),
 *   security_unix_may_send() or sock_intr_errno().
 *
 * siocb->scm is destroyed on every exit path after scm_send() succeeded.
 */
1263 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1264                               struct msghdr *msg, size_t len)
1265 {
1266         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1267         struct sock *sk = sock->sk;
1268         struct unix_sock *u = unix_sk(sk);
1269         struct sockaddr_un *sunaddr=msg->msg_name;
1270         struct sock *other = NULL;
1271         int namelen = 0; /* fake GCC */
1272         int err;
1273         unsigned hash;
1274         struct sk_buff *skb;
1275         long timeo;
1276         struct scm_cookie tmp_scm;
1277
             /* Fall back to an on-stack cookie when the caller supplied none. */
1278         if (NULL == siocb->scm)
1279                 siocb->scm = &tmp_scm;
1280         err = scm_send(sock, msg, siocb->scm);
1281         if (err < 0)
1282                 return err;
1283
1284         err = -EOPNOTSUPP;
1285         if (msg->msg_flags&MSG_OOB)
1286                 goto out;
1287
1288         if (msg->msg_namelen) {
1289                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1290                 if (err < 0)
1291                         goto out;
1292                 namelen = err;
1293         } else {
                     /* Connected send: take a reference on the current peer. */
1294                 sunaddr = NULL;
1295                 err = -ENOTCONN;
1296                 other = unix_peer_get(sk);
1297                 if (!other)
1298                         goto out;
1299         }
1300
1301         if (test_bit(SOCK_PASSCRED, &sock->flags)
1302                 && !u->addr && (err = unix_autobind(sock)) != 0)
1303                 goto out;
1304
1305         err = -EMSGSIZE;
1306         if (len > sk->sk_sndbuf - 32)
1307                 goto out;
1308
1309         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1310         if (skb==NULL)
1311                 goto out;
1312
1313         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1314         if (siocb->scm->fp)
1315                 unix_attach_fds(siocb->scm, skb);
1316
1317         unix_get_peersec_dgram(skb);
1318
1319         skb->h.raw = skb->data;
1320         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1321         if (err)
1322                 goto out_free;
1323
1324         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1325
             /*
              * Re-entered either with "other" still referenced (after
              * waiting for queue space) or with other == NULL (after a
              * dead receiver was dropped).
              */
1326 restart:
1327         if (!other) {
1328                 err = -ECONNRESET;
1329                 if (sunaddr == NULL)
1330                         goto out_free;
1331
1332                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1333                                         hash, &err);
1334                 if (other==NULL)
1335                         goto out_free;
1336         }
1337
1338         unix_state_rlock(other);
1339         err = -EPERM;
1340         if (!unix_may_send(sk, other))
1341                 goto out_unlock;
1342
1343         if (sock_flag(other, SOCK_DEAD)) {
1344                 /*
1345                  *      Check with 1003.1g - what should
1346                  *      datagram error
1347                  */
1348                 unix_state_runlock(other);
1349                 sock_put(other);
1350
                     /*
                      * If the dead sock is still our connected peer, drop
                      * it (and the reference the peer pointer held) and
                      * fail with -ECONNREFUSED; otherwise look the
                      * destination up again.
                      */
1351                 err = 0;
1352                 unix_state_wlock(sk);
1353                 if (unix_peer(sk) == other) {
1354                         unix_peer(sk)=NULL;
1355                         unix_state_wunlock(sk);
1356
1357                         unix_dgram_disconnected(sk, other);
1358                         sock_put(other);
1359                         err = -ECONNREFUSED;
1360                 } else {
1361                         unix_state_wunlock(sk);
1362                 }
1363
1364                 other = NULL;
1365                 if (err)
1366                         goto out_free;
1367                 goto restart;
1368         }
1369
1370         err = -EPIPE;
1371         if (other->sk_shutdown & RCV_SHUTDOWN)
1372                 goto out_unlock;
1373
1374         if (sk->sk_type != SOCK_SEQPACKET) {
1375                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1376                 if (err)
1377                         goto out_unlock;
1378         }
1379
             /* The backlog limit is not applied when the receiver's peer is sk. */
1380         if (unix_peer(other) != sk &&
1381             (skb_queue_len(&other->sk_receive_queue) >
1382              other->sk_max_ack_backlog)) {
1383                 if (!timeo) {
1384                         err = -EAGAIN;
1385                         goto out_unlock;
1386                 }
1387
                     /*
                      * unix_wait_for_peer() releases the read lock on other;
                      * the reference is kept for the retry.
                      */
1388                 timeo = unix_wait_for_peer(other, timeo);
1389
1390                 err = sock_intr_errno(timeo);
1391                 if (signal_pending(current))
1392                         goto out_free;
1393
1394                 goto restart;
1395         }
1396
1397         skb_queue_tail(&other->sk_receive_queue, skb);
1398         unix_state_runlock(other);
1399         other->sk_data_ready(other, len);
1400         sock_put(other);
1401         scm_destroy(siocb->scm);
1402         return len;
1403
1404 out_unlock:
1405         unix_state_runlock(other);
1406 out_free:
1407         kfree_skb(skb);
1408 out:
1409         if (other)
1410                 sock_put(other);
1411         scm_destroy(siocb->scm);
1412         return err;
1413 }
1414
1415                 
/*
 * unix_stream_sendmsg - send up to len bytes to the connected peer.
 *
 * The data is split into skbs no larger than (sk_sndbuf >> 1) - 64 bytes,
 * SKB_MAX_ALLOC, and the tailroom of the skb actually allocated. Each skb
 * is queued on the peer's receive queue under the peer's state read lock.
 *
 * Returns the number of bytes queued if any were sent; otherwise a
 * negative errno:
 *   -EOPNOTSUPP  MSG_OOB requested, or an address was given on a socket
 *                that is not TCP_ESTABLISHED
 *   -EISCONN     an address was given on an established socket
 *   -ENOTCONN    the socket has no peer
 *   -EPIPE       send side shut down, or the peer is dead / has
 *                RCV_SHUTDOWN set (SIGPIPE is raised when nothing was
 *                sent and MSG_NOSIGNAL is not set)
 *   or the error from scm_send(), sock_alloc_send_skb() or
 *   memcpy_fromiovec().
 */
1416 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1417                                struct msghdr *msg, size_t len)
1418 {
1419         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1420         struct sock *sk = sock->sk;
1421         struct sock *other = NULL;
1422         struct sockaddr_un *sunaddr=msg->msg_name;
1423         int err,size;
1424         struct sk_buff *skb;
1425         int sent=0;
1426         struct scm_cookie tmp_scm;
1427
             /* Fall back to an on-stack cookie when the caller supplied none. */
1428         if (NULL == siocb->scm)
1429                 siocb->scm = &tmp_scm;
1430         err = scm_send(sock, msg, siocb->scm);
1431         if (err < 0)
1432                 return err;
1433
1434         err = -EOPNOTSUPP;
1435         if (msg->msg_flags&MSG_OOB)
1436                 goto out_err;
1437
             /* A destination address is never accepted on a stream socket. */
1438         if (msg->msg_namelen) {
1439                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1440                 goto out_err;
1441         } else {
1442                 sunaddr = NULL;
1443                 err = -ENOTCONN;
                     /*
                      * NOTE(review): no reference is taken on the peer here
                      * (unix_dgram_sendmsg uses unix_peer_get()); confirm the
                      * peer's lifetime is guaranteed for this call.
                      */
1444                 other = unix_peer(sk);
1445                 if (!other)
1446                         goto out_err;
1447         }
1448
1449         if (sk->sk_shutdown & SEND_SHUTDOWN)
1450                 goto pipe_err;
1451
1452         while(sent < len)
1453         {
1454                 /*
1455                  *      Optimisation for the fact that under 0.01% of X
1456                  *      messages typically need breaking up.
1457                  */
1458
1459                 size = len-sent;
1460
1461                 /* Keep two messages in the pipe so it schedules better */
1462                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1463                         size = (sk->sk_sndbuf >> 1) - 64;
1464
1465                 if (size > SKB_MAX_ALLOC)
1466                         size = SKB_MAX_ALLOC;
1467                         
1468                 /*
1469                  *      Grab a buffer
1470                  */
1471                  
1472                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1473
1474                 if (skb==NULL)
1475                         goto out_err;
1476
1477                 /*
1478                  *      If you pass two values to the sock_alloc_send_skb
1479                  *      it tries to grab the large buffer with GFP_NOFS
1480                  *      (which can fail easily), and if it fails grab the
1481                  *      fallback size buffer which is under a page and will
1482                  *      succeed. [Alan]
1483                  */
1484                 size = min_t(int, size, skb_tailroom(skb));
1485
                     /*
                      * unix_attach_fds() clears scm->fp, so passed files ride
                      * on the first skb only.
                      */
1486                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1487                 if (siocb->scm->fp)
1488                         unix_attach_fds(siocb->scm, skb);
1489
1490                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1491                         kfree_skb(skb);
1492                         goto out_err;
1493                 }
1494
1495                 unix_state_rlock(other);
1496
1497                 if (sock_flag(other, SOCK_DEAD) ||
1498                     (other->sk_shutdown & RCV_SHUTDOWN))
1499                         goto pipe_err_free;
1500
1501                 skb_queue_tail(&other->sk_receive_queue, skb);
1502                 unix_state_runlock(other);
1503                 other->sk_data_ready(other, size);
1504                 sent+=size;
1505         }
1506
1507         scm_destroy(siocb->scm);
1508         siocb->scm = NULL;
1509
1510         return sent;
1511
             /* Peer went away while its lock was held and an skb was in hand. */
1512 pipe_err_free:
1513         unix_state_runlock(other);
1514         kfree_skb(skb);
1515 pipe_err:
1516         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1517                 send_sig(SIGPIPE,current,0);
1518         err = -EPIPE;
1519 out_err:
1520         scm_destroy(siocb->scm);
1521         siocb->scm = NULL;
             /* A partial send reports the byte count rather than the error. */
1522         return sent ? : err;
1523 }
1524
1525 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1526                                   struct msghdr *msg, size_t len)
1527 {
1528         int err;
1529         struct sock *sk = sock->sk;
1530         
1531         err = sock_error(sk);
1532         if (err)
1533                 return err;
1534
1535         if (sk->sk_state != TCP_ESTABLISHED)
1536                 return -ENOTCONN;
1537
1538         if (msg->msg_namelen)
1539                 msg->msg_namelen = 0;
1540
1541         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1542 }
1543                                                                                             
1544 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1545 {
1546         struct unix_sock *u = unix_sk(sk);
1547
1548         msg->msg_namelen = 0;
1549         if (u->addr) {
1550                 msg->msg_namelen = u->addr->len;
1551                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1552         }
1553 }
1554
1555 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1556                               struct msghdr *msg, size_t size,
1557                               int flags)
1558 {
1559         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1560         struct scm_cookie tmp_scm;
1561         struct sock *sk = sock->sk;
1562         struct unix_sock *u = unix_sk(sk);
1563         int noblock = flags & MSG_DONTWAIT;
1564         struct sk_buff *skb;
1565         int err;
1566
1567         err = -EOPNOTSUPP;
1568         if (flags&MSG_OOB)
1569                 goto out;
1570
1571         msg->msg_namelen = 0;
1572
1573         mutex_lock(&u->readlock);
1574
1575         skb = skb_recv_datagram(sk, flags, noblock, &err);
1576         if (!skb)
1577                 goto out_unlock;
1578
1579         wake_up_interruptible(&u->peer_wait);
1580
1581         if (msg->msg_name)
1582                 unix_copy_addr(msg, skb->sk);
1583
1584         if (size > skb->len)
1585                 size = skb->len;
1586         else if (size < skb->len)
1587                 msg->msg_flags |= MSG_TRUNC;
1588
1589         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1590         if (err)
1591                 goto out_free;
1592
1593         if (!siocb->scm) {
1594                 siocb->scm = &tmp_scm;
1595                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1596         }
1597         siocb->scm->creds = *UNIXCREDS(skb);
1598         unix_set_secdata(siocb->scm, skb);
1599
1600         if (!(flags & MSG_PEEK))
1601         {
1602                 if (UNIXCB(skb).fp)
1603                         unix_detach_fds(siocb->scm, skb);
1604         }
1605         else 
1606         {
1607                 /* It is questionable: on PEEK we could:
1608                    - do not return fds - good, but too simple 8)
1609                    - return fds, and do not return them on read (old strategy,
1610                      apparently wrong)
1611                    - clone fds (I chose it for now, it is the most universal
1612                      solution)
1613                 
1614                    POSIX 1003.1g does not actually define this clearly
1615                    at all. POSIX 1003.1g doesn't define a lot of things
1616                    clearly however!                  
1617                    
1618                 */
1619                 if (UNIXCB(skb).fp)
1620                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1621         }
1622         err = size;
1623
1624         scm_recv(sock, msg, siocb->scm, flags);
1625
1626 out_free:
1627         skb_free_datagram(sk,skb);
1628 out_unlock:
1629         mutex_unlock(&u->readlock);
1630 out:
1631         return err;
1632 }
1633
1634 /*
1635  *      Sleep until data has arrive. But check for races..
1636  */
1637  
1638 static long unix_stream_data_wait(struct sock * sk, long timeo)
1639 {
1640         DEFINE_WAIT(wait);
1641
1642         unix_state_rlock(sk);
1643
1644         for (;;) {
1645                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1646
1647                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1648                     sk->sk_err ||
1649                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1650                     signal_pending(current) ||
1651                     !timeo)
1652                         break;
1653
1654                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1655                 unix_state_runlock(sk);
1656                 timeo = schedule_timeout(timeo);
1657                 unix_state_rlock(sk);
1658                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1659         }
1660
1661         finish_wait(sk->sk_sleep, &wait);
1662         unix_state_runlock(sk);
1663         return timeo;
1664 }
1665
1666
1667
1668 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1669                                struct msghdr *msg, size_t size,
1670                                int flags)
1671 {
1672         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1673         struct scm_cookie tmp_scm;
1674         struct sock *sk = sock->sk;
1675         struct unix_sock *u = unix_sk(sk);
1676         struct sockaddr_un *sunaddr=msg->msg_name;
1677         int copied = 0;
1678         int check_creds = 0;
1679         int target;
1680         int err = 0;
1681         long timeo;
1682
1683         err = -EINVAL;
1684         if (sk->sk_state != TCP_ESTABLISHED)
1685                 goto out;
1686
1687         err = -EOPNOTSUPP;
1688         if (flags&MSG_OOB)
1689                 goto out;
1690
1691         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1692         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1693
1694         msg->msg_namelen = 0;
1695
1696         /* Lock the socket to prevent queue disordering
1697          * while sleeps in memcpy_tomsg
1698          */
1699
1700         if (!siocb->scm) {
1701                 siocb->scm = &tmp_scm;
1702                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1703         }
1704
1705         mutex_lock(&u->readlock);
1706
1707         do
1708         {
1709                 int chunk;
1710                 struct sk_buff *skb;
1711
1712                 skb = skb_dequeue(&sk->sk_receive_queue);
1713                 if (skb==NULL)
1714                 {
1715                         if (copied >= target)
1716                                 break;
1717
1718                         /*
1719                          *      POSIX 1003.1g mandates this order.
1720                          */
1721                          
1722                         if ((err = sock_error(sk)) != 0)
1723                                 break;
1724                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1725                                 break;
1726                         err = -EAGAIN;
1727                         if (!timeo)
1728                                 break;
1729                         mutex_unlock(&u->readlock);
1730
1731                         timeo = unix_stream_data_wait(sk, timeo);
1732
1733                         if (signal_pending(current)) {
1734                                 err = sock_intr_errno(timeo);
1735                                 goto out;
1736                         }
1737                         mutex_lock(&u->readlock);
1738                         continue;
1739                 }
1740
1741                 if (check_creds) {
1742                         /* Never glue messages from different writers */
1743                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1744                                 skb_queue_head(&sk->sk_receive_queue, skb);
1745                                 break;
1746                         }
1747                 } else {
1748                         /* Copy credentials */
1749                         siocb->scm->creds = *UNIXCREDS(skb);
1750                         check_creds = 1;
1751                 }
1752
1753                 /* Copy address just once */
1754                 if (sunaddr)
1755                 {
1756                         unix_copy_addr(msg, skb->sk);
1757                         sunaddr = NULL;
1758                 }
1759
1760                 chunk = min_t(unsigned int, skb->len, size);
1761                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1762                         skb_queue_head(&sk->sk_receive_queue, skb);
1763                         if (copied == 0)
1764                                 copied = -EFAULT;
1765                         break;
1766                 }
1767                 copied += chunk;
1768                 size -= chunk;
1769
1770                 /* Mark read part of skb as used */
1771                 if (!(flags & MSG_PEEK))
1772                 {
1773                         skb_pull(skb, chunk);
1774
1775                         if (UNIXCB(skb).fp)
1776                                 unix_detach_fds(siocb->scm, skb);
1777
1778                         /* put the skb back if we didn't use it up.. */
1779                         if (skb->len)
1780                         {
1781                                 skb_queue_head(&sk->sk_receive_queue, skb);
1782                                 break;
1783                         }
1784
1785                         kfree_skb(skb);
1786
1787                         if (siocb->scm->fp)
1788                                 break;
1789                 }
1790                 else
1791                 {
1792                         /* It is questionable, see note in unix_dgram_recvmsg.
1793                          */
1794                         if (UNIXCB(skb).fp)
1795                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1796
1797                         /* put message back and return */
1798                         skb_queue_head(&sk->sk_receive_queue, skb);
1799                         break;
1800                 }
1801         } while (size);
1802
1803         mutex_unlock(&u->readlock);
1804         scm_recv(sock, msg, siocb->scm, flags);
1805 out:
1806         return copied ? : err;
1807 }
1808
1809 static int unix_shutdown(struct socket *sock, int mode)
1810 {
1811         struct sock *sk = sock->sk;
1812         struct sock *other;
1813
1814         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1815
1816         if (mode) {
1817                 unix_state_wlock(sk);
1818                 sk->sk_shutdown |= mode;
1819                 other=unix_peer(sk);
1820                 if (other)
1821                         sock_hold(other);
1822                 unix_state_wunlock(sk);
1823                 sk->sk_state_change(sk);
1824
1825                 if (other &&
1826                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1827
1828                         int peer_mode = 0;
1829
1830                         if (mode&RCV_SHUTDOWN)
1831                                 peer_mode |= SEND_SHUTDOWN;
1832                         if (mode&SEND_SHUTDOWN)
1833                                 peer_mode |= RCV_SHUTDOWN;
1834                         unix_state_wlock(other);
1835                         other->sk_shutdown |= peer_mode;
1836                         unix_state_wunlock(other);
1837                         other->sk_state_change(other);
1838                         read_lock(&other->sk_callback_lock);
1839                         if (peer_mode == SHUTDOWN_MASK)
1840                                 sk_wake_async(other,1,POLL_HUP);
1841                         else if (peer_mode & RCV_SHUTDOWN)
1842                                 sk_wake_async(other,1,POLL_IN);
1843                         read_unlock(&other->sk_callback_lock);
1844                 }
1845                 if (other)
1846                         sock_put(other);
1847         }
1848         return 0;
1849 }
1850
1851 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1852 {
1853         struct sock *sk = sock->sk;
1854         long amount=0;
1855         int err;
1856
1857         switch(cmd)
1858         {
1859                 case SIOCOUTQ:
1860                         amount = atomic_read(&sk->sk_wmem_alloc);
1861                         err = put_user(amount, (int __user *)arg);
1862                         break;
1863                 case SIOCINQ:
1864                 {
1865                         struct sk_buff *skb;
1866
1867                         if (sk->sk_state == TCP_LISTEN) {
1868                                 err = -EINVAL;
1869                                 break;
1870                         }
1871
1872                         spin_lock(&sk->sk_receive_queue.lock);
1873                         if (sk->sk_type == SOCK_STREAM ||
1874                             sk->sk_type == SOCK_SEQPACKET) {
1875                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1876                                         amount += skb->len;
1877                         } else {
1878                                 skb = skb_peek(&sk->sk_receive_queue);
1879                                 if (skb)
1880                                         amount=skb->len;
1881                         }
1882                         spin_unlock(&sk->sk_receive_queue.lock);
1883                         err = put_user(amount, (int __user *)arg);
1884                         break;
1885                 }
1886
1887                 default:
1888                         err = -ENOIOCTLCMD;
1889                         break;
1890         }
1891         return err;
1892 }
1893
1894 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1895 {
1896         struct sock *sk = sock->sk;
1897         unsigned int mask;
1898
1899         poll_wait(file, sk->sk_sleep, wait);
1900         mask = 0;
1901
1902         /* exceptional events? */
1903         if (sk->sk_err)
1904                 mask |= POLLERR;
1905         if (sk->sk_shutdown == SHUTDOWN_MASK)
1906                 mask |= POLLHUP;
1907         if (sk->sk_shutdown & RCV_SHUTDOWN)
1908                 mask |= POLLRDHUP;
1909
1910         /* readable? */
1911         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1912             (sk->sk_shutdown & RCV_SHUTDOWN))
1913                 mask |= POLLIN | POLLRDNORM;
1914
1915         /* Connection-based need to check for termination and startup */
1916         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1917                 mask |= POLLHUP;
1918
1919         /*
1920          * we set writable also when the other side has shut down the
1921          * connection. This prevents stuck sockets.
1922          */
1923         if (unix_writable(sk))
1924                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1925
1926         return mask;
1927 }
1928
1929
1930 #ifdef CONFIG_PROC_FS
1931 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1932 {
1933         loff_t off = 0;
1934         struct sock *s;
1935
1936         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1937                 if (off == pos) 
1938                         return s;
1939                 ++off;
1940         }
1941         return NULL;
1942 }
1943
1944
1945 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1946 {
1947         spin_lock(&unix_table_lock);
1948         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1949 }
1950
1951 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1952 {
1953         ++*pos;
1954
1955         if (v == (void *)1) 
1956                 return first_unix_socket(seq->private);
1957         return next_unix_socket(seq->private, v);
1958 }
1959
1960 static void unix_seq_stop(struct seq_file *seq, void *v)
1961 {
1962         spin_unlock(&unix_table_lock);
1963 }
1964
1965 static int unix_seq_show(struct seq_file *seq, void *v)
1966 {
1967         
1968         if (v == (void *)1)
1969                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1970                          "Inode Path\n");
1971         else {
1972                 struct sock *s = v;
1973                 struct unix_sock *u = unix_sk(s);
1974                 unix_state_rlock(s);
1975
1976                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1977                         s,
1978                         atomic_read(&s->sk_refcnt),
1979                         0,
1980                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1981                         s->sk_type,
1982                         s->sk_socket ?
1983                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1984                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1985                         sock_i_ino(s));
1986
1987                 if (u->addr) {
1988                         int i, len;
1989                         seq_putc(seq, ' ');
1990
1991                         i = 0;
1992                         len = u->addr->len - sizeof(short);
1993                         if (!UNIX_ABSTRACT(s))
1994                                 len--;
1995                         else {
1996                                 seq_putc(seq, '@');
1997                                 i++;
1998                         }
1999                         for ( ; i < len; i++)
2000                                 seq_putc(seq, u->addr->name->sun_path[i]);
2001                 }
2002                 unix_state_runlock(s);
2003                 seq_putc(seq, '\n');
2004         }
2005
2006         return 0;
2007 }
2008
2009 static struct seq_operations unix_seq_ops = {
2010         .start  = unix_seq_start,
2011         .next   = unix_seq_next,
2012         .stop   = unix_seq_stop,
2013         .show   = unix_seq_show,
2014 };
2015
2016
2017 static int unix_seq_open(struct inode *inode, struct file *file)
2018 {
2019         struct seq_file *seq;
2020         int rc = -ENOMEM;
2021         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2022
2023         if (!iter)
2024                 goto out;
2025
2026         rc = seq_open(file, &unix_seq_ops);
2027         if (rc)
2028                 goto out_kfree;
2029
2030         seq          = file->private_data;
2031         seq->private = iter;
2032         *iter = 0;
2033 out:
2034         return rc;
2035 out_kfree:
2036         kfree(iter);
2037         goto out;
2038 }
2039
2040 static struct file_operations unix_seq_fops = {
2041         .owner          = THIS_MODULE,
2042         .open           = unix_seq_open,
2043         .read           = seq_read,
2044         .llseek         = seq_lseek,
2045         .release        = seq_release_private,
2046 };
2047
2048 #endif
2049
2050 static struct net_proto_family unix_family_ops = {
2051         .family = PF_UNIX,
2052         .create = unix_create,
2053         .owner  = THIS_MODULE,
2054 };
2055
2056 static int __init af_unix_init(void)
2057 {
2058         int rc = -1;
2059         struct sk_buff *dummy_skb;
2060
2061         if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2062                 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2063                 goto out;
2064         }
2065
2066         rc = proto_register(&unix_proto, 1);
2067         if (rc != 0) {
2068                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2069                        __FUNCTION__);
2070                 goto out;
2071         }
2072
2073         sock_register(&unix_family_ops);
2074 #ifdef CONFIG_PROC_FS
2075         proc_net_fops_create("unix", 0, &unix_seq_fops);
2076 #endif
2077         unix_sysctl_register();
2078 out:
2079         return rc;
2080 }
2081
2082 static void __exit af_unix_exit(void)
2083 {
2084         sock_unregister(PF_UNIX);
2085         unix_sysctl_unregister();
2086         proc_net_remove("unix");
2087         proto_unregister(&unix_proto);
2088 }
2089
2090 module_init(af_unix_init);
2091 module_exit(af_unix_exit);
2092
2093 MODULE_LICENSE("GPL");
2094 MODULE_ALIAS_NETPROTO(PF_UNIX);