[AF_UNIX]: Rewrite garbage collector, fixes race.
[linux-2.6] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 *                                      by above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid a huge amount
 *                                      of socks hashed (this for unix_gc()
 *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
84
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/sock.h>
107 #include <net/tcp_states.h>
108 #include <net/af_unix.h>
109 #include <linux/proc_fs.h>
110 #include <linux/seq_file.h>
111 #include <net/scm.h>
112 #include <linux/init.h>
113 #include <linux/poll.h>
114 #include <linux/rtnetlink.h>
115 #include <linux/mount.h>
116 #include <net/checksum.h>
117 #include <linux/security.h>
118
/*
 * Default queue limit; unix_create1() copies it into sk_max_ack_backlog
 * of every new socket.
 */
int sysctl_unix_max_dgram_qlen __read_mostly = 10;

/*
 * Socket hash table. The extra slot at index UNIX_HASH_SIZE is the list of
 * sockets that have not been bound yet (see unix_sockets_unbound).
 * All list manipulation is done under unix_table_lock.
 */
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
DEFINE_SPINLOCK(unix_table_lock);
/*
 * Number of allocated unix socks: incremented in unix_create1(),
 * decremented in unix_sock_destructor().
 */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* List head used for sockets that have no address yet. */
#define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])

/*
 * True when the address hash is not UNIX_HASH_SIZE, the value unix_bind()
 * stores for filesystem names. Dereferences ->addr, so sk must be bound.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
128
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Copy the security ID held in the scm cookie into the skb's UNIXSID slot.
 * Declared inline like the three sibling helpers below.
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Copy the security ID stored in the skb's UNIXSID slot into the scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
146
/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */
152
153 static inline unsigned unix_hash_fold(__wsum n)
154 {
155         unsigned hash = (__force unsigned)n;
156         hash ^= hash>>16;
157         hash ^= hash>>8;
158         return hash&(UNIX_HASH_SIZE-1);
159 }
160
/* The peer pointer kept in sk's unix_sock; usable as an lvalue. */
#define unix_peer(sk) (unix_sk(sk)->peer)
162
/* Non-zero when osk's recorded peer is sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
        return sk == unix_peer(osk);
}
167
168 static inline int unix_may_send(struct sock *sk, struct sock *osk)
169 {
170         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
171 }
172
173 static struct sock *unix_peer_get(struct sock *s)
174 {
175         struct sock *peer;
176
177         unix_state_lock(s);
178         peer = unix_peer(s);
179         if (peer)
180                 sock_hold(peer);
181         unix_state_unlock(s);
182         return peer;
183 }
184
185 static inline void unix_release_addr(struct unix_address *addr)
186 {
187         if (atomic_dec_and_test(&addr->refcnt))
188                 kfree(addr);
189 }
190
191 /*
192  *      Check unix socket name:
193  *              - should be not zero length.
194  *              - if started by not zero, should be NULL terminated (FS object)
195  *              - if started by zero, it is abstract name.
196  */
197
198 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
199 {
200         if (len <= sizeof(short) || len > sizeof(*sunaddr))
201                 return -EINVAL;
202         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
203                 return -EINVAL;
204         if (sunaddr->sun_path[0]) {
205                 /*
206                  * This may look like an off by one error but it is a bit more
207                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
208                  * sun_path[108] doesnt as such exist.  However in kernel space
209                  * we are guaranteed that it is a valid memory location in our
210                  * kernel address buffer.
211                  */
212                 ((char *)sunaddr)[len]=0;
213                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
214                 return len;
215         }
216
217         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
218         return len;
219 }
220
/*
 * Unlink sk from whatever hash list it is on.
 * Takes no lock itself; see unix_remove_socket() for the locked variant.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
225
/*
 * Add sk to the given hash list. sk is expected to be unhashed on entry
 * (checked with BUG_TRAP). Takes no lock itself; see unix_insert_socket().
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        BUG_TRAP(sk_unhashed(sk));
        sk_add_node(sk, list);
}
231
/* Unlink sk from its hash list while holding unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}
238
/* Add sk to the given hash list while holding unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}
245
246 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
247                                               int len, int type, unsigned hash)
248 {
249         struct sock *s;
250         struct hlist_node *node;
251
252         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
253                 struct unix_sock *u = unix_sk(s);
254
255                 if (u->addr->len == len &&
256                     !memcmp(u->addr->name, sunname, len))
257                         goto found;
258         }
259         s = NULL;
260 found:
261         return s;
262 }
263
264 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
265                                                    int len, int type,
266                                                    unsigned hash)
267 {
268         struct sock *s;
269
270         spin_lock(&unix_table_lock);
271         s = __unix_find_socket_byname(sunname, len, type, hash);
272         if (s)
273                 sock_hold(s);
274         spin_unlock(&unix_table_lock);
275         return s;
276 }
277
278 static struct sock *unix_find_socket_byinode(struct inode *i)
279 {
280         struct sock *s;
281         struct hlist_node *node;
282
283         spin_lock(&unix_table_lock);
284         sk_for_each(s, node,
285                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
286                 struct dentry *dentry = unix_sk(s)->dentry;
287
288                 if(dentry && dentry->d_inode == i)
289                 {
290                         sock_hold(s);
291                         goto found;
292                 }
293         }
294         s = NULL;
295 found:
296         spin_unlock(&unix_table_lock);
297         return s;
298 }
299
300 static inline int unix_writable(struct sock *sk)
301 {
302         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
303 }
304
305 static void unix_write_space(struct sock *sk)
306 {
307         read_lock(&sk->sk_callback_lock);
308         if (unix_writable(sk)) {
309                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
310                         wake_up_interruptible(sk->sk_sleep);
311                 sk_wake_async(sk, 2, POLL_OUT);
312         }
313         read_unlock(&sk->sk_callback_lock);
314 }
315
316 /* When dgram socket disconnects (or changes its peer), we clear its receive
317  * queue of packets arrived from previous peer. First, it allows to do
318  * flow control based only on wmem_alloc; second, sk connected to peer
319  * may receive messages only from that peer. */
320 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
321 {
322         if (!skb_queue_empty(&sk->sk_receive_queue)) {
323                 skb_queue_purge(&sk->sk_receive_queue);
324                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
325
326                 /* If one link of bidirectional dgram pipe is disconnected,
327                  * we signal error. Messages are lost. Do not make this,
328                  * when peer was not connected to us.
329                  */
330                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
331                         other->sk_err = ECONNRESET;
332                         other->sk_error_report(other);
333                 }
334         }
335 }
336
337 static void unix_sock_destructor(struct sock *sk)
338 {
339         struct unix_sock *u = unix_sk(sk);
340
341         skb_queue_purge(&sk->sk_receive_queue);
342
343         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
344         BUG_TRAP(sk_unhashed(sk));
345         BUG_TRAP(!sk->sk_socket);
346         if (!sock_flag(sk, SOCK_DEAD)) {
347                 printk("Attempt to release alive unix socket: %p\n", sk);
348                 return;
349         }
350
351         if (u->addr)
352                 unix_release_addr(u->addr);
353
354         atomic_dec(&unix_nr_socks);
355 #ifdef UNIX_REFCNT_DEBUG
356         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
357 #endif
358 }
359
/*
 * Tear down a unix sock.
 *
 * @sk:      sock being released.
 * @embrion: non-zero when sk is a queued connection being released together
 *           with its listening socket (the recursive call below passes 1,
 *           unix_release() passes 0). It forces ECONNRESET on the peer.
 *
 * Always returns 0.
 */
static int unix_release_sock (struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct dentry *dentry;
        struct vfsmount *mnt;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        /* Take it out of the hash table first so lookups stop finding it */
        unix_remove_socket(sk);

        /*
         * Clear state: under the state lock, orphan the sock, mark it fully
         * shut down and closed, and take over the dentry/mnt references so
         * they can be dropped later outside the lock. The previous state is
         * remembered to know whether the queue holds pending connections.
         */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        dentry       = u->dentry;
        u->dentry    = NULL;
        mnt          = u->mnt;
        u->mnt       = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        /* Wake everybody sleeping on our peer_wait queue */
        wake_up_interruptible_all(&u->peer_wait);

        skpair=unix_peer(sk);

        if (skpair!=NULL) {
                /* Connection-oriented types: tell the peer the link is gone */
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /* Unread data (or an embryo) means the peer's data was lost */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        read_lock(&skpair->sk_callback_lock);
                        sk_wake_async(skpair,1,POLL_HUP);
                        read_unlock(&skpair->sk_callback_lock);
                }
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /*
                 * On a listening socket each queued skb stands for a pending
                 * connection; release the sock attached to it as an embryo.
                 */
                if (state==TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                kfree_skb(skb);
        }

        /* Drop the filesystem references taken over above, if any */
        if (dentry) {
                dput(dentry);
                mntput(mnt);
        }

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to use get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        /* Only run the collector while the in-flight counter is non-zero */
        if (atomic_read(&unix_tot_inflight))
                unix_gc();              /* Garbage collect fds */

        return 0;
}
438
439 static int unix_listen(struct socket *sock, int backlog)
440 {
441         int err;
442         struct sock *sk = sock->sk;
443         struct unix_sock *u = unix_sk(sk);
444
445         err = -EOPNOTSUPP;
446         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
447                 goto out;                       /* Only stream/seqpacket sockets accept */
448         err = -EINVAL;
449         if (!u->addr)
450                 goto out;                       /* No listens on an unbound socket */
451         unix_state_lock(sk);
452         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
453                 goto out_unlock;
454         if (backlog > sk->sk_max_ack_backlog)
455                 wake_up_interruptible_all(&u->peer_wait);
456         sk->sk_max_ack_backlog  = backlog;
457         sk->sk_state            = TCP_LISTEN;
458         /* set credentials so connect can copy them */
459         sk->sk_peercred.pid     = current->tgid;
460         sk->sk_peercred.uid     = current->euid;
461         sk->sk_peercred.gid     = current->egid;
462         err = 0;
463
464 out_unlock:
465         unix_state_unlock(sk);
466 out:
467         return err;
468 }
469
/*
 * Forward declarations of the handlers referenced by the proto_ops
 * tables below.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
                               struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
                               struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
                              struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
                              struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
                                  struct msghdr *, size_t);

/*
 * Operations for SOCK_STREAM sockets (selected in unix_create()).
 * Uses the stream connect/sendmsg/recvmsg handlers and unix_poll;
 * socket options, mmap and sendpage are not supported.
 */
static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
513
/*
 * Operations for SOCK_DGRAM sockets (also used for SOCK_RAW, which
 * unix_create() turns into SOCK_DGRAM). accept and listen are not
 * supported; polling goes through the generic datagram_poll.
 */
static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         datagram_poll,
        .ioctl =        unix_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
534
/*
 * Operations for SOCK_SEQPACKET sockets: connection handling (connect,
 * accept, listen) is shared with the stream table, while receive and poll
 * use the datagram handlers and send has its own seqpacket handler.
 */
static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         datagram_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
555
/*
 * Protocol descriptor passed to sk_alloc() in unix_create1(); obj_size
 * makes each allocated sock large enough to hold a struct unix_sock.
 */
static struct proto unix_proto = {
        .name     = "UNIX",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct unix_sock),
};
561
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key
 * (applied to every new socket in unix_create1()):
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
569
570 static struct sock * unix_create1(struct socket *sock)
571 {
572         struct sock *sk = NULL;
573         struct unix_sock *u;
574
575         if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
576                 goto out;
577
578         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
579         if (!sk)
580                 goto out;
581
582         atomic_inc(&unix_nr_socks);
583
584         sock_init_data(sock,sk);
585         lockdep_set_class(&sk->sk_receive_queue.lock,
586                                 &af_unix_sk_receive_queue_lock_key);
587
588         sk->sk_write_space      = unix_write_space;
589         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
590         sk->sk_destruct         = unix_sock_destructor;
591         u         = unix_sk(sk);
592         u->dentry = NULL;
593         u->mnt    = NULL;
594         spin_lock_init(&u->lock);
595         atomic_set(&u->inflight, 0);
596         INIT_LIST_HEAD(&u->link);
597         mutex_init(&u->readlock); /* single task reading lock */
598         init_waitqueue_head(&u->peer_wait);
599         unix_insert_socket(unix_sockets_unbound, sk);
600 out:
601         return sk;
602 }
603
604 static int unix_create(struct socket *sock, int protocol)
605 {
606         if (protocol && protocol != PF_UNIX)
607                 return -EPROTONOSUPPORT;
608
609         sock->state = SS_UNCONNECTED;
610
611         switch (sock->type) {
612         case SOCK_STREAM:
613                 sock->ops = &unix_stream_ops;
614                 break;
615                 /*
616                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
617                  *      nothing uses it.
618                  */
619         case SOCK_RAW:
620                 sock->type=SOCK_DGRAM;
621         case SOCK_DGRAM:
622                 sock->ops = &unix_dgram_ops;
623                 break;
624         case SOCK_SEQPACKET:
625                 sock->ops = &unix_seqpacket_ops;
626                 break;
627         default:
628                 return -ESOCKTNOSUPPORT;
629         }
630
631         return unix_create1(sock) ? 0 : -ENOMEM;
632 }
633
634 static int unix_release(struct socket *sock)
635 {
636         struct sock *sk = sock->sk;
637
638         if (!sk)
639                 return 0;
640
641         sock->sk = NULL;
642
643         return unix_release_sock (sk, 0);
644 }
645
646 static int unix_autobind(struct socket *sock)
647 {
648         struct sock *sk = sock->sk;
649         struct unix_sock *u = unix_sk(sk);
650         static u32 ordernum = 1;
651         struct unix_address * addr;
652         int err;
653
654         mutex_lock(&u->readlock);
655
656         err = 0;
657         if (u->addr)
658                 goto out;
659
660         err = -ENOMEM;
661         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
662         if (!addr)
663                 goto out;
664
665         addr->name->sun_family = AF_UNIX;
666         atomic_set(&addr->refcnt, 1);
667
668 retry:
669         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
670         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
671
672         spin_lock(&unix_table_lock);
673         ordernum = (ordernum+1)&0xFFFFF;
674
675         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
676                                       addr->hash)) {
677                 spin_unlock(&unix_table_lock);
678                 /* Sanity yield. It is unusual case, but yet... */
679                 if (!(ordernum&0xFF))
680                         yield();
681                 goto retry;
682         }
683         addr->hash ^= sk->sk_type;
684
685         __unix_remove_socket(sk);
686         u->addr = addr;
687         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
688         spin_unlock(&unix_table_lock);
689         err = 0;
690
691 out:    mutex_unlock(&u->readlock);
692         return err;
693 }
694
/*
 * Look up the socket that a destination address refers to.
 *
 * @sunname: destination address (already validated by the caller).
 * @len:     address length.
 * @type:    socket type the target must have.
 * @hash:    name hash, only used for abstract names.
 * @error:   receives the error code when NULL is returned; it is not
 *           written on success.
 *
 * Returns the target sock with a reference held, or NULL.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
                                    int type, unsigned hash, int *error)
{
        struct sock *u;
        struct nameidata nd;
        int err = 0;

        if (sunname->sun_path[0]) {
                /* Filesystem name: resolve the path and require write access */
                err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
                if (err)
                        goto fail;
                err = vfs_permission(&nd, MAY_WRITE);
                if (err)
                        goto put_fail;

                /* It must be a socket inode with a socket bound to it */
                err = -ECONNREFUSED;
                if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
                        goto put_fail;
                u=unix_find_socket_byinode(nd.dentry->d_inode);
                if (!u)
                        goto put_fail;

                /* Only update the access time when the lookup will succeed */
                if (u->sk_type == type)
                        touch_atime(nd.mnt, nd.dentry);

                path_release(&nd);

                /* Wrong type: drop the reference taken by the lookup */
                err=-EPROTOTYPE;
                if (u->sk_type != type) {
                        sock_put(u);
                        goto fail;
                }
        } else {
                /* Abstract name: look it up in the hash table by name */
                err = -ECONNREFUSED;
                u=unix_find_socket_byname(sunname, len, type, hash);
                if (u) {
                        struct dentry *dentry;
                        dentry = unix_sk(u)->dentry;
                        if (dentry)
                                touch_atime(unix_sk(u)->mnt, dentry);
                } else
                        goto fail;
        }
        return u;

put_fail:
        /* Failure after a successful path_lookup(): release the path */
        path_release(&nd);
fail:
        *error=err;
        return NULL;
}
746
747
/*
 * bind handler for all unix socket types.
 *
 * An address consisting only of the family triggers autobind. Otherwise
 * the name is validated with unix_mkname(); a filesystem name creates a
 * socket inode in the filesystem, an abstract name is checked for
 * uniqueness in the hash table. On success the socket is moved from its
 * current list to the bucket for the new address.
 *
 * Returns 0 on success; -EINVAL for a bad address or an already bound
 * socket; -ENOMEM; -EADDRINUSE when the name is taken (-EEXIST from the
 * filesystem is mapped to it); or the error from the path operations.
 *
 * Serialised per socket by u->readlock.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
        struct dentry * dentry = NULL;
        struct nameidata nd;
        int err;
        unsigned hash;
        struct unix_address *addr;
        struct hlist_head *list;

        err = -EINVAL;
        if (sunaddr->sun_family != AF_UNIX)
                goto out;

        /* Family only, no name: pick one automatically */
        if (addr_len==sizeof(short)) {
                err = unix_autobind(sock);
                goto out;
        }

        /* Validate the name; on success err is the length to use */
        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        mutex_lock(&u->readlock);

        /* Already bound */
        err = -EINVAL;
        if (u->addr)
                goto out_up;

        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
                goto out_up;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        /* Bucket for abstract names; replaced below for filesystem names */
        addr->hash = hash ^ sk->sk_type;
        atomic_set(&addr->refcnt, 1);

        if (sunaddr->sun_path[0]) {
                unsigned int mode;
                err = 0;
                /*
                 * Get the parent directory, calculate the hash for last
                 * component.
                 */
                err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
                if (err)
                        goto out_mknod_parent;

                dentry = lookup_create(&nd, 0);
                err = PTR_ERR(dentry);
                if (IS_ERR(dentry))
                        goto out_mknod_unlock;

                /*
                 * All right, let's create it.
                 */
                mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
                err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
                if (err)
                        goto out_mknod_dput;
                /*
                 * Release the parent directory (its i_mutex was presumably
                 * taken inside lookup_create()) and keep the new dentry in
                 * nd instead, so nd.dentry/nd.mnt describe the socket node.
                 */
                mutex_unlock(&nd.dentry->d_inode->i_mutex);
                dput(nd.dentry);
                nd.dentry = dentry;

                /* Marks the address as a filesystem name (see UNIX_ABSTRACT) */
                addr->hash = UNIX_HASH_SIZE;
        }

        spin_lock(&unix_table_lock);

        if (!sunaddr->sun_path[0]) {
                /* Abstract names must be unique per socket type */
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(sunaddr, addr_len,
                                              sk->sk_type, hash)) {
                        unix_release_addr(addr);
                        goto out_unlock;
                }

                list = &unix_socket_table[addr->hash];
        } else {
                /* Filesystem sockets are hashed by inode number */
                list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
                u->dentry = nd.dentry;
                u->mnt    = nd.mnt;
        }

        /* Re-hash the socket under its new address */
        err = 0;
        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(list, sk);

out_unlock:
        spin_unlock(&unix_table_lock);
out_up:
        mutex_unlock(&u->readlock);
out:
        return err;

        /* Error unwinding for the filesystem path, innermost first */
out_mknod_dput:
        dput(dentry);
out_mknod_unlock:
        mutex_unlock(&nd.dentry->d_inode->i_mutex);
        path_release(&nd);
out_mknod_parent:
        if (err==-EEXIST)
                err=-EADDRINUSE;
        unix_release_addr(addr);
        goto out_up;
}
861
/*
 *	Take the state locks of two sockets.
 *
 *	When sk2 is NULL or is the same socket as sk1, only sk1 is locked.
 *	Otherwise both are locked, the one at the lower address first and
 *	the other through the _nested variant, so that every caller takes
 *	the pair in the same order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	/* Only one distinct socket involved. */
	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	/* Order the pair by address. */
	first = sk1 < sk2 ? sk1 : sk2;
	second = sk1 < sk2 ? sk2 : sk1;

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
876
/*
 *	Release the locks taken by unix_state_double_lock().
 *
 *	sk1 is always unlocked; sk2 is unlocked as well only when it is
 *	non-NULL and a different socket from sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && !unlikely(sk1 == sk2))
		unix_state_unlock(sk2);
}
886
887 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
888                               int alen, int flags)
889 {
890         struct sock *sk = sock->sk;
891         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
892         struct sock *other;
893         unsigned hash;
894         int err;
895
896         if (addr->sa_family != AF_UNSPEC) {
897                 err = unix_mkname(sunaddr, alen, &hash);
898                 if (err < 0)
899                         goto out;
900                 alen = err;
901
902                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
903                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
904                         goto out;
905
906 restart:
907                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
908                 if (!other)
909                         goto out;
910
911                 unix_state_double_lock(sk, other);
912
913                 /* Apparently VFS overslept socket death. Retry. */
914                 if (sock_flag(other, SOCK_DEAD)) {
915                         unix_state_double_unlock(sk, other);
916                         sock_put(other);
917                         goto restart;
918                 }
919
920                 err = -EPERM;
921                 if (!unix_may_send(sk, other))
922                         goto out_unlock;
923
924                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
925                 if (err)
926                         goto out_unlock;
927
928         } else {
929                 /*
930                  *      1003.1g breaking connected state with AF_UNSPEC
931                  */
932                 other = NULL;
933                 unix_state_double_lock(sk, other);
934         }
935
936         /*
937          * If it was connected, reconnect.
938          */
939         if (unix_peer(sk)) {
940                 struct sock *old_peer = unix_peer(sk);
941                 unix_peer(sk)=other;
942                 unix_state_double_unlock(sk, other);
943
944                 if (other != old_peer)
945                         unix_dgram_disconnected(sk, old_peer);
946                 sock_put(old_peer);
947         } else {
948                 unix_peer(sk)=other;
949                 unix_state_double_unlock(sk, other);
950         }
951         return 0;
952
953 out_unlock:
954         unix_state_double_unlock(sk, other);
955         sock_put(other);
956 out:
957         return err;
958 }
959
960 static long unix_wait_for_peer(struct sock *other, long timeo)
961 {
962         struct unix_sock *u = unix_sk(other);
963         int sched;
964         DEFINE_WAIT(wait);
965
966         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
967
968         sched = !sock_flag(other, SOCK_DEAD) &&
969                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
970                 (skb_queue_len(&other->sk_receive_queue) >
971                  other->sk_max_ack_backlog);
972
973         unix_state_unlock(other);
974
975         if (sched)
976                 timeo = schedule_timeout(timeo);
977
978         finish_wait(&u->peer_wait, &wait);
979         return timeo;
980 }
981
/*
 *	connect() for connection-oriented sockets.
 *
 *	A fresh sock (newsk) that will become the server-side end and a
 *	one byte skb used to announce it are allocated up front.  The
 *	listener is then looked up and locked, our own state is locked
 *	nested inside it, both ends are wired together and the skb is
 *	queued on the listener's receive queue.
 *
 *	Returns 0 on success or a negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct unix_sock *newu;
	struct unix_sock *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	long timeo;
	int st;
	int err;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		return err;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err != 0)
			return err;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/*
	 * Allocate everything before any state is locked; doing it later
	 * would force us to recheck all conditions again anyway.
	 */
	err = -ENOMEM;

	/* The sock that will represent the accepted connection. */
	newsk = unix_create1(NULL);
	if (!newsk)
		return err;

	/* The skb that carries newsk to the listening sock. */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (!skb)
		goto out;

restart:
	/* Find the listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch the state of the peer. */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	if (other->sk_state != TCP_LISTEN) {
		err = -ECONNREFUSED;
		goto out_unlock;
	}

	/* Listener backlog is full: fail or wait, then start over. */
	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/* Drops other's state lock. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/*
	 * Latch our own state.
	 *
	 * This is the tricky part: our lock has to be taken while the
	 * peer's lock is still held, which could deadlock.  Connecting to
	 * ourselves and simultaneous connects are ruled out by looking at
	 * the socket state first: other is TCP_LISTEN, and if sk were
	 * TCP_LISTEN too we bail out before trying to grab the lock.
	 *
	 * The state is sampled unlocked here and rechecked once locked.
	 */
	st = sk->sk_state;

	if (st == TCP_ESTABLISHED) {
		/* Socket is already connected. */
		err = -EISCONN;
		goto out_unlock;
	}
	if (st != TCP_CLOSE) {
		err = -EINVAL;
		goto out_unlock;
	}

	/* TCP_CLOSE: fine, continue with the connect. */
	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		/* State changed underneath us; redo everything. */
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open: fill in the new sock. */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	newsk->sk_peercred.pid = current->tgid;
	newsk->sk_peercred.uid = current->euid;
	newsk->sk_peercred.gid = current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep = &newu->peer_wait;
	otheru = unix_sk(other);

	/* The new sock shares the listener's address information. */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry = dget(otheru->dentry);
		newu->mnt = mntget(otheru->mnt);
	}

	/* Our view of the peer's credentials comes from the listener. */
	sk->sk_peercred = other->sk_peercred;

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* Queue the announcement on the listening sock and wake it. */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	/* Release whatever was acquired so far. */
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1157
1158 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1159 {
1160         struct sock *ska=socka->sk, *skb = sockb->sk;
1161
1162         /* Join our sockets back to back */
1163         sock_hold(ska);
1164         sock_hold(skb);
1165         unix_peer(ska)=skb;
1166         unix_peer(skb)=ska;
1167         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1168         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1169         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1170
1171         if (ska->sk_type != SOCK_DGRAM) {
1172                 ska->sk_state = TCP_ESTABLISHED;
1173                 skb->sk_state = TCP_ESTABLISHED;
1174                 socka->state  = SS_CONNECTED;
1175                 sockb->state  = SS_CONNECTED;
1176         }
1177         return 0;
1178 }
1179
1180 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1181 {
1182         struct sock *sk = sock->sk;
1183         struct sock *tsk;
1184         struct sk_buff *skb;
1185         int err;
1186
1187         err = -EOPNOTSUPP;
1188         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1189                 goto out;
1190
1191         err = -EINVAL;
1192         if (sk->sk_state != TCP_LISTEN)
1193                 goto out;
1194
1195         /* If socket state is TCP_LISTEN it cannot change (for now...),
1196          * so that no locks are necessary.
1197          */
1198
1199         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1200         if (!skb) {
1201                 /* This means receive shutdown. */
1202                 if (err == 0)
1203                         err = -EINVAL;
1204                 goto out;
1205         }
1206
1207         tsk = skb->sk;
1208         skb_free_datagram(sk, skb);
1209         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1210
1211         /* attach accepted sock to socket */
1212         unix_state_lock(tsk);
1213         newsock->state = SS_CONNECTED;
1214         sock_graft(tsk, newsock);
1215         unix_state_unlock(tsk);
1216         return 0;
1217
1218 out:
1219         return err;
1220 }
1221
1222
1223 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1224 {
1225         struct sock *sk = sock->sk;
1226         struct unix_sock *u;
1227         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1228         int err = 0;
1229
1230         if (peer) {
1231                 sk = unix_peer_get(sk);
1232
1233                 err = -ENOTCONN;
1234                 if (!sk)
1235                         goto out;
1236                 err = 0;
1237         } else {
1238                 sock_hold(sk);
1239         }
1240
1241         u = unix_sk(sk);
1242         unix_state_lock(sk);
1243         if (!u->addr) {
1244                 sunaddr->sun_family = AF_UNIX;
1245                 sunaddr->sun_path[0] = 0;
1246                 *uaddr_len = sizeof(short);
1247         } else {
1248                 struct unix_address *addr = u->addr;
1249
1250                 *uaddr_len = addr->len;
1251                 memcpy(sunaddr, addr->name, *uaddr_len);
1252         }
1253         unix_state_unlock(sk);
1254         sock_put(sk);
1255 out:
1256         return err;
1257 }
1258
1259 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1260 {
1261         int i;
1262
1263         scm->fp = UNIXCB(skb).fp;
1264         skb->destructor = sock_wfree;
1265         UNIXCB(skb).fp = NULL;
1266
1267         for (i=scm->fp->count-1; i>=0; i--)
1268                 unix_notinflight(scm->fp->fp[i]);
1269 }
1270
1271 static void unix_destruct_fds(struct sk_buff *skb)
1272 {
1273         struct scm_cookie scm;
1274         memset(&scm, 0, sizeof(scm));
1275         unix_detach_fds(&scm, skb);
1276
1277         /* Alas, it calls VFS */
1278         /* So fscking what? fput() had been SMP-safe since the last Summer */
1279         scm_destroy(&scm);
1280         sock_wfree(skb);
1281 }
1282
1283 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1284 {
1285         int i;
1286         for (i=scm->fp->count-1; i>=0; i--)
1287                 unix_inflight(scm->fp->fp[i]);
1288         UNIXCB(skb).fp = scm->fp;
1289         skb->destructor = unix_destruct_fds;
1290         scm->fp = NULL;
1291 }
1292
1293 /*
1294  *      Send AF_UNIX data.
1295  */
1296
1297 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1298                               struct msghdr *msg, size_t len)
1299 {
1300         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1301         struct sock *sk = sock->sk;
1302         struct unix_sock *u = unix_sk(sk);
1303         struct sockaddr_un *sunaddr=msg->msg_name;
1304         struct sock *other = NULL;
1305         int namelen = 0; /* fake GCC */
1306         int err;
1307         unsigned hash;
1308         struct sk_buff *skb;
1309         long timeo;
1310         struct scm_cookie tmp_scm;
1311
1312         if (NULL == siocb->scm)
1313                 siocb->scm = &tmp_scm;
1314         err = scm_send(sock, msg, siocb->scm);
1315         if (err < 0)
1316                 return err;
1317
1318         err = -EOPNOTSUPP;
1319         if (msg->msg_flags&MSG_OOB)
1320                 goto out;
1321
1322         if (msg->msg_namelen) {
1323                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1324                 if (err < 0)
1325                         goto out;
1326                 namelen = err;
1327         } else {
1328                 sunaddr = NULL;
1329                 err = -ENOTCONN;
1330                 other = unix_peer_get(sk);
1331                 if (!other)
1332                         goto out;
1333         }
1334
1335         if (test_bit(SOCK_PASSCRED, &sock->flags)
1336                 && !u->addr && (err = unix_autobind(sock)) != 0)
1337                 goto out;
1338
1339         err = -EMSGSIZE;
1340         if (len > sk->sk_sndbuf - 32)
1341                 goto out;
1342
1343         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1344         if (skb==NULL)
1345                 goto out;
1346
1347         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1348         if (siocb->scm->fp)
1349                 unix_attach_fds(siocb->scm, skb);
1350         unix_get_secdata(siocb->scm, skb);
1351
1352         skb_reset_transport_header(skb);
1353         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1354         if (err)
1355                 goto out_free;
1356
1357         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1358
1359 restart:
1360         if (!other) {
1361                 err = -ECONNRESET;
1362                 if (sunaddr == NULL)
1363                         goto out_free;
1364
1365                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1366                                         hash, &err);
1367                 if (other==NULL)
1368                         goto out_free;
1369         }
1370
1371         unix_state_lock(other);
1372         err = -EPERM;
1373         if (!unix_may_send(sk, other))
1374                 goto out_unlock;
1375
1376         if (sock_flag(other, SOCK_DEAD)) {
1377                 /*
1378                  *      Check with 1003.1g - what should
1379                  *      datagram error
1380                  */
1381                 unix_state_unlock(other);
1382                 sock_put(other);
1383
1384                 err = 0;
1385                 unix_state_lock(sk);
1386                 if (unix_peer(sk) == other) {
1387                         unix_peer(sk)=NULL;
1388                         unix_state_unlock(sk);
1389
1390                         unix_dgram_disconnected(sk, other);
1391                         sock_put(other);
1392                         err = -ECONNREFUSED;
1393                 } else {
1394                         unix_state_unlock(sk);
1395                 }
1396
1397                 other = NULL;
1398                 if (err)
1399                         goto out_free;
1400                 goto restart;
1401         }
1402
1403         err = -EPIPE;
1404         if (other->sk_shutdown & RCV_SHUTDOWN)
1405                 goto out_unlock;
1406
1407         if (sk->sk_type != SOCK_SEQPACKET) {
1408                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1409                 if (err)
1410                         goto out_unlock;
1411         }
1412
1413         if (unix_peer(other) != sk &&
1414             (skb_queue_len(&other->sk_receive_queue) >
1415              other->sk_max_ack_backlog)) {
1416                 if (!timeo) {
1417                         err = -EAGAIN;
1418                         goto out_unlock;
1419                 }
1420
1421                 timeo = unix_wait_for_peer(other, timeo);
1422
1423                 err = sock_intr_errno(timeo);
1424                 if (signal_pending(current))
1425                         goto out_free;
1426
1427                 goto restart;
1428         }
1429
1430         skb_queue_tail(&other->sk_receive_queue, skb);
1431         unix_state_unlock(other);
1432         other->sk_data_ready(other, len);
1433         sock_put(other);
1434         scm_destroy(siocb->scm);
1435         return len;
1436
1437 out_unlock:
1438         unix_state_unlock(other);
1439 out_free:
1440         kfree_skb(skb);
1441 out:
1442         if (other)
1443                 sock_put(other);
1444         scm_destroy(siocb->scm);
1445         return err;
1446 }
1447
1448
1449 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1450                                struct msghdr *msg, size_t len)
1451 {
1452         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1453         struct sock *sk = sock->sk;
1454         struct sock *other = NULL;
1455         struct sockaddr_un *sunaddr=msg->msg_name;
1456         int err,size;
1457         struct sk_buff *skb;
1458         int sent=0;
1459         struct scm_cookie tmp_scm;
1460
1461         if (NULL == siocb->scm)
1462                 siocb->scm = &tmp_scm;
1463         err = scm_send(sock, msg, siocb->scm);
1464         if (err < 0)
1465                 return err;
1466
1467         err = -EOPNOTSUPP;
1468         if (msg->msg_flags&MSG_OOB)
1469                 goto out_err;
1470
1471         if (msg->msg_namelen) {
1472                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1473                 goto out_err;
1474         } else {
1475                 sunaddr = NULL;
1476                 err = -ENOTCONN;
1477                 other = unix_peer(sk);
1478                 if (!other)
1479                         goto out_err;
1480         }
1481
1482         if (sk->sk_shutdown & SEND_SHUTDOWN)
1483                 goto pipe_err;
1484
1485         while(sent < len)
1486         {
1487                 /*
1488                  *      Optimisation for the fact that under 0.01% of X
1489                  *      messages typically need breaking up.
1490                  */
1491
1492                 size = len-sent;
1493
1494                 /* Keep two messages in the pipe so it schedules better */
1495                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1496                         size = (sk->sk_sndbuf >> 1) - 64;
1497
1498                 if (size > SKB_MAX_ALLOC)
1499                         size = SKB_MAX_ALLOC;
1500
1501                 /*
1502                  *      Grab a buffer
1503                  */
1504
1505                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1506
1507                 if (skb==NULL)
1508                         goto out_err;
1509
1510                 /*
1511                  *      If you pass two values to the sock_alloc_send_skb
1512                  *      it tries to grab the large buffer with GFP_NOFS
1513                  *      (which can fail easily), and if it fails grab the
1514                  *      fallback size buffer which is under a page and will
1515                  *      succeed. [Alan]
1516                  */
1517                 size = min_t(int, size, skb_tailroom(skb));
1518
1519                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1520                 if (siocb->scm->fp)
1521                         unix_attach_fds(siocb->scm, skb);
1522
1523                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1524                         kfree_skb(skb);
1525                         goto out_err;
1526                 }
1527
1528                 unix_state_lock(other);
1529
1530                 if (sock_flag(other, SOCK_DEAD) ||
1531                     (other->sk_shutdown & RCV_SHUTDOWN))
1532                         goto pipe_err_free;
1533
1534                 skb_queue_tail(&other->sk_receive_queue, skb);
1535                 unix_state_unlock(other);
1536                 other->sk_data_ready(other, size);
1537                 sent+=size;
1538         }
1539
1540         scm_destroy(siocb->scm);
1541         siocb->scm = NULL;
1542
1543         return sent;
1544
1545 pipe_err_free:
1546         unix_state_unlock(other);
1547         kfree_skb(skb);
1548 pipe_err:
1549         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1550                 send_sig(SIGPIPE,current,0);
1551         err = -EPIPE;
1552 out_err:
1553         scm_destroy(siocb->scm);
1554         siocb->scm = NULL;
1555         return sent ? : err;
1556 }
1557
1558 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1559                                   struct msghdr *msg, size_t len)
1560 {
1561         int err;
1562         struct sock *sk = sock->sk;
1563
1564         err = sock_error(sk);
1565         if (err)
1566                 return err;
1567
1568         if (sk->sk_state != TCP_ESTABLISHED)
1569                 return -ENOTCONN;
1570
1571         if (msg->msg_namelen)
1572                 msg->msg_namelen = 0;
1573
1574         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1575 }
1576
1577 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1578 {
1579         struct unix_sock *u = unix_sk(sk);
1580
1581         msg->msg_namelen = 0;
1582         if (u->addr) {
1583                 msg->msg_namelen = u->addr->len;
1584                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1585         }
1586 }
1587
1588 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1589                               struct msghdr *msg, size_t size,
1590                               int flags)
1591 {
1592         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1593         struct scm_cookie tmp_scm;
1594         struct sock *sk = sock->sk;
1595         struct unix_sock *u = unix_sk(sk);
1596         int noblock = flags & MSG_DONTWAIT;
1597         struct sk_buff *skb;
1598         int err;
1599
1600         err = -EOPNOTSUPP;
1601         if (flags&MSG_OOB)
1602                 goto out;
1603
1604         msg->msg_namelen = 0;
1605
1606         mutex_lock(&u->readlock);
1607
1608         skb = skb_recv_datagram(sk, flags, noblock, &err);
1609         if (!skb)
1610                 goto out_unlock;
1611
1612         wake_up_interruptible(&u->peer_wait);
1613
1614         if (msg->msg_name)
1615                 unix_copy_addr(msg, skb->sk);
1616
1617         if (size > skb->len)
1618                 size = skb->len;
1619         else if (size < skb->len)
1620                 msg->msg_flags |= MSG_TRUNC;
1621
1622         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1623         if (err)
1624                 goto out_free;
1625
1626         if (!siocb->scm) {
1627                 siocb->scm = &tmp_scm;
1628                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1629         }
1630         siocb->scm->creds = *UNIXCREDS(skb);
1631         unix_set_secdata(siocb->scm, skb);
1632
1633         if (!(flags & MSG_PEEK))
1634         {
1635                 if (UNIXCB(skb).fp)
1636                         unix_detach_fds(siocb->scm, skb);
1637         }
1638         else
1639         {
1640                 /* It is questionable: on PEEK we could:
1641                    - do not return fds - good, but too simple 8)
1642                    - return fds, and do not return them on read (old strategy,
1643                      apparently wrong)
1644                    - clone fds (I chose it for now, it is the most universal
1645                      solution)
1646
1647                    POSIX 1003.1g does not actually define this clearly
1648                    at all. POSIX 1003.1g doesn't define a lot of things
1649                    clearly however!
1650
1651                 */
1652                 if (UNIXCB(skb).fp)
1653                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1654         }
1655         err = size;
1656
1657         scm_recv(sock, msg, siocb->scm, flags);
1658
1659 out_free:
1660         skb_free_datagram(sk,skb);
1661 out_unlock:
1662         mutex_unlock(&u->readlock);
1663 out:
1664         return err;
1665 }
1666
1667 /*
1668  *      Sleep until data has arrive. But check for races..
1669  */
1670
1671 static long unix_stream_data_wait(struct sock * sk, long timeo)
1672 {
1673         DEFINE_WAIT(wait);
1674
1675         unix_state_lock(sk);
1676
1677         for (;;) {
1678                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1679
1680                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1681                     sk->sk_err ||
1682                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1683                     signal_pending(current) ||
1684                     !timeo)
1685                         break;
1686
1687                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1688                 unix_state_unlock(sk);
1689                 timeo = schedule_timeout(timeo);
1690                 unix_state_lock(sk);
1691                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1692         }
1693
1694         finish_wait(sk->sk_sleep, &wait);
1695         unix_state_unlock(sk);
1696         return timeo;
1697 }
1698
1699
1700
1701 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1702                                struct msghdr *msg, size_t size,
1703                                int flags)
1704 {
1705         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1706         struct scm_cookie tmp_scm;
1707         struct sock *sk = sock->sk;
1708         struct unix_sock *u = unix_sk(sk);
1709         struct sockaddr_un *sunaddr=msg->msg_name;
1710         int copied = 0;
1711         int check_creds = 0;
1712         int target;
1713         int err = 0;
1714         long timeo;
1715
1716         err = -EINVAL;
1717         if (sk->sk_state != TCP_ESTABLISHED)
1718                 goto out;
1719
1720         err = -EOPNOTSUPP;
1721         if (flags&MSG_OOB)
1722                 goto out;
1723
1724         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1725         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1726
1727         msg->msg_namelen = 0;
1728
1729         /* Lock the socket to prevent queue disordering
1730          * while sleeps in memcpy_tomsg
1731          */
1732
1733         if (!siocb->scm) {
1734                 siocb->scm = &tmp_scm;
1735                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1736         }
1737
1738         mutex_lock(&u->readlock);
1739
1740         do
1741         {
1742                 int chunk;
1743                 struct sk_buff *skb;
1744
1745                 unix_state_lock(sk);
1746                 skb = skb_dequeue(&sk->sk_receive_queue);
1747                 if (skb==NULL)
1748                 {
1749                         if (copied >= target)
1750                                 goto unlock;
1751
1752                         /*
1753                          *      POSIX 1003.1g mandates this order.
1754                          */
1755
1756                         if ((err = sock_error(sk)) != 0)
1757                                 goto unlock;
1758                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1759                                 goto unlock;
1760
1761                         unix_state_unlock(sk);
1762                         err = -EAGAIN;
1763                         if (!timeo)
1764                                 break;
1765                         mutex_unlock(&u->readlock);
1766
1767                         timeo = unix_stream_data_wait(sk, timeo);
1768
1769                         if (signal_pending(current)) {
1770                                 err = sock_intr_errno(timeo);
1771                                 goto out;
1772                         }
1773                         mutex_lock(&u->readlock);
1774                         continue;
1775  unlock:
1776                         unix_state_unlock(sk);
1777                         break;
1778                 }
1779                 unix_state_unlock(sk);
1780
1781                 if (check_creds) {
1782                         /* Never glue messages from different writers */
1783                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1784                                 skb_queue_head(&sk->sk_receive_queue, skb);
1785                                 break;
1786                         }
1787                 } else {
1788                         /* Copy credentials */
1789                         siocb->scm->creds = *UNIXCREDS(skb);
1790                         check_creds = 1;
1791                 }
1792
1793                 /* Copy address just once */
1794                 if (sunaddr)
1795                 {
1796                         unix_copy_addr(msg, skb->sk);
1797                         sunaddr = NULL;
1798                 }
1799
1800                 chunk = min_t(unsigned int, skb->len, size);
1801                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1802                         skb_queue_head(&sk->sk_receive_queue, skb);
1803                         if (copied == 0)
1804                                 copied = -EFAULT;
1805                         break;
1806                 }
1807                 copied += chunk;
1808                 size -= chunk;
1809
1810                 /* Mark read part of skb as used */
1811                 if (!(flags & MSG_PEEK))
1812                 {
1813                         skb_pull(skb, chunk);
1814
1815                         if (UNIXCB(skb).fp)
1816                                 unix_detach_fds(siocb->scm, skb);
1817
1818                         /* put the skb back if we didn't use it up.. */
1819                         if (skb->len)
1820                         {
1821                                 skb_queue_head(&sk->sk_receive_queue, skb);
1822                                 break;
1823                         }
1824
1825                         kfree_skb(skb);
1826
1827                         if (siocb->scm->fp)
1828                                 break;
1829                 }
1830                 else
1831                 {
1832                         /* It is questionable, see note in unix_dgram_recvmsg.
1833                          */
1834                         if (UNIXCB(skb).fp)
1835                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1836
1837                         /* put message back and return */
1838                         skb_queue_head(&sk->sk_receive_queue, skb);
1839                         break;
1840                 }
1841         } while (size);
1842
1843         mutex_unlock(&u->readlock);
1844         scm_recv(sock, msg, siocb->scm, flags);
1845 out:
1846         return copied ? : err;
1847 }
1848
1849 static int unix_shutdown(struct socket *sock, int mode)
1850 {
1851         struct sock *sk = sock->sk;
1852         struct sock *other;
1853
1854         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1855
1856         if (mode) {
1857                 unix_state_lock(sk);
1858                 sk->sk_shutdown |= mode;
1859                 other=unix_peer(sk);
1860                 if (other)
1861                         sock_hold(other);
1862                 unix_state_unlock(sk);
1863                 sk->sk_state_change(sk);
1864
1865                 if (other &&
1866                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1867
1868                         int peer_mode = 0;
1869
1870                         if (mode&RCV_SHUTDOWN)
1871                                 peer_mode |= SEND_SHUTDOWN;
1872                         if (mode&SEND_SHUTDOWN)
1873                                 peer_mode |= RCV_SHUTDOWN;
1874                         unix_state_lock(other);
1875                         other->sk_shutdown |= peer_mode;
1876                         unix_state_unlock(other);
1877                         other->sk_state_change(other);
1878                         read_lock(&other->sk_callback_lock);
1879                         if (peer_mode == SHUTDOWN_MASK)
1880                                 sk_wake_async(other,1,POLL_HUP);
1881                         else if (peer_mode & RCV_SHUTDOWN)
1882                                 sk_wake_async(other,1,POLL_IN);
1883                         read_unlock(&other->sk_callback_lock);
1884                 }
1885                 if (other)
1886                         sock_put(other);
1887         }
1888         return 0;
1889 }
1890
1891 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1892 {
1893         struct sock *sk = sock->sk;
1894         long amount=0;
1895         int err;
1896
1897         switch(cmd)
1898         {
1899                 case SIOCOUTQ:
1900                         amount = atomic_read(&sk->sk_wmem_alloc);
1901                         err = put_user(amount, (int __user *)arg);
1902                         break;
1903                 case SIOCINQ:
1904                 {
1905                         struct sk_buff *skb;
1906
1907                         if (sk->sk_state == TCP_LISTEN) {
1908                                 err = -EINVAL;
1909                                 break;
1910                         }
1911
1912                         spin_lock(&sk->sk_receive_queue.lock);
1913                         if (sk->sk_type == SOCK_STREAM ||
1914                             sk->sk_type == SOCK_SEQPACKET) {
1915                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1916                                         amount += skb->len;
1917                         } else {
1918                                 skb = skb_peek(&sk->sk_receive_queue);
1919                                 if (skb)
1920                                         amount=skb->len;
1921                         }
1922                         spin_unlock(&sk->sk_receive_queue.lock);
1923                         err = put_user(amount, (int __user *)arg);
1924                         break;
1925                 }
1926
1927                 default:
1928                         err = -ENOIOCTLCMD;
1929                         break;
1930         }
1931         return err;
1932 }
1933
1934 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1935 {
1936         struct sock *sk = sock->sk;
1937         unsigned int mask;
1938
1939         poll_wait(file, sk->sk_sleep, wait);
1940         mask = 0;
1941
1942         /* exceptional events? */
1943         if (sk->sk_err)
1944                 mask |= POLLERR;
1945         if (sk->sk_shutdown == SHUTDOWN_MASK)
1946                 mask |= POLLHUP;
1947         if (sk->sk_shutdown & RCV_SHUTDOWN)
1948                 mask |= POLLRDHUP;
1949
1950         /* readable? */
1951         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1952             (sk->sk_shutdown & RCV_SHUTDOWN))
1953                 mask |= POLLIN | POLLRDNORM;
1954
1955         /* Connection-based need to check for termination and startup */
1956         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1957                 mask |= POLLHUP;
1958
1959         /*
1960          * we set writable also when the other side has shut down the
1961          * connection. This prevents stuck sockets.
1962          */
1963         if (unix_writable(sk))
1964                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1965
1966         return mask;
1967 }
1968
1969
1970 #ifdef CONFIG_PROC_FS
1971 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1972 {
1973         loff_t off = 0;
1974         struct sock *s;
1975
1976         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1977                 if (off == pos)
1978                         return s;
1979                 ++off;
1980         }
1981         return NULL;
1982 }
1983
1984
1985 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1986 {
1987         spin_lock(&unix_table_lock);
1988         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1989 }
1990
1991 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1992 {
1993         ++*pos;
1994
1995         if (v == (void *)1)
1996                 return first_unix_socket(seq->private);
1997         return next_unix_socket(seq->private, v);
1998 }
1999
2000 static void unix_seq_stop(struct seq_file *seq, void *v)
2001 {
2002         spin_unlock(&unix_table_lock);
2003 }
2004
2005 static int unix_seq_show(struct seq_file *seq, void *v)
2006 {
2007
2008         if (v == (void *)1)
2009                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2010                          "Inode Path\n");
2011         else {
2012                 struct sock *s = v;
2013                 struct unix_sock *u = unix_sk(s);
2014                 unix_state_lock(s);
2015
2016                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2017                         s,
2018                         atomic_read(&s->sk_refcnt),
2019                         0,
2020                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2021                         s->sk_type,
2022                         s->sk_socket ?
2023                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2024                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2025                         sock_i_ino(s));
2026
2027                 if (u->addr) {
2028                         int i, len;
2029                         seq_putc(seq, ' ');
2030
2031                         i = 0;
2032                         len = u->addr->len - sizeof(short);
2033                         if (!UNIX_ABSTRACT(s))
2034                                 len--;
2035                         else {
2036                                 seq_putc(seq, '@');
2037                                 i++;
2038                         }
2039                         for ( ; i < len; i++)
2040                                 seq_putc(seq, u->addr->name->sun_path[i]);
2041                 }
2042                 unix_state_unlock(s);
2043                 seq_putc(seq, '\n');
2044         }
2045
2046         return 0;
2047 }
2048
2049 static const struct seq_operations unix_seq_ops = {
2050         .start  = unix_seq_start,
2051         .next   = unix_seq_next,
2052         .stop   = unix_seq_stop,
2053         .show   = unix_seq_show,
2054 };
2055
2056
2057 static int unix_seq_open(struct inode *inode, struct file *file)
2058 {
2059         struct seq_file *seq;
2060         int rc = -ENOMEM;
2061         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2062
2063         if (!iter)
2064                 goto out;
2065
2066         rc = seq_open(file, &unix_seq_ops);
2067         if (rc)
2068                 goto out_kfree;
2069
2070         seq          = file->private_data;
2071         seq->private = iter;
2072         *iter = 0;
2073 out:
2074         return rc;
2075 out_kfree:
2076         kfree(iter);
2077         goto out;
2078 }
2079
2080 static const struct file_operations unix_seq_fops = {
2081         .owner          = THIS_MODULE,
2082         .open           = unix_seq_open,
2083         .read           = seq_read,
2084         .llseek         = seq_lseek,
2085         .release        = seq_release_private,
2086 };
2087
2088 #endif
2089
2090 static struct net_proto_family unix_family_ops = {
2091         .family = PF_UNIX,
2092         .create = unix_create,
2093         .owner  = THIS_MODULE,
2094 };
2095
2096 static int __init af_unix_init(void)
2097 {
2098         int rc = -1;
2099         struct sk_buff *dummy_skb;
2100
2101         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2102
2103         rc = proto_register(&unix_proto, 1);
2104         if (rc != 0) {
2105                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2106                        __FUNCTION__);
2107                 goto out;
2108         }
2109
2110         sock_register(&unix_family_ops);
2111 #ifdef CONFIG_PROC_FS
2112         proc_net_fops_create("unix", 0, &unix_seq_fops);
2113 #endif
2114         unix_sysctl_register();
2115 out:
2116         return rc;
2117 }
2118
2119 static void __exit af_unix_exit(void)
2120 {
2121         sock_unregister(PF_UNIX);
2122         unix_sysctl_unregister();
2123         proc_net_remove("unix");
2124         proto_unregister(&unix_proto);
2125 }
2126
2127 module_init(af_unix_init);
2128 module_exit(af_unix_exit);
2129
2130 MODULE_LICENSE("GPL");
2131 MODULE_ALIAS_NETPROTO(PF_UNIX);