net: Make sure BHs are disabled in sock_prot_inuse_add()
[linux-2.6] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *              Linus Torvalds  :       Assorted bug cures.
13  *              Niibe Yutaka    :       async I/O support.
14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
15  *              Alan Cox        :       Limit size of allocated blocks.
16  *              Alan Cox        :       Fixed the stupid socketpair bug.
17  *              Alan Cox        :       BSD compatibility fine tuning.
18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
19  *              Alan Cox        :       Sorted out a proper draft version of
20  *                                      file descriptor passing hacked up from
21  *                                      Mike Shaver's work.
22  *              Marty Leisner   :       Fixes to fd passing
23  *              Nick Nevin      :       recvmsg bugfix.
24  *              Alan Cox        :       Started proper garbage collector
25  *              Heiko EiBfeldt  :       Missing verify_area check
26  *              Alan Cox        :       Started POSIXisms
27  *              Andreas Schwab  :       Replace inode by dentry for proper
28  *                                      reference counting
29  *              Kirk Petersen   :       Made this a module
30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
31  *                                      Lots of bug fixes.
32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
33  *                                      by above two patches.
34  *           Andrea Arcangeli   :       If possible we block in connect(2)
35  *                                      if the max backlog of the listen socket
36  *                                      has been reached. This won't break
37  *                                      old apps and it will avoid huge amount
38  *                                      of socks hashed (this for unix_gc()
39  *                                      performance reasons).
40  *                                      Security fix that limits the max
41  *                                      number of socks to 2*max_files and
42  *                                      the number of skb queueable in the
43  *                                      dgram receiver.
44  *              Artur Skawina   :       Hash function optimizations
45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
46  *            Malcolm Beattie   :       Set peercred for socketpair
47  *           Michal Ostrowski   :       Module initialization cleanup.
48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
49  *                                      the core infrastructure is doing that
50  *                                      for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *      [TO FIX]
56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
57  *              other the moment one end closes.
58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *      [NOT TO FIX]
61  *      accept() returns a path name even if the connecting socket has closed
62  *              in the meantime (BSD loses the path and gives up).
63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *      BSD af_unix apparently has connect forgetting to block properly.
67  *              (need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *      Bug fixes and improvements.
71  *              - client shutdown killed server socket.
72  *              - removed all useless cli/sti pairs.
73  *
74  *      Semantic changes/extensions.
75  *              - generic control message passing.
76  *              - SCM_CREDENTIALS control message.
77  *              - "Abstract" (not FS based) socket bindings.
78  *                Abstract names are sequences of bytes (not zero terminated)
79  *                started by 0, so that this name space does not intersect
80  *                with BSD names.
81  */
82
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 static DEFINE_SPINLOCK(unix_table_lock);
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121
122 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
123
124 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125
#ifndef CONFIG_SECURITY_NETWORK
/* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#else
/* Copy sizeof(u32) bytes of scm->secid into the skb's UNIXSID() slot. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	void *slot = UNIXSID(skb);

	memcpy(slot, &scm->secid, sizeof(u32));
}

/* Load scm->secid from the skb's UNIXSID() slot. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#endif /* !CONFIG_SECURITY_NETWORK */
143
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate rwlock.
148  */
149
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152         unsigned hash = (__force unsigned)n;
153         hash ^= hash>>16;
154         hash ^= hash>>8;
155         return hash&(UNIX_HASH_SIZE-1);
156 }
157
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159
160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162         return unix_peer(osk) == sk;
163 }
164
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177         struct sock *peer;
178
179         unix_state_lock(s);
180         peer = unix_peer(s);
181         if (peer)
182                 sock_hold(peer);
183         unix_state_unlock(s);
184         return peer;
185 }
186
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189         if (atomic_dec_and_test(&addr->refcnt))
190                 kfree(addr);
191 }
192
193 /*
194  *      Check unix socket name:
195  *              - should be not zero length.
196  *              - if started by not zero, should be NULL terminated (FS object)
197  *              - if started by zero, it is abstract name.
198  */
199
200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201 {
202         if (len <= sizeof(short) || len > sizeof(*sunaddr))
203                 return -EINVAL;
204         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205                 return -EINVAL;
206         if (sunaddr->sun_path[0]) {
207                 /*
208                  * This may look like an off by one error but it is a bit more
209                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
210                  * sun_path[108] doesnt as such exist.  However in kernel space
211                  * we are guaranteed that it is a valid memory location in our
212                  * kernel address buffer.
213                  */
214                 ((char *)sunaddr)[len] = 0;
215                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
216                 return len;
217         }
218
219         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220         return len;
221 }
222
/*
 * Unlink @sk from its hash list via sk_del_node_init().  Takes no lock
 * itself; the callers in this file hold unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
227
/*
 * Add @sk to @list, warning first if it is still hashed somewhere.
 * Takes no lock itself; the callers in this file hold unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
233
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236         spin_lock(&unix_table_lock);
237         __unix_remove_socket(sk);
238         spin_unlock(&unix_table_lock);
239 }
240
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243         spin_lock(&unix_table_lock);
244         __unix_insert_socket(list, sk);
245         spin_unlock(&unix_table_lock);
246 }
247
248 static struct sock *__unix_find_socket_byname(struct net *net,
249                                               struct sockaddr_un *sunname,
250                                               int len, int type, unsigned hash)
251 {
252         struct sock *s;
253         struct hlist_node *node;
254
255         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256                 struct unix_sock *u = unix_sk(s);
257
258                 if (!net_eq(sock_net(s), net))
259                         continue;
260
261                 if (u->addr->len == len &&
262                     !memcmp(u->addr->name, sunname, len))
263                         goto found;
264         }
265         s = NULL;
266 found:
267         return s;
268 }
269
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271                                                    struct sockaddr_un *sunname,
272                                                    int len, int type,
273                                                    unsigned hash)
274 {
275         struct sock *s;
276
277         spin_lock(&unix_table_lock);
278         s = __unix_find_socket_byname(net, sunname, len, type, hash);
279         if (s)
280                 sock_hold(s);
281         spin_unlock(&unix_table_lock);
282         return s;
283 }
284
285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287         struct sock *s;
288         struct hlist_node *node;
289
290         spin_lock(&unix_table_lock);
291         sk_for_each(s, node,
292                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293                 struct dentry *dentry = unix_sk(s)->dentry;
294
295                 if (!net_eq(sock_net(s), net))
296                         continue;
297
298                 if (dentry && dentry->d_inode == i) {
299                         sock_hold(s);
300                         goto found;
301                 }
302         }
303         s = NULL;
304 found:
305         spin_unlock(&unix_table_lock);
306         return s;
307 }
308
309 static inline int unix_writable(struct sock *sk)
310 {
311         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313
314 static void unix_write_space(struct sock *sk)
315 {
316         read_lock(&sk->sk_callback_lock);
317         if (unix_writable(sk)) {
318                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
319                         wake_up_interruptible_sync(sk->sk_sleep);
320                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321         }
322         read_unlock(&sk->sk_callback_lock);
323 }
324
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326  * queue of packets arrived from previous peer. First, it allows to do
327  * flow control based only on wmem_alloc; second, sk connected to peer
328  * may receive messages only from that peer. */
329 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330 {
331         if (!skb_queue_empty(&sk->sk_receive_queue)) {
332                 skb_queue_purge(&sk->sk_receive_queue);
333                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334
335                 /* If one link of bidirectional dgram pipe is disconnected,
336                  * we signal error. Messages are lost. Do not make this,
337                  * when peer was not connected to us.
338                  */
339                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340                         other->sk_err = ECONNRESET;
341                         other->sk_error_report(other);
342                 }
343         }
344 }
345
346 static void unix_sock_destructor(struct sock *sk)
347 {
348         struct unix_sock *u = unix_sk(sk);
349
350         skb_queue_purge(&sk->sk_receive_queue);
351
352         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353         WARN_ON(!sk_unhashed(sk));
354         WARN_ON(sk->sk_socket);
355         if (!sock_flag(sk, SOCK_DEAD)) {
356                 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
357                 return;
358         }
359
360         if (u->addr)
361                 unix_release_addr(u->addr);
362
363         atomic_dec(&unix_nr_socks);
364         local_bh_disable();
365         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366         local_bh_enable();
367 #ifdef UNIX_REFCNT_DEBUG
368         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369                 atomic_read(&unix_nr_socks));
370 #endif
371 }
372
373 static int unix_release_sock(struct sock *sk, int embrion)
374 {
375         struct unix_sock *u = unix_sk(sk);
376         struct dentry *dentry;
377         struct vfsmount *mnt;
378         struct sock *skpair;
379         struct sk_buff *skb;
380         int state;
381
382         unix_remove_socket(sk);
383
384         /* Clear state */
385         unix_state_lock(sk);
386         sock_orphan(sk);
387         sk->sk_shutdown = SHUTDOWN_MASK;
388         dentry       = u->dentry;
389         u->dentry    = NULL;
390         mnt          = u->mnt;
391         u->mnt       = NULL;
392         state = sk->sk_state;
393         sk->sk_state = TCP_CLOSE;
394         unix_state_unlock(sk);
395
396         wake_up_interruptible_all(&u->peer_wait);
397
398         skpair = unix_peer(sk);
399
400         if (skpair != NULL) {
401                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
402                         unix_state_lock(skpair);
403                         /* No more writes */
404                         skpair->sk_shutdown = SHUTDOWN_MASK;
405                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
406                                 skpair->sk_err = ECONNRESET;
407                         unix_state_unlock(skpair);
408                         skpair->sk_state_change(skpair);
409                         read_lock(&skpair->sk_callback_lock);
410                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411                         read_unlock(&skpair->sk_callback_lock);
412                 }
413                 sock_put(skpair); /* It may now die */
414                 unix_peer(sk) = NULL;
415         }
416
417         /* Try to flush out this socket. Throw out buffers at least */
418
419         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
420                 if (state == TCP_LISTEN)
421                         unix_release_sock(skb->sk, 1);
422                 /* passed fds are erased in the kfree_skb hook        */
423                 kfree_skb(skb);
424         }
425
426         if (dentry) {
427                 dput(dentry);
428                 mntput(mnt);
429         }
430
431         sock_put(sk);
432
433         /* ---- Socket is dead now and most probably destroyed ---- */
434
435         /*
436          * Fixme: BSD difference: In BSD all sockets connected to use get
437          *        ECONNRESET and we die on the spot. In Linux we behave
438          *        like files and pipes do and wait for the last
439          *        dereference.
440          *
441          * Can't we simply set sock->err?
442          *
443          *        What the above comment does talk about? --ANK(980817)
444          */
445
446         if (unix_tot_inflight)
447                 unix_gc();              /* Garbage collect fds */
448
449         return 0;
450 }
451
452 static int unix_listen(struct socket *sock, int backlog)
453 {
454         int err;
455         struct sock *sk = sock->sk;
456         struct unix_sock *u = unix_sk(sk);
457
458         err = -EOPNOTSUPP;
459         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460                 goto out;       /* Only stream/seqpacket sockets accept */
461         err = -EINVAL;
462         if (!u->addr)
463                 goto out;       /* No listens on an unbound socket */
464         unix_state_lock(sk);
465         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466                 goto out_unlock;
467         if (backlog > sk->sk_max_ack_backlog)
468                 wake_up_interruptible_all(&u->peer_wait);
469         sk->sk_max_ack_backlog  = backlog;
470         sk->sk_state            = TCP_LISTEN;
471         /* set credentials so connect can copy them */
472         sk->sk_peercred.pid     = task_tgid_vnr(current);
473         sk->sk_peercred.uid     = current->euid;
474         sk->sk_peercred.gid     = current->egid;
475         err = 0;
476
477 out_unlock:
478         unix_state_unlock(sk);
479 out:
480         return err;
481 }
482
483 static int unix_release(struct socket *);
484 static int unix_bind(struct socket *, struct sockaddr *, int);
485 static int unix_stream_connect(struct socket *, struct sockaddr *,
486                                int addr_len, int flags);
487 static int unix_socketpair(struct socket *, struct socket *);
488 static int unix_accept(struct socket *, struct socket *, int);
489 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
490 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
491 static unsigned int unix_dgram_poll(struct file *, struct socket *,
492                                     poll_table *);
493 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
494 static int unix_shutdown(struct socket *, int);
495 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
496                                struct msghdr *, size_t);
497 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
498                                struct msghdr *, size_t, int);
499 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
500                               struct msghdr *, size_t);
501 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
502                               struct msghdr *, size_t, int);
503 static int unix_dgram_connect(struct socket *, struct sockaddr *,
504                               int, int);
505 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
506                                   struct msghdr *, size_t);
507
508 static const struct proto_ops unix_stream_ops = {
509         .family =       PF_UNIX,
510         .owner =        THIS_MODULE,
511         .release =      unix_release,
512         .bind =         unix_bind,
513         .connect =      unix_stream_connect,
514         .socketpair =   unix_socketpair,
515         .accept =       unix_accept,
516         .getname =      unix_getname,
517         .poll =         unix_poll,
518         .ioctl =        unix_ioctl,
519         .listen =       unix_listen,
520         .shutdown =     unix_shutdown,
521         .setsockopt =   sock_no_setsockopt,
522         .getsockopt =   sock_no_getsockopt,
523         .sendmsg =      unix_stream_sendmsg,
524         .recvmsg =      unix_stream_recvmsg,
525         .mmap =         sock_no_mmap,
526         .sendpage =     sock_no_sendpage,
527 };
528
529 static const struct proto_ops unix_dgram_ops = {
530         .family =       PF_UNIX,
531         .owner =        THIS_MODULE,
532         .release =      unix_release,
533         .bind =         unix_bind,
534         .connect =      unix_dgram_connect,
535         .socketpair =   unix_socketpair,
536         .accept =       sock_no_accept,
537         .getname =      unix_getname,
538         .poll =         unix_dgram_poll,
539         .ioctl =        unix_ioctl,
540         .listen =       sock_no_listen,
541         .shutdown =     unix_shutdown,
542         .setsockopt =   sock_no_setsockopt,
543         .getsockopt =   sock_no_getsockopt,
544         .sendmsg =      unix_dgram_sendmsg,
545         .recvmsg =      unix_dgram_recvmsg,
546         .mmap =         sock_no_mmap,
547         .sendpage =     sock_no_sendpage,
548 };
549
550 static const struct proto_ops unix_seqpacket_ops = {
551         .family =       PF_UNIX,
552         .owner =        THIS_MODULE,
553         .release =      unix_release,
554         .bind =         unix_bind,
555         .connect =      unix_stream_connect,
556         .socketpair =   unix_socketpair,
557         .accept =       unix_accept,
558         .getname =      unix_getname,
559         .poll =         unix_dgram_poll,
560         .ioctl =        unix_ioctl,
561         .listen =       unix_listen,
562         .shutdown =     unix_shutdown,
563         .setsockopt =   sock_no_setsockopt,
564         .getsockopt =   sock_no_getsockopt,
565         .sendmsg =      unix_seqpacket_sendmsg,
566         .recvmsg =      unix_dgram_recvmsg,
567         .mmap =         sock_no_mmap,
568         .sendpage =     sock_no_sendpage,
569 };
570
571 static struct proto unix_proto = {
572         .name                   = "UNIX",
573         .owner                  = THIS_MODULE,
574         .sockets_allocated      = &unix_nr_socks,
575         .obj_size               = sizeof(struct unix_sock),
576 };
577
578 /*
579  * AF_UNIX sockets do not interact with hardware, hence they
580  * dont trigger interrupts - so it's safe for them to have
581  * bh-unsafe locking for their sk_receive_queue.lock. Split off
582  * this special lock-class by reinitializing the spinlock key:
583  */
584 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
585
586 static struct sock *unix_create1(struct net *net, struct socket *sock)
587 {
588         struct sock *sk = NULL;
589         struct unix_sock *u;
590
591         atomic_inc(&unix_nr_socks);
592         if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
593                 goto out;
594
595         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
596         if (!sk)
597                 goto out;
598
599         sock_init_data(sock, sk);
600         lockdep_set_class(&sk->sk_receive_queue.lock,
601                                 &af_unix_sk_receive_queue_lock_key);
602
603         sk->sk_write_space      = unix_write_space;
604         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
605         sk->sk_destruct         = unix_sock_destructor;
606         u         = unix_sk(sk);
607         u->dentry = NULL;
608         u->mnt    = NULL;
609         spin_lock_init(&u->lock);
610         atomic_long_set(&u->inflight, 0);
611         INIT_LIST_HEAD(&u->link);
612         mutex_init(&u->readlock); /* single task reading lock */
613         init_waitqueue_head(&u->peer_wait);
614         unix_insert_socket(unix_sockets_unbound, sk);
615 out:
616         if (sk == NULL)
617                 atomic_dec(&unix_nr_socks);
618         else
619                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
620
621         return sk;
622 }
623
624 static int unix_create(struct net *net, struct socket *sock, int protocol)
625 {
626         if (protocol && protocol != PF_UNIX)
627                 return -EPROTONOSUPPORT;
628
629         sock->state = SS_UNCONNECTED;
630
631         switch (sock->type) {
632         case SOCK_STREAM:
633                 sock->ops = &unix_stream_ops;
634                 break;
635                 /*
636                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
637                  *      nothing uses it.
638                  */
639         case SOCK_RAW:
640                 sock->type = SOCK_DGRAM;
641         case SOCK_DGRAM:
642                 sock->ops = &unix_dgram_ops;
643                 break;
644         case SOCK_SEQPACKET:
645                 sock->ops = &unix_seqpacket_ops;
646                 break;
647         default:
648                 return -ESOCKTNOSUPPORT;
649         }
650
651         return unix_create1(net, sock) ? 0 : -ENOMEM;
652 }
653
654 static int unix_release(struct socket *sock)
655 {
656         struct sock *sk = sock->sk;
657
658         if (!sk)
659                 return 0;
660
661         sock->sk = NULL;
662
663         return unix_release_sock(sk, 0);
664 }
665
666 static int unix_autobind(struct socket *sock)
667 {
668         struct sock *sk = sock->sk;
669         struct net *net = sock_net(sk);
670         struct unix_sock *u = unix_sk(sk);
671         static u32 ordernum = 1;
672         struct unix_address *addr;
673         int err;
674
675         mutex_lock(&u->readlock);
676
677         err = 0;
678         if (u->addr)
679                 goto out;
680
681         err = -ENOMEM;
682         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
683         if (!addr)
684                 goto out;
685
686         addr->name->sun_family = AF_UNIX;
687         atomic_set(&addr->refcnt, 1);
688
689 retry:
690         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
691         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
692
693         spin_lock(&unix_table_lock);
694         ordernum = (ordernum+1)&0xFFFFF;
695
696         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
697                                       addr->hash)) {
698                 spin_unlock(&unix_table_lock);
699                 /* Sanity yield. It is unusual case, but yet... */
700                 if (!(ordernum&0xFF))
701                         yield();
702                 goto retry;
703         }
704         addr->hash ^= sk->sk_type;
705
706         __unix_remove_socket(sk);
707         u->addr = addr;
708         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
709         spin_unlock(&unix_table_lock);
710         err = 0;
711
712 out:    mutex_unlock(&u->readlock);
713         return err;
714 }
715
/*
 * Resolve a destination address to a sock, with a reference held.
 *
 * Abstract names (first path byte zero) are looked up in the hash
 * table.  Filesystem names are resolved with kern_path(); the inode
 * must grant MAY_WRITE, be a socket inode, and have a sock of the
 * requested @type bound to it.
 *
 * Returns the sock, or NULL with *error set: the kern_path() or
 * inode_permission() error, -ECONNREFUSED when nothing is bound there,
 * or -EPROTOTYPE on a type mismatch.  *error is untouched on success.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *found;
	struct path path;
	struct inode *inode;
	int err;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found) {
			err = -ECONNREFUSED;
			goto fail;
		}
		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);
		return found;
	}

	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(net, inode);
	if (!found)
		goto put_fail;

	/* atime is only touched when the type matches. */
	if (found->sk_type == type)
		touch_atime(path.mnt, path.dentry);

	path_put(&path);

	if (found->sk_type != type) {
		sock_put(found);
		err = -EPROTOTYPE;
		goto fail;
	}
	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
770
771
772 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
773 {
774         struct sock *sk = sock->sk;
775         struct net *net = sock_net(sk);
776         struct unix_sock *u = unix_sk(sk);
777         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
778         struct dentry *dentry = NULL;
779         struct nameidata nd;
780         int err;
781         unsigned hash;
782         struct unix_address *addr;
783         struct hlist_head *list;
784
785         err = -EINVAL;
786         if (sunaddr->sun_family != AF_UNIX)
787                 goto out;
788
789         if (addr_len == sizeof(short)) {
790                 err = unix_autobind(sock);
791                 goto out;
792         }
793
794         err = unix_mkname(sunaddr, addr_len, &hash);
795         if (err < 0)
796                 goto out;
797         addr_len = err;
798
799         mutex_lock(&u->readlock);
800
801         err = -EINVAL;
802         if (u->addr)
803                 goto out_up;
804
805         err = -ENOMEM;
806         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
807         if (!addr)
808                 goto out_up;
809
810         memcpy(addr->name, sunaddr, addr_len);
811         addr->len = addr_len;
812         addr->hash = hash ^ sk->sk_type;
813         atomic_set(&addr->refcnt, 1);
814
815         if (sunaddr->sun_path[0]) {
816                 unsigned int mode;
817                 err = 0;
818                 /*
819                  * Get the parent directory, calculate the hash for last
820                  * component.
821                  */
822                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
823                 if (err)
824                         goto out_mknod_parent;
825
826                 dentry = lookup_create(&nd, 0);
827                 err = PTR_ERR(dentry);
828                 if (IS_ERR(dentry))
829                         goto out_mknod_unlock;
830
831                 /*
832                  * All right, let's create it.
833                  */
834                 mode = S_IFSOCK |
835                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
836                 err = mnt_want_write(nd.path.mnt);
837                 if (err)
838                         goto out_mknod_dput;
839                 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
840                 mnt_drop_write(nd.path.mnt);
841                 if (err)
842                         goto out_mknod_dput;
843                 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
844                 dput(nd.path.dentry);
845                 nd.path.dentry = dentry;
846
847                 addr->hash = UNIX_HASH_SIZE;
848         }
849
850         spin_lock(&unix_table_lock);
851
852         if (!sunaddr->sun_path[0]) {
853                 err = -EADDRINUSE;
854                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
855                                               sk->sk_type, hash)) {
856                         unix_release_addr(addr);
857                         goto out_unlock;
858                 }
859
860                 list = &unix_socket_table[addr->hash];
861         } else {
862                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
863                 u->dentry = nd.path.dentry;
864                 u->mnt    = nd.path.mnt;
865         }
866
867         err = 0;
868         __unix_remove_socket(sk);
869         u->addr = addr;
870         __unix_insert_socket(list, sk);
871
872 out_unlock:
873         spin_unlock(&unix_table_lock);
874 out_up:
875         mutex_unlock(&u->readlock);
876 out:
877         return err;
878
879 out_mknod_dput:
880         dput(dentry);
881 out_mknod_unlock:
882         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
883         path_put(&nd.path);
884 out_mknod_parent:
885         if (err == -EEXIST)
886                 err = -EADDRINUSE;
887         unix_release_addr(addr);
888         goto out_up;
889 }
890
/*
 * Take the state lock of sk1 and, when sk2 is a different non-NULL socket,
 * of sk2 as well.
 *
 * When two distinct sockets are locked, the one with the lower address is
 * always taken first (and the other one with the "nested" variant), so the
 * acquisition order does not depend on the order of the arguments.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first;
        struct sock *second;

        /* Only a single socket to lock. */
        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
905
/*
 * Release the state lock of sk1 and, when sk2 is a different non-NULL
 * socket, the state lock of sk2 too.  Counterpart of
 * unix_state_double_lock().
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        /* A second lock exists only for a distinct, non-NULL sk2. */
        if (sk2 && likely(sk2 != sk1))
                unix_state_unlock(sk2);
}
915
916 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
917                               int alen, int flags)
918 {
919         struct sock *sk = sock->sk;
920         struct net *net = sock_net(sk);
921         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
922         struct sock *other;
923         unsigned hash;
924         int err;
925
926         if (addr->sa_family != AF_UNSPEC) {
927                 err = unix_mkname(sunaddr, alen, &hash);
928                 if (err < 0)
929                         goto out;
930                 alen = err;
931
932                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
933                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
934                         goto out;
935
936 restart:
937                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
938                 if (!other)
939                         goto out;
940
941                 unix_state_double_lock(sk, other);
942
943                 /* Apparently VFS overslept socket death. Retry. */
944                 if (sock_flag(other, SOCK_DEAD)) {
945                         unix_state_double_unlock(sk, other);
946                         sock_put(other);
947                         goto restart;
948                 }
949
950                 err = -EPERM;
951                 if (!unix_may_send(sk, other))
952                         goto out_unlock;
953
954                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
955                 if (err)
956                         goto out_unlock;
957
958         } else {
959                 /*
960                  *      1003.1g breaking connected state with AF_UNSPEC
961                  */
962                 other = NULL;
963                 unix_state_double_lock(sk, other);
964         }
965
966         /*
967          * If it was connected, reconnect.
968          */
969         if (unix_peer(sk)) {
970                 struct sock *old_peer = unix_peer(sk);
971                 unix_peer(sk) = other;
972                 unix_state_double_unlock(sk, other);
973
974                 if (other != old_peer)
975                         unix_dgram_disconnected(sk, old_peer);
976                 sock_put(old_peer);
977         } else {
978                 unix_peer(sk) = other;
979                 unix_state_double_unlock(sk, other);
980         }
981         return 0;
982
983 out_unlock:
984         unix_state_double_unlock(sk, other);
985         sock_put(other);
986 out:
987         return err;
988 }
989
990 static long unix_wait_for_peer(struct sock *other, long timeo)
991 {
992         struct unix_sock *u = unix_sk(other);
993         int sched;
994         DEFINE_WAIT(wait);
995
996         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
997
998         sched = !sock_flag(other, SOCK_DEAD) &&
999                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1000                 unix_recvq_full(other);
1001
1002         unix_state_unlock(other);
1003
1004         if (sched)
1005                 timeo = schedule_timeout(timeo);
1006
1007         finish_wait(&u->peer_wait, &wait);
1008         return timeo;
1009 }
1010
/*
 * Connect a stream/seqpacket socket to a listening socket.
 *
 * @sock:     socket being connected
 * @uaddr:    address of the listener
 * @addr_len: length of @uaddr
 * @flags:    O_NONBLOCK selects a non-blocking connect
 *
 * A new sock (newsk) is created up front to act as the server-side end of
 * the connection, together with a one byte skb owned by newsk.  Once the
 * listener has been found and both state locks are held, sk and newsk are
 * made peers of each other and the skb is queued on the listener's receive
 * queue.
 *
 * Returns 0 on success or a negative errno (-ECONNREFUSED if the target is
 * not listening, -EAGAIN if its queue is full and no timeout is left,
 * -EISCONN / -EINVAL for a wrong state of @sock, -ENOMEM, or an error from
 * unix_mkname(), unix_autobind(), unix_find_other() or the security hook).
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* With SOCK_PASSCRED set an unbound socket is autobound first. */
        if (test_bit(SOCK_PASSCRED, &sock->flags)
                && !u->addr && (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we will make it after state is locked,
           we will have to recheck all again in any case.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /*
                 * unix_wait_for_peer() drops the state lock of other, so
                 * the paths below must not unlock it again.
                 */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           It is tricky place. We need to grab write lock and cannot
           drop lock on peer. It is dangerous because deadlock is
           possible. Connect to self case and simultaneous
           attempt to connect are eliminated by checking socket
           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
           check this before attempt to grab lock.

           Well, and we have to recheck the state after socket locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* Our state changed while we were not holding the lock: retry. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sock, other->sk_socket, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Fastly set all the necessary fields... */

        /* newsk keeps a reference on sk as its peer. */
        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        newsk->sk_peercred.pid  = task_tgid_vnr(current);
        newsk->sk_peercred.uid  = current->euid;
        newsk->sk_peercred.gid  = current->egid;
        newu = unix_sk(newsk);
        newsk->sk_sleep         = &newu->peer_wait;
        otheru = unix_sk(other);

        /* copy address information from listening to new sock*/
        if (otheru->addr) {
                atomic_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        if (otheru->dentry) {
                newu->dentry    = dget(otheru->dentry);
                newu->mnt       = mntget(otheru->mnt);
        }

        /* Set credentials */
        sk->sk_peercred = other->sk_peercred;

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        /* sk keeps a reference on newsk as its peer. */
        sock_hold(newsk);

        smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other, 0);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        /* Undo whatever was set up before the failure. */
        if (skb)
                kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1186
1187 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1188 {
1189         struct sock *ska = socka->sk, *skb = sockb->sk;
1190
1191         /* Join our sockets back to back */
1192         sock_hold(ska);
1193         sock_hold(skb);
1194         unix_peer(ska) = skb;
1195         unix_peer(skb) = ska;
1196         ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1197         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1198         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1199
1200         if (ska->sk_type != SOCK_DGRAM) {
1201                 ska->sk_state = TCP_ESTABLISHED;
1202                 skb->sk_state = TCP_ESTABLISHED;
1203                 socka->state  = SS_CONNECTED;
1204                 sockb->state  = SS_CONNECTED;
1205         }
1206         return 0;
1207 }
1208
1209 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1210 {
1211         struct sock *sk = sock->sk;
1212         struct sock *tsk;
1213         struct sk_buff *skb;
1214         int err;
1215
1216         err = -EOPNOTSUPP;
1217         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1218                 goto out;
1219
1220         err = -EINVAL;
1221         if (sk->sk_state != TCP_LISTEN)
1222                 goto out;
1223
1224         /* If socket state is TCP_LISTEN it cannot change (for now...),
1225          * so that no locks are necessary.
1226          */
1227
1228         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1229         if (!skb) {
1230                 /* This means receive shutdown. */
1231                 if (err == 0)
1232                         err = -EINVAL;
1233                 goto out;
1234         }
1235
1236         tsk = skb->sk;
1237         skb_free_datagram(sk, skb);
1238         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1239
1240         /* attach accepted sock to socket */
1241         unix_state_lock(tsk);
1242         newsock->state = SS_CONNECTED;
1243         sock_graft(tsk, newsock);
1244         unix_state_unlock(tsk);
1245         return 0;
1246
1247 out:
1248         return err;
1249 }
1250
1251
1252 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1253 {
1254         struct sock *sk = sock->sk;
1255         struct unix_sock *u;
1256         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1257         int err = 0;
1258
1259         if (peer) {
1260                 sk = unix_peer_get(sk);
1261
1262                 err = -ENOTCONN;
1263                 if (!sk)
1264                         goto out;
1265                 err = 0;
1266         } else {
1267                 sock_hold(sk);
1268         }
1269
1270         u = unix_sk(sk);
1271         unix_state_lock(sk);
1272         if (!u->addr) {
1273                 sunaddr->sun_family = AF_UNIX;
1274                 sunaddr->sun_path[0] = 0;
1275                 *uaddr_len = sizeof(short);
1276         } else {
1277                 struct unix_address *addr = u->addr;
1278
1279                 *uaddr_len = addr->len;
1280                 memcpy(sunaddr, addr->name, *uaddr_len);
1281         }
1282         unix_state_unlock(sk);
1283         sock_put(sk);
1284 out:
1285         return err;
1286 }
1287
1288 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1289 {
1290         int i;
1291
1292         scm->fp = UNIXCB(skb).fp;
1293         skb->destructor = sock_wfree;
1294         UNIXCB(skb).fp = NULL;
1295
1296         for (i = scm->fp->count-1; i >= 0; i--)
1297                 unix_notinflight(scm->fp->fp[i]);
1298 }
1299
1300 static void unix_destruct_fds(struct sk_buff *skb)
1301 {
1302         struct scm_cookie scm;
1303         memset(&scm, 0, sizeof(scm));
1304         unix_detach_fds(&scm, skb);
1305
1306         /* Alas, it calls VFS */
1307         /* So fscking what? fput() had been SMP-safe since the last Summer */
1308         scm_destroy(&scm);
1309         sock_wfree(skb);
1310 }
1311
1312 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1313 {
1314         int i;
1315
1316         /*
1317          * Need to duplicate file references for the sake of garbage
1318          * collection.  Otherwise a socket in the fps might become a
1319          * candidate for GC while the skb is not yet queued.
1320          */
1321         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1322         if (!UNIXCB(skb).fp)
1323                 return -ENOMEM;
1324
1325         for (i = scm->fp->count-1; i >= 0; i--)
1326                 unix_inflight(scm->fp->fp[i]);
1327         skb->destructor = unix_destruct_fds;
1328         return 0;
1329 }
1330
1331 /*
1332  *      Send AF_UNIX data.
1333  */
1334
/*
 * Send one datagram.
 *
 * @kiocb: iocb of the request; its sock_iocb carries the scm cookie
 * @sock:  sending socket
 * @msg:   message; msg_name/msg_namelen optionally name the destination,
 *         otherwise the connected peer is used
 * @len:   payload length
 *
 * Returns @len on success or a negative errno, among them -EOPNOTSUPP for
 * MSG_OOB, -ENOTCONN without destination and peer, -EMSGSIZE if @len
 * exceeds sk_sndbuf - 32, -EPERM if unix_may_send() refuses, -ECONNREFUSED
 * if the connected peer is dead, -ECONNRESET if an unconnected lookup has
 * no address to retry with, -EPIPE if the receiver shut down receiving and
 * -EAGAIN if the receiver's queue is full and no timeout is left.
 *
 * The scm cookie is destroyed on every path after scm_send() succeeded.
 */
static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
                              struct msghdr *msg, size_t len)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = msg->msg_name;
        struct sock *other = NULL;
        int namelen = 0; /* fake GCC */
        int err;
        unsigned hash;
        struct sk_buff *skb;
        long timeo;
        struct scm_cookie tmp_scm;

        /* Use an on-stack cookie when the caller did not provide one. */
        if (NULL == siocb->scm)
                siocb->scm = &tmp_scm;
        err = scm_send(sock, msg, siocb->scm);
        if (err < 0)
                return err;

        err = -EOPNOTSUPP;
        if (msg->msg_flags&MSG_OOB)
                goto out;

        if (msg->msg_namelen) {
                /* Explicit destination: looked up at "restart" below. */
                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
                if (err < 0)
                        goto out;
                namelen = err;
        } else {
                /* No destination given: send to the connected peer. */
                sunaddr = NULL;
                err = -ENOTCONN;
                other = unix_peer_get(sk);
                if (!other)
                        goto out;
        }

        if (test_bit(SOCK_PASSCRED, &sock->flags)
                && !u->addr && (err = unix_autobind(sock)) != 0)
                goto out;

        err = -EMSGSIZE;
        if (len > sk->sk_sndbuf - 32)
                goto out;

        skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
        if (skb == NULL)
                goto out;

        memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
        if (siocb->scm->fp) {
                err = unix_attach_fds(siocb->scm, skb);
                if (err)
                        goto out_free;
        }
        unix_get_secdata(siocb->scm, skb);

        skb_reset_transport_header(skb);
        err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (err)
                goto out_free;

        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
        if (!other) {
                /* Nothing to look up again without an address. */
                err = -ECONNRESET;
                if (sunaddr == NULL)
                        goto out_free;

                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
                                        hash, &err);
                if (other == NULL)
                        goto out_free;
        }

        unix_state_lock(other);
        err = -EPERM;
        if (!unix_may_send(sk, other))
                goto out_unlock;

        if (sock_flag(other, SOCK_DEAD)) {
                /*
                 *      Check with 1003.1g - what should
                 *      datagram error
                 */
                unix_state_unlock(other);
                sock_put(other);

                err = 0;
                unix_state_lock(sk);
                if (unix_peer(sk) == other) {
                        /*
                         * The dead socket was our connected peer: break
                         * the association and drop the peer reference.
                         */
                        unix_peer(sk) = NULL;
                        unix_state_unlock(sk);

                        unix_dgram_disconnected(sk, other);
                        sock_put(other);
                        err = -ECONNREFUSED;
                } else {
                        unix_state_unlock(sk);
                }

                /* Both references are gone; do not put it again at "out". */
                other = NULL;
                if (err)
                        goto out_free;
                goto restart;
        }

        err = -EPIPE;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (sk->sk_type != SOCK_SEQPACKET) {
                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;
        }

        if (unix_peer(other) != sk && unix_recvq_full(other)) {
                if (!timeo) {
                        err = -EAGAIN;
                        goto out_unlock;
                }

                /*
                 * unix_wait_for_peer() drops the state lock of other, so
                 * the error path below skips out_unlock.
                 */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out_free;

                goto restart;
        }

        skb_queue_tail(&other->sk_receive_queue, skb);
        unix_state_unlock(other);
        other->sk_data_ready(other, len);
        sock_put(other);
        scm_destroy(siocb->scm);
        return len;

out_unlock:
        unix_state_unlock(other);
out_free:
        kfree_skb(skb);
out:
        if (other)
                sock_put(other);
        scm_destroy(siocb->scm);
        return err;
}
1487
1488
1489 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1490                                struct msghdr *msg, size_t len)
1491 {
1492         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1493         struct sock *sk = sock->sk;
1494         struct sock *other = NULL;
1495         struct sockaddr_un *sunaddr = msg->msg_name;
1496         int err, size;
1497         struct sk_buff *skb;
1498         int sent = 0;
1499         struct scm_cookie tmp_scm;
1500
1501         if (NULL == siocb->scm)
1502                 siocb->scm = &tmp_scm;
1503         err = scm_send(sock, msg, siocb->scm);
1504         if (err < 0)
1505                 return err;
1506
1507         err = -EOPNOTSUPP;
1508         if (msg->msg_flags&MSG_OOB)
1509                 goto out_err;
1510
1511         if (msg->msg_namelen) {
1512                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1513                 goto out_err;
1514         } else {
1515                 sunaddr = NULL;
1516                 err = -ENOTCONN;
1517                 other = unix_peer(sk);
1518                 if (!other)
1519                         goto out_err;
1520         }
1521
1522         if (sk->sk_shutdown & SEND_SHUTDOWN)
1523                 goto pipe_err;
1524
1525         while (sent < len) {
1526                 /*
1527                  *      Optimisation for the fact that under 0.01% of X
1528                  *      messages typically need breaking up.
1529                  */
1530
1531                 size = len-sent;
1532
1533                 /* Keep two messages in the pipe so it schedules better */
1534                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1535                         size = (sk->sk_sndbuf >> 1) - 64;
1536
1537                 if (size > SKB_MAX_ALLOC)
1538                         size = SKB_MAX_ALLOC;
1539
1540                 /*
1541                  *      Grab a buffer
1542                  */
1543
1544                 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1545                                           &err);
1546
1547                 if (skb == NULL)
1548                         goto out_err;
1549
1550                 /*
1551                  *      If you pass two values to the sock_alloc_send_skb
1552                  *      it tries to grab the large buffer with GFP_NOFS
1553                  *      (which can fail easily), and if it fails grab the
1554                  *      fallback size buffer which is under a page and will
1555                  *      succeed. [Alan]
1556                  */
1557                 size = min_t(int, size, skb_tailroom(skb));
1558
1559                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1560                 if (siocb->scm->fp) {
1561                         err = unix_attach_fds(siocb->scm, skb);
1562                         if (err) {
1563                                 kfree_skb(skb);
1564                                 goto out_err;
1565                         }
1566                 }
1567
1568                 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1569                 if (err) {
1570                         kfree_skb(skb);
1571                         goto out_err;
1572                 }
1573
1574                 unix_state_lock(other);
1575
1576                 if (sock_flag(other, SOCK_DEAD) ||
1577                     (other->sk_shutdown & RCV_SHUTDOWN))
1578                         goto pipe_err_free;
1579
1580                 skb_queue_tail(&other->sk_receive_queue, skb);
1581                 unix_state_unlock(other);
1582                 other->sk_data_ready(other, size);
1583                 sent += size;
1584         }
1585
1586         scm_destroy(siocb->scm);
1587         siocb->scm = NULL;
1588
1589         return sent;
1590
1591 pipe_err_free:
1592         unix_state_unlock(other);
1593         kfree_skb(skb);
1594 pipe_err:
1595         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1596                 send_sig(SIGPIPE, current, 0);
1597         err = -EPIPE;
1598 out_err:
1599         scm_destroy(siocb->scm);
1600         siocb->scm = NULL;
1601         return sent ? : err;
1602 }
1603
1604 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1605                                   struct msghdr *msg, size_t len)
1606 {
1607         int err;
1608         struct sock *sk = sock->sk;
1609
1610         err = sock_error(sk);
1611         if (err)
1612                 return err;
1613
1614         if (sk->sk_state != TCP_ESTABLISHED)
1615                 return -ENOTCONN;
1616
1617         if (msg->msg_namelen)
1618                 msg->msg_namelen = 0;
1619
1620         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1621 }
1622
1623 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1624 {
1625         struct unix_sock *u = unix_sk(sk);
1626
1627         msg->msg_namelen = 0;
1628         if (u->addr) {
1629                 msg->msg_namelen = u->addr->len;
1630                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1631         }
1632 }
1633
1634 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1635                               struct msghdr *msg, size_t size,
1636                               int flags)
1637 {
1638         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1639         struct scm_cookie tmp_scm;
1640         struct sock *sk = sock->sk;
1641         struct unix_sock *u = unix_sk(sk);
1642         int noblock = flags & MSG_DONTWAIT;
1643         struct sk_buff *skb;
1644         int err;
1645
1646         err = -EOPNOTSUPP;
1647         if (flags&MSG_OOB)
1648                 goto out;
1649
1650         msg->msg_namelen = 0;
1651
1652         mutex_lock(&u->readlock);
1653
1654         skb = skb_recv_datagram(sk, flags, noblock, &err);
1655         if (!skb) {
1656                 unix_state_lock(sk);
1657                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1658                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1659                     (sk->sk_shutdown & RCV_SHUTDOWN))
1660                         err = 0;
1661                 unix_state_unlock(sk);
1662                 goto out_unlock;
1663         }
1664
1665         wake_up_interruptible_sync(&u->peer_wait);
1666
1667         if (msg->msg_name)
1668                 unix_copy_addr(msg, skb->sk);
1669
1670         if (size > skb->len)
1671                 size = skb->len;
1672         else if (size < skb->len)
1673                 msg->msg_flags |= MSG_TRUNC;
1674
1675         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1676         if (err)
1677                 goto out_free;
1678
1679         if (!siocb->scm) {
1680                 siocb->scm = &tmp_scm;
1681                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1682         }
1683         siocb->scm->creds = *UNIXCREDS(skb);
1684         unix_set_secdata(siocb->scm, skb);
1685
1686         if (!(flags & MSG_PEEK)) {
1687                 if (UNIXCB(skb).fp)
1688                         unix_detach_fds(siocb->scm, skb);
1689         } else {
1690                 /* It is questionable: on PEEK we could:
1691                    - do not return fds - good, but too simple 8)
1692                    - return fds, and do not return them on read (old strategy,
1693                      apparently wrong)
1694                    - clone fds (I chose it for now, it is the most universal
1695                      solution)
1696
1697                    POSIX 1003.1g does not actually define this clearly
1698                    at all. POSIX 1003.1g doesn't define a lot of things
1699                    clearly however!
1700
1701                 */
1702                 if (UNIXCB(skb).fp)
1703                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1704         }
1705         err = size;
1706
1707         scm_recv(sock, msg, siocb->scm, flags);
1708
1709 out_free:
1710         skb_free_datagram(sk, skb);
1711 out_unlock:
1712         mutex_unlock(&u->readlock);
1713 out:
1714         return err;
1715 }
1716
/*
 *      Sleep until data has arrived. But check for races..
 */
1720
1721 static long unix_stream_data_wait(struct sock *sk, long timeo)
1722 {
1723         DEFINE_WAIT(wait);
1724
1725         unix_state_lock(sk);
1726
1727         for (;;) {
1728                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1729
1730                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1731                     sk->sk_err ||
1732                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1733                     signal_pending(current) ||
1734                     !timeo)
1735                         break;
1736
1737                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1738                 unix_state_unlock(sk);
1739                 timeo = schedule_timeout(timeo);
1740                 unix_state_lock(sk);
1741                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1742         }
1743
1744         finish_wait(sk->sk_sleep, &wait);
1745         unix_state_unlock(sk);
1746         return timeo;
1747 }
1748
1749
1750
/*
 * unix_stream_recvmsg - receive data from a connected stream socket.
 * @iocb:  kiocb carrying the per-call sock_iocb (and optional scm cookie)
 * @sock:  socket being read
 * @msg:   message header; payload is copied to msg->msg_iov
 * @size:  maximum number of bytes to copy
 * @flags: MSG_* flags; MSG_OOB is rejected, MSG_PEEK leaves the data queued
 *
 * Returns the number of bytes copied when that is non-zero, otherwise err:
 * -EINVAL if the socket is not TCP_ESTABLISHED, -EOPNOTSUPP for MSG_OOB,
 * -EAGAIN when no timeout remains, a pending socket error, a signal error
 * from sock_intr_errno(), or 0.  If the first copy to the iovec fails the
 * result is -EFAULT.
 */
1751 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1752                                struct msghdr *msg, size_t size,
1753                                int flags)
1754 {
1755         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1756         struct scm_cookie tmp_scm;
1757         struct sock *sk = sock->sk;
1758         struct unix_sock *u = unix_sk(sk);
1759         struct sockaddr_un *sunaddr = msg->msg_name;
1760         int copied = 0;
1761         int check_creds = 0;
1762         int target;
1763         int err = 0;
1764         long timeo;
1765
1766         err = -EINVAL;
1767         if (sk->sk_state != TCP_ESTABLISHED)
1768                 goto out;
1769
1770         err = -EOPNOTSUPP;
1771         if (flags&MSG_OOB)
1772                 goto out;
1773
1774         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1775         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1776
1777         msg->msg_namelen = 0;
1778
1779         /* Lock the socket to prevent queue disordering
1780          * while sleeps in memcpy_tomsg
1781          */
1782
             /* Fall back to an on-stack, zeroed scm cookie when the caller
              * supplied none.
              */
1783         if (!siocb->scm) {
1784                 siocb->scm = &tmp_scm;
1785                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1786         }
1787
1788         mutex_lock(&u->readlock);
1789
1790         do {
1791                 int chunk;
1792                 struct sk_buff *skb;
1793
1794                 unix_state_lock(sk);
1795                 skb = skb_dequeue(&sk->sk_receive_queue);
1796                 if (skb == NULL) {
                             /* Queue empty: finish if enough was copied,
                              * otherwise report error/shutdown or wait.
                              */
1797                         if (copied >= target)
1798                                 goto unlock;
1799
1800                         /*
1801                          *      POSIX 1003.1g mandates this order.
1802                          */
1803
1804                         err = sock_error(sk);
1805                         if (err)
1806                                 goto unlock;
1807                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1808                                 goto unlock;
1809
1810                         unix_state_unlock(sk);
1811                         err = -EAGAIN;
1812                         if (!timeo)
1813                                 break;
                             /* readlock is released across the sleep and
                              * retaken before retrying the dequeue.
                              */
1814                         mutex_unlock(&u->readlock);
1815
1816                         timeo = unix_stream_data_wait(sk, timeo);
1817
1818                         if (signal_pending(current)) {
                                     /* readlock is not held here, so this
                                      * jumps past the unlock and scm_recv().
                                      */
1819                                 err = sock_intr_errno(timeo);
1820                                 goto out;
1821                         }
1822                         mutex_lock(&u->readlock);
1823                         continue;
                             /* Reached only via goto, with the state lock
                              * still held.
                              */
1824  unlock:
1825                         unix_state_unlock(sk);
1826                         break;
1827                 }
1828                 unix_state_unlock(sk);
1829
1830                 if (check_creds) {
1831                         /* Never glue messages from different writers */
1832                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
1833                                    sizeof(siocb->scm->creds)) != 0) {
1834                                 skb_queue_head(&sk->sk_receive_queue, skb);
1835                                 break;
1836                         }
1837                 } else {
1838                         /* Copy credentials */
1839                         siocb->scm->creds = *UNIXCREDS(skb);
1840                         check_creds = 1;
1841                 }
1842
1843                 /* Copy address just once */
1844                 if (sunaddr) {
1845                         unix_copy_addr(msg, skb->sk);
1846                         sunaddr = NULL;
1847                 }
1848
1849                 chunk = min_t(unsigned int, skb->len, size);
1850                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
                             /* Copy failed: requeue the skb untouched; only
                              * report -EFAULT if nothing was copied before.
                              */
1851                         skb_queue_head(&sk->sk_receive_queue, skb);
1852                         if (copied == 0)
1853                                 copied = -EFAULT;
1854                         break;
1855                 }
1856                 copied += chunk;
1857                 size -= chunk;
1858
1859                 /* Mark read part of skb as used */
1860                 if (!(flags & MSG_PEEK)) {
1861                         skb_pull(skb, chunk);
1862
1863                         if (UNIXCB(skb).fp)
1864                                 unix_detach_fds(siocb->scm, skb);
1865
1866                         /* put the skb back if we didn't use it up.. */
1867                         if (skb->len) {
1868                                 skb_queue_head(&sk->sk_receive_queue, skb);
1869                                 break;
1870                         }
1871
1872                         kfree_skb(skb);
1873
                             /* Stop after an skb that carried passed fds. */
1874                         if (siocb->scm->fp)
1875                                 break;
1876                 } else {
1877                         /* It is questionable, see note in unix_dgram_recvmsg.
1878                          */
1879                         if (UNIXCB(skb).fp)
1880                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1881
1882                         /* put message back and return */
1883                         skb_queue_head(&sk->sk_receive_queue, skb);
1884                         break;
1885                 }
1886         } while (size);
1887
1888         mutex_unlock(&u->readlock);
1889         scm_recv(sock, msg, siocb->scm, flags);
1890 out:
             /* A non-zero byte count (or -EFAULT) takes precedence over err. */
1891         return copied ? : err;
1892 }
1893
1894 static int unix_shutdown(struct socket *sock, int mode)
1895 {
1896         struct sock *sk = sock->sk;
1897         struct sock *other;
1898
1899         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1900
1901         if (mode) {
1902                 unix_state_lock(sk);
1903                 sk->sk_shutdown |= mode;
1904                 other = unix_peer(sk);
1905                 if (other)
1906                         sock_hold(other);
1907                 unix_state_unlock(sk);
1908                 sk->sk_state_change(sk);
1909
1910                 if (other &&
1911                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1912
1913                         int peer_mode = 0;
1914
1915                         if (mode&RCV_SHUTDOWN)
1916                                 peer_mode |= SEND_SHUTDOWN;
1917                         if (mode&SEND_SHUTDOWN)
1918                                 peer_mode |= RCV_SHUTDOWN;
1919                         unix_state_lock(other);
1920                         other->sk_shutdown |= peer_mode;
1921                         unix_state_unlock(other);
1922                         other->sk_state_change(other);
1923                         read_lock(&other->sk_callback_lock);
1924                         if (peer_mode == SHUTDOWN_MASK)
1925                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1926                         else if (peer_mode & RCV_SHUTDOWN)
1927                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1928                         read_unlock(&other->sk_callback_lock);
1929                 }
1930                 if (other)
1931                         sock_put(other);
1932         }
1933         return 0;
1934 }
1935
1936 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1937 {
1938         struct sock *sk = sock->sk;
1939         long amount = 0;
1940         int err;
1941
1942         switch (cmd) {
1943         case SIOCOUTQ:
1944                 amount = atomic_read(&sk->sk_wmem_alloc);
1945                 err = put_user(amount, (int __user *)arg);
1946                 break;
1947         case SIOCINQ:
1948                 {
1949                         struct sk_buff *skb;
1950
1951                         if (sk->sk_state == TCP_LISTEN) {
1952                                 err = -EINVAL;
1953                                 break;
1954                         }
1955
1956                         spin_lock(&sk->sk_receive_queue.lock);
1957                         if (sk->sk_type == SOCK_STREAM ||
1958                             sk->sk_type == SOCK_SEQPACKET) {
1959                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1960                                         amount += skb->len;
1961                         } else {
1962                                 skb = skb_peek(&sk->sk_receive_queue);
1963                                 if (skb)
1964                                         amount = skb->len;
1965                         }
1966                         spin_unlock(&sk->sk_receive_queue.lock);
1967                         err = put_user(amount, (int __user *)arg);
1968                         break;
1969                 }
1970
1971         default:
1972                 err = -ENOIOCTLCMD;
1973                 break;
1974         }
1975         return err;
1976 }
1977
1978 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1979 {
1980         struct sock *sk = sock->sk;
1981         unsigned int mask;
1982
1983         poll_wait(file, sk->sk_sleep, wait);
1984         mask = 0;
1985
1986         /* exceptional events? */
1987         if (sk->sk_err)
1988                 mask |= POLLERR;
1989         if (sk->sk_shutdown == SHUTDOWN_MASK)
1990                 mask |= POLLHUP;
1991         if (sk->sk_shutdown & RCV_SHUTDOWN)
1992                 mask |= POLLRDHUP;
1993
1994         /* readable? */
1995         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1996             (sk->sk_shutdown & RCV_SHUTDOWN))
1997                 mask |= POLLIN | POLLRDNORM;
1998
1999         /* Connection-based need to check for termination and startup */
2000         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2001             sk->sk_state == TCP_CLOSE)
2002                 mask |= POLLHUP;
2003
2004         /*
2005          * we set writable also when the other side has shut down the
2006          * connection. This prevents stuck sockets.
2007          */
2008         if (unix_writable(sk))
2009                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2010
2011         return mask;
2012 }
2013
2014 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2015                                     poll_table *wait)
2016 {
2017         struct sock *sk = sock->sk, *other;
2018         unsigned int mask, writable;
2019
2020         poll_wait(file, sk->sk_sleep, wait);
2021         mask = 0;
2022
2023         /* exceptional events? */
2024         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2025                 mask |= POLLERR;
2026         if (sk->sk_shutdown & RCV_SHUTDOWN)
2027                 mask |= POLLRDHUP;
2028         if (sk->sk_shutdown == SHUTDOWN_MASK)
2029                 mask |= POLLHUP;
2030
2031         /* readable? */
2032         if (!skb_queue_empty(&sk->sk_receive_queue) ||
2033             (sk->sk_shutdown & RCV_SHUTDOWN))
2034                 mask |= POLLIN | POLLRDNORM;
2035
2036         /* Connection-based need to check for termination and startup */
2037         if (sk->sk_type == SOCK_SEQPACKET) {
2038                 if (sk->sk_state == TCP_CLOSE)
2039                         mask |= POLLHUP;
2040                 /* connection hasn't started yet? */
2041                 if (sk->sk_state == TCP_SYN_SENT)
2042                         return mask;
2043         }
2044
2045         /* writable? */
2046         writable = unix_writable(sk);
2047         if (writable) {
2048                 other = unix_peer_get(sk);
2049                 if (other) {
2050                         if (unix_peer(other) != sk) {
2051                                 poll_wait(file, &unix_sk(other)->peer_wait,
2052                                           wait);
2053                                 if (unix_recvq_full(other))
2054                                         writable = 0;
2055                         }
2056
2057                         sock_put(other);
2058                 }
2059         }
2060
2061         if (writable)
2062                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2063         else
2064                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2065
2066         return mask;
2067 }
2068
2069 #ifdef CONFIG_PROC_FS
2070 static struct sock *first_unix_socket(int *i)
2071 {
2072         for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2073                 if (!hlist_empty(&unix_socket_table[*i]))
2074                         return __sk_head(&unix_socket_table[*i]);
2075         }
2076         return NULL;
2077 }
2078
2079 static struct sock *next_unix_socket(int *i, struct sock *s)
2080 {
2081         struct sock *next = sk_next(s);
2082         /* More in this chain? */
2083         if (next)
2084                 return next;
2085         /* Look for next non-empty chain. */
2086         for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2087                 if (!hlist_empty(&unix_socket_table[*i]))
2088                         return __sk_head(&unix_socket_table[*i]);
2089         }
2090         return NULL;
2091 }
2092
/*
 * Per-open iterator state for the "unix" proc seq_file; its size is passed
 * to seq_open_net() in unix_seq_open().
 */
2093 struct unix_iter_state {
2094         struct seq_net_private p;
2095         int i;          /* current unix_socket_table bucket index */
2096 };
2097
2098 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2099 {
2100         struct unix_iter_state *iter = seq->private;
2101         loff_t off = 0;
2102         struct sock *s;
2103
2104         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2105                 if (sock_net(s) != seq_file_net(seq))
2106                         continue;
2107                 if (off == pos)
2108                         return s;
2109                 ++off;
2110         }
2111         return NULL;
2112 }
2113
2114 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2115         __acquires(unix_table_lock)
2116 {
2117         spin_lock(&unix_table_lock);
2118         return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2119 }
2120
2121 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2122 {
2123         struct unix_iter_state *iter = seq->private;
2124         struct sock *sk = v;
2125         ++*pos;
2126
2127         if (v == SEQ_START_TOKEN)
2128                 sk = first_unix_socket(&iter->i);
2129         else
2130                 sk = next_unix_socket(&iter->i, sk);
2131         while (sk && (sock_net(sk) != seq_file_net(seq)))
2132                 sk = next_unix_socket(&iter->i, sk);
2133         return sk;
2134 }
2135
/* seq_file .stop: release the unix_table_lock taken in unix_seq_start(). */
2136 static void unix_seq_stop(struct seq_file *seq, void *v)
2137         __releases(unix_table_lock)
2138 {
2139         spin_unlock(&unix_table_lock);
2140 }
2141
2142 static int unix_seq_show(struct seq_file *seq, void *v)
2143 {
2144
2145         if (v == SEQ_START_TOKEN)
2146                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2147                          "Inode Path\n");
2148         else {
2149                 struct sock *s = v;
2150                 struct unix_sock *u = unix_sk(s);
2151                 unix_state_lock(s);
2152
2153                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2154                         s,
2155                         atomic_read(&s->sk_refcnt),
2156                         0,
2157                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2158                         s->sk_type,
2159                         s->sk_socket ?
2160                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2161                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2162                         sock_i_ino(s));
2163
2164                 if (u->addr) {
2165                         int i, len;
2166                         seq_putc(seq, ' ');
2167
2168                         i = 0;
2169                         len = u->addr->len - sizeof(short);
2170                         if (!UNIX_ABSTRACT(s))
2171                                 len--;
2172                         else {
2173                                 seq_putc(seq, '@');
2174                                 i++;
2175                         }
2176                         for ( ; i < len; i++)
2177                                 seq_putc(seq, u->addr->name->sun_path[i]);
2178                 }
2179                 unix_state_unlock(s);
2180                 seq_putc(seq, '\n');
2181         }
2182
2183         return 0;
2184 }
2185
/* seq_file iterator callbacks handed to seq_open_net() by unix_seq_open(). */
2186 static const struct seq_operations unix_seq_ops = {
2187         .start  = unix_seq_start,
2188         .next   = unix_seq_next,
2189         .stop   = unix_seq_stop,
2190         .show   = unix_seq_show,
2191 };
2192
/*
 * Open handler for the "unix" proc entry: sets up a seq_file using
 * unix_seq_ops with a struct unix_iter_state as private data.
 * Returns whatever seq_open_net() returns.
 */
2193 static int unix_seq_open(struct inode *inode, struct file *file)
2194 {
2195         return seq_open_net(inode, file, &unix_seq_ops,
2196                             sizeof(struct unix_iter_state));
2197 }
2198
/* File operations registered for the "unix" proc entry in unix_net_init(). */
2199 static const struct file_operations unix_seq_fops = {
2200         .owner          = THIS_MODULE,
2201         .open           = unix_seq_open,
2202         .read           = seq_read,
2203         .llseek         = seq_lseek,
2204         .release        = seq_release_net,
2205 };
2206
2207 #endif
2208
/* PF_UNIX family descriptor passed to sock_register() in af_unix_init(). */
2209 static struct net_proto_family unix_family_ops = {
2210         .family = PF_UNIX,
2211         .create = unix_create,
2212         .owner  = THIS_MODULE,
2213 };
2214
2215
2216 static int unix_net_init(struct net *net)
2217 {
2218         int error = -ENOMEM;
2219
2220         net->unx.sysctl_max_dgram_qlen = 10;
2221         if (unix_sysctl_register(net))
2222                 goto out;
2223
2224 #ifdef CONFIG_PROC_FS
2225         if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2226                 unix_sysctl_unregister(net);
2227                 goto out;
2228         }
2229 #endif
2230         error = 0;
2231 out:
2232         return error;
2233 }
2234
/* Per-namespace teardown: drop the sysctl table and the "unix" proc entry. */
2235 static void unix_net_exit(struct net *net)
2236 {
2237         unix_sysctl_unregister(net);
2238         proc_net_remove(net, "unix");
2239 }
2240
/* Per-namespace init/exit hooks registered by af_unix_init(). */
2241 static struct pernet_operations unix_net_ops = {
2242         .init = unix_net_init,
2243         .exit = unix_net_exit,
2244 };
2245
2246 static int __init af_unix_init(void)
2247 {
2248         int rc = -1;
2249         struct sk_buff *dummy_skb;
2250
2251         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2252
2253         rc = proto_register(&unix_proto, 1);
2254         if (rc != 0) {
2255                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2256                        __func__);
2257                 goto out;
2258         }
2259
2260         sock_register(&unix_family_ops);
2261         register_pernet_subsys(&unix_net_ops);
2262 out:
2263         return rc;
2264 }
2265
/*
 * Module exit: unregister the PF_UNIX family, the unix protocol and the
 * per-namespace operations, in that order.
 */
2266 static void __exit af_unix_exit(void)
2267 {
2268         sock_unregister(PF_UNIX);
2269         proto_unregister(&unix_proto);
2270         unregister_pernet_subsys(&unix_net_ops);
2271 }
2272
2273 /* Earlier than device_initcall() so that other drivers invoking
2274    request_module() don't end up in a loop when modprobe tries
2275    to use a UNIX socket. But later than subsys_initcall() because
2276    we depend on stuff initialised there */
2277 fs_initcall(af_unix_init);
2278 module_exit(af_unix_exit);
2279
/* Module metadata: license tag and the net-protocol alias for PF_UNIX. */
2280 MODULE_LICENSE("GPL");
2281 MODULE_ALIAS_NETPROTO(PF_UNIX);