net/unix/af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by the above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it avoids hashing a huge
40  *                                      number of socks (for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skbs queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and gives the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                starting with a zero byte, so that this name space does not
82  *                intersect with BSD names.
83  */
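/*
 * Illustration only, a hedged userspace sketch (not part of this file): the
 * descriptor fd and the name "example" are hypothetical, and the usual
 * <sys/un.h> definitions are assumed.  Binding an abstract name passes a
 * sun_path whose first byte is 0 followed by the raw name bytes; the address
 * length, not a terminating NUL, delimits the name:
 *
 *      struct sockaddr_un a = { .sun_family = AF_UNIX };
 *      memcpy(a.sun_path + 1, "example", 7);
 *      bind(fd, (struct sockaddr *)&a,
 *           offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */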
84
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/sock.h>
107 #include <net/tcp_states.h>
108 #include <net/af_unix.h>
109 #include <linux/proc_fs.h>
110 #include <linux/seq_file.h>
111 #include <net/scm.h>
112 #include <linux/init.h>
113 #include <linux/poll.h>
114 #include <linux/smp_lock.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119
120 int sysctl_unix_max_dgram_qlen __read_mostly = 10;
121
122 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
123 DEFINE_SPINLOCK(unix_table_lock);
124 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
125
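/*
 * The table has UNIX_HASH_SIZE hash buckets plus one extra slot: sockets that
 * have not yet been bound to any address live in that last slot.  A bound
 * socket is "abstract" unless its address hash is that same sentinel value,
 * which unix_bind() assigns to filesystem-bound sockets.
 */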
126 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
127
128 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
129
130 #ifdef CONFIG_SECURITY_NETWORK
131 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
132 {
133         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
134 }
135
136 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
137 {
138         scm->secid = *UNIXSID(skb);
139 }
140 #else
141 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142 { }
143
144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 { }
146 #endif /* CONFIG_SECURITY_NETWORK */
147
148 /*
149  *  SMP locking strategy:
150  *    hash table is protected with spinlock unix_table_lock
151  *    each socket state is protected by separate rwlock.
152  */
153
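/*
 * Fold a 32-bit checksum of the address down to a hash table index by
 * XORing the upper bits into the lower ones.
 */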
154 static inline unsigned unix_hash_fold(unsigned hash)
155 {
156         hash ^= hash>>16;
157         hash ^= hash>>8;
158         return hash&(UNIX_HASH_SIZE-1);
159 }
160
161 #define unix_peer(sk) (unix_sk(sk)->peer)
162
163 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
164 {
165         return unix_peer(osk) == sk;
166 }
167
168 static inline int unix_may_send(struct sock *sk, struct sock *osk)
169 {
170         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
171 }
172
173 static struct sock *unix_peer_get(struct sock *s)
174 {
175         struct sock *peer;
176
177         unix_state_rlock(s);
178         peer = unix_peer(s);
179         if (peer)
180                 sock_hold(peer);
181         unix_state_runlock(s);
182         return peer;
183 }
184
185 static inline void unix_release_addr(struct unix_address *addr)
186 {
187         if (atomic_dec_and_test(&addr->refcnt))
188                 kfree(addr);
189 }
190
191 /*
192  *      Check unix socket name:
193  *              - should be not zero length.
194  *              - if started by not zero, should be NULL terminated (FS object)
195  *              - if started by zero, it is abstract name.
196  */
197  
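/*
 * Note: *hashp is only filled in for abstract names; filesystem names are
 * hashed by inode number at bind/lookup time instead.
 */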
198 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
199 {
200         if (len <= sizeof(short) || len > sizeof(*sunaddr))
201                 return -EINVAL;
202         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
203                 return -EINVAL;
204         if (sunaddr->sun_path[0]) {
205                 /*
206                  * This may look like an off by one error but it is a bit more
207                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
208                  * sun_path[108] doesn't as such exist.  However in kernel space
209                  * we are guaranteed that it is a valid memory location in our
210                  * kernel address buffer.
211                  */
212                 ((char *)sunaddr)[len]=0;
213                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
214                 return len;
215         }
216
217         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
218         return len;
219 }
220
221 static void __unix_remove_socket(struct sock *sk)
222 {
223         sk_del_node_init(sk);
224 }
225
226 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
227 {
228         BUG_TRAP(sk_unhashed(sk));
229         sk_add_node(sk, list);
230 }
231
232 static inline void unix_remove_socket(struct sock *sk)
233 {
234         spin_lock(&unix_table_lock);
235         __unix_remove_socket(sk);
236         spin_unlock(&unix_table_lock);
237 }
238
239 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
240 {
241         spin_lock(&unix_table_lock);
242         __unix_insert_socket(list, sk);
243         spin_unlock(&unix_table_lock);
244 }
245
246 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
247                                               int len, int type, unsigned hash)
248 {
249         struct sock *s;
250         struct hlist_node *node;
251
252         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
253                 struct unix_sock *u = unix_sk(s);
254
255                 if (u->addr->len == len &&
256                     !memcmp(u->addr->name, sunname, len))
257                         goto found;
258         }
259         s = NULL;
260 found:
261         return s;
262 }
263
264 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
265                                                    int len, int type,
266                                                    unsigned hash)
267 {
268         struct sock *s;
269
270         spin_lock(&unix_table_lock);
271         s = __unix_find_socket_byname(sunname, len, type, hash);
272         if (s)
273                 sock_hold(s);
274         spin_unlock(&unix_table_lock);
275         return s;
276 }
277
278 static struct sock *unix_find_socket_byinode(struct inode *i)
279 {
280         struct sock *s;
281         struct hlist_node *node;
282
283         spin_lock(&unix_table_lock);
284         sk_for_each(s, node,
285                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
286                 struct dentry *dentry = unix_sk(s)->dentry;
287
288                 if(dentry && dentry->d_inode == i)
289                 {
290                         sock_hold(s);
291                         goto found;
292                 }
293         }
294         s = NULL;
295 found:
296         spin_unlock(&unix_table_lock);
297         return s;
298 }
299
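/* A socket counts as writable while at most a quarter of sk_sndbuf is
 * committed to queued write buffers. */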
300 static inline int unix_writable(struct sock *sk)
301 {
302         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
303 }
304
305 static void unix_write_space(struct sock *sk)
306 {
307         read_lock(&sk->sk_callback_lock);
308         if (unix_writable(sk)) {
309                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
310                         wake_up_interruptible(sk->sk_sleep);
311                 sk_wake_async(sk, 2, POLL_OUT);
312         }
313         read_unlock(&sk->sk_callback_lock);
314 }
315
316 /* When a dgram socket disconnects (or changes its peer), we clear its receive
317  * queue of packets that arrived from the previous peer. First, this allows
318  * flow control based only on wmem_alloc; second, an sk connected to a peer
319  * may receive messages only from that peer. */
320 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
321 {
322         if (!skb_queue_empty(&sk->sk_receive_queue)) {
323                 skb_queue_purge(&sk->sk_receive_queue);
324                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
325
326                 /* If one link of a bidirectional dgram pipe is disconnected,
327                  * we signal an error. Messages are lost. Do not do this
328                  * when the peer was not connected to us.
329                  */
330                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
331                         other->sk_err = ECONNRESET;
332                         other->sk_error_report(other);
333                 }
334         }
335 }
336
337 static void unix_sock_destructor(struct sock *sk)
338 {
339         struct unix_sock *u = unix_sk(sk);
340
341         skb_queue_purge(&sk->sk_receive_queue);
342
343         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
344         BUG_TRAP(sk_unhashed(sk));
345         BUG_TRAP(!sk->sk_socket);
346         if (!sock_flag(sk, SOCK_DEAD)) {
347                 printk("Attempt to release alive unix socket: %p\n", sk);
348                 return;
349         }
350
351         if (u->addr)
352                 unix_release_addr(u->addr);
353
354         atomic_dec(&unix_nr_socks);
355 #ifdef UNIX_REFCNT_DEBUG
356         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
357 #endif
358 }
359
360 static int unix_release_sock (struct sock *sk, int embrion)
361 {
362         struct unix_sock *u = unix_sk(sk);
363         struct dentry *dentry;
364         struct vfsmount *mnt;
365         struct sock *skpair;
366         struct sk_buff *skb;
367         int state;
368
369         unix_remove_socket(sk);
370
371         /* Clear state */
372         unix_state_wlock(sk);
373         sock_orphan(sk);
374         sk->sk_shutdown = SHUTDOWN_MASK;
375         dentry       = u->dentry;
376         u->dentry    = NULL;
377         mnt          = u->mnt;
378         u->mnt       = NULL;
379         state = sk->sk_state;
380         sk->sk_state = TCP_CLOSE;
381         unix_state_wunlock(sk);
382
383         wake_up_interruptible_all(&u->peer_wait);
384
385         skpair=unix_peer(sk);
386
387         if (skpair!=NULL) {
388                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
389                         unix_state_wlock(skpair);
390                         /* No more writes */
391                         skpair->sk_shutdown = SHUTDOWN_MASK;
392                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
393                                 skpair->sk_err = ECONNRESET;
394                         unix_state_wunlock(skpair);
395                         skpair->sk_state_change(skpair);
396                         read_lock(&skpair->sk_callback_lock);
397                         sk_wake_async(skpair,1,POLL_HUP);
398                         read_unlock(&skpair->sk_callback_lock);
399                 }
400                 sock_put(skpair); /* It may now die */
401                 unix_peer(sk) = NULL;
402         }
403
404         /* Try to flush out this socket. Throw out buffers at least */
405
406         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
407                 if (state==TCP_LISTEN)
408                         unix_release_sock(skb->sk, 1);
409                 /* passed fds are erased in the kfree_skb hook        */
410                 kfree_skb(skb);
411         }
412
413         if (dentry) {
414                 dput(dentry);
415                 mntput(mnt);
416         }
417
418         sock_put(sk);
419
420         /* ---- Socket is dead now and most probably destroyed ---- */
421
422         /*
423          * Fixme: BSD difference: In BSD all sockets connected to us get
424          *        ECONNRESET and we die on the spot. In Linux we behave
425          *        like files and pipes do and wait for the last
426          *        dereference.
427          *
428          * Can't we simply set sock->err?
429          *
430          *        What does the above comment talk about? --ANK(980817)
431          */
432
433         if (atomic_read(&unix_tot_inflight))
434                 unix_gc();              /* Garbage collect fds */       
435
436         return 0;
437 }
438
439 static int unix_listen(struct socket *sock, int backlog)
440 {
441         int err;
442         struct sock *sk = sock->sk;
443         struct unix_sock *u = unix_sk(sk);
444
445         err = -EOPNOTSUPP;
446         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
447                 goto out;                       /* Only stream/seqpacket sockets accept */
448         err = -EINVAL;
449         if (!u->addr)
450                 goto out;                       /* No listens on an unbound socket */
451         unix_state_wlock(sk);
452         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
453                 goto out_unlock;
454         if (backlog > sk->sk_max_ack_backlog)
455                 wake_up_interruptible_all(&u->peer_wait);
456         sk->sk_max_ack_backlog  = backlog;
457         sk->sk_state            = TCP_LISTEN;
458         /* set credentials so connect can copy them */
459         sk->sk_peercred.pid     = current->tgid;
460         sk->sk_peercred.uid     = current->euid;
461         sk->sk_peercred.gid     = current->egid;
462         err = 0;
463
464 out_unlock:
465         unix_state_wunlock(sk);
466 out:
467         return err;
468 }
469
470 static int unix_release(struct socket *);
471 static int unix_bind(struct socket *, struct sockaddr *, int);
472 static int unix_stream_connect(struct socket *, struct sockaddr *,
473                                int addr_len, int flags);
474 static int unix_socketpair(struct socket *, struct socket *);
475 static int unix_accept(struct socket *, struct socket *, int);
476 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
477 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
478 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
479 static int unix_shutdown(struct socket *, int);
480 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
481                                struct msghdr *, size_t);
482 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
483                                struct msghdr *, size_t, int);
484 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
485                               struct msghdr *, size_t);
486 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
487                               struct msghdr *, size_t, int);
488 static int unix_dgram_connect(struct socket *, struct sockaddr *,
489                               int, int);
490 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
491                                   struct msghdr *, size_t);
492
493 static const struct proto_ops unix_stream_ops = {
494         .family =       PF_UNIX,
495         .owner =        THIS_MODULE,
496         .release =      unix_release,
497         .bind =         unix_bind,
498         .connect =      unix_stream_connect,
499         .socketpair =   unix_socketpair,
500         .accept =       unix_accept,
501         .getname =      unix_getname,
502         .poll =         unix_poll,
503         .ioctl =        unix_ioctl,
504         .listen =       unix_listen,
505         .shutdown =     unix_shutdown,
506         .setsockopt =   sock_no_setsockopt,
507         .getsockopt =   sock_no_getsockopt,
508         .sendmsg =      unix_stream_sendmsg,
509         .recvmsg =      unix_stream_recvmsg,
510         .mmap =         sock_no_mmap,
511         .sendpage =     sock_no_sendpage,
512 };
513
514 static const struct proto_ops unix_dgram_ops = {
515         .family =       PF_UNIX,
516         .owner =        THIS_MODULE,
517         .release =      unix_release,
518         .bind =         unix_bind,
519         .connect =      unix_dgram_connect,
520         .socketpair =   unix_socketpair,
521         .accept =       sock_no_accept,
522         .getname =      unix_getname,
523         .poll =         datagram_poll,
524         .ioctl =        unix_ioctl,
525         .listen =       sock_no_listen,
526         .shutdown =     unix_shutdown,
527         .setsockopt =   sock_no_setsockopt,
528         .getsockopt =   sock_no_getsockopt,
529         .sendmsg =      unix_dgram_sendmsg,
530         .recvmsg =      unix_dgram_recvmsg,
531         .mmap =         sock_no_mmap,
532         .sendpage =     sock_no_sendpage,
533 };
534
535 static const struct proto_ops unix_seqpacket_ops = {
536         .family =       PF_UNIX,
537         .owner =        THIS_MODULE,
538         .release =      unix_release,
539         .bind =         unix_bind,
540         .connect =      unix_stream_connect,
541         .socketpair =   unix_socketpair,
542         .accept =       unix_accept,
543         .getname =      unix_getname,
544         .poll =         datagram_poll,
545         .ioctl =        unix_ioctl,
546         .listen =       unix_listen,
547         .shutdown =     unix_shutdown,
548         .setsockopt =   sock_no_setsockopt,
549         .getsockopt =   sock_no_getsockopt,
550         .sendmsg =      unix_seqpacket_sendmsg,
551         .recvmsg =      unix_dgram_recvmsg,
552         .mmap =         sock_no_mmap,
553         .sendpage =     sock_no_sendpage,
554 };
555
556 static struct proto unix_proto = {
557         .name     = "UNIX",
558         .owner    = THIS_MODULE,
559         .obj_size = sizeof(struct unix_sock),
560 };
561
562 /*
563  * AF_UNIX sockets do not interact with hardware, hence they
564  * don't trigger interrupts - so it's safe for them to have
565  * bh-unsafe locking for their sk_receive_queue.lock. Split off
566  * this special lock-class by reinitializing the spinlock key:
567  */
568 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
569
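/*
 * Allocate and initialise a new AF_UNIX sock.  The total number of unix
 * sockets is capped at twice the system-wide file limit.  A freshly created
 * socket starts out on the "unbound" list until it is given an address;
 * sock may be NULL when creating the embryonic sock for an incoming
 * connection, in which case inflight starts at -1.
 */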
570 static struct sock * unix_create1(struct socket *sock)
571 {
572         struct sock *sk = NULL;
573         struct unix_sock *u;
574
575         if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
576                 goto out;
577
578         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
579         if (!sk)
580                 goto out;
581
582         atomic_inc(&unix_nr_socks);
583
584         sock_init_data(sock,sk);
585         lockdep_set_class(&sk->sk_receive_queue.lock,
586                                 &af_unix_sk_receive_queue_lock_key);
587
588         sk->sk_write_space      = unix_write_space;
589         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
590         sk->sk_destruct         = unix_sock_destructor;
591         u         = unix_sk(sk);
592         u->dentry = NULL;
593         u->mnt    = NULL;
594         spin_lock_init(&u->lock);
595         atomic_set(&u->inflight, sock ? 0 : -1);
596         mutex_init(&u->readlock); /* single task reading lock */
597         init_waitqueue_head(&u->peer_wait);
598         unix_insert_socket(unix_sockets_unbound, sk);
599 out:
600         return sk;
601 }
602
603 static int unix_create(struct socket *sock, int protocol)
604 {
605         if (protocol && protocol != PF_UNIX)
606                 return -EPROTONOSUPPORT;
607
608         sock->state = SS_UNCONNECTED;
609
610         switch (sock->type) {
611         case SOCK_STREAM:
612                 sock->ops = &unix_stream_ops;
613                 break;
614                 /*
615                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
616                  *      nothing uses it.
617                  */
618         case SOCK_RAW:
619                 sock->type=SOCK_DGRAM;
620         case SOCK_DGRAM:
621                 sock->ops = &unix_dgram_ops;
622                 break;
623         case SOCK_SEQPACKET:
624                 sock->ops = &unix_seqpacket_ops;
625                 break;
626         default:
627                 return -ESOCKTNOSUPPORT;
628         }
629
630         return unix_create1(sock) ? 0 : -ENOMEM;
631 }
632
633 static int unix_release(struct socket *sock)
634 {
635         struct sock *sk = sock->sk;
636
637         if (!sk)
638                 return 0;
639
640         sock->sk = NULL;
641
642         return unix_release_sock (sk, 0);
643 }
644
645 static int unix_autobind(struct socket *sock)
646 {
647         struct sock *sk = sock->sk;
648         struct unix_sock *u = unix_sk(sk);
649         static u32 ordernum = 1;
650         struct unix_address * addr;
651         int err;
652
653         mutex_lock(&u->readlock);
654
655         err = 0;
656         if (u->addr)
657                 goto out;
658
659         err = -ENOMEM;
660         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
661         if (!addr)
662                 goto out;
663
664         addr->name->sun_family = AF_UNIX;
665         atomic_set(&addr->refcnt, 1);
666
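        /* Generate an abstract name of the form "\0xxxxx" (five hex digits of
         * ordernum) and retry with the next ordernum if it is already taken. */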
667 retry:
668         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
669         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
670
671         spin_lock(&unix_table_lock);
672         ordernum = (ordernum+1)&0xFFFFF;
673
674         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
675                                       addr->hash)) {
676                 spin_unlock(&unix_table_lock);
677                 /* Sanity yield. It is an unusual case, but yet... */
678                 if (!(ordernum&0xFF))
679                         yield();
680                 goto retry;
681         }
682         addr->hash ^= sk->sk_type;
683
684         __unix_remove_socket(sk);
685         u->addr = addr;
686         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
687         spin_unlock(&unix_table_lock);
688         err = 0;
689
690 out:    mutex_unlock(&u->readlock);
691         return err;
692 }
693
694 static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
695                                     int type, unsigned hash, int *error)
696 {
697         struct sock *u;
698         struct nameidata nd;
699         int err = 0;
700         
701         if (sunname->sun_path[0]) {
702                 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
703                 if (err)
704                         goto fail;
705                 err = vfs_permission(&nd, MAY_WRITE);
706                 if (err)
707                         goto put_fail;
708
709                 err = -ECONNREFUSED;
710                 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
711                         goto put_fail;
712                 u=unix_find_socket_byinode(nd.dentry->d_inode);
713                 if (!u)
714                         goto put_fail;
715
716                 if (u->sk_type == type)
717                         touch_atime(nd.mnt, nd.dentry);
718
719                 path_release(&nd);
720
721                 err=-EPROTOTYPE;
722                 if (u->sk_type != type) {
723                         sock_put(u);
724                         goto fail;
725                 }
726         } else {
727                 err = -ECONNREFUSED;
728                 u=unix_find_socket_byname(sunname, len, type, hash);
729                 if (u) {
730                         struct dentry *dentry;
731                         dentry = unix_sk(u)->dentry;
732                         if (dentry)
733                                 touch_atime(unix_sk(u)->mnt, dentry);
734                 } else
735                         goto fail;
736         }
737         return u;
738
739 put_fail:
740         path_release(&nd);
741 fail:
742         *error=err;
743         return NULL;
744 }
745
746
747 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
748 {
749         struct sock *sk = sock->sk;
750         struct unix_sock *u = unix_sk(sk);
751         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
752         struct dentry * dentry = NULL;
753         struct nameidata nd;
754         int err;
755         unsigned hash;
756         struct unix_address *addr;
757         struct hlist_head *list;
758
759         err = -EINVAL;
760         if (sunaddr->sun_family != AF_UNIX)
761                 goto out;
762
763         if (addr_len==sizeof(short)) {
764                 err = unix_autobind(sock);
765                 goto out;
766         }
767
768         err = unix_mkname(sunaddr, addr_len, &hash);
769         if (err < 0)
770                 goto out;
771         addr_len = err;
772
773         mutex_lock(&u->readlock);
774
775         err = -EINVAL;
776         if (u->addr)
777                 goto out_up;
778
779         err = -ENOMEM;
780         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
781         if (!addr)
782                 goto out_up;
783
784         memcpy(addr->name, sunaddr, addr_len);
785         addr->len = addr_len;
786         addr->hash = hash ^ sk->sk_type;
787         atomic_set(&addr->refcnt, 1);
788
789         if (sunaddr->sun_path[0]) {
790                 unsigned int mode;
791                 err = 0;
792                 /*
793                  * Get the parent directory, calculate the hash for last
794                  * component.
795                  */
796                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
797                 if (err)
798                         goto out_mknod_parent;
799
800                 dentry = lookup_create(&nd, 0);
801                 err = PTR_ERR(dentry);
802                 if (IS_ERR(dentry))
803                         goto out_mknod_unlock;
804
805                 /*
806                  * All right, let's create it.
807                  */
808                 mode = S_IFSOCK |
809                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
810                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
811                 if (err)
812                         goto out_mknod_dput;
813                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
814                 dput(nd.dentry);
815                 nd.dentry = dentry;
816
817                 addr->hash = UNIX_HASH_SIZE;
818         }
819
820         spin_lock(&unix_table_lock);
821
822         if (!sunaddr->sun_path[0]) {
823                 err = -EADDRINUSE;
824                 if (__unix_find_socket_byname(sunaddr, addr_len,
825                                               sk->sk_type, hash)) {
826                         unix_release_addr(addr);
827                         goto out_unlock;
828                 }
829
830                 list = &unix_socket_table[addr->hash];
831         } else {
832                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
833                 u->dentry = nd.dentry;
834                 u->mnt    = nd.mnt;
835         }
836
837         err = 0;
838         __unix_remove_socket(sk);
839         u->addr = addr;
840         __unix_insert_socket(list, sk);
841
842 out_unlock:
843         spin_unlock(&unix_table_lock);
844 out_up:
845         mutex_unlock(&u->readlock);
846 out:
847         return err;
848
849 out_mknod_dput:
850         dput(dentry);
851 out_mknod_unlock:
852         mutex_unlock(&nd.dentry->d_inode->i_mutex);
853         path_release(&nd);
854 out_mknod_parent:
855         if (err==-EEXIST)
856                 err=-EADDRINUSE;
857         unix_release_addr(addr);
858         goto out_up;
859 }
860
861 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
862                               int alen, int flags)
863 {
864         struct sock *sk = sock->sk;
865         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
866         struct sock *other;
867         unsigned hash;
868         int err;
869
870         if (addr->sa_family != AF_UNSPEC) {
871                 err = unix_mkname(sunaddr, alen, &hash);
872                 if (err < 0)
873                         goto out;
874                 alen = err;
875
876                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
877                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
878                         goto out;
879
880                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
881                 if (!other)
882                         goto out;
883
884                 unix_state_wlock(sk);
885
886                 err = -EPERM;
887                 if (!unix_may_send(sk, other))
888                         goto out_unlock;
889
890                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
891                 if (err)
892                         goto out_unlock;
893
894         } else {
895                 /*
896                  *      1003.1g breaking connected state with AF_UNSPEC
897                  */
898                 other = NULL;
899                 unix_state_wlock(sk);
900         }
901
902         /*
903          * If it was connected, reconnect.
904          */
905         if (unix_peer(sk)) {
906                 struct sock *old_peer = unix_peer(sk);
907                 unix_peer(sk)=other;
908                 unix_state_wunlock(sk);
909
910                 if (other != old_peer)
911                         unix_dgram_disconnected(sk, old_peer);
912                 sock_put(old_peer);
913         } else {
914                 unix_peer(sk)=other;
915                 unix_state_wunlock(sk);
916         }
917         return 0;
918
919 out_unlock:
920         unix_state_wunlock(sk);
921         sock_put(other);
922 out:
923         return err;
924 }
925
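/*
 * Expects the peer's state lock held for reading and releases it.  Sleeps
 * (at most once) on the peer's peer_wait queue while the peer's receive
 * queue is over its backlog; callers recheck the state and loop.  Returns
 * the remaining timeout.
 */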
926 static long unix_wait_for_peer(struct sock *other, long timeo)
927 {
928         struct unix_sock *u = unix_sk(other);
929         int sched;
930         DEFINE_WAIT(wait);
931
932         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
933
934         sched = !sock_flag(other, SOCK_DEAD) &&
935                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
936                 (skb_queue_len(&other->sk_receive_queue) >
937                  other->sk_max_ack_backlog);
938
939         unix_state_runlock(other);
940
941         if (sched)
942                 timeo = schedule_timeout(timeo);
943
944         finish_wait(&u->peer_wait, &wait);
945         return timeo;
946 }
947
948 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
949                                int addr_len, int flags)
950 {
951         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
952         struct sock *sk = sock->sk;
953         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
954         struct sock *newsk = NULL;
955         struct sock *other = NULL;
956         struct sk_buff *skb = NULL;
957         unsigned hash;
958         int st;
959         int err;
960         long timeo;
961
962         err = unix_mkname(sunaddr, addr_len, &hash);
963         if (err < 0)
964                 goto out;
965         addr_len = err;
966
967         if (test_bit(SOCK_PASSCRED, &sock->flags)
968                 && !u->addr && (err = unix_autobind(sock)) != 0)
969                 goto out;
970
971         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
972
973         /* First of all allocate resources.
974            If we do it after the state is locked,
975            we will have to recheck everything again in any case.
976          */
977
978         err = -ENOMEM;
979
980         /* create new sock for complete connection */
981         newsk = unix_create1(NULL);
982         if (newsk == NULL)
983                 goto out;
984
985         /* Allocate skb for sending to listening sock */
986         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
987         if (skb == NULL)
988                 goto out;
989
990 restart:
991         /*  Find listening sock. */
992         other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
993         if (!other)
994                 goto out;
995
996         /* Latch state of peer */
997         unix_state_rlock(other);
998
999         /* Apparently VFS overslept socket death. Retry. */
1000         if (sock_flag(other, SOCK_DEAD)) {
1001                 unix_state_runlock(other);
1002                 sock_put(other);
1003                 goto restart;
1004         }
1005
1006         err = -ECONNREFUSED;
1007         if (other->sk_state != TCP_LISTEN)
1008                 goto out_unlock;
1009
1010         if (skb_queue_len(&other->sk_receive_queue) >
1011             other->sk_max_ack_backlog) {
1012                 err = -EAGAIN;
1013                 if (!timeo)
1014                         goto out_unlock;
1015
1016                 timeo = unix_wait_for_peer(other, timeo);
1017
1018                 err = sock_intr_errno(timeo);
1019                 if (signal_pending(current))
1020                         goto out;
1021                 sock_put(other);
1022                 goto restart;
1023         }
1024
1025         /* Latch our state.
1026
1027            This is a tricky place. We need to grab the write lock and cannot
1028            drop the lock on the peer. It is dangerous because a deadlock is
1029            possible. The connect-to-self case and a simultaneous
1030            attempt to connect are eliminated by checking the socket
1031            state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
1032            check this before attempting to grab the lock.
1033
1034            Well, and we have to recheck the state after the socket is locked.
1035          */
1036         st = sk->sk_state;
1037
1038         switch (st) {
1039         case TCP_CLOSE:
1040                 /* This is ok... continue with connect */
1041                 break;
1042         case TCP_ESTABLISHED:
1043                 /* Socket is already connected */
1044                 err = -EISCONN;
1045                 goto out_unlock;
1046         default:
1047                 err = -EINVAL;
1048                 goto out_unlock;
1049         }
1050
1051         unix_state_wlock_nested(sk);
1052
1053         if (sk->sk_state != st) {
1054                 unix_state_wunlock(sk);
1055                 unix_state_runlock(other);
1056                 sock_put(other);
1057                 goto restart;
1058         }
1059
1060         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1061         if (err) {
1062                 unix_state_wunlock(sk);
1063                 goto out_unlock;
1064         }
1065
1066         /* The way is open! Quickly set all the necessary fields... */
1067
1068         sock_hold(sk);
1069         unix_peer(newsk)        = sk;
1070         newsk->sk_state         = TCP_ESTABLISHED;
1071         newsk->sk_type          = sk->sk_type;
1072         newsk->sk_peercred.pid  = current->tgid;
1073         newsk->sk_peercred.uid  = current->euid;
1074         newsk->sk_peercred.gid  = current->egid;
1075         newu = unix_sk(newsk);
1076         newsk->sk_sleep         = &newu->peer_wait;
1077         otheru = unix_sk(other);
1078
1079         /* copy address information from listening to new sock*/
1080         if (otheru->addr) {
1081                 atomic_inc(&otheru->addr->refcnt);
1082                 newu->addr = otheru->addr;
1083         }
1084         if (otheru->dentry) {
1085                 newu->dentry    = dget(otheru->dentry);
1086                 newu->mnt       = mntget(otheru->mnt);
1087         }
1088
1089         /* Set credentials */
1090         sk->sk_peercred = other->sk_peercred;
1091
1092         sock->state     = SS_CONNECTED;
1093         sk->sk_state    = TCP_ESTABLISHED;
1094         sock_hold(newsk);
1095
1096         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1097         unix_peer(sk)   = newsk;
1098
1099         unix_state_wunlock(sk);
1100
1101         /* take ten and send info to the listening sock */
1102         spin_lock(&other->sk_receive_queue.lock);
1103         __skb_queue_tail(&other->sk_receive_queue, skb);
1104         /* Undo artificially decreased inflight after embrion
1105          * is installed to listening socket. */
1106         atomic_inc(&newu->inflight);
1107         spin_unlock(&other->sk_receive_queue.lock);
1108         unix_state_runlock(other);
1109         other->sk_data_ready(other, 0);
1110         sock_put(other);
1111         return 0;
1112
1113 out_unlock:
1114         if (other)
1115                 unix_state_runlock(other);
1116
1117 out:
1118         if (skb)
1119                 kfree_skb(skb);
1120         if (newsk)
1121                 unix_release_sock(newsk, 0);
1122         if (other)
1123                 sock_put(other);
1124         return err;
1125 }
1126
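/*
 * socketpair(): wire two freshly created sockets directly to each other.
 * Stream and seqpacket pairs go straight to ESTABLISHED; datagram pairs
 * keep their peer pointers but stay unconnected at the socket layer.
 */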
1127 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1128 {
1129         struct sock *ska=socka->sk, *skb = sockb->sk;
1130
1131         /* Join our sockets back to back */
1132         sock_hold(ska);
1133         sock_hold(skb);
1134         unix_peer(ska)=skb;
1135         unix_peer(skb)=ska;
1136         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1137         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1138         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1139
1140         if (ska->sk_type != SOCK_DGRAM) {
1141                 ska->sk_state = TCP_ESTABLISHED;
1142                 skb->sk_state = TCP_ESTABLISHED;
1143                 socka->state  = SS_CONNECTED;
1144                 sockb->state  = SS_CONNECTED;
1145         }
1146         return 0;
1147 }
1148
1149 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1150 {
1151         struct sock *sk = sock->sk;
1152         struct sock *tsk;
1153         struct sk_buff *skb;
1154         int err;
1155
1156         err = -EOPNOTSUPP;
1157         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1158                 goto out;
1159
1160         err = -EINVAL;
1161         if (sk->sk_state != TCP_LISTEN)
1162                 goto out;
1163
1164         /* If socket state is TCP_LISTEN it cannot change (for now...),
1165          * so no locks are necessary.
1166          */
1167
1168         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1169         if (!skb) {
1170                 /* This means receive shutdown. */
1171                 if (err == 0)
1172                         err = -EINVAL;
1173                 goto out;
1174         }
1175
1176         tsk = skb->sk;
1177         skb_free_datagram(sk, skb);
1178         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1179
1180         /* attach accepted sock to socket */
1181         unix_state_wlock(tsk);
1182         newsock->state = SS_CONNECTED;
1183         sock_graft(tsk, newsock);
1184         unix_state_wunlock(tsk);
1185         return 0;
1186
1187 out:
1188         return err;
1189 }
1190
1191
1192 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1193 {
1194         struct sock *sk = sock->sk;
1195         struct unix_sock *u;
1196         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1197         int err = 0;
1198
1199         if (peer) {
1200                 sk = unix_peer_get(sk);
1201
1202                 err = -ENOTCONN;
1203                 if (!sk)
1204                         goto out;
1205                 err = 0;
1206         } else {
1207                 sock_hold(sk);
1208         }
1209
1210         u = unix_sk(sk);
1211         unix_state_rlock(sk);
1212         if (!u->addr) {
1213                 sunaddr->sun_family = AF_UNIX;
1214                 sunaddr->sun_path[0] = 0;
1215                 *uaddr_len = sizeof(short);
1216         } else {
1217                 struct unix_address *addr = u->addr;
1218
1219                 *uaddr_len = addr->len;
1220                 memcpy(sunaddr, addr->name, *uaddr_len);
1221         }
1222         unix_state_runlock(sk);
1223         sock_put(sk);
1224 out:
1225         return err;
1226 }
1227
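/*
 * Move the passed file references from the skb control block back into the
 * scm cookie and drop their "in flight" accounting, so the garbage collector
 * no longer considers them queued inside a socket.
 */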
1228 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1229 {
1230         int i;
1231
1232         scm->fp = UNIXCB(skb).fp;
1233         skb->destructor = sock_wfree;
1234         UNIXCB(skb).fp = NULL;
1235
1236         for (i=scm->fp->count-1; i>=0; i--)
1237                 unix_notinflight(scm->fp->fp[i]);
1238 }
1239
1240 static void unix_destruct_fds(struct sk_buff *skb)
1241 {
1242         struct scm_cookie scm;
1243         memset(&scm, 0, sizeof(scm));
1244         unix_detach_fds(&scm, skb);
1245
1246         /* Alas, it calls VFS */
1247         /* So fscking what? fput() has been SMP-safe since last summer */
1248         scm_destroy(&scm);
1249         sock_wfree(skb);
1250 }
1251
1252 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1253 {
1254         int i;
1255         for (i=scm->fp->count-1; i>=0; i--)
1256                 unix_inflight(scm->fp->fp[i]);
1257         UNIXCB(skb).fp = scm->fp;
1258         skb->destructor = unix_destruct_fds;
1259         scm->fp = NULL;
1260 }
1261
1262 /*
1263  *      Send AF_UNIX data.
1264  */
1265
1266 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1267                               struct msghdr *msg, size_t len)
1268 {
1269         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1270         struct sock *sk = sock->sk;
1271         struct unix_sock *u = unix_sk(sk);
1272         struct sockaddr_un *sunaddr=msg->msg_name;
1273         struct sock *other = NULL;
1274         int namelen = 0; /* fake GCC */
1275         int err;
1276         unsigned hash;
1277         struct sk_buff *skb;
1278         long timeo;
1279         struct scm_cookie tmp_scm;
1280
1281         if (NULL == siocb->scm)
1282                 siocb->scm = &tmp_scm;
1283         err = scm_send(sock, msg, siocb->scm);
1284         if (err < 0)
1285                 return err;
1286
1287         err = -EOPNOTSUPP;
1288         if (msg->msg_flags&MSG_OOB)
1289                 goto out;
1290
1291         if (msg->msg_namelen) {
1292                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1293                 if (err < 0)
1294                         goto out;
1295                 namelen = err;
1296         } else {
1297                 sunaddr = NULL;
1298                 err = -ENOTCONN;
1299                 other = unix_peer_get(sk);
1300                 if (!other)
1301                         goto out;
1302         }
1303
1304         if (test_bit(SOCK_PASSCRED, &sock->flags)
1305                 && !u->addr && (err = unix_autobind(sock)) != 0)
1306                 goto out;
1307
1308         err = -EMSGSIZE;
1309         if (len > sk->sk_sndbuf - 32)
1310                 goto out;
1311
1312         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1313         if (skb==NULL)
1314                 goto out;
1315
1316         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1317         if (siocb->scm->fp)
1318                 unix_attach_fds(siocb->scm, skb);
1319         unix_get_secdata(siocb->scm, skb);
1320
1321         skb->h.raw = skb->data;
1322         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1323         if (err)
1324                 goto out_free;
1325
1326         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1327
1328 restart:
1329         if (!other) {
1330                 err = -ECONNRESET;
1331                 if (sunaddr == NULL)
1332                         goto out_free;
1333
1334                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1335                                         hash, &err);
1336                 if (other==NULL)
1337                         goto out_free;
1338         }
1339
1340         unix_state_rlock(other);
1341         err = -EPERM;
1342         if (!unix_may_send(sk, other))
1343                 goto out_unlock;
1344
1345         if (sock_flag(other, SOCK_DEAD)) {
1346                 /*
1347                  *      Check with 1003.1g - what should
1348                  *      datagram error
1349                  */
1350                 unix_state_runlock(other);
1351                 sock_put(other);
1352
1353                 err = 0;
1354                 unix_state_wlock(sk);
1355                 if (unix_peer(sk) == other) {
1356                         unix_peer(sk)=NULL;
1357                         unix_state_wunlock(sk);
1358
1359                         unix_dgram_disconnected(sk, other);
1360                         sock_put(other);
1361                         err = -ECONNREFUSED;
1362                 } else {
1363                         unix_state_wunlock(sk);
1364                 }
1365
1366                 other = NULL;
1367                 if (err)
1368                         goto out_free;
1369                 goto restart;
1370         }
1371
1372         err = -EPIPE;
1373         if (other->sk_shutdown & RCV_SHUTDOWN)
1374                 goto out_unlock;
1375
1376         if (sk->sk_type != SOCK_SEQPACKET) {
1377                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1378                 if (err)
1379                         goto out_unlock;
1380         }
1381
1382         if (unix_peer(other) != sk &&
1383             (skb_queue_len(&other->sk_receive_queue) >
1384              other->sk_max_ack_backlog)) {
1385                 if (!timeo) {
1386                         err = -EAGAIN;
1387                         goto out_unlock;
1388                 }
1389
1390                 timeo = unix_wait_for_peer(other, timeo);
1391
1392                 err = sock_intr_errno(timeo);
1393                 if (signal_pending(current))
1394                         goto out_free;
1395
1396                 goto restart;
1397         }
1398
1399         skb_queue_tail(&other->sk_receive_queue, skb);
1400         unix_state_runlock(other);
1401         other->sk_data_ready(other, len);
1402         sock_put(other);
1403         scm_destroy(siocb->scm);
1404         return len;
1405
1406 out_unlock:
1407         unix_state_runlock(other);
1408 out_free:
1409         kfree_skb(skb);
1410 out:
1411         if (other)
1412                 sock_put(other);
1413         scm_destroy(siocb->scm);
1414         return err;
1415 }
1416
1417                 
1418 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1419                                struct msghdr *msg, size_t len)
1420 {
1421         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1422         struct sock *sk = sock->sk;
1423         struct sock *other = NULL;
1424         struct sockaddr_un *sunaddr=msg->msg_name;
1425         int err,size;
1426         struct sk_buff *skb;
1427         int sent=0;
1428         struct scm_cookie tmp_scm;
1429
1430         if (NULL == siocb->scm)
1431                 siocb->scm = &tmp_scm;
1432         err = scm_send(sock, msg, siocb->scm);
1433         if (err < 0)
1434                 return err;
1435
1436         err = -EOPNOTSUPP;
1437         if (msg->msg_flags&MSG_OOB)
1438                 goto out_err;
1439
1440         if (msg->msg_namelen) {
1441                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1442                 goto out_err;
1443         } else {
1444                 sunaddr = NULL;
1445                 err = -ENOTCONN;
1446                 other = unix_peer(sk);
1447                 if (!other)
1448                         goto out_err;
1449         }
1450
1451         if (sk->sk_shutdown & SEND_SHUTDOWN)
1452                 goto pipe_err;
1453
1454         while(sent < len)
1455         {
1456                 /*
1457                  *      Optimisation for the fact that under 0.01% of X
1458                  *      messages typically need breaking up.
1459                  */
1460
1461                 size = len-sent;
1462
1463                 /* Keep two messages in the pipe so it schedules better */
1464                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1465                         size = (sk->sk_sndbuf >> 1) - 64;
1466
1467                 if (size > SKB_MAX_ALLOC)
1468                         size = SKB_MAX_ALLOC;
1469                         
1470                 /*
1471                  *      Grab a buffer
1472                  */
1473                  
1474                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1475
1476                 if (skb==NULL)
1477                         goto out_err;
1478
1479                 /*
1480                  *      If you pass two values to the sock_alloc_send_skb
1481                  *      it tries to grab the large buffer with GFP_NOFS
1482                  *      (which can fail easily), and if that fails grabs the
1483                  *      fallback size buffer which is under a page and will
1484                  *      succeed. [Alan]
1485                  */
1486                 size = min_t(int, size, skb_tailroom(skb));
1487
1488                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1489                 if (siocb->scm->fp)
1490                         unix_attach_fds(siocb->scm, skb);
1491
1492                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1493                         kfree_skb(skb);
1494                         goto out_err;
1495                 }
1496
1497                 unix_state_rlock(other);
1498
1499                 if (sock_flag(other, SOCK_DEAD) ||
1500                     (other->sk_shutdown & RCV_SHUTDOWN))
1501                         goto pipe_err_free;
1502
1503                 skb_queue_tail(&other->sk_receive_queue, skb);
1504                 unix_state_runlock(other);
1505                 other->sk_data_ready(other, size);
1506                 sent+=size;
1507         }
1508
1509         scm_destroy(siocb->scm);
1510         siocb->scm = NULL;
1511
1512         return sent;
1513
1514 pipe_err_free:
1515         unix_state_runlock(other);
1516         kfree_skb(skb);
1517 pipe_err:
1518         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1519                 send_sig(SIGPIPE,current,0);
1520         err = -EPIPE;
1521 out_err:
1522         scm_destroy(siocb->scm);
1523         siocb->scm = NULL;
1524         return sent ? : err;
1525 }
1526
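/*
 * SOCK_SEQPACKET send: reuse the datagram path, but require an established
 * connection and ignore any explicitly supplied destination address.
 */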
1527 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1528                                   struct msghdr *msg, size_t len)
1529 {
1530         int err;
1531         struct sock *sk = sock->sk;
1532         
1533         err = sock_error(sk);
1534         if (err)
1535                 return err;
1536
1537         if (sk->sk_state != TCP_ESTABLISHED)
1538                 return -ENOTCONN;
1539
1540         if (msg->msg_namelen)
1541                 msg->msg_namelen = 0;
1542
1543         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1544 }
1545                                                                                             
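/* Copy the sender's bound address (if it has one) into msg_name/msg_namelen. */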
1546 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1547 {
1548         struct unix_sock *u = unix_sk(sk);
1549
1550         msg->msg_namelen = 0;
1551         if (u->addr) {
1552                 msg->msg_namelen = u->addr->len;
1553                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1554         }
1555 }
1556
1557 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1558                               struct msghdr *msg, size_t size,
1559                               int flags)
1560 {
1561         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1562         struct scm_cookie tmp_scm;
1563         struct sock *sk = sock->sk;
1564         struct unix_sock *u = unix_sk(sk);
1565         int noblock = flags & MSG_DONTWAIT;
1566         struct sk_buff *skb;
1567         int err;
1568
1569         err = -EOPNOTSUPP;
1570         if (flags&MSG_OOB)
1571                 goto out;
1572
1573         msg->msg_namelen = 0;
1574
1575         mutex_lock(&u->readlock);
1576
1577         skb = skb_recv_datagram(sk, flags, noblock, &err);
1578         if (!skb)
1579                 goto out_unlock;
1580
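             /*
              * We have taken a datagram off our queue, so there may now be
              * room for a sender that is blocked in unix_dgram_sendmsg
              * waiting on our peer_wait queue.
              */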
1581         wake_up_interruptible(&u->peer_wait);
1582
1583         if (msg->msg_name)
1584                 unix_copy_addr(msg, skb->sk);
1585
1586         if (size > skb->len)
1587                 size = skb->len;
1588         else if (size < skb->len)
1589                 msg->msg_flags |= MSG_TRUNC;
1590
1591         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1592         if (err)
1593                 goto out_free;
1594
1595         if (!siocb->scm) {
1596                 siocb->scm = &tmp_scm;
1597                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1598         }
1599         siocb->scm->creds = *UNIXCREDS(skb);
1600         unix_set_secdata(siocb->scm, skb);
1601
1602         if (!(flags & MSG_PEEK))
1603         {
1604                 if (UNIXCB(skb).fp)
1605                         unix_detach_fds(siocb->scm, skb);
1606         }
1607         else 
1608         {
1609                 /* What to do with fds on PEEK is questionable; we could:
1610                    - not return fds at all - good, but too simple 8)
1611                    - return fds now and not return them on the real read
1612                      (the old strategy, apparently wrong)
1613                    - clone fds (chosen here, as the most universal
1614                      solution)
1615
1616                    POSIX 1003.1g does not actually define this clearly
1617                    at all - but then POSIX 1003.1g doesn't define a lot
1618                    of things clearly!
1619
1620                 */
1621                 if (UNIXCB(skb).fp)
1622                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1623         }
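             /*
              * Note that with the clone-on-peek choice above, a MSG_PEEK
              * recvmsg() that returns SCM_RIGHTS data has already installed
              * real duplicate descriptors in the caller; the caller must
              * close them even though the queued message (and its fds) will
              * be delivered again by the following non-PEEK read.
              */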
1624         err = size;
1625
1626         scm_recv(sock, msg, siocb->scm, flags);
1627
1628 out_free:
1629         skb_free_datagram(sk,skb);
1630 out_unlock:
1631         mutex_unlock(&u->readlock);
1632 out:
1633         return err;
1634 }
1635
1636 /*
1637  *      Sleep until data has arrived, but check for races.
1638  */
1639  
1640 static long unix_stream_data_wait(struct sock * sk, long timeo)
1641 {
1642         DEFINE_WAIT(wait);
1643
1644         unix_state_rlock(sk);
1645
1646         for (;;) {
1647                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1648
1649                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1650                     sk->sk_err ||
1651                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1652                     signal_pending(current) ||
1653                     !timeo)
1654                         break;
1655
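                     /*
                      * Drop the state lock while we sleep so that a sender
                      * can queue data (or shut the socket down) and wake us;
                      * SOCK_ASYNC_WAITDATA marks us as waiting for data in
                      * the meantime.
                      */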
1656                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1657                 unix_state_runlock(sk);
1658                 timeo = schedule_timeout(timeo);
1659                 unix_state_rlock(sk);
1660                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1661         }
1662
1663         finish_wait(sk->sk_sleep, &wait);
1664         unix_state_runlock(sk);
1665         return timeo;
1666 }
1667
1668
1669
1670 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1671                                struct msghdr *msg, size_t size,
1672                                int flags)
1673 {
1674         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1675         struct scm_cookie tmp_scm;
1676         struct sock *sk = sock->sk;
1677         struct unix_sock *u = unix_sk(sk);
1678         struct sockaddr_un *sunaddr=msg->msg_name;
1679         int copied = 0;
1680         int check_creds = 0;
1681         int target;
1682         int err = 0;
1683         long timeo;
1684
1685         err = -EINVAL;
1686         if (sk->sk_state != TCP_ESTABLISHED)
1687                 goto out;
1688
1689         err = -EOPNOTSUPP;
1690         if (flags&MSG_OOB)
1691                 goto out;
1692
1693         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1694         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1695
1696         msg->msg_namelen = 0;
1697
1698         /* Lock the socket to prevent the receive queue from being
1699          * reordered while we sleep in memcpy_toiovec
1700          */
1701
1702         if (!siocb->scm) {
1703                 siocb->scm = &tmp_scm;
1704                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1705         }
1706
1707         mutex_lock(&u->readlock);
1708
1709         do
1710         {
1711                 int chunk;
1712                 struct sk_buff *skb;
1713
1714                 skb = skb_dequeue(&sk->sk_receive_queue);
1715                 if (skb==NULL)
1716                 {
1717                         if (copied >= target)
1718                                 break;
1719
1720                         /*
1721                          *      POSIX 1003.1g mandates this order: a pending error is reported before the EOF caused by RCV_SHUTDOWN.
1722                          */
1723                          
1724                         if ((err = sock_error(sk)) != 0)
1725                                 break;
1726                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1727                                 break;
1728                         err = -EAGAIN;
1729                         if (!timeo)
1730                                 break;
1731                         mutex_unlock(&u->readlock);
1732
1733                         timeo = unix_stream_data_wait(sk, timeo);
1734
1735                         if (signal_pending(current)) {
1736                                 err = sock_intr_errno(timeo);
1737                                 goto out;
1738                         }
1739                         mutex_lock(&u->readlock);
1740                         continue;
1741                 }
1742
1743                 if (check_creds) {
1744                         /* Never glue messages from different writers */
1745                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1746                                 skb_queue_head(&sk->sk_receive_queue, skb);
1747                                 break;
1748                         }
1749                 } else {
1750                         /* Copy credentials */
1751                         siocb->scm->creds = *UNIXCREDS(skb);
1752                         check_creds = 1;
1753                 }
1754
1755                 /* Copy address just once */
1756                 if (sunaddr)
1757                 {
1758                         unix_copy_addr(msg, skb->sk);
1759                         sunaddr = NULL;
1760                 }
1761
1762                 chunk = min_t(unsigned int, skb->len, size);
1763                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1764                         skb_queue_head(&sk->sk_receive_queue, skb);
1765                         if (copied == 0)
1766                                 copied = -EFAULT;
1767                         break;
1768                 }
1769                 copied += chunk;
1770                 size -= chunk;
1771
1772                 /* Mark read part of skb as used */
1773                 if (!(flags & MSG_PEEK))
1774                 {
1775                         skb_pull(skb, chunk);
1776
1777                         if (UNIXCB(skb).fp)
1778                                 unix_detach_fds(siocb->scm, skb);
1779
1780                         /* put the skb back if we didn't use it up.. */
1781                         if (skb->len)
1782                         {
1783                                 skb_queue_head(&sk->sk_receive_queue, skb);
1784                                 break;
1785                         }
1786
1787                         kfree_skb(skb);
1788
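                             /*
                              * If this skb carried descriptors, stop here so the
                              * SCM_RIGHTS data handed back stays associated with
                              * the bytes it was originally sent with.
                              */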
1789                         if (siocb->scm->fp)
1790                                 break;
1791                 }
1792                 else
1793                 {
1794                         /* It is questionable, see note in unix_dgram_recvmsg.
1795                          */
1796                         if (UNIXCB(skb).fp)
1797                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1798
1799                         /* put message back and return */
1800                         skb_queue_head(&sk->sk_receive_queue, skb);
1801                         break;
1802                 }
1803         } while (size);
1804
1805         mutex_unlock(&u->readlock);
1806         scm_recv(sock, msg, siocb->scm, flags);
1807 out:
1808         return copied ? : err;
1809 }
1810
1811 static int unix_shutdown(struct socket *sock, int mode)
1812 {
1813         struct sock *sk = sock->sk;
1814         struct sock *other;
1815
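             /*
              * Map the SHUT_RD/SHUT_WR/SHUT_RDWR argument (0, 1, 2) onto the
              * RCV_SHUTDOWN/SEND_SHUTDOWN bit mask: adding one gives 1, 2
              * and 3, exactly the bit combinations we want in sk_shutdown.
              */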
1816         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1817
1818         if (mode) {
1819                 unix_state_wlock(sk);
1820                 sk->sk_shutdown |= mode;
1821                 other=unix_peer(sk);
1822                 if (other)
1823                         sock_hold(other);
1824                 unix_state_wunlock(sk);
1825                 sk->sk_state_change(sk);
1826
1827                 if (other &&
1828                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1829
1830                         int peer_mode = 0;
1831
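                             /*
                              * Mirror the shutdown onto the connected peer with the
                              * directions swapped: if we can no longer receive, the
                              * peer can no longer send, and vice versa.
                              */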
1832                         if (mode&RCV_SHUTDOWN)
1833                                 peer_mode |= SEND_SHUTDOWN;
1834                         if (mode&SEND_SHUTDOWN)
1835                                 peer_mode |= RCV_SHUTDOWN;
1836                         unix_state_wlock(other);
1837                         other->sk_shutdown |= peer_mode;
1838                         unix_state_wunlock(other);
1839                         other->sk_state_change(other);
1840                         read_lock(&other->sk_callback_lock);
1841                         if (peer_mode == SHUTDOWN_MASK)
1842                                 sk_wake_async(other,1,POLL_HUP);
1843                         else if (peer_mode & RCV_SHUTDOWN)
1844                                 sk_wake_async(other,1,POLL_IN);
1845                         read_unlock(&other->sk_callback_lock);
1846                 }
1847                 if (other)
1848                         sock_put(other);
1849         }
1850         return 0;
1851 }
1852
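     /*
      * SIOCOUTQ reports the bytes still charged to our send buffer
      * (sk_wmem_alloc); SIOCINQ reports the bytes ready to read - the whole
      * receive queue for stream/seqpacket sockets, or just the first
      * datagram otherwise.  From userspace this is the usual
      *
      *     int avail;
      *     ioctl(fd, SIOCINQ, &avail);
      *
      * pattern (SIOCINQ is the same request as FIONREAD on Linux).
      */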
1853 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1854 {
1855         struct sock *sk = sock->sk;
1856         long amount=0;
1857         int err;
1858
1859         switch(cmd)
1860         {
1861                 case SIOCOUTQ:
1862                         amount = atomic_read(&sk->sk_wmem_alloc);
1863                         err = put_user(amount, (int __user *)arg);
1864                         break;
1865                 case SIOCINQ:
1866                 {
1867                         struct sk_buff *skb;
1868
1869                         if (sk->sk_state == TCP_LISTEN) {
1870                                 err = -EINVAL;
1871                                 break;
1872                         }
1873
1874                         spin_lock(&sk->sk_receive_queue.lock);
1875                         if (sk->sk_type == SOCK_STREAM ||
1876                             sk->sk_type == SOCK_SEQPACKET) {
1877                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1878                                         amount += skb->len;
1879                         } else {
1880                                 skb = skb_peek(&sk->sk_receive_queue);
1881                                 if (skb)
1882                                         amount=skb->len;
1883                         }
1884                         spin_unlock(&sk->sk_receive_queue.lock);
1885                         err = put_user(amount, (int __user *)arg);
1886                         break;
1887                 }
1888
1889                 default:
1890                         err = -ENOIOCTLCMD;
1891                         break;
1892         }
1893         return err;
1894 }
1895
1896 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1897 {
1898         struct sock *sk = sock->sk;
1899         unsigned int mask;
1900
1901         poll_wait(file, sk->sk_sleep, wait);
1902         mask = 0;
1903
1904         /* exceptional events? */
1905         if (sk->sk_err)
1906                 mask |= POLLERR;
1907         if (sk->sk_shutdown == SHUTDOWN_MASK)
1908                 mask |= POLLHUP;
1909         if (sk->sk_shutdown & RCV_SHUTDOWN)
1910                 mask |= POLLRDHUP;
1911
1912         /* readable? */
1913         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1914             (sk->sk_shutdown & RCV_SHUTDOWN))
1915                 mask |= POLLIN | POLLRDNORM;
1916
1917         /* Connection-based need to check for termination and startup */
1918         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1919                 mask |= POLLHUP;
1920
1921         /*
1922          * we set writable also when the other side has shut down the
1923          * connection. This prevents stuck sockets.
1924          */
1925         if (unix_writable(sk))
1926                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1927
1928         return mask;
1929 }
1930
1931
1932 #ifdef CONFIG_PROC_FS
1933 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1934 {
1935         loff_t off = 0;
1936         struct sock *s;
1937
1938         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1939                 if (off == pos) 
1940                         return s;
1941                 ++off;
1942         }
1943         return NULL;
1944 }
1945
1946
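     /*
      * The /proc/net/unix iterator: position 0 is represented by the magic
      * token (void *)1 so that unix_seq_show() emits the header line; every
      * later position maps to a struct sock found by walking the hash table
      * under unix_table_lock.
      */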
1947 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1948 {
1949         spin_lock(&unix_table_lock);
1950         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1951 }
1952
1953 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1954 {
1955         ++*pos;
1956
1957         if (v == (void *)1) 
1958                 return first_unix_socket(seq->private);
1959         return next_unix_socket(seq->private, v);
1960 }
1961
1962 static void unix_seq_stop(struct seq_file *seq, void *v)
1963 {
1964         spin_unlock(&unix_table_lock);
1965 }
1966
1967 static int unix_seq_show(struct seq_file *seq, void *v)
1968 {
1969         
1970         if (v == (void *)1)
1971                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1972                          "Inode Path\n");
1973         else {
1974                 struct sock *s = v;
1975                 struct unix_sock *u = unix_sk(s);
1976                 unix_state_rlock(s);
1977
1978                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1979                         s,
1980                         atomic_read(&s->sk_refcnt),
1981                         0,
1982                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1983                         s->sk_type,
1984                         s->sk_socket ?
1985                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1986                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1987                         sock_i_ino(s));
1988
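                     /*
                      * Append the socket's name.  Abstract-namespace names
                      * start with a NUL byte, shown as '@'; filesystem names
                      * instead drop their trailing NUL.
                      */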
1989                 if (u->addr) {
1990                         int i, len;
1991                         seq_putc(seq, ' ');
1992
1993                         i = 0;
1994                         len = u->addr->len - sizeof(short);
1995                         if (!UNIX_ABSTRACT(s))
1996                                 len--;
1997                         else {
1998                                 seq_putc(seq, '@');
1999                                 i++;
2000                         }
2001                         for ( ; i < len; i++)
2002                                 seq_putc(seq, u->addr->name->sun_path[i]);
2003                 }
2004                 unix_state_runlock(s);
2005                 seq_putc(seq, '\n');
2006         }
2007
2008         return 0;
2009 }
2010
2011 static struct seq_operations unix_seq_ops = {
2012         .start  = unix_seq_start,
2013         .next   = unix_seq_next,
2014         .stop   = unix_seq_stop,
2015         .show   = unix_seq_show,
2016 };
2017
2018
2019 static int unix_seq_open(struct inode *inode, struct file *file)
2020 {
2021         struct seq_file *seq;
2022         int rc = -ENOMEM;
2023         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2024
2025         if (!iter)
2026                 goto out;
2027
2028         rc = seq_open(file, &unix_seq_ops);
2029         if (rc)
2030                 goto out_kfree;
2031
2032         seq          = file->private_data;
2033         seq->private = iter;
2034         *iter = 0;
2035 out:
2036         return rc;
2037 out_kfree:
2038         kfree(iter);
2039         goto out;
2040 }
2041
2042 static struct file_operations unix_seq_fops = {
2043         .owner          = THIS_MODULE,
2044         .open           = unix_seq_open,
2045         .read           = seq_read,
2046         .llseek         = seq_lseek,
2047         .release        = seq_release_private,
2048 };
2049
2050 #endif
2051
2052 static struct net_proto_family unix_family_ops = {
2053         .family = PF_UNIX,
2054         .create = unix_create,
2055         .owner  = THIS_MODULE,
2056 };
2057
2058 static int __init af_unix_init(void)
2059 {
2060         int rc = -1;
2061         struct sk_buff *dummy_skb;
2062
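             /*
              * unix_skb_parms lives in skb->cb, so it must never outgrow
              * that array.
              */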
2063         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2064
2065         rc = proto_register(&unix_proto, 1);
2066         if (rc != 0) {
2067                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2068                        __FUNCTION__);
2069                 goto out;
2070         }
2071
2072         sock_register(&unix_family_ops);
2073 #ifdef CONFIG_PROC_FS
2074         proc_net_fops_create("unix", 0, &unix_seq_fops);
2075 #endif
2076         unix_sysctl_register();
2077 out:
2078         return rc;
2079 }
2080
2081 static void __exit af_unix_exit(void)
2082 {
2083         sock_unregister(PF_UNIX);
2084         unix_sysctl_unregister();
2085         proc_net_remove("unix");
2086         proto_unregister(&unix_proto);
2087 }
2088
2089 module_init(af_unix_init);
2090 module_exit(af_unix_exit);
2091
2092 MODULE_LICENSE("GPL");
2093 MODULE_ALIAS_NETPROTO(PF_UNIX);