git.oblomov.eu Git - linux-2.6/blob - net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  33  *                                      by the above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
  36  *                                      has been reached. This won't break
  37  *                                      old apps and it will avoid a huge number
  38  *                                      of socks hashed (this is for unix_gc()
  39  *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #include <linux/module.h>
  84 #include <linux/kernel.h>
  85 #include <linux/signal.h>
  86 #include <linux/sched.h>
  87 #include <linux/errno.h>
  88 #include <linux/string.h>
  89 #include <linux/stat.h>
  90 #include <linux/dcache.h>
  91 #include <linux/namei.h>
  92 #include <linux/socket.h>
  93 #include <linux/un.h>
  94 #include <linux/fcntl.h>
  95 #include <linux/termios.h>
  96 #include <linux/sockios.h>
  97 #include <linux/net.h>
  98 #include <linux/in.h>
  99 #include <linux/fs.h>
 100 #include <linux/slab.h>
 101 #include <asm/uaccess.h>
 102 #include <linux/skbuff.h>
 103 #include <linux/netdevice.h>
 104 #include <net/net_namespace.h>
 105 #include <net/sock.h>
 106 #include <net/tcp_states.h>
 107 #include <net/af_unix.h>
 108 #include <linux/proc_fs.h>
 109 #include <linux/seq_file.h>
 110 #include <net/scm.h>
 111 #include <linux/init.h>
 112 #include <linux/poll.h>
 113 #include <linux/rtnetlink.h>
 114 #include <linux/mount.h>
 115 #include <net/checksum.h>
 116 #include <linux/security.h>
 117
 118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 119 static DEFINE_SPINLOCK(unix_table_lock);
 120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 121
 122 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
 123
 124 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 125
 126 #ifdef CONFIG_SECURITY_NETWORK
 127 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 128 {
 129         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
 130 }
 131
 132 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 133 {
 134         scm->secid = *UNIXSID(skb);
 135 }
 136 #else
 137 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 138 { }
 139
 140 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 141 { }
 142 #endif /* CONFIG_SECURITY_NETWORK */
 143
 144 /*
 145  *  SMP locking strategy:
 146  *    hash table is protected with spinlock unix_table_lock
 147  *    each socket state is protected by separate rwlock.
 148  */
 149
 150 static inline unsigned unix_hash_fold(__wsum n)
 151 {
 152         unsigned hash = (__force unsigned)n;
 153         hash ^= hash>>16;
 154         hash ^= hash>>8;
 155         return hash&(UNIX_HASH_SIZE-1);
 156 }
 157
 158 #define unix_peer(sk) (unix_sk(sk)->peer)
 159
 160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 161 {
 162         return unix_peer(osk) == sk;
 163 }
 164
 165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 166 {
 167         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 168 }
 169
 170 static inline int unix_recvq_full(struct sock const *sk)
 171 {
 172         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 173 }
 174
 175 static struct sock *unix_peer_get(struct sock *s)
 176 {
 177         struct sock *peer;
 178
 179         unix_state_lock(s);
 180         peer = unix_peer(s);
 181         if (peer)
 182                 sock_hold(peer);
 183         unix_state_unlock(s);
 184         return peer;
 185 }
 186
 187 static inline void unix_release_addr(struct unix_address *addr)
 188 {
 189         if (atomic_dec_and_test(&addr->refcnt))
 190                 kfree(addr);
 191 }
 192
 193 /*
 194  *      Check unix socket name:
 195  *              - should be not zero length.
 196  *              - if started by not zero, should be NULL terminated (FS object)
 197  *              - if started by zero, it is abstract name.
 198  */
 199
 200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
 201 {
 202         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 203                 return -EINVAL;
 204         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 205                 return -EINVAL;
 206         if (sunaddr->sun_path[0]) {
 207                 /*
 208                  * This may look like an off by one error but it is a bit more
 209                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 210                  * sun_path[108] doesnt as such exist.  However in kernel space
 211                  * we are guaranteed that it is a valid memory location in our
 212                  * kernel address buffer.
 213                  */
 214                 ((char *)sunaddr)[len] = 0;
 215                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 216                 return len;
 217         }
 218
 219         *hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0));
 220         return len;
 221 }
 222
 223 static void __unix_remove_socket(struct sock *sk)
 224 {
 225         sk_del_node_init(sk);
 226 }
 227
 228 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 229 {
 230         WARN_ON(!sk_unhashed(sk));
 231         sk_add_node(sk, list);
 232 }
 233
 234 static inline void unix_remove_socket(struct sock *sk)
 235 {
 236         spin_lock(&unix_table_lock);
 237         __unix_remove_socket(sk);
 238         spin_unlock(&unix_table_lock);
 239 }
 240
 241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 242 {
 243         spin_lock(&unix_table_lock);
 244         __unix_insert_socket(list, sk);
 245         spin_unlock(&unix_table_lock);
 246 }
 247
 248 static struct sock *__unix_find_socket_byname(struct net *net,
 249                                               struct sockaddr_un *sunname,
 250                                               int len, int type, unsigned hash)
 251 {
 252         struct sock *s;
 253         struct hlist_node *node;
 254
 255         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 256                 struct unix_sock *u = unix_sk(s);
 257
 258                 if (!net_eq(sock_net(s), net))
 259                         continue;
 260
 261                 if (u->addr->len == len &&
 262                     !memcmp(u->addr->name, sunname, len))
 263                         goto found;
 264         }
 265         s = NULL;
 266 found:
 267         return s;
 268 }
 269
 270 static inline struct sock *unix_find_socket_byname(struct net *net,
 271                                                    struct sockaddr_un *sunname,
 272                                                    int len, int type,
 273                                                    unsigned hash)
 274 {
 275         struct sock *s;
 276
 277         spin_lock(&unix_table_lock);
 278         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 279         if (s)
 280                 sock_hold(s);
 281         spin_unlock(&unix_table_lock);
 282         return s;
 283 }
 284
 285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 286 {
 287         struct sock *s;
 288         struct hlist_node *node;
 289
 290         spin_lock(&unix_table_lock);
 291         sk_for_each(s, node,
 292                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 293                 struct dentry *dentry = unix_sk(s)->dentry;
 294
 295                 if (!net_eq(sock_net(s), net))
 296                         continue;
 297
 298                 if (dentry && dentry->d_inode == i) {
 299                         sock_hold(s);
 300                         goto found;
 301                 }
 302         }
 303         s = NULL;
 304 found:
 305         spin_unlock(&unix_table_lock);
 306         return s;
 307 }
 308
 309 static inline int unix_writable(struct sock *sk)
 310 {
 311         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 312 }
 313
 314 static void unix_write_space(struct sock *sk)
 315 {
 316         read_lock(&sk->sk_callback_lock);
 317         if (unix_writable(sk)) {
 318                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 319                         wake_up_interruptible_sync(sk->sk_sleep);
 320                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 321         }
 322         read_unlock(&sk->sk_callback_lock);
 323 }
 324
 325 /* When dgram socket disconnects (or changes its peer), we clear its receive
 326  * queue of packets arrived from previous peer. First, it allows to do
 327  * flow control based only on wmem_alloc; second, sk connected to peer
 328  * may receive messages only from that peer. */
 329 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 330 {
 331         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 332                 skb_queue_purge(&sk->sk_receive_queue);
 333                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 334
 335                 /* If one link of bidirectional dgram pipe is disconnected,
 336                  * we signal error. Messages are lost. Do not make this,
 337                  * when peer was not connected to us.
 338                  */
 339                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 340                         other->sk_err = ECONNRESET;
 341                         other->sk_error_report(other);
 342                 }
 343         }
 344 }
 345
 346 static void unix_sock_destructor(struct sock *sk)
 347 {
 348         struct unix_sock *u = unix_sk(sk);
 349
 350         skb_queue_purge(&sk->sk_receive_queue);
 351
 352         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 353         WARN_ON(!sk_unhashed(sk));
 354         WARN_ON(sk->sk_socket);
 355         if (!sock_flag(sk, SOCK_DEAD)) {
 356                 printk(KERN_DEBUG "Attempt to release alive unix socket: %p\n", sk);
 357                 return;
 358         }
 359
 360         if (u->addr)
 361                 unix_release_addr(u->addr);
 362
 363         atomic_dec(&unix_nr_socks);
 364 #ifdef UNIX_REFCNT_DEBUG
 365         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
 366                 atomic_read(&unix_nr_socks));
 367 #endif
 368 }
 369
 370 static int unix_release_sock(struct sock *sk, int embrion)
 371 {
 372         struct unix_sock *u = unix_sk(sk);
 373         struct dentry *dentry;
 374         struct vfsmount *mnt;
 375         struct sock *skpair;
 376         struct sk_buff *skb;
 377         int state;
 378
 379         unix_remove_socket(sk);
 380
 381         /* Clear state */
 382         unix_state_lock(sk);
 383         sock_orphan(sk);
 384         sk->sk_shutdown = SHUTDOWN_MASK;
 385         dentry       = u->dentry;
 386         u->dentry    = NULL;
 387         mnt          = u->mnt;
 388         u->mnt       = NULL;
 389         state = sk->sk_state;
 390         sk->sk_state = TCP_CLOSE;
 391         unix_state_unlock(sk);
 392
 393         wake_up_interruptible_all(&u->peer_wait);
 394
 395         skpair = unix_peer(sk);
 396
 397         if (skpair != NULL) {
 398                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 399                         unix_state_lock(skpair);
 400                         /* No more writes */
 401                         skpair->sk_shutdown = SHUTDOWN_MASK;
 402                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 403                                 skpair->sk_err = ECONNRESET;
 404                         unix_state_unlock(skpair);
 405                         skpair->sk_state_change(skpair);
 406                         read_lock(&skpair->sk_callback_lock);
 407                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 408                         read_unlock(&skpair->sk_callback_lock);
 409                 }
 410                 sock_put(skpair); /* It may now die */
 411                 unix_peer(sk) = NULL;
 412         }
 413
 414         /* Try to flush out this socket. Throw out buffers at least */
 415
 416         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 417                 if (state == TCP_LISTEN)
 418                         unix_release_sock(skb->sk, 1);
 419                 /* passed fds are erased in the kfree_skb hook        */
 420                 kfree_skb(skb);
 421         }
 422
 423         if (dentry) {
 424                 dput(dentry);
 425                 mntput(mnt);
 426         }
 427
 428         sock_put(sk);
 429
 430         /* ---- Socket is dead now and most probably destroyed ---- */
 431
 432         /*
 433          * Fixme: BSD difference: In BSD all sockets connected to use get
 434          *        ECONNRESET and we die on the spot. In Linux we behave
 435          *        like files and pipes do and wait for the last
 436          *        dereference.
 437          *
 438          * Can't we simply set sock->err?
 439          *
 440          *        What the above comment does talk about? --ANK(980817)
 441          */
 442
 443         if (unix_tot_inflight)
 444                 unix_gc();              /* Garbage collect fds */
 445
 446         return 0;
 447 }
 448
 449 static int unix_listen(struct socket *sock, int backlog)
 450 {
 451         int err;
 452         struct sock *sk = sock->sk;
 453         struct unix_sock *u = unix_sk(sk);
 454
 455         err = -EOPNOTSUPP;
 456         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 457                 goto out;       /* Only stream/seqpacket sockets accept */
 458         err = -EINVAL;
 459         if (!u->addr)
 460                 goto out;       /* No listens on an unbound socket */
 461         unix_state_lock(sk);
 462         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 463                 goto out_unlock;
 464         if (backlog > sk->sk_max_ack_backlog)
 465                 wake_up_interruptible_all(&u->peer_wait);
 466         sk->sk_max_ack_backlog  = backlog;
 467         sk->sk_state            = TCP_LISTEN;
 468         /* set credentials so connect can copy them */
 469         sk->sk_peercred.pid     = task_tgid_vnr(current);
 470         sk->sk_peercred.uid     = current->euid;
 471         sk->sk_peercred.gid     = current->egid;
 472         err = 0;
 473
 474 out_unlock:
 475         unix_state_unlock(sk);
 476 out:
 477         return err;
 478 }
 479
 480 static int unix_release(struct socket *);
 481 static int unix_bind(struct socket *, struct sockaddr *, int);
 482 static int unix_stream_connect(struct socket *, struct sockaddr *,
 483                                int addr_len, int flags);
 484 static int unix_socketpair(struct socket *, struct socket *);
 485 static int unix_accept(struct socket *, struct socket *, int);
 486 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 487 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 488 static unsigned int unix_dgram_poll(struct file *, struct socket *,
 489                                     poll_table *);
 490 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 491 static int unix_shutdown(struct socket *, int);
 492 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 493                                struct msghdr *, size_t);
 494 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
 495                                struct msghdr *, size_t, int);
 496 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
 497                               struct msghdr *, size_t);
 498 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
 499                               struct msghdr *, size_t, int);
 500 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 501                               int, int);
 502 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 503                                   struct msghdr *, size_t);
 504
 505 static const struct proto_ops unix_stream_ops = {
 506         .family =       PF_UNIX,
 507         .owner =        THIS_MODULE,
 508         .release =      unix_release,
 509         .bind =         unix_bind,
 510         .connect =      unix_stream_connect,
 511         .socketpair =   unix_socketpair,
 512         .accept =       unix_accept,
 513         .getname =      unix_getname,
 514         .poll =         unix_poll,
 515         .ioctl =        unix_ioctl,
 516         .listen =       unix_listen,
 517         .shutdown =     unix_shutdown,
 518         .setsockopt =   sock_no_setsockopt,
 519         .getsockopt =   sock_no_getsockopt,
 520         .sendmsg =      unix_stream_sendmsg,
 521         .recvmsg =      unix_stream_recvmsg,
 522         .mmap =         sock_no_mmap,
 523         .sendpage =     sock_no_sendpage,
 524 };
 525
 526 static const struct proto_ops unix_dgram_ops = {
 527         .family =       PF_UNIX,
 528         .owner =        THIS_MODULE,
 529         .release =      unix_release,
 530         .bind =         unix_bind,
 531         .connect =      unix_dgram_connect,
 532         .socketpair =   unix_socketpair,
 533         .accept =       sock_no_accept,
 534         .getname =      unix_getname,
 535         .poll =         unix_dgram_poll,
 536         .ioctl =        unix_ioctl,
 537         .listen =       sock_no_listen,
 538         .shutdown =     unix_shutdown,
 539         .setsockopt =   sock_no_setsockopt,
 540         .getsockopt =   sock_no_getsockopt,
 541         .sendmsg =      unix_dgram_sendmsg,
 542         .recvmsg =      unix_dgram_recvmsg,
 543         .mmap =         sock_no_mmap,
 544         .sendpage =     sock_no_sendpage,
 545 };
 546
 547 static const struct proto_ops unix_seqpacket_ops = {
 548         .family =       PF_UNIX,
 549         .owner =        THIS_MODULE,
 550         .release =      unix_release,
 551         .bind =         unix_bind,
 552         .connect =      unix_stream_connect,
 553         .socketpair =   unix_socketpair,
 554         .accept =       unix_accept,
 555         .getname =      unix_getname,
 556         .poll =         unix_dgram_poll,
 557         .ioctl =        unix_ioctl,
 558         .listen =       unix_listen,
 559         .shutdown =     unix_shutdown,
 560         .setsockopt =   sock_no_setsockopt,
 561         .getsockopt =   sock_no_getsockopt,
 562         .sendmsg =      unix_seqpacket_sendmsg,
 563         .recvmsg =      unix_dgram_recvmsg,
 564         .mmap =         sock_no_mmap,
 565         .sendpage =     sock_no_sendpage,
 566 };
 567
 568 static struct proto unix_proto = {
 569         .name                   = "UNIX",
 570         .owner                  = THIS_MODULE,
 571         .sockets_allocated      = &unix_nr_socks,
 572         .obj_size               = sizeof(struct unix_sock),
 573 };
 574
 575 /*
 576  * AF_UNIX sockets do not interact with hardware, hence they
 577  * dont trigger interrupts - so it's safe for them to have
 578  * bh-unsafe locking for their sk_receive_queue.lock. Split off
 579  * this special lock-class by reinitializing the spinlock key:
 580  */
 581 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 582
 583 static struct sock *unix_create1(struct net *net, struct socket *sock)
 584 {
 585         struct sock *sk = NULL;
 586         struct unix_sock *u;
 587
 588         atomic_inc(&unix_nr_socks);
 589         if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
 590                 goto out;
 591
 592         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
 593         if (!sk)
 594                 goto out;
 595
 596         sock_init_data(sock, sk);
 597         lockdep_set_class(&sk->sk_receive_queue.lock,
 598                                 &af_unix_sk_receive_queue_lock_key);
 599
 600         sk->sk_write_space      = unix_write_space;
 601         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 602         sk->sk_destruct         = unix_sock_destructor;
 603         u         = unix_sk(sk);
 604         u->dentry = NULL;
 605         u->mnt    = NULL;
 606         spin_lock_init(&u->lock);
 607         atomic_long_set(&u->inflight, 0);
 608         INIT_LIST_HEAD(&u->link);
 609         mutex_init(&u->readlock); /* single task reading lock */
 610         init_waitqueue_head(&u->peer_wait);
 611         unix_insert_socket(unix_sockets_unbound, sk);
 612 out:
 613         if (sk == NULL)
 614                 atomic_dec(&unix_nr_socks);
 615         return sk;
 616 }
 617
 618 static int unix_create(struct net *net, struct socket *sock, int protocol)
 619 {
 620         if (protocol && protocol != PF_UNIX)
 621                 return -EPROTONOSUPPORT;
 622
 623         sock->state = SS_UNCONNECTED;
 624
 625         switch (sock->type) {
 626         case SOCK_STREAM:
 627                 sock->ops = &unix_stream_ops;
 628                 break;
 629                 /*
 630                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 631                  *      nothing uses it.
 632                  */
 633         case SOCK_RAW:
 634                 sock->type = SOCK_DGRAM;
 635         case SOCK_DGRAM:
 636                 sock->ops = &unix_dgram_ops;
 637                 break;
 638         case SOCK_SEQPACKET:
 639                 sock->ops = &unix_seqpacket_ops;
 640                 break;
 641         default:
 642                 return -ESOCKTNOSUPPORT;
 643         }
 644
 645         return unix_create1(net, sock) ? 0 : -ENOMEM;
 646 }
 647
 648 static int unix_release(struct socket *sock)
 649 {
 650         struct sock *sk = sock->sk;
 651
 652         if (!sk)
 653                 return 0;
 654
 655         sock->sk = NULL;
 656
 657         return unix_release_sock(sk, 0);
 658 }
 659
 660 static int unix_autobind(struct socket *sock)
 661 {
 662         struct sock *sk = sock->sk;
 663         struct net *net = sock_net(sk);
 664         struct unix_sock *u = unix_sk(sk);
 665         static u32 ordernum = 1;
 666         struct unix_address *addr;
 667         int err;
 668
 669         mutex_lock(&u->readlock);
 670
 671         err = 0;
 672         if (u->addr)
 673                 goto out;
 674
 675         err = -ENOMEM;
 676         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 677         if (!addr)
 678                 goto out;
 679
 680         addr->name->sun_family = AF_UNIX;
 681         atomic_set(&addr->refcnt, 1);
 682
 683 retry:
 684         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 685         addr->hash = unix_hash_fold(csum_partial((void *)addr->name, addr->len, 0));
 686
 687         spin_lock(&unix_table_lock);
 688         ordernum = (ordernum+1)&0xFFFFF;
 689
 690         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 691                                       addr->hash)) {
 692                 spin_unlock(&unix_table_lock);
 693                 /* Sanity yield. It is unusual case, but yet... */
 694                 if (!(ordernum&0xFF))
 695                         yield();
 696                 goto retry;
 697         }
 698         addr->hash ^= sk->sk_type;
 699
 700         __unix_remove_socket(sk);
 701         u->addr = addr;
 702         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 703         spin_unlock(&unix_table_lock);
 704         err = 0;
 705
 706 out:    mutex_unlock(&u->readlock);
 707         return err;
 708 }
 709
 710 static struct sock *unix_find_other(struct net *net,
 711                                     struct sockaddr_un *sunname, int len,
 712                                     int type, unsigned hash, int *error)
 713 {
 714         struct sock *u;
 715         struct path path;
 716         int err = 0;
 717
 718         if (sunname->sun_path[0]) {
 719                 struct inode *inode;
 720                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 721                 if (err)
 722                         goto fail;
 723                 inode = path.dentry->d_inode;
 724                 err = inode_permission(inode, MAY_WRITE);
 725                 if (err)
 726                         goto put_fail;
 727
 728                 err = -ECONNREFUSED;
 729                 if (!S_ISSOCK(inode->i_mode))
 730                         goto put_fail;
 731                 u = unix_find_socket_byinode(net, inode);
 732                 if (!u)
 733                         goto put_fail;
 734
 735                 if (u->sk_type == type)
 736                         touch_atime(path.mnt, path.dentry);
 737
 738                 path_put(&path);
 739
 740                 err = -EPROTOTYPE;
 741                 if (u->sk_type != type) {
 742                         sock_put(u);
 743                         goto fail;
 744                 }
 745         } else {
 746                 err = -ECONNREFUSED;
 747                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 748                 if (u) {
 749                         struct dentry *dentry;
 750                         dentry = unix_sk(u)->dentry;
 751                         if (dentry)
 752                                 touch_atime(unix_sk(u)->mnt, dentry);
 753                 } else
 754                         goto fail;
 755         }
 756         return u;
 757
 758 put_fail:
 759         path_put(&path);
 760 fail:
 761         *error = err;
 762         return NULL;
 763 }
 764
 765
     /*
      * Bind an AF_UNIX socket to the address in uaddr.
      *
      *  - addr_len == sizeof(short) (family only): delegate to unix_autobind().
      *  - sun_path[0] != 0: create a socket node in the filesystem with
      *    vfs_mknod() and remember its dentry/mount in the unix_sock.
      *  - sun_path[0] == 0: the name must not already be present in the
      *    socket table (checked under unix_table_lock).
      *
      * Returns 0 on success or a negative errno: -EINVAL (wrong family, or
      * the socket already has an address), -ENOMEM, -EADDRINUSE (name taken;
      * also substituted for -EEXIST from the filesystem path), or whatever
      * unix_mkname(), path_lookup(), lookup_create(), mnt_want_write() or
      * vfs_mknod() reported.
      */
 766 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 767 {
 768         struct sock *sk = sock->sk;
 769         struct net *net = sock_net(sk);
 770         struct unix_sock *u = unix_sk(sk);
 771         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 772         struct dentry *dentry = NULL;
 773         struct nameidata nd;
 774         int err;
 775         unsigned hash;
 776         struct unix_address *addr;
 777         struct hlist_head *list;
 778
 779         err = -EINVAL;
 780         if (sunaddr->sun_family != AF_UNIX)
 781                 goto out;
 782
             /* Only the family was supplied: let the kernel pick a name. */
 783         if (addr_len == sizeof(short)) {
 784                 err = unix_autobind(sock);
 785                 goto out;
 786         }
 787
             /* A non-negative return value replaces addr_len below. */
 788         err = unix_mkname(sunaddr, addr_len, &hash);
 789         if (err < 0)
 790                 goto out;
 791         addr_len = err;
 792
             /* u->readlock is held from here until the out_up label. */
 793         mutex_lock(&u->readlock);
 794
             /* A socket that already has an address cannot be bound again. */
 795         err = -EINVAL;
 796         if (u->addr)
 797                 goto out_up;
 798
 799         err = -ENOMEM;
 800         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 801         if (!addr)
 802                 goto out_up;
 803
 804         memcpy(addr->name, sunaddr, addr_len);
 805         addr->len = addr_len;
 806         addr->hash = hash ^ sk->sk_type;
 807         atomic_set(&addr->refcnt, 1);
 808
             /* Non-zero first byte: the name is a filesystem path. */
 809         if (sunaddr->sun_path[0]) {
 810                 unsigned int mode;
 811                 err = 0;
 812                 /*
 813                  * Get the parent directory, calculate the hash for last
 814                  * component.
 815                  */
 816                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
 817                 if (err)
 818                         goto out_mknod_parent;
 819
 820                 dentry = lookup_create(&nd, 0);
 821                 err = PTR_ERR(dentry);
 822                 if (IS_ERR(dentry))
 823                         goto out_mknod_unlock;
 824
 825                 /*
 826                  * All right, let's create it.
 827                  */
 828                 mode = S_IFSOCK |
 829                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
 830                 err = mnt_want_write(nd.path.mnt);
 831                 if (err)
 832                         goto out_mknod_dput;
 833                 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
                     /* Write access is dropped before looking at the mknod result. */
 834                 mnt_drop_write(nd.path.mnt);
 835                 if (err)
 836                         goto out_mknod_dput;
                     /*
                      * Unlock the parent directory inode, drop its dentry and
                      * make nd.path refer to the new socket node instead; it
                      * is stored in u->dentry/u->mnt below.
                      */
 837                 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 838                 dput(nd.path.dentry);
 839                 nd.path.dentry = dentry;
 840
                     /*
                      * Path-bound sockets get hash == UNIX_HASH_SIZE; their
                      * table list is chosen from the inode number below.
                      */
 841                 addr->hash = UNIX_HASH_SIZE;
 842         }
 843
 844         spin_lock(&unix_table_lock);
 845
 846         if (!sunaddr->sun_path[0]) {
                     /* Name starting with a zero byte: must be unique in the table. */
 847                 err = -EADDRINUSE;
 848                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
 849                                               sk->sk_type, hash)) {
 850                         unix_release_addr(addr);
 851                         goto out_unlock;
 852                 }
 853
 854                 list = &unix_socket_table[addr->hash];
 855         } else {
 856                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
 857                 u->dentry = nd.path.dentry;
 858                 u->mnt    = nd.path.mnt;
 859         }
 860
             /* Move the socket from its current list to the one for the new address. */
 861         err = 0;
 862         __unix_remove_socket(sk);
 863         u->addr = addr;
 864         __unix_insert_socket(list, sk);
 865
 866 out_unlock:
 867         spin_unlock(&unix_table_lock);
 868 out_up:
 869         mutex_unlock(&u->readlock);
 870 out:
 871         return err;
 872
     /*
      * Error unwinding for the filesystem path: each label undoes one more
      * step, then the address is released and control rejoins out_up.
      */
 873 out_mknod_dput:
 874         dput(dentry);
 875 out_mknod_unlock:
 876         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 877         path_put(&nd.path);
 878 out_mknod_parent:
             /* An existing filesystem node is reported as "address in use". */
 879         if (err == -EEXIST)
 880                 err = -EADDRINUSE;
 881         unix_release_addr(addr);
 882         goto out_up;
 883 }
 884
 885 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 886 {
 887         if (unlikely(sk1 == sk2) || !sk2) {
 888                 unix_state_lock(sk1);
 889                 return;
 890         }
 891         if (sk1 < sk2) {
 892                 unix_state_lock(sk1);
 893                 unix_state_lock_nested(sk2);
 894         } else {
 895                 unix_state_lock(sk2);
 896                 unix_state_lock_nested(sk1);
 897         }
 898 }
 899
 900 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
 901 {
 902         if (unlikely(sk1 == sk2) || !sk2) {
 903                 unix_state_unlock(sk1);
 904                 return;
 905         }
 906         unix_state_unlock(sk1);
 907         unix_state_unlock(sk2);
 908 }
 909
 910 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 911                               int alen, int flags)
 912 {
 913         struct sock *sk = sock->sk;
 914         struct net *net = sock_net(sk);
 915         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
 916         struct sock *other;
 917         unsigned hash;
 918         int err;
 919
 920         if (addr->sa_family != AF_UNSPEC) {
 921                 err = unix_mkname(sunaddr, alen, &hash);
 922                 if (err < 0)
 923                         goto out;
 924                 alen = err;
 925
 926                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
 927                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
 928                         goto out;
 929
 930 restart:
 931                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 932                 if (!other)
 933                         goto out;
 934
 935                 unix_state_double_lock(sk, other);
 936
 937                 /* Apparently VFS overslept socket death. Retry. */
 938                 if (sock_flag(other, SOCK_DEAD)) {
 939                         unix_state_double_unlock(sk, other);
 940                         sock_put(other);
 941                         goto restart;
 942                 }
 943
 944                 err = -EPERM;
 945                 if (!unix_may_send(sk, other))
 946                         goto out_unlock;
 947
 948                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
 949                 if (err)
 950                         goto out_unlock;
 951
 952         } else {
 953                 /*
 954                  *      1003.1g breaking connected state with AF_UNSPEC
 955                  */
 956                 other = NULL;
 957                 unix_state_double_lock(sk, other);
 958         }
 959
 960         /*
 961          * If it was connected, reconnect.
 962          */
 963         if (unix_peer(sk)) {
 964                 struct sock *old_peer = unix_peer(sk);
 965                 unix_peer(sk) = other;
 966                 unix_state_double_unlock(sk, other);
 967
 968                 if (other != old_peer)
 969                         unix_dgram_disconnected(sk, old_peer);
 970                 sock_put(old_peer);
 971         } else {
 972                 unix_peer(sk) = other;
 973                 unix_state_double_unlock(sk, other);
 974         }
 975         return 0;
 976
 977 out_unlock:
 978         unix_state_double_unlock(sk, other);
 979         sock_put(other);
 980 out:
 981         return err;
 982 }
 983
 984 static long unix_wait_for_peer(struct sock *other, long timeo)
 985 {
 986         struct unix_sock *u = unix_sk(other);
 987         int sched;
 988         DEFINE_WAIT(wait);
 989
 990         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
 991
 992         sched = !sock_flag(other, SOCK_DEAD) &&
 993                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
 994                 unix_recvq_full(other);
 995
 996         unix_state_unlock(other);
 997
 998         if (sched)
 999                 timeo = schedule_timeout(timeo);
1000
1001         finish_wait(&u->peer_wait, &wait);
1002         return timeo;
1003 }
1004
     /*
      * Connect sock to the listening socket named by uaddr.
      *
      * A new sock (newsk) and a one-byte skb charged to it are allocated up
      * front.  Once the listener has been found and both state locks are
      * held, sk and newsk are made each other's peer, newsk inherits the
      * listener's address, and the skb is queued on the listener's receive
      * queue (unix_accept() later picks the new sock up via skb->sk).
      *
      * Returns 0 on success or a negative errno, e.g. -ENOMEM,
      * -ECONNREFUSED (target not listening), -EAGAIN (backlog full and no
      * time to wait), -EISCONN, -EINVAL, or an error from unix_mkname(),
      * unix_autobind(), unix_find_other(), sock_intr_errno() or the
      * security hook.
      */
1005 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1006                                int addr_len, int flags)
1007 {
1008         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1009         struct sock *sk = sock->sk;
1010         struct net *net = sock_net(sk);
1011         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1012         struct sock *newsk = NULL;
1013         struct sock *other = NULL;
1014         struct sk_buff *skb = NULL;
1015         unsigned hash;
1016         int st;
1017         int err;
1018         long timeo;
1019
1020         err = unix_mkname(sunaddr, addr_len, &hash);
1021         if (err < 0)
1022                 goto out;
1023         addr_len = err;
1024
             /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
1025         if (test_bit(SOCK_PASSCRED, &sock->flags)
1026                 && !u->addr && (err = unix_autobind(sock)) != 0)
1027                 goto out;
1028
1029         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1030
1031         /* First of all allocate resources.
1032            If we will make it after state is locked,
1033            we will have to recheck all again in any case.
1034          */
1035
1036         err = -ENOMEM;
1037
1038         /* create new sock for complete connection */
1039         newsk = unix_create1(sock_net(sk), NULL);
1040         if (newsk == NULL)
1041                 goto out;
1042
1043         /* Allocate skb for sending to listening sock */
1044         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1045         if (skb == NULL)
1046                 goto out;
1047
1048 restart:
1049         /*  Find listening sock. */
1050         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1051         if (!other)
1052                 goto out;
1053
1054         /* Latch state of peer */
1055         unix_state_lock(other);
1056
1057         /* Apparently VFS overslept socket death. Retry. */
1058         if (sock_flag(other, SOCK_DEAD)) {
1059                 unix_state_unlock(other);
1060                 sock_put(other);
1061                 goto restart;
1062         }
1063
1064         err = -ECONNREFUSED;
1065         if (other->sk_state != TCP_LISTEN)
1066                 goto out_unlock;
1067
1068         if (unix_recvq_full(other)) {
1069                 err = -EAGAIN;
1070                 if (!timeo)
1071                         goto out_unlock;
1072
                     /*
                      * unix_wait_for_peer() releases other's state lock, so
                      * the exits below use "out", not "out_unlock".
                      */
1073                 timeo = unix_wait_for_peer(other, timeo);
1074
1075                 err = sock_intr_errno(timeo);
1076                 if (signal_pending(current))
1077                         goto out;
1078                 sock_put(other);
1079                 goto restart;
1080         }
1081
1082         /* Latch our state.
1083
1084            It is tricky place. We need to grab write lock and cannot
1085            drop lock on peer. It is dangerous because deadlock is
1086            possible. Connect to self case and simultaneous
1087            attempt to connect are eliminated by checking socket
1088            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1089            check this before attempt to grab lock.
1090
1091            Well, and we have to recheck the state after socket locked.
1092          */
1093         st = sk->sk_state;
1094
1095         switch (st) {
1096         case TCP_CLOSE:
1097                 /* This is ok... continue with connect */
1098                 break;
1099         case TCP_ESTABLISHED:
1100                 /* Socket is already connected */
1101                 err = -EISCONN;
1102                 goto out_unlock;
1103         default:
1104                 err = -EINVAL;
1105                 goto out_unlock;
1106         }
1107
1108         unix_state_lock_nested(sk);
1109
             /* State changed between the unlocked read and the lock: start over. */
1110         if (sk->sk_state != st) {
1111                 unix_state_unlock(sk);
1112                 unix_state_unlock(other);
1113                 sock_put(other);
1114                 goto restart;
1115         }
1116
1117         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1118         if (err) {
1119                 unix_state_unlock(sk);
1120                 goto out_unlock;
1121         }
1122
1123         /* The way is open! Fastly set all the necessary fields... */
1124
             /* newsk's peer pointer holds a reference on sk. */
1125         sock_hold(sk);
1126         unix_peer(newsk)        = sk;
1127         newsk->sk_state         = TCP_ESTABLISHED;
1128         newsk->sk_type          = sk->sk_type;
1129         newsk->sk_peercred.pid  = task_tgid_vnr(current);
1130         newsk->sk_peercred.uid  = current->euid;
1131         newsk->sk_peercred.gid  = current->egid;
1132         newu = unix_sk(newsk);
1133         newsk->sk_sleep         = &newu->peer_wait;
1134         otheru = unix_sk(other);
1135
1136         /* copy address information from listening to new sock*/
1137         if (otheru->addr) {
1138                 atomic_inc(&otheru->addr->refcnt);
1139                 newu->addr = otheru->addr;
1140         }
1141         if (otheru->dentry) {
1142                 newu->dentry    = dget(otheru->dentry);
1143                 newu->mnt       = mntget(otheru->mnt);
1144         }
1145
1146         /* Set credentials */
1147         sk->sk_peercred = other->sk_peercred;
1148
1149         sock->state     = SS_CONNECTED;
1150         sk->sk_state    = TCP_ESTABLISHED;
             /* sk's peer pointer, set just below, holds a reference on newsk. */
1151         sock_hold(newsk);
1152
1153         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1154         unix_peer(sk)   = newsk;
1155
1156         unix_state_unlock(sk);
1157
1158         /* take ten and send info to listening sock */
1159         spin_lock(&other->sk_receive_queue.lock);
1160         __skb_queue_tail(&other->sk_receive_queue, skb);
1161         spin_unlock(&other->sk_receive_queue.lock);
1162         unix_state_unlock(other);
1163         other->sk_data_ready(other, 0);
1164         sock_put(other);
1165         return 0;
1166
1167 out_unlock:
1168         if (other)
1169                 unix_state_unlock(other);
1170
     /* Common failure exit: free whatever was allocated or referenced so far. */
1171 out:
1172         if (skb)
1173                 kfree_skb(skb);
1174         if (newsk)
1175                 unix_release_sock(newsk, 0);
1176         if (other)
1177                 sock_put(other);
1178         return err;
1179 }
1180
1181 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1182 {
1183         struct sock *ska = socka->sk, *skb = sockb->sk;
1184
1185         /* Join our sockets back to back */
1186         sock_hold(ska);
1187         sock_hold(skb);
1188         unix_peer(ska) = skb;
1189         unix_peer(skb) = ska;
1190         ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1191         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1192         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1193
1194         if (ska->sk_type != SOCK_DGRAM) {
1195                 ska->sk_state = TCP_ESTABLISHED;
1196                 skb->sk_state = TCP_ESTABLISHED;
1197                 socka->state  = SS_CONNECTED;
1198                 sockb->state  = SS_CONNECTED;
1199         }
1200         return 0;
1201 }
1202
1203 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1204 {
1205         struct sock *sk = sock->sk;
1206         struct sock *tsk;
1207         struct sk_buff *skb;
1208         int err;
1209
1210         err = -EOPNOTSUPP;
1211         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1212                 goto out;
1213
1214         err = -EINVAL;
1215         if (sk->sk_state != TCP_LISTEN)
1216                 goto out;
1217
1218         /* If socket state is TCP_LISTEN it cannot change (for now...),
1219          * so that no locks are necessary.
1220          */
1221
1222         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1223         if (!skb) {
1224                 /* This means receive shutdown. */
1225                 if (err == 0)
1226                         err = -EINVAL;
1227                 goto out;
1228         }
1229
1230         tsk = skb->sk;
1231         skb_free_datagram(sk, skb);
1232         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1233
1234         /* attach accepted sock to socket */
1235         unix_state_lock(tsk);
1236         newsock->state = SS_CONNECTED;
1237         sock_graft(tsk, newsock);
1238         unix_state_unlock(tsk);
1239         return 0;
1240
1241 out:
1242         return err;
1243 }
1244
1245
1246 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1247 {
1248         struct sock *sk = sock->sk;
1249         struct unix_sock *u;
1250         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1251         int err = 0;
1252
1253         if (peer) {
1254                 sk = unix_peer_get(sk);
1255
1256                 err = -ENOTCONN;
1257                 if (!sk)
1258                         goto out;
1259                 err = 0;
1260         } else {
1261                 sock_hold(sk);
1262         }
1263
1264         u = unix_sk(sk);
1265         unix_state_lock(sk);
1266         if (!u->addr) {
1267                 sunaddr->sun_family = AF_UNIX;
1268                 sunaddr->sun_path[0] = 0;
1269                 *uaddr_len = sizeof(short);
1270         } else {
1271                 struct unix_address *addr = u->addr;
1272
1273                 *uaddr_len = addr->len;
1274                 memcpy(sunaddr, addr->name, *uaddr_len);
1275         }
1276         unix_state_unlock(sk);
1277         sock_put(sk);
1278 out:
1279         return err;
1280 }
1281
1282 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1283 {
1284         int i;
1285
1286         scm->fp = UNIXCB(skb).fp;
1287         skb->destructor = sock_wfree;
1288         UNIXCB(skb).fp = NULL;
1289
1290         for (i = scm->fp->count-1; i >= 0; i--)
1291                 unix_notinflight(scm->fp->fp[i]);
1292 }
1293
1294 static void unix_destruct_fds(struct sk_buff *skb)
1295 {
1296         struct scm_cookie scm;
1297         memset(&scm, 0, sizeof(scm));
1298         unix_detach_fds(&scm, skb);
1299
1300         /* Alas, it calls VFS */
1301         /* So fscking what? fput() had been SMP-safe since the last Summer */
1302         scm_destroy(&scm);
1303         sock_wfree(skb);
1304 }
1305
1306 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1307 {
1308         int i;
1309
1310         /*
1311          * Need to duplicate file references for the sake of garbage
1312          * collection.  Otherwise a socket in the fps might become a
1313          * candidate for GC while the skb is not yet queued.
1314          */
1315         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1316         if (!UNIXCB(skb).fp)
1317                 return -ENOMEM;
1318
1319         for (i = scm->fp->count-1; i >= 0; i--)
1320                 unix_inflight(scm->fp->fp[i]);
1321         skb->destructor = unix_destruct_fds;
1322         return 0;
1323 }
1324
1325 /*
1326  *      Send AF_UNIX data.
1327  */
1328
     /*
      * Send one datagram of len bytes from msg.
      *
      * The destination is the address in msg->msg_name when msg_namelen is
      * non-zero, otherwise the connected peer.  The payload, the sender's
      * credentials and any passed files are packed into a single skb that
      * is appended to the destination's receive queue.
      *
      * Returns len on success or a negative errno, e.g. -EOPNOTSUPP
      * (MSG_OOB), -ENOTCONN (no address and no peer), -EMSGSIZE,
      * -ECONNRESET, -ECONNREFUSED (connected peer has died), -EPERM,
      * -EPIPE (receiver shut down), -EAGAIN (queue full, no time to wait).
      * siocb->scm is destroyed on every path that got past scm_send().
      */
1329 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1330                               struct msghdr *msg, size_t len)
1331 {
1332         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1333         struct sock *sk = sock->sk;
1334         struct net *net = sock_net(sk);
1335         struct unix_sock *u = unix_sk(sk);
1336         struct sockaddr_un *sunaddr = msg->msg_name;
1337         struct sock *other = NULL;
1338         int namelen = 0; /* fake GCC */
1339         int err;
1340         unsigned hash;
1341         struct sk_buff *skb;
1342         long timeo;
1343         struct scm_cookie tmp_scm;
1344
             /* Use an on-stack cookie when the caller did not provide one. */
1345         if (NULL == siocb->scm)
1346                 siocb->scm = &tmp_scm;
1347         err = scm_send(sock, msg, siocb->scm);
1348         if (err < 0)
1349                 return err;
1350
1351         err = -EOPNOTSUPP;
1352         if (msg->msg_flags&MSG_OOB)
1353                 goto out;
1354
1355         if (msg->msg_namelen) {
                     /* Explicit destination: it is looked up at "restart" below. */
1356                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1357                 if (err < 0)
1358                         goto out;
1359                 namelen = err;
1360         } else {
                     /* No address given: send to the connected peer, if any. */
1361                 sunaddr = NULL;
1362                 err = -ENOTCONN;
1363                 other = unix_peer_get(sk);
1364                 if (!other)
1365                         goto out;
1366         }
1367
1368         if (test_bit(SOCK_PASSCRED, &sock->flags)
1369                 && !u->addr && (err = unix_autobind(sock)) != 0)
1370                 goto out;
1371
1372         err = -EMSGSIZE;
1373         if (len > sk->sk_sndbuf - 32)
1374                 goto out;
1375
1376         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1377         if (skb == NULL)
1378                 goto out;
1379
1380         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1381         if (siocb->scm->fp) {
1382                 err = unix_attach_fds(siocb->scm, skb);
1383                 if (err)
1384                         goto out_free;
1385         }
1386         unix_get_secdata(siocb->scm, skb);
1387
1388         skb_reset_transport_header(skb);
1389         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1390         if (err)
1391                 goto out_free;
1392
1393         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1394
     /*
      * Re-entered after waiting for queue space (other still referenced)
      * and after discovering that the target died (other == NULL).
      */
1395 restart:
1396         if (!other) {
                     /* Without an address there is nothing to look up again. */
1397                 err = -ECONNRESET;
1398                 if (sunaddr == NULL)
1399                         goto out_free;
1400
1401                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1402                                         hash, &err);
1403                 if (other == NULL)
1404                         goto out_free;
1405         }
1406
1407         unix_state_lock(other);
1408         err = -EPERM;
1409         if (!unix_may_send(sk, other))
1410                 goto out_unlock;
1411
1412         if (sock_flag(other, SOCK_DEAD)) {
1413                 /*
1414                  *      Check with 1003.1g - what should
1415                  *      datagram error
1416                  */
1417                 unix_state_unlock(other);
1418                 sock_put(other);
1419
                     /*
                      * If the dead socket was our connected peer, disconnect
                      * and fail with -ECONNREFUSED; otherwise look the
                      * address up again.
                      */
1420                 err = 0;
1421                 unix_state_lock(sk);
1422                 if (unix_peer(sk) == other) {
1423                         unix_peer(sk) = NULL;
1424                         unix_state_unlock(sk);
1425
1426                         unix_dgram_disconnected(sk, other);
                             /* Drop the reference the peer pointer was holding. */
1427                         sock_put(other);
1428                         err = -ECONNREFUSED;
1429                 } else {
1430                         unix_state_unlock(sk);
1431                 }
1432
1433                 other = NULL;
1434                 if (err)
1435                         goto out_free;
1436                 goto restart;
1437         }
1438
1439         err = -EPIPE;
1440         if (other->sk_shutdown & RCV_SHUTDOWN)
1441                 goto out_unlock;
1442
1443         if (sk->sk_type != SOCK_SEQPACKET) {
1444                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1445                 if (err)
1446                         goto out_unlock;
1447         }
1448
             /* The queue-full limit is skipped when other's peer is this socket. */
1449         if (unix_peer(other) != sk && unix_recvq_full(other)) {
1450                 if (!timeo) {
1451                         err = -EAGAIN;
1452                         goto out_unlock;
1453                 }
1454
                     /* unix_wait_for_peer() releases other's state lock. */
1455                 timeo = unix_wait_for_peer(other, timeo);
1456
1457                 err = sock_intr_errno(timeo);
1458                 if (signal_pending(current))
1459                         goto out_free;
1460
1461                 goto restart;
1462         }
1463
1464         skb_queue_tail(&other->sk_receive_queue, skb);
1465         unix_state_unlock(other);
1466         other->sk_data_ready(other, len);
1467         sock_put(other);
1468         scm_destroy(siocb->scm);
1469         return len;
1470
1471 out_unlock:
1472         unix_state_unlock(other);
1473 out_free:
1474         kfree_skb(skb);
1475 out:
1476         if (other)
1477                 sock_put(other);
1478         scm_destroy(siocb->scm);
1479         return err;
1480 }
1481
1482
1483 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1484                                struct msghdr *msg, size_t len)
1485 {
1486         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1487         struct sock *sk = sock->sk;
1488         struct sock *other = NULL;
1489         struct sockaddr_un *sunaddr = msg->msg_name;
1490         int err, size;
1491         struct sk_buff *skb;
1492         int sent = 0;
1493         struct scm_cookie tmp_scm;
1494
1495         if (NULL == siocb->scm)
1496                 siocb->scm = &tmp_scm;
1497         err = scm_send(sock, msg, siocb->scm);
1498         if (err < 0)
1499                 return err;
1500
1501         err = -EOPNOTSUPP;
1502         if (msg->msg_flags&MSG_OOB)
1503                 goto out_err;
1504
1505         if (msg->msg_namelen) {
1506                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1507                 goto out_err;
1508         } else {
1509                 sunaddr = NULL;
1510                 err = -ENOTCONN;
1511                 other = unix_peer(sk);
1512                 if (!other)
1513                         goto out_err;
1514         }
1515
1516         if (sk->sk_shutdown & SEND_SHUTDOWN)
1517                 goto pipe_err;
1518
1519         while (sent < len) {
1520                 /*
1521                  *      Optimisation for the fact that under 0.01% of X
1522                  *      messages typically need breaking up.
1523                  */
1524
1525                 size = len-sent;
1526
1527                 /* Keep two messages in the pipe so it schedules better */
1528                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1529                         size = (sk->sk_sndbuf >> 1) - 64;
1530
1531                 if (size > SKB_MAX_ALLOC)
1532                         size = SKB_MAX_ALLOC;
1533
1534                 /*
1535                  *      Grab a buffer
1536                  */
1537
1538                 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1539                                           &err);
1540
1541                 if (skb == NULL)
1542                         goto out_err;
1543
1544                 /*
1545                  *      If you pass two values to the sock_alloc_send_skb
1546                  *      it tries to grab the large buffer with GFP_NOFS
1547                  *      (which can fail easily), and if it fails grab the
1548                  *      fallback size buffer which is under a page and will
1549                  *      succeed. [Alan]
1550                  */
1551                 size = min_t(int, size, skb_tailroom(skb));
1552
1553                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1554                 if (siocb->scm->fp) {
1555                         err = unix_attach_fds(siocb->scm, skb);
1556                         if (err) {
1557                                 kfree_skb(skb);
1558                                 goto out_err;
1559                         }
1560                 }
1561
1562                 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1563                 if (err) {
1564                         kfree_skb(skb);
1565                         goto out_err;
1566                 }
1567
1568                 unix_state_lock(other);
1569
1570                 if (sock_flag(other, SOCK_DEAD) ||
1571                     (other->sk_shutdown & RCV_SHUTDOWN))
1572                         goto pipe_err_free;
1573
1574                 skb_queue_tail(&other->sk_receive_queue, skb);
1575                 unix_state_unlock(other);
1576                 other->sk_data_ready(other, size);
1577                 sent += size;
1578         }
1579
1580         scm_destroy(siocb->scm);
1581         siocb->scm = NULL;
1582
1583         return sent;
1584
1585 pipe_err_free:
1586         unix_state_unlock(other);
1587         kfree_skb(skb);
1588 pipe_err:
1589         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1590                 send_sig(SIGPIPE, current, 0);
1591         err = -EPIPE;
1592 out_err:
1593         scm_destroy(siocb->scm);
1594         siocb->scm = NULL;
1595         return sent ? : err;
1596 }
1597
1598 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1599                                   struct msghdr *msg, size_t len)
1600 {
1601         int err;
1602         struct sock *sk = sock->sk;
1603
1604         err = sock_error(sk);
1605         if (err)
1606                 return err;
1607
1608         if (sk->sk_state != TCP_ESTABLISHED)
1609                 return -ENOTCONN;
1610
1611         if (msg->msg_namelen)
1612                 msg->msg_namelen = 0;
1613
1614         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1615 }
1616
1617 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1618 {
1619         struct unix_sock *u = unix_sk(sk);
1620
1621         msg->msg_namelen = 0;
1622         if (u->addr) {
1623                 msg->msg_namelen = u->addr->len;
1624                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1625         }
1626 }
1627
     /*
      * Receive one datagram into msg.
      *
      * At most size bytes are copied; if the datagram is longer, MSG_TRUNC
      * is set in msg->msg_flags.  The sender's address (when msg_name is
      * set), credentials, security data and any passed files are delivered
      * along with the data.  With MSG_PEEK the files are duplicated rather
      * than detached from the skb.
      *
      * Returns the number of bytes copied or a negative errno, e.g.
      * -EOPNOTSUPP for MSG_OOB or whatever skb_recv_datagram() /
      * skb_copy_datagram_iovec() report.  A SEQPACKET socket with
      * RCV_SHUTDOWN set returns 0 instead of -EAGAIN.
      */
1628 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1629                               struct msghdr *msg, size_t size,
1630                               int flags)
1631 {
1632         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1633         struct scm_cookie tmp_scm;
1634         struct sock *sk = sock->sk;
1635         struct unix_sock *u = unix_sk(sk);
1636         int noblock = flags & MSG_DONTWAIT;
1637         struct sk_buff *skb;
1638         int err;
1639
1640         err = -EOPNOTSUPP;
1641         if (flags&MSG_OOB)
1642                 goto out;
1643
1644         msg->msg_namelen = 0;
1645
             /* u->readlock is held until the out_unlock label. */
1646         mutex_lock(&u->readlock);
1647
1648         skb = skb_recv_datagram(sk, flags, noblock, &err);
1649         if (!skb) {
1650                 unix_state_lock(sk);
1651                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1652                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1653                     (sk->sk_shutdown & RCV_SHUTDOWN))
1654                         err = 0;
1655                 unix_state_unlock(sk);
1656                 goto out_unlock;
1657         }
1658
             /* A queue slot was freed: wake a sender sleeping on peer_wait. */
1659         wake_up_interruptible_sync(&u->peer_wait);
1660
             /* Report the address of the socket the skb came from (skb->sk). */
1661         if (msg->msg_name)
1662                 unix_copy_addr(msg, skb->sk);
1663
1664         if (size > skb->len)
1665                 size = skb->len;
1666         else if (size < skb->len)
1667                 msg->msg_flags |= MSG_TRUNC;
1668
1669         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1670         if (err)
1671                 goto out_free;
1672
             /* Use a zeroed on-stack cookie when the caller did not provide one. */
1673         if (!siocb->scm) {
1674                 siocb->scm = &tmp_scm;
1675                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1676         }
1677         siocb->scm->creds = *UNIXCREDS(skb);
1678         unix_set_secdata(siocb->scm, skb);
1679
1680         if (!(flags & MSG_PEEK)) {
1681                 if (UNIXCB(skb).fp)
1682                         unix_detach_fds(siocb->scm, skb);
1683         } else {
1684                 /* It is questionable: on PEEK we could:
1685                    - do not return fds - good, but too simple 8)
1686                    - return fds, and do not return them on read (old strategy,
1687                      apparently wrong)
1688                    - clone fds (I chose it for now, it is the most universal
1689                      solution)
1690
1691                    POSIX 1003.1g does not actually define this clearly
1692                    at all. POSIX 1003.1g doesn't define a lot of things
1693                    clearly however!
1694
1695                 */
1696                 if (UNIXCB(skb).fp)
1697                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1698         }
1699         err = size;
1700
1701         scm_recv(sock, msg, siocb->scm, flags);
1702
1703 out_free:
1704         skb_free_datagram(sk, skb);
1705 out_unlock:
1706         mutex_unlock(&u->readlock);
1707 out:
1708         return err;
1709 }
1710
1711 /*
1712  *      Sleep until data has arrive. But check for races..
1713  */
1714
1715 static long unix_stream_data_wait(struct sock *sk, long timeo)
1716 {
1717         DEFINE_WAIT(wait);
1718
1719         unix_state_lock(sk);
1720
1721         for (;;) {
1722                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1723
1724                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1725                     sk->sk_err ||
1726                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1727                     signal_pending(current) ||
1728                     !timeo)
1729                         break;
1730
1731                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1732                 unix_state_unlock(sk);
1733                 timeo = schedule_timeout(timeo);
1734                 unix_state_lock(sk);
1735                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1736         }
1737
1738         finish_wait(sk->sk_sleep, &wait);
1739         unix_state_unlock(sk);
1740         return timeo;
1741 }
1742
1743
1744
/*
 *      Receive data from a stream socket.
 *
 *      Fails with -EINVAL unless the socket is in TCP_ESTABLISHED state and
 *      with -EOPNOTSUPP when MSG_OOB is requested.
 *
 *      Skbs are taken off the receive queue one at a time and copied into
 *      msg->msg_iov until `size` bytes have been copied or one of the
 *      following stops the loop:
 *        - the queue is empty and enough data (`target`) was copied, a
 *          socket error is pending, the receive side is shut down, or the
 *          timeout is zero (-EAGAIN);
 *        - a signal arrives while waiting for data;
 *        - the next skb carries credentials different from the first one;
 *        - the copy to the caller's iovec fails;
 *        - an skb was only partially consumed, or passed file descriptors
 *          were picked up;
 *        - MSG_PEEK is set (a peek looks at a single skb only).
 *
 *      u->readlock is held while dequeueing and copying and is dropped
 *      while sleeping for more data.
 *
 *      Returns the number of bytes copied when that is non-zero (this is
 *      -EFAULT if the very first copy failed), otherwise `err`.
 */
1745 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1746                                struct msghdr *msg, size_t size,
1747                                int flags)
1748 {
1749         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1750         struct scm_cookie tmp_scm;
1751         struct sock *sk = sock->sk;
1752         struct unix_sock *u = unix_sk(sk);
1753         struct sockaddr_un *sunaddr = msg->msg_name;
1754         int copied = 0;
1755         int check_creds = 0;
1756         int target;
1757         int err = 0;
1758         long timeo;
1759
1760         err = -EINVAL;
1761         if (sk->sk_state != TCP_ESTABLISHED)
1762                 goto out;
1763
1764         err = -EOPNOTSUPP;
1765         if (flags&MSG_OOB)
1766                 goto out;
1767
1768         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1769         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1770
1771         msg->msg_namelen = 0;
1772
1773         /* Lock the socket to prevent queue disordering
1774          * while we sleep copying data to the caller (memcpy_toiovec)
1775          */
1776
             /* No scm cookie supplied by the caller: use a zeroed one on the stack */
1777         if (!siocb->scm) {
1778                 siocb->scm = &tmp_scm;
1779                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1780         }
1781
1782         mutex_lock(&u->readlock);
1783
1784         do {
1785                 int chunk;
1786                 struct sk_buff *skb;
1787
1788                 unix_state_lock(sk);
1789                 skb = skb_dequeue(&sk->sk_receive_queue);
1790                 if (skb == NULL) {
                             /* Queue is empty: done if the low-water target is met */
1791                         if (copied >= target)
1792                                 goto unlock;
1793
1794                         /*
1795                          *      POSIX 1003.1g mandates this order.
1796                          */
1797
1798                         err = sock_error(sk);
1799                         if (err)
1800                                 goto unlock;
1801                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1802                                 goto unlock;
1803
1804                         unix_state_unlock(sk);
1805                         err = -EAGAIN;
1806                         if (!timeo)
1807                                 break;
                             /* Drop readlock for the duration of the sleep */
1808                         mutex_unlock(&u->readlock);
1809
1810                         timeo = unix_stream_data_wait(sk, timeo);
1811
1812                         if (signal_pending(current)) {
1813                                 err = sock_intr_errno(timeo);
                                     /* readlock is not held here, so skip the unlock below */
1814                                 goto out;
1815                         }
1816                         mutex_lock(&u->readlock);
1817                         continue;
1818  unlock:
1819                         unix_state_unlock(sk);
1820                         break;
1821                 }
1822                 unix_state_unlock(sk);
1823
1824                 if (check_creds) {
1825                         /* Never glue messages from different writers */
1826                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
1827                                    sizeof(siocb->scm->creds)) != 0) {
1828                                 skb_queue_head(&sk->sk_receive_queue, skb);
1829                                 break;
1830                         }
1831                 } else {
1832                         /* Copy credentials */
1833                         siocb->scm->creds = *UNIXCREDS(skb);
1834                         check_creds = 1;
1835                 }
1836
1837                 /* Copy address just once */
1838                 if (sunaddr) {
1839                         unix_copy_addr(msg, skb->sk);
1840                         sunaddr = NULL;
1841                 }
1842
1843                 chunk = min_t(unsigned int, skb->len, size);
1844                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
                             /* Copy failed: requeue the skb, report -EFAULT only if nothing was copied yet */
1845                         skb_queue_head(&sk->sk_receive_queue, skb);
1846                         if (copied == 0)
1847                                 copied = -EFAULT;
1848                         break;
1849                 }
1850                 copied += chunk;
1851                 size -= chunk;
1852
1853                 /* Mark read part of skb as used */
1854                 if (!(flags & MSG_PEEK)) {
1855                         skb_pull(skb, chunk);
1856
1857                         if (UNIXCB(skb).fp)
1858                                 unix_detach_fds(siocb->scm, skb);
1859
1860                         /* put the skb back if we didn't use it up.. */
1861                         if (skb->len) {
1862                                 skb_queue_head(&sk->sk_receive_queue, skb);
1863                                 break;
1864                         }
1865
1866                         kfree_skb(skb);
1867
                             /* Stop once file descriptors have been picked up */
1868                         if (siocb->scm->fp)
1869                                 break;
1870                 } else {
1871                         /* It is questionable, see note in unix_dgram_recvmsg.
1872                          */
1873                         if (UNIXCB(skb).fp)
1874                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1875
1876                         /* put message back and return */
1877                         skb_queue_head(&sk->sk_receive_queue, skb);
1878                         break;
1879                 }
1880         } while (size);
1881
1882         mutex_unlock(&u->readlock);
1883         scm_recv(sock, msg, siocb->scm, flags);
1884 out:
             /* Non-zero copied (byte count or -EFAULT) takes precedence over err */
1885         return copied ? : err;
1886 }
1887
1888 static int unix_shutdown(struct socket *sock, int mode)
1889 {
1890         struct sock *sk = sock->sk;
1891         struct sock *other;
1892
1893         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1894
1895         if (mode) {
1896                 unix_state_lock(sk);
1897                 sk->sk_shutdown |= mode;
1898                 other = unix_peer(sk);
1899                 if (other)
1900                         sock_hold(other);
1901                 unix_state_unlock(sk);
1902                 sk->sk_state_change(sk);
1903
1904                 if (other &&
1905                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1906
1907                         int peer_mode = 0;
1908
1909                         if (mode&RCV_SHUTDOWN)
1910                                 peer_mode |= SEND_SHUTDOWN;
1911                         if (mode&SEND_SHUTDOWN)
1912                                 peer_mode |= RCV_SHUTDOWN;
1913                         unix_state_lock(other);
1914                         other->sk_shutdown |= peer_mode;
1915                         unix_state_unlock(other);
1916                         other->sk_state_change(other);
1917                         read_lock(&other->sk_callback_lock);
1918                         if (peer_mode == SHUTDOWN_MASK)
1919                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1920                         else if (peer_mode & RCV_SHUTDOWN)
1921                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1922                         read_unlock(&other->sk_callback_lock);
1923                 }
1924                 if (other)
1925                         sock_put(other);
1926         }
1927         return 0;
1928 }
1929
1930 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1931 {
1932         struct sock *sk = sock->sk;
1933         long amount = 0;
1934         int err;
1935
1936         switch (cmd) {
1937         case SIOCOUTQ:
1938                 amount = atomic_read(&sk->sk_wmem_alloc);
1939                 err = put_user(amount, (int __user *)arg);
1940                 break;
1941         case SIOCINQ:
1942                 {
1943                         struct sk_buff *skb;
1944
1945                         if (sk->sk_state == TCP_LISTEN) {
1946                                 err = -EINVAL;
1947                                 break;
1948                         }
1949
1950                         spin_lock(&sk->sk_receive_queue.lock);
1951                         if (sk->sk_type == SOCK_STREAM ||
1952                             sk->sk_type == SOCK_SEQPACKET) {
1953                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1954                                         amount += skb->len;
1955                         } else {
1956                                 skb = skb_peek(&sk->sk_receive_queue);
1957                                 if (skb)
1958                                         amount = skb->len;
1959                         }
1960                         spin_unlock(&sk->sk_receive_queue.lock);
1961                         err = put_user(amount, (int __user *)arg);
1962                         break;
1963                 }
1964
1965         default:
1966                 err = -ENOIOCTLCMD;
1967                 break;
1968         }
1969         return err;
1970 }
1971
1972 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1973 {
1974         struct sock *sk = sock->sk;
1975         unsigned int mask;
1976
1977         poll_wait(file, sk->sk_sleep, wait);
1978         mask = 0;
1979
1980         /* exceptional events? */
1981         if (sk->sk_err)
1982                 mask |= POLLERR;
1983         if (sk->sk_shutdown == SHUTDOWN_MASK)
1984                 mask |= POLLHUP;
1985         if (sk->sk_shutdown & RCV_SHUTDOWN)
1986                 mask |= POLLRDHUP;
1987
1988         /* readable? */
1989         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1990             (sk->sk_shutdown & RCV_SHUTDOWN))
1991                 mask |= POLLIN | POLLRDNORM;
1992
1993         /* Connection-based need to check for termination and startup */
1994         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
1995             sk->sk_state == TCP_CLOSE)
1996                 mask |= POLLHUP;
1997
1998         /*
1999          * we set writable also when the other side has shut down the
2000          * connection. This prevents stuck sockets.
2001          */
2002         if (unix_writable(sk))
2003                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2004
2005         return mask;
2006 }
2007
2008 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2009                                     poll_table *wait)
2010 {
2011         struct sock *sk = sock->sk, *other;
2012         unsigned int mask, writable;
2013
2014         poll_wait(file, sk->sk_sleep, wait);
2015         mask = 0;
2016
2017         /* exceptional events? */
2018         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2019                 mask |= POLLERR;
2020         if (sk->sk_shutdown & RCV_SHUTDOWN)
2021                 mask |= POLLRDHUP;
2022         if (sk->sk_shutdown == SHUTDOWN_MASK)
2023                 mask |= POLLHUP;
2024
2025         /* readable? */
2026         if (!skb_queue_empty(&sk->sk_receive_queue) ||
2027             (sk->sk_shutdown & RCV_SHUTDOWN))
2028                 mask |= POLLIN | POLLRDNORM;
2029
2030         /* Connection-based need to check for termination and startup */
2031         if (sk->sk_type == SOCK_SEQPACKET) {
2032                 if (sk->sk_state == TCP_CLOSE)
2033                         mask |= POLLHUP;
2034                 /* connection hasn't started yet? */
2035                 if (sk->sk_state == TCP_SYN_SENT)
2036                         return mask;
2037         }
2038
2039         /* writable? */
2040         writable = unix_writable(sk);
2041         if (writable) {
2042                 other = unix_peer_get(sk);
2043                 if (other) {
2044                         if (unix_peer(other) != sk) {
2045                                 poll_wait(file, &unix_sk(other)->peer_wait,
2046                                           wait);
2047                                 if (unix_recvq_full(other))
2048                                         writable = 0;
2049                         }
2050
2051                         sock_put(other);
2052                 }
2053         }
2054
2055         if (writable)
2056                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2057         else
2058                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2059
2060         return mask;
2061 }
2062
2063 #ifdef CONFIG_PROC_FS
2064 static struct sock *first_unix_socket(int *i)
2065 {
2066         for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2067                 if (!hlist_empty(&unix_socket_table[*i]))
2068                         return __sk_head(&unix_socket_table[*i]);
2069         }
2070         return NULL;
2071 }
2072
2073 static struct sock *next_unix_socket(int *i, struct sock *s)
2074 {
2075         struct sock *next = sk_next(s);
2076         /* More in this chain? */
2077         if (next)
2078                 return next;
2079         /* Look for next non-empty chain. */
2080         for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2081                 if (!hlist_empty(&unix_socket_table[*i]))
2082                         return __sk_head(&unix_socket_table[*i]);
2083         }
2084         return NULL;
2085 }
2086
2087 struct unix_iter_state {
2088         struct seq_net_private p;
2089         int i;
2090 };
2091
2092 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2093 {
2094         struct unix_iter_state *iter = seq->private;
2095         loff_t off = 0;
2096         struct sock *s;
2097
2098         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2099                 if (sock_net(s) != seq_file_net(seq))
2100                         continue;
2101                 if (off == pos)
2102                         return s;
2103                 ++off;
2104         }
2105         return NULL;
2106 }
2107
2108 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2109         __acquires(unix_table_lock)
2110 {
2111         spin_lock(&unix_table_lock);
2112         return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2113 }
2114
2115 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2116 {
2117         struct unix_iter_state *iter = seq->private;
2118         struct sock *sk = v;
2119         ++*pos;
2120
2121         if (v == SEQ_START_TOKEN)
2122                 sk = first_unix_socket(&iter->i);
2123         else
2124                 sk = next_unix_socket(&iter->i, sk);
2125         while (sk && (sock_net(sk) != seq_file_net(seq)))
2126                 sk = next_unix_socket(&iter->i, sk);
2127         return sk;
2128 }
2129
2130 static void unix_seq_stop(struct seq_file *seq, void *v)
2131         __releases(unix_table_lock)
2132 {
2133         spin_unlock(&unix_table_lock);
2134 }
2135
2136 static int unix_seq_show(struct seq_file *seq, void *v)
2137 {
2138
2139         if (v == SEQ_START_TOKEN)
2140                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2141                          "Inode Path\n");
2142         else {
2143                 struct sock *s = v;
2144                 struct unix_sock *u = unix_sk(s);
2145                 unix_state_lock(s);
2146
2147                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2148                         s,
2149                         atomic_read(&s->sk_refcnt),
2150                         0,
2151                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2152                         s->sk_type,
2153                         s->sk_socket ?
2154                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2155                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2156                         sock_i_ino(s));
2157
2158                 if (u->addr) {
2159                         int i, len;
2160                         seq_putc(seq, ' ');
2161
2162                         i = 0;
2163                         len = u->addr->len - sizeof(short);
2164                         if (!UNIX_ABSTRACT(s))
2165                                 len--;
2166                         else {
2167                                 seq_putc(seq, '@');
2168                                 i++;
2169                         }
2170                         for ( ; i < len; i++)
2171                                 seq_putc(seq, u->addr->name->sun_path[i]);
2172                 }
2173                 unix_state_unlock(s);
2174                 seq_putc(seq, '\n');
2175         }
2176
2177         return 0;
2178 }
2179
2180 static const struct seq_operations unix_seq_ops = {
2181         .start  = unix_seq_start,
2182         .next   = unix_seq_next,
2183         .stop   = unix_seq_stop,
2184         .show   = unix_seq_show,
2185 };
2186
2187 static int unix_seq_open(struct inode *inode, struct file *file)
2188 {
2189         return seq_open_net(inode, file, &unix_seq_ops,
2190                             sizeof(struct unix_iter_state));
2191 }
2192
2193 static const struct file_operations unix_seq_fops = {
2194         .owner          = THIS_MODULE,
2195         .open           = unix_seq_open,
2196         .read           = seq_read,
2197         .llseek         = seq_lseek,
2198         .release        = seq_release_net,
2199 };
2200
2201 #endif
2202
2203 static struct net_proto_family unix_family_ops = {
2204         .family = PF_UNIX,
2205         .create = unix_create,
2206         .owner  = THIS_MODULE,
2207 };
2208
2209
2210 static int unix_net_init(struct net *net)
2211 {
2212         int error = -ENOMEM;
2213
2214         net->unx.sysctl_max_dgram_qlen = 10;
2215         if (unix_sysctl_register(net))
2216                 goto out;
2217
2218 #ifdef CONFIG_PROC_FS
2219         if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2220                 unix_sysctl_unregister(net);
2221                 goto out;
2222         }
2223 #endif
2224         error = 0;
2225 out:
2226         return error;
2227 }
2228
/*
 * Per-namespace teardown: removes the sysctl entries and the "unix" proc
 * entry of @net.
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2234
2235 static struct pernet_operations unix_net_ops = {
2236         .init = unix_net_init,
2237         .exit = unix_net_exit,
2238 };
2239
2240 static int __init af_unix_init(void)
2241 {
2242         int rc = -1;
2243         struct sk_buff *dummy_skb;
2244
2245         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2246
2247         rc = proto_register(&unix_proto, 1);
2248         if (rc != 0) {
2249                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2250                        __func__);
2251                 goto out;
2252         }
2253
2254         sock_register(&unix_family_ops);
2255         register_pernet_subsys(&unix_net_ops);
2256 out:
2257         return rc;
2258 }
2259
2260 static void __exit af_unix_exit(void)
2261 {
2262         sock_unregister(PF_UNIX);
2263         proto_unregister(&unix_proto);
2264         unregister_pernet_subsys(&unix_net_ops);
2265 }
2266
2267 /* Earlier than device_initcall() so that other drivers invoking
2268    request_module() don't end up in a loop when modprobe tries
2269    to use a UNIX socket. But later than subsys_initcall() because
2270    we depend on stuff initialised there */
2271 fs_initcall(af_unix_init);
2272 module_exit(af_unix_exit);
2273
2274 MODULE_LICENSE("GPL");
2275 MODULE_ALIAS_NETPROTO(PF_UNIX);