2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
 
   3  *              operating system.  INET is implemented using the  BSD Socket
 
   4  *              interface as the means of communication with the user level.
 
   6  *              Generic socket support routines. Memory allocators, socket lock/release
 
   7  *              handler for protocols to use and generic option handler.
 
  10  * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 
  13  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
 
  15  *              Alan Cox, <A.Cox@swansea.ac.uk>
 
  18  *              Alan Cox        :       Numerous verify_area() problems
 
  19  *              Alan Cox        :       Connecting on a connecting socket
 
  20  *                                      now returns an error for tcp.
 
  21  *              Alan Cox        :       sock->protocol is set correctly.
 
  22  *                                      and is not sometimes left as 0.
 
  23  *              Alan Cox        :       connect handles icmp errors on a
 
  24  *                                      connect properly. Unfortunately there
 
  25  *                                      is a restart syscall nasty there. I
 
  26  *                                      can't match BSD without hacking the C
 
  27  *                                      library. Ideas urgently sought!
 
  28  *              Alan Cox        :       Disallow bind() to addresses that are
 
  29  *                                      not ours - especially broadcast ones!!
 
  30  *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 
  31  *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 
  32  *                                      instead they leave that for the DESTROY timer.
 
  33  *              Alan Cox        :       Clean up error flag in accept
 
  34  *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 
  35  *                                      was buggy. Put a remove_sock() in the handler
 
  36  *                                      for memory when we hit 0. Also altered the timer
 
  37  *                                      code. The ACK stuff can wait and needs major 
 
  39  *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 
  40  *                                      and fixed timer/inet_bh race.
 
  41  *              Alan Cox        :       Added zapped flag for TCP
 
  42  *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 
  43  *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 
  44  *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 
  45  *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 
  46  *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 
  47  *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 
  48  *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 
  49  *      Pauline Middelink       :       identd support
 
  50  *              Alan Cox        :       Fixed connect() taking signals I think.
 
  51  *              Alan Cox        :       SO_LINGER supported
 
  52  *              Alan Cox        :       Error reporting fixes
 
  53  *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 
  54  *              Alan Cox        :       inet sockets don't set sk->type!
 
  55  *              Alan Cox        :       Split socket option code
 
  56  *              Alan Cox        :       Callbacks
 
  57  *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 
  58  *              Alex            :       Removed restriction on inet fioctl
 
  59  *              Alan Cox        :       Splitting INET from NET core
 
  60  *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 
  61  *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 
  62  *              Alan Cox        :       Split IP from generic code
 
  63  *              Alan Cox        :       New kfree_skbmem()
 
  64  *              Alan Cox        :       Make SO_DEBUG superuser only.
 
  65  *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 
  67  *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 
  68  *              Alan Cox        :       Allocator for a socket is settable.
 
  69  *              Alan Cox        :       SO_ERROR includes soft errors.
 
  70  *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 
  71  *              Alan Cox        :       Generic socket allocation to make hooks
 
  72  *                                      easier (suggested by Craig Metz).
 
  73  *              Michael Pall    :       SO_ERROR returns positive errno again
 
  74  *              Steve Whitehouse:       Added default destructor to free
 
  75  *                                      protocol private data.
 
  76  *              Steve Whitehouse:       Added various other default routines
 
  77  *                                      common to several socket families.
 
  78  *              Chris Evans     :       Call suser() check last on F_SETOWN
 
  79  *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 
  80  *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 
  81  *              Andi Kleen      :       Fix write_space callback
 
  82  *              Chris Evans     :       Security fixes - signedness again
 
  83  *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 
  88  *              This program is free software; you can redistribute it and/or
 
  89  *              modify it under the terms of the GNU General Public License
 
  90  *              as published by the Free Software Foundation; either version
 
  91  *              2 of the License, or (at your option) any later version.
 
  94 #include <linux/capability.h>
 
  95 #include <linux/errno.h>
 
  96 #include <linux/types.h>
 
  97 #include <linux/socket.h>
 
  99 #include <linux/kernel.h>
 
 100 #include <linux/module.h>
 
 101 #include <linux/proc_fs.h>
 
 102 #include <linux/seq_file.h>
 
 103 #include <linux/sched.h>
 
 104 #include <linux/timer.h>
 
 105 #include <linux/string.h>
 
 106 #include <linux/sockios.h>
 
 107 #include <linux/net.h>
 
 108 #include <linux/mm.h>
 
 109 #include <linux/slab.h>
 
 110 #include <linux/interrupt.h>
 
 111 #include <linux/poll.h>
 
 112 #include <linux/tcp.h>
 
 113 #include <linux/init.h>
 
 115 #include <asm/uaccess.h>
 
 116 #include <asm/system.h>
 
 118 #include <linux/netdevice.h>
 
 119 #include <net/protocol.h>
 
 120 #include <linux/skbuff.h>
 
 121 #include <net/request_sock.h>
 
 122 #include <net/sock.h>
 
 123 #include <net/xfrm.h>
 
 124 #include <linux/ipsec.h>
 
 126 #include <linux/filter.h>
 
 133  * Each address family might have different locking rules, so we have
 
 134  * one slock key per address family:
 
 136 static struct lock_class_key af_family_keys[AF_MAX];
 
 137 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
 139 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 141  * Make lock validator output more readable. (we pre-construct these
 
 142  * strings at build time, so that runtime initialization of socket
 
 145 static const char *af_family_key_strings[AF_MAX+1] = {
 
 146   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
 
 147   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
 
 148   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
 
 149   "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
 
 150   "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
 
 151   "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
 
 152   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
 
 153   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
 
 154   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
 
 155   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
 
 156   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
 
 158 static const char *af_family_slock_key_strings[AF_MAX+1] = {
 
 159   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
 
 160   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
 
 161   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
 
 162   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
 
 163   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
 
 164   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
 
 165   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
 
 166   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
 
 167   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
 
 168   "slock-27"       , "slock-28"          , "slock-29"          ,
 
 169   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_MAX"
 
 174  * sk_callback_lock locking rules are per-address-family,
 
 175  * so split the lock classes by using a per-AF key:
 
 177 static struct lock_class_key af_callback_keys[AF_MAX];
 
 179 /* Take into consideration the size of the struct sk_buff overhead in the
 
 180  * determination of these values, since that is non-constant across
 
 181  * platforms.  This makes socket queueing behavior and performance
 
 182  * not depend upon such differences.
 
 184 #define _SK_MEM_PACKETS         256
 
 185 #define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
 
 186 #define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
 187 #define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
 189 /* Run time adjustable parameters. */
 
 190 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
 
 191 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
 
 192 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
 
 193 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
 195 /* Maximal space eaten by iovec or ancillary data plus some space */
 
 196 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 
 198 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 
 202         if (optlen < sizeof(tv))
 
 204         if (copy_from_user(&tv, optval, sizeof(tv)))
 
 207         *timeo_p = MAX_SCHEDULE_TIMEOUT;
 
 208         if (tv.tv_sec == 0 && tv.tv_usec == 0)
 
 210         if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
 
 211                 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
 
 215 static void sock_warn_obsolete_bsdism(const char *name)
 
 218         static char warncomm[TASK_COMM_LEN];
 
 219         if (strcmp(warncomm, current->comm) && warned < 5) { 
 
 220                 strcpy(warncomm,  current->comm); 
 
 221                 printk(KERN_WARNING "process `%s' is using obsolete "
 
 222                        "%s SO_BSDCOMPAT\n", warncomm, name);
 
 227 static void sock_disable_timestamp(struct sock *sk)
 
 229         if (sock_flag(sk, SOCK_TIMESTAMP)) { 
 
 230                 sock_reset_flag(sk, SOCK_TIMESTAMP);
 
 231                 net_disable_timestamp();
 
 236 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 241         /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
 
 242            number of warnings when compiling with -W --ANK
 
 244         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 
 245             (unsigned)sk->sk_rcvbuf) {
 
 250         err = sk_filter(sk, skb);
 
 255         skb_set_owner_r(skb, sk);
 
 257         /* Cache the SKB length before we tack it onto the receive
 
 258          * queue.  Once it is added it no longer belongs to us and
 
 259          * may be freed by other threads of control pulling packets
 
 264         skb_queue_tail(&sk->sk_receive_queue, skb);
 
 266         if (!sock_flag(sk, SOCK_DEAD))
 
 267                 sk->sk_data_ready(sk, skb_len);
 
 271 EXPORT_SYMBOL(sock_queue_rcv_skb);
 
 273 int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
 
 275         int rc = NET_RX_SUCCESS;
 
 277         if (sk_filter(sk, skb))
 
 278                 goto discard_and_relse;
 
 283         if (!sock_owned_by_user(sk)) {
 
 285                  * trylock + unlock semantics:
 
 287                 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
 
 289                 rc = sk->sk_backlog_rcv(sk, skb);
 
 291                 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
 
 293                 sk_add_backlog(sk, skb);
 
 302 EXPORT_SYMBOL(sk_receive_skb);
 
 304 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 
 306         struct dst_entry *dst = sk->sk_dst_cache;
 
 308         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 
 309                 sk->sk_dst_cache = NULL;
 
 316 EXPORT_SYMBOL(__sk_dst_check);
 
 318 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
 
 320         struct dst_entry *dst = sk_dst_get(sk);
 
 322         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 
 330 EXPORT_SYMBOL(sk_dst_check);
 
 333  *      This is meant for all protocols to use and covers goings on
 
 334  *      at the socket level. Everything here is generic.
 
 337 int sock_setsockopt(struct socket *sock, int level, int optname,
 
 338                     char __user *optval, int optlen)
 
 340         struct sock *sk=sock->sk;
 
 341         struct sk_filter *filter;
 
 348          *      Options without arguments
 
 351 #ifdef SO_DONTLINGER            /* Compatibility item... */
 
 352         if (optname == SO_DONTLINGER) {
 
 354                 sock_reset_flag(sk, SOCK_LINGER);
 
 360         if(optlen<sizeof(int))
 
 363         if (get_user(val, (int __user *)optval))
 
 373                         if(val && !capable(CAP_NET_ADMIN))
 
 378                                 sock_set_flag(sk, SOCK_DBG);
 
 380                                 sock_reset_flag(sk, SOCK_DBG);
 
 383                         sk->sk_reuse = valbool;
 
 391                                 sock_set_flag(sk, SOCK_LOCALROUTE);
 
 393                                 sock_reset_flag(sk, SOCK_LOCALROUTE);
 
 396                         sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
 
 399                         /* Don't error on this; BSD doesn't, and if you think
 
 400                            about it this is right. Otherwise apps have to
 
 401                            play 'guess the biggest size' games. RCVBUF/SNDBUF
 
 402                            are treated in BSD as hints */
 
 404                         if (val > sysctl_wmem_max)
 
 405                                 val = sysctl_wmem_max;
 
 407                         sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 
 408                         if ((val * 2) < SOCK_MIN_SNDBUF)
 
 409                                 sk->sk_sndbuf = SOCK_MIN_SNDBUF;
 
 411                                 sk->sk_sndbuf = val * 2;
 
 414                          *      Wake up sending tasks if we
 
 417                         sk->sk_write_space(sk);
 
 421                         if (!capable(CAP_NET_ADMIN)) {
 
 428                         /* Don't error on this; BSD doesn't, and if you think
 
 429                            about it this is right. Otherwise apps have to
 
 430                            play 'guess the biggest size' games. RCVBUF/SNDBUF
 
 431                            are treated in BSD as hints */
 
 433                         if (val > sysctl_rmem_max)
 
 434                                 val = sysctl_rmem_max;
 
 436                         sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 
 438                          * We double it on the way in to account for
 
 439                          * "struct sk_buff" etc. overhead.   Applications
 
 440                          * assume that the SO_RCVBUF setting they make will
 
 441                          * allow that much actual data to be received on that
 
 444                          * Applications are unaware that "struct sk_buff" and
 
 445                          * other overheads allocate from the receive buffer
 
 446                          * during socket buffer allocation.
 
 448                          * And after considering the possible alternatives,
 
 449                          * returning the value we actually used in getsockopt
 
 450                          * is the most desirable behavior.
 
 452                         if ((val * 2) < SOCK_MIN_RCVBUF)
 
 453                                 sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
 
 455                                 sk->sk_rcvbuf = val * 2;
 
 459                         if (!capable(CAP_NET_ADMIN)) {
 
 467                         if (sk->sk_protocol == IPPROTO_TCP)
 
 468                                 tcp_set_keepalive(sk, valbool);
 
 470                         sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
 
 474                         sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
 
 478                         sk->sk_no_check = valbool;
 
 482                         if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) 
 
 483                                 sk->sk_priority = val;
 
 489                         if(optlen<sizeof(ling)) {
 
 490                                 ret = -EINVAL;  /* 1003.1g */
 
 493                         if (copy_from_user(&ling,optval,sizeof(ling))) {
 
 498                                 sock_reset_flag(sk, SOCK_LINGER);
 
 500 #if (BITS_PER_LONG == 32)
 
 501                                 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
 
 502                                         sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
 
 505                                         sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
 
 506                                 sock_set_flag(sk, SOCK_LINGER);
 
 511                         sock_warn_obsolete_bsdism("setsockopt");
 
 516                                 set_bit(SOCK_PASSCRED, &sock->flags);
 
 518                                 clear_bit(SOCK_PASSCRED, &sock->flags);
 
 523                                 sock_set_flag(sk, SOCK_RCVTSTAMP);
 
 524                                 sock_enable_timestamp(sk);
 
 526                                 sock_reset_flag(sk, SOCK_RCVTSTAMP);
 
 532                         sk->sk_rcvlowat = val ? : 1;
 
 536                         ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
 
 540                         ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
 
 543 #ifdef CONFIG_NETDEVICES
 
 544                 case SO_BINDTODEVICE:
 
 546                         char devname[IFNAMSIZ]; 
 
 549                         if (!capable(CAP_NET_RAW)) {
 
 554                         /* Bind this socket to a particular device like "eth0",
 
 555                          * as specified in the passed interface name. If the
 
 556                          * name is "" or the option length is zero the socket 
 
 561                                 sk->sk_bound_dev_if = 0;
 
 563                                 if (optlen > IFNAMSIZ - 1)
 
 564                                         optlen = IFNAMSIZ - 1;
 
 565                                 memset(devname, 0, sizeof(devname));
 
 566                                 if (copy_from_user(devname, optval, optlen)) {
 
 571                                 /* Remove any cached route for this socket. */
 
 574                                 if (devname[0] == '\0') {
 
 575                                         sk->sk_bound_dev_if = 0;
 
 577                                         struct net_device *dev = dev_get_by_name(devname);
 
 582                                         sk->sk_bound_dev_if = dev->ifindex;
 
 591                 case SO_ATTACH_FILTER:
 
 593                         if (optlen == sizeof(struct sock_fprog)) {
 
 594                                 struct sock_fprog fprog;
 
 597                                 if (copy_from_user(&fprog, optval, sizeof(fprog)))
 
 600                                 ret = sk_attach_filter(&fprog, sk);
 
 604                 case SO_DETACH_FILTER:
 
 606                         filter = rcu_dereference(sk->sk_filter);
 
 608                                 rcu_assign_pointer(sk->sk_filter, NULL);
 
 609                                 sk_filter_release(sk, filter);
 
 610                                 rcu_read_unlock_bh();
 
 613                         rcu_read_unlock_bh();
 
 619                                 set_bit(SOCK_PASSSEC, &sock->flags);
 
 621                                 clear_bit(SOCK_PASSSEC, &sock->flags);
 
 624                 /* We implement the SO_SNDLOWAT etc to
 
 625                    not be settable (1003.1g 5.3) */
 
 635 int sock_getsockopt(struct socket *sock, int level, int optname,
 
 636                     char __user *optval, int __user *optlen)
 
 638         struct sock *sk = sock->sk;
 
 647         unsigned int lv = sizeof(int);
 
 650         if(get_user(len,optlen))
 
 658                         v.val = sock_flag(sk, SOCK_DBG);
 
 662                         v.val = sock_flag(sk, SOCK_LOCALROUTE);
 
 666                         v.val = !!sock_flag(sk, SOCK_BROADCAST);
 
 670                         v.val = sk->sk_sndbuf;
 
 674                         v.val = sk->sk_rcvbuf;
 
 678                         v.val = sk->sk_reuse;
 
 682                         v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
 
 690                         v.val = -sock_error(sk);
 
 692                                 v.val = xchg(&sk->sk_err_soft, 0);
 
 696                         v.val = !!sock_flag(sk, SOCK_URGINLINE);
 
 700                         v.val = sk->sk_no_check;
 
 704                         v.val = sk->sk_priority;
 
 709                         v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
 
 710                         v.ling.l_linger = sk->sk_lingertime / HZ;
 
 714                         sock_warn_obsolete_bsdism("getsockopt");
 
 718                         v.val = sock_flag(sk, SOCK_RCVTSTAMP);
 
 722                         lv=sizeof(struct timeval);
 
 723                         if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
 
 727                                 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
 
 728                                 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
 
 733                         lv=sizeof(struct timeval);
 
 734                         if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
 
 738                                 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
 
 739                                 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
 
 744                         v.val = sk->sk_rcvlowat;
 
 752                         v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
 
 756                         if (len > sizeof(sk->sk_peercred))
 
 757                                 len = sizeof(sk->sk_peercred);
 
 758                         if (copy_to_user(optval, &sk->sk_peercred, len))
 
 766                         if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
 
 770                         if (copy_to_user(optval, address, len))
 
 775                 /* Dubious BSD thing... Probably nobody even uses it, but
 
 776                  * the UNIX standard wants it for whatever reason... -DaveM
 
 779                         v.val = sk->sk_state == TCP_LISTEN;
 
 783                         v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
 
 787                         return security_socket_getpeersec_stream(sock, optval, optlen, len);
 
 790                         return(-ENOPROTOOPT);
 
 794         if (copy_to_user(optval, &v, len))
 
 797         if (put_user(len, optlen))
 
 803  * Initialize an sk_lock.
 
 805  * (We also register the sk_lock with the lock validator.)
 
 807 static void inline sock_lock_init(struct sock *sk)
 
 809         spin_lock_init(&sk->sk_lock.slock);
 
 810         sk->sk_lock.owner = NULL;
 
 811         init_waitqueue_head(&sk->sk_lock.wq);
 
 813          * Make sure we are not reinitializing a held lock:
 
 815         debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock));
 
 818          * Mark both the sk_lock and the sk_lock.slock as a
 
 819          * per-address-family lock class:
 
 821         lockdep_set_class_and_name(&sk->sk_lock.slock,
 
 822                                    af_family_slock_keys + sk->sk_family,
 
 823                                    af_family_slock_key_strings[sk->sk_family]);
 
 824         lockdep_init_map(&sk->sk_lock.dep_map,
 
 825                          af_family_key_strings[sk->sk_family],
 
 826                          af_family_keys + sk->sk_family);
 
 830  *      sk_alloc - All socket objects are allocated here
 
 831  *      @family: protocol family
 
 832  *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 
 833  *      @prot: struct proto associated with this new sock instance
 
 834  *      @zero_it: if we should zero the newly allocated sock
 
 836 struct sock *sk_alloc(int family, gfp_t priority,
 
 837                       struct proto *prot, int zero_it)
 
 839         struct sock *sk = NULL;
 
 840         kmem_cache_t *slab = prot->slab;
 
 843                 sk = kmem_cache_alloc(slab, priority);
 
 845                 sk = kmalloc(prot->obj_size, priority);
 
 849                         memset(sk, 0, prot->obj_size);
 
 850                         sk->sk_family = family;
 
 852                          * See comment in struct sock definition to understand
 
 853                          * why we need sk_prot_creator -acme
 
 855                         sk->sk_prot = sk->sk_prot_creator = prot;
 
 859                 if (security_sk_alloc(sk, family, priority))
 
 862                 if (!try_module_get(prot->owner))
 
 869                 kmem_cache_free(slab, sk);
 
 875 void sk_free(struct sock *sk)
 
 877         struct sk_filter *filter;
 
 878         struct module *owner = sk->sk_prot_creator->owner;
 
 883         filter = rcu_dereference(sk->sk_filter);
 
 885                 sk_filter_release(sk, filter);
 
 886                 rcu_assign_pointer(sk->sk_filter, NULL);
 
 889         sock_disable_timestamp(sk);
 
 891         if (atomic_read(&sk->sk_omem_alloc))
 
 892                 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 
 893                        __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 
 895         security_sk_free(sk);
 
 896         if (sk->sk_prot_creator->slab != NULL)
 
 897                 kmem_cache_free(sk->sk_prot_creator->slab, sk);
 
 903 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 
 905         struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
 
 908                 struct sk_filter *filter;
 
 910                 sock_copy(newsk, sk);
 
 913                 sk_node_init(&newsk->sk_node);
 
 914                 sock_lock_init(newsk);
 
 917                 atomic_set(&newsk->sk_rmem_alloc, 0);
 
 918                 atomic_set(&newsk->sk_wmem_alloc, 0);
 
 919                 atomic_set(&newsk->sk_omem_alloc, 0);
 
 920                 skb_queue_head_init(&newsk->sk_receive_queue);
 
 921                 skb_queue_head_init(&newsk->sk_write_queue);
 
 922 #ifdef CONFIG_NET_DMA
 
 923                 skb_queue_head_init(&newsk->sk_async_wait_queue);
 
 926                 rwlock_init(&newsk->sk_dst_lock);
 
 927                 rwlock_init(&newsk->sk_callback_lock);
 
 928                 lockdep_set_class(&newsk->sk_callback_lock,
 
 929                                    af_callback_keys + newsk->sk_family);
 
 931                 newsk->sk_dst_cache     = NULL;
 
 932                 newsk->sk_wmem_queued   = 0;
 
 933                 newsk->sk_forward_alloc = 0;
 
 934                 newsk->sk_send_head     = NULL;
 
 935                 newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
 
 936                 newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 
 938                 sock_reset_flag(newsk, SOCK_DONE);
 
 939                 skb_queue_head_init(&newsk->sk_error_queue);
 
 941                 filter = newsk->sk_filter;
 
 943                         sk_filter_charge(newsk, filter);
 
 945                 if (unlikely(xfrm_sk_clone_policy(newsk))) {
 
 946                         /* It is still a raw copy of the parent, so invalidate
 
 947                          * the destructor and do a plain sk_free() */
 
 948                         newsk->sk_destruct = NULL;
 
 955                 newsk->sk_priority = 0;
 
 956                 atomic_set(&newsk->sk_refcnt, 2);
 
 959                  * Increment the counter in the same struct proto as the master
 
 960                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
 
 961                  * is the same as sk->sk_prot->socks, as this field was copied
 
 964                  * This _changes_ the previous behaviour, where
 
 965                  * tcp_create_openreq_child always was incrementing the
 
 966                  * equivalent to tcp_prot->socks (inet_sock_nr), so this has
 
 967                  * to be taken into account in all callers. -acme
 
 969                 sk_refcnt_debug_inc(newsk);
 
 970                 newsk->sk_socket = NULL;
 
 971                 newsk->sk_sleep  = NULL;
 
 973                 if (newsk->sk_prot->sockets_allocated)
 
 974                         atomic_inc(newsk->sk_prot->sockets_allocated);
 
 980 EXPORT_SYMBOL_GPL(sk_clone);
 
 982 void __init sk_init(void)
 
 984         if (num_physpages <= 4096) {
 
 985                 sysctl_wmem_max = 32767;
 
 986                 sysctl_rmem_max = 32767;
 
 987                 sysctl_wmem_default = 32767;
 
 988                 sysctl_rmem_default = 32767;
 
 989         } else if (num_physpages >= 131072) {
 
 990                 sysctl_wmem_max = 131071;
 
 991                 sysctl_rmem_max = 131071;
 
 996  *      Simple resource managers for sockets.
 
1001  * Write buffer destructor automatically called from kfree_skb. 
 
1003 void sock_wfree(struct sk_buff *skb)
 
1005         struct sock *sk = skb->sk;
 
1007         /* In case it might be waiting for more memory. */
 
1008         atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
 
1009         if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
 
1010                 sk->sk_write_space(sk);
 
1015  * Read buffer destructor automatically called from kfree_skb. 
 
1017 void sock_rfree(struct sk_buff *skb)
 
1019         struct sock *sk = skb->sk;
 
1021         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 
1025 int sock_i_uid(struct sock *sk)
 
1029         read_lock(&sk->sk_callback_lock);
 
1030         uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
 
1031         read_unlock(&sk->sk_callback_lock);
 
1035 unsigned long sock_i_ino(struct sock *sk)
 
1039         read_lock(&sk->sk_callback_lock);
 
1040         ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
 
1041         read_unlock(&sk->sk_callback_lock);
 
1046  * Allocate a skb from the socket's send buffer.
 
/*
 * @sk:    socket the buffer is charged to
 * @size:  size handed to alloc_skb()
 * @force: non-zero bypasses the sk_sndbuf limit check
 *
 * Allocates only when @force is set or sk_wmem_alloc is still below
 * sk_sndbuf, and makes @sk the write-owner of the skb via skb_set_owner_w().
 * NOTE(review): the rest of the parameter list, the allocation-failure check
 * and the return statements are not shown in this listing.
 */
1048 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 
1051         if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
 
1052                 struct sk_buff * skb = alloc_skb(size, priority);
 
1054                         skb_set_owner_w(skb, sk);
 
1062  * Allocate a skb from the socket's receive buffer.
 
/*
 * @sk:    socket the buffer is charged to
 * @size:  size handed to alloc_skb()
 * @force: non-zero bypasses the sk_rcvbuf limit check
 *
 * Allocates only when @force is set or sk_rmem_alloc is still below
 * sk_rcvbuf, and makes @sk the read-owner of the skb via skb_set_owner_r().
 * NOTE(review): the rest of the parameter list, the allocation-failure check
 * and the return statements are not shown in this listing.
 */
1064 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
 
1067         if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
 
1068                 struct sk_buff *skb = alloc_skb(size, priority);
 
1070                         skb_set_owner_r(skb, sk);
 
1078  * Allocate a memory block from the socket's option memory buffer.
 
/*
 * @sk:       socket whose option-memory counter (sk_omem_alloc) is charged
 * @size:     number of bytes requested
 * @priority: gfp flags handed to kmalloc()
 *
 * The allocation is attempted only when @size does not exceed
 * sysctl_optmem_max and the socket's current sk_omem_alloc plus @size stays
 * below that limit.  The charge is added before kmalloc() runs and is
 * subtracted again on the path that follows the allocation.
 * NOTE(review): presumably the subtraction only runs when kmalloc() fails;
 * the lines deciding that, and the return statements, are not shown in this
 * listing.
 */
1080 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
 
1082         if ((unsigned)size <= sysctl_optmem_max &&
 
1083             atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
 
1085                 /* First do the add, to avoid the race if kmalloc
 
1088                 atomic_add(size, &sk->sk_omem_alloc);
 
1089                 mem = kmalloc(size, priority);
 
1092                 atomic_sub(size, &sk->sk_omem_alloc);
 
1098  * Free an option memory block.
 
/*
 * @sk:   socket that was charged for the block
 * @mem:  block being released
 * @size: number of bytes to remove from the socket's sk_omem_alloc counter
 *
 * NOTE(review): the statement that frees @mem is not shown in this listing.
 */
1100 void sock_kfree_s(struct sock *sk, void *mem, int size)
 
1103         atomic_sub(size, &sk->sk_omem_alloc);
 
1106 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
 
1107    I think, these locks should be removed for datagram sockets.
 
/*
 * @sk:    socket to wait on
 * @timeo: remaining timeout, in the form schedule_timeout() takes
 *
 * Clears SOCK_ASYNC_NOSPACE, then sleeps interruptibly on sk->sk_sleep using
 * schedule_timeout().  Around each sleep it checks for a pending signal, sets
 * SOCK_NOSPACE, and tests whether sk_wmem_alloc has dropped below sk_sndbuf
 * or the socket has been shut down for sending.
 * NOTE(review): the loop construct, its exit statements and the return value
 * are not shown in this listing.
 */
1109 static long sock_wait_for_wmem(struct sock * sk, long timeo)
 
1113         clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
1117                 if (signal_pending(current))
 
1119                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 
1120                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
1121                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
 
1123                 if (sk->sk_shutdown & SEND_SHUTDOWN)
 
1127                 timeo = schedule_timeout(timeo);
 
1129         finish_wait(sk->sk_sleep, &wait);
 
1135  *      Generic send/receive buffer handlers
 
/*
 * @sk:         socket the buffer is charged to
 * @header_len: size of the linear part, handed to alloc_skb()
 * @data_len:   number of bytes to provide as page fragments
 * @noblock:    handed to sock_sndtimeo() to select the timeout
 * @errcode:    out parameter for the error code
 *
 * While sk_wmem_alloc is below sk_sndbuf, allocates an skb and, for
 * @data_len bytes, one order-0 page per PAGE_SIZE chunk, recording each page
 * in skb_shinfo(skb)->frags[].  If a page allocation fails, nr_frags is cut
 * back to the number of pages obtained so far.  When the send buffer is full
 * both NOSPACE bits are set and the caller waits in sock_wait_for_wmem().
 * On success @sk becomes the write-owner of the skb.
 *
 * NOTE(review): gfp_mask is computed (sk_allocation, plus __GFP_REPEAT when
 * __GFP_WAIT is set) but the allocation calls visible here pass
 * sk->sk_allocation instead -- confirm whether gfp_mask was meant to be used.
 * NOTE(review): a number of lines of this function (loop construct, error
 * exits, returns) are not shown in this listing.
 */
1138 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
 
1139                                             unsigned long header_len,
 
1140                                             unsigned long data_len,
 
1141                                             int noblock, int *errcode)
 
1143         struct sk_buff *skb;
 
1148         gfp_mask = sk->sk_allocation;
 
1149         if (gfp_mask & __GFP_WAIT)
 
1150                 gfp_mask |= __GFP_REPEAT;
 
1152         timeo = sock_sndtimeo(sk, noblock);
 
1154                 err = sock_error(sk);
 
1159                 if (sk->sk_shutdown & SEND_SHUTDOWN)
 
1162                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
 
1163                         skb = alloc_skb(header_len, sk->sk_allocation);
 
1168                                 /* No pages, we're done... */
 
1172                                 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 
1173                                 skb->truesize += data_len;
 
1174                                 skb_shinfo(skb)->nr_frags = npages;
 
1175                                 for (i = 0; i < npages; i++) {
 
1179                                         page = alloc_pages(sk->sk_allocation, 0);
 
1182                                                 skb_shinfo(skb)->nr_frags = i;
 
1187                                         frag = &skb_shinfo(skb)->frags[i];
 
1189                                         frag->page_offset = 0;
 
1190                                         frag->size = (data_len >= PAGE_SIZE ?
 
1193                                         data_len -= PAGE_SIZE;
 
1196                                 /* Full success... */
 
1202                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
1203                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 
1207                 if (signal_pending(current))
 
1209                 timeo = sock_wait_for_wmem(sk, timeo);
 
1212         skb_set_owner_w(skb, sk);
 
1216         err = sock_intr_errno(timeo);
 
/*
 * Allocate a send buffer of @size bytes for @sk.
 *
 * Thin wrapper around sock_alloc_send_pskb(): @size is passed as the header
 * length and the data length is 0, so no page fragments are requested.
 * @noblock and @errcode are forwarded unchanged, and the wrapper returns
 * whatever sock_alloc_send_pskb() returns.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	struct sk_buff *skb;

	skb = sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
	return skb;
}
 
/*
 * Waits on sk_lock.wq (exclusive, TASK_UNINTERRUPTIBLE) for the socket to
 * stop being owned: sk_lock.slock is dropped around each wait and re-taken
 * before sock_owned_by_user() is tested again.
 * NOTE(review): the first visible operation on sk_lock.slock is an unlock, so
 * this appears to expect the spinlock held on entry -- confirm against the
 * caller.  The loop construct and the sleep call are not shown in this
 * listing.
 */
1228 static void __lock_sock(struct sock *sk)
 
1233                 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
 
1234                                         TASK_UNINTERRUPTIBLE);
 
1235                 spin_unlock_bh(&sk->sk_lock.slock);
 
1237                 spin_lock_bh(&sk->sk_lock.slock);
 
1238                 if(!sock_owned_by_user(sk))
 
1241         finish_wait(&sk->sk_lock.wq, &wait);
 
/*
 * Drains the socket backlog: detaches the whole sk_backlog list (head and
 * tail reset to NULL), hands each skb to sk->sk_backlog_rcv() with a
 * cond_resched_softirq() after every packet, and repeats for as long as new
 * packets have been queued on sk_backlog.head in the meantime.
 * NOTE(review): the locking statements around the inner loop are not shown
 * in this listing.
 */
1244 static void __release_sock(struct sock *sk)
 
1246         struct sk_buff *skb = sk->sk_backlog.head;
 
1249                 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
 
1253                         struct sk_buff *next = skb->next;
 
1256                         sk->sk_backlog_rcv(sk, skb);
 
1259                          * We are in process context here with softirqs
 
1260                          * disabled, use cond_resched_softirq() to preempt.
 
1261                          * This is safe to do because we've taken the backlog
 
1264                         cond_resched_softirq();
 
1267                 } while (skb != NULL);
 
1270         } while((skb = sk->sk_backlog.head) != NULL);
 
1274  * sk_wait_data - wait for data to arrive at sk_receive_queue
 
1275  * @sk:    sock to wait on
 
1276  * @timeo: for how long
 
1278  * Now socket state including sk->sk_err is changed only under lock,
 
1279  * hence we may omit checks after joining wait queue.
 
1280  * We check receive queue before schedule() only as optimization;
 
1281  * it is very likely that release_sock() added new data.
 
/*
 * Sleeps interruptibly on sk->sk_sleep, using sk_wait_event() with the
 * condition "sk_receive_queue is not empty".  SOCK_ASYNC_WAITDATA is set on
 * the socket for the duration of the wait and cleared afterwards.
 * NOTE(review): the return statement is not shown in this listing;
 * presumably rc from sk_wait_event() is returned.
 */
1283 int sk_wait_data(struct sock *sk, long *timeo)
 
1288         prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
1289         set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 
1290         rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
 
1291         clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 
1292         finish_wait(sk->sk_sleep, &wait);
 
1296 EXPORT_SYMBOL(sk_wait_data);
 
1299  * Set of default routines for initialising struct proto_ops when
 
1300  * the protocol does not support a particular function. In certain
 
1301  * cases where it makes no sense for a protocol to have a "do nothing"
 
1302  * function, some default processing is provided.
 
/* Default proto_ops bind handler for protocols that do not support bind.
 * NOTE(review): the body is not shown in this listing. */
1305 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
 
/* Default proto_ops connect handler for protocols that do not support
 * connect.  NOTE(review): the rest of the parameter list and the body are
 * not shown in this listing. */
1310 int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 
 
/* Default proto_ops socketpair handler for protocols that do not support
 * socketpair.  NOTE(review): the body is not shown in this listing. */
1316 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
 
/* Default proto_ops accept handler for protocols that do not support accept.
 * NOTE(review): the body is not shown in this listing. */
1321 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
 
/* Default proto_ops getname handler for protocols that do not support
 * getname.  NOTE(review): the rest of the parameter list and the body are
 * not shown in this listing. */
1326 int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 
 
/* Default proto_ops poll handler for protocols that do not support poll.
 * NOTE(review): the body is not shown in this listing. */
1332 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
 
/* Default proto_ops ioctl handler for protocols that do not support ioctl.
 * NOTE(review): the body is not shown in this listing. */
1337 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
/* Default proto_ops listen handler for protocols that do not support listen.
 * NOTE(review): the body is not shown in this listing. */
1342 int sock_no_listen(struct socket *sock, int backlog)
 
/* Default proto_ops shutdown handler for protocols that do not support
 * shutdown.  NOTE(review): the body is not shown in this listing. */
1347 int sock_no_shutdown(struct socket *sock, int how)
 
/* Default proto_ops setsockopt handler for protocols that do not support
 * setsockopt.  NOTE(review): the body is not shown in this listing. */
1352 int sock_no_setsockopt(struct socket *sock, int level, int optname,
 
1353                     char __user *optval, int optlen)
 
/* Default proto_ops getsockopt handler for protocols that do not support
 * getsockopt.  NOTE(review): the body is not shown in this listing. */
1358 int sock_no_getsockopt(struct socket *sock, int level, int optname,
 
1359                     char __user *optval, int __user *optlen)
 
/* Default proto_ops sendmsg handler for protocols that do not support
 * sendmsg.  NOTE(review): the rest of the parameter list and the body are
 * not shown in this listing. */
1364 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 
/* Default proto_ops recvmsg handler for protocols that do not support
 * recvmsg.  NOTE(review): the body is not shown in this listing. */
1370 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 
1371                     size_t len, int flags)
 
/* Default proto_ops mmap handler for protocols that do not support mmap.
 * NOTE(review): the return statement is not shown in this listing; per the
 * comment below it yields the same error as a missing mmap method. */
1376 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
 
1378         /* Mirror missing mmap method error code */
 
/*
 * Fallback sendpage for protocols without their own: maps @page with kmap()
 * and sends @size bytes starting at @offset through kernel_sendmsg() as a
 * single iovec, with @flags as the message flags.
 * NOTE(review): the iovec declaration and length setup, the unmapping of the
 * page and the return statement are not shown in this listing.
 */
1382 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
 
1385         struct msghdr msg = {.msg_flags = flags};
 
1387         char *kaddr = kmap(page);
 
1388         iov.iov_base = kaddr + offset;
 
1390         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
 
1396  *      Default Socket Callbacks
 
1399 static void sock_def_wakeup(struct sock *sk)
 
1401         read_lock(&sk->sk_callback_lock);
 
1402         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 
1403                 wake_up_interruptible_all(sk->sk_sleep);
 
1404         read_unlock(&sk->sk_callback_lock);
 
1407 static void sock_def_error_report(struct sock *sk)
 
1409         read_lock(&sk->sk_callback_lock);
 
1410         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 
1411                 wake_up_interruptible(sk->sk_sleep);
 
1412         sk_wake_async(sk,0,POLL_ERR); 
 
1413         read_unlock(&sk->sk_callback_lock);
 
1416 static void sock_def_readable(struct sock *sk, int len)
 
1418         read_lock(&sk->sk_callback_lock);
 
1419         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 
1420                 wake_up_interruptible(sk->sk_sleep);
 
1421         sk_wake_async(sk,1,POLL_IN);
 
1422         read_unlock(&sk->sk_callback_lock);
 
1425 static void sock_def_write_space(struct sock *sk)
 
1427         read_lock(&sk->sk_callback_lock);
 
1429         /* Do not wake up a writer until he can make "significant"
 
1432         if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
 
1433                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 
1434                         wake_up_interruptible(sk->sk_sleep);
 
1436                 /* Should agree with poll, otherwise some programs break */
 
1437                 if (sock_writeable(sk))
 
1438                         sk_wake_async(sk, 2, POLL_OUT);
 
1441         read_unlock(&sk->sk_callback_lock);
 
1444 static void sock_def_destruct(struct sock *sk)
 
1446         kfree(sk->sk_protinfo);
 
1449 void sk_send_sigurg(struct sock *sk)
 
1451         if (sk->sk_socket && sk->sk_socket->file)
 
1452                 if (send_sigurg(&sk->sk_socket->file->f_owner))
 
1453                         sk_wake_async(sk, 3, POLL_PRI);
 
/*
 * (Re)arms @timer to expire at @expires using mod_timer().
 * NOTE(review): the statement executed when mod_timer() returns 0 is not
 * shown in this listing; presumably it takes a reference on @sk -- confirm.
 */
1456 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
 
1457                     unsigned long expires)
 
1459         if (!mod_timer(timer, expires))
 
1463 EXPORT_SYMBOL(sk_reset_timer);
 
/*
 * Cancels @timer with del_timer() if it is pending.
 * NOTE(review): the statement executed when a pending timer was actually
 * deleted is not shown in this listing; presumably it drops a reference on
 * @sk -- confirm.
 */
1465 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
 
1467         if (timer_pending(timer) && del_timer(timer))
 
1471 EXPORT_SYMBOL(sk_stop_timer);
 
/*
 * Puts the generic fields of @sk into their default state and ties it to
 * @sock.
 *
 * @sock: owning socket.  The visible code has one path that copies sk_type
 *        from it and points sk_sleep at its wait queue, and another that
 *        sets sk_sleep to NULL.  NOTE(review): the condition selecting
 *        between them is not shown in this listing -- presumably a NULL
 *        check on @sock.
 * @sk:   sock to initialise
 *
 * The receive, write and error queues are initialised, buffer sizes come
 * from the rmem/wmem default sysctls, the state is TCP_CLOSE, SOCK_ZAPPED is
 * set, the callbacks point at the sock_def_* handlers, both timeouts are
 * MAX_SCHEDULE_TIMEOUT, the peer credentials are pid 0 / uid -1 / gid -1,
 * the timestamp is -1/-1 and the reference count starts at 1.
 */
1473 void sock_init_data(struct socket *sock, struct sock *sk)
 
1475         skb_queue_head_init(&sk->sk_receive_queue);
 
1476         skb_queue_head_init(&sk->sk_write_queue);
 
1477         skb_queue_head_init(&sk->sk_error_queue);
 
1478 #ifdef CONFIG_NET_DMA
 
1479         skb_queue_head_init(&sk->sk_async_wait_queue);
 
1482         sk->sk_send_head        =       NULL;
 
1484         init_timer(&sk->sk_timer);
 
1486         sk->sk_allocation       =       GFP_KERNEL;
 
1487         sk->sk_rcvbuf           =       sysctl_rmem_default;
 
1488         sk->sk_sndbuf           =       sysctl_wmem_default;
 
1489         sk->sk_state            =       TCP_CLOSE;
 
1490         sk->sk_socket           =       sock;
 
1492         sock_set_flag(sk, SOCK_ZAPPED);
 
1496                 sk->sk_type     =       sock->type;
 
1497                 sk->sk_sleep    =       &sock->wait;
 
1500                 sk->sk_sleep    =       NULL;
 
1502         rwlock_init(&sk->sk_dst_lock);
 
1503         rwlock_init(&sk->sk_callback_lock);
 
1504         lockdep_set_class(&sk->sk_callback_lock,
 
1505                            af_callback_keys + sk->sk_family);
 
1507         sk->sk_state_change     =       sock_def_wakeup;
 
1508         sk->sk_data_ready       =       sock_def_readable;
 
1509         sk->sk_write_space      =       sock_def_write_space;
 
1510         sk->sk_error_report     =       sock_def_error_report;
 
1511         sk->sk_destruct         =       sock_def_destruct;
 
1513         sk->sk_sndmsg_page      =       NULL;
 
1514         sk->sk_sndmsg_off       =       0;
 
1516         sk->sk_peercred.pid     =       0;
 
1517         sk->sk_peercred.uid     =       -1;
 
1518         sk->sk_peercred.gid     =       -1;
 
1519         sk->sk_write_pending    =       0;
 
1520         sk->sk_rcvlowat         =       1;
 
1521         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
 
1522         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
 
1524         sk->sk_stamp.tv_sec     = -1L;
 
1525         sk->sk_stamp.tv_usec    = -1L;
 
1527         atomic_set(&sk->sk_refcnt, 1);
 
/*
 * Takes ownership of the socket lock.  Under sk_lock.slock the owner field
 * is checked and then set to a non-NULL marker.  The spinlock is released
 * with plain spin_unlock() although it was taken with spin_lock_bh(), so
 * bottom halves are still disabled at that point.  mutex_acquire() then
 * annotates the acquisition on sk_lock.dep_map.
 * NOTE(review): the statement run when the lock already has an owner, and
 * the re-enabling of bottom halves, are not shown in this listing.
 */
1530 void fastcall lock_sock(struct sock *sk)
 
1533         spin_lock_bh(&sk->sk_lock.slock);
 
1534         if (sk->sk_lock.owner)
 
1536         sk->sk_lock.owner = (void *)1;
 
1537         spin_unlock(&sk->sk_lock.slock);
 
1539          * The sk_lock has mutex_lock() semantics here:
 
1541         mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
 
1545 EXPORT_SYMBOL(lock_sock);
 
/*
 * Gives up ownership of the socket lock: annotates the release on
 * sk_lock.dep_map, then under sk_lock.slock checks for a pending backlog
 * (sk_backlog.tail), clears the owner field and wakes any waiter on
 * sk_lock.wq.
 * NOTE(review): the statement run when a backlog is pending is not shown in
 * this listing; presumably it processes the backlog -- confirm.
 */
1547 void fastcall release_sock(struct sock *sk)
 
1550          * The sk_lock has mutex_unlock() semantics:
 
1552         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
 
1554         spin_lock_bh(&sk->sk_lock.slock);
 
1555         if (sk->sk_backlog.tail)
 
1557         sk->sk_lock.owner = NULL;
 
1558         if (waitqueue_active(&sk->sk_lock.wq))
 
1559                 wake_up(&sk->sk_lock.wq);
 
1560         spin_unlock_bh(&sk->sk_lock.slock);
 
1562 EXPORT_SYMBOL(release_sock);
 
/*
 * Copies the socket's timestamp (sk_stamp) to @userstamp, first enabling
 * timestamping on the socket if SOCK_TIMESTAMP was not yet set.  A tv_sec of
 * -1 is treated as a special case, and a tv_sec of 0 causes the stamp to be
 * filled in with do_gettimeofday() before it is copied out.
 * NOTE(review): the value returned for the -1 case and the two result values
 * of the final conditional are not shown in this listing.
 */
1564 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 
1566         if (!sock_flag(sk, SOCK_TIMESTAMP))
 
1567                 sock_enable_timestamp(sk);
 
1568         if (sk->sk_stamp.tv_sec == -1) 
 
1570         if (sk->sk_stamp.tv_sec == 0)
 
1571                 do_gettimeofday(&sk->sk_stamp);
 
1572         return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
 
1575 EXPORT_SYMBOL(sock_get_timestamp);
 
1577 void sock_enable_timestamp(struct sock *sk)
 
1579         if (!sock_flag(sk, SOCK_TIMESTAMP)) { 
 
1580                 sock_set_flag(sk, SOCK_TIMESTAMP);
 
1581                 net_enable_timestamp();
 
1584 EXPORT_SYMBOL(sock_enable_timestamp); 
 
1587  *      Get a socket option on a socket.
 
1589  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
 
1590  *      asynchronous errors should be reported by getsockopt. We assume
 
1591  *      this means if you specify SO_ERROR (otherwise what's the point of it).
 
1593 int sock_common_getsockopt(struct socket *sock, int level, int optname,
 
1594                            char __user *optval, int __user *optlen)
 
1596         struct sock *sk = sock->sk;
 
1598         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
 
1601 EXPORT_SYMBOL(sock_common_getsockopt);
 
1603 #ifdef CONFIG_COMPAT
 
1604 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
 
1605                                   char __user *optval, int __user *optlen)
 
1607         struct sock *sk = sock->sk;
 
1609         if (sk->sk_prot->compat_setsockopt != NULL)
 
1610                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
 
1612         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
 
1614 EXPORT_SYMBOL(compat_sock_common_getsockopt);
 
/*
 * Generic recvmsg entry point: hands the request to the protocol's recvmsg
 * handler, passing the MSG_DONTWAIT bit of @flags separately from the
 * remaining flags, and stores the address length reported by the handler in
 * msg->msg_namelen.
 * NOTE(review): the local declarations, the condition guarding the
 * msg_namelen update and the return statement are not shown in this listing.
 */
1617 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
 
1618                         struct msghdr *msg, size_t size, int flags)
 
1620         struct sock *sk = sock->sk;
 
1624         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
 
1625                                    flags & ~MSG_DONTWAIT, &addr_len);
 
1627                 msg->msg_namelen = addr_len;
 
1631 EXPORT_SYMBOL(sock_common_recvmsg);
 
1634  *      Set socket options on an inet socket.
 
1636 int sock_common_setsockopt(struct socket *sock, int level, int optname,
 
1637                            char __user *optval, int optlen)
 
1639         struct sock *sk = sock->sk;
 
1641         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
 
1644 EXPORT_SYMBOL(sock_common_setsockopt);
 
1646 #ifdef CONFIG_COMPAT
 
/*
 * Compat (CONFIG_COMPAT) variant of setsockopt for sockets using the common
 * proto handlers: uses the protocol's compat_setsockopt handler when it
 * provides one and its regular setsockopt handler otherwise.
 * NOTE(review): the trailing arguments of the compat_setsockopt call are not
 * shown in this listing.
 */
1647 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
 
1648                                   char __user *optval, int optlen)
 
1650         struct sock *sk = sock->sk;
 
1652         if (sk->sk_prot->compat_setsockopt != NULL)
 
1653                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
 
1655         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
 
1657 EXPORT_SYMBOL(compat_sock_common_setsockopt);
 
/*
 * Common teardown of a sock: calls the protocol's destroy hook when it has
 * one, removes the sock from the protocol's hash tables via unhash(), then
 * calls xfrm_sk_free_policy() and sk_refcnt_debug_release().
 * NOTE(review): some statements of this function are not shown in this
 * listing.
 */
1660 void sk_common_release(struct sock *sk)
 
1662         if (sk->sk_prot->destroy)
 
1663                 sk->sk_prot->destroy(sk);
 
1666          * Observation: when sock_common_release is called, processes have
 
1667          * no access to socket. But net still has.
 
1668          * Step one, detach it from networking:
 
1670          * A. Remove from hash tables.
 
1673         sk->sk_prot->unhash(sk);
 
1676          * In this point socket cannot receive new packets, but it is possible
 
1677          * that some packets are in flight because some CPU runs receiver and
 
1678          * did hash table lookup before we unhashed socket. They will achieve
 
1679          * receive queue and will be purged by socket destructor.
 
1681          * Also we still have packets pending on receive queue and probably,
 
1682          * our own packets waiting in device queues. sock_destroy will drain
 
1683          * receive queue, but transmitted packets will delay socket destruction
 
1684          * until the last reference will be released.
 
1689         xfrm_sk_free_policy(sk);
 
1691         sk_refcnt_debug_release(sk);
 
1695 EXPORT_SYMBOL(sk_common_release);
 
/* List of registered protocols (linked through proto->node) and the rwlock
 * taken around additions, removals and traversal of that list. */
1697 static DEFINE_RWLOCK(proto_list_lock);
 
1698 static LIST_HEAD(proto_list);
 
/*
 * @prot:       protocol to register
 * @alloc_slab: NOTE(review): presumably selects whether the slab caches are
 *              created; the test on it is not shown in this listing.
 *
 * Creates the sock slab cache for @prot and, when the protocol has them, the
 * request-sock ("request_sock_<name>") and timewait-sock ("tw_sock_<name>")
 * caches, then adds @prot to proto_list under proto_list_lock.  The labels
 * at the end unwind, in reverse order, whatever was set up before a failure.
 *
 * The cache-name buffers are sized strlen(prot->name) + sizeof(mask) - 1.
 * Since the "%s" in the mask is replaced by the name, that is enough for the
 * formatted string including its terminating NUL, with one byte to spare.
 *
 * NOTE(review): the return statements and a few other lines are not shown in
 * this listing.
 */
1700 int proto_register(struct proto *prot, int alloc_slab)
 
1702         char *request_sock_slab_name = NULL;
 
1703         char *timewait_sock_slab_name;
 
1707                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
 
1708                                                SLAB_HWCACHE_ALIGN, NULL, NULL);
 
1710                 if (prot->slab == NULL) {
 
1711                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
 
1716                 if (prot->rsk_prot != NULL) {
 
1717                         static const char mask[] = "request_sock_%s";
 
1719                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
 
1720                         if (request_sock_slab_name == NULL)
 
1721                                 goto out_free_sock_slab;
 
1723                         sprintf(request_sock_slab_name, mask, prot->name);
 
1724                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
 
1725                                                                  prot->rsk_prot->obj_size, 0,
 
1726                                                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
 
1728                         if (prot->rsk_prot->slab == NULL) {
 
1729                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
 
1731                                 goto out_free_request_sock_slab_name;
 
1735                 if (prot->twsk_prot != NULL) {
 
1736                         static const char mask[] = "tw_sock_%s";
 
1738                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
 
1740                         if (timewait_sock_slab_name == NULL)
 
1741                                 goto out_free_request_sock_slab;
 
1743                         sprintf(timewait_sock_slab_name, mask, prot->name);
 
1744                         prot->twsk_prot->twsk_slab =
 
1745                                 kmem_cache_create(timewait_sock_slab_name,
 
1746                                                   prot->twsk_prot->twsk_obj_size,
 
1747                                                   0, SLAB_HWCACHE_ALIGN,
 
1749                         if (prot->twsk_prot->twsk_slab == NULL)
 
1750                                 goto out_free_timewait_sock_slab_name;
 
1754         write_lock(&proto_list_lock);
 
1755         list_add(&prot->node, &proto_list);
 
1756         write_unlock(&proto_list_lock);
 
1760 out_free_timewait_sock_slab_name:
 
1761         kfree(timewait_sock_slab_name);
 
1762 out_free_request_sock_slab:
 
1763         if (prot->rsk_prot && prot->rsk_prot->slab) {
 
1764                 kmem_cache_destroy(prot->rsk_prot->slab);
 
1765                 prot->rsk_prot->slab = NULL;
 
1767 out_free_request_sock_slab_name:
 
1768         kfree(request_sock_slab_name);
 
1770         kmem_cache_destroy(prot->slab);
 
1775 EXPORT_SYMBOL(proto_register);
 
/*
 * Removes @prot from proto_list under proto_list_lock and destroys its sock,
 * request-sock and timewait-sock slab caches where they exist, resetting the
 * request-sock and timewait-sock cache pointers to NULL.  For those two the
 * cache name is fetched with kmem_cache_name() before the cache is
 * destroyed.
 * NOTE(review): what is done with that name afterwards is not shown in this
 * listing; presumably it is freed -- confirm.
 */
1777 void proto_unregister(struct proto *prot)
 
1779         write_lock(&proto_list_lock);
 
1780         list_del(&prot->node);
 
1781         write_unlock(&proto_list_lock);
 
1783         if (prot->slab != NULL) {
 
1784                 kmem_cache_destroy(prot->slab);
 
1788         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
 
1789                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
 
1791                 kmem_cache_destroy(prot->rsk_prot->slab);
 
1793                 prot->rsk_prot->slab = NULL;
 
1796         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
 
1797                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
 
1799                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
 
1801                 prot->twsk_prot->twsk_slab = NULL;
 
1805 EXPORT_SYMBOL(proto_unregister);
 
1807 #ifdef CONFIG_PROC_FS
 
1808 static inline struct proto *__proto_head(void)
 
1810         return list_entry(proto_list.next, struct proto, node);
 
1813 static inline struct proto *proto_head(void)
 
1815         return list_empty(&proto_list) ? NULL : __proto_head();
 
1818 static inline struct proto *proto_next(struct proto *proto)
 
1820         return proto->node.next == &proto_list ? NULL :
 
1821                 list_entry(proto->node.next, struct proto, node);
 
/*
 * Walks proto_list with list_for_each_entry() to locate the entry for
 * position @pos.
 * NOTE(review): the counter, the loop body and the return statements are not
 * shown in this listing.
 */
1824 static inline struct proto *proto_get_idx(loff_t pos)
 
1826         struct proto *proto;
 
1829         list_for_each_entry(proto, &proto_list, node)
 
1838 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
 
1840         read_lock(&proto_list_lock);
 
1841         return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
 
/*
 * seq_file next callback: after the start token comes the head of
 * proto_list, after any other entry comes its successor.
 * NOTE(review): one statement before the return is not shown in this
 * listing; presumably it advances *pos -- confirm.
 */
1844 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
1847         return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
 
1850 static void proto_seq_stop(struct seq_file *seq, void *v)
 
1852         read_unlock(&proto_list_lock);
 
/*
 * Translate a protocol method pointer into the flag character printed in the
 * protocol table: 'y' when the method is set, 'n' when it is NULL.
 */
static char proto_method_implemented(const void *method)
{
	if (method != NULL)
		return 'y';
	return 'n';
}
 
/*
 * Prints one row of the protocol table for @proto: the socket and memory
 * counters (or -1 when the protocol has no such counter), the memory
 * pressure state ("yes"/"no", or "NI" when the protocol has no pressure
 * flag), whether a slab cache exists, the owning module name, and a y/n
 * column for each of the 19 proto methods listed below.
 * NOTE(review): a few argument lines of the seq_printf() call are not shown
 * in this listing.
 */
1860 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 
1862         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
 
1863                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
 
1866                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
 
1867                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
 
1868                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
 
1870                    proto->slab == NULL ? "no" : "yes",
 
1871                    module_name(proto->owner),
 
1872                    proto_method_implemented(proto->close),
 
1873                    proto_method_implemented(proto->connect),
 
1874                    proto_method_implemented(proto->disconnect),
 
1875                    proto_method_implemented(proto->accept),
 
1876                    proto_method_implemented(proto->ioctl),
 
1877                    proto_method_implemented(proto->init),
 
1878                    proto_method_implemented(proto->destroy),
 
1879                    proto_method_implemented(proto->shutdown),
 
1880                    proto_method_implemented(proto->setsockopt),
 
1881                    proto_method_implemented(proto->getsockopt),
 
1882                    proto_method_implemented(proto->sendmsg),
 
1883                    proto_method_implemented(proto->recvmsg),
 
1884                    proto_method_implemented(proto->sendpage),
 
1885                    proto_method_implemented(proto->bind),
 
1886                    proto_method_implemented(proto->backlog_rcv),
 
1887                    proto_method_implemented(proto->hash),
 
1888                    proto_method_implemented(proto->unhash),
 
1889                    proto_method_implemented(proto->get_port),
 
1890                    proto_method_implemented(proto->enter_memory_pressure));
 
/*
 * seq_file show callback: prints the column header line for the start token
 * and one protocol row, via proto_seq_printf(), for every other entry.
 * NOTE(review): the header column names, the else branch keyword and the
 * return statement are not shown in this listing.
 */
1893 static int proto_seq_show(struct seq_file *seq, void *v)
 
1895         if (v == SEQ_START_TOKEN)
 
1896                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
 
1905                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
 
1907                 proto_seq_printf(seq, v);
 
/* seq_file iterator callbacks for the protocol table; handed to seq_open()
 * by proto_seq_open(). */
1911 static struct seq_operations proto_seq_ops = {
 
1912         .start  = proto_seq_start,
 
1913         .next   = proto_seq_next,
 
1914         .stop   = proto_seq_stop,
 
1915         .show   = proto_seq_show,
 
1918 static int proto_seq_open(struct inode *inode, struct file *file)
 
1920         return seq_open(file, &proto_seq_ops);
 
/* File operations for the "protocols" proc entry registered by proto_init();
 * open goes through proto_seq_open(), seeking and release use the seq_file
 * helpers.  NOTE(review): one initializer line is not shown in this listing. */
1923 static struct file_operations proto_seq_fops = {
 
1924         .owner          = THIS_MODULE,
 
1925         .open           = proto_seq_open,
 
1927         .llseek         = seq_lseek,
 
1928         .release        = seq_release,
 
1931 static int __init proto_init(void)
 
1933         /* register /proc/net/protocols */
 
1934         return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
 
1937 subsys_initcall(proto_init);
 
1939 #endif /* PROC_FS */
 
1941 EXPORT_SYMBOL(sk_alloc);
 
1942 EXPORT_SYMBOL(sk_free);
 
1943 EXPORT_SYMBOL(sk_send_sigurg);
 
1944 EXPORT_SYMBOL(sock_alloc_send_skb);
 
1945 EXPORT_SYMBOL(sock_init_data);
 
1946 EXPORT_SYMBOL(sock_kfree_s);
 
1947 EXPORT_SYMBOL(sock_kmalloc);
 
1948 EXPORT_SYMBOL(sock_no_accept);
 
1949 EXPORT_SYMBOL(sock_no_bind);
 
1950 EXPORT_SYMBOL(sock_no_connect);
 
1951 EXPORT_SYMBOL(sock_no_getname);
 
1952 EXPORT_SYMBOL(sock_no_getsockopt);
 
1953 EXPORT_SYMBOL(sock_no_ioctl);
 
1954 EXPORT_SYMBOL(sock_no_listen);
 
1955 EXPORT_SYMBOL(sock_no_mmap);
 
1956 EXPORT_SYMBOL(sock_no_poll);
 
1957 EXPORT_SYMBOL(sock_no_recvmsg);
 
1958 EXPORT_SYMBOL(sock_no_sendmsg);
 
1959 EXPORT_SYMBOL(sock_no_sendpage);
 
1960 EXPORT_SYMBOL(sock_no_setsockopt);
 
1961 EXPORT_SYMBOL(sock_no_shutdown);
 
1962 EXPORT_SYMBOL(sock_no_socketpair);
 
1963 EXPORT_SYMBOL(sock_rfree);
 
1964 EXPORT_SYMBOL(sock_setsockopt);
 
1965 EXPORT_SYMBOL(sock_wfree);
 
1966 EXPORT_SYMBOL(sock_wmalloc);
 
1967 EXPORT_SYMBOL(sock_i_uid);
 
1968 EXPORT_SYMBOL(sock_i_ino);
 
1969 EXPORT_SYMBOL(sysctl_optmem_max);
 
1970 #ifdef CONFIG_SYSCTL
 
1971 EXPORT_SYMBOL(sysctl_rmem_max);
 
1972 EXPORT_SYMBOL(sysctl_wmem_max);