/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Generic socket support routines. Memory allocators, socket lock/release
 *              handler for protocols to use and generic option handler.
 *
 *
 * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *              Alan Cox        :       Numerous verify_area() problems
 *              Alan Cox        :       Connecting on a connecting socket
 *                                      now returns an error for tcp.
 *              Alan Cox        :       sock->protocol is set correctly.
 *                                      and is not sometimes left as 0.
 *              Alan Cox        :       connect handles icmp errors on a
 *                                      connect properly. Unfortunately there
 *                                      is a restart syscall nasty there. I
 *                                      can't match BSD without hacking the C
 *                                      library. Ideas urgently sought!
 *              Alan Cox        :       Disallow bind() to addresses that are
 *                                      not ours - especially broadcast ones!!
 *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 *                                      instead they leave that for the DESTROY timer.
 *              Alan Cox        :       Clean up error flag in accept
 *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 *                                      was buggy. Put a remove_sock() in the handler
 *                                      for memory when we hit 0. Also altered the timer
 *                                      code. The ACK stuff can wait and needs major
 *                                      TCP layer surgery.
 *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 *                                      and fixed timer/inet_bh race.
 *              Alan Cox        :       Added zapped flag for TCP
 *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 *      Pauline Middelink       :       identd support
 *              Alan Cox        :       Fixed connect() taking signals I think.
 *              Alan Cox        :       SO_LINGER supported
 *              Alan Cox        :       Error reporting fixes
 *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 *              Alan Cox        :       inet sockets don't set sk->type!
 *              Alan Cox        :       Split socket option code
 *              Alan Cox        :       Callbacks
 *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 *              Alex            :       Removed restriction on inet fioctl
 *              Alan Cox        :       Splitting INET from NET core
 *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 *              Alan Cox        :       Split IP from generic code
 *              Alan Cox        :       New kfree_skbmem()
 *              Alan Cox        :       Make SO_DEBUG superuser only.
 *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 *                                      (compatibility fix)
 *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 *              Alan Cox        :       Allocator for a socket is settable.
 *              Alan Cox        :       SO_ERROR includes soft errors.
 *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 *              Alan Cox        :       Generic socket allocation to make hooks
 *                                      easier (suggested by Craig Metz).
 *              Michael Pall    :       SO_ERROR returns positive errno again
 *              Steve Whitehouse:       Added default destructor to free
 *                                      protocol private data.
 *              Steve Whitehouse:       Added various other default routines
 *                                      common to several socket families.
 *              Chris Evans     :       Call suser() check last on F_SETOWN
 *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 *              Andi Kleen      :       Fix write_space callback
 *              Chris Evans     :       Security fixes - signedness again
 *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-29"          ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_MAX"
};
static const char *af_family_clock_key_strings[AF_MAX+1] = {
  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
  "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
  "clock-27"       , "clock-28"          , "clock-29"          ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_MAX"
};
#endif

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS         256
#define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
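
/*
 * Worked example (illustrative only): with a hypothetical
 * sizeof(struct sk_buff) of 240 bytes, _SK_MEM_OVERHEAD comes to
 * 240 + 256 = 496 bytes, so the default limits work out to
 * 496 * 256 = 126976 bytes (about 124 KB) per direction.  The exact
 * figure varies with the platform's sk_buff layout.
 */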

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
        struct timeval tv;

        if (optlen < sizeof(tv))
                return -EINVAL;
        if (copy_from_user(&tv, optval, sizeof(tv)))
                return -EFAULT;
        if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
                return -EDOM;

        if (tv.tv_sec < 0) {
                static int warned __read_mostly;

                *timeo_p = 0;
                if (warned < 10 && net_ratelimit()) {
                        warned++;
                        printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
                               "tries to set negative timeout\n",
                                current->comm, current->pid);
                }
                return 0;
        }
        *timeo_p = MAX_SCHEDULE_TIMEOUT;
        if (tv.tv_sec == 0 && tv.tv_usec == 0)
                return 0;
        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
        return 0;
}
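
/*
 * Illustrative userspace sketch of the options this helper parses
 * (SO_RCVTIMEO/SO_SNDTIMEO); "fd" is an assumed, already-created socket:
 *
 *      struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *
 *      if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
 *              perror("setsockopt");
 *
 * An all-zero timeval selects MAX_SCHEDULE_TIMEOUT, i.e. blocking with
 * no timeout at all.
 */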

static void sock_warn_obsolete_bsdism(const char *name)
{
        static int warned;
        static char warncomm[TASK_COMM_LEN];
        if (strcmp(warncomm, current->comm) && warned < 5) {
                strcpy(warncomm, current->comm);
                printk(KERN_WARNING "process `%s' is using obsolete "
                       "%s SO_BSDCOMPAT\n", warncomm, name);
                warned++;
        }
}

static void sock_disable_timestamp(struct sock *sk)
{
        if (sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_reset_flag(sk, SOCK_TIMESTAMP);
                net_disable_timestamp();
        }
}


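/**
 *      sock_queue_rcv_skb - queue a received buffer onto a socket
 *      @sk: destination socket
 *      @skb: buffer to queue
 *
 *      Charges @skb against @sk's receive buffer, runs the socket filter
 *      and, on success, appends the buffer to sk_receive_queue and wakes
 *      any reader.  Returns 0 or a negative errno (-ENOMEM once the
 *      receive buffer limit is reached).
 */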
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        int err = 0;
        int skb_len;

        /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
                err = -ENOMEM;
                goto out;
        }

        err = sk_filter(sk, skb);
        if (err)
                goto out;

        skb->dev = NULL;
        skb_set_owner_r(skb, sk);

        /* Cache the SKB length before we tack it onto the receive
         * queue.  Once it is added it no longer belongs to us and
         * may be freed by other threads of control pulling packets
         * from the queue.
         */
        skb_len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
out:
        return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

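/**
 *      sk_receive_skb - process a received buffer with the socket lock held
 *      @sk: destination socket (a reference is consumed via sock_put())
 *      @skb: buffer to process
 *      @nested: non-zero if the socket lock may be taken nested
 *
 *      Runs the socket filter, then either delivers @skb through
 *      sk_backlog_rcv() or, if the socket is owned by a user context,
 *      defers it to the backlog.  Returns a NET_RX_* verdict.
 */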
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
        int rc = NET_RX_SUCCESS;

        if (sk_filter(sk, skb))
                goto discard_and_relse;

        skb->dev = NULL;

        if (nested)
                bh_lock_sock_nested(sk);
        else
                bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /*
                 * trylock + unlock semantics:
                 */
                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

                rc = sk->sk_backlog_rcv(sk, skb);

                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);
out:
        sock_put(sk);
        return rc;
discard_and_relse:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk->sk_dst_cache;

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk->sk_dst_cache = NULL;
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

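/**
 *      sk_dst_check - validate the socket's cached destination entry
 *      @sk: socket owning the cache
 *      @cookie: route validity cookie
 *
 *      Like __sk_dst_check(), but takes its own reference on the dst
 *      (via sk_dst_get()) and may therefore be used without the socket
 *      lock.  Returns the dst entry, or %NULL (after resetting the
 *      cache) when it has become obsolete.
 */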
struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk_dst_get(sk);

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk_dst_reset(sk);
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
        int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
        struct net *net = sk->sk_net;
        char devname[IFNAMSIZ];
        int index;

        /* Sorry... */
        ret = -EPERM;
        if (!capable(CAP_NET_RAW))
                goto out;

        ret = -EINVAL;
        if (optlen < 0)
                goto out;

        /* Bind this socket to a particular device like "eth0",
         * as specified in the passed interface name. If the
         * name is "" or the option length is zero the socket
         * is not bound.
         */
        if (optlen > IFNAMSIZ - 1)
                optlen = IFNAMSIZ - 1;
        memset(devname, 0, sizeof(devname));

        ret = -EFAULT;
        if (copy_from_user(devname, optval, optlen))
                goto out;

        if (devname[0] == '\0') {
                index = 0;
        } else {
                struct net_device *dev = dev_get_by_name(net, devname);

                ret = -ENODEV;
                if (!dev)
                        goto out;

                index = dev->ifindex;
                dev_put(dev);
        }

        lock_sock(sk);
        sk->sk_bound_dev_if = index;
        sk_dst_reset(sk);
        release_sock(sk);

        ret = 0;

out:
#endif

        return ret;
}
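
/*
 * Illustrative userspace sketch of SO_BINDTODEVICE as handled above
 * ("fd" is an assumed socket, "eth0" an assumed interface name):
 *
 *      if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
 *                     "eth0", strlen("eth0")) < 0)
 *              perror("setsockopt");
 *
 * An empty name (or a zero option length) removes the binding;
 * CAP_NET_RAW is required either way.
 */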

/*
 *      This is meant for all protocols to use and covers goings on
 *      at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        struct sk_filter *filter;
        int val;
        int valbool;
        struct linger ling;
        int ret = 0;

        /*
         *      Options without arguments
         */

#ifdef SO_DONTLINGER            /* Compatibility item... */
        if (optname == SO_DONTLINGER) {
                lock_sock(sk);
                sock_reset_flag(sk, SOCK_LINGER);
                release_sock(sk);
                return 0;
        }
#endif

        if (optname == SO_BINDTODEVICE)
                return sock_bindtodevice(sk, optval, optlen);

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        valbool = val ? 1 : 0;

        lock_sock(sk);

        switch (optname) {
        case SO_DEBUG:
                if (val && !capable(CAP_NET_ADMIN))
                        ret = -EACCES;
                else if (valbool)
                        sock_set_flag(sk, SOCK_DBG);
                else
                        sock_reset_flag(sk, SOCK_DBG);
                break;
        case SO_REUSEADDR:
                sk->sk_reuse = valbool;
                break;
        case SO_TYPE:
        case SO_ERROR:
                ret = -ENOPROTOOPT;
                break;
        case SO_DONTROUTE:
                if (valbool)
                        sock_set_flag(sk, SOCK_LOCALROUTE);
                else
                        sock_reset_flag(sk, SOCK_LOCALROUTE);
                break;
        case SO_BROADCAST:
                sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
                break;
        case SO_SNDBUF:
                /* Don't error on this. BSD doesn't, and if you think
                   about it, this is right. Otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints. */

                if (val > sysctl_wmem_max)
                        val = sysctl_wmem_max;
set_sndbuf:
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                if ((val * 2) < SOCK_MIN_SNDBUF)
                        sk->sk_sndbuf = SOCK_MIN_SNDBUF;
                else
                        sk->sk_sndbuf = val * 2;

                /*
                 *      Wake up sending tasks if we
                 *      upped the value.
                 */
                sk->sk_write_space(sk);
                break;

        case SO_SNDBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_sndbuf;

        case SO_RCVBUF:
                /* Don't error on this. BSD doesn't, and if you think
                   about it, this is right. Otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints. */

                if (val > sysctl_rmem_max)
                        val = sysctl_rmem_max;
set_rcvbuf:
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                /*
                 * We double it on the way in to account for
                 * "struct sk_buff" etc. overhead.   Applications
                 * assume that the SO_RCVBUF setting they make will
                 * allow that much actual data to be received on that
                 * socket.
                 *
                 * Applications are unaware that "struct sk_buff" and
                 * other overheads allocate from the receive buffer
                 * during socket buffer allocation.
                 *
                 * And after considering the possible alternatives,
                 * returning the value we actually used in getsockopt
                 * is the most desirable behavior.
                 */
                if ((val * 2) < SOCK_MIN_RCVBUF)
                        sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
                else
                        sk->sk_rcvbuf = val * 2;
                break;

        case SO_RCVBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_rcvbuf;

        case SO_KEEPALIVE:
#ifdef CONFIG_INET
                if (sk->sk_protocol == IPPROTO_TCP)
                        tcp_set_keepalive(sk, valbool);
#endif
                sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
                break;

        case SO_OOBINLINE:
                sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
                break;

        case SO_NO_CHECK:
                sk->sk_no_check = valbool;
                break;

        case SO_PRIORITY:
                if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
                        sk->sk_priority = val;
                else
                        ret = -EPERM;
                break;

        case SO_LINGER:
                if (optlen < sizeof(ling)) {
                        ret = -EINVAL;  /* 1003.1g */
                        break;
                }
                if (copy_from_user(&ling, optval, sizeof(ling))) {
                        ret = -EFAULT;
                        break;
                }
                if (!ling.l_onoff)
                        sock_reset_flag(sk, SOCK_LINGER);
                else {
#if (BITS_PER_LONG == 32)
                        if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
                                sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
                        else
#endif
                                sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
                        sock_set_flag(sk, SOCK_LINGER);
                }
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("setsockopt");
                break;

        case SO_PASSCRED:
                if (valbool)
                        set_bit(SOCK_PASSCRED, &sock->flags);
                else
                        clear_bit(SOCK_PASSCRED, &sock->flags);
                break;

        case SO_TIMESTAMP:
        case SO_TIMESTAMPNS:
                if (valbool) {
                        if (optname == SO_TIMESTAMP)
                                sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                        else
                                sock_set_flag(sk, SOCK_RCVTSTAMPNS);
                        sock_set_flag(sk, SOCK_RCVTSTAMP);
                        sock_enable_timestamp(sk);
                } else {
                        sock_reset_flag(sk, SOCK_RCVTSTAMP);
                        sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                }
                break;

        case SO_RCVLOWAT:
                if (val < 0)
                        val = INT_MAX;
                sk->sk_rcvlowat = val ? : 1;
                break;

        case SO_RCVTIMEO:
                ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
                break;

        case SO_SNDTIMEO:
                ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
                break;

        case SO_ATTACH_FILTER:
                ret = -EINVAL;
                if (optlen == sizeof(struct sock_fprog)) {
                        struct sock_fprog fprog;

                        ret = -EFAULT;
                        if (copy_from_user(&fprog, optval, sizeof(fprog)))
                                break;

                        ret = sk_attach_filter(&fprog, sk);
                }
                break;

        case SO_DETACH_FILTER:
                rcu_read_lock_bh();
                filter = rcu_dereference(sk->sk_filter);
                if (filter) {
                        rcu_assign_pointer(sk->sk_filter, NULL);
                        sk_filter_release(sk, filter);
                        rcu_read_unlock_bh();
                        break;
                }
                rcu_read_unlock_bh();
                ret = -ENONET;
                break;

        case SO_PASSSEC:
                if (valbool)
                        set_bit(SOCK_PASSSEC, &sock->flags);
                else
                        clear_bit(SOCK_PASSSEC, &sock->flags);
                break;

                /* We implement the SO_SNDLOWAT etc to
                   not be settable (1003.1g 5.3) */
        default:
                ret = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);
        return ret;
}


int sock_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        union {
                int val;
                struct linger ling;
                struct timeval tm;
        } v;

        unsigned int lv = sizeof(int);
        int len;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch (optname) {
        case SO_DEBUG:
                v.val = sock_flag(sk, SOCK_DBG);
                break;

        case SO_DONTROUTE:
                v.val = sock_flag(sk, SOCK_LOCALROUTE);
                break;

        case SO_BROADCAST:
                v.val = !!sock_flag(sk, SOCK_BROADCAST);
                break;

        case SO_SNDBUF:
                v.val = sk->sk_sndbuf;
                break;

        case SO_RCVBUF:
                v.val = sk->sk_rcvbuf;
                break;

        case SO_REUSEADDR:
                v.val = sk->sk_reuse;
                break;

        case SO_KEEPALIVE:
                v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
                break;

        case SO_TYPE:
                v.val = sk->sk_type;
                break;

        case SO_ERROR:
                v.val = -sock_error(sk);
                if (v.val == 0)
                        v.val = xchg(&sk->sk_err_soft, 0);
                break;

        case SO_OOBINLINE:
                v.val = !!sock_flag(sk, SOCK_URGINLINE);
                break;

        case SO_NO_CHECK:
                v.val = sk->sk_no_check;
                break;

        case SO_PRIORITY:
                v.val = sk->sk_priority;
                break;

        case SO_LINGER:
                lv              = sizeof(v.ling);
                v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
                v.ling.l_linger = sk->sk_lingertime / HZ;
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("getsockopt");
                break;

        case SO_TIMESTAMP:
                v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
                                !sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_TIMESTAMPNS:
                v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_RCVTIMEO:
                lv = sizeof(struct timeval);
                if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_SNDTIMEO:
                lv = sizeof(struct timeval);
                if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_sndtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_RCVLOWAT:
                v.val = sk->sk_rcvlowat;
                break;

        case SO_SNDLOWAT:
                v.val = 1;
                break;

        case SO_PASSCRED:
                v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERCRED:
                if (len > sizeof(sk->sk_peercred))
                        len = sizeof(sk->sk_peercred);
                if (copy_to_user(optval, &sk->sk_peercred, len))
                        return -EFAULT;
                goto lenout;

        case SO_PEERNAME:
        {
                char address[128];

                if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
                        return -ENOTCONN;
                if (lv < len)
                        return -EINVAL;
                if (copy_to_user(optval, address, len))
                        return -EFAULT;
                goto lenout;
        }

        /* Dubious BSD thing... Probably nobody even uses it, but
         * the UNIX standard wants it for whatever reason... -DaveM
         */
        case SO_ACCEPTCONN:
                v.val = sk->sk_state == TCP_LISTEN;
                break;

        case SO_PASSSEC:
                v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERSEC:
                return security_socket_getpeersec_stream(sock, optval, optlen, len);

        default:
                return -ENOPROTOOPT;
        }

        if (len > lv)
                len = lv;
        if (copy_to_user(optval, &v, len))
                return -EFAULT;
lenout:
        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
        sock_lock_init_class_and_name(sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
                        af_family_keys + sk->sk_family);
}

/**
 *      sk_alloc - All socket objects are allocated here
 *      @net: the applicable net namespace
 *      @family: protocol family
 *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *      @prot: struct proto associated with this new sock instance
 *      @zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                      struct proto *prot, int zero_it)
{
        struct sock *sk = NULL;
        struct kmem_cache *slab = prot->slab;

        if (slab != NULL)
                sk = kmem_cache_alloc(slab, priority);
        else
                sk = kmalloc(prot->obj_size, priority);

        if (sk) {
                if (zero_it) {
                        memset(sk, 0, prot->obj_size);
                        sk->sk_family = family;
                        /*
                         * See comment in struct sock definition to understand
                         * why we need sk_prot_creator -acme
                         */
                        sk->sk_prot = sk->sk_prot_creator = prot;
                        sock_lock_init(sk);
                        sk->sk_net = get_net(net);
                }

                if (security_sk_alloc(sk, family, priority))
                        goto out_free;

                if (!try_module_get(prot->owner))
                        goto out_free;
        }
        return sk;

out_free:
        if (slab != NULL)
                kmem_cache_free(slab, sk);
        else
                kfree(sk);
        return NULL;
}

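/**
 *      sk_free - clean up and free a socket
 *      @sk: socket to free
 *
 *      Runs the socket's destructor, releases any attached filter,
 *      disables timestamping and returns the memory to the slab cache
 *      (or to kmalloc).  A leak warning is printed if option memory is
 *      still accounted against the socket.
 */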
void sk_free(struct sock *sk)
{
        struct sk_filter *filter;
        struct module *owner = sk->sk_prot_creator->owner;

        if (sk->sk_destruct)
                sk->sk_destruct(sk);

        filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                sk_filter_release(sk, filter);
                rcu_assign_pointer(sk->sk_filter, NULL);
        }

        sock_disable_timestamp(sk);

        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
                       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

        security_sk_free(sk);
        put_net(sk->sk_net);
        if (sk->sk_prot_creator->slab != NULL)
                kmem_cache_free(sk->sk_prot_creator->slab, sk);
        else
                kfree(sk);
        module_put(owner);
}

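/**
 *      sk_clone - clone a socket, for protocols that accept connections
 *      @sk: the socket to clone
 *      @priority: allocation mask (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *      Copies @sk and reinitialises the queues, locks, accounting and
 *      XFRM policy of the copy.  Note the new socket is returned with
 *      its bh lock held (bh_lock_sock()); the caller must release it.
 *      Returns the new socket, or %NULL on failure.
 */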
struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
        struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0);

        if (newsk != NULL) {
                struct sk_filter *filter;

                sock_copy(newsk, sk);

                /* SANITY */
                sk_node_init(&newsk->sk_node);
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;

                atomic_set(&newsk->sk_rmem_alloc, 0);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                atomic_set(&newsk->sk_omem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
                skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

                rwlock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class_and_name(&newsk->sk_callback_lock,
                                af_callback_keys + newsk->sk_family,
                                af_family_clock_key_strings[newsk->sk_family]);

                newsk->sk_dst_cache     = NULL;
                newsk->sk_wmem_queued   = 0;
                newsk->sk_forward_alloc = 0;
                newsk->sk_send_head     = NULL;
                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

                sock_reset_flag(newsk, SOCK_DONE);
                skb_queue_head_init(&newsk->sk_error_queue);

                filter = newsk->sk_filter;
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);

                if (unlikely(xfrm_sk_clone_policy(newsk))) {
                        /* It is still raw copy of parent, so invalidate
                         * destructor and make plain sk_free() */
                        newsk->sk_destruct = NULL;
                        sk_free(newsk);
                        newsk = NULL;
                        goto out;
                }

                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
                atomic_set(&newsk->sk_refcnt, 2);

                /*
                 * Increment the counter in the same struct proto as the master
                 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
                 * is the same as sk->sk_prot->socks, as this field was copied
                 * with memcpy).
                 *
                 * This _changes_ the previous behaviour, where
                 * tcp_create_openreq_child always was incrementing the
                 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
                 * to be taken into account in all callers. -acme
                 */
                sk_refcnt_debug_inc(newsk);
                newsk->sk_socket = NULL;
                newsk->sk_sleep  = NULL;

                if (newsk->sk_prot->sockets_allocated)
                        atomic_inc(newsk->sk_prot->sockets_allocated);
        }
out:
        return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
        __sk_dst_set(sk, dst);
        sk->sk_route_caps = dst->dev->features;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
        if (sk_can_gso(sk)) {
                if (dst->header_len)
                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
                else
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
        }
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
        if (num_physpages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
        } else if (num_physpages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
}

/*
 *      Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* In case it might be waiting for more memory. */
        atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
                sk->sk_write_space(sk);
        sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
        int uid;

        read_lock(&sk->sk_callback_lock);
        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
        read_unlock(&sk->sk_callback_lock);
        return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
        unsigned long ino;

        read_lock(&sk->sk_callback_lock);
        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
        read_unlock(&sk->sk_callback_lock);
        return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_r(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
        if ((unsigned)size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
                void *mem;
                /* First do the add, to avoid the race if kmalloc
                 * might sleep.
                 */
                atomic_add(size, &sk->sk_omem_alloc);
                mem = kmalloc(size, priority);
                if (mem)
                        return mem;
                atomic_sub(size, &sk->sk_omem_alloc);
        }
        return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
        kfree(mem);
        atomic_sub(size, &sk->sk_omem_alloc);
}
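
/*
 * A minimal usage sketch (struct foo is a hypothetical piece of
 * protocol-private option data): memory obtained with sock_kmalloc()
 * is charged to sk_omem_alloc and must be returned with sock_kfree_s()
 * so that the accounting stays balanced:
 *
 *      struct foo *opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
 *
 *      if (opt != NULL) {
 *              ... use opt ...
 *              sock_kfree_s(sk, opt, sizeof(*opt));
 *      }
 */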

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
        DEFINE_WAIT(wait);

        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
        for (;;) {
                if (!timeo)
                        break;
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        break;
                if (sk->sk_err)
                        break;
                timeo = schedule_timeout(timeo);
        }
        finish_wait(sk->sk_sleep, &wait);
        return timeo;
}


/*
 *      Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
                                            unsigned long header_len,
                                            unsigned long data_len,
                                            int noblock, int *errcode)
{
        struct sk_buff *skb;
        gfp_t gfp_mask;
        long timeo;
        int err;

        gfp_mask = sk->sk_allocation;
        if (gfp_mask & __GFP_WAIT)
                gfp_mask |= __GFP_REPEAT;

        timeo = sock_sndtimeo(sk, noblock);
        while (1) {
                err = sock_error(sk);
                if (err != 0)
                        goto failure;

                err = -EPIPE;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        goto failure;

                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                        skb = alloc_skb(header_len, gfp_mask);
                        if (skb) {
                                int npages;
                                int i;

                                /* No pages, we're done... */
                                if (!data_len)
                                        break;

                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
                                skb->truesize += data_len;
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
                                        skb_frag_t *frag;

                                        page = alloc_pages(sk->sk_allocation, 0);
                                        if (!page) {
                                                err = -ENOBUFS;
                                                skb_shinfo(skb)->nr_frags = i;
                                                kfree_skb(skb);
                                                goto failure;
                                        }

                                        frag = &skb_shinfo(skb)->frags[i];
                                        frag->page = page;
                                        frag->page_offset = 0;
                                        frag->size = (data_len >= PAGE_SIZE ?
                                                      PAGE_SIZE :
                                                      data_len);
                                        data_len -= PAGE_SIZE;
                                }

                                /* Full success... */
                                break;
                        }
                        err = -ENOBUFS;
                        goto failure;
                }
                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                err = -EAGAIN;
                if (!timeo)
                        goto failure;
                if (signal_pending(current))
                        goto interrupted;
                timeo = sock_wait_for_wmem(sk, timeo);
        }

        skb_set_owner_w(skb, sk);
        return skb;

interrupted:
        err = sock_intr_errno(timeo);
failure:
        *errcode = err;
        return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

static void __lock_sock(struct sock *sk)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
                                        TASK_UNINTERRUPTIBLE);
                spin_unlock_bh(&sk->sk_lock.slock);
                schedule();
                spin_lock_bh(&sk->sk_lock.slock);
                if (!sock_owned_by_user(sk))
                        break;
        }
        finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
        struct sk_buff *skb = sk->sk_backlog.head;

        do {
                sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
                bh_unlock_sock(sk);

                do {
                        struct sk_buff *next = skb->next;

                        skb->next = NULL;
                        sk->sk_backlog_rcv(sk, skb);

                        /*
                         * We are in process context here with softirqs
                         * disabled, use cond_resched_softirq() to preempt.
                         * This is safe to do because we've taken the backlog
                         * queue private:
                         */
                        cond_resched_softirq();

                        skb = next;
                } while (skb != NULL);

                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
        int rc;
        DEFINE_WAIT(wait);

        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        finish_wait(sk->sk_sleep, &wait);
        return rc;
}

EXPORT_SYMBOL(sk_wait_data);

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
        return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
                    int len, int flags)
{
        return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
        return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
        return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
                    int *len, int peer)
{
        return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
        return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
        return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
        return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int optlen)
{
        return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
                    size_t len)
{
        return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
                    size_t len, int flags)
{
        return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
        /* Mirror missing mmap method error code */
        return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
        ssize_t res;
        struct msghdr msg = { .msg_flags = flags };
        struct kvec iov;
        char *kaddr = kmap(page);
        iov.iov_base = kaddr + offset;
        iov.iov_len = size;
        res = kernel_sendmsg(sock, &msg, &iov, 1, size);
        kunmap(page);
        return res;
}

1455 /*
1456  *      Default Socket Callbacks
1457  */
1458
1459 static void sock_def_wakeup(struct sock *sk)
1460 {
1461         read_lock(&sk->sk_callback_lock);
1462         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1463                 wake_up_interruptible_all(sk->sk_sleep);
1464         read_unlock(&sk->sk_callback_lock);
1465 }
1466
1467 static void sock_def_error_report(struct sock *sk)
1468 {
1469         read_lock(&sk->sk_callback_lock);
1470         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1471                 wake_up_interruptible(sk->sk_sleep);
1472         sk_wake_async(sk,0,POLL_ERR);
1473         read_unlock(&sk->sk_callback_lock);
1474 }
1475
1476 static void sock_def_readable(struct sock *sk, int len)
1477 {
1478         read_lock(&sk->sk_callback_lock);
1479         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1480                 wake_up_interruptible(sk->sk_sleep);
1481         sk_wake_async(sk,1,POLL_IN);
1482         read_unlock(&sk->sk_callback_lock);
1483 }
1484
1485 static void sock_def_write_space(struct sock *sk)
1486 {
1487         read_lock(&sk->sk_callback_lock);
1488
1489         /* Do not wake up a writer until he can make "significant"
1490          * progress.  --DaveM
1491          */
1492         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1493                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1494                         wake_up_interruptible(sk->sk_sleep);
1495
1496                 /* Should agree with poll, otherwise some programs break */
1497                 if (sock_writeable(sk))
1498                         sk_wake_async(sk, 2, POLL_OUT);
1499         }
1500
1501         read_unlock(&sk->sk_callback_lock);
1502 }
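/*
 * The shift above tests "wmem_alloc * 2 <= sndbuf": writers are only
 * woken once at least half of the send buffer is free again, mirroring
 * the sock_writeable() test that poll() relies on.
 */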
1503
1504 static void sock_def_destruct(struct sock *sk)
1505 {
1506         kfree(sk->sk_protinfo);
1507 }
1508
1509 void sk_send_sigurg(struct sock *sk)
1510 {
1511         if (sk->sk_socket && sk->sk_socket->file)
1512                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1513                         sk_wake_async(sk, 3, POLL_PRI);
1514 }
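/*
 * Note: send_sigurg() delivers SIGURG only if userspace has claimed
 * ownership of the socket's file, e.g. via fcntl(F_SETOWN).
 */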
1515
1516 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1517                     unsigned long expires)
1518 {
1519         if (!mod_timer(timer, expires))
1520                 sock_hold(sk);
1521 }
1522
1523 EXPORT_SYMBOL(sk_reset_timer);
1524
1525 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1526 {
1527         if (timer_pending(timer) && del_timer(timer))
1528                 __sock_put(sk);
1529 }
1530
1531 EXPORT_SYMBOL(sk_stop_timer);
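/*
 * These helpers pair a protocol timer with a reference on the sock, so
 * the sock cannot be freed while the timer is pending. Illustrative
 * sketch (hypothetical retransmit rearm, not part of this file):
 *
 *	sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ);
 *
 * sk_reset_timer() takes a reference only when the timer was not
 * already pending; the timer handler is expected to drop it again with
 * sock_put() when it fires, and sk_stop_timer() drops it on cancel.
 */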
1532
1533 void sock_init_data(struct socket *sock, struct sock *sk)
1534 {
1535         skb_queue_head_init(&sk->sk_receive_queue);
1536         skb_queue_head_init(&sk->sk_write_queue);
1537         skb_queue_head_init(&sk->sk_error_queue);
1538 #ifdef CONFIG_NET_DMA
1539         skb_queue_head_init(&sk->sk_async_wait_queue);
1540 #endif
1541
1542         sk->sk_send_head        =       NULL;
1543
1544         init_timer(&sk->sk_timer);
1545
1546         sk->sk_allocation       =       GFP_KERNEL;
1547         sk->sk_rcvbuf           =       sysctl_rmem_default;
1548         sk->sk_sndbuf           =       sysctl_wmem_default;
1549         sk->sk_state            =       TCP_CLOSE;
1550         sk->sk_socket           =       sock;
1551
1552         sock_set_flag(sk, SOCK_ZAPPED);
1553
1554         if (sock) {
1555                 sk->sk_type     =       sock->type;
1556                 sk->sk_sleep    =       &sock->wait;
1557                 sock->sk        =       sk;
1558         } else
1559                 sk->sk_sleep    =       NULL;
1560
1561         rwlock_init(&sk->sk_dst_lock);
1562         rwlock_init(&sk->sk_callback_lock);
1563         lockdep_set_class_and_name(&sk->sk_callback_lock,
1564                         af_callback_keys + sk->sk_family,
1565                         af_family_clock_key_strings[sk->sk_family]);
1566
1567         sk->sk_state_change     =       sock_def_wakeup;
1568         sk->sk_data_ready       =       sock_def_readable;
1569         sk->sk_write_space      =       sock_def_write_space;
1570         sk->sk_error_report     =       sock_def_error_report;
1571         sk->sk_destruct         =       sock_def_destruct;
1572
1573         sk->sk_sndmsg_page      =       NULL;
1574         sk->sk_sndmsg_off       =       0;
1575
1576         sk->sk_peercred.pid     =       0;
1577         sk->sk_peercred.uid     =       -1;
1578         sk->sk_peercred.gid     =       -1;
1579         sk->sk_write_pending    =       0;
1580         sk->sk_rcvlowat         =       1;
1581         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1582         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1583
1584         sk->sk_stamp = ktime_set(-1L, -1L);
1585
1586         atomic_set(&sk->sk_refcnt, 1);
1587 }
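/*
 * Address families call sock_init_data() right after allocating a
 * struct sock in their create path, then override the defaults they
 * care about, e.g. (illustrative; foo_destruct is hypothetical):
 *
 *	sock_init_data(sock, sk);
 *	sk->sk_protocol = protocol;
 *	sk->sk_destruct = foo_destruct;
 */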
1588
1589 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1590 {
1591         might_sleep();
1592         spin_lock_bh(&sk->sk_lock.slock);
1593         if (sk->sk_lock.owned)
1594                 __lock_sock(sk);
1595         sk->sk_lock.owned = 1;
1596         spin_unlock(&sk->sk_lock.slock);
1597         /*
1598          * The sk_lock has mutex_lock() semantics here:
1599          */
1600         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1601         local_bh_enable();
1602 }
1603
1604 EXPORT_SYMBOL(lock_sock_nested);
1605
1606 void fastcall release_sock(struct sock *sk)
1607 {
1608         /*
1609          * The sk_lock has mutex_unlock() semantics:
1610          */
1611         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1612
1613         spin_lock_bh(&sk->sk_lock.slock);
1614         if (sk->sk_backlog.tail)
1615                 __release_sock(sk);
1616         sk->sk_lock.owned = 0;
1617         if (waitqueue_active(&sk->sk_lock.wq))
1618                 wake_up(&sk->sk_lock.wq);
1619         spin_unlock_bh(&sk->sk_lock.slock);
1620 }
1621 EXPORT_SYMBOL(release_sock);
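/*
 * lock_sock()/release_sock() give process context exclusive access to
 * the socket against the softirq receive path, which queues packets to
 * the backlog while the lock is owned. Illustrative sketch (hypothetical
 * option handler, not part of this file):
 *
 *	lock_sock(sk);
 *	... modify socket state; incoming packets are backlogged ...
 *	release_sock(sk);	(drains the backlog via __release_sock)
 */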
1622
1623 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1624 {
1625         struct timeval tv;
1626         if (!sock_flag(sk, SOCK_TIMESTAMP))
1627                 sock_enable_timestamp(sk);
1628         tv = ktime_to_timeval(sk->sk_stamp);
1629         if (tv.tv_sec == -1)
1630                 return -ENOENT;
1631         if (tv.tv_sec == 0) {
1632                 sk->sk_stamp = ktime_get_real();
1633                 tv = ktime_to_timeval(sk->sk_stamp);
1634         }
1635         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1636 }
1637 EXPORT_SYMBOL(sock_get_timestamp);
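/*
 * sock_get_timestamp() backs the SIOCGSTAMP ioctl. Illustrative
 * userspace use (sketch):
 *
 *	struct timeval tv;
 *	if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
 *		printf("last packet at %ld.%06ld\n",
 *		       tv.tv_sec, tv.tv_usec);
 */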
1638
1639 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1640 {
1641         struct timespec ts;
1642         if (!sock_flag(sk, SOCK_TIMESTAMP))
1643                 sock_enable_timestamp(sk);
1644         ts = ktime_to_timespec(sk->sk_stamp);
1645         if (ts.tv_sec == -1)
1646                 return -ENOENT;
1647         if (ts.tv_sec == 0) {
1648                 sk->sk_stamp = ktime_get_real();
1649                 ts = ktime_to_timespec(sk->sk_stamp);
1650         }
1651         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1652 }
1653 EXPORT_SYMBOL(sock_get_timestampns);
1654
1655 void sock_enable_timestamp(struct sock *sk)
1656 {
1657         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1658                 sock_set_flag(sk, SOCK_TIMESTAMP);
1659                 net_enable_timestamp();
1660         }
1661 }
1662 EXPORT_SYMBOL(sock_enable_timestamp);
1663
1664 /*
1665  *	Get a socket option on a socket.
1666  *
1667  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
1668  *      asynchronous errors should be reported by getsockopt. We assume
1669  *	this means if you specify SO_ERROR (otherwise what's the point of it).
1670  */
1671 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1672                            char __user *optval, int __user *optlen)
1673 {
1674         struct sock *sk = sock->sk;
1675
1676         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1677 }
1678
1679 EXPORT_SYMBOL(sock_common_getsockopt);
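/*
 * The SO_ERROR convention described above, seen from userspace
 * (illustrative sketch):
 *
 *	int err = 0;
 *	socklen_t len = sizeof(err);
 *	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err)
 *		fprintf(stderr, "async error: %s\n", strerror(err));
 */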
1680
1681 #ifdef CONFIG_COMPAT
1682 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1683                                   char __user *optval, int __user *optlen)
1684 {
1685         struct sock *sk = sock->sk;
1686
1687         if (sk->sk_prot->compat_getsockopt != NULL)
1688                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
1689                                                       optval, optlen);
1690         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1691 }
1692 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1693 #endif
1694
1695 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1696                         struct msghdr *msg, size_t size, int flags)
1697 {
1698         struct sock *sk = sock->sk;
1699         int addr_len = 0;
1700         int err;
1701
1702         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1703                                    flags & ~MSG_DONTWAIT, &addr_len);
1704         if (err >= 0)
1705                 msg->msg_namelen = addr_len;
1706         return err;
1707 }
1708
1709 EXPORT_SYMBOL(sock_common_recvmsg);
1710
1711 /*
1712  *      Set socket options on an inet socket.
1713  */
1714 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1715                            char __user *optval, int optlen)
1716 {
1717         struct sock *sk = sock->sk;
1718
1719         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1720 }
1721
1722 EXPORT_SYMBOL(sock_common_setsockopt);
1723
1724 #ifdef CONFIG_COMPAT
1725 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1726                                   char __user *optval, int optlen)
1727 {
1728         struct sock *sk = sock->sk;
1729
1730         if (sk->sk_prot->compat_setsockopt != NULL)
1731                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
1732                                                       optval, optlen);
1733         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1734 }
1735 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1736 #endif
1737
1738 void sk_common_release(struct sock *sk)
1739 {
1740         if (sk->sk_prot->destroy)
1741                 sk->sk_prot->destroy(sk);
1742
1743         /*
1744          * Observation: when sk_common_release is called, user processes
1745          * no longer have access to the socket, but the network stack
1746          * still does. Step one, detach it from networking:
1747          *
1748          * A. Remove it from the hash tables.
1749          */
1750
1751         sk->sk_prot->unhash(sk);
1752
1753         /*
1754          * At this point the socket cannot receive new packets, but some may
1755          * still be in flight: another CPU may have done the hash table lookup
1756          * in its receive path before we unhashed the socket. Those packets
1757          * will reach the receive queue and be purged by the socket destructor.
1758          *
1759          * We also still have packets pending on the receive queue, and
1760          * probably our own packets waiting in device queues. sock_destroy
1761          * will drain the receive queue, but transmitted packets delay
1762          * socket destruction until the last reference is released.
1763          */
1764
1765         sock_orphan(sk);
1766
1767         xfrm_sk_free_policy(sk);
1768
1769         sk_refcnt_debug_release(sk);
1770         sock_put(sk);
1771 }
1772
1773 EXPORT_SYMBOL(sk_common_release);
1774
1775 static DEFINE_RWLOCK(proto_list_lock);
1776 static LIST_HEAD(proto_list);
1777
1778 int proto_register(struct proto *prot, int alloc_slab)
1779 {
1780         char *request_sock_slab_name = NULL;
1781         char *timewait_sock_slab_name;
1782         int rc = -ENOBUFS;
1783
1784         if (alloc_slab) {
1785                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1786                                                SLAB_HWCACHE_ALIGN, NULL);
1787
1788                 if (prot->slab == NULL) {
1789                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1790                                prot->name);
1791                         goto out;
1792                 }
1793
1794                 if (prot->rsk_prot != NULL) {
1795                         static const char mask[] = "request_sock_%s";
1796
1797                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1798                         if (request_sock_slab_name == NULL)
1799                                 goto out_free_sock_slab;
1800
1801                         sprintf(request_sock_slab_name, mask, prot->name);
1802                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1803                                                                  prot->rsk_prot->obj_size, 0,
1804                                                                  SLAB_HWCACHE_ALIGN, NULL);
1805
1806                         if (prot->rsk_prot->slab == NULL) {
1807                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1808                                        prot->name);
1809                                 goto out_free_request_sock_slab_name;
1810                         }
1811                 }
1812
1813                 if (prot->twsk_prot != NULL) {
1814                         static const char mask[] = "tw_sock_%s";
1815
1816                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1817
1818                         if (timewait_sock_slab_name == NULL)
1819                                 goto out_free_request_sock_slab;
1820
1821                         sprintf(timewait_sock_slab_name, mask, prot->name);
1822                         prot->twsk_prot->twsk_slab =
1823                                 kmem_cache_create(timewait_sock_slab_name,
1824                                                   prot->twsk_prot->twsk_obj_size,
1825                                                   0, SLAB_HWCACHE_ALIGN,
1826                                                   NULL);
1827                         if (prot->twsk_prot->twsk_slab == NULL)
1828                                 goto out_free_timewait_sock_slab_name;
1829                 }
1830         }
1831
1832         write_lock(&proto_list_lock);
1833         list_add(&prot->node, &proto_list);
1834         write_unlock(&proto_list_lock);
1835         rc = 0;
1836 out:
1837         return rc;
1838 out_free_timewait_sock_slab_name:
1839         kfree(timewait_sock_slab_name);
1840 out_free_request_sock_slab:
1841         if (prot->rsk_prot && prot->rsk_prot->slab) {
1842                 kmem_cache_destroy(prot->rsk_prot->slab);
1843                 prot->rsk_prot->slab = NULL;
1844         }
1845 out_free_request_sock_slab_name:
1846         kfree(request_sock_slab_name);
1847 out_free_sock_slab:
1848         kmem_cache_destroy(prot->slab);
1849         prot->slab = NULL;
1850         goto out;
1851 }
1852
1853 EXPORT_SYMBOL(proto_register);
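/*
 * Illustrative registration sketch (hypothetical "foo" protocol, not
 * part of this file); passing alloc_slab = 1 requests a kmem cache
 * sized by obj_size:
 *
 *	static struct proto foo_prot = {
 *		.name	  = "FOO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct foo_sock),
 *	};
 *
 *	err = proto_register(&foo_prot, 1);
 *	...
 *	proto_unregister(&foo_prot);
 */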
1854
1855 void proto_unregister(struct proto *prot)
1856 {
1857         write_lock(&proto_list_lock);
1858         list_del(&prot->node);
1859         write_unlock(&proto_list_lock);
1860
1861         if (prot->slab != NULL) {
1862                 kmem_cache_destroy(prot->slab);
1863                 prot->slab = NULL;
1864         }
1865
1866         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1867                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1868
1869                 kmem_cache_destroy(prot->rsk_prot->slab);
1870                 kfree(name);
1871                 prot->rsk_prot->slab = NULL;
1872         }
1873
1874         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1875                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1876
1877                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1878                 kfree(name);
1879                 prot->twsk_prot->twsk_slab = NULL;
1880         }
1881 }
1882
1883 EXPORT_SYMBOL(proto_unregister);
1884
1885 #ifdef CONFIG_PROC_FS
1886 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1887 {
1888         read_lock(&proto_list_lock);
1889         return seq_list_start_head(&proto_list, *pos);
1890 }
1891
1892 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1893 {
1894         return seq_list_next(v, &proto_list, pos);
1895 }
1896
1897 static void proto_seq_stop(struct seq_file *seq, void *v)
1898 {
1899         read_unlock(&proto_list_lock);
1900 }
1901
1902 static char proto_method_implemented(const void *method)
1903 {
1904         return method == NULL ? 'n' : 'y';
1905 }
1906
1907 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1908 {
1909         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1910                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1911                    proto->name,
1912                    proto->obj_size,
1913                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1914                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1915                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1916                    proto->max_header,
1917                    proto->slab == NULL ? "no" : "yes",
1918                    module_name(proto->owner),
1919                    proto_method_implemented(proto->close),
1920                    proto_method_implemented(proto->connect),
1921                    proto_method_implemented(proto->disconnect),
1922                    proto_method_implemented(proto->accept),
1923                    proto_method_implemented(proto->ioctl),
1924                    proto_method_implemented(proto->init),
1925                    proto_method_implemented(proto->destroy),
1926                    proto_method_implemented(proto->shutdown),
1927                    proto_method_implemented(proto->setsockopt),
1928                    proto_method_implemented(proto->getsockopt),
1929                    proto_method_implemented(proto->sendmsg),
1930                    proto_method_implemented(proto->recvmsg),
1931                    proto_method_implemented(proto->sendpage),
1932                    proto_method_implemented(proto->bind),
1933                    proto_method_implemented(proto->backlog_rcv),
1934                    proto_method_implemented(proto->hash),
1935                    proto_method_implemented(proto->unhash),
1936                    proto_method_implemented(proto->get_port),
1937                    proto_method_implemented(proto->enter_memory_pressure));
1938 }
1939
1940 static int proto_seq_show(struct seq_file *seq, void *v)
1941 {
1942         if (v == &proto_list)
1943                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1944                            "protocol",
1945                            "size",
1946                            "sockets",
1947                            "memory",
1948                            "press",
1949                            "maxhdr",
1950                            "slab",
1951                            "module",
1952                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1953         else
1954                 proto_seq_printf(seq, list_entry(v, struct proto, node));
1955         return 0;
1956 }
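/*
 * The table is read via "cat /proc/net/protocols"; the two-letter
 * columns (cl co di ... em) answer 'y'/'n' for whether each struct
 * proto method above is implemented.
 */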
1957
1958 static const struct seq_operations proto_seq_ops = {
1959         .start  = proto_seq_start,
1960         .next   = proto_seq_next,
1961         .stop   = proto_seq_stop,
1962         .show   = proto_seq_show,
1963 };
1964
1965 static int proto_seq_open(struct inode *inode, struct file *file)
1966 {
1967         return seq_open(file, &proto_seq_ops);
1968 }
1969
1970 static const struct file_operations proto_seq_fops = {
1971         .owner          = THIS_MODULE,
1972         .open           = proto_seq_open,
1973         .read           = seq_read,
1974         .llseek         = seq_lseek,
1975         .release        = seq_release,
1976 };
1977
1978 static int __init proto_init(void)
1979 {
1980         /* register /proc/net/protocols */
1981         return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1982 }
1983
1984 subsys_initcall(proto_init);
1985
1986 #endif /* PROC_FS */
1987
1988 EXPORT_SYMBOL(sk_alloc);
1989 EXPORT_SYMBOL(sk_free);
1990 EXPORT_SYMBOL(sk_send_sigurg);
1991 EXPORT_SYMBOL(sock_alloc_send_skb);
1992 EXPORT_SYMBOL(sock_init_data);
1993 EXPORT_SYMBOL(sock_kfree_s);
1994 EXPORT_SYMBOL(sock_kmalloc);
1995 EXPORT_SYMBOL(sock_no_accept);
1996 EXPORT_SYMBOL(sock_no_bind);
1997 EXPORT_SYMBOL(sock_no_connect);
1998 EXPORT_SYMBOL(sock_no_getname);
1999 EXPORT_SYMBOL(sock_no_getsockopt);
2000 EXPORT_SYMBOL(sock_no_ioctl);
2001 EXPORT_SYMBOL(sock_no_listen);
2002 EXPORT_SYMBOL(sock_no_mmap);
2003 EXPORT_SYMBOL(sock_no_poll);
2004 EXPORT_SYMBOL(sock_no_recvmsg);
2005 EXPORT_SYMBOL(sock_no_sendmsg);
2006 EXPORT_SYMBOL(sock_no_sendpage);
2007 EXPORT_SYMBOL(sock_no_setsockopt);
2008 EXPORT_SYMBOL(sock_no_shutdown);
2009 EXPORT_SYMBOL(sock_no_socketpair);
2010 EXPORT_SYMBOL(sock_rfree);
2011 EXPORT_SYMBOL(sock_setsockopt);
2012 EXPORT_SYMBOL(sock_wfree);
2013 EXPORT_SYMBOL(sock_wmalloc);
2014 EXPORT_SYMBOL(sock_i_uid);
2015 EXPORT_SYMBOL(sock_i_ino);
2016 EXPORT_SYMBOL(sysctl_optmem_max);
2017 #ifdef CONFIG_SYSCTL
2018 EXPORT_SYMBOL(sysctl_rmem_max);
2019 EXPORT_SYMBOL(sysctl_wmem_max);
2020 #endif