/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Generic socket support routines. Memory allocators, socket lock/release
 *              handler for protocols to use and generic option handler.
 *
 *
 * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *              Alan Cox        :       Numerous verify_area() problems
 *              Alan Cox        :       Connecting on a connecting socket
 *                                      now returns an error for tcp.
 *              Alan Cox        :       sock->protocol is set correctly.
 *                                      and is not sometimes left as 0.
 *              Alan Cox        :       connect handles icmp errors on a
 *                                      connect properly. Unfortunately there
 *                                      is a restart syscall nasty there. I
 *                                      can't match BSD without hacking the C
 *                                      library. Ideas urgently sought!
 *              Alan Cox        :       Disallow bind() to addresses that are
 *                                      not ours - especially broadcast ones!!
 *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 *                                      instead they leave that for the DESTROY timer.
 *              Alan Cox        :       Clean up error flag in accept
 *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 *                                      was buggy. Put a remove_sock() in the handler
 *                                      for memory when we hit 0. Also altered the timer
 *                                      code. The ACK stuff can wait and needs major
 *                                      TCP layer surgery.
 *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 *                                      and fixed timer/inet_bh race.
 *              Alan Cox        :       Added zapped flag for TCP
 *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 *      Pauline Middelink       :       identd support
 *              Alan Cox        :       Fixed connect() taking signals I think.
 *              Alan Cox        :       SO_LINGER supported
 *              Alan Cox        :       Error reporting fixes
 *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 *              Alan Cox        :       inet sockets don't set sk->type!
 *              Alan Cox        :       Split socket option code
 *              Alan Cox        :       Callbacks
 *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 *              Alex            :       Removed restriction on inet fioctl
 *              Alan Cox        :       Splitting INET from NET core
 *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 *              Alan Cox        :       Split IP from generic code
 *              Alan Cox        :       New kfree_skbmem()
 *              Alan Cox        :       Make SO_DEBUG superuser only.
 *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 *                                      (compatibility fix)
 *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 *              Alan Cox        :       Allocator for a socket is settable.
 *              Alan Cox        :       SO_ERROR includes soft errors.
 *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 *              Alan Cox        :       Generic socket allocation to make hooks
 *                                      easier (suggested by Craig Metz).
 *              Michael Pall    :       SO_ERROR returns positive errno again
 *              Steve Whitehouse:       Added default destructor to free
 *                                      protocol private data.
 *              Steve Whitehouse:       Added various other default routines
 *                                      common to several socket families.
 *              Chris Evans     :       Call suser() check last on F_SETOWN
 *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 *              Andi Kleen      :       Fix write_space callback
 *              Chris Evans     :       Security fixes - signedness again
 *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
157   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
158   "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
159 };
160 static const char *af_family_slock_key_strings[AF_MAX+1] = {
161   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
162   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
163   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
164   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
165   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
166   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
167   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
168   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
169   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
170   "slock-27"       , "slock-28"          , "slock-29"          ,
171   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
172   "slock-AF_RXRPC" , "slock-AF_MAX"
173 };
174 static const char *af_family_clock_key_strings[AF_MAX+1] = {
175   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
176   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
177   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
178   "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
179   "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
180   "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
181   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
182   "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
183   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
184   "clock-27"       , "clock-28"          , "clock-29"          ,
185   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
186   "clock-AF_RXRPC" , "clock-AF_MAX"
187 };
188 #endif
189
/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS         256
#define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

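/*
 * Worked example (illustrative; the exact sizeof(struct sk_buff) is an
 * assumption and varies by architecture and config): on a 64-bit build
 * where sizeof(struct sk_buff) is roughly 240 bytes,
 *
 *	_SK_MEM_OVERHEAD ~= 240 + 256 = 496 bytes
 *	SK_WMEM_MAX      ~= 496 * 256 = 126976 bytes (~124 KB)
 *
 * so the defaults scale with the real per-skb overhead instead of
 * assuming a fixed packet size.
 */
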
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
        struct timeval tv;

        if (optlen < sizeof(tv))
                return -EINVAL;
        if (copy_from_user(&tv, optval, sizeof(tv)))
                return -EFAULT;
        if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
                return -EDOM;

        if (tv.tv_sec < 0) {
                static int warned __read_mostly;

                *timeo_p = 0;
                if (warned < 10 && net_ratelimit()) {
                        warned++;
                        printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
                               "tries to set negative timeout\n",
                                current->comm, current->pid);
                }
                return 0;
        }
        *timeo_p = MAX_SCHEDULE_TIMEOUT;
        if (tv.tv_sec == 0 && tv.tv_usec == 0)
                return 0;
        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
        return 0;
}

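/*
 * Example (illustrative only): with HZ == 100, a userspace caller doing
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 *
 * reaches sock_set_timeout() via SO_RCVTIMEO below, and the stored
 * timeout becomes
 *
 *	2*HZ + (500000 + 9999)/10000 = 200 + 50 = 250 jiffies
 *
 * i.e. the microsecond part is rounded up to whole jiffies.
 */
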
static void sock_warn_obsolete_bsdism(const char *name)
{
        static int warned;
        static char warncomm[TASK_COMM_LEN];
        if (strcmp(warncomm, current->comm) && warned < 5) {
                strcpy(warncomm,  current->comm);
                printk(KERN_WARNING "process `%s' is using obsolete "
                       "%s SO_BSDCOMPAT\n", warncomm, name);
                warned++;
        }
}

static void sock_disable_timestamp(struct sock *sk)
{
        if (sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_reset_flag(sk, SOCK_TIMESTAMP);
                net_disable_timestamp();
        }
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        int err = 0;
        int skb_len;

        /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
                err = -ENOMEM;
                goto out;
        }

        err = sk_filter(sk, skb);
        if (err)
                goto out;

        skb->dev = NULL;
        skb_set_owner_r(skb, sk);

        /* Cache the SKB length before we tack it onto the receive
         * queue.  Once it is added it no longer belongs to us and
         * may be freed by other threads of control pulling packets
         * from the queue.
         */
        skb_len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
out:
        return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

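/*
 * Sketch of typical usage (hypothetical protocol, not part of this
 * file): an input routine hands a parsed skb to the socket; on failure
 * (rcvbuf overflow or filter rejection) the caller still owns the skb
 * and must free it:
 *
 *	static int myproto_rcv(struct sock *sk, struct sk_buff *skb)
 *	{
 *		if (sock_queue_rcv_skb(sk, skb) < 0) {
 *			kfree_skb(skb);
 *			return NET_RX_DROP;
 *		}
 *		return NET_RX_SUCCESS;
 *	}
 */
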
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
        int rc = NET_RX_SUCCESS;

        if (sk_filter(sk, skb))
                goto discard_and_relse;

        skb->dev = NULL;

        if (nested)
                bh_lock_sock_nested(sk);
        else
                bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /*
                 * trylock + unlock semantics:
                 */
                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

                rc = sk->sk_backlog_rcv(sk, skb);

                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);
out:
        sock_put(sk);
        return rc;
discard_and_relse:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk->sk_dst_cache;

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk->sk_dst_cache = NULL;
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk_dst_get(sk);

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk_dst_reset(sk);
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(sk_dst_check);

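/*
 * Typical caller pattern (illustrative): a transmit path revalidates
 * its cached route before each use and falls back to a fresh lookup
 * (myproto_reroute() is a hypothetical helper):
 *
 *	struct dst_entry *dst = sk_dst_check(sk, 0);
 *
 *	if (dst == NULL)
 *		dst = myproto_reroute(sk);
 */
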
static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
        int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
        char devname[IFNAMSIZ];
        int index;

        /* Sorry... */
        ret = -EPERM;
        if (!capable(CAP_NET_RAW))
                goto out;

        ret = -EINVAL;
        if (optlen < 0)
                goto out;

        /* Bind this socket to a particular device like "eth0",
         * as specified in the passed interface name. If the
         * name is "" or the option length is zero the socket
         * is not bound.
         */
        if (optlen > IFNAMSIZ - 1)
                optlen = IFNAMSIZ - 1;
        memset(devname, 0, sizeof(devname));

        ret = -EFAULT;
        if (copy_from_user(devname, optval, optlen))
                goto out;

        if (devname[0] == '\0') {
                index = 0;
        } else {
                struct net_device *dev = dev_get_by_name(devname);

                ret = -ENODEV;
                if (!dev)
                        goto out;

                index = dev->ifindex;
                dev_put(dev);
        }

        lock_sock(sk);
        sk->sk_bound_dev_if = index;
        sk_dst_reset(sk);
        release_sock(sk);

        ret = 0;

out:
#endif

        return ret;
}

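/*
 * Example (illustrative, userspace): binding a socket to one interface
 * needs CAP_NET_RAW, as enforced above:
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", 5);
 *
 * Passing "" (or an option length of zero) removes the binding again.
 */
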
/*
 *      This is meant for all protocols to use and covers goings on
 *      at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int optlen)
{
        struct sock *sk=sock->sk;
        struct sk_filter *filter;
        int val;
        int valbool;
        struct linger ling;
        int ret = 0;

        /*
         *      Options without arguments
         */

#ifdef SO_DONTLINGER            /* Compatibility item... */
        if (optname == SO_DONTLINGER) {
                lock_sock(sk);
                sock_reset_flag(sk, SOCK_LINGER);
                release_sock(sk);
                return 0;
        }
#endif

        if (optname == SO_BINDTODEVICE)
                return sock_bindtodevice(sk, optval, optlen);

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        valbool = val?1:0;

        lock_sock(sk);

        switch(optname) {
        case SO_DEBUG:
                if (val && !capable(CAP_NET_ADMIN)) {
                        ret = -EACCES;
                }
                else if (valbool)
                        sock_set_flag(sk, SOCK_DBG);
                else
                        sock_reset_flag(sk, SOCK_DBG);
                break;
        case SO_REUSEADDR:
                sk->sk_reuse = valbool;
                break;
        case SO_TYPE:
        case SO_ERROR:
                ret = -ENOPROTOOPT;
                break;
        case SO_DONTROUTE:
                if (valbool)
                        sock_set_flag(sk, SOCK_LOCALROUTE);
                else
                        sock_reset_flag(sk, SOCK_LOCALROUTE);
                break;
        case SO_BROADCAST:
                sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
                break;
        case SO_SNDBUF:
                /* Don't error on this; BSD doesn't and if you think
                   about it this is right. Otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints */

                if (val > sysctl_wmem_max)
                        val = sysctl_wmem_max;
set_sndbuf:
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                if ((val * 2) < SOCK_MIN_SNDBUF)
                        sk->sk_sndbuf = SOCK_MIN_SNDBUF;
                else
                        sk->sk_sndbuf = val * 2;

                /*
                 *      Wake up sending tasks if we
                 *      upped the value.
                 */
                sk->sk_write_space(sk);
                break;

        case SO_SNDBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_sndbuf;

        case SO_RCVBUF:
                /* Don't error on this; BSD doesn't and if you think
                   about it this is right. Otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints */

                if (val > sysctl_rmem_max)
                        val = sysctl_rmem_max;
set_rcvbuf:
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                /*
                 * We double it on the way in to account for
                 * "struct sk_buff" etc. overhead.   Applications
                 * assume that the SO_RCVBUF setting they make will
                 * allow that much actual data to be received on that
                 * socket.
                 *
                 * Applications are unaware that "struct sk_buff" and
                 * other overheads allocate from the receive buffer
                 * during socket buffer allocation.
                 *
                 * And after considering the possible alternatives,
                 * returning the value we actually used in getsockopt
                 * is the most desirable behavior.
                 */
                if ((val * 2) < SOCK_MIN_RCVBUF)
                        sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
                else
                        sk->sk_rcvbuf = val * 2;
                break;

        case SO_RCVBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_rcvbuf;

        case SO_KEEPALIVE:
#ifdef CONFIG_INET
                if (sk->sk_protocol == IPPROTO_TCP)
                        tcp_set_keepalive(sk, valbool);
#endif
                sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
                break;

        case SO_OOBINLINE:
                sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
                break;

        case SO_NO_CHECK:
                sk->sk_no_check = valbool;
                break;

        case SO_PRIORITY:
                if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
                        sk->sk_priority = val;
                else
                        ret = -EPERM;
                break;

        case SO_LINGER:
                if (optlen < sizeof(ling)) {
                        ret = -EINVAL;  /* 1003.1g */
                        break;
                }
                if (copy_from_user(&ling,optval,sizeof(ling))) {
                        ret = -EFAULT;
                        break;
                }
                if (!ling.l_onoff)
                        sock_reset_flag(sk, SOCK_LINGER);
                else {
#if (BITS_PER_LONG == 32)
                        if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
                                sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
                        else
#endif
                                sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
                        sock_set_flag(sk, SOCK_LINGER);
                }
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("setsockopt");
                break;

        case SO_PASSCRED:
                if (valbool)
                        set_bit(SOCK_PASSCRED, &sock->flags);
                else
                        clear_bit(SOCK_PASSCRED, &sock->flags);
                break;

        case SO_TIMESTAMP:
        case SO_TIMESTAMPNS:
                if (valbool)  {
                        if (optname == SO_TIMESTAMP)
                                sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                        else
                                sock_set_flag(sk, SOCK_RCVTSTAMPNS);
                        sock_set_flag(sk, SOCK_RCVTSTAMP);
                        sock_enable_timestamp(sk);
                } else {
                        sock_reset_flag(sk, SOCK_RCVTSTAMP);
                        sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                }
                break;

        case SO_RCVLOWAT:
                if (val < 0)
                        val = INT_MAX;
                sk->sk_rcvlowat = val ? : 1;
                break;

        case SO_RCVTIMEO:
                ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
                break;

        case SO_SNDTIMEO:
                ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
                break;

        case SO_ATTACH_FILTER:
                ret = -EINVAL;
                if (optlen == sizeof(struct sock_fprog)) {
                        struct sock_fprog fprog;

                        ret = -EFAULT;
                        if (copy_from_user(&fprog, optval, sizeof(fprog)))
                                break;

                        ret = sk_attach_filter(&fprog, sk);
                }
                break;

        case SO_DETACH_FILTER:
                rcu_read_lock_bh();
                filter = rcu_dereference(sk->sk_filter);
                if (filter) {
                        rcu_assign_pointer(sk->sk_filter, NULL);
                        sk_filter_release(sk, filter);
                        rcu_read_unlock_bh();
                        break;
                }
                rcu_read_unlock_bh();
                ret = -ENONET;
                break;

        case SO_PASSSEC:
                if (valbool)
                        set_bit(SOCK_PASSSEC, &sock->flags);
                else
                        clear_bit(SOCK_PASSSEC, &sock->flags);
                break;

                /* We implement the SO_SNDLOWAT etc to
                   not be settable (1003.1g 5.3) */
        default:
                ret = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);
        return ret;
}


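/*
 * Example (illustrative, userspace): because SO_RCVBUF stores double
 * the requested value to cover struct sk_buff overhead, setting and
 * then reading the option back shows the doubled figure:
 *
 *	int val = 65536, out;
 *	socklen_t len = sizeof(out);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *
 * out is now 131072, assuming sysctl_rmem_max allowed the full 65536.
 */
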
int sock_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        union {
                int val;
                struct linger ling;
                struct timeval tm;
        } v;

        unsigned int lv = sizeof(int);
        int len;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case SO_DEBUG:
                v.val = sock_flag(sk, SOCK_DBG);
                break;

        case SO_DONTROUTE:
                v.val = sock_flag(sk, SOCK_LOCALROUTE);
                break;

        case SO_BROADCAST:
                v.val = !!sock_flag(sk, SOCK_BROADCAST);
                break;

        case SO_SNDBUF:
                v.val = sk->sk_sndbuf;
                break;

        case SO_RCVBUF:
                v.val = sk->sk_rcvbuf;
                break;

        case SO_REUSEADDR:
                v.val = sk->sk_reuse;
                break;

        case SO_KEEPALIVE:
                v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
                break;

        case SO_TYPE:
                v.val = sk->sk_type;
                break;

        case SO_ERROR:
                v.val = -sock_error(sk);
                if (v.val==0)
                        v.val = xchg(&sk->sk_err_soft, 0);
                break;

        case SO_OOBINLINE:
                v.val = !!sock_flag(sk, SOCK_URGINLINE);
                break;

        case SO_NO_CHECK:
                v.val = sk->sk_no_check;
                break;

        case SO_PRIORITY:
                v.val = sk->sk_priority;
                break;

        case SO_LINGER:
                lv              = sizeof(v.ling);
                v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
                v.ling.l_linger = sk->sk_lingertime / HZ;
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("getsockopt");
                break;

        case SO_TIMESTAMP:
                v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
                                !sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_TIMESTAMPNS:
                v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_RCVTIMEO:
                lv=sizeof(struct timeval);
                if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_SNDTIMEO:
                lv=sizeof(struct timeval);
                if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_sndtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_RCVLOWAT:
                v.val = sk->sk_rcvlowat;
                break;

        case SO_SNDLOWAT:
                v.val=1;
                break;

        case SO_PASSCRED:
                v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERCRED:
                if (len > sizeof(sk->sk_peercred))
                        len = sizeof(sk->sk_peercred);
                if (copy_to_user(optval, &sk->sk_peercred, len))
                        return -EFAULT;
                goto lenout;

        case SO_PEERNAME:
        {
                char address[128];

                if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
                        return -ENOTCONN;
                if (lv < len)
                        return -EINVAL;
                if (copy_to_user(optval, address, len))
                        return -EFAULT;
                goto lenout;
        }

        /* Dubious BSD thing... Probably nobody even uses it, but
         * the UNIX standard wants it for whatever reason... -DaveM
         */
        case SO_ACCEPTCONN:
                v.val = sk->sk_state == TCP_LISTEN;
                break;

        case SO_PASSSEC:
                v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERSEC:
                return security_socket_getpeersec_stream(sock, optval, optlen, len);

        default:
                return -ENOPROTOOPT;
        }

        if (len > lv)
                len = lv;
        if (copy_to_user(optval, &v, len))
                return -EFAULT;
lenout:
        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
        sock_lock_init_class_and_name(sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
                        af_family_keys + sk->sk_family);
}

/**
 *      sk_alloc - All socket objects are allocated here
 *      @family: protocol family
 *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *      @prot: struct proto associated with this new sock instance
 *      @zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
                      struct proto *prot, int zero_it)
{
        struct sock *sk = NULL;
        struct kmem_cache *slab = prot->slab;

        if (slab != NULL)
                sk = kmem_cache_alloc(slab, priority);
        else
                sk = kmalloc(prot->obj_size, priority);

        if (sk) {
                if (zero_it) {
                        memset(sk, 0, prot->obj_size);
                        sk->sk_family = family;
                        /*
                         * See comment in struct sock definition to understand
                         * why we need sk_prot_creator -acme
                         */
                        sk->sk_prot = sk->sk_prot_creator = prot;
                        sock_lock_init(sk);
                }

                if (security_sk_alloc(sk, family, priority))
                        goto out_free;

                if (!try_module_get(prot->owner))
                        goto out_free;
        }
        return sk;

out_free:
        if (slab != NULL)
                kmem_cache_free(slab, sk);
        else
                kfree(sk);
        return NULL;
}

void sk_free(struct sock *sk)
{
        struct sk_filter *filter;
        struct module *owner = sk->sk_prot_creator->owner;

        if (sk->sk_destruct)
                sk->sk_destruct(sk);

        filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                sk_filter_release(sk, filter);
                rcu_assign_pointer(sk->sk_filter, NULL);
        }

        sock_disable_timestamp(sk);

        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
                       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

        security_sk_free(sk);
        if (sk->sk_prot_creator->slab != NULL)
                kmem_cache_free(sk->sk_prot_creator->slab, sk);
        else
                kfree(sk);
        module_put(owner);
}

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
        struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

        if (newsk != NULL) {
                struct sk_filter *filter;

                sock_copy(newsk, sk);

                /* SANITY */
                sk_node_init(&newsk->sk_node);
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;

                atomic_set(&newsk->sk_rmem_alloc, 0);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                atomic_set(&newsk->sk_omem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
                skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

                rwlock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class_and_name(&newsk->sk_callback_lock,
                                af_callback_keys + newsk->sk_family,
                                af_family_clock_key_strings[newsk->sk_family]);

                newsk->sk_dst_cache     = NULL;
                newsk->sk_wmem_queued   = 0;
                newsk->sk_forward_alloc = 0;
                newsk->sk_send_head     = NULL;
                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

                sock_reset_flag(newsk, SOCK_DONE);
                skb_queue_head_init(&newsk->sk_error_queue);

                filter = newsk->sk_filter;
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);

                if (unlikely(xfrm_sk_clone_policy(newsk))) {
                        /* It is still a raw copy of the parent, so invalidate
                         * the destructor and do a plain sk_free() */
                        newsk->sk_destruct = NULL;
                        sk_free(newsk);
                        newsk = NULL;
                        goto out;
                }

                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
                atomic_set(&newsk->sk_refcnt, 2);

                /*
                 * Increment the counter in the same struct proto as the master
                 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
                 * is the same as sk->sk_prot->socks, as this field was copied
                 * with memcpy).
                 *
                 * This _changes_ the previous behaviour, where
                 * tcp_create_openreq_child always was incrementing the
                 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
                 * to be taken into account in all callers. -acme
                 */
                sk_refcnt_debug_inc(newsk);
                newsk->sk_socket = NULL;
                newsk->sk_sleep  = NULL;

                if (newsk->sk_prot->sockets_allocated)
                        atomic_inc(newsk->sk_prot->sockets_allocated);
        }
out:
        return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
        __sk_dst_set(sk, dst);
        sk->sk_route_caps = dst->dev->features;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
        if (sk_can_gso(sk)) {
                if (dst->header_len)
                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
                else
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
        }
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
        if (num_physpages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
        } else if (num_physpages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
}

/*
 *      Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* In case it might be waiting for more memory. */
        atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
                sk->sk_write_space(sk);
        sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
        int uid;

        read_lock(&sk->sk_callback_lock);
        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
        read_unlock(&sk->sk_callback_lock);
        return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
        unsigned long ino;

        read_lock(&sk->sk_callback_lock);
        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
        read_unlock(&sk->sk_callback_lock);
        return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                struct sk_buff * skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_r(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
        if ((unsigned)size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
                void *mem;
                /* First do the add, to avoid the race if kmalloc
                 * might sleep.
                 */
                atomic_add(size, &sk->sk_omem_alloc);
                mem = kmalloc(size, priority);
                if (mem)
                        return mem;
                atomic_sub(size, &sk->sk_omem_alloc);
        }
        return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
        kfree(mem);
        atomic_sub(size, &sk->sk_omem_alloc);
}

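/*
 * Intended pairing (hypothetical option handler): the size passed to
 * sock_kfree_s() must match the sock_kmalloc() size, so sk_omem_alloc
 * balances back to zero when the option is dropped:
 *
 *	void *opt = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *	if (opt == NULL)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, optlen);
 */
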
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
        DEFINE_WAIT(wait);

        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
        for (;;) {
                if (!timeo)
                        break;
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        break;
                if (sk->sk_err)
                        break;
                timeo = schedule_timeout(timeo);
        }
        finish_wait(sk->sk_sleep, &wait);
        return timeo;
}


/*
 *      Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
                                            unsigned long header_len,
                                            unsigned long data_len,
                                            int noblock, int *errcode)
{
        struct sk_buff *skb;
        gfp_t gfp_mask;
        long timeo;
        int err;

        gfp_mask = sk->sk_allocation;
        if (gfp_mask & __GFP_WAIT)
                gfp_mask |= __GFP_REPEAT;

        timeo = sock_sndtimeo(sk, noblock);
        while (1) {
                err = sock_error(sk);
                if (err != 0)
                        goto failure;

                err = -EPIPE;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        goto failure;

                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                        skb = alloc_skb(header_len, gfp_mask);
                        if (skb) {
                                int npages;
                                int i;

                                /* No pages, we're done... */
                                if (!data_len)
                                        break;

                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
                                skb->truesize += data_len;
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
                                        skb_frag_t *frag;

                                        page = alloc_pages(sk->sk_allocation, 0);
                                        if (!page) {
                                                err = -ENOBUFS;
                                                skb_shinfo(skb)->nr_frags = i;
                                                kfree_skb(skb);
                                                goto failure;
                                        }

                                        frag = &skb_shinfo(skb)->frags[i];
                                        frag->page = page;
                                        frag->page_offset = 0;
                                        frag->size = (data_len >= PAGE_SIZE ?
                                                      PAGE_SIZE :
                                                      data_len);
                                        data_len -= PAGE_SIZE;
                                }

                                /* Full success... */
                                break;
                        }
                        err = -ENOBUFS;
                        goto failure;
                }
                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                err = -EAGAIN;
                if (!timeo)
                        goto failure;
                if (signal_pending(current))
                        goto interrupted;
                timeo = sock_wait_for_wmem(sk, timeo);
        }

        skb_set_owner_w(skb, sk);
        return skb;

interrupted:
        err = sock_intr_errno(timeo);
failure:
        *errcode = err;
        return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

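/*
 * Typical use (illustrative): a datagram sendmsg() implementation
 * blocks here until send buffer space is available, honouring
 * MSG_DONTWAIT ("reserve" stands for protocol header space):
 *
 *	skb = sock_alloc_send_skb(sk, len + reserve,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (skb == NULL)
 *		goto out;	(failure reason already stored in err)
 */
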
static void __lock_sock(struct sock *sk)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
                                        TASK_UNINTERRUPTIBLE);
                spin_unlock_bh(&sk->sk_lock.slock);
                schedule();
                spin_lock_bh(&sk->sk_lock.slock);
                if (!sock_owned_by_user(sk))
                        break;
        }
        finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
        struct sk_buff *skb = sk->sk_backlog.head;

        do {
                sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
                bh_unlock_sock(sk);

                do {
                        struct sk_buff *next = skb->next;

                        skb->next = NULL;
                        sk->sk_backlog_rcv(sk, skb);

                        /*
                         * We are in process context here with softirqs
                         * disabled, use cond_resched_softirq() to preempt.
                         * This is safe to do because we've taken the backlog
                         * queue private:
                         */
                        cond_resched_softirq();

                        skb = next;
                } while (skb != NULL);

                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
        int rc;
        DEFINE_WAIT(wait);

        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        finish_wait(sk->sk_sleep, &wait);
        return rc;
}

EXPORT_SYMBOL(sk_wait_data);

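/*
 * Sketch of a typical receive loop (hypothetical protocol): the socket
 * lock is held on entry; sk_wait_event() inside sk_wait_data() drops
 * it with release_sock() while sleeping and retakes it afterwards:
 *
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo)
 *			return -EAGAIN;
 *		if (signal_pending(current))
 *			return sock_intr_errno(timeo);
 *		sk_wait_data(sk, &timeo);
 *	}
 */
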
1353 /*
1354  * Set of default routines for initialising struct proto_ops when
1355  * the protocol does not support a particular function. In certain
1356  * cases where it makes no sense for a protocol to have a "do nothing"
1357  * function, some default processing is provided.
1358  */
1359
1360 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1361 {
1362         return -EOPNOTSUPP;
1363 }
1364
1365 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1366                     int len, int flags)
1367 {
1368         return -EOPNOTSUPP;
1369 }
1370
1371 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1372 {
1373         return -EOPNOTSUPP;
1374 }
1375
1376 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1377 {
1378         return -EOPNOTSUPP;
1379 }
1380
1381 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1382                     int *len, int peer)
1383 {
1384         return -EOPNOTSUPP;
1385 }
1386
1387 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1388 {
1389         return 0;
1390 }
1391
1392 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1393 {
1394         return -EOPNOTSUPP;
1395 }
1396
1397 int sock_no_listen(struct socket *sock, int backlog)
1398 {
1399         return -EOPNOTSUPP;
1400 }
1401
1402 int sock_no_shutdown(struct socket *sock, int how)
1403 {
1404         return -EOPNOTSUPP;
1405 }
1406
1407 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1408                     char __user *optval, int optlen)
1409 {
1410         return -EOPNOTSUPP;
1411 }
1412
1413 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1414                     char __user *optval, int __user *optlen)
1415 {
1416         return -EOPNOTSUPP;
1417 }
1418
1419 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1420                     size_t len)
1421 {
1422         return -EOPNOTSUPP;
1423 }
1424
1425 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1426                     size_t len, int flags)
1427 {
1428         return -EOPNOTSUPP;
1429 }
1430
1431 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1432 {
1433         /* Mirror missing mmap method error code */
1434         return -ENODEV;
1435 }
1436
1437 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1438 {
1439         ssize_t res;
1440         struct msghdr msg = {.msg_flags = flags};
1441         struct kvec iov;
1442         char *kaddr = kmap(page);
1443         iov.iov_base = kaddr + offset;
1444         iov.iov_len = size;
1445         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1446         kunmap(page);
1447         return res;
1448 }
1449
1450 /*
1451  *      Default Socket Callbacks
1452  */
1453
1454 static void sock_def_wakeup(struct sock *sk)
1455 {
1456         read_lock(&sk->sk_callback_lock);
1457         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1458                 wake_up_interruptible_all(sk->sk_sleep);
1459         read_unlock(&sk->sk_callback_lock);
1460 }
1461
1462 static void sock_def_error_report(struct sock *sk)
1463 {
1464         read_lock(&sk->sk_callback_lock);
1465         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1466                 wake_up_interruptible(sk->sk_sleep);
1467         sk_wake_async(sk,0,POLL_ERR);
1468         read_unlock(&sk->sk_callback_lock);
1469 }
1470
1471 static void sock_def_readable(struct sock *sk, int len)
1472 {
1473         read_lock(&sk->sk_callback_lock);
1474         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1475                 wake_up_interruptible(sk->sk_sleep);
1476         sk_wake_async(sk,1,POLL_IN);
1477         read_unlock(&sk->sk_callback_lock);
1478 }
1479
1480 static void sock_def_write_space(struct sock *sk)
1481 {
1482         read_lock(&sk->sk_callback_lock);
1483
1484         /* Do not wake up a writer until he can make "significant"
1485          * progress.  --DaveM
1486          */
1487         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1488                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1489                         wake_up_interruptible(sk->sk_sleep);
1490
1491                 /* Should agree with poll, otherwise some programs break */
1492                 if (sock_writeable(sk))
1493                         sk_wake_async(sk, 2, POLL_OUT);
1494         }
1495
1496         read_unlock(&sk->sk_callback_lock);
1497 }
1498
1499 static void sock_def_destruct(struct sock *sk)
1500 {
1501         kfree(sk->sk_protinfo);
1502 }
1503
1504 void sk_send_sigurg(struct sock *sk)
1505 {
1506         if (sk->sk_socket && sk->sk_socket->file)
1507                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1508                         sk_wake_async(sk, 3, POLL_PRI);
1509 }
1510
1511 void sk_reset_timer(struct sock *sk, struct timer_list *timer,
1512                     unsigned long expires)
1513 {
1514         if (!mod_timer(timer, expires))
1515                 sock_hold(sk);
1516 }
1517
1518 EXPORT_SYMBOL(sk_reset_timer);
1519
1520 void sk_stop_timer(struct sock *sk, struct timer_list *timer)
1521 {
1522         if (timer_pending(timer) && del_timer(timer))
1523                 __sock_put(sk);
1524 }
1525
1526 EXPORT_SYMBOL(sk_stop_timer);
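/*
 * Reference pairing (illustrative): mod_timer() returns 0 when the timer
 * was inactive, so sk_reset_timer() holds a socket reference for each
 * newly armed timer, and sk_stop_timer() drops it when it deactivates a
 * pending one. A handler that does fire owns that reference and must
 * drop it itself, e.g. in a hypothetical handler:
 *
 *	static void example_timer(unsigned long data)
 *	{
 *		struct sock *sk = (struct sock *)data;
 *
 *		... do the timer work on sk ...
 *		sock_put(sk);
 *	}
 */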
1527
1528 void sock_init_data(struct socket *sock, struct sock *sk)
1529 {
1530         skb_queue_head_init(&sk->sk_receive_queue);
1531         skb_queue_head_init(&sk->sk_write_queue);
1532         skb_queue_head_init(&sk->sk_error_queue);
1533 #ifdef CONFIG_NET_DMA
1534         skb_queue_head_init(&sk->sk_async_wait_queue);
1535 #endif
1536
1537         sk->sk_send_head        =       NULL;
1538
1539         init_timer(&sk->sk_timer);
1540
1541         sk->sk_allocation       =       GFP_KERNEL;
1542         sk->sk_rcvbuf           =       sysctl_rmem_default;
1543         sk->sk_sndbuf           =       sysctl_wmem_default;
1544         sk->sk_state            =       TCP_CLOSE;
1545         sk->sk_socket           =       sock;
1546
1547         sock_set_flag(sk, SOCK_ZAPPED);
1548
1549         if (sock) {
1550                 sk->sk_type     =       sock->type;
1551                 sk->sk_sleep    =       &sock->wait;
1552                 sock->sk        =       sk;
1553         } else
1554                 sk->sk_sleep    =       NULL;
1555
1556         rwlock_init(&sk->sk_dst_lock);
1557         rwlock_init(&sk->sk_callback_lock);
1558         lockdep_set_class_and_name(&sk->sk_callback_lock,
1559                         af_callback_keys + sk->sk_family,
1560                         af_family_clock_key_strings[sk->sk_family]);
1561
1562         sk->sk_state_change     =       sock_def_wakeup;
1563         sk->sk_data_ready       =       sock_def_readable;
1564         sk->sk_write_space      =       sock_def_write_space;
1565         sk->sk_error_report     =       sock_def_error_report;
1566         sk->sk_destruct         =       sock_def_destruct;
1567
1568         sk->sk_sndmsg_page      =       NULL;
1569         sk->sk_sndmsg_off       =       0;
1570
1571         sk->sk_peercred.pid     =       0;
1572         sk->sk_peercred.uid     =       -1;
1573         sk->sk_peercred.gid     =       -1;
1574         sk->sk_write_pending    =       0;
1575         sk->sk_rcvlowat         =       1;
1576         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1577         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1578
1579         sk->sk_stamp = ktime_set(-1L, -1L);
1580
1581         atomic_set(&sk->sk_refcnt, 1);
1582 }
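/*
 * Typical use (illustrative): a protocol's creation path calls
 * sock_init_data() first and then overrides whichever of the default
 * callbacks it needs, e.g. with hypothetical helpers:
 *
 *	sock_init_data(sock, sk);
 *	sk->sk_destruct   = example_destruct;
 *	sk->sk_data_ready = example_data_ready;
 */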
1583
1584 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1585 {
1586         might_sleep();
1587         spin_lock_bh(&sk->sk_lock.slock);
1588         if (sk->sk_lock.owner)
1589                 __lock_sock(sk);
1590         sk->sk_lock.owner = (void *)1;
1591         spin_unlock(&sk->sk_lock.slock);
1592         /*
1593          * The sk_lock has mutex_lock() semantics here:
1594          */
1595         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1596         local_bh_enable();
1597 }
1598
1599 EXPORT_SYMBOL(lock_sock_nested);
1600
1601 void fastcall release_sock(struct sock *sk)
1602 {
1603         /*
1604          * The sk_lock has mutex_unlock() semantics:
1605          */
1606         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1607
1608         spin_lock_bh(&sk->sk_lock.slock);
1609         if (sk->sk_backlog.tail)
1610                 __release_sock(sk);
1611         sk->sk_lock.owner = NULL;
1612         if (waitqueue_active(&sk->sk_lock.wq))
1613                 wake_up(&sk->sk_lock.wq);
1614         spin_unlock_bh(&sk->sk_lock.slock);
1615 }
1616 EXPORT_SYMBOL(release_sock);
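/*
 * Pairing sketch (illustrative): process context wraps socket state
 * changes in the socket lock, and release_sock() first processes any
 * packets that were backlogged while the lock was owned:
 *
 *	lock_sock(sk);		(lock_sock() wraps lock_sock_nested(sk, 0))
 *	... modify socket state ...
 *	release_sock(sk);	(runs sk->sk_backlog via __release_sock())
 */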
1617
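/*
 * sk_stamp sentinels in the getters below: tv_sec == -1 is the
 * sock_init_data() default ("never stamped") and yields -ENOENT, while
 * tv_sec == 0 is treated as unset and replaced with the current time
 * before being copied to userspace.
 */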
1618 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1619 {
1620         struct timeval tv;
1621         if (!sock_flag(sk, SOCK_TIMESTAMP))
1622                 sock_enable_timestamp(sk);
1623         tv = ktime_to_timeval(sk->sk_stamp);
1624         if (tv.tv_sec == -1)
1625                 return -ENOENT;
1626         if (tv.tv_sec == 0) {
1627                 sk->sk_stamp = ktime_get_real();
1628                 tv = ktime_to_timeval(sk->sk_stamp);
1629         }
1630         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1631 }
1632 EXPORT_SYMBOL(sock_get_timestamp);
1633
1634 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1635 {
1636         struct timespec ts;
1637         if (!sock_flag(sk, SOCK_TIMESTAMP))
1638                 sock_enable_timestamp(sk);
1639         ts = ktime_to_timespec(sk->sk_stamp);
1640         if (ts.tv_sec == -1)
1641                 return -ENOENT;
1642         if (ts.tv_sec == 0) {
1643                 sk->sk_stamp = ktime_get_real();
1644                 ts = ktime_to_timespec(sk->sk_stamp);
1645         }
1646         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1647 }
1648 EXPORT_SYMBOL(sock_get_timestampns);
1649
1650 void sock_enable_timestamp(struct sock *sk)
1651 {
1652         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1653                 sock_set_flag(sk, SOCK_TIMESTAMP);
1654                 net_enable_timestamp();
1655         }
1656 }
1657 EXPORT_SYMBOL(sock_enable_timestamp);
1658
1659 /*
1660  *      Get a socket option on a socket.
1661  *
1662  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
1663  *      asynchronous errors should be reported by getsockopt. We assume
1664  *      this means if you specify SO_ERROR (otherwise what's the point of it).
1665  */
1666 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1667                            char __user *optval, int __user *optlen)
1668 {
1669         struct sock *sk = sock->sk;
1670
1671         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1672 }
1673
1674 EXPORT_SYMBOL(sock_common_getsockopt);
1675
1676 #ifdef CONFIG_COMPAT
1677 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1678                                   char __user *optval, int __user *optlen)
1679 {
1680         struct sock *sk = sock->sk;
1681
1682         if (sk->sk_prot->compat_getsockopt != NULL)
1683                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
1684                                                       optval, optlen);
1685         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1686 }
1687 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1688 #endif
1689
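/*
 * Generic recvmsg: MSG_DONTWAIT is peeled off into the separate
 * "noblock" argument that proto ->recvmsg() implementations take, and
 * on success the address length reported by the protocol is copied
 * back into msg_namelen.
 */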
1690 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1691                         struct msghdr *msg, size_t size, int flags)
1692 {
1693         struct sock *sk = sock->sk;
1694         int addr_len = 0;
1695         int err;
1696
1697         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1698                                    flags & ~MSG_DONTWAIT, &addr_len);
1699         if (err >= 0)
1700                 msg->msg_namelen = addr_len;
1701         return err;
1702 }
1703
1704 EXPORT_SYMBOL(sock_common_recvmsg);
1705
1706 /*
1707  *      Set socket options on an inet socket.
1708  */
1709 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1710                            char __user *optval, int optlen)
1711 {
1712         struct sock *sk = sock->sk;
1713
1714         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1715 }
1716
1717 EXPORT_SYMBOL(sock_common_setsockopt);
1718
1719 #ifdef CONFIG_COMPAT
1720 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1721                                   char __user *optval, int optlen)
1722 {
1723         struct sock *sk = sock->sk;
1724
1725         if (sk->sk_prot->compat_setsockopt != NULL)
1726                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
1727                                                       optval, optlen);
1728         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1729 }
1730 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1731 #endif
1732
1733 void sk_common_release(struct sock *sk)
1734 {
1735         if (sk->sk_prot->destroy)
1736                 sk->sk_prot->destroy(sk);
1737
1738         /*
1739          * Observation: when sk_common_release() is called, processes no
1740          * longer have access to the socket, but the network stack still does.
1741          * Step one, detach it from networking:
1742          *
1743          * A. Remove from hash tables.
1744          */
1745
1746         sk->sk_prot->unhash(sk);
1747
1748         /*
1749          * At this point the socket cannot receive new packets, but some may
1750          * still be in flight because another CPU did the hash table lookup
1751          * before we unhashed the socket. Those packets will reach the
1752          * receive queue and be purged by the socket destructor.
1753          *
1754          * Also, we may still have packets pending on the receive queue and
1755          * our own packets waiting in device queues. sock_destroy will drain
1756          * the receive queue, but transmitted packets will delay socket
1757          * destruction until the last reference is released.
1758          */
1759
1760         sock_orphan(sk);
1761
1762         xfrm_sk_free_policy(sk);
1763
1764         sk_refcnt_debug_release(sk);
1765         sock_put(sk);
1766 }
1767
1768 EXPORT_SYMBOL(sk_common_release);
1769
1770 static DEFINE_RWLOCK(proto_list_lock);
1771 static LIST_HEAD(proto_list);
1772
1773 int proto_register(struct proto *prot, int alloc_slab)
1774 {
1775         char *request_sock_slab_name = NULL;
1776         char *timewait_sock_slab_name;
1777         int rc = -ENOBUFS;
1778
1779         if (alloc_slab) {
1780                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1781                                                SLAB_HWCACHE_ALIGN, NULL);
1782
1783                 if (prot->slab == NULL) {
1784                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1785                                prot->name);
1786                         goto out;
1787                 }
1788
1789                 if (prot->rsk_prot != NULL) {
1790                         static const char mask[] = "request_sock_%s";
1791
1792                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1793                         if (request_sock_slab_name == NULL)
1794                                 goto out_free_sock_slab;
1795
1796                         sprintf(request_sock_slab_name, mask, prot->name);
1797                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1798                                                                  prot->rsk_prot->obj_size, 0,
1799                                                                  SLAB_HWCACHE_ALIGN, NULL);
1800
1801                         if (prot->rsk_prot->slab == NULL) {
1802                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1803                                        prot->name);
1804                                 goto out_free_request_sock_slab_name;
1805                         }
1806                 }
1807
1808                 if (prot->twsk_prot != NULL) {
1809                         static const char mask[] = "tw_sock_%s";
1810
1811                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1812
1813                         if (timewait_sock_slab_name == NULL)
1814                                 goto out_free_request_sock_slab;
1815
1816                         sprintf(timewait_sock_slab_name, mask, prot->name);
1817                         prot->twsk_prot->twsk_slab =
1818                                 kmem_cache_create(timewait_sock_slab_name,
1819                                                   prot->twsk_prot->twsk_obj_size,
1820                                                   0, SLAB_HWCACHE_ALIGN,
1821                                                   NULL);
1822                         if (prot->twsk_prot->twsk_slab == NULL)
1823                                 goto out_free_timewait_sock_slab_name;
1824                 }
1825         }
1826
1827         write_lock(&proto_list_lock);
1828         list_add(&prot->node, &proto_list);
1829         write_unlock(&proto_list_lock);
1830         rc = 0;
1831 out:
1832         return rc;
1833 out_free_timewait_sock_slab_name:
1834         kfree(timewait_sock_slab_name);
1835 out_free_request_sock_slab:
1836         if (prot->rsk_prot && prot->rsk_prot->slab) {
1837                 kmem_cache_destroy(prot->rsk_prot->slab);
1838                 prot->rsk_prot->slab = NULL;
1839         }
1840 out_free_request_sock_slab_name:
1841         kfree(request_sock_slab_name);
1842 out_free_sock_slab:
1843         kmem_cache_destroy(prot->slab);
1844         prot->slab = NULL;
1845         goto out;
1846 }
1847
1848 EXPORT_SYMBOL(proto_register);
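/*
 * Registration sketch (illustrative, hypothetical protocol): usually
 * called from module init with alloc_slab = 1, so sockets are carved
 * from a dedicated, cacheline-aligned kmem cache:
 *
 *	static struct proto example_proto = {
 *		.name	  = "EXAMPLE",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct example_sock),
 *	};
 *
 *	err = proto_register(&example_proto, 1);
 */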
1849
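/*
 * proto_unregister() undoes proto_register(): besides delisting the
 * protocol, it destroys the slab caches and frees the cache names that
 * proto_register() kmalloc()ed for the request_sock and timewait
 * variants.
 */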
1850 void proto_unregister(struct proto *prot)
1851 {
1852         write_lock(&proto_list_lock);
1853         list_del(&prot->node);
1854         write_unlock(&proto_list_lock);
1855
1856         if (prot->slab != NULL) {
1857                 kmem_cache_destroy(prot->slab);
1858                 prot->slab = NULL;
1859         }
1860
1861         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1862                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1863
1864                 kmem_cache_destroy(prot->rsk_prot->slab);
1865                 kfree(name);
1866                 prot->rsk_prot->slab = NULL;
1867         }
1868
1869         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1870                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1871
1872                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1873                 kfree(name);
1874                 prot->twsk_prot->twsk_slab = NULL;
1875         }
1876 }
1877
1878 EXPORT_SYMBOL(proto_unregister);
1879
1880 #ifdef CONFIG_PROC_FS
1881 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1882 {
1883         read_lock(&proto_list_lock);
1884         return seq_list_start_head(&proto_list, *pos);
1885 }
1886
1887 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1888 {
1889         return seq_list_next(v, &proto_list, pos);
1890 }
1891
1892 static void proto_seq_stop(struct seq_file *seq, void *v)
1893 {
1894         read_unlock(&proto_list_lock);
1895 }
1896
1897 static char proto_method_implemented(const void *method)
1898 {
1899         return method == NULL ? 'n' : 'y';
1900 }
1901
1902 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1903 {
1904         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1905                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1906                    proto->name,
1907                    proto->obj_size,
1908                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1909                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1910                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1911                    proto->max_header,
1912                    proto->slab == NULL ? "no" : "yes",
1913                    module_name(proto->owner),
1914                    proto_method_implemented(proto->close),
1915                    proto_method_implemented(proto->connect),
1916                    proto_method_implemented(proto->disconnect),
1917                    proto_method_implemented(proto->accept),
1918                    proto_method_implemented(proto->ioctl),
1919                    proto_method_implemented(proto->init),
1920                    proto_method_implemented(proto->destroy),
1921                    proto_method_implemented(proto->shutdown),
1922                    proto_method_implemented(proto->setsockopt),
1923                    proto_method_implemented(proto->getsockopt),
1924                    proto_method_implemented(proto->sendmsg),
1925                    proto_method_implemented(proto->recvmsg),
1926                    proto_method_implemented(proto->sendpage),
1927                    proto_method_implemented(proto->bind),
1928                    proto_method_implemented(proto->backlog_rcv),
1929                    proto_method_implemented(proto->hash),
1930                    proto_method_implemented(proto->unhash),
1931                    proto_method_implemented(proto->get_port),
1932                    proto_method_implemented(proto->enter_memory_pressure));
1933 }
1934
1935 static int proto_seq_show(struct seq_file *seq, void *v)
1936 {
1937         if (v == &proto_list)
1938                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1939                            "protocol",
1940                            "size",
1941                            "sockets",
1942                            "memory",
1943                            "press",
1944                            "maxhdr",
1945                            "slab",
1946                            "module",
1947                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1948         else
1949                 proto_seq_printf(seq, list_entry(v, struct proto, node));
1950         return 0;
1951 }
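/*
 * Each two-letter column in the header above corresponds to one proto
 * method printed by proto_seq_printf() in the same order ("cl" = close,
 * "co" = connect, ...), reported as 'y' or 'n' by
 * proto_method_implemented().
 */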
1952
1953 static const struct seq_operations proto_seq_ops = {
1954         .start  = proto_seq_start,
1955         .next   = proto_seq_next,
1956         .stop   = proto_seq_stop,
1957         .show   = proto_seq_show,
1958 };
1959
1960 static int proto_seq_open(struct inode *inode, struct file *file)
1961 {
1962         return seq_open(file, &proto_seq_ops);
1963 }
1964
1965 static const struct file_operations proto_seq_fops = {
1966         .owner          = THIS_MODULE,
1967         .open           = proto_seq_open,
1968         .read           = seq_read,
1969         .llseek         = seq_lseek,
1970         .release        = seq_release,
1971 };
1972
1973 static int __init proto_init(void)
1974 {
1975         /* register /proc/net/protocols */
1976         return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1977 }
1978
1979 subsys_initcall(proto_init);
1980
1981 #endif /* CONFIG_PROC_FS */
1982
1983 EXPORT_SYMBOL(sk_alloc);
1984 EXPORT_SYMBOL(sk_free);
1985 EXPORT_SYMBOL(sk_send_sigurg);
1986 EXPORT_SYMBOL(sock_alloc_send_skb);
1987 EXPORT_SYMBOL(sock_init_data);
1988 EXPORT_SYMBOL(sock_kfree_s);
1989 EXPORT_SYMBOL(sock_kmalloc);
1990 EXPORT_SYMBOL(sock_no_accept);
1991 EXPORT_SYMBOL(sock_no_bind);
1992 EXPORT_SYMBOL(sock_no_connect);
1993 EXPORT_SYMBOL(sock_no_getname);
1994 EXPORT_SYMBOL(sock_no_getsockopt);
1995 EXPORT_SYMBOL(sock_no_ioctl);
1996 EXPORT_SYMBOL(sock_no_listen);
1997 EXPORT_SYMBOL(sock_no_mmap);
1998 EXPORT_SYMBOL(sock_no_poll);
1999 EXPORT_SYMBOL(sock_no_recvmsg);
2000 EXPORT_SYMBOL(sock_no_sendmsg);
2001 EXPORT_SYMBOL(sock_no_sendpage);
2002 EXPORT_SYMBOL(sock_no_setsockopt);
2003 EXPORT_SYMBOL(sock_no_shutdown);
2004 EXPORT_SYMBOL(sock_no_socketpair);
2005 EXPORT_SYMBOL(sock_rfree);
2006 EXPORT_SYMBOL(sock_setsockopt);
2007 EXPORT_SYMBOL(sock_wfree);
2008 EXPORT_SYMBOL(sock_wmalloc);
2009 EXPORT_SYMBOL(sock_i_uid);
2010 EXPORT_SYMBOL(sock_i_ino);
2011 EXPORT_SYMBOL(sysctl_optmem_max);
2012 #ifdef CONFIG_SYSCTL
2013 EXPORT_SYMBOL(sysctl_rmem_max);
2014 EXPORT_SYMBOL(sysctl_wmem_max);
2015 #endif