/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Generic socket support routines. Memory allocators, socket lock/release
 *              handler for protocols to use and generic option handler.
 *
 *
 * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *              Alan Cox        :       Numerous verify_area() problems
 *              Alan Cox        :       Connecting on a connecting socket
 *                                      now returns an error for tcp.
 *              Alan Cox        :       sock->protocol is set correctly.
 *                                      and is not sometimes left as 0.
 *              Alan Cox        :       connect handles icmp errors on a
 *                                      connect properly. Unfortunately there
 *                                      is a restart syscall nasty there. I
 *                                      can't match BSD without hacking the C
 *                                      library. Ideas urgently sought!
 *              Alan Cox        :       Disallow bind() to addresses that are
 *                                      not ours - especially broadcast ones!!
 *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 *                                      instead they leave that for the DESTROY timer.
 *              Alan Cox        :       Clean up error flag in accept
 *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 *                                      was buggy. Put a remove_sock() in the handler
 *                                      for memory when we hit 0. Also altered the timer
 *                                      code. The ACK stuff can wait and needs major
 *                                      TCP layer surgery.
 *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 *                                      and fixed timer/inet_bh race.
 *              Alan Cox        :       Added zapped flag for TCP
 *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 *              Pauline Middelink:      identd support
 *              Alan Cox        :       Fixed connect() taking signals I think.
 *              Alan Cox        :       SO_LINGER supported
 *              Alan Cox        :       Error reporting fixes
 *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 *              Alan Cox        :       inet sockets don't set sk->type!
 *              Alan Cox        :       Split socket option code
 *              Alan Cox        :       Callbacks
 *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 *              Alex            :       Removed restriction on inet fioctl
 *              Alan Cox        :       Splitting INET from NET core
 *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 *              Alan Cox        :       Split IP from generic code
 *              Alan Cox        :       New kfree_skbmem()
 *              Alan Cox        :       Make SO_DEBUG superuser only.
 *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 *                                      (compatibility fix)
 *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 *              Alan Cox        :       Allocator for a socket is settable.
 *              Alan Cox        :       SO_ERROR includes soft errors.
 *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 *              Alan Cox        :       Generic socket allocation to make hooks
 *                                      easier (suggested by Craig Metz).
 *              Michael Pall    :       SO_ERROR returns positive errno again
 *              Steve Whitehouse:       Added default destructor to free
 *                                      protocol private data.
 *              Steve Whitehouse:       Added various other default routines
 *                                      common to several socket families.
 *              Chris Evans     :       Call suser() check last on F_SETOWN
 *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 *              Andi Kleen      :       Fix write_space callback
 *              Chris Evans     :       Security fixes - signedness again
 *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-29"          ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_MAX"
};
#endif

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS         256
#define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
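
/*
 * Worked example (illustrative only; the exact figure depends on
 * the platform's sizeof(struct sk_buff)): if struct sk_buff were
 * 256 bytes, _SK_MEM_OVERHEAD would be 256 + 256 = 512 bytes and
 * SK_WMEM_MAX/SK_RMEM_MAX would default to 512 * 256 = 131072
 * bytes (128 KiB) of buffer space per socket.
 */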

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
        struct timeval tv;

        if (optlen < sizeof(tv))
                return -EINVAL;
        if (copy_from_user(&tv, optval, sizeof(tv)))
                return -EFAULT;

        *timeo_p = MAX_SCHEDULE_TIMEOUT;
        if (tv.tv_sec == 0 && tv.tv_usec == 0)
                return 0;
        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
        return 0;
}
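
/*
 * Illustrative conversion (assuming HZ == 100): a userspace timeval
 * of { .tv_sec = 1, .tv_usec = 500000 } passed via SO_RCVTIMEO
 * becomes 1*100 + (500000 + 9999)/10000 = 150 jiffies (1.5 s);
 * sub-tick remainders round up to the next jiffy, and an all-zero
 * timeval selects MAX_SCHEDULE_TIMEOUT, i.e. "block forever".
 */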

static void sock_warn_obsolete_bsdism(const char *name)
{
        static int warned;
        static char warncomm[TASK_COMM_LEN];
        if (strcmp(warncomm, current->comm) && warned < 5) {
                strcpy(warncomm,  current->comm);
                printk(KERN_WARNING "process `%s' is using obsolete "
                       "%s SO_BSDCOMPAT\n", warncomm, name);
                warned++;
        }
}

static void sock_disable_timestamp(struct sock *sk)
{
        if (sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_reset_flag(sk, SOCK_TIMESTAMP);
                net_disable_timestamp();
        }
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        int err = 0;
        int skb_len;

        /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
                err = -ENOMEM;
                goto out;
        }

        err = sk_filter(sk, skb);
        if (err)
                goto out;

        skb->dev = NULL;
        skb_set_owner_r(skb, sk);

        /* Cache the SKB length before we tack it onto the receive
         * queue.  Once it is added it no longer belongs to us and
         * may be freed by other threads of control pulling packets
         * from the queue.
         */
        skb_len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
out:
        return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
        int rc = NET_RX_SUCCESS;

        if (sk_filter(sk, skb))
                goto discard_and_relse;

        skb->dev = NULL;

        if (nested)
                bh_lock_sock_nested(sk);
        else
                bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /*
                 * trylock + unlock semantics:
                 */
                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

                rc = sk->sk_backlog_rcv(sk, skb);

                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);
out:
        sock_put(sk);
        return rc;
discard_and_relse:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk->sk_dst_cache;

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk->sk_dst_cache = NULL;
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk_dst_get(sk);

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk_dst_reset(sk);
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(sk_dst_check);

/*
 *      This is meant for all protocols to use and covers goings on
 *      at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        struct sk_filter *filter;
        int val;
        int valbool;
        struct linger ling;
        int ret = 0;

        /*
         *      Options without arguments
         */

#ifdef SO_DONTLINGER            /* Compatibility item... */
        if (optname == SO_DONTLINGER) {
                lock_sock(sk);
                sock_reset_flag(sk, SOCK_LINGER);
                release_sock(sk);
                return 0;
        }
#endif

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        valbool = val ? 1 : 0;

        lock_sock(sk);

        switch(optname) {
        case SO_DEBUG:
                if (val && !capable(CAP_NET_ADMIN)) {
                        ret = -EACCES;
                }
                else if (valbool)
                        sock_set_flag(sk, SOCK_DBG);
                else
                        sock_reset_flag(sk, SOCK_DBG);
                break;
        case SO_REUSEADDR:
                sk->sk_reuse = valbool;
                break;
        case SO_TYPE:
        case SO_ERROR:
                ret = -ENOPROTOOPT;
                break;
        case SO_DONTROUTE:
                if (valbool)
                        sock_set_flag(sk, SOCK_LOCALROUTE);
                else
                        sock_reset_flag(sk, SOCK_LOCALROUTE);
                break;
        case SO_BROADCAST:
                sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
                break;
        case SO_SNDBUF:
                /* Don't error on this. BSD doesn't, and if you think
                   about it this is right: otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints. */

                if (val > sysctl_wmem_max)
                        val = sysctl_wmem_max;
set_sndbuf:
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                if ((val * 2) < SOCK_MIN_SNDBUF)
                        sk->sk_sndbuf = SOCK_MIN_SNDBUF;
                else
                        sk->sk_sndbuf = val * 2;

                /*
                 *      Wake up sending tasks if we
                 *      upped the value.
                 */
                sk->sk_write_space(sk);
                break;

        case SO_SNDBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_sndbuf;

        case SO_RCVBUF:
                /* Don't error on this. BSD doesn't, and if you think
                   about it this is right: otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints. */

                if (val > sysctl_rmem_max)
                        val = sysctl_rmem_max;
set_rcvbuf:
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                /*
                 * We double it on the way in to account for
                 * "struct sk_buff" etc. overhead.   Applications
                 * assume that the SO_RCVBUF setting they make will
                 * allow that much actual data to be received on that
                 * socket.
                 *
                 * Applications are unaware that "struct sk_buff" and
                 * other overheads allocate from the receive buffer
                 * during socket buffer allocation.
                 *
                 * And after considering the possible alternatives,
                 * returning the value we actually used in getsockopt
                 * is the most desirable behavior.
                 */
                if ((val * 2) < SOCK_MIN_RCVBUF)
                        sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
                else
                        sk->sk_rcvbuf = val * 2;
                break;
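
        /*
         * Illustrative example of the doubling above: requesting a
         * 64 KiB receive buffer (val == 65536) stores 131072 in
         * sk->sk_rcvbuf, and a later getsockopt(SO_RCVBUF) reports
         * 131072, not the 65536 that was requested.
         */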

        case SO_RCVBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_rcvbuf;

        case SO_KEEPALIVE:
#ifdef CONFIG_INET
                if (sk->sk_protocol == IPPROTO_TCP)
                        tcp_set_keepalive(sk, valbool);
#endif
                sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
                break;

        case SO_OOBINLINE:
                sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
                break;

        case SO_NO_CHECK:
                sk->sk_no_check = valbool;
                break;

        case SO_PRIORITY:
                if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
                        sk->sk_priority = val;
                else
                        ret = -EPERM;
                break;

        case SO_LINGER:
                if (optlen < sizeof(ling)) {
                        ret = -EINVAL;  /* 1003.1g */
                        break;
                }
                if (copy_from_user(&ling, optval, sizeof(ling))) {
                        ret = -EFAULT;
                        break;
                }
                if (!ling.l_onoff)
                        sock_reset_flag(sk, SOCK_LINGER);
                else {
#if (BITS_PER_LONG == 32)
                        if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
                                sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
                        else
#endif
                                sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
                        sock_set_flag(sk, SOCK_LINGER);
                }
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("setsockopt");
                break;

        case SO_PASSCRED:
                if (valbool)
                        set_bit(SOCK_PASSCRED, &sock->flags);
                else
                        clear_bit(SOCK_PASSCRED, &sock->flags);
                break;

        case SO_TIMESTAMP:
        case SO_TIMESTAMPNS:
                if (valbool) {
                        if (optname == SO_TIMESTAMP)
                                sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                        else
                                sock_set_flag(sk, SOCK_RCVTSTAMPNS);
                        sock_set_flag(sk, SOCK_RCVTSTAMP);
                        sock_enable_timestamp(sk);
                } else {
                        sock_reset_flag(sk, SOCK_RCVTSTAMP);
                        sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                }
                break;

        case SO_RCVLOWAT:
                if (val < 0)
                        val = INT_MAX;
                sk->sk_rcvlowat = val ? : 1;
                break;

        case SO_RCVTIMEO:
                ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
                break;

        case SO_SNDTIMEO:
                ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
                break;

#ifdef CONFIG_NETDEVICES
        case SO_BINDTODEVICE:
        {
                char devname[IFNAMSIZ];

                /* Sorry... */
                if (!capable(CAP_NET_RAW)) {
                        ret = -EPERM;
                        break;
                }

                /* Bind this socket to a particular device like "eth0",
                 * as specified in the passed interface name. If the
                 * name is "" or the option length is zero the socket
                 * is not bound.
                 */

                if (!valbool) {
                        sk->sk_bound_dev_if = 0;
                } else {
                        if (optlen > IFNAMSIZ - 1)
                                optlen = IFNAMSIZ - 1;
                        memset(devname, 0, sizeof(devname));
                        if (copy_from_user(devname, optval, optlen)) {
                                ret = -EFAULT;
                                break;
                        }

                        /* Remove any cached route for this socket. */
                        sk_dst_reset(sk);

                        if (devname[0] == '\0') {
                                sk->sk_bound_dev_if = 0;
                        } else {
                                struct net_device *dev = dev_get_by_name(devname);
                                if (!dev) {
                                        ret = -ENODEV;
                                        break;
                                }
                                sk->sk_bound_dev_if = dev->ifindex;
                                dev_put(dev);
                        }
                }
                break;
        }
#endif


        case SO_ATTACH_FILTER:
                ret = -EINVAL;
                if (optlen == sizeof(struct sock_fprog)) {
                        struct sock_fprog fprog;

                        ret = -EFAULT;
                        if (copy_from_user(&fprog, optval, sizeof(fprog)))
                                break;

                        ret = sk_attach_filter(&fprog, sk);
                }
                break;

        case SO_DETACH_FILTER:
                rcu_read_lock_bh();
                filter = rcu_dereference(sk->sk_filter);
                if (filter) {
                        rcu_assign_pointer(sk->sk_filter, NULL);
                        sk_filter_release(sk, filter);
                        rcu_read_unlock_bh();
                        break;
                }
                rcu_read_unlock_bh();
                ret = -ENONET;
                break;

        case SO_PASSSEC:
                if (valbool)
                        set_bit(SOCK_PASSSEC, &sock->flags);
                else
                        clear_bit(SOCK_PASSSEC, &sock->flags);
                break;

                /* We implement the SO_SNDLOWAT etc to
                   not be settable (1003.1g 5.3) */
        default:
                ret = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);
        return ret;
}
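
/*
 * Illustrative userspace view of the handler above (a sketch, not
 * part of this file): enabling a 10 second linger-on-close from an
 * application looks like
 *
 *      struct linger ling = { .l_onoff = 1, .l_linger = 10 };
 *      setsockopt(fd, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling));
 *
 * which reaches the SO_LINGER case above and stores 10 * HZ in
 * sk->sk_lingertime.
 */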


int sock_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        union {
                int val;
                struct linger ling;
                struct timeval tm;
        } v;

        unsigned int lv = sizeof(int);
        int len;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case SO_DEBUG:
                v.val = sock_flag(sk, SOCK_DBG);
                break;

        case SO_DONTROUTE:
                v.val = sock_flag(sk, SOCK_LOCALROUTE);
                break;

        case SO_BROADCAST:
                v.val = !!sock_flag(sk, SOCK_BROADCAST);
                break;

        case SO_SNDBUF:
                v.val = sk->sk_sndbuf;
                break;

        case SO_RCVBUF:
                v.val = sk->sk_rcvbuf;
                break;

        case SO_REUSEADDR:
                v.val = sk->sk_reuse;
                break;

        case SO_KEEPALIVE:
                v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
                break;

        case SO_TYPE:
                v.val = sk->sk_type;
                break;

        case SO_ERROR:
                v.val = -sock_error(sk);
                if (v.val == 0)
                        v.val = xchg(&sk->sk_err_soft, 0);
                break;

        case SO_OOBINLINE:
                v.val = !!sock_flag(sk, SOCK_URGINLINE);
                break;

        case SO_NO_CHECK:
                v.val = sk->sk_no_check;
                break;

        case SO_PRIORITY:
                v.val = sk->sk_priority;
                break;

        case SO_LINGER:
                lv              = sizeof(v.ling);
                v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
                v.ling.l_linger = sk->sk_lingertime / HZ;
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("getsockopt");
                break;

        case SO_TIMESTAMP:
                v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
                                !sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_TIMESTAMPNS:
                v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_RCVTIMEO:
                lv = sizeof(struct timeval);
                if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_SNDTIMEO:
                lv = sizeof(struct timeval);
                if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_sndtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_RCVLOWAT:
                v.val = sk->sk_rcvlowat;
                break;

        case SO_SNDLOWAT:
                v.val = 1;
                break;

        case SO_PASSCRED:
                v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERCRED:
                if (len > sizeof(sk->sk_peercred))
                        len = sizeof(sk->sk_peercred);
                if (copy_to_user(optval, &sk->sk_peercred, len))
                        return -EFAULT;
                goto lenout;

        case SO_PEERNAME:
        {
                char address[128];

                if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
                        return -ENOTCONN;
                if (lv < len)
                        return -EINVAL;
                if (copy_to_user(optval, address, len))
                        return -EFAULT;
                goto lenout;
        }

        /* Dubious BSD thing... Probably nobody even uses it, but
         * the UNIX standard wants it for whatever reason... -DaveM
         */
        case SO_ACCEPTCONN:
                v.val = sk->sk_state == TCP_LISTEN;
                break;

        case SO_PASSSEC:
                v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERSEC:
                return security_socket_getpeersec_stream(sock, optval, optlen, len);

        default:
                return -ENOPROTOOPT;
        }

        if (len > lv)
                len = lv;
        if (copy_to_user(optval, &v, len))
                return -EFAULT;
lenout:
        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}
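
/*
 * Illustrative userspace view of the handler above (a sketch, not
 * part of this file): draining a pending asynchronous error reads
 *
 *      int err;
 *      socklen_t len = sizeof(err);
 *      getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *
 * which lands in the SO_ERROR case above; the stored error is
 * returned as a positive errno value and cleared atomically.
 */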

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
        sock_lock_init_class_and_name(sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
                        af_family_keys + sk->sk_family);
}

/**
 *      sk_alloc - All socket objects are allocated here
 *      @family: protocol family
 *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *      @prot: struct proto associated with this new sock instance
 *      @zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
                      struct proto *prot, int zero_it)
{
        struct sock *sk = NULL;
        struct kmem_cache *slab = prot->slab;

        if (slab != NULL)
                sk = kmem_cache_alloc(slab, priority);
        else
                sk = kmalloc(prot->obj_size, priority);

        if (sk) {
                if (zero_it) {
                        memset(sk, 0, prot->obj_size);
                        sk->sk_family = family;
                        /*
                         * See comment in struct sock definition to understand
                         * why we need sk_prot_creator -acme
                         */
                        sk->sk_prot = sk->sk_prot_creator = prot;
                        sock_lock_init(sk);
                }

                if (security_sk_alloc(sk, family, priority))
                        goto out_free;

                if (!try_module_get(prot->owner))
                        goto out_free;
        }
        return sk;

out_free:
        if (slab != NULL)
                kmem_cache_free(slab, sk);
        else
                kfree(sk);
        return NULL;
}

void sk_free(struct sock *sk)
{
        struct sk_filter *filter;
        struct module *owner = sk->sk_prot_creator->owner;

        if (sk->sk_destruct)
                sk->sk_destruct(sk);

        filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                sk_filter_release(sk, filter);
                rcu_assign_pointer(sk->sk_filter, NULL);
        }

        sock_disable_timestamp(sk);

        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
                       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

        security_sk_free(sk);
        if (sk->sk_prot_creator->slab != NULL)
                kmem_cache_free(sk->sk_prot_creator->slab, sk);
        else
                kfree(sk);
        module_put(owner);
}

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
        struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

        if (newsk != NULL) {
                struct sk_filter *filter;

                sock_copy(newsk, sk);

                /* SANITY */
                sk_node_init(&newsk->sk_node);
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;

                atomic_set(&newsk->sk_rmem_alloc, 0);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                atomic_set(&newsk->sk_omem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
                skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

                rwlock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class(&newsk->sk_callback_lock,
                                   af_callback_keys + newsk->sk_family);

                newsk->sk_dst_cache     = NULL;
                newsk->sk_wmem_queued   = 0;
                newsk->sk_forward_alloc = 0;
                newsk->sk_send_head     = NULL;
                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

                sock_reset_flag(newsk, SOCK_DONE);
                skb_queue_head_init(&newsk->sk_error_queue);

                filter = newsk->sk_filter;
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);

                if (unlikely(xfrm_sk_clone_policy(newsk))) {
                        /* It is still a raw copy of the parent, so
                         * invalidate the destructor and do a plain
                         * sk_free() */
                        newsk->sk_destruct = NULL;
                        sk_free(newsk);
                        newsk = NULL;
                        goto out;
                }

                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
                atomic_set(&newsk->sk_refcnt, 2);

                /*
                 * Increment the counter in the same struct proto as the master
                 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, which
                 * is the same as sk->sk_prot->socks, as this field was copied
                 * with memcpy).
                 *
                 * This _changes_ the previous behaviour, where
                 * tcp_create_openreq_child was always incrementing the
                 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
                 * to be taken into account by all callers. -acme
                 */
                sk_refcnt_debug_inc(newsk);
                newsk->sk_socket = NULL;
                newsk->sk_sleep  = NULL;

                if (newsk->sk_prot->sockets_allocated)
                        atomic_inc(newsk->sk_prot->sockets_allocated);
        }
out:
        return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
        __sk_dst_set(sk, dst);
        sk->sk_route_caps = dst->dev->features;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_MASK;
        if (sk_can_gso(sk)) {
                if (dst->header_len)
                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
                else
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
        }
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
        if (num_physpages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
        } else if (num_physpages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
}
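
/*
 * For scale (assuming 4 KiB pages): 4096 physical pages is roughly
 * 16 MiB of RAM or less, where the buffer limits are clamped to
 * 32767 bytes; 131072 pages is roughly 512 MiB, above which the
 * maxima are raised to 131071 bytes.
 */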

/*
 *      Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* In case it might be waiting for more memory. */
        atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
                sk->sk_write_space(sk);
        sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
        int uid;

        read_lock(&sk->sk_callback_lock);
        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
        read_unlock(&sk->sk_callback_lock);
        return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
        unsigned long ino;

        read_lock(&sk->sk_callback_lock);
        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
        read_unlock(&sk->sk_callback_lock);
        return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_r(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
        if ((unsigned)size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
                void *mem;
                /* First do the add, to avoid the race if kmalloc
                 * might sleep.
                 */
                atomic_add(size, &sk->sk_omem_alloc);
                mem = kmalloc(size, priority);
                if (mem)
                        return mem;
                atomic_sub(size, &sk->sk_omem_alloc);
        }
        return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
        kfree(mem);
        atomic_sub(size, &sk->sk_omem_alloc);
}
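
/*
 * Typical pairing for the two helpers above (a sketch; "len" and
 * the calling context are hypothetical): option memory is charged
 * against sk_omem_alloc at allocation time and must be released
 * with the same size.
 *
 *      void *opt = sock_kmalloc(sk, len, GFP_KERNEL);
 *      if (opt) {
 *              ...
 *              sock_kfree_s(sk, opt, len);
 *      }
 */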

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
        DEFINE_WAIT(wait);

        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
        for (;;) {
                if (!timeo)
                        break;
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        break;
                if (sk->sk_err)
                        break;
                timeo = schedule_timeout(timeo);
        }
        finish_wait(sk->sk_sleep, &wait);
        return timeo;
}


/*
 *      Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
                                            unsigned long header_len,
                                            unsigned long data_len,
                                            int noblock, int *errcode)
{
        struct sk_buff *skb;
        gfp_t gfp_mask;
        long timeo;
        int err;

        gfp_mask = sk->sk_allocation;
        if (gfp_mask & __GFP_WAIT)
                gfp_mask |= __GFP_REPEAT;

        timeo = sock_sndtimeo(sk, noblock);
        while (1) {
                err = sock_error(sk);
                if (err != 0)
                        goto failure;

                err = -EPIPE;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        goto failure;

                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                        skb = alloc_skb(header_len, gfp_mask);
                        if (skb) {
                                int npages;
                                int i;

                                /* No pages, we're done... */
                                if (!data_len)
                                        break;

                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
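                                /* Round up to whole pages; e.g. with
                                 * 4 KiB pages, data_len == 10000
                                 * gives npages == 3 (4096 + 4096 +
                                 * 1808 bytes in the last frag).
                                 */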
                                skb->truesize += data_len;
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
                                        skb_frag_t *frag;

                                        page = alloc_pages(sk->sk_allocation, 0);
                                        if (!page) {
                                                err = -ENOBUFS;
                                                skb_shinfo(skb)->nr_frags = i;
                                                kfree_skb(skb);
                                                goto failure;
                                        }

                                        frag = &skb_shinfo(skb)->frags[i];
                                        frag->page = page;
                                        frag->page_offset = 0;
                                        frag->size = (data_len >= PAGE_SIZE ?
                                                      PAGE_SIZE :
                                                      data_len);
                                        data_len -= PAGE_SIZE;
                                }

                                /* Full success... */
                                break;
                        }
                        err = -ENOBUFS;
                        goto failure;
                }
                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                err = -EAGAIN;
                if (!timeo)
                        goto failure;
                if (signal_pending(current))
                        goto interrupted;
                timeo = sock_wait_for_wmem(sk, timeo);
        }

        skb_set_owner_w(skb, sk);
        return skb;

interrupted:
        err = sock_intr_errno(timeo);
failure:
        *errcode = err;
        return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

static void __lock_sock(struct sock *sk)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
                                        TASK_UNINTERRUPTIBLE);
                spin_unlock_bh(&sk->sk_lock.slock);
                schedule();
                spin_lock_bh(&sk->sk_lock.slock);
                if (!sock_owned_by_user(sk))
                        break;
        }
        finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
        struct sk_buff *skb = sk->sk_backlog.head;

        do {
                sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
                bh_unlock_sock(sk);

                do {
                        struct sk_buff *next = skb->next;

                        skb->next = NULL;
                        sk->sk_backlog_rcv(sk, skb);

                        /*
                         * We are in process context here with softirqs
                         * disabled, use cond_resched_softirq() to preempt.
                         * This is safe to do because we've taken the backlog
                         * queue private:
                         */
                        cond_resched_softirq();

                        skb = next;
                } while (skb != NULL);

                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
        int rc;
        DEFINE_WAIT(wait);

        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        finish_wait(sk->sk_sleep, &wait);
        return rc;
}

EXPORT_SYMBOL(sk_wait_data);

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */
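
/*
 * Illustrative wiring (a hypothetical datagram family; the struct
 * below is a sketch, not code from this file): a protocol plugs the
 * sock_no_*() stubs into the proto_ops slots it does not implement,
 * e.g.
 *
 *      static const struct proto_ops example_dgram_ops = {
 *              .family   = AF_UNSPEC,
 *              .accept   = sock_no_accept,
 *              .listen   = sock_no_listen,
 *              .shutdown = sock_no_shutdown,
 *              .mmap     = sock_no_mmap,
 *      };
 *
 * so that most unsupported operations fail uniformly with
 * -EOPNOTSUPP (sock_no_mmap mirrors the missing-mmap error,
 * -ENODEV).
 */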
1321
1322 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1323 {
1324         return -EOPNOTSUPP;
1325 }
1326
1327 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1328                     int len, int flags)
1329 {
1330         return -EOPNOTSUPP;
1331 }
1332
1333 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1334 {
1335         return -EOPNOTSUPP;
1336 }
1337
1338 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1339 {
1340         return -EOPNOTSUPP;
1341 }
1342
1343 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1344                     int *len, int peer)
1345 {
1346         return -EOPNOTSUPP;
1347 }
1348
1349 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1350 {
1351         return 0;
1352 }
1353
1354 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1355 {
1356         return -EOPNOTSUPP;
1357 }
1358
1359 int sock_no_listen(struct socket *sock, int backlog)
1360 {
1361         return -EOPNOTSUPP;
1362 }
1363
1364 int sock_no_shutdown(struct socket *sock, int how)
1365 {
1366         return -EOPNOTSUPP;
1367 }
1368
1369 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1370                     char __user *optval, int optlen)
1371 {
1372         return -EOPNOTSUPP;
1373 }
1374
1375 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1376                     char __user *optval, int __user *optlen)
1377 {
1378         return -EOPNOTSUPP;
1379 }
1380
1381 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1382                     size_t len)
1383 {
1384         return -EOPNOTSUPP;
1385 }
1386
1387 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1388                     size_t len, int flags)
1389 {
1390         return -EOPNOTSUPP;
1391 }
1392
1393 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1394 {
1395         /* Mirror missing mmap method error code */
1396         return -ENODEV;
1397 }
1398
1399 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1400 {
1401         ssize_t res;
1402         struct msghdr msg = {.msg_flags = flags};
1403         struct kvec iov;
1404         char *kaddr = kmap(page);
1405         iov.iov_base = kaddr + offset;
1406         iov.iov_len = size;
1407         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1408         kunmap(page);
1409         return res;
1410 }
1411
1412 /*
1413  *      Default Socket Callbacks
1414  */
1415
1416 static void sock_def_wakeup(struct sock *sk)
1417 {
1418         read_lock(&sk->sk_callback_lock);
1419         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1420                 wake_up_interruptible_all(sk->sk_sleep);
1421         read_unlock(&sk->sk_callback_lock);
1422 }
1423
1424 static void sock_def_error_report(struct sock *sk)
1425 {
1426         read_lock(&sk->sk_callback_lock);
1427         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1428                 wake_up_interruptible(sk->sk_sleep);
1429         sk_wake_async(sk,0,POLL_ERR);
1430         read_unlock(&sk->sk_callback_lock);
1431 }
1432
1433 static void sock_def_readable(struct sock *sk, int len)
1434 {
1435         read_lock(&sk->sk_callback_lock);
1436         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1437                 wake_up_interruptible(sk->sk_sleep);
1438         sk_wake_async(sk,1,POLL_IN);
1439         read_unlock(&sk->sk_callback_lock);
1440 }
1441
1442 static void sock_def_write_space(struct sock *sk)
1443 {
1444         read_lock(&sk->sk_callback_lock);
1445
1446         /* Do not wake up a writer until he can make "significant"
1447          * progress.  --DaveM
1448          */
1449         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1450                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1451                         wake_up_interruptible(sk->sk_sleep);
1452
1453                 /* Should agree with poll, otherwise some programs break */
1454                 if (sock_writeable(sk))
1455                         sk_wake_async(sk, 2, POLL_OUT);
1456         }
1457
1458         read_unlock(&sk->sk_callback_lock);
1459 }
1460
1461 static void sock_def_destruct(struct sock *sk)
1462 {
1463         kfree(sk->sk_protinfo);
1464 }
1465
1466 void sk_send_sigurg(struct sock *sk)
1467 {
1468         if (sk->sk_socket && sk->sk_socket->file)
1469                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1470                         sk_wake_async(sk, 3, POLL_PRI);
1471 }
1472
1473 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1474                     unsigned long expires)
1475 {
1476         if (!mod_timer(timer, expires))
1477                 sock_hold(sk);
1478 }
1479
1480 EXPORT_SYMBOL(sk_reset_timer);
1481
1482 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1483 {
1484         if (timer_pending(timer) && del_timer(timer))
1485                 __sock_put(sk);
1486 }
1487
1488 EXPORT_SYMBOL(sk_stop_timer);

void sock_init_data(struct socket *sock, struct sock *sk)
{
        skb_queue_head_init(&sk->sk_receive_queue);
        skb_queue_head_init(&sk->sk_write_queue);
        skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
        skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

        sk->sk_send_head        =       NULL;

        init_timer(&sk->sk_timer);

        sk->sk_allocation       =       GFP_KERNEL;
        sk->sk_rcvbuf           =       sysctl_rmem_default;
        sk->sk_sndbuf           =       sysctl_wmem_default;
        sk->sk_state            =       TCP_CLOSE;
        sk->sk_socket           =       sock;

        sock_set_flag(sk, SOCK_ZAPPED);

        if (sock) {
                sk->sk_type     =       sock->type;
                sk->sk_sleep    =       &sock->wait;
                sock->sk        =       sk;
        } else
                sk->sk_sleep    =       NULL;

        rwlock_init(&sk->sk_dst_lock);
        rwlock_init(&sk->sk_callback_lock);
        lockdep_set_class(&sk->sk_callback_lock,
                          af_callback_keys + sk->sk_family);

        sk->sk_state_change     =       sock_def_wakeup;
        sk->sk_data_ready       =       sock_def_readable;
        sk->sk_write_space      =       sock_def_write_space;
        sk->sk_error_report     =       sock_def_error_report;
        sk->sk_destruct         =       sock_def_destruct;

        sk->sk_sndmsg_page      =       NULL;
        sk->sk_sndmsg_off       =       0;

        sk->sk_peercred.pid     =       0;
        sk->sk_peercred.uid     =       -1;
        sk->sk_peercred.gid     =       -1;
        sk->sk_write_pending    =       0;
        sk->sk_rcvlowat         =       1;
        sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
        sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;

        sk->sk_stamp = ktime_set(-1L, -1L);

        atomic_set(&sk->sk_refcnt, 1);
}
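
/*
 * Creation-path sketch (hypothetical my_* names, compiled out; the
 * pre-namespace four-argument sk_alloc() signature of this tree is
 * assumed): an address family allocates the sock, lets
 * sock_init_data() fill in the generic state above, then overrides
 * whichever defaults it needs.
 */
#if 0
static int my_create(struct socket *sock, int protocol)
{
        struct sock *sk;

        sk = sk_alloc(PF_INET, GFP_KERNEL, &my_proto, 1);
        if (!sk)
                return -ENOBUFS;

        sock_init_data(sock, sk);
        sk->sk_destruct = my_destruct;  /* instead of sock_def_destruct */
        return 0;
}
#endif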

void fastcall lock_sock_nested(struct sock *sk, int subclass)
{
        might_sleep();
        spin_lock_bh(&sk->sk_lock.slock);
        if (sk->sk_lock.owner)
                __lock_sock(sk);
        sk->sk_lock.owner = (void *)1;
        spin_unlock(&sk->sk_lock.slock);
        /*
         * The sk_lock has mutex_lock() semantics here:
         */
        mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
        local_bh_enable();
}

EXPORT_SYMBOL(lock_sock_nested);

void fastcall release_sock(struct sock *sk)
{
        /*
         * The sk_lock has mutex_unlock() semantics:
         */
        mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

        spin_lock_bh(&sk->sk_lock.slock);
        if (sk->sk_backlog.tail)
                __release_sock(sk);
        sk->sk_lock.owner = NULL;
        if (waitqueue_active(&sk->sk_lock.wq))
                wake_up(&sk->sk_lock.wq);
        spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
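
/*
 * Typical process-context use of the pair above (hypothetical
 * my_do_locked, compiled out): hold the lock across state changes;
 * packets that softirq context queued on the backlog in the meantime
 * are run by __release_sock() on the way out.
 */
#if 0
static int my_do_locked(struct sock *sk)
{
        int err = 0;

        lock_sock(sk);
        /* ... manipulate sk state safely against the receive path ... */
        release_sock(sk);
        return err;
}
#endif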

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
        struct timeval tv;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk);
        tv = ktime_to_timeval(sk->sk_stamp);
        if (tv.tv_sec == -1)
                return -ENOENT;
        if (tv.tv_sec == 0) {
                sk->sk_stamp = ktime_get_real();
                tv = ktime_to_timeval(sk->sk_stamp);
        }
        return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
        struct timespec ts;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk);
        ts = ktime_to_timespec(sk->sk_stamp);
        if (ts.tv_sec == -1)
                return -ENOENT;
        if (ts.tv_sec == 0) {
                sk->sk_stamp = ktime_get_real();
                ts = ktime_to_timespec(sk->sk_stamp);
        }
        return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk)
{
        if (!sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_set_flag(sk, SOCK_TIMESTAMP);
                net_enable_timestamp();
        }
}
EXPORT_SYMBOL(sock_enable_timestamp);
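
/*
 * Sketch of the usual consumer (hypothetical my_ioctl, compiled out):
 * protocols wire SIOCGSTAMP/SIOCGSTAMPNS in their ioctl handlers
 * straight to the two helpers above.
 */
#if 0
static int my_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        switch (cmd) {
        case SIOCGSTAMP:
                return sock_get_timestamp(sk, (struct timeval __user *)arg);
        case SIOCGSTAMPNS:
                return sock_get_timestampns(sk, (struct timespec __user *)arg);
        default:
                return -ENOIOCTLCMD;
        }
}
#endif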

/*
 *      Get a socket option on a socket.
 *
 *      FIX: POSIX 1003.1g is very ambiguous here. It states that
 *      asynchronous errors should be reported by getsockopt. We assume
 *      this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_getsockopt != NULL)
                return sk->sk_prot->compat_getsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
                        struct msghdr *msg, size_t size, int flags)
{
        struct sock *sk = sock->sk;
        int addr_len = 0;
        int err;

        err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
                                   flags & ~MSG_DONTWAIT, &addr_len);
        if (err >= 0)
                msg->msg_namelen = addr_len;
        return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *      Set socket options on a socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;

        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_setsockopt != NULL)
                return sk->sk_prot->compat_setsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif
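
/*
 * These wrappers exist to be plugged into a protocol's struct
 * proto_ops; a sketch (hypothetical my_ops, remaining handlers
 * elided, compiled out):
 */
#if 0
static const struct proto_ops my_ops = {
        .family     = PF_INET,
        .owner      = THIS_MODULE,
        .setsockopt = sock_common_setsockopt,
        .getsockopt = sock_common_getsockopt,
        .recvmsg    = sock_common_recvmsg,
        /* ... remaining handlers ... */
};
#endif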

void sk_common_release(struct sock *sk)
{
        if (sk->sk_prot->destroy)
                sk->sk_prot->destroy(sk);

        /*
         * Observation: when sk_common_release is called, processes have
         * no access to the socket, but the network still does.
         * Step one, detach it from networking:
         *
         * A. Remove from hash tables.
         */

        sk->sk_prot->unhash(sk);

        /*
         * At this point the socket cannot receive new packets, but it is
         * possible that some packets are still in flight, because another
         * CPU runs the receiver and did the hash table lookup before we
         * unhashed the socket. They will reach the receive queue and be
         * purged by the socket destructor.
         *
         * Also we still have packets pending on the receive queue and
         * probably our own packets waiting in device queues. sock_destroy
         * will drain the receive queue, but transmitted packets will delay
         * socket destruction until the last reference is released.
         */

        sock_orphan(sk);

        xfrm_sk_free_policy(sk);

        sk_refcnt_debug_release(sk);
        sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);
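
/*
 * Sketch of a typical caller (hypothetical my_close, compiled out):
 * protocols with no connection teardown of their own point their
 * struct proto ->close at a thin wrapper around the generic path.
 */
#if 0
static void my_close(struct sock *sk, long timeout)
{
        /* ... protocol-private cleanup ... */
        sk_common_release(sk);
}
#endif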

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
        char *request_sock_slab_name = NULL;
        char *timewait_sock_slab_name;
        int rc = -ENOBUFS;

        if (alloc_slab) {
                prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
                                               SLAB_HWCACHE_ALIGN, NULL, NULL);

                if (prot->slab == NULL) {
                        printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
                               prot->name);
                        goto out;
                }

                if (prot->rsk_prot != NULL) {
                        static const char mask[] = "request_sock_%s";

                        request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
                        if (request_sock_slab_name == NULL)
                                goto out_free_sock_slab;

                        sprintf(request_sock_slab_name, mask, prot->name);
                        prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
                                                                 prot->rsk_prot->obj_size, 0,
                                                                 SLAB_HWCACHE_ALIGN, NULL, NULL);

                        if (prot->rsk_prot->slab == NULL) {
                                printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
                                       prot->name);
                                goto out_free_request_sock_slab_name;
                        }
                }

                if (prot->twsk_prot != NULL) {
                        static const char mask[] = "tw_sock_%s";

                        timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);

                        if (timewait_sock_slab_name == NULL)
                                goto out_free_request_sock_slab;

                        sprintf(timewait_sock_slab_name, mask, prot->name);
                        prot->twsk_prot->twsk_slab =
                                kmem_cache_create(timewait_sock_slab_name,
                                                  prot->twsk_prot->twsk_obj_size,
                                                  0, SLAB_HWCACHE_ALIGN,
                                                  NULL, NULL);
                        if (prot->twsk_prot->twsk_slab == NULL)
                                goto out_free_timewait_sock_slab_name;
                }
        }

        write_lock(&proto_list_lock);
        list_add(&prot->node, &proto_list);
        write_unlock(&proto_list_lock);
        rc = 0;
out:
        return rc;
out_free_timewait_sock_slab_name:
        kfree(timewait_sock_slab_name);
out_free_request_sock_slab:
        if (prot->rsk_prot && prot->rsk_prot->slab) {
                kmem_cache_destroy(prot->rsk_prot->slab);
                prot->rsk_prot->slab = NULL;
        }
out_free_request_sock_slab_name:
        kfree(request_sock_slab_name);
out_free_sock_slab:
        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;
        goto out;
}

EXPORT_SYMBOL(proto_register);

void proto_unregister(struct proto *prot)
{
        write_lock(&proto_list_lock);
        list_del(&prot->node);
        write_unlock(&proto_list_lock);

        if (prot->slab != NULL) {
                kmem_cache_destroy(prot->slab);
                prot->slab = NULL;
        }

        if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
                const char *name = kmem_cache_name(prot->rsk_prot->slab);

                kmem_cache_destroy(prot->rsk_prot->slab);
                kfree(name);
                prot->rsk_prot->slab = NULL;
        }

        if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
                const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);

                kmem_cache_destroy(prot->twsk_prot->twsk_slab);
                kfree(name);
                prot->twsk_prot->twsk_slab = NULL;
        }
}

EXPORT_SYMBOL(proto_unregister);
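
/*
 * Registration sketch (hypothetical my_* names, compiled out): a
 * protocol embeds struct sock at the start of its own sock structure,
 * describes itself in struct proto, and registers at init time;
 * alloc_slab=1 asks proto_register() to create the sock cache above.
 */
#if 0
struct my_sock {
        struct sock sk;         /* must come first */
        /* protocol-private fields follow the common part */
};

static struct proto my_proto = {
        .name     = "MYPROTO",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct my_sock),
};

static int __init my_init(void)
{
        return proto_register(&my_proto, 1);
}

static void __exit my_exit(void)
{
        proto_unregister(&my_proto);
}
#endif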

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
        return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
        return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
        return proto->node.next == &proto_list ? NULL :
                list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
        struct proto *proto;
        loff_t i = 0;

        list_for_each_entry(proto, &proto_list, node)
                if (i++ == pos)
                        goto out;

        proto = NULL;
out:
        return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
        read_lock(&proto_list_lock);
        return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
        return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
        seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
                        "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
                   proto->name,
                   proto->obj_size,
                   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
                   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
                   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
                   proto->max_header,
                   proto->slab == NULL ? "no" : "yes",
                   module_name(proto->owner),
                   proto_method_implemented(proto->close),
                   proto_method_implemented(proto->connect),
                   proto_method_implemented(proto->disconnect),
                   proto_method_implemented(proto->accept),
                   proto_method_implemented(proto->ioctl),
                   proto_method_implemented(proto->init),
                   proto_method_implemented(proto->destroy),
                   proto_method_implemented(proto->shutdown),
                   proto_method_implemented(proto->setsockopt),
                   proto_method_implemented(proto->getsockopt),
                   proto_method_implemented(proto->sendmsg),
                   proto_method_implemented(proto->recvmsg),
                   proto_method_implemented(proto->sendpage),
                   proto_method_implemented(proto->bind),
                   proto_method_implemented(proto->backlog_rcv),
                   proto_method_implemented(proto->hash),
                   proto_method_implemented(proto->unhash),
                   proto_method_implemented(proto->get_port),
                   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
                           "protocol",
                           "size",
                           "sockets",
                           "memory",
                           "press",
                           "maxhdr",
                           "slab",
                           "module",
                           "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
        else
                proto_seq_printf(seq, v);
        return 0;
}

static const struct seq_operations proto_seq_ops = {
        .start  = proto_seq_start,
        .next   = proto_seq_next,
        .stop   = proto_seq_stop,
        .show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &proto_seq_ops);
}

static const struct file_operations proto_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = proto_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

static int __init proto_init(void)
{
        /* register /proc/net/protocols */
        return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* CONFIG_PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
EXPORT_SYMBOL(sysctl_optmem_max);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif