/* net/core/sock.c */
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Generic socket support routines. Memory allocators, socket lock/release
 *              handler for protocols to use and generic option handler.
 *
 *
 * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *              Alan Cox        :       Numerous verify_area() problems
 *              Alan Cox        :       Connecting on a connecting socket
 *                                      now returns an error for tcp.
 *              Alan Cox        :       sock->protocol is set correctly.
 *                                      and is not sometimes left as 0.
 *              Alan Cox        :       connect handles icmp errors on a
 *                                      connect properly. Unfortunately there
 *                                      is a restart syscall nasty there. I
 *                                      can't match BSD without hacking the C
 *                                      library. Ideas urgently sought!
 *              Alan Cox        :       Disallow bind() to addresses that are
 *                                      not ours - especially broadcast ones!!
 *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 *                                      instead they leave that for the DESTROY timer.
 *              Alan Cox        :       Clean up error flag in accept
 *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 *                                      was buggy. Put a remove_sock() in the handler
 *                                      for memory when we hit 0. Also altered the timer
 *                                      code. The ACK stuff can wait and needs major
 *                                      TCP layer surgery.
 *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 *                                      and fixed timer/inet_bh race.
 *              Alan Cox        :       Added zapped flag for TCP
 *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 *      Pauline Middelink       :       identd support
 *              Alan Cox        :       Fixed connect() taking signals I think.
 *              Alan Cox        :       SO_LINGER supported
 *              Alan Cox        :       Error reporting fixes
 *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 *              Alan Cox        :       inet sockets don't set sk->type!
 *              Alan Cox        :       Split socket option code
 *              Alan Cox        :       Callbacks
 *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 *              Alex            :       Removed restriction on inet fioctl
 *              Alan Cox        :       Splitting INET from NET core
 *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 *              Alan Cox        :       Split IP from generic code
 *              Alan Cox        :       New kfree_skbmem()
 *              Alan Cox        :       Make SO_DEBUG superuser only.
 *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 *                                      (compatibility fix)
 *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 *              Alan Cox        :       Allocator for a socket is settable.
 *              Alan Cox        :       SO_ERROR includes soft errors.
 *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 *              Alan Cox        :       Generic socket allocation to make hooks
 *                                      easier (suggested by Craig Metz).
 *              Michael Pall    :       SO_ERROR returns positive errno again
 *              Steve Whitehouse:       Added default destructor to free
 *                                      protocol private data.
 *              Steve Whitehouse:       Added various other default routines
 *                                      common to several socket families.
 *              Chris Evans     :       Call suser() check last on F_SETOWN
 *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 *              Andi Kleen      :       Fix write_space callback
 *              Chris Evans     :       Security fixes - signedness again
 *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-29"          ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_MAX"
};
#endif

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS         256
#define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
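
/*
 * Worked example (illustrative, not from the original source): if
 * sizeof(struct sk_buff) were 240 bytes on some platform (an assumed
 * figure; the real size is platform- and config-dependent), then
 * _SK_MEM_OVERHEAD = 240 + 256 = 496 bytes per queued packet, and
 * SK_WMEM_MAX = SK_RMEM_MAX = 496 * 256 = 126976 bytes (124 KiB).
 */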

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
        struct timeval tv;

        if (optlen < sizeof(tv))
                return -EINVAL;
        if (copy_from_user(&tv, optval, sizeof(tv)))
                return -EFAULT;
        if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
                return -EDOM;

        if (tv.tv_sec < 0) {
                static int warned = 0;
                *timeo_p = 0;
                if (warned < 10 && net_ratelimit()) {
                        warned++;
                        printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
                               "tries to set negative timeout\n",
                               current->comm, current->pid);
                }
                return 0;
        }
        *timeo_p = MAX_SCHEDULE_TIMEOUT;
        if (tv.tv_sec == 0 && tv.tv_usec == 0)
                return 0;
        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
        return 0;
}
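
/*
 * Illustrative userspace sketch (assumed usage, not part of this file):
 * setting a 2.5 second receive timeout.  sock_set_timeout() above
 * converts the timeval to jiffies, rounding the microseconds up to a
 * whole tick.  "fd" is an assumed, already-open socket.
 */
#if 0
struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };

if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
        perror("setsockopt");
#endif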

static void sock_warn_obsolete_bsdism(const char *name)
{
        static int warned;
        static char warncomm[TASK_COMM_LEN];
        if (strcmp(warncomm, current->comm) && warned < 5) {
                strcpy(warncomm,  current->comm);
                printk(KERN_WARNING "process `%s' is using obsolete "
                       "%s SO_BSDCOMPAT\n", warncomm, name);
                warned++;
        }
}

static void sock_disable_timestamp(struct sock *sk)
{
        if (sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_reset_flag(sk, SOCK_TIMESTAMP);
                net_disable_timestamp();
        }
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        int err = 0;
        int skb_len;

        /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
                err = -ENOMEM;
                goto out;
        }

        err = sk_filter(sk, skb);
        if (err)
                goto out;

        skb->dev = NULL;
        skb_set_owner_r(skb, sk);

        /* Cache the SKB length before we tack it onto the receive
         * queue.  Once it is added it no longer belongs to us and
         * may be freed by other threads of control pulling packets
         * from the queue.
         */
        skb_len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
out:
        return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
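
/*
 * Illustrative sketch (assumed protocol code, not part of this file):
 * how a datagram protocol's input path might hand a packet to the
 * generic socket layer.  On failure the caller still owns the skb and
 * must free it.
 */
#if 0
static int example_proto_rcv(struct sock *sk, struct sk_buff *skb)
{
        int err = sock_queue_rcv_skb(sk, skb);

        if (err < 0)
                kfree_skb(skb); /* receive buffer full or filter error */
        return err;
}
#endif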

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
        int rc = NET_RX_SUCCESS;

        if (sk_filter(sk, skb))
                goto discard_and_relse;

        skb->dev = NULL;

        if (nested)
                bh_lock_sock_nested(sk);
        else
                bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /*
                 * trylock + unlock semantics:
                 */
                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

                rc = sk->sk_backlog_rcv(sk, skb);

                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);
out:
        sock_put(sk);
        return rc;
discard_and_relse:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk->sk_dst_cache;

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk->sk_dst_cache = NULL;
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk_dst_get(sk);

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk_dst_reset(sk);
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(sk_dst_check);
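
/*
 * Illustrative sketch (assumed caller, not part of this file): a
 * transmit path revalidating its cached route.  When sk_dst_check()
 * returns NULL the obsolete entry has already been dropped and the
 * caller must perform a fresh route lookup.
 */
#if 0
static struct dst_entry *example_route_check(struct sock *sk)
{
        struct dst_entry *dst = sk_dst_check(sk, 0);

        if (dst == NULL) {
                /* the cached route was obsolete; do a fresh lookup
                 * elsewhere, then install it with __sk_dst_set() */
        }
        return dst;
}
#endif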

/*
 *      This is meant for all protocols to use and covers goings on
 *      at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int optlen)
{
        struct sock *sk=sock->sk;
        struct sk_filter *filter;
        int val;
        int valbool;
        struct linger ling;
        int ret = 0;

        /*
         *      Options without arguments
         */

#ifdef SO_DONTLINGER            /* Compatibility item... */
        if (optname == SO_DONTLINGER) {
                lock_sock(sk);
                sock_reset_flag(sk, SOCK_LINGER);
                release_sock(sk);
                return 0;
        }
#endif

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        valbool = val?1:0;

        lock_sock(sk);

        switch(optname) {
        case SO_DEBUG:
                if (val && !capable(CAP_NET_ADMIN)) {
                        ret = -EACCES;
                }
                else if (valbool)
                        sock_set_flag(sk, SOCK_DBG);
                else
                        sock_reset_flag(sk, SOCK_DBG);
                break;
        case SO_REUSEADDR:
                sk->sk_reuse = valbool;
                break;
        case SO_TYPE:
        case SO_ERROR:
                ret = -ENOPROTOOPT;
                break;
        case SO_DONTROUTE:
                if (valbool)
                        sock_set_flag(sk, SOCK_LOCALROUTE);
                else
                        sock_reset_flag(sk, SOCK_LOCALROUTE);
                break;
        case SO_BROADCAST:
                sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
                break;
        case SO_SNDBUF:
                /* Don't error on this. BSD doesn't and if you think
                   about it this is right. Otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints */

                if (val > sysctl_wmem_max)
                        val = sysctl_wmem_max;
set_sndbuf:
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                if ((val * 2) < SOCK_MIN_SNDBUF)
                        sk->sk_sndbuf = SOCK_MIN_SNDBUF;
                else
                        sk->sk_sndbuf = val * 2;

                /*
                 *      Wake up sending tasks if we
                 *      upped the value.
                 */
                sk->sk_write_space(sk);
                break;

        case SO_SNDBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_sndbuf;

        case SO_RCVBUF:
                /* Don't error on this. BSD doesn't and if you think
                   about it this is right. Otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints */

                if (val > sysctl_rmem_max)
                        val = sysctl_rmem_max;
set_rcvbuf:
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                /*
                 * We double it on the way in to account for
                 * "struct sk_buff" etc. overhead.   Applications
                 * assume that the SO_RCVBUF setting they make will
                 * allow that much actual data to be received on that
                 * socket.
                 *
                 * Applications are unaware that "struct sk_buff" and
                 * other overheads allocate from the receive buffer
                 * during socket buffer allocation.
                 *
                 * And after considering the possible alternatives,
                 * returning the value we actually used in getsockopt
                 * is the most desirable behavior.
                 */
                if ((val * 2) < SOCK_MIN_RCVBUF)
                        sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
                else
                        sk->sk_rcvbuf = val * 2;
                break;

        case SO_RCVBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_rcvbuf;

        case SO_KEEPALIVE:
#ifdef CONFIG_INET
                if (sk->sk_protocol == IPPROTO_TCP)
                        tcp_set_keepalive(sk, valbool);
#endif
                sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
                break;

        case SO_OOBINLINE:
                sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
                break;

        case SO_NO_CHECK:
                sk->sk_no_check = valbool;
                break;

        case SO_PRIORITY:
                if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
                        sk->sk_priority = val;
                else
                        ret = -EPERM;
                break;

        case SO_LINGER:
                if (optlen < sizeof(ling)) {
                        ret = -EINVAL;  /* 1003.1g */
                        break;
                }
                if (copy_from_user(&ling,optval,sizeof(ling))) {
                        ret = -EFAULT;
                        break;
                }
                if (!ling.l_onoff)
                        sock_reset_flag(sk, SOCK_LINGER);
                else {
#if (BITS_PER_LONG == 32)
                        if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
                                sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
                        else
#endif
                                sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
                        sock_set_flag(sk, SOCK_LINGER);
                }
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("setsockopt");
                break;

        case SO_PASSCRED:
                if (valbool)
                        set_bit(SOCK_PASSCRED, &sock->flags);
                else
                        clear_bit(SOCK_PASSCRED, &sock->flags);
                break;

        case SO_TIMESTAMP:
        case SO_TIMESTAMPNS:
                if (valbool)  {
                        if (optname == SO_TIMESTAMP)
                                sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                        else
                                sock_set_flag(sk, SOCK_RCVTSTAMPNS);
                        sock_set_flag(sk, SOCK_RCVTSTAMP);
                        sock_enable_timestamp(sk);
                } else {
                        sock_reset_flag(sk, SOCK_RCVTSTAMP);
                        sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                }
                break;

        case SO_RCVLOWAT:
                if (val < 0)
                        val = INT_MAX;
                sk->sk_rcvlowat = val ? : 1;
                break;

        case SO_RCVTIMEO:
                ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
                break;

        case SO_SNDTIMEO:
                ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
                break;

#ifdef CONFIG_NETDEVICES
        case SO_BINDTODEVICE:
        {
                char devname[IFNAMSIZ];

                /* Sorry... */
                if (!capable(CAP_NET_RAW)) {
                        ret = -EPERM;
                        break;
                }

                /* Bind this socket to a particular device like "eth0",
                 * as specified in the passed interface name. If the
                 * name is "" or the option length is zero the socket
                 * is not bound.
                 */

                if (!valbool) {
                        sk->sk_bound_dev_if = 0;
                } else {
                        if (optlen > IFNAMSIZ - 1)
                                optlen = IFNAMSIZ - 1;
                        memset(devname, 0, sizeof(devname));
                        if (copy_from_user(devname, optval, optlen)) {
                                ret = -EFAULT;
                                break;
                        }

                        /* Remove any cached route for this socket. */
                        sk_dst_reset(sk);

                        if (devname[0] == '\0') {
                                sk->sk_bound_dev_if = 0;
                        } else {
                                struct net_device *dev = dev_get_by_name(devname);
                                if (!dev) {
                                        ret = -ENODEV;
                                        break;
                                }
                                sk->sk_bound_dev_if = dev->ifindex;
                                dev_put(dev);
                        }
                }
                break;
        }
#endif


        case SO_ATTACH_FILTER:
                ret = -EINVAL;
                if (optlen == sizeof(struct sock_fprog)) {
                        struct sock_fprog fprog;

                        ret = -EFAULT;
                        if (copy_from_user(&fprog, optval, sizeof(fprog)))
                                break;

                        ret = sk_attach_filter(&fprog, sk);
                }
                break;

        case SO_DETACH_FILTER:
                rcu_read_lock_bh();
                filter = rcu_dereference(sk->sk_filter);
                if (filter) {
                        rcu_assign_pointer(sk->sk_filter, NULL);
                        sk_filter_release(sk, filter);
                        rcu_read_unlock_bh();
                        break;
                }
                rcu_read_unlock_bh();
                ret = -ENONET;
                break;

        case SO_PASSSEC:
                if (valbool)
                        set_bit(SOCK_PASSSEC, &sock->flags);
                else
                        clear_bit(SOCK_PASSSEC, &sock->flags);
                break;

                /* We implement the SO_SNDLOWAT etc to
                   not be settable (1003.1g 5.3) */
        default:
                ret = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);
        return ret;
}
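
/*
 * Illustrative userspace sketch (assumed usage, not part of this file):
 * SO_RCVBUF demonstrates the doubling described above; the value read
 * back is twice what was requested, capped by sysctl_rmem_max.  "fd"
 * is an assumed, already-open socket.
 */
#if 0
int val = 65536, got;
socklen_t len = sizeof(got);

setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &got, &len);
/* got is now 131072, assuming 65536 <= sysctl_rmem_max */
#endif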


int sock_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        union {
                int val;
                struct linger ling;
                struct timeval tm;
        } v;

        unsigned int lv = sizeof(int);
        int len;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case SO_DEBUG:
                v.val = sock_flag(sk, SOCK_DBG);
                break;

        case SO_DONTROUTE:
                v.val = sock_flag(sk, SOCK_LOCALROUTE);
                break;

        case SO_BROADCAST:
                v.val = !!sock_flag(sk, SOCK_BROADCAST);
                break;

        case SO_SNDBUF:
                v.val = sk->sk_sndbuf;
                break;

        case SO_RCVBUF:
                v.val = sk->sk_rcvbuf;
                break;

        case SO_REUSEADDR:
                v.val = sk->sk_reuse;
                break;

        case SO_KEEPALIVE:
                v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
                break;

        case SO_TYPE:
                v.val = sk->sk_type;
                break;

        case SO_ERROR:
                v.val = -sock_error(sk);
                if (v.val==0)
                        v.val = xchg(&sk->sk_err_soft, 0);
                break;

        case SO_OOBINLINE:
                v.val = !!sock_flag(sk, SOCK_URGINLINE);
                break;

        case SO_NO_CHECK:
                v.val = sk->sk_no_check;
                break;

        case SO_PRIORITY:
                v.val = sk->sk_priority;
                break;

        case SO_LINGER:
                lv              = sizeof(v.ling);
                v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
                v.ling.l_linger = sk->sk_lingertime / HZ;
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("getsockopt");
                break;

        case SO_TIMESTAMP:
                v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
                                !sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_TIMESTAMPNS:
                v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_RCVTIMEO:
                lv=sizeof(struct timeval);
                if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_SNDTIMEO:
                lv=sizeof(struct timeval);
                if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_sndtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_RCVLOWAT:
                v.val = sk->sk_rcvlowat;
                break;

        case SO_SNDLOWAT:
                v.val=1;
                break;

        case SO_PASSCRED:
                v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERCRED:
                if (len > sizeof(sk->sk_peercred))
                        len = sizeof(sk->sk_peercred);
                if (copy_to_user(optval, &sk->sk_peercred, len))
                        return -EFAULT;
                goto lenout;

        case SO_PEERNAME:
        {
                char address[128];

                if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
                        return -ENOTCONN;
                if (lv < len)
                        return -EINVAL;
                if (copy_to_user(optval, address, len))
                        return -EFAULT;
                goto lenout;
        }

        /* Dubious BSD thing... Probably nobody even uses it, but
         * the UNIX standard wants it for whatever reason... -DaveM
         */
        case SO_ACCEPTCONN:
                v.val = sk->sk_state == TCP_LISTEN;
                break;

        case SO_PASSSEC:
                v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERSEC:
                return security_socket_getpeersec_stream(sock, optval, optlen, len);

        default:
                return -ENOPROTOOPT;
        }

        if (len > lv)
                len = lv;
        if (copy_to_user(optval, &v, len))
                return -EFAULT;
lenout:
        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}
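
/*
 * Illustrative userspace sketch (assumed usage, not part of this file):
 * SO_ERROR fetches and clears the pending socket error, as implemented
 * by sock_error() and the xchg() in sock_getsockopt() above.
 */
#if 0
int err = 0;
socklen_t len = sizeof(err);

if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err)
        fprintf(stderr, "deferred socket error: %s\n", strerror(err));
#endif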

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
        sock_lock_init_class_and_name(sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
                        af_family_keys + sk->sk_family);
}

/**
 *      sk_alloc - All socket objects are allocated here
 *      @family: protocol family
 *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *      @prot: struct proto associated with this new sock instance
 *      @zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
                      struct proto *prot, int zero_it)
{
        struct sock *sk = NULL;
        struct kmem_cache *slab = prot->slab;

        if (slab != NULL)
                sk = kmem_cache_alloc(slab, priority);
        else
                sk = kmalloc(prot->obj_size, priority);

        if (sk) {
                if (zero_it) {
                        memset(sk, 0, prot->obj_size);
                        sk->sk_family = family;
                        /*
                         * See comment in struct sock definition to understand
                         * why we need sk_prot_creator -acme
                         */
                        sk->sk_prot = sk->sk_prot_creator = prot;
                        sock_lock_init(sk);
                }

                if (security_sk_alloc(sk, family, priority))
                        goto out_free;

                if (!try_module_get(prot->owner))
                        goto out_free;
        }
        return sk;

out_free:
        if (slab != NULL)
                kmem_cache_free(slab, sk);
        else
                kfree(sk);
        return NULL;
}
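
/*
 * Illustrative sketch (assumed address-family code, not part of this
 * file): the usual creation sequence pairs sk_alloc() with
 * sock_init_data() defined further below.  udp_prot stands in for
 * whatever struct proto the family registers.
 */
#if 0
static int example_af_create(struct socket *sock)
{
        struct sock *sk;

        sk = sk_alloc(PF_INET, GFP_KERNEL, &udp_prot, 1);
        if (sk == NULL)
                return -ENOBUFS;
        sock_init_data(sock, sk);
        return 0;
}
#endif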

void sk_free(struct sock *sk)
{
        struct sk_filter *filter;
        struct module *owner = sk->sk_prot_creator->owner;

        if (sk->sk_destruct)
                sk->sk_destruct(sk);

        filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                sk_filter_release(sk, filter);
                rcu_assign_pointer(sk->sk_filter, NULL);
        }

        sock_disable_timestamp(sk);

        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
                       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

        security_sk_free(sk);
        if (sk->sk_prot_creator->slab != NULL)
                kmem_cache_free(sk->sk_prot_creator->slab, sk);
        else
                kfree(sk);
        module_put(owner);
}

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
        struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

        if (newsk != NULL) {
                struct sk_filter *filter;

                sock_copy(newsk, sk);

                /* SANITY */
                sk_node_init(&newsk->sk_node);
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;

                atomic_set(&newsk->sk_rmem_alloc, 0);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                atomic_set(&newsk->sk_omem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
                skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

                rwlock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class(&newsk->sk_callback_lock,
                                   af_callback_keys + newsk->sk_family);

                newsk->sk_dst_cache     = NULL;
                newsk->sk_wmem_queued   = 0;
                newsk->sk_forward_alloc = 0;
                newsk->sk_send_head     = NULL;
                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

                sock_reset_flag(newsk, SOCK_DONE);
                skb_queue_head_init(&newsk->sk_error_queue);

                filter = newsk->sk_filter;
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);

                if (unlikely(xfrm_sk_clone_policy(newsk))) {
                        /* It is still raw copy of parent, so invalidate
                         * destructor and make plain sk_free() */
                        newsk->sk_destruct = NULL;
                        sk_free(newsk);
                        newsk = NULL;
                        goto out;
                }

                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
                atomic_set(&newsk->sk_refcnt, 2);

                /*
                 * Increment the counter in the same struct proto as the master
                 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
                 * is the same as sk->sk_prot->socks, as this field was copied
                 * with memcpy).
                 *
                 * This _changes_ the previous behaviour, where
                 * tcp_create_openreq_child always was incrementing the
                 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
                 * to be taken into account in all callers. -acme
                 */
                sk_refcnt_debug_inc(newsk);
                newsk->sk_socket = NULL;
                newsk->sk_sleep  = NULL;

                if (newsk->sk_prot->sockets_allocated)
                        atomic_inc(newsk->sk_prot->sockets_allocated);
        }
out:
        return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
        __sk_dst_set(sk, dst);
        sk->sk_route_caps = dst->dev->features;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
        if (sk_can_gso(sk)) {
                if (dst->header_len)
                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
                else
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
        }
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
        if (num_physpages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
        } else if (num_physpages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
}

/*
 *      Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* In case it might be waiting for more memory. */
        atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
                sk->sk_write_space(sk);
        sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
        int uid;

        read_lock(&sk->sk_callback_lock);
        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
        read_unlock(&sk->sk_callback_lock);
        return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
        unsigned long ino;

        read_lock(&sk->sk_callback_lock);
        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
        read_unlock(&sk->sk_callback_lock);
        return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                struct sk_buff * skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_r(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
        if ((unsigned)size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
                void *mem;
                /* First do the add, to avoid the race if kmalloc
                 * might sleep.
                 */
                atomic_add(size, &sk->sk_omem_alloc);
                mem = kmalloc(size, priority);
                if (mem)
                        return mem;
                atomic_sub(size, &sk->sk_omem_alloc);
        }
        return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
        kfree(mem);
        atomic_sub(size, &sk->sk_omem_alloc);
}
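
/*
 * Illustrative sketch (assumed caller, not part of this file): option
 * memory must always be freed with the same size it was charged with,
 * so sk_omem_alloc balances back to zero.
 */
#if 0
static int example_set_option(struct sock *sk, int size)
{
        void *opt = sock_kmalloc(sk, size, GFP_KERNEL);

        if (opt == NULL)
                return -ENOBUFS;
        /* ... fill in and attach the option data ... */
        sock_kfree_s(sk, opt, size);    /* same size passed back */
        return 0;
}
#endif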

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
        DEFINE_WAIT(wait);

        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
        for (;;) {
                if (!timeo)
                        break;
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        break;
                if (sk->sk_err)
                        break;
                timeo = schedule_timeout(timeo);
        }
        finish_wait(sk->sk_sleep, &wait);
        return timeo;
}


/*
 *      Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
                                            unsigned long header_len,
                                            unsigned long data_len,
                                            int noblock, int *errcode)
{
        struct sk_buff *skb;
        gfp_t gfp_mask;
        long timeo;
        int err;

        gfp_mask = sk->sk_allocation;
        if (gfp_mask & __GFP_WAIT)
                gfp_mask |= __GFP_REPEAT;

        timeo = sock_sndtimeo(sk, noblock);
        while (1) {
                err = sock_error(sk);
                if (err != 0)
                        goto failure;

                err = -EPIPE;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        goto failure;

                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                        skb = alloc_skb(header_len, gfp_mask);
                        if (skb) {
                                int npages;
                                int i;

                                /* No pages, we're done... */
                                if (!data_len)
                                        break;

                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
                                skb->truesize += data_len;
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
                                        skb_frag_t *frag;

                                        page = alloc_pages(sk->sk_allocation, 0);
                                        if (!page) {
                                                err = -ENOBUFS;
                                                skb_shinfo(skb)->nr_frags = i;
                                                kfree_skb(skb);
                                                goto failure;
                                        }

                                        frag = &skb_shinfo(skb)->frags[i];
                                        frag->page = page;
                                        frag->page_offset = 0;
                                        frag->size = (data_len >= PAGE_SIZE ?
                                                      PAGE_SIZE :
                                                      data_len);
                                        data_len -= PAGE_SIZE;
                                }

                                /* Full success... */
                                break;
                        }
                        err = -ENOBUFS;
                        goto failure;
                }
                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                err = -EAGAIN;
                if (!timeo)
                        goto failure;
                if (signal_pending(current))
                        goto interrupted;
                timeo = sock_wait_for_wmem(sk, timeo);
        }

        skb_set_owner_w(skb, sk);
        return skb;

interrupted:
        err = sock_intr_errno(timeo);
failure:
        *errcode = err;
        return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
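
/*
 * Illustrative sketch (assumed datagram sendmsg path, not part of this
 * file; skb, err, hlen, len and msg are assumed locals of that path):
 * blocking behaviour is derived from MSG_DONTWAIT, and *errcode
 * carries the reason when allocation fails.
 */
#if 0
skb = sock_alloc_send_skb(sk, hlen + len,
                          msg->msg_flags & MSG_DONTWAIT, &err);
if (skb == NULL)
        goto out;       /* err is -EAGAIN, -EPIPE, -EINTR, ... */
#endif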

static void __lock_sock(struct sock *sk)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
                                        TASK_UNINTERRUPTIBLE);
                spin_unlock_bh(&sk->sk_lock.slock);
                schedule();
                spin_lock_bh(&sk->sk_lock.slock);
                if (!sock_owned_by_user(sk))
                        break;
        }
        finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
        struct sk_buff *skb = sk->sk_backlog.head;

        do {
                sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
                bh_unlock_sock(sk);

                do {
                        struct sk_buff *next = skb->next;

                        skb->next = NULL;
                        sk->sk_backlog_rcv(sk, skb);

                        /*
                         * We are in process context here with softirqs
                         * disabled, use cond_resched_softirq() to preempt.
                         * This is safe to do because we've taken the backlog
                         * queue private:
                         */
                        cond_resched_softirq();

                        skb = next;
                } while (skb != NULL);

                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
        int rc;
        DEFINE_WAIT(wait);

        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        finish_wait(sk->sk_sleep, &wait);
        return rc;
}

EXPORT_SYMBOL(sk_wait_data);
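
/*
 * Illustrative sketch (assumed recvmsg path, not part of this file;
 * timeo is an assumed local): waiting for data with the socket lock
 * held, as the comment above requires.  sk_wait_event() drops and
 * retakes the lock around the actual sleep.
 */
#if 0
while (skb_queue_empty(&sk->sk_receive_queue)) {
        if (!timeo)
                return -EAGAIN;
        if (signal_pending(current))
                return sock_intr_errno(timeo);
        sk_wait_data(sk, &timeo);
}
#endif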
1326
1327 /*
1328  * Set of default routines for initialising struct proto_ops when
1329  * the protocol does not support a particular function. In certain
1330  * cases where it makes no sense for a protocol to have a "do nothing"
1331  * function, some default processing is provided.
1332  */
1333
1334 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1335 {
1336         return -EOPNOTSUPP;
1337 }
1338
1339 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1340                     int len, int flags)
1341 {
1342         return -EOPNOTSUPP;
1343 }
1344
1345 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1346 {
1347         return -EOPNOTSUPP;
1348 }
1349
1350 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1351 {
1352         return -EOPNOTSUPP;
1353 }
1354
1355 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1356                     int *len, int peer)
1357 {
1358         return -EOPNOTSUPP;
1359 }
1360
1361 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1362 {
1363         return 0;
1364 }
1365
1366 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1367 {
1368         return -EOPNOTSUPP;
1369 }
1370
1371 int sock_no_listen(struct socket *sock, int backlog)
1372 {
1373         return -EOPNOTSUPP;
1374 }
1375
1376 int sock_no_shutdown(struct socket *sock, int how)
1377 {
1378         return -EOPNOTSUPP;
1379 }
1380
1381 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1382                     char __user *optval, int optlen)
1383 {
1384         return -EOPNOTSUPP;
1385 }
1386
1387 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1388                     char __user *optval, int __user *optlen)
1389 {
1390         return -EOPNOTSUPP;
1391 }
1392
1393 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1394                     size_t len)
1395 {
1396         return -EOPNOTSUPP;
1397 }
1398
1399 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1400                     size_t len, int flags)
1401 {
1402         return -EOPNOTSUPP;
1403 }
1404
1405 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1406 {
1407         /* Mirror missing mmap method error code */
1408         return -ENODEV;
1409 }
1410
1411 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1412 {
1413         ssize_t res;
1414         struct msghdr msg = {.msg_flags = flags};
1415         struct kvec iov;
1416         char *kaddr = kmap(page);
1417         iov.iov_base = kaddr + offset;
1418         iov.iov_len = size;
1419         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1420         kunmap(page);
1421         return res;
1422 }
1423
1424 /*
1425  *      Default Socket Callbacks
1426  */
1427
1428 static void sock_def_wakeup(struct sock *sk)
1429 {
1430         read_lock(&sk->sk_callback_lock);
1431         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1432                 wake_up_interruptible_all(sk->sk_sleep);
1433         read_unlock(&sk->sk_callback_lock);
1434 }
1435
1436 static void sock_def_error_report(struct sock *sk)
1437 {
1438         read_lock(&sk->sk_callback_lock);
1439         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1440                 wake_up_interruptible(sk->sk_sleep);
1441         sk_wake_async(sk,0,POLL_ERR);
1442         read_unlock(&sk->sk_callback_lock);
1443 }
1444
1445 static void sock_def_readable(struct sock *sk, int len)
1446 {
1447         read_lock(&sk->sk_callback_lock);
1448         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1449                 wake_up_interruptible(sk->sk_sleep);
1450         sk_wake_async(sk,1,POLL_IN);
1451         read_unlock(&sk->sk_callback_lock);
1452 }
1453
1454 static void sock_def_write_space(struct sock *sk)
1455 {
1456         read_lock(&sk->sk_callback_lock);
1457
1458         /* Do not wake up a writer until he can make "significant"
1459          * progress.  --DaveM
1460          */
1461         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1462                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1463                         wake_up_interruptible(sk->sk_sleep);
1464
1465                 /* Should agree with poll, otherwise some programs break */
1466                 if (sock_writeable(sk))
1467                         sk_wake_async(sk, 2, POLL_OUT);
1468         }
1469
1470         read_unlock(&sk->sk_callback_lock);
1471 }
1472
1473 static void sock_def_destruct(struct sock *sk)
1474 {
1475         kfree(sk->sk_protinfo);
1476 }
1477
1478 void sk_send_sigurg(struct sock *sk)
1479 {
1480         if (sk->sk_socket && sk->sk_socket->file)
1481                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1482                         sk_wake_async(sk, 3, POLL_PRI);
1483 }
1484
1485 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1486                     unsigned long expires)
1487 {
1488         if (!mod_timer(timer, expires))
1489                 sock_hold(sk);
1490 }
1491
1492 EXPORT_SYMBOL(sk_reset_timer);
1493
1494 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1495 {
1496         if (timer_pending(timer) && del_timer(timer))
1497                 __sock_put(sk);
1498 }
1499
1500 EXPORT_SYMBOL(sk_stop_timer);
1501
1502 void sock_init_data(struct socket *sock, struct sock *sk)
1503 {
1504         skb_queue_head_init(&sk->sk_receive_queue);
1505         skb_queue_head_init(&sk->sk_write_queue);
1506         skb_queue_head_init(&sk->sk_error_queue);
1507 #ifdef CONFIG_NET_DMA
1508         skb_queue_head_init(&sk->sk_async_wait_queue);
1509 #endif
1510
1511         sk->sk_send_head        =       NULL;
1512
1513         init_timer(&sk->sk_timer);
1514
1515         sk->sk_allocation       =       GFP_KERNEL;
1516         sk->sk_rcvbuf           =       sysctl_rmem_default;
1517         sk->sk_sndbuf           =       sysctl_wmem_default;
1518         sk->sk_state            =       TCP_CLOSE;
1519         sk->sk_socket           =       sock;
1520
1521         sock_set_flag(sk, SOCK_ZAPPED);
1522
1523         if (sock) {
1524                 sk->sk_type     =       sock->type;
1525                 sk->sk_sleep    =       &sock->wait;
1526                 sock->sk        =       sk;
1527         } else
1528                 sk->sk_sleep    =       NULL;
1529
1530         rwlock_init(&sk->sk_dst_lock);
1531         rwlock_init(&sk->sk_callback_lock);
1532         lockdep_set_class(&sk->sk_callback_lock,
1533                            af_callback_keys + sk->sk_family);
1534
1535         sk->sk_state_change     =       sock_def_wakeup;
1536         sk->sk_data_ready       =       sock_def_readable;
1537         sk->sk_write_space      =       sock_def_write_space;
1538         sk->sk_error_report     =       sock_def_error_report;
1539         sk->sk_destruct         =       sock_def_destruct;
1540
1541         sk->sk_sndmsg_page      =       NULL;
1542         sk->sk_sndmsg_off       =       0;
1543
1544         sk->sk_peercred.pid     =       0;
1545         sk->sk_peercred.uid     =       -1;
1546         sk->sk_peercred.gid     =       -1;
1547         sk->sk_write_pending    =       0;
1548         sk->sk_rcvlowat         =       1;
1549         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1550         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1551
1552         sk->sk_stamp = ktime_set(-1L, -1L);
1553
1554         atomic_set(&sk->sk_refcnt, 1);
1555 }
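
/*
 * Sketch of typical use: an address family's create routine calls
 * sock_init_data() first and then overrides whichever defaults it
 * needs. The example_* callbacks are hypothetical.
 *
 *	sock_init_data(sock, sk);
 *	sk->sk_destruct   = example_destruct;
 *	sk->sk_data_ready = example_data_ready;
 */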
1556
1557 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1558 {
1559         might_sleep();
1560         spin_lock_bh(&sk->sk_lock.slock);
1561         if (sk->sk_lock.owner)
1562                 __lock_sock(sk);
1563         sk->sk_lock.owner = (void *)1;
1564         spin_unlock(&sk->sk_lock.slock);
1565         /*
1566          * The sk_lock has mutex_lock() semantics here:
1567          */
1568         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1569         local_bh_enable();
1570 }
1571
1572 EXPORT_SYMBOL(lock_sock_nested);
1573
1574 void fastcall release_sock(struct sock *sk)
1575 {
1576         /*
1577          * The sk_lock has mutex_unlock() semantics:
1578          */
1579         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1580
1581         spin_lock_bh(&sk->sk_lock.slock);
1582         if (sk->sk_backlog.tail)
1583                 __release_sock(sk);
1584         sk->sk_lock.owner = NULL;
1585         if (waitqueue_active(&sk->sk_lock.wq))
1586                 wake_up(&sk->sk_lock.wq);
1587         spin_unlock_bh(&sk->sk_lock.slock);
1588 }
1589 EXPORT_SYMBOL(release_sock);
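
/*
 * The canonical pattern in protocol code: lock_sock() (which may
 * sleep) marks the socket as owned by the process; packets arriving
 * in the meantime are queued on sk->sk_backlog and replayed by
 * __release_sock() on the way out.
 *
 *	lock_sock(sk);
 *	... modify socket state ...
 *	release_sock(sk);
 */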
1590
1591 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1592 {
1593         struct timeval tv;
1594         if (!sock_flag(sk, SOCK_TIMESTAMP))
1595                 sock_enable_timestamp(sk);
1596         tv = ktime_to_timeval(sk->sk_stamp);
1597         if (tv.tv_sec == -1)
1598                 return -ENOENT;
1599         if (tv.tv_sec == 0) {
1600                 sk->sk_stamp = ktime_get_real();
1601                 tv = ktime_to_timeval(sk->sk_stamp);
1602         }
1603         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1604 }
1605 EXPORT_SYMBOL(sock_get_timestamp);
1606
1607 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1608 {
1609         struct timespec ts;
1610         if (!sock_flag(sk, SOCK_TIMESTAMP))
1611                 sock_enable_timestamp(sk);
1612         ts = ktime_to_timespec(sk->sk_stamp);
1613         if (ts.tv_sec == -1)
1614                 return -ENOENT;
1615         if (ts.tv_sec == 0) {
1616                 sk->sk_stamp = ktime_get_real();
1617                 ts = ktime_to_timespec(sk->sk_stamp);
1618         }
1619         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1620 }
1621 EXPORT_SYMBOL(sock_get_timestampns);
1622
1623 void sock_enable_timestamp(struct sock *sk)
1624 {
1625         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1626                 sock_set_flag(sk, SOCK_TIMESTAMP);
1627                 net_enable_timestamp();
1628         }
1629 }
1630 EXPORT_SYMBOL(sock_enable_timestamp);
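
/*
 * These helpers back the SIOCGSTAMP/SIOCGSTAMPNS ioctls, so userspace
 * can read the receive time of the last packet with something like:
 *
 *	struct timeval tv;
 *	ioctl(fd, SIOCGSTAMP, &tv);
 */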
1631
1632 /*
1633  *      Get a socket option on a socket.
1634  *
1635  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
1636  *      asynchronous errors should be reported by getsockopt. We assume
1637  *      this means only if you specify SO_ERROR (otherwise what's the point of it).
1638  */
1639 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1640                            char __user *optval, int __user *optlen)
1641 {
1642         struct sock *sk = sock->sk;
1643
1644         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1645 }
1646
1647 EXPORT_SYMBOL(sock_common_getsockopt);
1648
1649 #ifdef CONFIG_COMPAT
1650 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1651                                   char __user *optval, int __user *optlen)
1652 {
1653         struct sock *sk = sock->sk;
1654
1655         if (sk->sk_prot->compat_getsockopt != NULL)
1656                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
1657                                                       optval, optlen);
1658         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1659 }
1660 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1661 #endif
1662
1663 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1664                         struct msghdr *msg, size_t size, int flags)
1665 {
1666         struct sock *sk = sock->sk;
1667         int addr_len = 0;
1668         int err;
1669
1670         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1671                                    flags & ~MSG_DONTWAIT, &addr_len);
1672         if (err >= 0)
1673                 msg->msg_namelen = addr_len;
1674         return err;
1675 }
1676
1677 EXPORT_SYMBOL(sock_common_recvmsg);
1678
1679 /*
1680  *      Set socket options on a socket.
1681  */
1682 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1683                            char __user *optval, int optlen)
1684 {
1685         struct sock *sk = sock->sk;
1686
1687         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1688 }
1689
1690 EXPORT_SYMBOL(sock_common_setsockopt);
1691
1692 #ifdef CONFIG_COMPAT
1693 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1694                                   char __user *optval, int optlen)
1695 {
1696         struct sock *sk = sock->sk;
1697
1698         if (sk->sk_prot->compat_setsockopt != NULL)
1699                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
1700                                                       optval, optlen);
1701         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1702 }
1703 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1704 #endif
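
/*
 * Sketch: a family whose struct proto implements getsockopt/setsockopt/
 * recvmsg directly can plug these wrappers straight into its proto_ops:
 *
 *	.setsockopt = sock_common_setsockopt,
 *	.getsockopt = sock_common_getsockopt,
 *	.recvmsg    = sock_common_recvmsg,
 */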
1705
1706 void sk_common_release(struct sock *sk)
1707 {
1708         if (sk->sk_prot->destroy)
1709                 sk->sk_prot->destroy(sk);
1710
1711         /*
1712          * Observation: when sk_common_release is called, user processes no
1713          * longer have access to the socket, but the network stack still does.
1714          * Step one, detach it from networking:
1715          *
1716          * A. Remove it from the hash tables.
1717          */
1718
1719         sk->sk_prot->unhash(sk);
1720
1721         /*
1722          * At this point the socket cannot receive new packets, but packets
1723          * may still be in flight: another CPU may have done its hash table
1724          * lookup before we unhashed the socket. Those packets will reach
1725          * the receive queue and be purged by the socket destructor.
1726          *
1727          * We may also still have packets pending on the receive queue and,
1728          * probably, our own packets waiting in device queues. The destructor
1729          * drains the receive queue, but transmitted packets in flight delay
1730          * socket destruction until the last reference is released.
1731          */
1732
1733         sock_orphan(sk);
1734
1735         xfrm_sk_free_policy(sk);
1736
1737         sk_refcnt_debug_release(sk);
1738         sock_put(sk);
1739 }
1740
1741 EXPORT_SYMBOL(sk_common_release);
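
/*
 * Usage sketch (hypothetical protocol): a simple protocol's .close can
 * do its private teardown and then hand the rest to sk_common_release():
 *
 *	static void example_close(struct sock *sk, long timeout)
 *	{
 *		... protocol-private teardown ...
 *		sk_common_release(sk);
 *	}
 */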
1742
1743 static DEFINE_RWLOCK(proto_list_lock);
1744 static LIST_HEAD(proto_list);
1745
1746 int proto_register(struct proto *prot, int alloc_slab)
1747 {
1748         char *request_sock_slab_name = NULL;
1749         char *timewait_sock_slab_name;
1750         int rc = -ENOBUFS;
1751
1752         if (alloc_slab) {
1753                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1754                                                SLAB_HWCACHE_ALIGN, NULL, NULL);
1755
1756                 if (prot->slab == NULL) {
1757                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1758                                prot->name);
1759                         goto out;
1760                 }
1761
1762                 if (prot->rsk_prot != NULL) {
1763                         static const char mask[] = "request_sock_%s";
1764
1765                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1766                         if (request_sock_slab_name == NULL)
1767                                 goto out_free_sock_slab;
1768
1769                         sprintf(request_sock_slab_name, mask, prot->name);
1770                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1771                                                                  prot->rsk_prot->obj_size, 0,
1772                                                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
1773
1774                         if (prot->rsk_prot->slab == NULL) {
1775                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1776                                        prot->name);
1777                                 goto out_free_request_sock_slab_name;
1778                         }
1779                 }
1780
1781                 if (prot->twsk_prot != NULL) {
1782                         static const char mask[] = "tw_sock_%s";
1783
1784                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1785
1786                         if (timewait_sock_slab_name == NULL)
1787                                 goto out_free_request_sock_slab;
1788
1789                         sprintf(timewait_sock_slab_name, mask, prot->name);
1790                         prot->twsk_prot->twsk_slab =
1791                                 kmem_cache_create(timewait_sock_slab_name,
1792                                                   prot->twsk_prot->twsk_obj_size,
1793                                                   0, SLAB_HWCACHE_ALIGN,
1794                                                   NULL, NULL);
1795                         if (prot->twsk_prot->twsk_slab == NULL)
1796                                 goto out_free_timewait_sock_slab_name;
1797                 }
1798         }
1799
1800         write_lock(&proto_list_lock);
1801         list_add(&prot->node, &proto_list);
1802         write_unlock(&proto_list_lock);
1803         rc = 0;
1804 out:
1805         return rc;
1806 out_free_timewait_sock_slab_name:
1807         kfree(timewait_sock_slab_name);
1808 out_free_request_sock_slab:
1809         if (prot->rsk_prot && prot->rsk_prot->slab) {
1810                 kmem_cache_destroy(prot->rsk_prot->slab);
1811                 prot->rsk_prot->slab = NULL;
1812         }
1813 out_free_request_sock_slab_name:
1814         kfree(request_sock_slab_name);
1815 out_free_sock_slab:
1816         kmem_cache_destroy(prot->slab);
1817         prot->slab = NULL;
1818         goto out;
1819 }
1820
1821 EXPORT_SYMBOL(proto_register);
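
/*
 * Registration sketch: a minimal protocol with its own slab. The
 * example_* identifiers are hypothetical; obj_size must cover the
 * protocol's private sock structure.
 *
 *	static struct proto example_prot = {
 *		.name     = "EXAMPLE",
 *		.owner    = THIS_MODULE,
 *		.obj_size = sizeof(struct example_sock),
 *	};
 *
 *	err = proto_register(&example_prot, 1);
 *
 * and, on module unload, proto_unregister(&example_prot).
 */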
1822
1823 void proto_unregister(struct proto *prot)
1824 {
1825         write_lock(&proto_list_lock);
1826         list_del(&prot->node);
1827         write_unlock(&proto_list_lock);
1828
1829         if (prot->slab != NULL) {
1830                 kmem_cache_destroy(prot->slab);
1831                 prot->slab = NULL;
1832         }
1833
1834         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1835                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1836
1837                 kmem_cache_destroy(prot->rsk_prot->slab);
1838                 kfree(name);
1839                 prot->rsk_prot->slab = NULL;
1840         }
1841
1842         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1843                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1844
1845                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1846                 kfree(name);
1847                 prot->twsk_prot->twsk_slab = NULL;
1848         }
1849 }
1850
1851 EXPORT_SYMBOL(proto_unregister);
1852
1853 #ifdef CONFIG_PROC_FS
1854 static inline struct proto *__proto_head(void)
1855 {
1856         return list_entry(proto_list.next, struct proto, node);
1857 }
1858
1859 static inline struct proto *proto_head(void)
1860 {
1861         return list_empty(&proto_list) ? NULL : __proto_head();
1862 }
1863
1864 static inline struct proto *proto_next(struct proto *proto)
1865 {
1866         return proto->node.next == &proto_list ? NULL :
1867                 list_entry(proto->node.next, struct proto, node);
1868 }
1869
1870 static inline struct proto *proto_get_idx(loff_t pos)
1871 {
1872         struct proto *proto;
1873         loff_t i = 0;
1874
1875         list_for_each_entry(proto, &proto_list, node)
1876                 if (i++ == pos)
1877                         goto out;
1878
1879         proto = NULL;
1880 out:
1881         return proto;
1882 }
1883
1884 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1885 {
1886         read_lock(&proto_list_lock);
1887         return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
1888 }
1889
1890 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1891 {
1892         ++*pos;
1893         return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
1894 }
1895
1896 static void proto_seq_stop(struct seq_file *seq, void *v)
1897 {
1898         read_unlock(&proto_list_lock);
1899 }
1900
1901 static char proto_method_implemented(const void *method)
1902 {
1903         return method == NULL ? 'n' : 'y';
1904 }
1905
1906 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1907 {
1908         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1909                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1910                    proto->name,
1911                    proto->obj_size,
1912                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1913                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1914                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1915                    proto->max_header,
1916                    proto->slab == NULL ? "no" : "yes",
1917                    module_name(proto->owner),
1918                    proto_method_implemented(proto->close),
1919                    proto_method_implemented(proto->connect),
1920                    proto_method_implemented(proto->disconnect),
1921                    proto_method_implemented(proto->accept),
1922                    proto_method_implemented(proto->ioctl),
1923                    proto_method_implemented(proto->init),
1924                    proto_method_implemented(proto->destroy),
1925                    proto_method_implemented(proto->shutdown),
1926                    proto_method_implemented(proto->setsockopt),
1927                    proto_method_implemented(proto->getsockopt),
1928                    proto_method_implemented(proto->sendmsg),
1929                    proto_method_implemented(proto->recvmsg),
1930                    proto_method_implemented(proto->sendpage),
1931                    proto_method_implemented(proto->bind),
1932                    proto_method_implemented(proto->backlog_rcv),
1933                    proto_method_implemented(proto->hash),
1934                    proto_method_implemented(proto->unhash),
1935                    proto_method_implemented(proto->get_port),
1936                    proto_method_implemented(proto->enter_memory_pressure));
1937 }
1938
1939 static int proto_seq_show(struct seq_file *seq, void *v)
1940 {
1941         if (v == SEQ_START_TOKEN)
1942                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1943                            "protocol",
1944                            "size",
1945                            "sockets",
1946                            "memory",
1947                            "press",
1948                            "maxhdr",
1949                            "slab",
1950                            "module",
1951                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1952         else
1953                 proto_seq_printf(seq, v);
1954         return 0;
1955 }
1956
1957 static const struct seq_operations proto_seq_ops = {
1958         .start  = proto_seq_start,
1959         .next   = proto_seq_next,
1960         .stop   = proto_seq_stop,
1961         .show   = proto_seq_show,
1962 };
1963
1964 static int proto_seq_open(struct inode *inode, struct file *file)
1965 {
1966         return seq_open(file, &proto_seq_ops);
1967 }
1968
1969 static const struct file_operations proto_seq_fops = {
1970         .owner          = THIS_MODULE,
1971         .open           = proto_seq_open,
1972         .read           = seq_read,
1973         .llseek         = seq_lseek,
1974         .release        = seq_release,
1975 };
1976
1977 static int __init proto_init(void)
1978 {
1979         /* register /proc/net/protocols */
1980         return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1981 }
1982
1983 subsys_initcall(proto_init);
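
/*
 * The resulting /proc/net/protocols table looks roughly like this
 * (abridged; values vary by kernel and workload):
 *
 *	protocol  size sockets  memory press maxhdr  slab module  cl co ...
 *	TCP        ...     ...     ...    no    ...   yes  kernel   y  y ...
 */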
1984
1985 #endif /* PROC_FS */
1986
1987 EXPORT_SYMBOL(sk_alloc);
1988 EXPORT_SYMBOL(sk_free);
1989 EXPORT_SYMBOL(sk_send_sigurg);
1990 EXPORT_SYMBOL(sock_alloc_send_skb);
1991 EXPORT_SYMBOL(sock_init_data);
1992 EXPORT_SYMBOL(sock_kfree_s);
1993 EXPORT_SYMBOL(sock_kmalloc);
1994 EXPORT_SYMBOL(sock_no_accept);
1995 EXPORT_SYMBOL(sock_no_bind);
1996 EXPORT_SYMBOL(sock_no_connect);
1997 EXPORT_SYMBOL(sock_no_getname);
1998 EXPORT_SYMBOL(sock_no_getsockopt);
1999 EXPORT_SYMBOL(sock_no_ioctl);
2000 EXPORT_SYMBOL(sock_no_listen);
2001 EXPORT_SYMBOL(sock_no_mmap);
2002 EXPORT_SYMBOL(sock_no_poll);
2003 EXPORT_SYMBOL(sock_no_recvmsg);
2004 EXPORT_SYMBOL(sock_no_sendmsg);
2005 EXPORT_SYMBOL(sock_no_sendpage);
2006 EXPORT_SYMBOL(sock_no_setsockopt);
2007 EXPORT_SYMBOL(sock_no_shutdown);
2008 EXPORT_SYMBOL(sock_no_socketpair);
2009 EXPORT_SYMBOL(sock_rfree);
2010 EXPORT_SYMBOL(sock_setsockopt);
2011 EXPORT_SYMBOL(sock_wfree);
2012 EXPORT_SYMBOL(sock_wmalloc);
2013 EXPORT_SYMBOL(sock_i_uid);
2014 EXPORT_SYMBOL(sock_i_ino);
2015 EXPORT_SYMBOL(sysctl_optmem_max);
2016 #ifdef CONFIG_SYSCTL
2017 EXPORT_SYMBOL(sysctl_rmem_max);
2018 EXPORT_SYMBOL(sysctl_wmem_max);
2019 #endif