git.oblomov.eu Git - linux-2.6/blob - net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro, <bir7@leland.Stanford.Edu>
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/config.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/string.h>
  84 #include <linux/mm.h>
  85 #include <linux/socket.h>
  86 #include <linux/sockios.h>
  87 #include <linux/errno.h>
  88 #include <linux/interrupt.h>
  89 #include <linux/if_ether.h>
  90 #include <linux/netdevice.h>
  91 #include <linux/etherdevice.h>
  92 #include <linux/notifier.h>
  93 #include <linux/skbuff.h>
  94 #include <net/sock.h>
  95 #include <linux/rtnetlink.h>
  96 #include <linux/proc_fs.h>
  97 #include <linux/seq_file.h>
  98 #include <linux/stat.h>
  99 #include <linux/if_bridge.h>
 100 #include <linux/divert.h>
 101 #include <net/dst.h>
 102 #include <net/pkt_sched.h>
 103 #include <net/checksum.h>
 104 #include <linux/highmem.h>
 105 #include <linux/init.h>
 106 #include <linux/kmod.h>
 107 #include <linux/module.h>
 108 #include <linux/kallsyms.h>
 109 #include <linux/netpoll.h>
 110 #include <linux/rcupdate.h>
 111 #include <linux/delay.h>
 112 #ifdef CONFIG_NET_RADIO
 113 #include <linux/wireless.h>             /* Note : will define WIRELESS_EXT */
 114 #include <net/iw_handler.h>
 115 #endif  /* CONFIG_NET_RADIO */
 116 #include <asm/current.h>
 117
 118 /* This define, if set, will randomly drop a packet when congestion
 119  * is more than moderate.  It helps fairness in the multi-interface
 120  * case when one of them is a hog, but it kills performance for the
 121  * single interface case so it is off now by default.
 122  */
 123 #undef RAND_LIE
 124
 125 /* Setting this will sample the queue lengths and thus congestion
 126  * via a timer instead of as each packet is received.
 127  */
 128 #undef OFFLINE_SAMPLE
 129
 130 /*
 131  *      The list of packet types we will receive (as opposed to discard)
 132  *      and the routines to invoke.
 133  *
 134  *      Why 16. Because with 16 the only overlap we get on a hash of the
 135  *      low nibble of the protocol value is RARP/SNAP/X.25.
 136  *
 137  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 138  *             sure which should go first, but I bet it won't make much
 139  *             difference if we are running VLANs.  The good news is that
 140  *             this protocol won't be in the list unless compiled in, so
 141  *             the average user (w/out VLANs) will not be adversely affected.
 142  *             --BLG
 143  *
 144  *              0800    IP
 145  *              8100    802.1Q VLAN
 146  *              0001    802.3
 147  *              0002    AX.25
 148  *              0004    802.2
 149  *              8035    RARP
 150  *              0005    SNAP
 151  *              0805    X.25
 152  *              0806    ARP
 153  *              8137    IPX
 154  *              0009    Localtalk
 155  *              86DD    IPv6
 156  */
 157
 158 static DEFINE_SPINLOCK(ptype_lock);
 159 static struct list_head ptype_base[16]; /* 16 way hashed list */
 160 static struct list_head ptype_all;              /* Taps */
 161
 162 #ifdef OFFLINE_SAMPLE
 163 static void sample_queue(unsigned long dummy);
 164 static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
 165 #endif
 166
 167 /*
 168  * The @dev_base list is protected by @dev_base_lock and the rtnl
 169  * semaphore.
 170  *
 171  * Pure readers hold dev_base_lock for reading.
 172  *
 173  * Writers must hold the rtnl semaphore while they loop through the
 174  * dev_base list, and hold dev_base_lock for writing when they do the
 175  * actual updates.  This allows pure readers to access the list even
 176  * while a writer is preparing to update it.
 177  *
 178  * To put it another way, dev_base_lock is held for writing only to
 179  * protect against pure readers; the rtnl semaphore provides the
 180  * protection against other writers.
 181  *
 182  * See, for example usages, register_netdevice() and
 183  * unregister_netdevice(), which must be called with the rtnl
 184  * semaphore held.
 185  */
 186 struct net_device *dev_base;
 187 static struct net_device **dev_tail = &dev_base;
 188 DEFINE_RWLOCK(dev_base_lock);
 189
 190 EXPORT_SYMBOL(dev_base);
 191 EXPORT_SYMBOL(dev_base_lock);
 192
 193 #define NETDEV_HASHBITS 8
 194 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 195 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 196
 197 static inline struct hlist_head *dev_name_hash(const char *name)
 198 {
 199         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 200         return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 201 }
 202
 203 static inline struct hlist_head *dev_index_hash(int ifindex)
 204 {
 205         return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 206 }
 207
 208 /*
 209  *      Our notifier list
 210  */
 211
 212 static struct notifier_block *netdev_chain;
 213
 214 /*
 215  *      Device drivers call our routines to queue packets here. We empty the
 216  *      queue in the local softnet handler.
 217  */
 218 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
 219
 220 #ifdef CONFIG_SYSFS
 221 extern int netdev_sysfs_init(void);
 222 extern int netdev_register_sysfs(struct net_device *);
 223 extern void netdev_unregister_sysfs(struct net_device *);
 224 #else
 225 #define netdev_sysfs_init()             (0)
 226 #define netdev_register_sysfs(dev)      (0)
 227 #define netdev_unregister_sysfs(dev)    do { } while(0)
 228 #endif
 229
 230
 231 /*******************************************************************************
 232
 233                 Protocol management and registration routines
 234
 235 *******************************************************************************/
 236
 237 /*
 238  *      For efficiency
 239  */
 240
 241 int netdev_nit;
 242
 243 /*
 244  *      Add a protocol ID to the list. Now that the input handler is
 245  *      smarter we can dispense with all the messy stuff that used to be
 246  *      here.
 247  *
 248  *      BEWARE!!! Protocol handlers, mangling input packets,
 249  *      MUST BE last in hash buckets and checking protocol handlers
 250  *      MUST start from promiscuous ptype_all chain in net_bh.
 251  *      It is true now, do not change it.
 252  *      Explanation follows: if protocol handler, mangling packet, will
 253  *      be the first on list, it is not able to sense, that packet
 254  *      is cloned and should be copied-on-write, so that it will
 255  *      change it and subsequent readers will get broken packet.
 256  *                                                      --ANK (980803)
 257  */
 258
 259 /**
 260  *      dev_add_pack - add packet handler
 261  *      @pt: packet type declaration
 262  *
 263  *      Add a protocol handler to the networking stack. The passed &packet_type
 264  *      is linked into kernel lists and may not be freed until it has been
 265  *      removed from the kernel lists.
 266  *
 267  *      This call does not sleep therefore it can not
 268  *      guarantee all CPU's that are in middle of receiving packets
 269  *      will see the new packet type (until the next received packet).
 270  */
 271
 272 void dev_add_pack(struct packet_type *pt)
 273 {
 274         int hash;
 275
 276         spin_lock_bh(&ptype_lock);
 277         if (pt->type == htons(ETH_P_ALL)) {
 278                 netdev_nit++;
 279                 list_add_rcu(&pt->list, &ptype_all);
 280         } else {
 281                 hash = ntohs(pt->type) & 15;
 282                 list_add_rcu(&pt->list, &ptype_base[hash]);
 283         }
 284         spin_unlock_bh(&ptype_lock);
 285 }
 286
 287 extern void linkwatch_run_queue(void);
 288
 289
 290
 291 /**
 292  *      __dev_remove_pack        - remove packet handler
 293  *      @pt: packet type declaration
 294  *
 295  *      Remove a protocol handler that was previously added to the kernel
 296  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 297  *      from the kernel lists and can be freed or reused once this function
 298  *      returns.
 299  *
 300  *      The packet type might still be in use by receivers
 301  *      and must not be freed until after all the CPU's have gone
 302  *      through a quiescent state.
 303  */
 304 void __dev_remove_pack(struct packet_type *pt)
 305 {
 306         struct list_head *head;
 307         struct packet_type *pt1;
 308
 309         spin_lock_bh(&ptype_lock);
 310
 311         if (pt->type == htons(ETH_P_ALL)) {
 312                 netdev_nit--;
 313                 head = &ptype_all;
 314         } else
 315                 head = &ptype_base[ntohs(pt->type) & 15];
 316
 317         list_for_each_entry(pt1, head, list) {
 318                 if (pt == pt1) {
 319                         list_del_rcu(&pt->list);
 320                         goto out;
 321                 }
 322         }
 323
 324         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 325 out:
 326         spin_unlock_bh(&ptype_lock);
 327 }
 328 /**
 329  *      dev_remove_pack  - remove packet handler
 330  *      @pt: packet type declaration
 331  *
 332  *      Remove a protocol handler that was previously added to the kernel
 333  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 334  *      from the kernel lists and can be freed or reused once this function
 335  *      returns.
 336  *
 337  *      This call sleeps to guarantee that no CPU is looking at the packet
 338  *      type after return.
 339  */
 340 void dev_remove_pack(struct packet_type *pt)
 341 {
 342         __dev_remove_pack(pt);
 343
 344         synchronize_net();
 345 }
 346
 347 /******************************************************************************
 348
 349                       Device Boot-time Settings Routines
 350
 351 *******************************************************************************/
 352
 353 /* Boot time configuration table */
 354 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 355
 356 /**
 357  *      netdev_boot_setup_add   - add new setup entry
 358  *      @name: name of the device
 359  *      @map: configured settings for the device
 360  *
 361  *      Adds new setup entry to the dev_boot_setup list.  The function
 362  *      returns 0 on error and 1 on success.  This is a generic routine to
 363  *      all netdevices.
 364  */
 365 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 366 {
 367         struct netdev_boot_setup *s;
 368         int i;
 369
 370         s = dev_boot_setup;
 371         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 372                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 373                         memset(s[i].name, 0, sizeof(s[i].name));
 374                         strcpy(s[i].name, name);
 375                         memcpy(&s[i].map, map, sizeof(s[i].map));
 376                         break;
 377                 }
 378         }
 379
 380         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 381 }
 382
 383 /**
 384  *      netdev_boot_setup_check - check boot time settings
 385  *      @dev: the netdevice
 386  *
 387  *      Check boot time settings for the device.
 388  *      The found settings are set for the device to be used
 389  *      later in the device probing.
 390  *      Returns 0 if no settings found, 1 if they are.
 391  */
 392 int netdev_boot_setup_check(struct net_device *dev)
 393 {
 394         struct netdev_boot_setup *s = dev_boot_setup;
 395         int i;
 396
 397         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 398                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 399                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 400                         dev->irq        = s[i].map.irq;
 401                         dev->base_addr  = s[i].map.base_addr;
 402                         dev->mem_start  = s[i].map.mem_start;
 403                         dev->mem_end    = s[i].map.mem_end;
 404                         return 1;
 405                 }
 406         }
 407         return 0;
 408 }
 409
 410
 411 /**
 412  *      netdev_boot_base        - get address from boot time settings
 413  *      @prefix: prefix for network device
 414  *      @unit: id for network device
 415  *
 416  *      Check boot time settings for the base address of device.
 417  *      The found settings are set for the device to be used
 418  *      later in the device probing.
 419  *      Returns 0 if no settings found.
 420  */
 421 unsigned long netdev_boot_base(const char *prefix, int unit)
 422 {
 423         const struct netdev_boot_setup *s = dev_boot_setup;
 424         char name[IFNAMSIZ];
 425         int i;
 426
 427         sprintf(name, "%s%d", prefix, unit);
 428
 429         /*
 430          * If device already registered then return base of 1
 431          * to indicate not to probe for this interface
 432          */
 433         if (__dev_get_by_name(name))
 434                 return 1;
 435
 436         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 437                 if (!strcmp(name, s[i].name))
 438                         return s[i].map.base_addr;
 439         return 0;
 440 }
 441
 442 /*
 443  * Saves at boot time configured settings for any netdevice.
 444  */
 445 int __init netdev_boot_setup(char *str)
 446 {
 447         int ints[5];
 448         struct ifmap map;
 449
 450         str = get_options(str, ARRAY_SIZE(ints), ints);
 451         if (!str || !*str)
 452                 return 0;
 453
 454         /* Save settings */
 455         memset(&map, 0, sizeof(map));
 456         if (ints[0] > 0)
 457                 map.irq = ints[1];
 458         if (ints[0] > 1)
 459                 map.base_addr = ints[2];
 460         if (ints[0] > 2)
 461                 map.mem_start = ints[3];
 462         if (ints[0] > 3)
 463                 map.mem_end = ints[4];
 464
 465         /* Add new entry to the list */
 466         return netdev_boot_setup_add(str, &map);
 467 }
 468
 469 __setup("netdev=", netdev_boot_setup);
 470
 471 /*******************************************************************************
 472
 473                             Device Interface Subroutines
 474
 475 *******************************************************************************/
 476
 477 /**
 478  *      __dev_get_by_name       - find a device by its name
 479  *      @name: name to find
 480  *
 481  *      Find an interface by name. Must be called under RTNL semaphore
 482  *      or @dev_base_lock. If the name is found a pointer to the device
 483  *      is returned. If the name is not found then %NULL is returned. The
 484  *      reference counters are not incremented so the caller must be
 485  *      careful with locks.
 486  */
 487
 488 struct net_device *__dev_get_by_name(const char *name)
 489 {
 490         struct hlist_node *p;
 491
 492         hlist_for_each(p, dev_name_hash(name)) {
 493                 struct net_device *dev
 494                         = hlist_entry(p, struct net_device, name_hlist);
 495                 if (!strncmp(dev->name, name, IFNAMSIZ))
 496                         return dev;
 497         }
 498         return NULL;
 499 }
 500
 501 /**
 502  *      dev_get_by_name         - find a device by its name
 503  *      @name: name to find
 504  *
 505  *      Find an interface by name. This can be called from any
 506  *      context and does its own locking. The returned handle has
 507  *      the usage count incremented and the caller must use dev_put() to
 508  *      release it when it is no longer needed. %NULL is returned if no
 509  *      matching device is found.
 510  */
 511
 512 struct net_device *dev_get_by_name(const char *name)
 513 {
 514         struct net_device *dev;
 515
 516         read_lock(&dev_base_lock);
 517         dev = __dev_get_by_name(name);
 518         if (dev)
 519                 dev_hold(dev);
 520         read_unlock(&dev_base_lock);
 521         return dev;
 522 }
 523
 524 /**
 525  *      __dev_get_by_index - find a device by its ifindex
 526  *      @ifindex: index of device
 527  *
 528  *      Search for an interface by index. Returns %NULL if the device
 529  *      is not found or a pointer to the device. The device has not
 530  *      had its reference counter increased so the caller must be careful
 531  *      about locking. The caller must hold either the RTNL semaphore
 532  *      or @dev_base_lock.
 533  */
 534
 535 struct net_device *__dev_get_by_index(int ifindex)
 536 {
 537         struct hlist_node *p;
 538
 539         hlist_for_each(p, dev_index_hash(ifindex)) {
 540                 struct net_device *dev
 541                         = hlist_entry(p, struct net_device, index_hlist);
 542                 if (dev->ifindex == ifindex)
 543                         return dev;
 544         }
 545         return NULL;
 546 }
 547
 548
 549 /**
 550  *      dev_get_by_index - find a device by its ifindex
 551  *      @ifindex: index of device
 552  *
 553  *      Search for an interface by index. Returns NULL if the device
 554  *      is not found or a pointer to the device. The device returned has
 555  *      had a reference added and the pointer is safe until the user calls
 556  *      dev_put to indicate they have finished with it.
 557  */
 558
 559 struct net_device *dev_get_by_index(int ifindex)
 560 {
 561         struct net_device *dev;
 562
 563         read_lock(&dev_base_lock);
 564         dev = __dev_get_by_index(ifindex);
 565         if (dev)
 566                 dev_hold(dev);
 567         read_unlock(&dev_base_lock);
 568         return dev;
 569 }
 570
 571 /**
 572  *      dev_getbyhwaddr - find a device by its hardware address
 573  *      @type: media type of device
 574  *      @ha: hardware address
 575  *
 576  *      Search for an interface by MAC address. Returns NULL if the device
 577  *      is not found or a pointer to the device. The caller must hold the
 578  *      rtnl semaphore. The returned device has not had its ref count increased
 579  *      and the caller must therefore be careful about locking
 580  *
 581  *      BUGS:
 582  *      If the API was consistent this would be __dev_get_by_hwaddr
 583  */
 584
 585 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 586 {
 587         struct net_device *dev;
 588
 589         ASSERT_RTNL();
 590
 591         for (dev = dev_base; dev; dev = dev->next)
 592                 if (dev->type == type &&
 593                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 594                         break;
 595         return dev;
 596 }
 597
 598 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 599 {
 600         struct net_device *dev;
 601
 602         rtnl_lock();
 603         for (dev = dev_base; dev; dev = dev->next) {
 604                 if (dev->type == type) {
 605                         dev_hold(dev);
 606                         break;
 607                 }
 608         }
 609         rtnl_unlock();
 610         return dev;
 611 }
 612
 613 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 614
 615 /**
 616  *      dev_get_by_flags - find any device with given flags
 617  *      @if_flags: IFF_* values
 618  *      @mask: bitmask of bits in if_flags to check
 619  *
 620  *      Search for any interface with the given flags. Returns NULL if a device
 621  *      is not found or a pointer to the device. The device returned has
 622  *      had a reference added and the pointer is safe until the user calls
 623  *      dev_put to indicate they have finished with it.
 624  */
 625
 626 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 627 {
 628         struct net_device *dev;
 629
 630         read_lock(&dev_base_lock);
 631         for (dev = dev_base; dev != NULL; dev = dev->next) {
 632                 if (((dev->flags ^ if_flags) & mask) == 0) {
 633                         dev_hold(dev);
 634                         break;
 635                 }
 636         }
 637         read_unlock(&dev_base_lock);
 638         return dev;
 639 }
 640
 641 /**
 642  *      dev_valid_name - check if name is okay for network device
 643  *      @name: name string
 644  *
 645  *      Network device names need to be valid file names to
 646  *      to allow sysfs to work
 647  */
 648 static int dev_valid_name(const char *name)
 649 {
 650         return !(*name == '\0'
 651                  || !strcmp(name, ".")
 652                  || !strcmp(name, "..")
 653                  || strchr(name, '/'));
 654 }
 655
 656 /**
 657  *      dev_alloc_name - allocate a name for a device
 658  *      @dev: device
 659  *      @name: name format string
 660  *
 661  *      Passed a format string - eg "lt%d" it will try and find a suitable
 662  *      id. Not efficient for many devices, not called a lot. The caller
 663  *      must hold the dev_base or rtnl lock while allocating the name and
 664  *      adding the device in order to avoid duplicates. Returns the number
 665  *      of the unit assigned or a negative errno code.
 666  */
 667
 668 int dev_alloc_name(struct net_device *dev, const char *name)
 669 {
 670         int i = 0;
 671         char buf[IFNAMSIZ];
 672         const char *p;
 673         const int max_netdevices = 8*PAGE_SIZE;
 674         long *inuse;
 675         struct net_device *d;
 676
 677         p = strnchr(name, IFNAMSIZ-1, '%');
 678         if (p) {
 679                 /*
 680                  * Verify the string as this thing may have come from
 681                  * the user.  There must be either one "%d" and no other "%"
 682                  * characters.
 683                  */
 684                 if (p[1] != 'd' || strchr(p + 2, '%'))
 685                         return -EINVAL;
 686
 687                 /* Use one page as a bit array of possible slots */
 688                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 689                 if (!inuse)
 690                         return -ENOMEM;
 691
 692                 for (d = dev_base; d; d = d->next) {
 693                         if (!sscanf(d->name, name, &i))
 694                                 continue;
 695                         if (i < 0 || i >= max_netdevices)
 696                                 continue;
 697
 698                         /*  avoid cases where sscanf is not exact inverse of printf */
 699                         snprintf(buf, sizeof(buf), name, i);
 700                         if (!strncmp(buf, d->name, IFNAMSIZ))
 701                                 set_bit(i, inuse);
 702                 }
 703
 704                 i = find_first_zero_bit(inuse, max_netdevices);
 705                 free_page((unsigned long) inuse);
 706         }
 707
 708         snprintf(buf, sizeof(buf), name, i);
 709         if (!__dev_get_by_name(buf)) {
 710                 strlcpy(dev->name, buf, IFNAMSIZ);
 711                 return i;
 712         }
 713
 714         /* It is possible to run out of possible slots
 715          * when the name is long and there isn't enough space left
 716          * for the digits, or if all bits are used.
 717          */
 718         return -ENFILE;
 719 }
 720
 721
 722 /**
 723  *      dev_change_name - change name of a device
 724  *      @dev: device
 725  *      @newname: name (or format string) must be at least IFNAMSIZ
 726  *
 727  *      Change name of a device, can pass format strings "eth%d".
 728  *      for wildcarding.
 729  */
 730 int dev_change_name(struct net_device *dev, char *newname)
 731 {
 732         int err = 0;
 733
 734         ASSERT_RTNL();
 735
 736         if (dev->flags & IFF_UP)
 737                 return -EBUSY;
 738
 739         if (!dev_valid_name(newname))
 740                 return -EINVAL;
 741
 742         if (strchr(newname, '%')) {
 743                 err = dev_alloc_name(dev, newname);
 744                 if (err < 0)
 745                         return err;
 746                 strcpy(newname, dev->name);
 747         }
 748         else if (__dev_get_by_name(newname))
 749                 return -EEXIST;
 750         else
 751                 strlcpy(dev->name, newname, IFNAMSIZ);
 752
 753         err = class_device_rename(&dev->class_dev, dev->name);
 754         if (!err) {
 755                 hlist_del(&dev->name_hlist);
 756                 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 757                 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 758         }
 759
 760         return err;
 761 }
 762
 763 /**
 764  *      netdev_state_change - device changes state
 765  *      @dev: device to cause notification
 766  *
 767  *      Called to indicate a device has changed state. This function calls
 768  *      the notifier chains for netdev_chain and sends a NEWLINK message
 769  *      to the routing socket.
 770  */
 771 void netdev_state_change(struct net_device *dev)
 772 {
 773         if (dev->flags & IFF_UP) {
 774                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
 775                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 776         }
 777 }
 778
 779 /**
 780  *      dev_load        - load a network module
 781  *      @name: name of interface
 782  *
 783  *      If a network interface is not present and the process has suitable
 784  *      privileges this function loads the module. If module loading is not
 785  *      available in this kernel then it becomes a nop.
 786  */
 787
 788 void dev_load(const char *name)
 789 {
 790         struct net_device *dev;
 791
 792         read_lock(&dev_base_lock);
 793         dev = __dev_get_by_name(name);
 794         read_unlock(&dev_base_lock);
 795
 796         if (!dev && capable(CAP_SYS_MODULE))
 797                 request_module("%s", name);
 798 }
 799
 800 static int default_rebuild_header(struct sk_buff *skb)
 801 {
 802         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 803                skb->dev ? skb->dev->name : "NULL!!!");
 804         kfree_skb(skb);
 805         return 1;
 806 }
 807
 808
 809 /**
 810  *      dev_open        - prepare an interface for use.
 811  *      @dev:   device to open
 812  *
 813  *      Takes a device from down to up state. The device's private open
 814  *      function is invoked and then the multicast lists are loaded. Finally
 815  *      the device is moved into the up state and a %NETDEV_UP message is
 816  *      sent to the netdev notifier chain.
 817  *
 818  *      Calling this function on an active interface is a nop. On a failure
 819  *      a negative errno code is returned.
 820  */
 821 int dev_open(struct net_device *dev)
 822 {
 823         int ret = 0;
 824
 825         /*
 826          *      Is it already up?
 827          */
 828
 829         if (dev->flags & IFF_UP)
 830                 return 0;
 831
 832         /*
 833          *      Is it even present?
 834          */
 835         if (!netif_device_present(dev))
 836                 return -ENODEV;
 837
 838         /*
 839          *      Call device private open method
 840          */
 841         set_bit(__LINK_STATE_START, &dev->state);
 842         if (dev->open) {
 843                 ret = dev->open(dev);
 844                 if (ret)
 845                         clear_bit(__LINK_STATE_START, &dev->state);
 846         }
 847
 848         /*
 849          *      If it went open OK then:
 850          */
 851
 852         if (!ret) {
 853                 /*
 854                  *      Set the flags.
 855                  */
 856                 dev->flags |= IFF_UP;
 857
 858                 /*
 859                  *      Initialize multicasting status
 860                  */
 861                 dev_mc_upload(dev);
 862
 863                 /*
 864                  *      Wakeup transmit queue engine
 865                  */
 866                 dev_activate(dev);
 867
 868                 /*
 869                  *      ... and announce new interface.
 870                  */
 871                 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
 872         }
 873         return ret;
 874 }
 875
 876 /**
 877  *      dev_close - shutdown an interface.
 878  *      @dev: device to shutdown
 879  *
 880  *      This function moves an active device into down state. A
 881  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 882  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 883  *      chain.
 884  */
 885 int dev_close(struct net_device *dev)
 886 {
 887         if (!(dev->flags & IFF_UP))
 888                 return 0;
 889
 890         /*
 891          *      Tell people we are going down, so that they can
 892          *      prepare to death, when device is still operating.
 893          */
 894         notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 895
 896         dev_deactivate(dev);
 897
 898         clear_bit(__LINK_STATE_START, &dev->state);
 899
 900         /* Synchronize to scheduled poll. We cannot touch poll list,
 901          * it can be even on different cpu. So just clear netif_running(),
 902          * and wait when poll really will happen. Actually, the best place
 903          * for this is inside dev->stop() after device stopped its irq
 904          * engine, but this requires more changes in devices. */
 905
 906         smp_mb__after_clear_bit(); /* Commit netif_running(). */
 907         while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
 908                 /* No hurry. */
 909                 current->state = TASK_INTERRUPTIBLE;
 910                 schedule_timeout(1);
 911         }
 912
 913         /*
 914          *      Call the device specific close. This cannot fail.
 915          *      Only if device is UP
 916          *
 917          *      We allow it to be called even after a DETACH hot-plug
 918          *      event.
 919          */
 920         if (dev->stop)
 921                 dev->stop(dev);
 922
 923         /*
 924          *      Device is now down.
 925          */
 926
 927         dev->flags &= ~IFF_UP;
 928
 929         /*
 930          * Tell people we are down
 931          */
 932         notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
 933
 934         return 0;
 935 }
 936
 937
 938 /*
 939  *      Device change register/unregister. These are not inline or static
 940  *      as we export them to the world.
 941  */
 942
 943 /**
 944  *      register_netdevice_notifier - register a network notifier block
 945  *      @nb: notifier
 946  *
 947  *      Register a notifier to be called when network device events occur.
 948  *      The notifier passed is linked into the kernel structures and must
 949  *      not be reused until it has been unregistered. A negative errno code
 950  *      is returned on a failure.
 951  *
 952  *      When registered all registration and up events are replayed
 953  *      to the new notifier to allow device to have a race free
 954  *      view of the network device list.
 955  */
 956
 957 int register_netdevice_notifier(struct notifier_block *nb)
 958 {
 959         struct net_device *dev;
 960         int err;
 961
 962         rtnl_lock();
 963         err = notifier_chain_register(&netdev_chain, nb);
 964         if (!err) {
 965                 for (dev = dev_base; dev; dev = dev->next) {
 966                         nb->notifier_call(nb, NETDEV_REGISTER, dev);
 967
 968                         if (dev->flags & IFF_UP)
 969                                 nb->notifier_call(nb, NETDEV_UP, dev);
 970                 }
 971         }
 972         rtnl_unlock();
 973         return err;
 974 }
 975
 976 /**
 977  *      unregister_netdevice_notifier - unregister a network notifier block
 978  *      @nb: notifier
 979  *
 980  *      Unregister a notifier previously registered by
 981  *      register_netdevice_notifier(). The notifier is unlinked into the
 982  *      kernel structures and may then be reused. A negative errno code
 983  *      is returned on a failure.
 984  */
 985
 986 int unregister_netdevice_notifier(struct notifier_block *nb)
 987 {
 988         return notifier_chain_unregister(&netdev_chain, nb);
 989 }
 990
 991 /**
 992  *      call_netdevice_notifiers - call all network notifier blocks
 993  *      @val: value passed unmodified to notifier function
 994  *      @v:   pointer passed unmodified to notifier function
 995  *
 996  *      Call all network notifier blocks.  Parameters and return value
 997  *      are as for notifier_call_chain().
 998  */
 999
1000 int call_netdevice_notifiers(unsigned long val, void *v)
1001 {
1002         return notifier_call_chain(&netdev_chain, val, v);
1003 }
1004
1005 /* When > 0 there are consumers of rx skb time stamps */
1006 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1007
1008 void net_enable_timestamp(void)
1009 {
1010         atomic_inc(&netstamp_needed);
1011 }
1012
1013 void net_disable_timestamp(void)
1014 {
1015         atomic_dec(&netstamp_needed);
1016 }
1017
1018 static inline void net_timestamp(struct timeval *stamp)
1019 {
1020         if (atomic_read(&netstamp_needed))
1021                 do_gettimeofday(stamp);
1022         else {
1023                 stamp->tv_sec = 0;
1024                 stamp->tv_usec = 0;
1025         }
1026 }
1027
1028 /*
1029  *      Support routine. Sends outgoing frames to any network
1030  *      taps currently in use.
1031  */
1032
1033 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1034 {
1035         struct packet_type *ptype;
1036         net_timestamp(&skb->stamp);
1037
1038         rcu_read_lock();
1039         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1040                 /* Never send packets back to the socket
1041                  * they originated from - MvS (miquels@drinkel.ow.org)
1042                  */
1043                 if ((ptype->dev == dev || !ptype->dev) &&
1044                     (ptype->af_packet_priv == NULL ||
1045                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1046                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1047                         if (!skb2)
1048                                 break;
1049
1050                         /* skb->nh should be correctly
1051                            set by sender, so that the second statement is
1052                            just protection against buggy protocols.
1053                          */
1054                         skb2->mac.raw = skb2->data;
1055
1056                         if (skb2->nh.raw < skb2->data ||
1057                             skb2->nh.raw > skb2->tail) {
1058                                 if (net_ratelimit())
1059                                         printk(KERN_CRIT "protocol %04x is "
1060                                                "buggy, dev %s\n",
1061                                                skb2->protocol, dev->name);
1062                                 skb2->nh.raw = skb2->data;
1063                         }
1064
1065                         skb2->h.raw = skb2->nh.raw;
1066                         skb2->pkt_type = PACKET_OUTGOING;
1067                         ptype->func(skb2, skb->dev, ptype);
1068                 }
1069         }
1070         rcu_read_unlock();
1071 }
1072
1073 /*
1074  * Invalidate hardware checksum when packet is to be mangled, and
1075  * complete checksum manually on outgoing path.
1076  */
1077 int skb_checksum_help(struct sk_buff *skb, int inward)
1078 {
1079         unsigned int csum;
1080         int ret = 0, offset = skb->h.raw - skb->data;
1081
1082         if (inward) {
1083                 skb->ip_summed = CHECKSUM_NONE;
1084                 goto out;
1085         }
1086
1087         if (skb_cloned(skb)) {
1088                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1089                 if (ret)
1090                         goto out;
1091         }
1092
1093         if (offset > (int)skb->len)
1094                 BUG();
1095         csum = skb_checksum(skb, offset, skb->len-offset, 0);
1096
1097         offset = skb->tail - skb->h.raw;
1098         if (offset <= 0)
1099                 BUG();
1100         if (skb->csum + 2 > offset)
1101                 BUG();
1102
1103         *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1104         skb->ip_summed = CHECKSUM_NONE;
1105 out:
1106         return ret;
1107 }
1108
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/*
 * Returns 1 when the device lacks NETIF_F_HIGHDMA and at least one page
 * fragment of the skb lives in high memory, 0 otherwise.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int frag;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
		}
	}

	return 0;
}
#else
#define illegal_highdma(dev, skb)	(0)
#endif
1131
1132 extern void skb_release_data(struct sk_buff *);
1133
1134 /* Keep head the same: replace data */
1135 int __skb_linearize(struct sk_buff *skb, int gfp_mask)
1136 {
1137         unsigned int size;
1138         u8 *data;
1139         long offset;
1140         struct skb_shared_info *ninfo;
1141         int headerlen = skb->data - skb->head;
1142         int expand = (skb->tail + skb->data_len) - skb->end;
1143
1144         if (skb_shared(skb))
1145                 BUG();
1146
1147         if (expand <= 0)
1148                 expand = 0;
1149
1150         size = skb->end - skb->head + expand;
1151         size = SKB_DATA_ALIGN(size);
1152         data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1153         if (!data)
1154                 return -ENOMEM;
1155
1156         /* Copy entire thing */
1157         if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1158                 BUG();
1159
1160         /* Set up shinfo */
1161         ninfo = (struct skb_shared_info*)(data + size);
1162         atomic_set(&ninfo->dataref, 1);
1163         ninfo->tso_size = skb_shinfo(skb)->tso_size;
1164         ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1165         ninfo->nr_frags = 0;
1166         ninfo->frag_list = NULL;
1167
1168         /* Offset between the two in bytes */
1169         offset = data - skb->head;
1170
1171         /* Free old data. */
1172         skb_release_data(skb);
1173
1174         skb->head = data;
1175         skb->end  = data + size;
1176
1177         /* Set up new pointers */
1178         skb->h.raw   += offset;
1179         skb->nh.raw  += offset;
1180         skb->mac.raw += offset;
1181         skb->tail    += offset;
1182         skb->data    += offset;
1183
1184         /* We are no longer a clone, even if we were. */
1185         skb->cloned    = 0;
1186
1187         skb->tail     += skb->data_len;
1188         skb->data_len  = 0;
1189         return 0;
1190 }
1191
/*
 * Take / release the device transmit lock and record which CPU owns it.
 * Both are no-ops for devices that set NETIF_F_LLTX.
 *
 * The bodies are wrapped in do { } while (0) so that each macro expands
 * to a single statement (safe in an unbraced if/else), and the arguments
 * are parenthesized so that any expression may be passed. Note that
 * @dev is still evaluated more than once.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock(&(dev)->xmit_lock);			\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)

#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock(&(dev)->xmit_lock);			\
	}							\
} while (0)
1205
1206 /**
1207  *      dev_queue_xmit - transmit a buffer
1208  *      @skb: buffer to transmit
1209  *
1210  *      Queue a buffer for transmission to a network device. The caller must
1211  *      have set the device and priority and built the buffer before calling
1212  *      this function. The function can be called from an interrupt.
1213  *
1214  *      A negative errno code is returned on a failure. A success does not
1215  *      guarantee the frame will be transmitted as it may be dropped due
1216  *      to congestion or traffic shaping.
1217  *
1218  * -----------------------------------------------------------------------------------
1219  *      I notice this method can also return errors from the queue disciplines,
1220  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1221  *      be positive.
1222  *
1223  *      Regardless of the return value, the skb is consumed, so it is currently
1224  *      difficult to retry a send to this method.  (You can bump the ref count
1225  *      before sending to hold a reference for retry if you are careful.)
1226  *
1227  *      When calling this method, interrupts MUST be enabled.  This is because
1228  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1229  *          --BLG
1230  */
1231
1232 int dev_queue_xmit(struct sk_buff *skb)
1233 {
1234         struct net_device *dev = skb->dev;
1235         struct Qdisc *q;
1236         int rc = -ENOMEM;
1237
1238         if (skb_shinfo(skb)->frag_list &&
1239             !(dev->features & NETIF_F_FRAGLIST) &&
1240             __skb_linearize(skb, GFP_ATOMIC))
1241                 goto out_kfree_skb;
1242
1243         /* Fragmented skb is linearized if device does not support SG,
1244          * or if at least one of fragments is in highmem and device
1245          * does not support DMA from it.
1246          */
1247         if (skb_shinfo(skb)->nr_frags &&
1248             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1249             __skb_linearize(skb, GFP_ATOMIC))
1250                 goto out_kfree_skb;
1251
1252         /* If packet is not checksummed and device does not support
1253          * checksumming for this protocol, complete checksumming here.
1254          */
1255         if (skb->ip_summed == CHECKSUM_HW &&
1256             (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1257              (!(dev->features & NETIF_F_IP_CSUM) ||
1258               skb->protocol != htons(ETH_P_IP))))
1259                 if (skb_checksum_help(skb, 0))
1260                         goto out_kfree_skb;
1261
1262         /* Disable soft irqs for various locks below. Also
1263          * stops preemption for RCU.
1264          */
1265         local_bh_disable();
1266
1267         /* Updates of qdisc are serialized by queue_lock.
1268          * The struct Qdisc which is pointed to by qdisc is now a
1269          * rcu structure - it may be accessed without acquiring
1270          * a lock (but the structure may be stale.) The freeing of the
1271          * qdisc will be deferred until it's known that there are no
1272          * more references to it.
1273          *
1274          * If the qdisc has an enqueue function, we still need to
1275          * hold the queue_lock before calling it, since queue_lock
1276          * also serializes access to the device queue.
1277          */
1278
1279         q = rcu_dereference(dev->qdisc);
1280 #ifdef CONFIG_NET_CLS_ACT
1281         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1282 #endif
1283         if (q->enqueue) {
1284                 /* Grab device queue */
1285                 spin_lock(&dev->queue_lock);
1286
1287                 rc = q->enqueue(skb, q);
1288
1289                 qdisc_run(dev);
1290
1291                 spin_unlock(&dev->queue_lock);
1292                 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1293                 goto out;
1294         }
1295
1296         /* The device has no queue. Common case for software devices:
1297            loopback, all the sorts of tunnels...
1298
1299            Really, it is unlikely that xmit_lock protection is necessary here.
1300            (f.e. loopback and IP tunnels are clean ignoring statistics
1301            counters.)
1302            However, it is possible, that they rely on protection
1303            made by us here.
1304
1305            Check this and shot the lock. It is not prone from deadlocks.
1306            Either shot noqueue qdisc, it is even simpler 8)
1307          */
1308         if (dev->flags & IFF_UP) {
1309                 int cpu = smp_processor_id(); /* ok because BHs are off */
1310
1311                 if (dev->xmit_lock_owner != cpu) {
1312
1313                         HARD_TX_LOCK(dev, cpu);
1314
1315                         if (!netif_queue_stopped(dev)) {
1316                                 if (netdev_nit)
1317                                         dev_queue_xmit_nit(skb, dev);
1318
1319                                 rc = 0;
1320                                 if (!dev->hard_start_xmit(skb, dev)) {
1321                                         HARD_TX_UNLOCK(dev);
1322                                         goto out;
1323                                 }
1324                         }
1325                         HARD_TX_UNLOCK(dev);
1326                         if (net_ratelimit())
1327                                 printk(KERN_CRIT "Virtual device %s asks to "
1328                                        "queue packet!\n", dev->name);
1329                 } else {
1330                         /* Recursion is detected! It is possible,
1331                          * unfortunately */
1332                         if (net_ratelimit())
1333                                 printk(KERN_CRIT "Dead loop on virtual device "
1334                                        "%s, fix it urgently!\n", dev->name);
1335                 }
1336         }
1337
1338         rc = -ENETDOWN;
1339         local_bh_enable();
1340
1341 out_kfree_skb:
1342         kfree_skb(skb);
1343         return rc;
1344 out:
1345         local_bh_enable();
1346         return rc;
1347 }
1348
1349
1350 /*=======================================================================
1351                         Receiver routines
1352   =======================================================================*/
1353
1354 int netdev_max_backlog = 300;
1355 int weight_p = 64;            /* old backlog weight */
1356 /* These numbers are selected based on intuition and some
1357  * experimentatiom, if you have more scientific way of doing this
1358  * please go ahead and fix things.
1359  */
1360 int no_cong_thresh = 10;
1361 int no_cong = 20;
1362 int lo_cong = 100;
1363 int mod_cong = 290;
1364
1365 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1366
1367
1368 static void get_sample_stats(int cpu)
1369 {
1370 #ifdef RAND_LIE
1371         unsigned long rd;
1372         int rq;
1373 #endif
1374         struct softnet_data *sd = &per_cpu(softnet_data, cpu);
1375         int blog = sd->input_pkt_queue.qlen;
1376         int avg_blog = sd->avg_blog;
1377
1378         avg_blog = (avg_blog >> 1) + (blog >> 1);
1379
1380         if (avg_blog > mod_cong) {
1381                 /* Above moderate congestion levels. */
1382                 sd->cng_level = NET_RX_CN_HIGH;
1383 #ifdef RAND_LIE
1384                 rd = net_random();
1385                 rq = rd % netdev_max_backlog;
1386                 if (rq < avg_blog) /* unlucky bastard */
1387                         sd->cng_level = NET_RX_DROP;
1388 #endif
1389         } else if (avg_blog > lo_cong) {
1390                 sd->cng_level = NET_RX_CN_MOD;
1391 #ifdef RAND_LIE
1392                 rd = net_random();
1393                 rq = rd % netdev_max_backlog;
1394                         if (rq < avg_blog) /* unlucky bastard */
1395                                 sd->cng_level = NET_RX_CN_HIGH;
1396 #endif
1397         } else if (avg_blog > no_cong)
1398                 sd->cng_level = NET_RX_CN_LOW;
1399         else  /* no congestion */
1400                 sd->cng_level = NET_RX_SUCCESS;
1401
1402         sd->avg_blog = avg_blog;
1403 }
1404
#ifdef OFFLINE_SAMPLE
/*
 * Timer callback used when sampling is done offline: refreshes the
 * congestion statistics of the CPU it runs on and re-arms samp_timer
 * for the next tick. @dummy is unused.
 */
static void sample_queue(unsigned long dummy)
{
	int cpu = smp_processor_id();

	get_sample_stats(cpu);

	/* 10 ms or 1 ms -- i don't care -- JHS
	 *
	 * The expiry is computed directly in unsigned long: accumulating
	 * jiffies in an int would truncate it on 64-bit machines and
	 * hand mod_timer() a bogus expiry time.
	 */
	mod_timer(&samp_timer, jiffies + 1);
}
#endif
1417
1418
1419 /**
1420  *      netif_rx        -       post buffer to the network code
1421  *      @skb: buffer to post
1422  *
1423  *      This function receives a packet from a device driver and queues it for
1424  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1425  *      may be dropped during processing for congestion control or by the
1426  *      protocol layers.
1427  *
1428  *      return values:
1429  *      NET_RX_SUCCESS  (no congestion)
1430  *      NET_RX_CN_LOW   (low congestion)
1431  *      NET_RX_CN_MOD   (moderate congestion)
1432  *      NET_RX_CN_HIGH  (high congestion)
1433  *      NET_RX_DROP     (packet was dropped)
1434  *
1435  */
1436
1437 int netif_rx(struct sk_buff *skb)
1438 {
1439         int this_cpu;
1440         struct softnet_data *queue;
1441         unsigned long flags;
1442
1443         /* if netpoll wants it, pretend we never saw it */
1444         if (netpoll_rx(skb))
1445                 return NET_RX_DROP;
1446
1447         if (!skb->stamp.tv_sec)
1448                 net_timestamp(&skb->stamp);
1449
1450         /*
1451          * The code is rearranged so that the path is the most
1452          * short when CPU is congested, but is still operating.
1453          */
1454         local_irq_save(flags);
1455         this_cpu = smp_processor_id();
1456         queue = &__get_cpu_var(softnet_data);
1457
1458         __get_cpu_var(netdev_rx_stat).total++;
1459         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1460                 if (queue->input_pkt_queue.qlen) {
1461                         if (queue->throttle)
1462                                 goto drop;
1463
1464 enqueue:
1465                         dev_hold(skb->dev);
1466                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1467 #ifndef OFFLINE_SAMPLE
1468                         get_sample_stats(this_cpu);
1469 #endif
1470                         local_irq_restore(flags);
1471                         return queue->cng_level;
1472                 }
1473
1474                 if (queue->throttle)
1475                         queue->throttle = 0;
1476
1477                 netif_rx_schedule(&queue->backlog_dev);
1478                 goto enqueue;
1479         }
1480
1481         if (!queue->throttle) {
1482                 queue->throttle = 1;
1483                 __get_cpu_var(netdev_rx_stat).throttled++;
1484         }
1485
1486 drop:
1487         __get_cpu_var(netdev_rx_stat).dropped++;
1488         local_irq_restore(flags);
1489
1490         kfree_skb(skb);
1491         return NET_RX_DROP;
1492 }
1493
/*
 * Variant of netif_rx() that, with preemption disabled, also runs any
 * softirq left pending after the packet was queued. Returns whatever
 * netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int status;

	preempt_disable();

	status = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return status;
}

EXPORT_SYMBOL(netif_rx_ni);
1508
1509 static __inline__ void skb_bond(struct sk_buff *skb)
1510 {
1511         struct net_device *dev = skb->dev;
1512
1513         if (dev->master) {
1514                 skb->real_dev = skb->dev;
1515                 skb->dev = dev->master;
1516         }
1517 }
1518
1519 static void net_tx_action(struct softirq_action *h)
1520 {
1521         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1522
1523         if (sd->completion_queue) {
1524                 struct sk_buff *clist;
1525
1526                 local_irq_disable();
1527                 clist = sd->completion_queue;
1528                 sd->completion_queue = NULL;
1529                 local_irq_enable();
1530
1531                 while (clist) {
1532                         struct sk_buff *skb = clist;
1533                         clist = clist->next;
1534
1535                         BUG_TRAP(!atomic_read(&skb->users));
1536                         __kfree_skb(skb);
1537                 }
1538         }
1539
1540         if (sd->output_queue) {
1541                 struct net_device *head;
1542
1543                 local_irq_disable();
1544                 head = sd->output_queue;
1545                 sd->output_queue = NULL;
1546                 local_irq_enable();
1547
1548                 while (head) {
1549                         struct net_device *dev = head;
1550                         head = head->next_sched;
1551
1552                         smp_mb__before_clear_bit();
1553                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1554
1555                         if (spin_trylock(&dev->queue_lock)) {
1556                                 qdisc_run(dev);
1557                                 spin_unlock(&dev->queue_lock);
1558                         } else {
1559                                 netif_schedule(dev);
1560                         }
1561                 }
1562         }
1563 }
1564
1565 static __inline__ int deliver_skb(struct sk_buff *skb,
1566                                   struct packet_type *pt_prev)
1567 {
1568         atomic_inc(&skb->users);
1569         return pt_prev->func(skb, skb->dev, pt_prev);
1570 }
1571
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* Hooks through which the bridge code is reached */
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 * Pass the packet to the bridge hook if its device is a bridge port.
 * Loopback packets and devices without a bridge port are left alone
 * (returns 0). A handler still pending in *pt_prev is delivered first,
 * its result stored in *ret. Otherwise returns what the hook returned.
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK)
		return 0;

	port = rcu_dereference((*pskb)->dev->br_port);
	if (port == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret)	(0)
#endif
1598
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */

/*
 * Run @skb through the ingress qdisc of its device, if it has one.
 *
 * Returns TC_ACT_OK when there is no ingress qdisc, TC_ACT_SHOT when the
 * redirect counter kept in tc_verd went past MAX_RED_LOOP, and otherwise
 * the verdict of the qdisc's enqueue method.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct Qdisc *q;
	struct net_device *dev = skb->dev;
	int result = TC_ACT_OK;
	__u32 ttl;

	if (!dev->qdisc_ingress)
		return result;

	/* Count the pass through the ingress path to catch redirect loops. */
	ttl = (__u32) G_TC_RTTL(skb->tc_verd);
	if (MAX_RED_LOOP < ttl++) {
		printk(KERN_WARNING
		       "Redir loop detected Dropping packet (%s->%s)\n",
		       skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
		return TC_ACT_SHOT;
	}

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
	if (NULL == skb->input_dev) {
		skb->input_dev = skb->dev;
		printk(KERN_WARNING "ing_filter:  fixed  %s out %s\n",
		       skb->input_dev->name,skb->dev->name);
	}

	/* The qdisc pointer is read again under the ingress lock. */
	spin_lock(&dev->ingress_lock);
	if ((q = dev->qdisc_ingress) != NULL)
		result = q->enqueue(skb, q);
	spin_unlock(&dev->ingress_lock);

	return result;
}
#endif
1639
1640 int netif_receive_skb(struct sk_buff *skb)
1641 {
1642         struct packet_type *ptype, *pt_prev;
1643         int ret = NET_RX_DROP;
1644         unsigned short type;
1645
1646         /* if we've gotten here through NAPI, check netpoll */
1647         if (skb->dev->poll && netpoll_rx(skb))
1648                 return NET_RX_DROP;
1649
1650         if (!skb->stamp.tv_sec)
1651                 net_timestamp(&skb->stamp);
1652
1653         skb_bond(skb);
1654
1655         __get_cpu_var(netdev_rx_stat).total++;
1656
1657         skb->h.raw = skb->nh.raw = skb->data;
1658         skb->mac_len = skb->nh.raw - skb->mac.raw;
1659
1660         pt_prev = NULL;
1661
1662         rcu_read_lock();
1663
1664 #ifdef CONFIG_NET_CLS_ACT
1665         if (skb->tc_verd & TC_NCLS) {
1666                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1667                 goto ncls;
1668         }
1669 #endif
1670
1671         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1672                 if (!ptype->dev || ptype->dev == skb->dev) {
1673                         if (pt_prev)
1674                                 ret = deliver_skb(skb, pt_prev);
1675                         pt_prev = ptype;
1676                 }
1677         }
1678
1679 #ifdef CONFIG_NET_CLS_ACT
1680         if (pt_prev) {
1681                 ret = deliver_skb(skb, pt_prev);
1682                 pt_prev = NULL; /* noone else should process this after*/
1683         } else {
1684                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1685         }
1686
1687         ret = ing_filter(skb);
1688
1689         if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1690                 kfree_skb(skb);
1691                 goto out;
1692         }
1693
1694         skb->tc_verd = 0;
1695 ncls:
1696 #endif
1697
1698         handle_diverter(skb);
1699
1700         if (handle_bridge(&skb, &pt_prev, &ret))
1701                 goto out;
1702
1703         type = skb->protocol;
1704         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1705                 if (ptype->type == type &&
1706                     (!ptype->dev || ptype->dev == skb->dev)) {
1707                         if (pt_prev)
1708                                 ret = deliver_skb(skb, pt_prev);
1709                         pt_prev = ptype;
1710                 }
1711         }
1712
1713         if (pt_prev) {
1714                 ret = pt_prev->func(skb, skb->dev, pt_prev);
1715         } else {
1716                 kfree_skb(skb);
1717                 /* Jamal, now you will not able to escape explaining
1718                  * me how you were going to use this. :-)
1719                  */
1720                 ret = NET_RX_DROP;
1721         }
1722
1723 out:
1724         rcu_read_unlock();
1725         return ret;
1726 }
1727
1728 static int process_backlog(struct net_device *backlog_dev, int *budget)
1729 {
1730         int work = 0;
1731         int quota = min(backlog_dev->quota, *budget);
1732         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1733         unsigned long start_time = jiffies;
1734
1735         for (;;) {
1736                 struct sk_buff *skb;
1737                 struct net_device *dev;
1738
1739                 local_irq_disable();
1740                 skb = __skb_dequeue(&queue->input_pkt_queue);
1741                 if (!skb)
1742                         goto job_done;
1743                 local_irq_enable();
1744
1745                 dev = skb->dev;
1746
1747                 netif_receive_skb(skb);
1748
1749                 dev_put(dev);
1750
1751                 work++;
1752
1753                 if (work >= quota || jiffies - start_time > 1)
1754                         break;
1755
1756         }
1757
1758         backlog_dev->quota -= work;
1759         *budget -= work;
1760         return -1;
1761
1762 job_done:
1763         backlog_dev->quota -= work;
1764         *budget -= work;
1765
1766         list_del(&backlog_dev->poll_list);
1767         smp_mb__before_clear_bit();
1768         netif_poll_enable(backlog_dev);
1769
1770         if (queue->throttle)
1771                 queue->throttle = 0;
1772         local_irq_enable();
1773         return 0;
1774 }
1775
1776 static void net_rx_action(struct softirq_action *h)
1777 {
1778         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1779         unsigned long start_time = jiffies;
1780         int budget = netdev_max_backlog;
1781
1782
1783         local_irq_disable();
1784
1785         while (!list_empty(&queue->poll_list)) {
1786                 struct net_device *dev;
1787
1788                 if (budget <= 0 || jiffies - start_time > 1)
1789                         goto softnet_break;
1790
1791                 local_irq_enable();
1792
1793                 dev = list_entry(queue->poll_list.next,
1794                                  struct net_device, poll_list);
1795                 netpoll_poll_lock(dev);
1796
1797                 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1798                         netpoll_poll_unlock(dev);
1799                         local_irq_disable();
1800                         list_del(&dev->poll_list);
1801                         list_add_tail(&dev->poll_list, &queue->poll_list);
1802                         if (dev->quota < 0)
1803                                 dev->quota += dev->weight;
1804                         else
1805                                 dev->quota = dev->weight;
1806                 } else {
1807                         netpoll_poll_unlock(dev);
1808                         dev_put(dev);
1809                         local_irq_disable();
1810                 }
1811         }
1812 out:
1813         local_irq_enable();
1814         return;
1815
1816 softnet_break:
1817         __get_cpu_var(netdev_rx_stat).time_squeeze++;
1818         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1819         goto out;
1820 }
1821
1822 static gifconf_func_t * gifconf_list [NPROTO];
1823
1824 /**
1825  *      register_gifconf        -       register a SIOCGIF handler
1826  *      @family: Address family
1827  *      @gifconf: Function handler
1828  *
1829  *      Register protocol dependent address dumping routines. The handler
1830  *      that is passed must not be freed or reused until it has been replaced
1831  *      by another handler.
1832  */
1833 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1834 {
1835         if (family >= NPROTO)
1836                 return -EINVAL;
1837         gifconf_list[family] = gifconf;
1838         return 0;
1839 }
1840
1841
1842 /*
1843  *      Map an interface index to its name (SIOCGIFNAME)
1844  */
1845
1846 /*
1847  *      We need this ioctl for efficient implementation of the
1848  *      if_indextoname() function required by the IPv6 API.  Without
1849  *      it, we would have to search all the interfaces to find a
1850  *      match.  --pb
1851  */
1852
1853 static int dev_ifname(struct ifreq __user *arg)
1854 {
1855         struct net_device *dev;
1856         struct ifreq ifr;
1857
1858         /*
1859          *      Fetch the caller's info block.
1860          */
1861
1862         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1863                 return -EFAULT;
1864
1865         read_lock(&dev_base_lock);
1866         dev = __dev_get_by_index(ifr.ifr_ifindex);
1867         if (!dev) {
1868                 read_unlock(&dev_base_lock);
1869                 return -ENODEV;
1870         }
1871
1872         strcpy(ifr.ifr_name, dev->name);
1873         read_unlock(&dev_base_lock);
1874
1875         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1876                 return -EFAULT;
1877         return 0;
1878 }
1879
1880 /*
1881  *      Perform a SIOCGIFCONF call. This structure will change
1882  *      size eventually, and there is nothing I can do about it.
1883  *      Thus we will need a 'compatibility mode'.
1884  */
1885
1886 static int dev_ifconf(char __user *arg)
1887 {
1888         struct ifconf ifc;
1889         struct net_device *dev;
1890         char __user *pos;
1891         int len;
1892         int total;
1893         int i;
1894
1895         /*
1896          *      Fetch the caller's info block.
1897          */
1898
1899         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1900                 return -EFAULT;
1901
1902         pos = ifc.ifc_buf;
1903         len = ifc.ifc_len;
1904
1905         /*
1906          *      Loop over the interfaces, and write an info block for each.
1907          */
1908
1909         total = 0;
1910         for (dev = dev_base; dev; dev = dev->next) {
1911                 for (i = 0; i < NPROTO; i++) {
1912                         if (gifconf_list[i]) {
1913                                 int done;
1914                                 if (!pos)
1915                                         done = gifconf_list[i](dev, NULL, 0);
1916                                 else
1917                                         done = gifconf_list[i](dev, pos + total,
1918                                                                len - total);
1919                                 if (done < 0)
1920                                         return -EFAULT;
1921                                 total += done;
1922                         }
1923                 }
1924         }
1925
1926         /*
1927          *      All done.  Write the updated control block back to the caller.
1928          */
1929         ifc.ifc_len = total;
1930
1931         /*
1932          *      Both BSD and Solaris return 0 here, so we do too.
1933          */
1934         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1935 }
1936
1937 #ifdef CONFIG_PROC_FS
1938 /*
1939  *      This is invoked by the /proc filesystem handler to display a device
1940  *      in detail.
1941  */
1942 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1943 {
1944         struct net_device *dev;
1945         loff_t i;
1946
1947         for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1948
1949         return i == pos ? dev : NULL;
1950 }
1951
1952 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1953 {
1954         read_lock(&dev_base_lock);
1955         return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1956 }
1957
1958 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1959 {
1960         ++*pos;
1961         return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1962 }
1963
1964 void dev_seq_stop(struct seq_file *seq, void *v)
1965 {
1966         read_unlock(&dev_base_lock);
1967 }
1968
1969 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1970 {
1971         if (dev->get_stats) {
1972                 struct net_device_stats *stats = dev->get_stats(dev);
1973
1974                 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1975                                 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1976                            dev->name, stats->rx_bytes, stats->rx_packets,
1977                            stats->rx_errors,
1978                            stats->rx_dropped + stats->rx_missed_errors,
1979                            stats->rx_fifo_errors,
1980                            stats->rx_length_errors + stats->rx_over_errors +
1981                              stats->rx_crc_errors + stats->rx_frame_errors,
1982                            stats->rx_compressed, stats->multicast,
1983                            stats->tx_bytes, stats->tx_packets,
1984                            stats->tx_errors, stats->tx_dropped,
1985                            stats->tx_fifo_errors, stats->collisions,
1986                            stats->tx_carrier_errors +
1987                              stats->tx_aborted_errors +
1988                              stats->tx_window_errors +
1989                              stats->tx_heartbeat_errors,
1990                            stats->tx_compressed);
1991         } else
1992                 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1993 }
1994
1995 /*
1996  *      Called from the PROCfs module. This now uses the new arbitrary sized
1997  *      /proc/net interface to create /proc/net/dev
1998  */
1999 static int dev_seq_show(struct seq_file *seq, void *v)
2000 {
2001         if (v == SEQ_START_TOKEN)
2002                 seq_puts(seq, "Inter-|   Receive                            "
2003                               "                    |  Transmit\n"
2004                               " face |bytes    packets errs drop fifo frame "
2005                               "compressed multicast|bytes    packets errs "
2006                               "drop fifo colls carrier compressed\n");
2007         else
2008                 dev_seq_printf_stats(seq, v);
2009         return 0;
2010 }
2011
2012 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2013 {
2014         struct netif_rx_stats *rc = NULL;
2015
2016         while (*pos < NR_CPUS)
2017                 if (cpu_online(*pos)) {
2018                         rc = &per_cpu(netdev_rx_stat, *pos);
2019                         break;
2020                 } else
2021                         ++*pos;
2022         return rc;
2023 }
2024
2025 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2026 {
2027         return softnet_get_online(pos);
2028 }
2029
2030 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2031 {
2032         ++*pos;
2033         return softnet_get_online(pos);
2034 }
2035
/*
 *      seq_file ->stop: softnet_seq_start acquires nothing, so there is
 *      nothing to release.
 */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
        /* intentionally empty */
}
2039
2040 static int softnet_seq_show(struct seq_file *seq, void *v)
2041 {
2042         struct netif_rx_stats *s = v;
2043
2044         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2045                    s->total, s->dropped, s->time_squeeze, s->throttled,
2046                    s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
2047                    s->fastroute_deferred_out,
2048 #if 0
2049                    s->fastroute_latency_reduction
2050 #else
2051                    s->cpu_collision
2052 #endif
2053                   );
2054         return 0;
2055 }
2056
2057 static struct seq_operations dev_seq_ops = {
2058         .start = dev_seq_start,
2059         .next  = dev_seq_next,
2060         .stop  = dev_seq_stop,
2061         .show  = dev_seq_show,
2062 };
2063
2064 static int dev_seq_open(struct inode *inode, struct file *file)
2065 {
2066         return seq_open(file, &dev_seq_ops);
2067 }
2068
2069 static struct file_operations dev_seq_fops = {
2070         .owner   = THIS_MODULE,
2071         .open    = dev_seq_open,
2072         .read    = seq_read,
2073         .llseek  = seq_lseek,
2074         .release = seq_release,
2075 };
2076
2077 static struct seq_operations softnet_seq_ops = {
2078         .start = softnet_seq_start,
2079         .next  = softnet_seq_next,
2080         .stop  = softnet_seq_stop,
2081         .show  = softnet_seq_show,
2082 };
2083
2084 static int softnet_seq_open(struct inode *inode, struct file *file)
2085 {
2086         return seq_open(file, &softnet_seq_ops);
2087 }
2088
2089 static struct file_operations softnet_seq_fops = {
2090         .owner   = THIS_MODULE,
2091         .open    = softnet_seq_open,
2092         .read    = seq_read,
2093         .llseek  = seq_lseek,
2094         .release = seq_release,
2095 };
2096
2097 #ifdef WIRELESS_EXT
2098 extern int wireless_proc_init(void);
2099 #else
2100 #define wireless_proc_init() 0
2101 #endif
2102
2103 static int __init dev_proc_init(void)
2104 {
2105         int rc = -ENOMEM;
2106
2107         if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2108                 goto out;
2109         if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2110                 goto out_dev;
2111         if (wireless_proc_init())
2112                 goto out_softnet;
2113         rc = 0;
2114 out:
2115         return rc;
2116 out_softnet:
2117         proc_net_remove("softnet_stat");
2118 out_dev:
2119         proc_net_remove("dev");
2120         goto out;
2121 }
2122 #else
2123 #define dev_proc_init() 0
2124 #endif  /* CONFIG_PROC_FS */
2125
2126
2127 /**
2128  *      netdev_set_master       -       set up master/slave pair
2129  *      @slave: slave device
2130  *      @master: new master device
2131  *
2132  *      Changes the master device of the slave. Pass %NULL to break the
2133  *      bonding. The caller must hold the RTNL semaphore. On a failure
2134  *      a negative errno code is returned. On success the reference counts
2135  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2136  *      function returns zero.
2137  */
2138 int netdev_set_master(struct net_device *slave, struct net_device *master)
2139 {
2140         struct net_device *old = slave->master;
2141
2142         ASSERT_RTNL();
2143
2144         if (master) {
2145                 if (old)
2146                         return -EBUSY;
2147                 dev_hold(master);
2148         }
2149
2150         slave->master = master;
2151
2152         synchronize_net();
2153
2154         if (old)
2155                 dev_put(old);
2156
2157         if (master)
2158                 slave->flags |= IFF_SLAVE;
2159         else
2160                 slave->flags &= ~IFF_SLAVE;
2161
2162         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2163         return 0;
2164 }
2165
2166 /**
2167  *      dev_set_promiscuity     - update promiscuity count on a device
2168  *      @dev: device
2169  *      @inc: modifier
2170  *
2171  *      Add or remove promsicuity from a device. While the count in the device
2172  *      remains above zero the interface remains promiscuous. Once it hits zero
2173  *      the device reverts back to normal filtering operation. A negative inc
2174  *      value is used to drop promiscuity on the device.
2175  */
2176 void dev_set_promiscuity(struct net_device *dev, int inc)
2177 {
2178         unsigned short old_flags = dev->flags;
2179
2180         dev->flags |= IFF_PROMISC;
2181         if ((dev->promiscuity += inc) == 0)
2182                 dev->flags &= ~IFF_PROMISC;
2183         if (dev->flags ^ old_flags) {
2184                 dev_mc_upload(dev);
2185                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2186                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2187                                                                "left");
2188         }
2189 }
2190
2191 /**
2192  *      dev_set_allmulti        - update allmulti count on a device
2193  *      @dev: device
2194  *      @inc: modifier
2195  *
2196  *      Add or remove reception of all multicast frames to a device. While the
2197  *      count in the device remains above zero the interface remains listening
2198  *      to all interfaces. Once it hits zero the device reverts back to normal
2199  *      filtering operation. A negative @inc value is used to drop the counter
2200  *      when releasing a resource needing all multicasts.
2201  */
2202
2203 void dev_set_allmulti(struct net_device *dev, int inc)
2204 {
2205         unsigned short old_flags = dev->flags;
2206
2207         dev->flags |= IFF_ALLMULTI;
2208         if ((dev->allmulti += inc) == 0)
2209                 dev->flags &= ~IFF_ALLMULTI;
2210         if (dev->flags ^ old_flags)
2211                 dev_mc_upload(dev);
2212 }
2213
2214 unsigned dev_get_flags(const struct net_device *dev)
2215 {
2216         unsigned flags;
2217
2218         flags = (dev->flags & ~(IFF_PROMISC |
2219                                 IFF_ALLMULTI |
2220                                 IFF_RUNNING)) |
2221                 (dev->gflags & (IFF_PROMISC |
2222                                 IFF_ALLMULTI));
2223
2224         if (netif_running(dev) && netif_carrier_ok(dev))
2225                 flags |= IFF_RUNNING;
2226
2227         return flags;
2228 }
2229
2230 int dev_change_flags(struct net_device *dev, unsigned flags)
2231 {
2232         int ret;
2233         int old_flags = dev->flags;
2234
2235         /*
2236          *      Set the flags on our device.
2237          */
2238
2239         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2240                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2241                                IFF_AUTOMEDIA)) |
2242                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2243                                     IFF_ALLMULTI));
2244
2245         /*
2246          *      Load in the correct multicast list now the flags have changed.
2247          */
2248
2249         dev_mc_upload(dev);
2250
2251         /*
2252          *      Have we downed the interface. We handle IFF_UP ourselves
2253          *      according to user attempts to set it, rather than blindly
2254          *      setting it.
2255          */
2256
2257         ret = 0;
2258         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
2259                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2260
2261                 if (!ret)
2262                         dev_mc_upload(dev);
2263         }
2264
2265         if (dev->flags & IFF_UP &&
2266             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2267                                           IFF_VOLATILE)))
2268                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2269
2270         if ((flags ^ dev->gflags) & IFF_PROMISC) {
2271                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2272                 dev->gflags ^= IFF_PROMISC;
2273                 dev_set_promiscuity(dev, inc);
2274         }
2275
2276         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2277            is important. Some (broken) drivers set IFF_PROMISC, when
2278            IFF_ALLMULTI is requested not asking us and not reporting.
2279          */
2280         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2281                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2282                 dev->gflags ^= IFF_ALLMULTI;
2283                 dev_set_allmulti(dev, inc);
2284         }
2285
2286         if (old_flags ^ dev->flags)
2287                 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2288
2289         return ret;
2290 }
2291
2292 int dev_set_mtu(struct net_device *dev, int new_mtu)
2293 {
2294         int err;
2295
2296         if (new_mtu == dev->mtu)
2297                 return 0;
2298
2299         /*      MTU must be positive.    */
2300         if (new_mtu < 0)
2301                 return -EINVAL;
2302
2303         if (!netif_device_present(dev))
2304                 return -ENODEV;
2305
2306         err = 0;
2307         if (dev->change_mtu)
2308                 err = dev->change_mtu(dev, new_mtu);
2309         else
2310                 dev->mtu = new_mtu;
2311         if (!err && dev->flags & IFF_UP)
2312                 notifier_call_chain(&netdev_chain,
2313                                     NETDEV_CHANGEMTU, dev);
2314         return err;
2315 }
2316
2317 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2318 {
2319         int err;
2320
2321         if (!dev->set_mac_address)
2322                 return -EOPNOTSUPP;
2323         if (sa->sa_family != dev->type)
2324                 return -EINVAL;
2325         if (!netif_device_present(dev))
2326                 return -ENODEV;
2327         err = dev->set_mac_address(dev, sa);
2328         if (!err)
2329                 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2330         return err;
2331 }
2332
2333 /*
2334  *      Perform the SIOCxIFxxx calls.
2335  */
2336 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2337 {
2338         int err;
2339         struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2340
2341         if (!dev)
2342                 return -ENODEV;
2343
2344         switch (cmd) {
2345                 case SIOCGIFFLAGS:      /* Get interface flags */
2346                         ifr->ifr_flags = dev_get_flags(dev);
2347                         return 0;
2348
2349                 case SIOCSIFFLAGS:      /* Set interface flags */
2350                         return dev_change_flags(dev, ifr->ifr_flags);
2351
2352                 case SIOCGIFMETRIC:     /* Get the metric on the interface
2353                                            (currently unused) */
2354                         ifr->ifr_metric = 0;
2355                         return 0;
2356
2357                 case SIOCSIFMETRIC:     /* Set the metric on the interface
2358                                            (currently unused) */
2359                         return -EOPNOTSUPP;
2360
2361                 case SIOCGIFMTU:        /* Get the MTU of a device */
2362                         ifr->ifr_mtu = dev->mtu;
2363                         return 0;
2364
2365                 case SIOCSIFMTU:        /* Set the MTU of a device */
2366                         return dev_set_mtu(dev, ifr->ifr_mtu);
2367
2368                 case SIOCGIFHWADDR:
2369                         if (!dev->addr_len)
2370                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2371                         else
2372                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2373                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2374                         ifr->ifr_hwaddr.sa_family = dev->type;
2375                         return 0;
2376
2377                 case SIOCSIFHWADDR:
2378                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2379
2380                 case SIOCSIFHWBROADCAST:
2381                         if (ifr->ifr_hwaddr.sa_family != dev->type)
2382                                 return -EINVAL;
2383                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2384                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2385                         notifier_call_chain(&netdev_chain,
2386                                             NETDEV_CHANGEADDR, dev);
2387                         return 0;
2388
2389                 case SIOCGIFMAP:
2390                         ifr->ifr_map.mem_start = dev->mem_start;
2391                         ifr->ifr_map.mem_end   = dev->mem_end;
2392                         ifr->ifr_map.base_addr = dev->base_addr;
2393                         ifr->ifr_map.irq       = dev->irq;
2394                         ifr->ifr_map.dma       = dev->dma;
2395                         ifr->ifr_map.port      = dev->if_port;
2396                         return 0;
2397
2398                 case SIOCSIFMAP:
2399                         if (dev->set_config) {
2400                                 if (!netif_device_present(dev))
2401                                         return -ENODEV;
2402                                 return dev->set_config(dev, &ifr->ifr_map);
2403                         }
2404                         return -EOPNOTSUPP;
2405
2406                 case SIOCADDMULTI:
2407                         if (!dev->set_multicast_list ||
2408                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2409                                 return -EINVAL;
2410                         if (!netif_device_present(dev))
2411                                 return -ENODEV;
2412                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2413                                           dev->addr_len, 1);
2414
2415                 case SIOCDELMULTI:
2416                         if (!dev->set_multicast_list ||
2417                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2418                                 return -EINVAL;
2419                         if (!netif_device_present(dev))
2420                                 return -ENODEV;
2421                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2422                                              dev->addr_len, 1);
2423
2424                 case SIOCGIFINDEX:
2425                         ifr->ifr_ifindex = dev->ifindex;
2426                         return 0;
2427
2428                 case SIOCGIFTXQLEN:
2429                         ifr->ifr_qlen = dev->tx_queue_len;
2430                         return 0;
2431
2432                 case SIOCSIFTXQLEN:
2433                         if (ifr->ifr_qlen < 0)
2434                                 return -EINVAL;
2435                         dev->tx_queue_len = ifr->ifr_qlen;
2436                         return 0;
2437
2438                 case SIOCSIFNAME:
2439                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2440                         return dev_change_name(dev, ifr->ifr_newname);
2441
2442                 /*
2443                  *      Unknown or private ioctl
2444                  */
2445
2446                 default:
2447                         if ((cmd >= SIOCDEVPRIVATE &&
2448                             cmd <= SIOCDEVPRIVATE + 15) ||
2449                             cmd == SIOCBONDENSLAVE ||
2450                             cmd == SIOCBONDRELEASE ||
2451                             cmd == SIOCBONDSETHWADDR ||
2452                             cmd == SIOCBONDSLAVEINFOQUERY ||
2453                             cmd == SIOCBONDINFOQUERY ||
2454                             cmd == SIOCBONDCHANGEACTIVE ||
2455                             cmd == SIOCGMIIPHY ||
2456                             cmd == SIOCGMIIREG ||
2457                             cmd == SIOCSMIIREG ||
2458                             cmd == SIOCBRADDIF ||
2459                             cmd == SIOCBRDELIF ||
2460                             cmd == SIOCWANDEV) {
2461                                 err = -EOPNOTSUPP;
2462                                 if (dev->do_ioctl) {
2463                                         if (netif_device_present(dev))
2464                                                 err = dev->do_ioctl(dev, ifr,
2465                                                                     cmd);
2466                                         else
2467                                                 err = -ENODEV;
2468                                 }
2469                         } else
2470                                 err = -EINVAL;
2471
2472         }
2473         return err;
2474 }
2475
2476 /*
2477  *      This function handles all "interface"-type I/O control requests. The actual
2478  *      'doing' part of this is dev_ifsioc above.
2479  */
2480
2481 /**
2482  *      dev_ioctl       -       network device ioctl
2483  *      @cmd: command to issue
2484  *      @arg: pointer to a struct ifreq in user space
2485  *
2486  *      Issue ioctl functions to devices. This is normally called by the
2487  *      user space syscall interfaces but can sometimes be useful for
2488  *      other purposes. The return value is the return from the syscall if
2489  *      positive or a negative errno code on error.
2490  */
2491
2492 int dev_ioctl(unsigned int cmd, void __user *arg)
2493 {
2494         struct ifreq ifr;
2495         int ret;
2496         char *colon;
2497
2498         /* One special case: SIOCGIFCONF takes ifconf argument
2499            and requires shared lock, because it sleeps writing
2500            to user space.
2501          */
2502
2503         if (cmd == SIOCGIFCONF) {
2504                 rtnl_shlock();
2505                 ret = dev_ifconf((char __user *) arg);
2506                 rtnl_shunlock();
2507                 return ret;
2508         }
2509         if (cmd == SIOCGIFNAME)
2510                 return dev_ifname((struct ifreq __user *)arg);
2511
2512         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2513                 return -EFAULT;
2514
2515         ifr.ifr_name[IFNAMSIZ-1] = 0;
2516
2517         colon = strchr(ifr.ifr_name, ':');
2518         if (colon)
2519                 *colon = 0;
2520
2521         /*
2522          *      See which interface the caller is talking about.
2523          */
2524
2525         switch (cmd) {
2526                 /*
2527                  *      These ioctl calls:
2528                  *      - can be done by all.
2529                  *      - atomic and do not require locking.
2530                  *      - return a value
2531                  */
2532                 case SIOCGIFFLAGS:
2533                 case SIOCGIFMETRIC:
2534                 case SIOCGIFMTU:
2535                 case SIOCGIFHWADDR:
2536                 case SIOCGIFSLAVE:
2537                 case SIOCGIFMAP:
2538                 case SIOCGIFINDEX:
2539                 case SIOCGIFTXQLEN:
2540                         dev_load(ifr.ifr_name);
2541                         read_lock(&dev_base_lock);
2542                         ret = dev_ifsioc(&ifr, cmd);
2543                         read_unlock(&dev_base_lock);
2544                         if (!ret) {
2545                                 if (colon)
2546                                         *colon = ':';
2547                                 if (copy_to_user(arg, &ifr,
2548                                                  sizeof(struct ifreq)))
2549                                         ret = -EFAULT;
2550                         }
2551                         return ret;
2552
2553                 case SIOCETHTOOL:
2554                         dev_load(ifr.ifr_name);
2555                         rtnl_lock();
2556                         ret = dev_ethtool(&ifr);
2557                         rtnl_unlock();
2558                         if (!ret) {
2559                                 if (colon)
2560                                         *colon = ':';
2561                                 if (copy_to_user(arg, &ifr,
2562                                                  sizeof(struct ifreq)))
2563                                         ret = -EFAULT;
2564                         }
2565                         return ret;
2566
2567                 /*
2568                  *      These ioctl calls:
2569                  *      - require superuser power.
2570                  *      - require strict serialization.
2571                  *      - return a value
2572                  */
2573                 case SIOCGMIIPHY:
2574                 case SIOCGMIIREG:
2575                 case SIOCSIFNAME:
2576                         if (!capable(CAP_NET_ADMIN))
2577                                 return -EPERM;
2578                         dev_load(ifr.ifr_name);
2579                         rtnl_lock();
2580                         ret = dev_ifsioc(&ifr, cmd);
2581                         rtnl_unlock();
2582                         if (!ret) {
2583                                 if (colon)
2584                                         *colon = ':';
2585                                 if (copy_to_user(arg, &ifr,
2586                                                  sizeof(struct ifreq)))
2587                                         ret = -EFAULT;
2588                         }
2589                         return ret;
2590
2591                 /*
2592                  *      These ioctl calls:
2593                  *      - require superuser power.
2594                  *      - require strict serialization.
2595                  *      - do not return a value
2596                  */
2597                 case SIOCSIFFLAGS:
2598                 case SIOCSIFMETRIC:
2599                 case SIOCSIFMTU:
2600                 case SIOCSIFMAP:
2601                 case SIOCSIFHWADDR:
2602                 case SIOCSIFSLAVE:
2603                 case SIOCADDMULTI:
2604                 case SIOCDELMULTI:
2605                 case SIOCSIFHWBROADCAST:
2606                 case SIOCSIFTXQLEN:
2607                 case SIOCSMIIREG:
2608                 case SIOCBONDENSLAVE:
2609                 case SIOCBONDRELEASE:
2610                 case SIOCBONDSETHWADDR:
2611                 case SIOCBONDSLAVEINFOQUERY:
2612                 case SIOCBONDINFOQUERY:
2613                 case SIOCBONDCHANGEACTIVE:
2614                 case SIOCBRADDIF:
2615                 case SIOCBRDELIF:
2616                         if (!capable(CAP_NET_ADMIN))
2617                                 return -EPERM;
2618                         dev_load(ifr.ifr_name);
2619                         rtnl_lock();
2620                         ret = dev_ifsioc(&ifr, cmd);
2621                         rtnl_unlock();
2622                         return ret;
2623
2624                 case SIOCGIFMEM:
2625                         /* Get the per device memory space. We can add this but
2626                          * currently do not support it */
2627                 case SIOCSIFMEM:
2628                         /* Set the per device memory buffer space.
2629                          * Not applicable in our case */
2630                 case SIOCSIFLINK:
2631                         return -EINVAL;
2632
2633                 /*
2634                  *      Unknown or private ioctl.
2635                  */
2636                 default:
2637                         if (cmd == SIOCWANDEV ||
2638                             (cmd >= SIOCDEVPRIVATE &&
2639                              cmd <= SIOCDEVPRIVATE + 15)) {
2640                                 dev_load(ifr.ifr_name);
2641                                 rtnl_lock();
2642                                 ret = dev_ifsioc(&ifr, cmd);
2643                                 rtnl_unlock();
2644                                 if (!ret && copy_to_user(arg, &ifr,
2645                                                          sizeof(struct ifreq)))
2646                                         ret = -EFAULT;
2647                                 return ret;
2648                         }
2649 #ifdef WIRELESS_EXT
2650                         /* Take care of Wireless Extensions */
2651                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2652                                 /* If command is `set a parameter', or
2653                                  * `get the encoding parameters', check if
2654                                  * the user has the right to do it */
2655                                 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2656                                         if (!capable(CAP_NET_ADMIN))
2657                                                 return -EPERM;
2658                                 }
2659                                 dev_load(ifr.ifr_name);
2660                                 rtnl_lock();
2661                                 /* Follow me in net/core/wireless.c */
2662                                 ret = wireless_process_ioctl(&ifr, cmd);
2663                                 rtnl_unlock();
2664                                 if (IW_IS_GET(cmd) &&
2665                                     copy_to_user(arg, &ifr,
2666                                                  sizeof(struct ifreq)))
2667                                         ret = -EFAULT;
2668                                 return ret;
2669                         }
2670 #endif  /* WIRELESS_EXT */
2671                         return -EINVAL;
2672         }
2673 }
2674
2675
/**
 *      dev_new_index   -       allocate an ifindex
 *
 *      Returns a suitable unique value for a new device interface
 *      number.  The caller must hold the rtnl semaphore or the
 *      dev_base_lock to be sure it remains unique.
 *
 *      The candidate counter persists across calls and restarts at 1
 *      once it can no longer be incremented, so the value returned is
 *      always strictly positive.
 */
static int dev_new_index(void)
{
        static int ifindex;

        for (;;) {
                /*
                 * Step the counter in unsigned arithmetic: incrementing a
                 * signed int past its maximum is undefined behaviour,
                 * whereas the unsigned sum is well defined.  Converted
                 * back to int it becomes non-positive at the wrap point
                 * (modulo conversion, as GCC implements it), and is then
                 * reset to 1.
                 */
                ifindex = (int)((unsigned int)ifindex + 1U);
                if (ifindex <= 0)
                        ifindex = 1;

                /* Skip any index that is already bound to a device. */
                if (!__dev_get_by_index(ifindex))
                        return ifindex;
        }
}
2693
/* Non-zero until net_dev_init() has finished setting up the core. */
static int dev_boot_phase = 1;

/* Delayed registration/unregistration */
static DEFINE_SPINLOCK(net_todo_list_lock);
static LIST_HEAD(net_todo_list);
2699
2700 static inline void net_set_todo(struct net_device *dev)
2701 {
2702         spin_lock(&net_todo_list_lock);
2703         list_add_tail(&dev->todo_list, &net_todo_list);
2704         spin_unlock(&net_todo_list_lock);
2705 }
2706
2707 /**
2708  *      register_netdevice      - register a network device
2709  *      @dev: device to register
2710  *
2711  *      Take a completed network device structure and add it to the kernel
2712  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2713  *      chain. 0 is returned on success. A negative errno code is returned
2714  *      on a failure to set up the device, or if the name is a duplicate.
2715  *
2716  *      Callers must hold the rtnl semaphore. You may want
2717  *      register_netdev() instead of this.
2718  *
2719  *      BUGS:
2720  *      The locking appears insufficient to guarantee two parallel registers
2721  *      will not get the same name.
2722  */
2723
2724 int register_netdevice(struct net_device *dev)
2725 {
2726         struct hlist_head *head;
2727         struct hlist_node *p;
2728         int ret;
2729
2730         BUG_ON(dev_boot_phase);
2731         ASSERT_RTNL();
2732
2733         /* When net_device's are persistent, this will be fatal. */
2734         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2735
2736         spin_lock_init(&dev->queue_lock);
2737         spin_lock_init(&dev->xmit_lock);
2738         dev->xmit_lock_owner = -1;
2739 #ifdef CONFIG_NET_CLS_ACT
2740         spin_lock_init(&dev->ingress_lock);
2741 #endif
2742
2743         ret = alloc_divert_blk(dev);
2744         if (ret)
2745                 goto out;
2746
2747         dev->iflink = -1;
2748
2749         /* Init, if this function is available */
2750         if (dev->init) {
2751                 ret = dev->init(dev);
2752                 if (ret) {
2753                         if (ret > 0)
2754                                 ret = -EIO;
2755                         goto out_err;
2756                 }
2757         }
2758
2759         if (!dev_valid_name(dev->name)) {
2760                 ret = -EINVAL;
2761                 goto out_err;
2762         }
2763
2764         dev->ifindex = dev_new_index();
2765         if (dev->iflink == -1)
2766                 dev->iflink = dev->ifindex;
2767
2768         /* Check for existence of name */
2769         head = dev_name_hash(dev->name);
2770         hlist_for_each(p, head) {
2771                 struct net_device *d
2772                         = hlist_entry(p, struct net_device, name_hlist);
2773                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2774                         ret = -EEXIST;
2775                         goto out_err;
2776                 }
2777         }
2778
2779         /* Fix illegal SG+CSUM combinations. */
2780         if ((dev->features & NETIF_F_SG) &&
2781             !(dev->features & (NETIF_F_IP_CSUM |
2782                                NETIF_F_NO_CSUM |
2783                                NETIF_F_HW_CSUM))) {
2784                 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2785                        dev->name);
2786                 dev->features &= ~NETIF_F_SG;
2787         }
2788
2789         /* TSO requires that SG is present as well. */
2790         if ((dev->features & NETIF_F_TSO) &&
2791             !(dev->features & NETIF_F_SG)) {
2792                 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2793                        dev->name);
2794                 dev->features &= ~NETIF_F_TSO;
2795         }
2796
2797         /*
2798          *      nil rebuild_header routine,
2799          *      that should be never called and used as just bug trap.
2800          */
2801
2802         if (!dev->rebuild_header)
2803                 dev->rebuild_header = default_rebuild_header;
2804
2805         /*
2806          *      Default initial state at registry is that the
2807          *      device is present.
2808          */
2809
2810         set_bit(__LINK_STATE_PRESENT, &dev->state);
2811
2812         dev->next = NULL;
2813         dev_init_scheduler(dev);
2814         write_lock_bh(&dev_base_lock);
2815         *dev_tail = dev;
2816         dev_tail = &dev->next;
2817         hlist_add_head(&dev->name_hlist, head);
2818         hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2819         dev_hold(dev);
2820         dev->reg_state = NETREG_REGISTERING;
2821         write_unlock_bh(&dev_base_lock);
2822
2823         /* Notify protocols, that a new device appeared. */
2824         notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2825
2826         /* Finish registration after unlock */
2827         net_set_todo(dev);
2828         ret = 0;
2829
2830 out:
2831         return ret;
2832 out_err:
2833         free_divert_blk(dev);
2834         goto out;
2835 }
2836
2837 /**
2838  *      register_netdev - register a network device
2839  *      @dev: device to register
2840  *
2841  *      Take a completed network device structure and add it to the kernel
2842  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2843  *      chain. 0 is returned on success. A negative errno code is returned
2844  *      on a failure to set up the device, or if the name is a duplicate.
2845  *
2846  *      This is a wrapper around register_netdev that takes the rtnl semaphore
2847  *      and expands the device name if you passed a format string to
2848  *      alloc_netdev.
2849  */
2850 int register_netdev(struct net_device *dev)
2851 {
2852         int err;
2853
2854         rtnl_lock();
2855
2856         /*
2857          * If the name is a format string the caller wants us to do a
2858          * name allocation.
2859          */
2860         if (strchr(dev->name, '%')) {
2861                 err = dev_alloc_name(dev, dev->name);
2862                 if (err < 0)
2863                         goto out;
2864         }
2865
2866         /*
2867          * Back compatibility hook. Kill this one in 2.5
2868          */
2869         if (dev->name[0] == 0 || dev->name[0] == ' ') {
2870                 err = dev_alloc_name(dev, "eth%d");
2871                 if (err < 0)
2872                         goto out;
2873         }
2874
2875         err = register_netdevice(dev);
2876 out:
2877         rtnl_unlock();
2878         return err;
2879 }
2880 EXPORT_SYMBOL(register_netdev);
2881
2882 /*
2883  * netdev_wait_allrefs - wait until all references are gone.
2884  *
2885  * This is called when unregistering network devices.
2886  *
2887  * Any protocol or device that holds a reference should register
2888  * for netdevice notification, and cleanup and put back the
2889  * reference if they receive an UNREGISTER event.
2890  * We can get stuck here if buggy protocols don't correctly
2891  * call dev_put.
2892  */
2893 static void netdev_wait_allrefs(struct net_device *dev)
2894 {
2895         unsigned long rebroadcast_time, warning_time;
2896
2897         rebroadcast_time = warning_time = jiffies;
2898         while (atomic_read(&dev->refcnt) != 0) {
2899                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2900                         rtnl_shlock();
2901
2902                         /* Rebroadcast unregister notification */
2903                         notifier_call_chain(&netdev_chain,
2904                                             NETDEV_UNREGISTER, dev);
2905
2906                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2907                                      &dev->state)) {
2908                                 /* We must not have linkwatch events
2909                                  * pending on unregister. If this
2910                                  * happens, we simply run the queue
2911                                  * unscheduled, resulting in a noop
2912                                  * for this device.
2913                                  */
2914                                 linkwatch_run_queue();
2915                         }
2916
2917                         rtnl_shunlock();
2918
2919                         rebroadcast_time = jiffies;
2920                 }
2921
2922                 msleep(250);
2923
2924                 if (time_after(jiffies, warning_time + 10 * HZ)) {
2925                         printk(KERN_EMERG "unregister_netdevice: "
2926                                "waiting for %s to become free. Usage "
2927                                "count = %d\n",
2928                                dev->name, atomic_read(&dev->refcnt));
2929                         warning_time = jiffies;
2930                 }
2931         }
2932 }
2933
2934 /* The sequence is:
2935  *
2936  *      rtnl_lock();
2937  *      ...
2938  *      register_netdevice(x1);
2939  *      register_netdevice(x2);
2940  *      ...
2941  *      unregister_netdevice(y1);
2942  *      unregister_netdevice(y2);
2943  *      ...
2944  *      rtnl_unlock();
2945  *      free_netdev(y1);
2946  *      free_netdev(y2);
2947  *
2948  * We are invoked by rtnl_unlock() after it drops the semaphore.
2949  * This allows us to deal with problems:
2950  * 1) We can create/delete sysfs objects which invoke hotplug
2951  *    without deadlocking with linkwatch via keventd.
2952  * 2) Since we run with the RTNL semaphore not held, we can sleep
2953  *    safely in order to wait for the netdev refcnt to drop to zero.
2954  */
2955 static DECLARE_MUTEX(net_todo_run_mutex);
2956 void netdev_run_todo(void)
2957 {
2958         struct list_head list = LIST_HEAD_INIT(list);
2959         int err;
2960
2961
2962         /* Need to guard against multiple cpu's getting out of order. */
2963         down(&net_todo_run_mutex);
2964
2965         /* Not safe to do outside the semaphore.  We must not return
2966          * until all unregister events invoked by the local processor
2967          * have been completed (either by this todo run, or one on
2968          * another cpu).
2969          */
2970         if (list_empty(&net_todo_list))
2971                 goto out;
2972
2973         /* Snapshot list, allow later requests */
2974         spin_lock(&net_todo_list_lock);
2975         list_splice_init(&net_todo_list, &list);
2976         spin_unlock(&net_todo_list_lock);
2977
2978         while (!list_empty(&list)) {
2979                 struct net_device *dev
2980                         = list_entry(list.next, struct net_device, todo_list);
2981                 list_del(&dev->todo_list);
2982
2983                 switch(dev->reg_state) {
2984                 case NETREG_REGISTERING:
2985                         err = netdev_register_sysfs(dev);
2986                         if (err)
2987                                 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
2988                                        dev->name, err);
2989                         dev->reg_state = NETREG_REGISTERED;
2990                         break;
2991
2992                 case NETREG_UNREGISTERING:
2993                         netdev_unregister_sysfs(dev);
2994                         dev->reg_state = NETREG_UNREGISTERED;
2995
2996                         netdev_wait_allrefs(dev);
2997
2998                         /* paranoia */
2999                         BUG_ON(atomic_read(&dev->refcnt));
3000                         BUG_TRAP(!dev->ip_ptr);
3001                         BUG_TRAP(!dev->ip6_ptr);
3002                         BUG_TRAP(!dev->dn_ptr);
3003
3004
3005                         /* It must be the very last action,
3006                          * after this 'dev' may point to freed up memory.
3007                          */
3008                         if (dev->destructor)
3009                                 dev->destructor(dev);
3010                         break;
3011
3012                 default:
3013                         printk(KERN_ERR "network todo '%s' but state %d\n",
3014                                dev->name, dev->reg_state);
3015                         break;
3016                 }
3017         }
3018
3019 out:
3020         up(&net_todo_run_mutex);
3021 }
3022
3023 /**
3024  *      alloc_netdev - allocate network device
3025  *      @sizeof_priv:   size of private data to allocate space for
3026  *      @name:          device name format string
3027  *      @setup:         callback to initialize device
3028  *
3029  *      Allocates a struct net_device with private data area for driver use
3030  *      and performs basic initialization.
3031  */
3032 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3033                 void (*setup)(struct net_device *))
3034 {
3035         void *p;
3036         struct net_device *dev;
3037         int alloc_size;
3038
3039         /* ensure 32-byte alignment of both the device and private area */
3040         alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3041         alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3042
3043         p = kmalloc(alloc_size, GFP_KERNEL);
3044         if (!p) {
3045                 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3046                 return NULL;
3047         }
3048         memset(p, 0, alloc_size);
3049
3050         dev = (struct net_device *)
3051                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3052         dev->padded = (char *)dev - (char *)p;
3053
3054         if (sizeof_priv)
3055                 dev->priv = netdev_priv(dev);
3056
3057         setup(dev);
3058         strcpy(dev->name, name);
3059         return dev;
3060 }
3061 EXPORT_SYMBOL(alloc_netdev);
3062
3063 /**
3064  *      free_netdev - free network device
3065  *      @dev: device
3066  *
3067  *      This function does the last stage of destroying an allocated device
3068  *      interface. The reference to the device object is released.
3069  *      If this is the last reference then it will be freed.
3070  */
3071 void free_netdev(struct net_device *dev)
3072 {
3073 #ifdef CONFIG_SYSFS
3074         /*  Compatiablity with error handling in drivers */
3075         if (dev->reg_state == NETREG_UNINITIALIZED) {
3076                 kfree((char *)dev - dev->padded);
3077                 return;
3078         }
3079
3080         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3081         dev->reg_state = NETREG_RELEASED;
3082
3083         /* will free via class release */
3084         class_device_put(&dev->class_dev);
3085 #else
3086         kfree((char *)dev - dev->padded);
3087 #endif
3088 }
3089
/*
 * Synchronize with packet receive processing.
 *
 * Calls synchronize_rcu(); the might_sleep() annotation in front of it
 * marks this function as one that may sleep.
 */
void synchronize_net(void)
{
        might_sleep();
        synchronize_rcu();
}
3096
3097 /**
3098  *      unregister_netdevice - remove device from the kernel
3099  *      @dev: device
3100  *
3101  *      This function shuts down a device interface and removes it
3102  *      from the kernel tables. On success 0 is returned, on a failure
3103  *      a negative errno code is returned.
3104  *
3105  *      Callers must hold the rtnl semaphore.  You may want
3106  *      unregister_netdev() instead of this.
3107  */
3108
3109 int unregister_netdevice(struct net_device *dev)
3110 {
3111         struct net_device *d, **dp;
3112
3113         BUG_ON(dev_boot_phase);
3114         ASSERT_RTNL();
3115
3116         /* Some devices call without registering for initialization unwind. */
3117         if (dev->reg_state == NETREG_UNINITIALIZED) {
3118                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3119                                   "was registered\n", dev->name, dev);
3120                 return -ENODEV;
3121         }
3122
3123         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3124
3125         /* If device is running, close it first. */
3126         if (dev->flags & IFF_UP)
3127                 dev_close(dev);
3128
3129         /* And unlink it from device chain. */
3130         for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3131                 if (d == dev) {
3132                         write_lock_bh(&dev_base_lock);
3133                         hlist_del(&dev->name_hlist);
3134                         hlist_del(&dev->index_hlist);
3135                         if (dev_tail == &dev->next)
3136                                 dev_tail = dp;
3137                         *dp = d->next;
3138                         write_unlock_bh(&dev_base_lock);
3139                         break;
3140                 }
3141         }
3142         if (!d) {
3143                 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3144                        dev->name);
3145                 return -ENODEV;
3146         }
3147
3148         dev->reg_state = NETREG_UNREGISTERING;
3149
3150         synchronize_net();
3151
3152         /* Shutdown queueing discipline. */
3153         dev_shutdown(dev);
3154
3155
3156         /* Notify protocols, that we are about to destroy
3157            this device. They should clean all the things.
3158         */
3159         notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3160
3161         /*
3162          *      Flush the multicast chain
3163          */
3164         dev_mc_discard(dev);
3165
3166         if (dev->uninit)
3167                 dev->uninit(dev);
3168
3169         /* Notifier chain MUST detach us from master device. */
3170         BUG_TRAP(!dev->master);
3171
3172         free_divert_blk(dev);
3173
3174         /* Finish processing unregister after unlock */
3175         net_set_todo(dev);
3176
3177         synchronize_net();
3178
3179         dev_put(dev);
3180         return 0;
3181 }
3182
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      This function shuts down a device interface and removes it
 *      from the kernel tables.  It returns nothing: the result of
 *      unregister_netdevice() is discarded.
 *
 *      This is just a wrapper for unregister_netdevice that takes
 *      the rtnl semaphore.  In general you want to use this and not
 *      unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();
        (void)unregister_netdevice(dev);
        rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3203
#ifdef CONFIG_HOTPLUG_CPU
/*
 * CPU notifier callback.  On CPU_DEAD, the completion and output queues
 * of the CPU passed in @ocpu are appended to those of the CPU running
 * this callback, NET_TX_SOFTIRQ is raised, and the packets left on its
 * input_pkt_queue are fed back in through netif_rx().  Every other
 * action is ignored.  Always returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
                            unsigned long action,
                            void *ocpu)
{
        unsigned int this_cpu, dead_cpu = (unsigned long)ocpu;
        struct softnet_data *mine, *dead;
        struct sk_buff **skb_tail;
        struct net_device **sched_tail;
        struct sk_buff *skb;

        if (action != CPU_DEAD)
                return NOTIFY_OK;

        local_irq_disable();
        this_cpu = smp_processor_id();
        mine = &per_cpu(softnet_data, this_cpu);
        dead = &per_cpu(softnet_data, dead_cpu);

        /* Walk to the end of our completion_queue ... */
        for (skb_tail = &mine->completion_queue; *skb_tail;
             skb_tail = &(*skb_tail)->next)
                ;
        /* ... and hang the offline CPU's completion queue off it. */
        *skb_tail = dead->completion_queue;
        dead->completion_queue = NULL;

        /* Same for the output_queue, which is chained via next_sched. */
        for (sched_tail = &mine->output_queue; *sched_tail;
             sched_tail = &(*sched_tail)->next_sched)
                ;
        *sched_tail = dead->output_queue;
        dead->output_queue = NULL;

        raise_softirq_irqoff(NET_TX_SOFTIRQ);
        local_irq_enable();

        /* Process offline CPU's input_pkt_queue */
        for (;;) {
                skb = __skb_dequeue(&dead->input_pkt_queue);
                if (!skb)
                        break;
                netif_rx(skb);
        }

        return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
3249
3250
3251 /*
3252  *      Initialize the DEV module. At boot time this walks the device list and
3253  *      unhooks any devices that fail to initialise (normally hardware not
3254  *      present) and leaves us with a valid list of present and active devices.
3255  *
3256  */
3257
3258 /*
3259  *       This is called single threaded during boot, so no need
3260  *       to take the rtnl semaphore.
3261  */
3262 static int __init net_dev_init(void)
3263 {
3264         int i, rc = -ENOMEM;
3265
3266         BUG_ON(!dev_boot_phase);
3267
3268         net_random_init();
3269
3270         if (dev_proc_init())
3271                 goto out;
3272
3273         if (netdev_sysfs_init())
3274                 goto out;
3275
3276         INIT_LIST_HEAD(&ptype_all);
3277         for (i = 0; i < 16; i++)
3278                 INIT_LIST_HEAD(&ptype_base[i]);
3279
3280         for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3281                 INIT_HLIST_HEAD(&dev_name_head[i]);
3282
3283         for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3284                 INIT_HLIST_HEAD(&dev_index_head[i]);
3285
3286         /*
3287          *      Initialise the packet receive queues.
3288          */
3289
3290         for (i = 0; i < NR_CPUS; i++) {
3291                 struct softnet_data *queue;
3292
3293                 queue = &per_cpu(softnet_data, i);
3294                 skb_queue_head_init(&queue->input_pkt_queue);
3295                 queue->throttle = 0;
3296                 queue->cng_level = 0;
3297                 queue->avg_blog = 10; /* arbitrary non-zero */
3298                 queue->completion_queue = NULL;
3299                 INIT_LIST_HEAD(&queue->poll_list);
3300                 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3301                 queue->backlog_dev.weight = weight_p;
3302                 queue->backlog_dev.poll = process_backlog;
3303                 atomic_set(&queue->backlog_dev.refcnt, 1);
3304         }
3305
3306 #ifdef OFFLINE_SAMPLE
3307         samp_timer.expires = jiffies + (10 * HZ);
3308         add_timer(&samp_timer);
3309 #endif
3310
3311         dev_boot_phase = 0;
3312
3313         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3314         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3315
3316         hotcpu_notifier(dev_cpu_callback, 0);
3317         dst_init();
3318         dev_mcast_init();
3319         rc = 0;
3320 out:
3321         return rc;
3322 }
3323
3324 subsys_initcall(net_dev_init);
3325
3326 EXPORT_SYMBOL(__dev_get_by_index);
3327 EXPORT_SYMBOL(__dev_get_by_name);
3328 EXPORT_SYMBOL(__dev_remove_pack);
3329 EXPORT_SYMBOL(__skb_linearize);
3330 EXPORT_SYMBOL(dev_add_pack);
3331 EXPORT_SYMBOL(dev_alloc_name);
3332 EXPORT_SYMBOL(dev_close);
3333 EXPORT_SYMBOL(dev_get_by_flags);
3334 EXPORT_SYMBOL(dev_get_by_index);
3335 EXPORT_SYMBOL(dev_get_by_name);
3336 EXPORT_SYMBOL(dev_ioctl);
3337 EXPORT_SYMBOL(dev_open);
3338 EXPORT_SYMBOL(dev_queue_xmit);
3339 EXPORT_SYMBOL(dev_remove_pack);
3340 EXPORT_SYMBOL(dev_set_allmulti);
3341 EXPORT_SYMBOL(dev_set_promiscuity);
3342 EXPORT_SYMBOL(dev_change_flags);
3343 EXPORT_SYMBOL(dev_set_mtu);
3344 EXPORT_SYMBOL(dev_set_mac_address);
3345 EXPORT_SYMBOL(free_netdev);
3346 EXPORT_SYMBOL(netdev_boot_setup_check);
3347 EXPORT_SYMBOL(netdev_set_master);
3348 EXPORT_SYMBOL(netdev_state_change);
3349 EXPORT_SYMBOL(netif_receive_skb);
3350 EXPORT_SYMBOL(netif_rx);
3351 EXPORT_SYMBOL(register_gifconf);
3352 EXPORT_SYMBOL(register_netdevice);
3353 EXPORT_SYMBOL(register_netdevice_notifier);
3354 EXPORT_SYMBOL(skb_checksum_help);
3355 EXPORT_SYMBOL(synchronize_net);
3356 EXPORT_SYMBOL(unregister_netdevice);
3357 EXPORT_SYMBOL(unregister_netdevice_notifier);
3358 EXPORT_SYMBOL(net_enable_timestamp);
3359 EXPORT_SYMBOL(net_disable_timestamp);
3360 EXPORT_SYMBOL(dev_get_flags);
3361
3362 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3363 EXPORT_SYMBOL(br_handle_frame_hook);
3364 EXPORT_SYMBOL(br_fdb_get_hook);
3365 EXPORT_SYMBOL(br_fdb_put_hook);
3366 #endif
3367
3368 #ifdef CONFIG_KMOD
3369 EXPORT_SYMBOL(dev_load);
3370 #endif
3371
3372 EXPORT_PER_CPU_SYMBOL(softnet_data);