git.oblomov.eu Git - linux-2.6/blob - net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/ethtool.h>
  94 #include <linux/notifier.h>
  95 #include <linux/skbuff.h>
  96 #include <net/net_namespace.h>
  97 #include <net/sock.h>
  98 #include <linux/rtnetlink.h>
  99 #include <linux/proc_fs.h>
 100 #include <linux/seq_file.h>
 101 #include <linux/stat.h>
 102 #include <linux/if_bridge.h>
 103 #include <linux/if_macvlan.h>
 104 #include <net/dst.h>
 105 #include <net/pkt_sched.h>
 106 #include <net/checksum.h>
 107 #include <linux/highmem.h>
 108 #include <linux/init.h>
 109 #include <linux/kmod.h>
 110 #include <linux/module.h>
 111 #include <linux/kallsyms.h>
 112 #include <linux/netpoll.h>
 113 #include <linux/rcupdate.h>
 114 #include <linux/delay.h>
 115 #include <net/wext.h>
 116 #include <net/iw_handler.h>
 117 #include <asm/current.h>
 118 #include <linux/audit.h>
 119 #include <linux/dmaengine.h>
 120 #include <linux/err.h>
 121 #include <linux/ctype.h>
 122 #include <linux/if_arp.h>
 123 #include <linux/if_vlan.h>
 124 #include <linux/ip.h>
 125 #include <net/ip.h>
 126 #include <linux/ipv6.h>
 127 #include <linux/in.h>
 128 #include <linux/jhash.h>
 129 #include <linux/random.h>
 130
 131 #include "net-sysfs.h"
 132
 133 /*
 134  *      The list of packet types we will receive (as opposed to discard)
 135  *      and the routines to invoke.
 136  *
 137  *      Why 16. Because with 16 the only overlap we get on a hash of the
 138  *      low nibble of the protocol value is RARP/SNAP/X.25.
 139  *
 140  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 141  *             sure which should go first, but I bet it won't make much
 142  *             difference if we are running VLANs.  The good news is that
 143  *             this protocol won't be in the list unless compiled in, so
 144  *             the average user (w/out VLANs) will not be adversely affected.
 145  *             --BLG
 146  *
 147  *              0800    IP
 148  *              8100    802.1Q VLAN
 149  *              0001    802.3
 150  *              0002    AX.25
 151  *              0004    802.2
 152  *              8035    RARP
 153  *              0005    SNAP
 154  *              0805    X.25
 155  *              0806    ARP
 156  *              8137    IPX
 157  *              0009    Localtalk
 158  *              86DD    IPv6
 159  */
 160
 161 #define PTYPE_HASH_SIZE (16)
 162 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
 163
 164 static DEFINE_SPINLOCK(ptype_lock);
 165 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 166 static struct list_head ptype_all __read_mostly;        /* Taps */
 167
 168 #ifdef CONFIG_NET_DMA
 169 struct net_dma {
 170         struct dma_client client;
 171         spinlock_t lock;
 172         cpumask_t channel_mask;
 173         struct dma_chan **channels;
 174 };
 175
 176 static enum dma_state_client
 177 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 178         enum dma_state state);
 179
 180 static struct net_dma net_dma = {
 181         .client = {
 182                 .event_callback = netdev_dma_event,
 183         },
 184 };
 185 #endif
 186
 187 /*
 188  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 189  * semaphore.
 190  *
 191  * Pure readers hold dev_base_lock for reading.
 192  *
 193  * Writers must hold the rtnl semaphore while they loop through the
 194  * dev_base_head list, and hold dev_base_lock for writing when they do the
 195  * actual updates.  This allows pure readers to access the list even
 196  * while a writer is preparing to update it.
 197  *
 198  * To put it another way, dev_base_lock is held for writing only to
 199  * protect against pure readers; the rtnl semaphore provides the
 200  * protection against other writers.
 201  *
 202  * See, for example usages, register_netdevice() and
 203  * unregister_netdevice(), which must be called with the rtnl
 204  * semaphore held.
 205  */
 206 DEFINE_RWLOCK(dev_base_lock);
 207
 208 EXPORT_SYMBOL(dev_base_lock);
 209
 210 #define NETDEV_HASHBITS 8
 211 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 212
 213 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 214 {
 215         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 216         return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 217 }
 218
 219 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 220 {
 221         return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 222 }
 223
 224 /* Device list insertion */
 225 static int list_netdevice(struct net_device *dev)
 226 {
 227         struct net *net = dev_net(dev);
 228
 229         ASSERT_RTNL();
 230
 231         write_lock_bh(&dev_base_lock);
 232         list_add_tail(&dev->dev_list, &net->dev_base_head);
 233         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 234         hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 235         write_unlock_bh(&dev_base_lock);
 236         return 0;
 237 }
 238
 239 /* Device list removal */
 240 static void unlist_netdevice(struct net_device *dev)
 241 {
 242         ASSERT_RTNL();
 243
 244         /* Unlink dev from the device chain */
 245         write_lock_bh(&dev_base_lock);
 246         list_del(&dev->dev_list);
 247         hlist_del(&dev->name_hlist);
 248         hlist_del(&dev->index_hlist);
 249         write_unlock_bh(&dev_base_lock);
 250 }
 251
 252 /*
 253  *      Our notifier list
 254  */
 255
 256 static RAW_NOTIFIER_HEAD(netdev_chain);
 257
 258 /*
 259  *      Device drivers call our routines to queue packets here. We empty the
 260  *      queue in the local softnet handler.
 261  */
 262
 263 DEFINE_PER_CPU(struct softnet_data, softnet_data);
 264
 265 #ifdef CONFIG_LOCKDEP
 266 /*
 267  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 268  * according to dev->type
 269  */
 270 static const unsigned short netdev_lock_type[] =
 271         {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 272          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 273          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 274          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 275          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 276          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 277          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 278          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 279          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 280          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 281          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 282          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 283          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 284          ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
 285          ARPHRD_NONE};
 286
 287 static const char *netdev_lock_name[] =
 288         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 289          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 290          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 291          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 292          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 293          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 294          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 295          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 296          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 297          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 298          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 299          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 300          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 301          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
 302          "_xmit_NONE"};
 303
 304 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 305 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 306
 307 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 308 {
 309         int i;
 310
 311         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 312                 if (netdev_lock_type[i] == dev_type)
 313                         return i;
 314         /* the last key is used by default */
 315         return ARRAY_SIZE(netdev_lock_type) - 1;
 316 }
 317
 318 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 319                                                  unsigned short dev_type)
 320 {
 321         int i;
 322
 323         i = netdev_lock_pos(dev_type);
 324         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 325                                    netdev_lock_name[i]);
 326 }
 327
 328 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 329 {
 330         int i;
 331
 332         i = netdev_lock_pos(dev->type);
 333         lockdep_set_class_and_name(&dev->addr_list_lock,
 334                                    &netdev_addr_lock_key[i],
 335                                    netdev_lock_name[i]);
 336 }
 337 #else
 338 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 339                                                  unsigned short dev_type)
 340 {
 341 }
 342 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 343 {
 344 }
 345 #endif
 346
 347 /*******************************************************************************
 348
 349                 Protocol management and registration routines
 350
 351 *******************************************************************************/
 352
 353 /*
 354  *      Add a protocol ID to the list. Now that the input handler is
 355  *      smarter we can dispense with all the messy stuff that used to be
 356  *      here.
 357  *
 358  *      BEWARE!!! Protocol handlers, mangling input packets,
 359  *      MUST BE last in hash buckets and checking protocol handlers
 360  *      MUST start from promiscuous ptype_all chain in net_bh.
 361  *      It is true now, do not change it.
 362  *      Explanation follows: if protocol handler, mangling packet, will
 363  *      be the first on list, it is not able to sense, that packet
 364  *      is cloned and should be copied-on-write, so that it will
 365  *      change it and subsequent readers will get broken packet.
 366  *                                                      --ANK (980803)
 367  */
 368
 369 /**
 370  *      dev_add_pack - add packet handler
 371  *      @pt: packet type declaration
 372  *
 373  *      Add a protocol handler to the networking stack. The passed &packet_type
 374  *      is linked into kernel lists and may not be freed until it has been
 375  *      removed from the kernel lists.
 376  *
 377  *      This call does not sleep therefore it can not
 378  *      guarantee all CPU's that are in middle of receiving packets
 379  *      will see the new packet type (until the next received packet).
 380  */
 381
 382 void dev_add_pack(struct packet_type *pt)
 383 {
 384         int hash;
 385
 386         spin_lock_bh(&ptype_lock);
 387         if (pt->type == htons(ETH_P_ALL))
 388                 list_add_rcu(&pt->list, &ptype_all);
 389         else {
 390                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 391                 list_add_rcu(&pt->list, &ptype_base[hash]);
 392         }
 393         spin_unlock_bh(&ptype_lock);
 394 }
 395
 396 /**
 397  *      __dev_remove_pack        - remove packet handler
 398  *      @pt: packet type declaration
 399  *
 400  *      Remove a protocol handler that was previously added to the kernel
 401  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 402  *      from the kernel lists and can be freed or reused once this function
 403  *      returns.
 404  *
 405  *      The packet type might still be in use by receivers
 406  *      and must not be freed until after all the CPU's have gone
 407  *      through a quiescent state.
 408  */
 409 void __dev_remove_pack(struct packet_type *pt)
 410 {
 411         struct list_head *head;
 412         struct packet_type *pt1;
 413
 414         spin_lock_bh(&ptype_lock);
 415
 416         if (pt->type == htons(ETH_P_ALL))
 417                 head = &ptype_all;
 418         else
 419                 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 420
 421         list_for_each_entry(pt1, head, list) {
 422                 if (pt == pt1) {
 423                         list_del_rcu(&pt->list);
 424                         goto out;
 425                 }
 426         }
 427
 428         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 429 out:
 430         spin_unlock_bh(&ptype_lock);
 431 }
 432 /**
 433  *      dev_remove_pack  - remove packet handler
 434  *      @pt: packet type declaration
 435  *
 436  *      Remove a protocol handler that was previously added to the kernel
 437  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 438  *      from the kernel lists and can be freed or reused once this function
 439  *      returns.
 440  *
 441  *      This call sleeps to guarantee that no CPU is looking at the packet
 442  *      type after return.
 443  */
 444 void dev_remove_pack(struct packet_type *pt)
 445 {
 446         __dev_remove_pack(pt);
 447
 448         synchronize_net();
 449 }
 450
 451 /******************************************************************************
 452
 453                       Device Boot-time Settings Routines
 454
 455 *******************************************************************************/
 456
 457 /* Boot time configuration table */
 458 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 459
 460 /**
 461  *      netdev_boot_setup_add   - add new setup entry
 462  *      @name: name of the device
 463  *      @map: configured settings for the device
 464  *
 465  *      Adds new setup entry to the dev_boot_setup list.  The function
 466  *      returns 0 on error and 1 on success.  This is a generic routine to
 467  *      all netdevices.
 468  */
 469 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 470 {
 471         struct netdev_boot_setup *s;
 472         int i;
 473
 474         s = dev_boot_setup;
 475         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 476                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 477                         memset(s[i].name, 0, sizeof(s[i].name));
 478                         strlcpy(s[i].name, name, IFNAMSIZ);
 479                         memcpy(&s[i].map, map, sizeof(s[i].map));
 480                         break;
 481                 }
 482         }
 483
 484         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 485 }
 486
 487 /**
 488  *      netdev_boot_setup_check - check boot time settings
 489  *      @dev: the netdevice
 490  *
 491  *      Check boot time settings for the device.
 492  *      The found settings are set for the device to be used
 493  *      later in the device probing.
 494  *      Returns 0 if no settings found, 1 if they are.
 495  */
 496 int netdev_boot_setup_check(struct net_device *dev)
 497 {
 498         struct netdev_boot_setup *s = dev_boot_setup;
 499         int i;
 500
 501         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 502                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 503                     !strcmp(dev->name, s[i].name)) {
 504                         dev->irq        = s[i].map.irq;
 505                         dev->base_addr  = s[i].map.base_addr;
 506                         dev->mem_start  = s[i].map.mem_start;
 507                         dev->mem_end    = s[i].map.mem_end;
 508                         return 1;
 509                 }
 510         }
 511         return 0;
 512 }
 513
 514
 515 /**
 516  *      netdev_boot_base        - get address from boot time settings
 517  *      @prefix: prefix for network device
 518  *      @unit: id for network device
 519  *
 520  *      Check boot time settings for the base address of device.
 521  *      The found settings are set for the device to be used
 522  *      later in the device probing.
 523  *      Returns 0 if no settings found.
 524  */
 525 unsigned long netdev_boot_base(const char *prefix, int unit)
 526 {
 527         const struct netdev_boot_setup *s = dev_boot_setup;
 528         char name[IFNAMSIZ];
 529         int i;
 530
 531         sprintf(name, "%s%d", prefix, unit);
 532
 533         /*
 534          * If device already registered then return base of 1
 535          * to indicate not to probe for this interface
 536          */
 537         if (__dev_get_by_name(&init_net, name))
 538                 return 1;
 539
 540         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 541                 if (!strcmp(name, s[i].name))
 542                         return s[i].map.base_addr;
 543         return 0;
 544 }
 545
 546 /*
 547  * Saves at boot time configured settings for any netdevice.
 548  */
 549 int __init netdev_boot_setup(char *str)
 550 {
 551         int ints[5];
 552         struct ifmap map;
 553
 554         str = get_options(str, ARRAY_SIZE(ints), ints);
 555         if (!str || !*str)
 556                 return 0;
 557
 558         /* Save settings */
 559         memset(&map, 0, sizeof(map));
 560         if (ints[0] > 0)
 561                 map.irq = ints[1];
 562         if (ints[0] > 1)
 563                 map.base_addr = ints[2];
 564         if (ints[0] > 2)
 565                 map.mem_start = ints[3];
 566         if (ints[0] > 3)
 567                 map.mem_end = ints[4];
 568
 569         /* Add new entry to the list */
 570         return netdev_boot_setup_add(str, &map);
 571 }
 572
 573 __setup("netdev=", netdev_boot_setup);
 574
 575 /*******************************************************************************
 576
 577                             Device Interface Subroutines
 578
 579 *******************************************************************************/
 580
 581 /**
 582  *      __dev_get_by_name       - find a device by its name
 583  *      @net: the applicable net namespace
 584  *      @name: name to find
 585  *
 586  *      Find an interface by name. Must be called under RTNL semaphore
 587  *      or @dev_base_lock. If the name is found a pointer to the device
 588  *      is returned. If the name is not found then %NULL is returned. The
 589  *      reference counters are not incremented so the caller must be
 590  *      careful with locks.
 591  */
 592
 593 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 594 {
 595         struct hlist_node *p;
 596
 597         hlist_for_each(p, dev_name_hash(net, name)) {
 598                 struct net_device *dev
 599                         = hlist_entry(p, struct net_device, name_hlist);
 600                 if (!strncmp(dev->name, name, IFNAMSIZ))
 601                         return dev;
 602         }
 603         return NULL;
 604 }
 605
 606 /**
 607  *      dev_get_by_name         - find a device by its name
 608  *      @net: the applicable net namespace
 609  *      @name: name to find
 610  *
 611  *      Find an interface by name. This can be called from any
 612  *      context and does its own locking. The returned handle has
 613  *      the usage count incremented and the caller must use dev_put() to
 614  *      release it when it is no longer needed. %NULL is returned if no
 615  *      matching device is found.
 616  */
 617
 618 struct net_device *dev_get_by_name(struct net *net, const char *name)
 619 {
 620         struct net_device *dev;
 621
 622         read_lock(&dev_base_lock);
 623         dev = __dev_get_by_name(net, name);
 624         if (dev)
 625                 dev_hold(dev);
 626         read_unlock(&dev_base_lock);
 627         return dev;
 628 }
 629
 630 /**
 631  *      __dev_get_by_index - find a device by its ifindex
 632  *      @net: the applicable net namespace
 633  *      @ifindex: index of device
 634  *
 635  *      Search for an interface by index. Returns %NULL if the device
 636  *      is not found or a pointer to the device. The device has not
 637  *      had its reference counter increased so the caller must be careful
 638  *      about locking. The caller must hold either the RTNL semaphore
 639  *      or @dev_base_lock.
 640  */
 641
 642 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 643 {
 644         struct hlist_node *p;
 645
 646         hlist_for_each(p, dev_index_hash(net, ifindex)) {
 647                 struct net_device *dev
 648                         = hlist_entry(p, struct net_device, index_hlist);
 649                 if (dev->ifindex == ifindex)
 650                         return dev;
 651         }
 652         return NULL;
 653 }
 654
 655
 656 /**
 657  *      dev_get_by_index - find a device by its ifindex
 658  *      @net: the applicable net namespace
 659  *      @ifindex: index of device
 660  *
 661  *      Search for an interface by index. Returns NULL if the device
 662  *      is not found or a pointer to the device. The device returned has
 663  *      had a reference added and the pointer is safe until the user calls
 664  *      dev_put to indicate they have finished with it.
 665  */
 666
 667 struct net_device *dev_get_by_index(struct net *net, int ifindex)
 668 {
 669         struct net_device *dev;
 670
 671         read_lock(&dev_base_lock);
 672         dev = __dev_get_by_index(net, ifindex);
 673         if (dev)
 674                 dev_hold(dev);
 675         read_unlock(&dev_base_lock);
 676         return dev;
 677 }
 678
 679 /**
 680  *      dev_getbyhwaddr - find a device by its hardware address
 681  *      @net: the applicable net namespace
 682  *      @type: media type of device
 683  *      @ha: hardware address
 684  *
 685  *      Search for an interface by MAC address. Returns NULL if the device
 686  *      is not found or a pointer to the device. The caller must hold the
 687  *      rtnl semaphore. The returned device has not had its ref count increased
 688  *      and the caller must therefore be careful about locking
 689  *
 690  *      BUGS:
 691  *      If the API was consistent this would be __dev_get_by_hwaddr
 692  */
 693
 694 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 695 {
 696         struct net_device *dev;
 697
 698         ASSERT_RTNL();
 699
 700         for_each_netdev(net, dev)
 701                 if (dev->type == type &&
 702                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 703                         return dev;
 704
 705         return NULL;
 706 }
 707
 708 EXPORT_SYMBOL(dev_getbyhwaddr);
 709
 710 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 711 {
 712         struct net_device *dev;
 713
 714         ASSERT_RTNL();
 715         for_each_netdev(net, dev)
 716                 if (dev->type == type)
 717                         return dev;
 718
 719         return NULL;
 720 }
 721
 722 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 723
 724 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 725 {
 726         struct net_device *dev;
 727
 728         rtnl_lock();
 729         dev = __dev_getfirstbyhwtype(net, type);
 730         if (dev)
 731                 dev_hold(dev);
 732         rtnl_unlock();
 733         return dev;
 734 }
 735
 736 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 737
 738 /**
 739  *      dev_get_by_flags - find any device with given flags
 740  *      @net: the applicable net namespace
 741  *      @if_flags: IFF_* values
 742  *      @mask: bitmask of bits in if_flags to check
 743  *
 744  *      Search for any interface with the given flags. Returns NULL if a device
 745  *      is not found or a pointer to the device. The device returned has
 746  *      had a reference added and the pointer is safe until the user calls
 747  *      dev_put to indicate they have finished with it.
 748  */
 749
 750 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 751 {
 752         struct net_device *dev, *ret;
 753
 754         ret = NULL;
 755         read_lock(&dev_base_lock);
 756         for_each_netdev(net, dev) {
 757                 if (((dev->flags ^ if_flags) & mask) == 0) {
 758                         dev_hold(dev);
 759                         ret = dev;
 760                         break;
 761                 }
 762         }
 763         read_unlock(&dev_base_lock);
 764         return ret;
 765 }
 766
 767 /**
 768  *      dev_valid_name - check if name is okay for network device
 769  *      @name: name string
 770  *
 771  *      Network device names need to be valid file names to
 772  *      to allow sysfs to work.  We also disallow any kind of
 773  *      whitespace.
 774  */
 775 int dev_valid_name(const char *name)
 776 {
 777         if (*name == '\0')
 778                 return 0;
 779         if (strlen(name) >= IFNAMSIZ)
 780                 return 0;
 781         if (!strcmp(name, ".") || !strcmp(name, ".."))
 782                 return 0;
 783
 784         while (*name) {
 785                 if (*name == '/' || isspace(*name))
 786                         return 0;
 787                 name++;
 788         }
 789         return 1;
 790 }
 791
/**
 *      __dev_alloc_name - allocate a name for a device
 *      @net: network namespace to allocate the device name in
 *      @name: name format string
 *      @buf:  scratch buffer and result name string
 *
 *      Passed a format string - eg "lt%d" it will try and find a suitable
 *      id. It scans list of devices to build up a free map, then chooses
 *      the first empty slot. The caller must hold the dev_base or rtnl lock
 *      while allocating the name and adding the device in order to avoid
 *      duplicates.
 *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *
 *      If @name contains no '%' within its first IFNAMSIZ-1 characters it
 *      is used as a literal name and the unit number stays 0.
 *
 *      Returns the number of the unit assigned or a negative errno code:
 *      -EINVAL for a malformed format string, -ENOMEM if the bitmap page
 *      cannot be allocated, -ENFILE if the resulting name is already taken.
 *      On success the chosen name has been written to @buf.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
        /* Unit number; stays 0 when @name holds no '%' conversion. */
        int i = 0;
        const char *p;
        /* One page used as a bitmap: 8 bits per byte of PAGE_SIZE. */
        const int max_netdevices = 8*PAGE_SIZE;
        unsigned long *inuse;
        struct net_device *d;

        p = strnchr(name, IFNAMSIZ-1, '%');
        if (p) {
                /*
                 * Verify the string as this thing may have come from
                 * the user.  There must be exactly one "%d" and no other
                 * "%" characters.
                 */
                if (p[1] != 'd' || strchr(p + 2, '%'))
                        return -EINVAL;

                /* Use one page as a bit array of possible slots */
                inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
                if (!inuse)
                        return -ENOMEM;

                for_each_netdev(net, d) {
                        /* Skip devices whose name does not parse with the format. */
                        if (!sscanf(d->name, name, &i))
                                continue;
                        /* Units outside the bitmap cannot be recorded. */
                        if (i < 0 || i >= max_netdevices)
                                continue;

                        /*  avoid cases where sscanf is not exact inverse of printf */
                        snprintf(buf, IFNAMSIZ, name, i);
                        if (!strncmp(buf, d->name, IFNAMSIZ))
                                set_bit(i, inuse);
                }

                /* Lowest unit not marked in use by the scan above. */
                i = find_first_zero_bit(inuse, max_netdevices);
                free_page((unsigned long) inuse);
        }

        /* Build the candidate name and make sure nobody owns it already. */
        snprintf(buf, IFNAMSIZ, name, i);
        if (!__dev_get_by_name(net, buf))
                return i;

        /* It is possible to run out of possible slots
         * when the name is long and there isn't enough space left
         * for the digits, or if all bits are used.
         */
        return -ENFILE;
}
 856
 857 /**
 858  *      dev_alloc_name - allocate a name for a device
 859  *      @dev: device
 860  *      @name: name format string
 861  *
 862  *      Passed a format string - eg "lt%d" it will try and find a suitable
 863  *      id. It scans list of devices to build up a free map, then chooses
 864  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 865  *      while allocating the name and adding the device in order to avoid
 866  *      duplicates.
 867  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 868  *      Returns the number of the unit assigned or a negative errno code.
 869  */
 870
 871 int dev_alloc_name(struct net_device *dev, const char *name)
 872 {
 873         char buf[IFNAMSIZ];
 874         struct net *net;
 875         int ret;
 876
 877         BUG_ON(!dev_net(dev));
 878         net = dev_net(dev);
 879         ret = __dev_alloc_name(net, name, buf);
 880         if (ret >= 0)
 881                 strlcpy(dev->name, buf, IFNAMSIZ);
 882         return ret;
 883 }
 884
 885
 886 /**
 887  *      dev_change_name - change name of a device
 888  *      @dev: device
 889  *      @newname: name (or format string) must be at least IFNAMSIZ
 890  *
 891  *      Change name of a device, can pass format strings "eth%d".
 892  *      for wildcarding.
 893  */
 894 int dev_change_name(struct net_device *dev, char *newname)
 895 {
 896         char oldname[IFNAMSIZ];
 897         int err = 0;
 898         int ret;
 899         struct net *net;
 900
 901         ASSERT_RTNL();
 902         BUG_ON(!dev_net(dev));
 903
 904         net = dev_net(dev);
 905         if (dev->flags & IFF_UP)
 906                 return -EBUSY;
 907
 908         if (!dev_valid_name(newname))
 909                 return -EINVAL;
 910
 911         if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 912                 return 0;
 913
 914         memcpy(oldname, dev->name, IFNAMSIZ);
 915
 916         if (strchr(newname, '%')) {
 917                 err = dev_alloc_name(dev, newname);
 918                 if (err < 0)
 919                         return err;
 920                 strcpy(newname, dev->name);
 921         }
 922         else if (__dev_get_by_name(net, newname))
 923                 return -EEXIST;
 924         else
 925                 strlcpy(dev->name, newname, IFNAMSIZ);
 926
 927 rollback:
 928         err = device_rename(&dev->dev, dev->name);
 929         if (err) {
 930                 memcpy(dev->name, oldname, IFNAMSIZ);
 931                 return err;
 932         }
 933
 934         write_lock_bh(&dev_base_lock);
 935         hlist_del(&dev->name_hlist);
 936         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 937         write_unlock_bh(&dev_base_lock);
 938
 939         ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 940         ret = notifier_to_errno(ret);
 941
 942         if (ret) {
 943                 if (err) {
 944                         printk(KERN_ERR
 945                                "%s: name change rollback failed: %d.\n",
 946                                dev->name, ret);
 947                 } else {
 948                         err = ret;
 949                         memcpy(dev->name, oldname, IFNAMSIZ);
 950                         goto rollback;
 951                 }
 952         }
 953
 954         return err;
 955 }
 956
/**
 *      netdev_features_change - device changes features
 *      @dev: device to cause notification
 *
 *      Called to indicate a device has changed features.  Passes a
 *      NETDEV_FEAT_CHANGE event for @dev to call_netdevice_notifiers();
 *      the value returned by the notifier chain is discarded.
 */
void netdev_features_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
 968
 969 /**
 970  *      netdev_state_change - device changes state
 971  *      @dev: device to cause notification
 972  *
 973  *      Called to indicate a device has changed state. This function calls
 974  *      the notifier chains for netdev_chain and sends a NEWLINK message
 975  *      to the routing socket.
 976  */
 977 void netdev_state_change(struct net_device *dev)
 978 {
 979         if (dev->flags & IFF_UP) {
 980                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
 981                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 982         }
 983 }
 984
/**
 *      netdev_bonding_change - announce a bonding failover
 *      @dev: device to cause notification
 *
 *      Passes a NETDEV_BONDING_FAILOVER event for @dev to
 *      call_netdevice_notifiers(); the notifier result is discarded.
 */
void netdev_bonding_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
 990
 991 /**
 992  *      dev_load        - load a network module
 993  *      @net: the applicable net namespace
 994  *      @name: name of interface
 995  *
 996  *      If a network interface is not present and the process has suitable
 997  *      privileges this function loads the module. If module loading is not
 998  *      available in this kernel then it becomes a nop.
 999  */
1000
1001 void dev_load(struct net *net, const char *name)
1002 {
1003         struct net_device *dev;
1004
1005         read_lock(&dev_base_lock);
1006         dev = __dev_get_by_name(net, name);
1007         read_unlock(&dev_base_lock);
1008
1009         if (!dev && capable(CAP_SYS_MODULE))
1010                 request_module("%s", name);
1011 }
1012
1013 /**
1014  *      dev_open        - prepare an interface for use.
1015  *      @dev:   device to open
1016  *
1017  *      Takes a device from down to up state. The device's private open
1018  *      function is invoked and then the multicast lists are loaded. Finally
1019  *      the device is moved into the up state and a %NETDEV_UP message is
1020  *      sent to the netdev notifier chain.
1021  *
1022  *      Calling this function on an active interface is a nop. On a failure
1023  *      a negative errno code is returned.
1024  */
1025 int dev_open(struct net_device *dev)
1026 {
1027         int ret = 0;
1028
1029         ASSERT_RTNL();
1030
1031         /*
1032          *      Is it already up?
1033          */
1034
1035         if (dev->flags & IFF_UP)
1036                 return 0;
1037
1038         /*
1039          *      Is it even present?
1040          */
1041         if (!netif_device_present(dev))
1042                 return -ENODEV;
1043
1044         /*
1045          *      Call device private open method
1046          */
1047         set_bit(__LINK_STATE_START, &dev->state);
1048
1049         if (dev->validate_addr)
1050                 ret = dev->validate_addr(dev);
1051
1052         if (!ret && dev->open)
1053                 ret = dev->open(dev);
1054
1055         /*
1056          *      If it went open OK then:
1057          */
1058
1059         if (ret)
1060                 clear_bit(__LINK_STATE_START, &dev->state);
1061         else {
1062                 /*
1063                  *      Set the flags.
1064                  */
1065                 dev->flags |= IFF_UP;
1066
1067                 /*
1068                  *      Initialize multicasting status
1069                  */
1070                 dev_set_rx_mode(dev);
1071
1072                 /*
1073                  *      Wakeup transmit queue engine
1074                  */
1075                 dev_activate(dev);
1076
1077                 /*
1078                  *      ... and announce new interface.
1079                  */
1080                 call_netdevice_notifiers(NETDEV_UP, dev);
1081         }
1082
1083         return ret;
1084 }
1085
/**
 *      dev_close - shutdown an interface.
 *      @dev: device to shutdown
 *
 *      This function moves an active device into down state. A
 *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *      chain.
 *
 *      Must be called with the RTNL held and may sleep.  Always returns 0;
 *      calling it on a device that is already down is a nop.
 */
int dev_close(struct net_device *dev)
{
        ASSERT_RTNL();

        might_sleep();

        if (!(dev->flags & IFF_UP))
                return 0;

        /*
         *      Tell people we are going down, so that they can
         *      prepare for it while the device is still operating.
         */
        call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

        clear_bit(__LINK_STATE_START, &dev->state);

        /* Synchronize to scheduled poll. We cannot touch poll list,
         * it can be even on different cpu. So just clear netif_running().
         *
         * dev->stop() will invoke napi_disable() on all of its
         * napi_struct instances on this device.
         */
        smp_mb__after_clear_bit(); /* Commit netif_running(). */

        dev_deactivate(dev);

        /*
         *      Call the device specific close. This cannot fail.
         *      Only if device is UP
         *
         *      We allow it to be called even after a DETACH hot-plug
         *      event.
         */
        if (dev->stop)
                dev->stop(dev);

        /*
         *      Device is now down.
         */

        dev->flags &= ~IFF_UP;

        /*
         * Tell people we are down
         */
        call_netdevice_notifiers(NETDEV_DOWN, dev);

        return 0;
}
1145
1146
1147 /**
1148  *      dev_disable_lro - disable Large Receive Offload on a device
1149  *      @dev: device
1150  *
1151  *      Disable Large Receive Offload (LRO) on a net device.  Must be
1152  *      called under RTNL.  This is needed if received packets may be
1153  *      forwarded to another interface.
1154  */
1155 void dev_disable_lro(struct net_device *dev)
1156 {
1157         if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1158             dev->ethtool_ops->set_flags) {
1159                 u32 flags = dev->ethtool_ops->get_flags(dev);
1160                 if (flags & ETH_FLAG_LRO) {
1161                         flags &= ~ETH_FLAG_LRO;
1162                         dev->ethtool_ops->set_flags(dev, flags);
1163                 }
1164         }
1165         WARN_ON(dev->features & NETIF_F_LRO);
1166 }
1167 EXPORT_SYMBOL(dev_disable_lro);
1168
1169
/*
 * Starts out as 1.  While it is non-zero, register_netdevice_notifier()
 * does not replay REGISTER/UP events to a newly added notifier.
 * NOTE(review): presumably cleared once network device initialisation
 * has finished - the clearing site is not in this part of the file.
 */
static int dev_boot_phase = 1;
1171
1172 /*
1173  *      Device change register/unregister. These are not inline or static
1174  *      as we export them to the world.
1175  */
1176
1177 /**
1178  *      register_netdevice_notifier - register a network notifier block
1179  *      @nb: notifier
1180  *
1181  *      Register a notifier to be called when network device events occur.
1182  *      The notifier passed is linked into the kernel structures and must
1183  *      not be reused until it has been unregistered. A negative errno code
1184  *      is returned on a failure.
1185  *
1186  *      When registered all registration and up events are replayed
1187  *      to the new notifier to allow device to have a race free
1188  *      view of the network device list.
1189  */
1190
1191 int register_netdevice_notifier(struct notifier_block *nb)
1192 {
1193         struct net_device *dev;
1194         struct net_device *last;
1195         struct net *net;
1196         int err;
1197
1198         rtnl_lock();
1199         err = raw_notifier_chain_register(&netdev_chain, nb);
1200         if (err)
1201                 goto unlock;
1202         if (dev_boot_phase)
1203                 goto unlock;
1204         for_each_net(net) {
1205                 for_each_netdev(net, dev) {
1206                         err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1207                         err = notifier_to_errno(err);
1208                         if (err)
1209                                 goto rollback;
1210
1211                         if (!(dev->flags & IFF_UP))
1212                                 continue;
1213
1214                         nb->notifier_call(nb, NETDEV_UP, dev);
1215                 }
1216         }
1217
1218 unlock:
1219         rtnl_unlock();
1220         return err;
1221
1222 rollback:
1223         last = dev;
1224         for_each_net(net) {
1225                 for_each_netdev(net, dev) {
1226                         if (dev == last)
1227                                 break;
1228
1229                         if (dev->flags & IFF_UP) {
1230                                 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1231                                 nb->notifier_call(nb, NETDEV_DOWN, dev);
1232                         }
1233                         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1234                 }
1235         }
1236
1237         raw_notifier_chain_unregister(&netdev_chain, nb);
1238         goto unlock;
1239 }
1240
/**
 *      unregister_netdevice_notifier - unregister a network notifier block
 *      @nb: notifier
 *
 *      Unregister a notifier previously registered by
 *      register_netdevice_notifier(). The notifier is unlinked from the
 *      kernel structures and may then be reused. A negative errno code
 *      is returned on a failure.
 *
 *      The RTNL is taken around the removal from netdev_chain.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
        int err;

        rtnl_lock();
        err = raw_notifier_chain_unregister(&netdev_chain, nb);
        rtnl_unlock();
        return err;
}
1260
/**
 *      call_netdevice_notifiers - call all network notifier blocks
 *      @val: value passed unmodified to notifier function
 *      @dev: net_device pointer passed unmodified to notifier function
 *
 *      Call all network notifier blocks on netdev_chain.  Parameters and
 *      return value are as for raw_notifier_call_chain(); callers in this
 *      file that need an errno convert the result with notifier_to_errno().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
        return raw_notifier_call_chain(&netdev_chain, val, dev);
}
1274
/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

/* Account for one more consumer of skb time stamps. */
void net_enable_timestamp(void)
{
        atomic_inc(&netstamp_needed);
}

/* Drop one consumer of skb time stamps; pairs with net_enable_timestamp(). */
void net_disable_timestamp(void)
{
        atomic_dec(&netstamp_needed);
}
1287
1288 static inline void net_timestamp(struct sk_buff *skb)
1289 {
1290         if (atomic_read(&netstamp_needed))
1291                 __net_timestamp(skb);
1292         else
1293                 skb->tstamp.tv64 = 0;
1294 }
1295
/*
 *      Support routine. Sends outgoing frames to any network
 *      taps currently in use.
 *
 *      Every handler on ptype_all that is either unbound or bound to @dev
 *      receives its own clone of @skb, marked PACKET_OUTGOING.  The list
 *      is walked under rcu_read_lock().  If a clone cannot be allocated
 *      the walk stops and the remaining taps do not see the frame.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
        struct packet_type *ptype;

        net_timestamp(skb);

        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                /* Never send packets back to the socket
                 * they originated from - MvS (miquels@drinkel.ow.org)
                 */
                if ((ptype->dev == dev || !ptype->dev) &&
                    (ptype->af_packet_priv == NULL ||
                     (struct sock *)ptype->af_packet_priv != skb->sk)) {
                        struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
                        if (!skb2)
                                break;

                        /* The network header should have been set
                           correctly by the sender, so the range check
                           below is just protection against buggy
                           protocols.
                         */
                        skb_reset_mac_header(skb2);

                        if (skb_network_header(skb2) < skb2->data ||
                            skb2->network_header > skb2->tail) {
                                if (net_ratelimit())
                                        printk(KERN_CRIT "protocol %04x is "
                                               "buggy, dev %s\n",
                                               skb2->protocol, dev->name);
                                skb_reset_network_header(skb2);
                        }

                        skb2->transport_header = skb2->network_header;
                        skb2->pkt_type = PACKET_OUTGOING;
                        /* The handler takes over the clone. */
                        ptype->func(skb2, skb->dev, ptype, skb->dev);
                }
        }
        rcu_read_unlock();
}
1341
1342
/*
 *      Push @q onto the head of this CPU's softnet_data output_queue
 *      (linked through q->next_sched) and raise NET_TX_SOFTIRQ.  Local
 *      interrupts are disabled while the per-cpu list is modified.
 */
static inline void __netif_reschedule(struct Qdisc *q)
{
        struct softnet_data *sd;
        unsigned long flags;

        local_irq_save(flags);
        sd = &__get_cpu_var(softnet_data);
        q->next_sched = sd->output_queue;
        sd->output_queue = q;
        raise_softirq_irqoff(NET_TX_SOFTIRQ);
        local_irq_restore(flags);
}
1355
1356 void __netif_schedule(struct Qdisc *q)
1357 {
1358         if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1359                 __netif_reschedule(q);
1360 }
1361 EXPORT_SYMBOL(__netif_schedule);
1362
1363 void dev_kfree_skb_irq(struct sk_buff *skb)
1364 {
1365         if (atomic_dec_and_test(&skb->users)) {
1366                 struct softnet_data *sd;
1367                 unsigned long flags;
1368
1369                 local_irq_save(flags);
1370                 sd = &__get_cpu_var(softnet_data);
1371                 skb->next = sd->completion_queue;
1372                 sd->completion_queue = skb;
1373                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1374                 local_irq_restore(flags);
1375         }
1376 }
1377 EXPORT_SYMBOL(dev_kfree_skb_irq);
1378
/*
 *      Free @skb from any context: dev_kfree_skb() is used directly when
 *      we are neither in hard interrupt context nor running with
 *      interrupts disabled, dev_kfree_skb_irq() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
        if (!in_irq() && !irqs_disabled())
                dev_kfree_skb(skb);
        else
                dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1387
1388
1389 /**
1390  * netif_device_detach - mark device as removed
1391  * @dev: network device
1392  *
1393  * Mark device as removed from system and therefore no longer available.
1394  */
1395 void netif_device_detach(struct net_device *dev)
1396 {
1397         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1398             netif_running(dev)) {
1399                 netif_stop_queue(dev);
1400         }
1401 }
1402 EXPORT_SYMBOL(netif_device_detach);
1403
1404 /**
1405  * netif_device_attach - mark device as attached
1406  * @dev: network device
1407  *
1408  * Mark device as attached from system and restart if needed.
1409  */
1410 void netif_device_attach(struct net_device *dev)
1411 {
1412         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1413             netif_running(dev)) {
1414                 netif_wake_queue(dev);
1415                 __netdev_watchdog_up(dev);
1416         }
1417 }
1418 EXPORT_SYMBOL(netif_device_attach);
1419
1420 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1421 {
1422         return ((features & NETIF_F_GEN_CSUM) ||
1423                 ((features & NETIF_F_IP_CSUM) &&
1424                  protocol == htons(ETH_P_IP)) ||
1425                 ((features & NETIF_F_IPV6_CSUM) &&
1426                  protocol == htons(ETH_P_IPV6)));
1427 }
1428
1429 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1430 {
1431         if (can_checksum_protocol(dev->features, skb->protocol))
1432                 return true;
1433
1434         if (skb->protocol == htons(ETH_P_8021Q)) {
1435                 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1436                 if (can_checksum_protocol(dev->features & dev->vlan_features,
1437                                           veh->h_vlan_encapsulated_proto))
1438                         return true;
1439         }
1440
1441         return false;
1442 }
1443
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 *
 * Returns 0 on success, or the error from pskb_expand_head() when the
 * checksum field could not be made writable; in that case
 * skb->ip_summed is left unchanged.  On every other path
 * skb->ip_summed ends up as CHECKSUM_NONE.
 */
int skb_checksum_help(struct sk_buff *skb)
{
        __wsum csum;
        int ret = 0, offset;

        /* Nothing to compute here, just drop the summed state. */
        if (skb->ip_summed == CHECKSUM_COMPLETE)
                goto out_set_summed;

        if (unlikely(skb_shinfo(skb)->gso_size)) {
                /* Let GSO fix up the checksum. */
                goto out_set_summed;
        }

        /* Turn csum_start into an offset from skb->data. */
        offset = skb->csum_start - skb_headroom(skb);
        BUG_ON(offset >= skb_headlen(skb));
        /* Checksum everything from that offset to the end of the packet. */
        csum = skb_checksum(skb, offset, skb->len - offset, 0);

        /* Position of the checksum field itself; must lie in the head. */
        offset += skb->csum_offset;
        BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

        /* Get a private copy of the head if the field is not writable. */
        if (skb_cloned(skb) &&
            !skb_clone_writable(skb, offset + sizeof(__sum16))) {
                ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                if (ret)
                        goto out;
        }

        *(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
        skb->ip_summed = CHECKSUM_NONE;
out:
        return ret;
}
1481
/**
 *      skb_gso_segment - Perform segmentation on skb.
 *      @skb: buffer to segment
 *      @features: features for the output path (see dev->features)
 *
 *      This function segments the given skb and returns a list of segments.
 *
 *      It may return NULL if the skb requires no segmentation.  This is
 *      only possible when GSO is used for verifying header integrity.
 *
 *      An ERR_PTR() is returned on failure; it is -EPROTONOSUPPORT when
 *      no handler with a gso_segment method is registered for the
 *      packet's protocol.  On return skb->data points at the MAC header
 *      again.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
        struct packet_type *ptype;
        __be16 type = skb->protocol;
        int err;

        BUG_ON(skb_shinfo(skb)->frag_list);

        /* Record the MAC header position/length and step over it. */
        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);

        /*
         * Anything but CHECKSUM_PARTIAL is unexpected here (warn); such
         * a packet gets a private header copy if its header is cloned.
         */
        if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
                if (skb_header_cloned(skb) &&
                    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
                        return ERR_PTR(err);
        }

        rcu_read_lock();
        list_for_each_entry_rcu(ptype,
                        &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
                if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
                        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                                err = ptype->gso_send_check(skb);
                                /* ERR_PTR(0) is NULL: "nothing to segment". */
                                segs = ERR_PTR(err);
                                if (err || skb_gso_ok(skb, features))
                                        break;
                                /* Move skb->data back to the network header. */
                                __skb_push(skb, (skb->data -
                                                 skb_network_header(skb)));
                        }
                        segs = ptype->gso_segment(skb, features);
                        break;
                }
        }
        rcu_read_unlock();

        /* Restore skb->data to the MAC header. */
        __skb_push(skb, skb->data - skb_mac_header(skb));

        return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1535
/*
 * Take action when hardware reception checksum errors are detected:
 * log the failing device (rate limited) together with a stack dump.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
        if (!net_ratelimit())
                return;

        printk(KERN_ERR "%s: hw csum failure.\n",
                dev ? dev->name : "<unknown>");
        dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1548
/*
 * Returns 1 when @skb has a page fragment in high memory and @dev does
 * not advertise NETIF_F_HIGHDMA, 0 otherwise (always 0 without
 * CONFIG_HIGHMEM).
 *
 * This check should be eliminated as soon as we know that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
        int frag = 0;

        if (dev->features & NETIF_F_HIGHDMA)
                return 0;

        while (frag < skb_shinfo(skb)->nr_frags) {
                if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
                        return 1;
                frag++;
        }

#endif
        return 0;
}
1569
/*
 * Private data kept in skb->cb while an skb carries a list of GSO
 * segments: the destructor the skb had before dev_gso_segment()
 * replaced it with dev_gso_skb_destructor().
 */
struct dev_gso_cb {
        void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1575
/*
 * Destructor installed by dev_gso_segment().  Frees every segment still
 * chained on skb->next, then calls the destructor saved in the skb's
 * control buffer, if there is one.
 *
 * The do/while loop dereferences skb->next before testing it, so the
 * chain must hold at least one segment when this runs.
 */
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
        struct dev_gso_cb *cb;

        do {
                struct sk_buff *nskb = skb->next;

                /* Unlink the first remaining segment and free it. */
                skb->next = nskb->next;
                nskb->next = NULL;
                kfree_skb(nskb);
        } while (skb->next);

        cb = DEV_GSO_CB(skb);
        if (cb->destructor)
                cb->destructor(skb);
}
1592
1593 /**
1594  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1595  *      @skb: buffer to segment
1596  *
1597  *      This function segments the given skb and stores the list of segments
1598  *      in skb->next.
1599  */
1600 static int dev_gso_segment(struct sk_buff *skb)
1601 {
1602         struct net_device *dev = skb->dev;
1603         struct sk_buff *segs;
1604         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1605                                          NETIF_F_SG : 0);
1606
1607         segs = skb_gso_segment(skb, features);
1608
1609         /* Verifying header integrity only. */
1610         if (!segs)
1611                 return 0;
1612
1613         if (IS_ERR(segs))
1614                 return PTR_ERR(segs);
1615
1616         skb->next = segs;
1617         DEV_GSO_CB(skb)->destructor = skb->destructor;
1618         skb->destructor = dev_gso_skb_destructor;
1619
1620         return 0;
1621 }
1622
/*
 * Hand @skb to the driver's hard_start_xmit method.
 *
 * An skb without a segment list is first shown to the network taps, if
 * there are any.  If the device needs software GSO for it, it is
 * segmented and the segments are sent one by one; otherwise it is sent
 * directly and the driver's return value is passed back.
 *
 * An skb that already carries a segment list (skb->next set) continues
 * with the remaining segments.
 *
 * Returns the driver's non-zero code if a segment is refused (that
 * segment is put back at the head of the list), NETDEV_TX_BUSY if @txq
 * is stopped while segments remain, and 0 once everything was sent.
 * A segmentation failure also frees @skb and returns 0.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        struct netdev_queue *txq)
{
        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
                        dev_queue_xmit_nit(skb, dev);

                if (netif_needs_gso(dev, skb)) {
                        if (unlikely(dev_gso_segment(skb)))
                                goto out_kfree_skb;
                        /* Segments were produced: send them instead. */
                        if (skb->next)
                                goto gso;
                }

                return dev->hard_start_xmit(skb, dev);
        }

gso:
        do {
                struct sk_buff *nskb = skb->next;
                int rc;

                /* Detach the first pending segment and transmit it. */
                skb->next = nskb->next;
                nskb->next = NULL;
                rc = dev->hard_start_xmit(nskb, dev);
                if (unlikely(rc)) {
                        /* Refused: relink it so that it is sent first later. */
                        nskb->next = skb->next;
                        skb->next = nskb;
                        return rc;
                }
                if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
                        return NETDEV_TX_BUSY;
        } while (skb->next);

        /* All segments are gone: restore the saved destructor. */
        skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
        kfree_skb(skb);
        return 0;
}
1663
/* Random seed for simple_tx_hash(), filled in on first use. */
static u32 simple_tx_hashrnd;
static int simple_tx_hashrnd_initialized = 0;

/*
 * Pick a transmit queue index for @skb by hashing its flow.
 *
 * For IPv4 and IPv6 the hash covers the source and destination address
 * (the last 32 bits of each for IPv6) and, for the transport protocols
 * listed below, the first 32 bits that follow the network header.  For
 * IPv4 packets with IP_MF or a fragment offset set the protocol is left
 * at 0, so those 32 bits are not used.  Any other protocol maps to
 * queue 0.
 *
 * The 32-bit hash is scaled into the range
 * [0, dev->real_num_tx_queues).
 */
static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
{
        u32 addr1, addr2, ports;
        u32 hash, ihl;
        u8 ip_proto = 0;

        if (unlikely(!simple_tx_hashrnd_initialized)) {
                get_random_bytes(&simple_tx_hashrnd, 4);
                simple_tx_hashrnd_initialized = 1;
        }

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
                        ip_proto = ip_hdr(skb)->protocol;
                addr1 = ip_hdr(skb)->saddr;
                addr2 = ip_hdr(skb)->daddr;
                ihl = ip_hdr(skb)->ihl;
                break;
        case __constant_htons(ETH_P_IPV6):
                ip_proto = ipv6_hdr(skb)->nexthdr;
                addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
                addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
                /* Fixed 40-byte header, expressed in 32-bit words. */
                ihl = (40 >> 2);
                break;
        default:
                return 0;
        }


        switch (ip_proto) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
        case IPPROTO_DCCP:
        case IPPROTO_ESP:
        case IPPROTO_AH:
        case IPPROTO_SCTP:
        case IPPROTO_UDPLITE:
                /* First 32 bits after the network header (ihl is in words). */
                ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
                break;

        default:
                ports = 0;
                break;
        }

        hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);

        /* Scale the hash to a queue index without a modulo. */
        return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
1717
1718 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1719                                         struct sk_buff *skb)
1720 {
1721         u16 queue_index = 0;
1722
1723         if (dev->select_queue)
1724                 queue_index = dev->select_queue(dev, skb);
1725         else if (dev->real_num_tx_queues > 1)
1726                 queue_index = simple_tx_hash(dev, skb);
1727
1728         skb_set_queue_mapping(skb, queue_index);
1729         return netdev_get_tx_queue(dev, queue_index);
1730 }
1731
1732 /**
1733  *      dev_queue_xmit - transmit a buffer
1734  *      @skb: buffer to transmit
1735  *
1736  *      Queue a buffer for transmission to a network device. The caller must
1737  *      have set the device and priority and built the buffer before calling
1738  *      this function. The function can be called from an interrupt.
1739  *
1740  *      A negative errno code is returned on a failure. A success does not
1741  *      guarantee the frame will be transmitted as it may be dropped due
1742  *      to congestion or traffic shaping.
1743  *
1744  * -----------------------------------------------------------------------------------
1745  *      I notice this method can also return errors from the queue disciplines,
1746  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1747  *      be positive.
1748  *
1749  *      Regardless of the return value, the skb is consumed, so it is currently
1750  *      difficult to retry a send to this method.  (You can bump the ref count
1751  *      before sending to hold a reference for retry if you are careful.)
1752  *
1753  *      When calling this method, interrupts MUST be enabled.  This is because
1754  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1755  *          --BLG
      *
      *      Return value summary, as implemented below:
      *        -ENOMEM    linearizing the skb or completing its checksum failed
      *        -ENETDOWN  the device has no queue and the frame could not be
      *                   handed to the driver (device not IFF_UP, queue
      *                   stopped, driver returned non-zero, or recursion on
      *                   this CPU)
      *        otherwise  NET_XMIT_DROP for a deactivated qdisc, else the
      *                   result of qdisc_enqueue_root(); 0 when a queue-less
      *                   device accepted the frame directly
1756  */
1757 int dev_queue_xmit(struct sk_buff *skb)
1758 {
1759         struct net_device *dev = skb->dev;
1760         struct netdev_queue *txq;
1761         struct Qdisc *q;
             /* Result used by the early failures that jump to out_kfree_skb. */
1762         int rc = -ENOMEM;
1763
1764         /* GSO will handle the following emulations directly. */
1765         if (netif_needs_gso(dev, skb))
1766                 goto gso;
1767
             /* A frag_list the device cannot handle must be linearized. */
1768         if (skb_shinfo(skb)->frag_list &&
1769             !(dev->features & NETIF_F_FRAGLIST) &&
1770             __skb_linearize(skb))
1771                 goto out_kfree_skb;
1772
1773         /* Fragmented skb is linearized if device does not support SG,
1774          * or if at least one of fragments is in highmem and device
1775          * does not support DMA from it.
1776          */
1777         if (skb_shinfo(skb)->nr_frags &&
1778             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1779             __skb_linearize(skb))
1780                 goto out_kfree_skb;
1781
1782         /* If packet is not checksummed and device does not support
1783          * checksumming for this protocol, complete checksumming here.
1784          */
1785         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1786                 skb_set_transport_header(skb, skb->csum_start -
1787                                               skb_headroom(skb));
1788                 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1789                         goto out_kfree_skb;
1790         }
1791
1792 gso:
1793         /* Disable soft irqs for various locks below. Also
1794          * stops preemption for RCU.
1795          */
1796         rcu_read_lock_bh();
1797
1798         txq = dev_pick_tx(dev, skb);
1799         q = rcu_dereference(txq->qdisc);
1800
1801 #ifdef CONFIG_NET_CLS_ACT
1802         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1803 #endif
             /* Qdisc with an enqueue method: queue the skb under the qdisc
              * lock and kick the qdisc.  A deactivated qdisc drops the skb.
              */
1804         if (q->enqueue) {
1805                 spinlock_t *root_lock = qdisc_lock(q);
1806
1807                 spin_lock(root_lock);
1808
1809                 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1810                         kfree_skb(skb);
1811                         rc = NET_XMIT_DROP;
1812                 } else {
1813                         rc = qdisc_enqueue_root(skb, q);
1814                         qdisc_run(q);
1815                 }
1816                 spin_unlock(root_lock);
1817
1818                 goto out;
1819         }
1820
1821         /* The device has no queue. Common case for software devices:
1822            loopback, all sorts of tunnels...
1823
1824            It is unlikely that netif_tx_lock protection is really necessary
1825            here (e.g. loopback and IP tunnels are clean, ignoring statistics
1826            counters).
1827            However, it is possible that they rely on the protection
1828            provided by us here.
1829
1830            Check this and remove the lock. It is not prone to deadlocks.
1831            Or remove the noqueue qdisc, which is even simpler 8)
1832          */
1833         if (dev->flags & IFF_UP) {
1834                 int cpu = smp_processor_id(); /* ok because BHs are off */
1835
                     /* If this CPU already owns the TX lock we got here
                      * from within our own transmit path: do not lock again.
                      */
1836                 if (txq->xmit_lock_owner != cpu) {
1837
1838                         HARD_TX_LOCK(dev, txq, cpu);
1839
1840                         if (!netif_tx_queue_stopped(txq)) {
1841                                 rc = 0;
                                     /* Zero from the driver means the skb
                                      * was taken; return success.
                                      */
1842                                 if (!dev_hard_start_xmit(skb, dev, txq)) {
1843                                         HARD_TX_UNLOCK(dev, txq);
1844                                         goto out;
1845                                 }
1846                         }
1847                         HARD_TX_UNLOCK(dev, txq);
1848                         if (net_ratelimit())
1849                                 printk(KERN_CRIT "Virtual device %s asks to "
1850                                        "queue packet!\n", dev->name);
1851                 } else {
1852                         /* Recursion is detected! It is possible,
1853                          * unfortunately */
1854                         if (net_ratelimit())
1855                                 printk(KERN_CRIT "Dead loop on virtual device "
1856                                        "%s, fix it urgently!\n", dev->name);
1857                 }
1858         }
1859
             /* Every queue-less path that did not hand the skb to the driver
              * ends here; this overrides the rc = 0 set above.
              */
1860         rc = -ENETDOWN;
1861         rcu_read_unlock_bh();
1862
             /* Reached by fall-through (RCU already unlocked above) or by the
              * early gotos taken before rcu_read_lock_bh().
              */
1863 out_kfree_skb:
1864         kfree_skb(skb);
1865         return rc;
1866 out:
1867         rcu_read_unlock_bh();
1868         return rc;
1869 }
1870
1871
1872 /*=======================================================================
1873                         Receiver routines
1874   =======================================================================*/
1875
     /* netif_rx() drops a packet once this CPU's input_pkt_queue is longer than this. */
1876 int netdev_max_backlog __read_mostly = 1000;
     /* Starting work budget for one run of net_rx_action(). */
1877 int netdev_budget __read_mostly = 300;
     /* Copied into napi->weight by process_backlog() on every call. */
1878 int weight_p __read_mostly = 64;            /* old backlog weight */
1879
     /* Per-CPU receive counters (total, dropped, time_squeeze are updated in this file). */
1880 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1881
1882
1883 /**
1884  *      netif_rx        -       post buffer to the network code
1885  *      @skb: buffer to post
1886  *
1887  *      This function receives a packet from a device driver and queues it for
1888  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1889  *      may be dropped during processing for congestion control or by the
1890  *      protocol layers.
1891  *
1892  *      return values:
1893  *      NET_RX_SUCCESS  (no congestion)
1894  *      NET_RX_DROP     (packet was dropped)
1895  *
1896  */
1897
1898 int netif_rx(struct sk_buff *skb)
1899 {
1900         struct softnet_data *queue;
1901         unsigned long flags;
1902
1903         /* if netpoll wants it, pretend we never saw it */
1904         if (netpoll_rx(skb))
1905                 return NET_RX_DROP;
1906
1907         if (!skb->tstamp.tv64)
1908                 net_timestamp(skb);
1909
1910         /*
1911          * The code is rearranged so that the path is the most
1912          * short when CPU is congested, but is still operating.
1913          */
1914         local_irq_save(flags);
1915         queue = &__get_cpu_var(softnet_data);
1916
1917         __get_cpu_var(netdev_rx_stat).total++;
1918         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1919                 if (queue->input_pkt_queue.qlen) {
1920 enqueue:
1921                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1922                         local_irq_restore(flags);
1923                         return NET_RX_SUCCESS;
1924                 }
1925
1926                 napi_schedule(&queue->backlog);
1927                 goto enqueue;
1928         }
1929
1930         __get_cpu_var(netdev_rx_stat).dropped++;
1931         local_irq_restore(flags);
1932
1933         kfree_skb(skb);
1934         return NET_RX_DROP;
1935 }
1936
/*
 * netif_rx_ni - netif_rx() followed by an immediate softirq run
 * @skb: buffer to post
 *
 * Posts @skb with netif_rx() while preemption is disabled and, if any
 * softirq is pending afterwards, runs do_softirq() before re-enabling
 * preemption.  Returns whatever netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
        int rx_result;

        preempt_disable();

        rx_result = netif_rx(skb);
        if (local_softirq_pending())
                do_softirq();

        preempt_enable();

        return rx_result;
}

EXPORT_SYMBOL(netif_rx_ni);
1951
1952 static void net_tx_action(struct softirq_action *h)
1953 {
1954         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1955
1956         if (sd->completion_queue) {
1957                 struct sk_buff *clist;
1958
1959                 local_irq_disable();
1960                 clist = sd->completion_queue;
1961                 sd->completion_queue = NULL;
1962                 local_irq_enable();
1963
1964                 while (clist) {
1965                         struct sk_buff *skb = clist;
1966                         clist = clist->next;
1967
1968                         WARN_ON(atomic_read(&skb->users));
1969                         __kfree_skb(skb);
1970                 }
1971         }
1972
1973         if (sd->output_queue) {
1974                 struct Qdisc *head;
1975
1976                 local_irq_disable();
1977                 head = sd->output_queue;
1978                 sd->output_queue = NULL;
1979                 local_irq_enable();
1980
1981                 while (head) {
1982                         struct Qdisc *q = head;
1983                         spinlock_t *root_lock;
1984
1985                         head = head->next_sched;
1986
1987                         root_lock = qdisc_lock(q);
1988                         if (spin_trylock(root_lock)) {
1989                                 smp_mb__before_clear_bit();
1990                                 clear_bit(__QDISC_STATE_SCHED,
1991                                           &q->state);
1992                                 qdisc_run(q);
1993                                 spin_unlock(root_lock);
1994                         } else {
1995                                 if (!test_bit(__QDISC_STATE_DEACTIVATED,
1996                                               &q->state)) {
1997                                         __netif_reschedule(q);
1998                                 } else {
1999                                         smp_mb__before_clear_bit();
2000                                         clear_bit(__QDISC_STATE_SCHED,
2001                                                   &q->state);
2002                                 }
2003                         }
2004                 }
2005         }
2006 }
2007
2008 static inline int deliver_skb(struct sk_buff *skb,
2009                               struct packet_type *pt_prev,
2010                               struct net_device *orig_dev)
2011 {
2012         atomic_inc(&skb->users);
2013         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2014 }
2015
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
                                                unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
                                        struct sk_buff *skb) __read_mostly;

/*
 * Give the bridge a chance to take @skb.
 *
 * Loopback packets and packets from devices without a bridge port are
 * returned untouched.  Otherwise any pending handler in *@pt_prev is
 * delivered first (its result stored in *@ret) and the skb is passed to
 * br_handle_frame_hook(), whose return value is returned.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
                                            struct packet_type **pt_prev, int *ret,
                                            struct net_device *orig_dev)
{
        struct net_bridge_port *br_port;

        if (skb->pkt_type == PACKET_LOOPBACK)
                return skb;

        br_port = rcu_dereference(skb->dev->br_port);
        if (br_port == NULL)
                return skb;

        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }

        return br_handle_frame_hook(br_port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)      (skb)
#endif
2049
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Give macvlan a chance to take @skb.
 *
 * Packets from devices without a macvlan port are returned untouched.
 * Otherwise any pending handler in *@pt_prev is delivered first (its
 * result stored in *@ret) and the skb is passed to
 * macvlan_handle_frame_hook(), whose return value is returned.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
                                             struct packet_type **pt_prev,
                                             int *ret,
                                             struct net_device *orig_dev)
{
        struct packet_type *pending = *pt_prev;

        if (!skb->dev->macvlan_port)
                return skb;

        if (pending) {
                *ret = deliver_skb(skb, pending, orig_dev);
                *pt_prev = NULL;
        }

        return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)     (skb)
#endif
2071
2072 #ifdef CONFIG_NET_CLS_ACT
2073 /* TODO: Maybe we should just force sch_ingress to be compiled in
2074  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2075  * a compare and 2 stores extra right now if we dont have it on
2076  * but have CONFIG_NET_CLS_ACT
2077  * NOTE: This doesnt stop any functionality; if you dont have
2078  * the ingress scheduler, you just cant add policies on ingress.
2079  *
2080  */
2081 static int ing_filter(struct sk_buff *skb)
2082 {
2083         struct net_device *dev = skb->dev;
2084         u32 ttl = G_TC_RTTL(skb->tc_verd);
2085         struct netdev_queue *rxq;
2086         int result = TC_ACT_OK;
2087         struct Qdisc *q;
2088
2089         if (MAX_RED_LOOP < ttl++) {
2090                 printk(KERN_WARNING
2091                        "Redir loop detected Dropping packet (%d->%d)\n",
2092                        skb->iif, dev->ifindex);
2093                 return TC_ACT_SHOT;
2094         }
2095
2096         skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2097         skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2098
2099         rxq = &dev->rx_queue;
2100
2101         q = rxq->qdisc;
2102         if (q != &noop_qdisc) {
2103                 spin_lock(qdisc_lock(q));
2104                 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2105                         result = qdisc_enqueue_root(skb, q);
2106                 spin_unlock(qdisc_lock(q));
2107         }
2108
2109         return result;
2110 }
2111
2112 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2113                                          struct packet_type **pt_prev,
2114                                          int *ret, struct net_device *orig_dev)
2115 {
2116         if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2117                 goto out;
2118
2119         if (*pt_prev) {
2120                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2121                 *pt_prev = NULL;
2122         } else {
2123                 /* Huh? Why does turning on AF_PACKET affect this? */
2124                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2125         }
2126
2127         switch (ing_filter(skb)) {
2128         case TC_ACT_SHOT:
2129         case TC_ACT_STOLEN:
2130                 kfree_skb(skb);
2131                 return NULL;
2132         }
2133
2134 out:
2135         skb->tc_verd = 0;
2136         return skb;
2137 }
2138 #endif
2139
2140 /*
2141  *      netif_nit_deliver - deliver received packets to network taps
2142  *      @skb: buffer
2143  *
2144  *      This function is used to deliver incoming packets to network
2145  *      taps. It should be used when the normal netif_receive_skb path
2146  *      is bypassed, for example because of VLAN acceleration.
2147  */
2148 void netif_nit_deliver(struct sk_buff *skb)
2149 {
2150         struct packet_type *ptype;
2151
2152         if (list_empty(&ptype_all))
2153                 return;
2154
2155         skb_reset_network_header(skb);
2156         skb_reset_transport_header(skb);
2157         skb->mac_len = skb->network_header - skb->mac_header;
2158
2159         rcu_read_lock();
2160         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2161                 if (!ptype->dev || ptype->dev == skb->dev)
2162                         deliver_skb(skb, ptype, skb->dev);
2163         }
2164         rcu_read_unlock();
2165 }
2166
2167 /**
2168  *      netif_receive_skb - process receive buffer from network
2169  *      @skb: buffer to process
2170  *
2171  *      netif_receive_skb() is the main receive data processing function.
2172  *      It always succeeds. The buffer may be dropped during processing
2173  *      for congestion control or by the protocol layers.
2174  *
2175  *      This function may only be called from softirq context and interrupts
2176  *      should be enabled.
2177  *
2178  *      Return values (usually ignored):
2179  *      NET_RX_SUCCESS: no congestion
2180  *      NET_RX_DROP: packet was dropped
2181  */
2182 int netif_receive_skb(struct sk_buff *skb)
2183 {
2184         struct packet_type *ptype, *pt_prev;
2185         struct net_device *orig_dev;
2186         struct net_device *null_or_orig;
2187         int ret = NET_RX_DROP;
2188         __be16 type;
2189
2190         /* if we've gotten here through NAPI, check netpoll */
2191         if (netpoll_receive_skb(skb))
2192                 return NET_RX_DROP;
2193
2194         if (!skb->tstamp.tv64)
2195                 net_timestamp(skb);
2196
2197         if (!skb->iif)
2198                 skb->iif = skb->dev->ifindex;
2199
2200         null_or_orig = NULL;
2201         orig_dev = skb->dev;
2202         if (orig_dev->master) {
2203                 if (skb_bond_should_drop(skb))
2204                         null_or_orig = orig_dev; /* deliver only exact match */
2205                 else
2206                         skb->dev = orig_dev->master;
2207         }
2208
2209         __get_cpu_var(netdev_rx_stat).total++;
2210
2211         skb_reset_network_header(skb);
2212         skb_reset_transport_header(skb);
2213         skb->mac_len = skb->network_header - skb->mac_header;
2214
2215         pt_prev = NULL;
2216
2217         rcu_read_lock();
2218
2219         /* Don't receive packets in an exiting network namespace */
2220         if (!net_alive(dev_net(skb->dev)))
2221                 goto out;
2222
2223 #ifdef CONFIG_NET_CLS_ACT
2224         if (skb->tc_verd & TC_NCLS) {
2225                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2226                 goto ncls;
2227         }
2228 #endif
2229
2230         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2231                 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2232                     ptype->dev == orig_dev) {
2233                         if (pt_prev)
2234                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2235                         pt_prev = ptype;
2236                 }
2237         }
2238
2239 #ifdef CONFIG_NET_CLS_ACT
2240         skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2241         if (!skb)
2242                 goto out;
2243 ncls:
2244 #endif
2245
2246         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2247         if (!skb)
2248                 goto out;
2249         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2250         if (!skb)
2251                 goto out;
2252
2253         type = skb->protocol;
2254         list_for_each_entry_rcu(ptype,
2255                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2256                 if (ptype->type == type &&
2257                     (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2258                      ptype->dev == orig_dev)) {
2259                         if (pt_prev)
2260                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2261                         pt_prev = ptype;
2262                 }
2263         }
2264
2265         if (pt_prev) {
2266                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2267         } else {
2268                 kfree_skb(skb);
2269                 /* Jamal, now you will not able to escape explaining
2270                  * me how you were going to use this. :-)
2271                  */
2272                 ret = NET_RX_DROP;
2273         }
2274
2275 out:
2276         rcu_read_unlock();
2277         return ret;
2278 }
2279
2280 /* Network device is going away, flush any packets still pending  */
2281 static void flush_backlog(void *arg)
2282 {
2283         struct net_device *dev = arg;
2284         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2285         struct sk_buff *skb, *tmp;
2286
2287         skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2288                 if (skb->dev == dev) {
2289                         __skb_unlink(skb, &queue->input_pkt_queue);
2290                         kfree_skb(skb);
2291                 }
2292 }
2293
2294 static int process_backlog(struct napi_struct *napi, int quota)
2295 {
2296         int work = 0;
2297         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2298         unsigned long start_time = jiffies;
2299
2300         napi->weight = weight_p;
2301         do {
2302                 struct sk_buff *skb;
2303
2304                 local_irq_disable();
2305                 skb = __skb_dequeue(&queue->input_pkt_queue);
2306                 if (!skb) {
2307                         __napi_complete(napi);
2308                         local_irq_enable();
2309                         break;
2310                 }
2311                 local_irq_enable();
2312
2313                 netif_receive_skb(skb);
2314         } while (++work < quota && jiffies == start_time);
2315
2316         return work;
2317 }
2318
2319 /**
2320  * __napi_schedule - schedule for receive
2321  * @n: entry to schedule
2322  *
2323  * The entry's receive function will be scheduled to run
2324  */
2325 void __napi_schedule(struct napi_struct *n)
2326 {
2327         unsigned long flags;
2328
2329         local_irq_save(flags);
2330         list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2331         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2332         local_irq_restore(flags);
2333 }
2334 EXPORT_SYMBOL(__napi_schedule);
2335
2336
     /*
      * Poll the NAPI instances queued on this CPU's softnet_data.poll_list.
      *
      * Runs until the list is empty, the budget (initialised from
      * netdev_budget) is used up, or jiffies has advanced since entry.  In
      * the last two cases time_squeeze is counted and NET_RX_SOFTIRQ is
      * raised again.  @h is unused.  Presumably registered as the
      * NET_RX_SOFTIRQ handler; the registration is not visible here.
      */
2337 static void net_rx_action(struct softirq_action *h)
2338 {
2339         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2340         unsigned long start_time = jiffies;
2341         int budget = netdev_budget;
2342         void *have;
2343
2344         local_irq_disable();
2345
2346         while (!list_empty(list)) {
2347                 struct napi_struct *n;
2348                 int work, weight;
2349
2350                 /* If softirq window is exhausted then punt.
2351                  *
2352                  * Note that this is a slight policy change from the
2353                  * previous NAPI code, which would allow up to 2
2354                  * jiffies to pass before breaking out.  The test
2355                  * used to be "jiffies - start_time > 1".
2356                  */
2357                 if (unlikely(budget <= 0 || jiffies != start_time))
2358                         goto softnet_break;
2359
2360                 local_irq_enable();
2361
2362                 /* Even though interrupts have been re-enabled, this
2363                  * access is safe because interrupts can only add new
2364                  * entries to the tail of this list, and only ->poll()
2365                  * calls can remove this head entry from the list.
2366                  */
2367                 n = list_entry(list->next, struct napi_struct, poll_list);
2368
2369                 have = netpoll_poll_lock(n);
2370
2371                 weight = n->weight;
2372
2373                 /* This NAPI_STATE_SCHED test is for avoiding a race
2374                  * with netpoll's poll_napi().  Only the entity which
2375                  * obtains the lock and sees NAPI_STATE_SCHED set will
2376                  * actually make the ->poll() call.  Therefore we avoid
2377                  * accidentally calling ->poll() when NAPI is not scheduled.
2378                  */
2379                 work = 0;
2380                 if (test_bit(NAPI_STATE_SCHED, &n->state))
2381                         work = n->poll(n, weight);
2382
                     /* A poll routine must never report more work than its weight. */
2383                 WARN_ON_ONCE(work > weight);
2384
2385                 budget -= work;
2386
2387                 local_irq_disable();
2388
2389                 /* Drivers must not modify the NAPI state if they
2390                  * consume the entire weight.  In such cases this code
2391                  * still "owns" the NAPI instance and therefore can
2392                  * move the instance around on the list at-will.
2393                  */
2394                 if (unlikely(work == weight)) {
2395                         if (unlikely(napi_disable_pending(n)))
2396                                 __napi_complete(n);
2397                         else
2398                                 list_move_tail(&n->poll_list, list);
2399                 }
2400
2401                 netpoll_poll_unlock(have);
2402         }
2403 out:
2404         local_irq_enable();
2405
2406 #ifdef CONFIG_NET_DMA
2407         /*
2408          * There may not be any more sk_buffs coming right now, so push
2409          * any pending DMA copies to hardware
2410          */
2411         if (!cpus_empty(net_dma.channel_mask)) {
2412                 int chan_idx;
2413                 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
2414                         struct dma_chan *chan = net_dma.channels[chan_idx];
2415                         if (chan)
2416                                 dma_async_memcpy_issue_pending(chan);
2417                 }
2418         }
2419 #endif
2420
2421         return;
2422
             /* Entered with interrupts still disabled; "out" re-enables them. */
2423 softnet_break:
2424         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2425         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2426         goto out;
2427 }
2428
     /*
      * SIOCGIFCONF address-dump handlers, indexed by address family.
      * Slots are filled by register_gifconf() and called by dev_ifconf();
      * an empty slot is skipped.
      */
2429 static gifconf_func_t * gifconf_list [NPROTO];
2430
2431 /**
2432  *      register_gifconf        -       register a SIOCGIF handler
2433  *      @family: Address family
2434  *      @gifconf: Function handler
2435  *
2436  *      Register protocol dependent address dumping routines. The handler
2437  *      that is passed must not be freed or reused until it has been replaced
2438  *      by another handler.
2439  */
2440 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2441 {
2442         if (family >= NPROTO)
2443                 return -EINVAL;
2444         gifconf_list[family] = gifconf;
2445         return 0;
2446 }
2447
2448
2449 /*
2450  *      Map an interface index to its name (SIOCGIFNAME)
2451  */
2452
2453 /*
2454  *      We need this ioctl for efficient implementation of the
2455  *      if_indextoname() function required by the IPv6 API.  Without
2456  *      it, we would have to search all the interfaces to find a
2457  *      match.  --pb
2458  */
2459
2460 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2461 {
2462         struct net_device *dev;
2463         struct ifreq ifr;
2464
2465         /*
2466          *      Fetch the caller's info block.
2467          */
2468
2469         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2470                 return -EFAULT;
2471
2472         read_lock(&dev_base_lock);
2473         dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2474         if (!dev) {
2475                 read_unlock(&dev_base_lock);
2476                 return -ENODEV;
2477         }
2478
2479         strcpy(ifr.ifr_name, dev->name);
2480         read_unlock(&dev_base_lock);
2481
2482         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2483                 return -EFAULT;
2484         return 0;
2485 }
2486
2487 /*
2488  *      Perform a SIOCGIFCONF call. This structure will change
2489  *      size eventually, and there is nothing I can do about it.
2490  *      Thus we will need a 'compatibility mode'.
2491  */
2492
2493 static int dev_ifconf(struct net *net, char __user *arg)
2494 {
2495         struct ifconf ifc;
2496         struct net_device *dev;
2497         char __user *pos;
2498         int len;
2499         int total;
2500         int i;
2501
2502         /*
2503          *      Fetch the caller's info block.
2504          */
2505
2506         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2507                 return -EFAULT;
2508
2509         pos = ifc.ifc_buf;
2510         len = ifc.ifc_len;
2511
2512         /*
2513          *      Loop over the interfaces, and write an info block for each.
2514          */
2515
2516         total = 0;
2517         for_each_netdev(net, dev) {
2518                 for (i = 0; i < NPROTO; i++) {
2519                         if (gifconf_list[i]) {
2520                                 int done;
2521                                 if (!pos)
2522                                         done = gifconf_list[i](dev, NULL, 0);
2523                                 else
2524                                         done = gifconf_list[i](dev, pos + total,
2525                                                                len - total);
2526                                 if (done < 0)
2527                                         return -EFAULT;
2528                                 total += done;
2529                         }
2530                 }
2531         }
2532
2533         /*
2534          *      All done.  Write the updated control block back to the caller.
2535          */
2536         ifc.ifc_len = total;
2537
2538         /*
2539          *      Both BSD and Solaris return 0 here, so we do too.
2540          */
2541         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2542 }
2543
2544 #ifdef CONFIG_PROC_FS
2545 /*
2546  *      This is invoked by the /proc filesystem handler to display a device
2547  *      in detail.
2548  */
2549 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2550         __acquires(dev_base_lock)
2551 {
2552         struct net *net = seq_file_net(seq);
2553         loff_t off;
2554         struct net_device *dev;
2555
2556         read_lock(&dev_base_lock);
2557         if (!*pos)
2558                 return SEQ_START_TOKEN;
2559
2560         off = 1;
2561         for_each_netdev(net, dev)
2562                 if (off++ == *pos)
2563                         return dev;
2564
2565         return NULL;
2566 }
2567
2568 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2569 {
2570         struct net *net = seq_file_net(seq);
2571         ++*pos;
2572         return v == SEQ_START_TOKEN ?
2573                 first_net_device(net) : next_net_device((struct net_device *)v);
2574 }
2575
     /* seq_file stop callback: drop the dev_base_lock read lock taken in dev_seq_start(). */
2576 void dev_seq_stop(struct seq_file *seq, void *v)
2577         __releases(dev_base_lock)
2578 {
2579         read_unlock(&dev_base_lock);
2580 }
2581
2582 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2583 {
2584         struct net_device_stats *stats = dev->get_stats(dev);
2585
2586         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2587                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2588                    dev->name, stats->rx_bytes, stats->rx_packets,
2589                    stats->rx_errors,
2590                    stats->rx_dropped + stats->rx_missed_errors,
2591                    stats->rx_fifo_errors,
2592                    stats->rx_length_errors + stats->rx_over_errors +
2593                     stats->rx_crc_errors + stats->rx_frame_errors,
2594                    stats->rx_compressed, stats->multicast,
2595                    stats->tx_bytes, stats->tx_packets,
2596                    stats->tx_errors, stats->tx_dropped,
2597                    stats->tx_fifo_errors, stats->collisions,
2598                    stats->tx_carrier_errors +
2599                     stats->tx_aborted_errors +
2600                     stats->tx_window_errors +
2601                     stats->tx_heartbeat_errors,
2602                    stats->tx_compressed);
2603 }
2604
2605 /*
2606  *      Called from the PROCfs module. This now uses the new arbitrary sized
2607  *      /proc/net interface to create /proc/net/dev
2608  */
2609 static int dev_seq_show(struct seq_file *seq, void *v)
2610 {
2611         if (v == SEQ_START_TOKEN)
2612                 seq_puts(seq, "Inter-|   Receive                            "
2613                               "                    |  Transmit\n"
2614                               " face |bytes    packets errs drop fifo frame "
2615                               "compressed multicast|bytes    packets errs "
2616                               "drop fifo colls carrier compressed\n");
2617         else
2618                 dev_seq_printf_stats(seq, v);
2619         return 0;
2620 }
2621
2622 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2623 {
2624         struct netif_rx_stats *rc = NULL;
2625
2626         while (*pos < nr_cpu_ids)
2627                 if (cpu_online(*pos)) {
2628                         rc = &per_cpu(netdev_rx_stat, *pos);
2629                         break;
2630                 } else
2631                         ++*pos;
2632         return rc;
2633 }
2634
2635 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2636 {
2637         return softnet_get_online(pos);
2638 }
2639
2640 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2641 {
2642         ++*pos;
2643         return softnet_get_online(pos);
2644 }
2645
/* seq_file ->stop: nothing to release, the iterator takes no locks. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2649
2650 static int softnet_seq_show(struct seq_file *seq, void *v)
2651 {
2652         struct netif_rx_stats *s = v;
2653
2654         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2655                    s->total, s->dropped, s->time_squeeze, 0,
2656                    0, 0, 0, 0, /* was fastroute */
2657                    s->cpu_collision );
2658         return 0;
2659 }
2660
2661 static const struct seq_operations dev_seq_ops = {
2662         .start = dev_seq_start,
2663         .next  = dev_seq_next,
2664         .stop  = dev_seq_stop,
2665         .show  = dev_seq_show,
2666 };
2667
2668 static int dev_seq_open(struct inode *inode, struct file *file)
2669 {
2670         return seq_open_net(inode, file, &dev_seq_ops,
2671                             sizeof(struct seq_net_private));
2672 }
2673
2674 static const struct file_operations dev_seq_fops = {
2675         .owner   = THIS_MODULE,
2676         .open    = dev_seq_open,
2677         .read    = seq_read,
2678         .llseek  = seq_lseek,
2679         .release = seq_release_net,
2680 };
2681
2682 static const struct seq_operations softnet_seq_ops = {
2683         .start = softnet_seq_start,
2684         .next  = softnet_seq_next,
2685         .stop  = softnet_seq_stop,
2686         .show  = softnet_seq_show,
2687 };
2688
2689 static int softnet_seq_open(struct inode *inode, struct file *file)
2690 {
2691         return seq_open(file, &softnet_seq_ops);
2692 }
2693
2694 static const struct file_operations softnet_seq_fops = {
2695         .owner   = THIS_MODULE,
2696         .open    = softnet_seq_open,
2697         .read    = seq_read,
2698         .llseek  = seq_lseek,
2699         .release = seq_release,
2700 };
2701
2702 static void *ptype_get_idx(loff_t pos)
2703 {
2704         struct packet_type *pt = NULL;
2705         loff_t i = 0;
2706         int t;
2707
2708         list_for_each_entry_rcu(pt, &ptype_all, list) {
2709                 if (i == pos)
2710                         return pt;
2711                 ++i;
2712         }
2713
2714         for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2715                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2716                         if (i == pos)
2717                                 return pt;
2718                         ++i;
2719                 }
2720         }
2721         return NULL;
2722 }
2723
2724 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2725         __acquires(RCU)
2726 {
2727         rcu_read_lock();
2728         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2729 }
2730
2731 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2732 {
2733         struct packet_type *pt;
2734         struct list_head *nxt;
2735         int hash;
2736
2737         ++*pos;
2738         if (v == SEQ_START_TOKEN)
2739                 return ptype_get_idx(0);
2740
2741         pt = v;
2742         nxt = pt->list.next;
2743         if (pt->type == htons(ETH_P_ALL)) {
2744                 if (nxt != &ptype_all)
2745                         goto found;
2746                 hash = 0;
2747                 nxt = ptype_base[0].next;
2748         } else
2749                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2750
2751         while (nxt == &ptype_base[hash]) {
2752                 if (++hash >= PTYPE_HASH_SIZE)
2753                         return NULL;
2754                 nxt = ptype_base[hash].next;
2755         }
2756 found:
2757         return list_entry(nxt, struct packet_type, list);
2758 }
2759
2760 static void ptype_seq_stop(struct seq_file *seq, void *v)
2761         __releases(RCU)
2762 {
2763         rcu_read_unlock();
2764 }
2765
/*
 *	Print a handler address symbolically.  With CONFIG_KALLSYMS and a
 *	successful lookup the output is "[:module:]symbol+0xoffset" (the
 *	module part only when a module name was returned); otherwise the raw
 *	pointer is printed in square brackets.
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);

	if (symname) {
		char *sep = ":";

		/* No module name: print neither the name nor its colons. */
		if (!modname) {
			sep = "";
			modname = sep;
		}

		seq_printf(seq, "%s%s%s%s+0x%lx", sep, modname, sep,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2790
2791 static int ptype_seq_show(struct seq_file *seq, void *v)
2792 {
2793         struct packet_type *pt = v;
2794
2795         if (v == SEQ_START_TOKEN)
2796                 seq_puts(seq, "Type Device      Function\n");
2797         else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2798                 if (pt->type == htons(ETH_P_ALL))
2799                         seq_puts(seq, "ALL ");
2800                 else
2801                         seq_printf(seq, "%04x", ntohs(pt->type));
2802
2803                 seq_printf(seq, " %-8s ",
2804                            pt->dev ? pt->dev->name : "");
2805                 ptype_seq_decode(seq,  pt->func);
2806                 seq_putc(seq, '\n');
2807         }
2808
2809         return 0;
2810 }
2811
2812 static const struct seq_operations ptype_seq_ops = {
2813         .start = ptype_seq_start,
2814         .next  = ptype_seq_next,
2815         .stop  = ptype_seq_stop,
2816         .show  = ptype_seq_show,
2817 };
2818
2819 static int ptype_seq_open(struct inode *inode, struct file *file)
2820 {
2821         return seq_open_net(inode, file, &ptype_seq_ops,
2822                         sizeof(struct seq_net_private));
2823 }
2824
2825 static const struct file_operations ptype_seq_fops = {
2826         .owner   = THIS_MODULE,
2827         .open    = ptype_seq_open,
2828         .read    = seq_read,
2829         .llseek  = seq_lseek,
2830         .release = seq_release_net,
2831 };
2832
2833
2834 static int __net_init dev_proc_net_init(struct net *net)
2835 {
2836         int rc = -ENOMEM;
2837
2838         if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2839                 goto out;
2840         if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2841                 goto out_dev;
2842         if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2843                 goto out_softnet;
2844
2845         if (wext_proc_init(net))
2846                 goto out_ptype;
2847         rc = 0;
2848 out:
2849         return rc;
2850 out_ptype:
2851         proc_net_remove(net, "ptype");
2852 out_softnet:
2853         proc_net_remove(net, "softnet_stat");
2854 out_dev:
2855         proc_net_remove(net, "dev");
2856         goto out;
2857 }
2858
2859 static void __net_exit dev_proc_net_exit(struct net *net)
2860 {
2861         wext_proc_exit(net);
2862
2863         proc_net_remove(net, "ptype");
2864         proc_net_remove(net, "softnet_stat");
2865         proc_net_remove(net, "dev");
2866 }
2867
2868 static struct pernet_operations __net_initdata dev_proc_ops = {
2869         .init = dev_proc_net_init,
2870         .exit = dev_proc_net_exit,
2871 };
2872
2873 static int __init dev_proc_init(void)
2874 {
2875         return register_pernet_subsys(&dev_proc_ops);
2876 }
2877 #else
2878 #define dev_proc_init() 0
2879 #endif  /* CONFIG_PROC_FS */
2880
2881
2882 /**
2883  *      netdev_set_master       -       set up master/slave pair
2884  *      @slave: slave device
2885  *      @master: new master device
2886  *
2887  *      Changes the master device of the slave. Pass %NULL to break the
2888  *      bonding. The caller must hold the RTNL semaphore. On a failure
2889  *      a negative errno code is returned. On success the reference counts
2890  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2891  *      function returns zero.
2892  */
2893 int netdev_set_master(struct net_device *slave, struct net_device *master)
2894 {
2895         struct net_device *old = slave->master;
2896
2897         ASSERT_RTNL();
2898
2899         if (master) {
2900                 if (old)
2901                         return -EBUSY;
2902                 dev_hold(master);
2903         }
2904
2905         slave->master = master;
2906
2907         synchronize_net();
2908
2909         if (old)
2910                 dev_put(old);
2911
2912         if (master)
2913                 slave->flags |= IFF_SLAVE;
2914         else
2915                 slave->flags &= ~IFF_SLAVE;
2916
2917         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2918         return 0;
2919 }
2920
2921 static void dev_change_rx_flags(struct net_device *dev, int flags)
2922 {
2923         if (dev->flags & IFF_UP && dev->change_rx_flags)
2924                 dev->change_rx_flags(dev, flags);
2925 }
2926
2927 static int __dev_set_promiscuity(struct net_device *dev, int inc)
2928 {
2929         unsigned short old_flags = dev->flags;
2930
2931         ASSERT_RTNL();
2932
2933         dev->flags |= IFF_PROMISC;
2934         dev->promiscuity += inc;
2935         if (dev->promiscuity == 0) {
2936                 /*
2937                  * Avoid overflow.
2938                  * If inc causes overflow, untouch promisc and return error.
2939                  */
2940                 if (inc < 0)
2941                         dev->flags &= ~IFF_PROMISC;
2942                 else {
2943                         dev->promiscuity -= inc;
2944                         printk(KERN_WARNING "%s: promiscuity touches roof, "
2945                                 "set promiscuity failed, promiscuity feature "
2946                                 "of device might be broken.\n", dev->name);
2947                         return -EOVERFLOW;
2948                 }
2949         }
2950         if (dev->flags != old_flags) {
2951                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2952                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2953                                                                "left");
2954                 if (audit_enabled)
2955                         audit_log(current->audit_context, GFP_ATOMIC,
2956                                 AUDIT_ANOM_PROMISCUOUS,
2957                                 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2958                                 dev->name, (dev->flags & IFF_PROMISC),
2959                                 (old_flags & IFF_PROMISC),
2960                                 audit_get_loginuid(current),
2961                                 current->uid, current->gid,
2962                                 audit_get_sessionid(current));
2963
2964                 dev_change_rx_flags(dev, IFF_PROMISC);
2965         }
2966         return 0;
2967 }
2968
2969 /**
2970  *      dev_set_promiscuity     - update promiscuity count on a device
2971  *      @dev: device
2972  *      @inc: modifier
2973  *
2974  *      Add or remove promiscuity from a device. While the count in the device
2975  *      remains above zero the interface remains promiscuous. Once it hits zero
2976  *      the device reverts back to normal filtering operation. A negative inc
2977  *      value is used to drop promiscuity on the device.
2978  *      Return 0 if successful or a negative errno code on error.
2979  */
2980 int dev_set_promiscuity(struct net_device *dev, int inc)
2981 {
2982         unsigned short old_flags = dev->flags;
2983         int err;
2984
2985         err = __dev_set_promiscuity(dev, inc);
2986         if (err < 0)
2987                 return err;
2988         if (dev->flags != old_flags)
2989                 dev_set_rx_mode(dev);
2990         return err;
2991 }
2992
2993 /**
2994  *      dev_set_allmulti        - update allmulti count on a device
2995  *      @dev: device
2996  *      @inc: modifier
2997  *
2998  *      Add or remove reception of all multicast frames to a device. While the
2999  *      count in the device remains above zero the interface remains listening
3000  *      to all interfaces. Once it hits zero the device reverts back to normal
3001  *      filtering operation. A negative @inc value is used to drop the counter
3002  *      when releasing a resource needing all multicasts.
3003  *      Return 0 if successful or a negative errno code on error.
3004  */
3005
3006 int dev_set_allmulti(struct net_device *dev, int inc)
3007 {
3008         unsigned short old_flags = dev->flags;
3009
3010         ASSERT_RTNL();
3011
3012         dev->flags |= IFF_ALLMULTI;
3013         dev->allmulti += inc;
3014         if (dev->allmulti == 0) {
3015                 /*
3016                  * Avoid overflow.
3017                  * If inc causes overflow, untouch allmulti and return error.
3018                  */
3019                 if (inc < 0)
3020                         dev->flags &= ~IFF_ALLMULTI;
3021                 else {
3022                         dev->allmulti -= inc;
3023                         printk(KERN_WARNING "%s: allmulti touches roof, "
3024                                 "set allmulti failed, allmulti feature of "
3025                                 "device might be broken.\n", dev->name);
3026                         return -EOVERFLOW;
3027                 }
3028         }
3029         if (dev->flags ^ old_flags) {
3030                 dev_change_rx_flags(dev, IFF_ALLMULTI);
3031                 dev_set_rx_mode(dev);
3032         }
3033         return 0;
3034 }
3035
3036 /*
3037  *      Upload unicast and multicast address lists to device and
3038  *      configure RX filtering. When the device doesn't support unicast
3039  *      filtering it is put in promiscuous mode while unicast addresses
3040  *      are present.
3041  */
3042 void __dev_set_rx_mode(struct net_device *dev)
3043 {
3044         /* dev_open will call this function so the list will stay sane. */
3045         if (!(dev->flags&IFF_UP))
3046                 return;
3047
3048         if (!netif_device_present(dev))
3049                 return;
3050
3051         if (dev->set_rx_mode)
3052                 dev->set_rx_mode(dev);
3053         else {
3054                 /* Unicast addresses changes may only happen under the rtnl,
3055                  * therefore calling __dev_set_promiscuity here is safe.
3056                  */
3057                 if (dev->uc_count > 0 && !dev->uc_promisc) {
3058                         __dev_set_promiscuity(dev, 1);
3059                         dev->uc_promisc = 1;
3060                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3061                         __dev_set_promiscuity(dev, -1);
3062                         dev->uc_promisc = 0;
3063                 }
3064
3065                 if (dev->set_multicast_list)
3066                         dev->set_multicast_list(dev);
3067         }
3068 }
3069
/* Locked wrapper: runs __dev_set_rx_mode() under the device's address lock. */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_addr_unlock_bh(dev);
}
3076
3077 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3078                       void *addr, int alen, int glbl)
3079 {
3080         struct dev_addr_list *da;
3081
3082         for (; (da = *list) != NULL; list = &da->next) {
3083                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3084                     alen == da->da_addrlen) {
3085                         if (glbl) {
3086                                 int old_glbl = da->da_gusers;
3087                                 da->da_gusers = 0;
3088                                 if (old_glbl == 0)
3089                                         break;
3090                         }
3091                         if (--da->da_users)
3092                                 return 0;
3093
3094                         *list = da->next;
3095                         kfree(da);
3096                         (*count)--;
3097                         return 0;
3098                 }
3099         }
3100         return -ENOENT;
3101 }
3102
3103 int __dev_addr_add(struct dev_addr_list **list, int *count,
3104                    void *addr, int alen, int glbl)
3105 {
3106         struct dev_addr_list *da;
3107
3108         for (da = *list; da != NULL; da = da->next) {
3109                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3110                     da->da_addrlen == alen) {
3111                         if (glbl) {
3112                                 int old_glbl = da->da_gusers;
3113                                 da->da_gusers = 1;
3114                                 if (old_glbl)
3115                                         return 0;
3116                         }
3117                         da->da_users++;
3118                         return 0;
3119                 }
3120         }
3121
3122         da = kzalloc(sizeof(*da), GFP_ATOMIC);
3123         if (da == NULL)
3124                 return -ENOMEM;
3125         memcpy(da->da_addr, addr, alen);
3126         da->da_addrlen = alen;
3127         da->da_users = 1;
3128         da->da_gusers = glbl ? 1 : 0;
3129         da->next = *list;
3130         *list = da;
3131         (*count)++;
3132         return 0;
3133 }
3134
3135 /**
3136  *      dev_unicast_delete      - Release secondary unicast address.
3137  *      @dev: device
3138  *      @addr: address to delete
3139  *      @alen: length of @addr
3140  *
3141  *      Release reference to a secondary unicast address and remove it
3142  *      from the device if the reference count drops to zero.
3143  *
3144  *      The caller must hold the rtnl_mutex.
3145  */
3146 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3147 {
3148         int err;
3149
3150         ASSERT_RTNL();
3151
3152         netif_addr_lock_bh(dev);
3153         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3154         if (!err)
3155                 __dev_set_rx_mode(dev);
3156         netif_addr_unlock_bh(dev);
3157         return err;
3158 }
3159 EXPORT_SYMBOL(dev_unicast_delete);
3160
3161 /**
3162  *      dev_unicast_add         - add a secondary unicast address
3163  *      @dev: device
3164  *      @addr: address to add
3165  *      @alen: length of @addr
3166  *
3167  *      Add a secondary unicast address to the device or increase
3168  *      the reference count if it already exists.
3169  *
3170  *      The caller must hold the rtnl_mutex.
3171  */
3172 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3173 {
3174         int err;
3175
3176         ASSERT_RTNL();
3177
3178         netif_addr_lock_bh(dev);
3179         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3180         if (!err)
3181                 __dev_set_rx_mode(dev);
3182         netif_addr_unlock_bh(dev);
3183         return err;
3184 }
3185 EXPORT_SYMBOL(dev_unicast_add);
3186
3187 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3188                     struct dev_addr_list **from, int *from_count)
3189 {
3190         struct dev_addr_list *da, *next;
3191         int err = 0;
3192
3193         da = *from;
3194         while (da != NULL) {
3195                 next = da->next;
3196                 if (!da->da_synced) {
3197                         err = __dev_addr_add(to, to_count,
3198                                              da->da_addr, da->da_addrlen, 0);
3199                         if (err < 0)
3200                                 break;
3201                         da->da_synced = 1;
3202                         da->da_users++;
3203                 } else if (da->da_users == 1) {
3204                         __dev_addr_delete(to, to_count,
3205                                           da->da_addr, da->da_addrlen, 0);
3206                         __dev_addr_delete(from, from_count,
3207                                           da->da_addr, da->da_addrlen, 0);
3208                 }
3209                 da = next;
3210         }
3211         return err;
3212 }
3213
3214 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3215                        struct dev_addr_list **from, int *from_count)
3216 {
3217         struct dev_addr_list *da, *next;
3218
3219         da = *from;
3220         while (da != NULL) {
3221                 next = da->next;
3222                 if (da->da_synced) {
3223                         __dev_addr_delete(to, to_count,
3224                                           da->da_addr, da->da_addrlen, 0);
3225                         da->da_synced = 0;
3226                         __dev_addr_delete(from, from_count,
3227                                           da->da_addr, da->da_addrlen, 0);
3228                 }
3229                 da = next;
3230         }
3231 }
3232
3233 /**
3234  *      dev_unicast_sync - Synchronize device's unicast list to another device
3235  *      @to: destination device
3236  *      @from: source device
3237  *
3238  *      Add newly added addresses to the destination device and release
3239  *      addresses that have no users left. The source device must be
3240  *      locked by netif_tx_lock_bh.
3241  *
3242  *      This function is intended to be called from the dev->set_rx_mode
3243  *      function of layered software devices.
3244  */
3245 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3246 {
3247         int err = 0;
3248
3249         netif_addr_lock_bh(to);
3250         err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3251                               &from->uc_list, &from->uc_count);
3252         if (!err)
3253                 __dev_set_rx_mode(to);
3254         netif_addr_unlock_bh(to);
3255         return err;
3256 }
3257 EXPORT_SYMBOL(dev_unicast_sync);
3258
3259 /**
3260  *      dev_unicast_unsync - Remove synchronized addresses from the destination device
3261  *      @to: destination device
3262  *      @from: source device
3263  *
3264  *      Remove all addresses that were added to the destination device by
3265  *      dev_unicast_sync(). This function is intended to be called from the
3266  *      dev->stop function of layered software devices.
3267  */
3268 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3269 {
3270         netif_addr_lock_bh(from);
3271         netif_addr_lock(to);
3272
3273         __dev_addr_unsync(&to->uc_list, &to->uc_count,
3274                           &from->uc_list, &from->uc_count);
3275         __dev_set_rx_mode(to);
3276
3277         netif_addr_unlock(to);
3278         netif_addr_unlock_bh(from);
3279 }
3280 EXPORT_SYMBOL(dev_unicast_unsync);
3281
3282 static void __dev_addr_discard(struct dev_addr_list **list)
3283 {
3284         struct dev_addr_list *tmp;
3285
3286         while (*list != NULL) {
3287                 tmp = *list;
3288                 *list = tmp->next;
3289                 if (tmp->da_users > tmp->da_gusers)
3290                         printk("__dev_addr_discard: address leakage! "
3291                                "da_users=%d\n", tmp->da_users);
3292                 kfree(tmp);
3293         }
3294 }
3295
3296 static void dev_addr_discard(struct net_device *dev)
3297 {
3298         netif_addr_lock_bh(dev);
3299
3300         __dev_addr_discard(&dev->uc_list);
3301         dev->uc_count = 0;
3302
3303         __dev_addr_discard(&dev->mc_list);
3304         dev->mc_count = 0;
3305
3306         netif_addr_unlock_bh(dev);
3307 }
3308
3309 unsigned dev_get_flags(const struct net_device *dev)
3310 {
3311         unsigned flags;
3312
3313         flags = (dev->flags & ~(IFF_PROMISC |
3314                                 IFF_ALLMULTI |
3315                                 IFF_RUNNING |
3316                                 IFF_LOWER_UP |
3317                                 IFF_DORMANT)) |
3318                 (dev->gflags & (IFF_PROMISC |
3319                                 IFF_ALLMULTI));
3320
3321         if (netif_running(dev)) {
3322                 if (netif_oper_up(dev))
3323                         flags |= IFF_RUNNING;
3324                 if (netif_carrier_ok(dev))
3325                         flags |= IFF_LOWER_UP;
3326                 if (netif_dormant(dev))
3327                         flags |= IFF_DORMANT;
3328         }
3329
3330         return flags;
3331 }
3332
3333 int dev_change_flags(struct net_device *dev, unsigned flags)
3334 {
3335         int ret, changes;
3336         int old_flags = dev->flags;
3337
3338         ASSERT_RTNL();
3339
3340         /*
3341          *      Set the flags on our device.
3342          */
3343
3344         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3345                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3346                                IFF_AUTOMEDIA)) |
3347                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3348                                     IFF_ALLMULTI));
3349
3350         /*
3351          *      Load in the correct multicast list now the flags have changed.
3352          */
3353
3354         if ((old_flags ^ flags) & IFF_MULTICAST)
3355                 dev_change_rx_flags(dev, IFF_MULTICAST);
3356
3357         dev_set_rx_mode(dev);
3358
3359         /*
3360          *      Have we downed the interface. We handle IFF_UP ourselves
3361          *      according to user attempts to set it, rather than blindly
3362          *      setting it.
3363          */
3364
3365         ret = 0;
3366         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
3367                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3368
3369                 if (!ret)
3370                         dev_set_rx_mode(dev);
3371         }
3372
3373         if (dev->flags & IFF_UP &&
3374             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3375                                           IFF_VOLATILE)))
3376                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
3377
3378         if ((flags ^ dev->gflags) & IFF_PROMISC) {
3379                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3380                 dev->gflags ^= IFF_PROMISC;
3381                 dev_set_promiscuity(dev, inc);
3382         }
3383
3384         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3385            is important. Some (broken) drivers set IFF_PROMISC, when
3386            IFF_ALLMULTI is requested not asking us and not reporting.
3387          */
3388         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3389                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3390                 dev->gflags ^= IFF_ALLMULTI;
3391                 dev_set_allmulti(dev, inc);
3392         }
3393
3394         /* Exclude state transition flags, already notified */
3395         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3396         if (changes)
3397                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3398
3399         return ret;
3400 }
3401
3402 int dev_set_mtu(struct net_device *dev, int new_mtu)
3403 {
3404         int err;
3405
3406         if (new_mtu == dev->mtu)
3407                 return 0;
3408
3409         /*      MTU must be positive.    */
3410         if (new_mtu < 0)
3411                 return -EINVAL;
3412
3413         if (!netif_device_present(dev))
3414                 return -ENODEV;
3415
3416         err = 0;
3417         if (dev->change_mtu)
3418                 err = dev->change_mtu(dev, new_mtu);
3419         else
3420                 dev->mtu = new_mtu;
3421         if (!err && dev->flags & IFF_UP)
3422                 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3423         return err;
3424 }
3425
3426 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3427 {
3428         int err;
3429
3430         if (!dev->set_mac_address)
3431                 return -EOPNOTSUPP;
3432         if (sa->sa_family != dev->type)
3433                 return -EINVAL;
3434         if (!netif_device_present(dev))
3435                 return -ENODEV;
3436         err = dev->set_mac_address(dev, sa);
3437         if (!err)
3438                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3439         return err;
3440 }
3441
3442 /*
3443  *      Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3444  */
3445 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3446 {
3447         int err;
3448         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3449
3450         if (!dev)
3451                 return -ENODEV;
3452
3453         switch (cmd) {
3454                 case SIOCGIFFLAGS:      /* Get interface flags */
3455                         ifr->ifr_flags = dev_get_flags(dev);
3456                         return 0;
3457
3458                 case SIOCGIFMETRIC:     /* Get the metric on the interface
3459                                            (currently unused) */
3460                         ifr->ifr_metric = 0;
3461                         return 0;
3462
3463                 case SIOCGIFMTU:        /* Get the MTU of a device */
3464                         ifr->ifr_mtu = dev->mtu;
3465                         return 0;
3466
3467                 case SIOCGIFHWADDR:
3468                         if (!dev->addr_len)
3469                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3470                         else
3471                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3472                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3473                         ifr->ifr_hwaddr.sa_family = dev->type;
3474                         return 0;
3475
3476                 case SIOCGIFSLAVE:
3477                         err = -EINVAL;
3478                         break;
3479
3480                 case SIOCGIFMAP:
3481                         ifr->ifr_map.mem_start = dev->mem_start;
3482                         ifr->ifr_map.mem_end   = dev->mem_end;
3483                         ifr->ifr_map.base_addr = dev->base_addr;
3484                         ifr->ifr_map.irq       = dev->irq;
3485                         ifr->ifr_map.dma       = dev->dma;
3486                         ifr->ifr_map.port      = dev->if_port;
3487                         return 0;
3488
3489                 case SIOCGIFINDEX:
3490                         ifr->ifr_ifindex = dev->ifindex;
3491                         return 0;
3492
3493                 case SIOCGIFTXQLEN:
3494                         ifr->ifr_qlen = dev->tx_queue_len;
3495                         return 0;
3496
3497                 default:
3498                         /* dev_ioctl() should ensure this case
3499                          * is never reached
3500                          */
3501                         WARN_ON(1);
3502                         err = -EINVAL;
3503                         break;
3504
3505         }
3506         return err;
3507 }
3508
3509 /*
3510  *      Perform the SIOCxIFxxx calls, inside rtnl_lock()
3511  */
3512 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3513 {
3514         int err;
3515         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3516
3517         if (!dev)
3518                 return -ENODEV;
3519
3520         switch (cmd) {
3521                 case SIOCSIFFLAGS:      /* Set interface flags */
3522                         return dev_change_flags(dev, ifr->ifr_flags);
3523
3524                 case SIOCSIFMETRIC:     /* Set the metric on the interface
3525                                            (currently unused) */
3526                         return -EOPNOTSUPP;
3527
3528                 case SIOCSIFMTU:        /* Set the MTU of a device */
3529                         return dev_set_mtu(dev, ifr->ifr_mtu);
3530
3531                 case SIOCSIFHWADDR:
3532                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3533
3534                 case SIOCSIFHWBROADCAST:
3535                         if (ifr->ifr_hwaddr.sa_family != dev->type)
3536                                 return -EINVAL;
3537                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3538                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3539                         call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3540                         return 0;
3541
3542                 case SIOCSIFMAP:
3543                         if (dev->set_config) {
3544                                 if (!netif_device_present(dev))
3545                                         return -ENODEV;
3546                                 return dev->set_config(dev, &ifr->ifr_map);
3547                         }
3548                         return -EOPNOTSUPP;
3549
3550                 case SIOCADDMULTI:
3551                         if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3552                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3553                                 return -EINVAL;
3554                         if (!netif_device_present(dev))
3555                                 return -ENODEV;
3556                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3557                                           dev->addr_len, 1);
3558
3559                 case SIOCDELMULTI:
3560                         if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3561                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3562                                 return -EINVAL;
3563                         if (!netif_device_present(dev))
3564                                 return -ENODEV;
3565                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3566                                              dev->addr_len, 1);
3567
3568                 case SIOCSIFTXQLEN:
3569                         if (ifr->ifr_qlen < 0)
3570                                 return -EINVAL;
3571                         dev->tx_queue_len = ifr->ifr_qlen;
3572                         return 0;
3573
3574                 case SIOCSIFNAME:
3575                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3576                         return dev_change_name(dev, ifr->ifr_newname);
3577
3578                 /*
3579                  *      Unknown or private ioctl
3580                  */
3581
3582                 default:
3583                         if ((cmd >= SIOCDEVPRIVATE &&
3584                             cmd <= SIOCDEVPRIVATE + 15) ||
3585                             cmd == SIOCBONDENSLAVE ||
3586                             cmd == SIOCBONDRELEASE ||
3587                             cmd == SIOCBONDSETHWADDR ||
3588                             cmd == SIOCBONDSLAVEINFOQUERY ||
3589                             cmd == SIOCBONDINFOQUERY ||
3590                             cmd == SIOCBONDCHANGEACTIVE ||
3591                             cmd == SIOCGMIIPHY ||
3592                             cmd == SIOCGMIIREG ||
3593                             cmd == SIOCSMIIREG ||
3594                             cmd == SIOCBRADDIF ||
3595                             cmd == SIOCBRDELIF ||
3596                             cmd == SIOCWANDEV) {
3597                                 err = -EOPNOTSUPP;
3598                                 if (dev->do_ioctl) {
3599                                         if (netif_device_present(dev))
3600                                                 err = dev->do_ioctl(dev, ifr,
3601                                                                     cmd);
3602                                         else
3603                                                 err = -ENODEV;
3604                                 }
3605                         } else
3606                                 err = -EINVAL;
3607
3608         }
3609         return err;
3610 }
3611
3612 /*
3613  *      This function handles all "interface"-type I/O control requests. The actual
3614  *      'doing' part of this is dev_ifsioc above.
3615  */
3616
3617 /**
3618  *      dev_ioctl       -       network device ioctl
3619  *      @net: the applicable net namespace
3620  *      @cmd: command to issue
3621  *      @arg: pointer to a struct ifreq in user space
3622  *
3623  *      Issue ioctl functions to devices. This is normally called by the
3624  *      user space syscall interfaces but can sometimes be useful for
3625  *      other purposes. The return value is the return from the syscall if
3626  *      positive or a negative errno code on error.
3627  */
3628
3629 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3630 {
3631         struct ifreq ifr;
3632         int ret;
3633         char *colon;
3634
3635         /* One special case: SIOCGIFCONF takes ifconf argument
3636            and requires shared lock, because it sleeps writing
3637            to user space.
3638          */
3639
3640         if (cmd == SIOCGIFCONF) {
3641                 rtnl_lock();
3642                 ret = dev_ifconf(net, (char __user *) arg);
3643                 rtnl_unlock();
3644                 return ret;
3645         }
3646         if (cmd == SIOCGIFNAME)
3647                 return dev_ifname(net, (struct ifreq __user *)arg);
3648
3649         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3650                 return -EFAULT;
3651
3652         ifr.ifr_name[IFNAMSIZ-1] = 0;
3653
3654         colon = strchr(ifr.ifr_name, ':');
3655         if (colon)
3656                 *colon = 0;
3657
3658         /*
3659          *      See which interface the caller is talking about.
3660          */
3661
3662         switch (cmd) {
3663                 /*
3664                  *      These ioctl calls:
3665                  *      - can be done by all.
3666                  *      - atomic and do not require locking.
3667                  *      - return a value
3668                  */
3669                 case SIOCGIFFLAGS:
3670                 case SIOCGIFMETRIC:
3671                 case SIOCGIFMTU:
3672                 case SIOCGIFHWADDR:
3673                 case SIOCGIFSLAVE:
3674                 case SIOCGIFMAP:
3675                 case SIOCGIFINDEX:
3676                 case SIOCGIFTXQLEN:
3677                         dev_load(net, ifr.ifr_name);
3678                         read_lock(&dev_base_lock);
3679                         ret = dev_ifsioc_locked(net, &ifr, cmd);
3680                         read_unlock(&dev_base_lock);
3681                         if (!ret) {
3682                                 if (colon)
3683                                         *colon = ':';
3684                                 if (copy_to_user(arg, &ifr,
3685                                                  sizeof(struct ifreq)))
3686                                         ret = -EFAULT;
3687                         }
3688                         return ret;
3689
3690                 case SIOCETHTOOL:
3691                         dev_load(net, ifr.ifr_name);
3692                         rtnl_lock();
3693                         ret = dev_ethtool(net, &ifr);
3694                         rtnl_unlock();
3695                         if (!ret) {
3696                                 if (colon)
3697                                         *colon = ':';
3698                                 if (copy_to_user(arg, &ifr,
3699                                                  sizeof(struct ifreq)))
3700                                         ret = -EFAULT;
3701                         }
3702                         return ret;
3703
3704                 /*
3705                  *      These ioctl calls:
3706                  *      - require superuser power.
3707                  *      - require strict serialization.
3708                  *      - return a value
3709                  */
3710                 case SIOCGMIIPHY:
3711                 case SIOCGMIIREG:
3712                 case SIOCSIFNAME:
3713                         if (!capable(CAP_NET_ADMIN))
3714                                 return -EPERM;
3715                         dev_load(net, ifr.ifr_name);
3716                         rtnl_lock();
3717                         ret = dev_ifsioc(net, &ifr, cmd);
3718                         rtnl_unlock();
3719                         if (!ret) {
3720                                 if (colon)
3721                                         *colon = ':';
3722                                 if (copy_to_user(arg, &ifr,
3723                                                  sizeof(struct ifreq)))
3724                                         ret = -EFAULT;
3725                         }
3726                         return ret;
3727
3728                 /*
3729                  *      These ioctl calls:
3730                  *      - require superuser power.
3731                  *      - require strict serialization.
3732                  *      - do not return a value
3733                  */
3734                 case SIOCSIFFLAGS:
3735                 case SIOCSIFMETRIC:
3736                 case SIOCSIFMTU:
3737                 case SIOCSIFMAP:
3738                 case SIOCSIFHWADDR:
3739                 case SIOCSIFSLAVE:
3740                 case SIOCADDMULTI:
3741                 case SIOCDELMULTI:
3742                 case SIOCSIFHWBROADCAST:
3743                 case SIOCSIFTXQLEN:
3744                 case SIOCSMIIREG:
3745                 case SIOCBONDENSLAVE:
3746                 case SIOCBONDRELEASE:
3747                 case SIOCBONDSETHWADDR:
3748                 case SIOCBONDCHANGEACTIVE:
3749                 case SIOCBRADDIF:
3750                 case SIOCBRDELIF:
3751                         if (!capable(CAP_NET_ADMIN))
3752                                 return -EPERM;
3753                         /* fall through */
3754                 case SIOCBONDSLAVEINFOQUERY:
3755                 case SIOCBONDINFOQUERY:
3756                         dev_load(net, ifr.ifr_name);
3757                         rtnl_lock();
3758                         ret = dev_ifsioc(net, &ifr, cmd);
3759                         rtnl_unlock();
3760                         return ret;
3761
3762                 case SIOCGIFMEM:
3763                         /* Get the per device memory space. We can add this but
3764                          * currently do not support it */
3765                 case SIOCSIFMEM:
3766                         /* Set the per device memory buffer space.
3767                          * Not applicable in our case */
3768                 case SIOCSIFLINK:
3769                         return -EINVAL;
3770
3771                 /*
3772                  *      Unknown or private ioctl.
3773                  */
3774                 default:
3775                         if (cmd == SIOCWANDEV ||
3776                             (cmd >= SIOCDEVPRIVATE &&
3777                              cmd <= SIOCDEVPRIVATE + 15)) {
3778                                 dev_load(net, ifr.ifr_name);
3779                                 rtnl_lock();
3780                                 ret = dev_ifsioc(net, &ifr, cmd);
3781                                 rtnl_unlock();
3782                                 if (!ret && copy_to_user(arg, &ifr,
3783                                                          sizeof(struct ifreq)))
3784                                         ret = -EFAULT;
3785                                 return ret;
3786                         }
3787                         /* Take care of Wireless Extensions */
3788                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3789                                 return wext_handle_ioctl(net, &ifr, cmd, arg);
3790                         return -EINVAL;
3791         }
3792 }
3793
3794
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 *
 *	Candidates are taken from a static counter that runs from 1 up to
 *	INT_MAX and then starts again at 1; the first candidate for which
 *	__dev_get_by_index() finds no device in @net is returned.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	for (;;) {
		/* Wrap explicitly: incrementing INT_MAX would be a signed
		 * overflow, which is undefined behaviour in C.
		 */
		if (ifindex <= 0 || ifindex == INT_MAX)
			ifindex = 1;
		else
			ifindex++;

		if (!__dev_get_by_index(net, ifindex))
			return ifindex;
	}
}
3813
/* Delayed registration/unregistration */
3815 static LIST_HEAD(net_todo_list);
3816
3817 static void net_set_todo(struct net_device *dev)
3818 {
3819         list_add_tail(&dev->todo_list, &net_todo_list);
3820 }
3821
3822 static void rollback_registered(struct net_device *dev)
3823 {
3824         BUG_ON(dev_boot_phase);
3825         ASSERT_RTNL();
3826
3827         /* Some devices call without registering for initialization unwind. */
3828         if (dev->reg_state == NETREG_UNINITIALIZED) {
3829                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3830                                   "was registered\n", dev->name, dev);
3831
3832                 WARN_ON(1);
3833                 return;
3834         }
3835
3836         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3837
3838         /* If device is running, close it first. */
3839         dev_close(dev);
3840
3841         /* And unlink it from device chain. */
3842         unlist_netdevice(dev);
3843
3844         dev->reg_state = NETREG_UNREGISTERING;
3845
3846         synchronize_net();
3847
3848         /* Shutdown queueing discipline. */
3849         dev_shutdown(dev);
3850
3851
3852         /* Notify protocols, that we are about to destroy
3853            this device. They should clean all the things.
3854         */
3855         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3856
3857         /*
3858          *      Flush the unicast and multicast chains
3859          */
3860         dev_addr_discard(dev);
3861
3862         if (dev->uninit)
3863                 dev->uninit(dev);
3864
3865         /* Notifier chain MUST detach us from master device. */
3866         WARN_ON(dev->master);
3867
3868         /* Remove entries from kobject tree */
3869         netdev_unregister_kobject(dev);
3870
3871         synchronize_net();
3872
3873         dev_put(dev);
3874 }
3875
3876 static void __netdev_init_queue_locks_one(struct net_device *dev,
3877                                           struct netdev_queue *dev_queue,
3878                                           void *_unused)
3879 {
3880         spin_lock_init(&dev_queue->_xmit_lock);
3881         netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3882         dev_queue->xmit_lock_owner = -1;
3883 }
3884
3885 static void netdev_init_queue_locks(struct net_device *dev)
3886 {
3887         netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3888         __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3889 }
3890
3891 /**
3892  *      register_netdevice      - register a network device
3893  *      @dev: device to register
3894  *
3895  *      Take a completed network device structure and add it to the kernel
3896  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3897  *      chain. 0 is returned on success. A negative errno code is returned
3898  *      on a failure to set up the device, or if the name is a duplicate.
3899  *
3900  *      Callers must hold the rtnl semaphore. You may want
3901  *      register_netdev() instead of this.
3902  *
3903  *      BUGS:
3904  *      The locking appears insufficient to guarantee two parallel registers
3905  *      will not get the same name.
3906  */
3907
3908 int register_netdevice(struct net_device *dev)
3909 {
3910         struct hlist_head *head;
3911         struct hlist_node *p;
3912         int ret;
3913         struct net *net;
3914
3915         BUG_ON(dev_boot_phase);
3916         ASSERT_RTNL();
3917
3918         might_sleep();
3919
3920         /* When net_device's are persistent, this will be fatal. */
3921         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3922         BUG_ON(!dev_net(dev));
3923         net = dev_net(dev);
3924
3925         spin_lock_init(&dev->addr_list_lock);
3926         netdev_set_addr_lockdep_class(dev);
3927         netdev_init_queue_locks(dev);
3928
3929         dev->iflink = -1;
3930
3931         /* Init, if this function is available */
3932         if (dev->init) {
3933                 ret = dev->init(dev);
3934                 if (ret) {
3935                         if (ret > 0)
3936                                 ret = -EIO;
3937                         goto out;
3938                 }
3939         }
3940
3941         if (!dev_valid_name(dev->name)) {
3942                 ret = -EINVAL;
3943                 goto err_uninit;
3944         }
3945
3946         dev->ifindex = dev_new_index(net);
3947         if (dev->iflink == -1)
3948                 dev->iflink = dev->ifindex;
3949
3950         /* Check for existence of name */
3951         head = dev_name_hash(net, dev->name);
3952         hlist_for_each(p, head) {
3953                 struct net_device *d
3954                         = hlist_entry(p, struct net_device, name_hlist);
3955                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3956                         ret = -EEXIST;
3957                         goto err_uninit;
3958                 }
3959         }
3960
3961         /* Fix illegal checksum combinations */
3962         if ((dev->features & NETIF_F_HW_CSUM) &&
3963             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3964                 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3965                        dev->name);
3966                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3967         }
3968
3969         if ((dev->features & NETIF_F_NO_CSUM) &&
3970             (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3971                 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3972                        dev->name);
3973                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3974         }
3975
3976
3977         /* Fix illegal SG+CSUM combinations. */
3978         if ((dev->features & NETIF_F_SG) &&
3979             !(dev->features & NETIF_F_ALL_CSUM)) {
3980                 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3981                        dev->name);
3982                 dev->features &= ~NETIF_F_SG;
3983         }
3984
3985         /* TSO requires that SG is present as well. */
3986         if ((dev->features & NETIF_F_TSO) &&
3987             !(dev->features & NETIF_F_SG)) {
3988                 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3989                        dev->name);
3990                 dev->features &= ~NETIF_F_TSO;
3991         }
3992         if (dev->features & NETIF_F_UFO) {
3993                 if (!(dev->features & NETIF_F_HW_CSUM)) {
3994                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3995                                         "NETIF_F_HW_CSUM feature.\n",
3996                                                         dev->name);
3997                         dev->features &= ~NETIF_F_UFO;
3998                 }
3999                 if (!(dev->features & NETIF_F_SG)) {
4000                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
4001                                         "NETIF_F_SG feature.\n",
4002                                         dev->name);
4003                         dev->features &= ~NETIF_F_UFO;
4004                 }
4005         }
4006
4007         /* Enable software GSO if SG is supported. */
4008         if (dev->features & NETIF_F_SG)
4009                 dev->features |= NETIF_F_GSO;
4010
4011         netdev_initialize_kobject(dev);
4012         ret = netdev_register_kobject(dev);
4013         if (ret)
4014                 goto err_uninit;
4015         dev->reg_state = NETREG_REGISTERED;
4016
4017         /*
4018          *      Default initial state at registry is that the
4019          *      device is present.
4020          */
4021
4022         set_bit(__LINK_STATE_PRESENT, &dev->state);
4023
4024         dev_init_scheduler(dev);
4025         dev_hold(dev);
4026         list_netdevice(dev);
4027
4028         /* Notify protocols, that a new device appeared. */
4029         ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4030         ret = notifier_to_errno(ret);
4031         if (ret) {
4032                 rollback_registered(dev);
4033                 dev->reg_state = NETREG_UNREGISTERED;
4034         }
4035
4036 out:
4037         return ret;
4038
4039 err_uninit:
4040         if (dev->uninit)
4041                 dev->uninit(dev);
4042         goto out;
4043 }
4044
4045 /**
4046  *      register_netdev - register a network device
4047  *      @dev: device to register
4048  *
4049  *      Take a completed network device structure and add it to the kernel
4050  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4051  *      chain. 0 is returned on success. A negative errno code is returned
4052  *      on a failure to set up the device, or if the name is a duplicate.
4053  *
4054  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
4055  *      and expands the device name if you passed a format string to
4056  *      alloc_netdev.
4057  */
4058 int register_netdev(struct net_device *dev)
4059 {
4060         int err;
4061
4062         rtnl_lock();
4063
4064         /*
4065          * If the name is a format string the caller wants us to do a
4066          * name allocation.
4067          */
4068         if (strchr(dev->name, '%')) {
4069                 err = dev_alloc_name(dev, dev->name);
4070                 if (err < 0)
4071                         goto out;
4072         }
4073
4074         err = register_netdevice(dev);
4075 out:
4076         rtnl_unlock();
4077         return err;
4078 }
4079 EXPORT_SYMBOL(register_netdev);
4080
4081 /*
4082  * netdev_wait_allrefs - wait until all references are gone.
4083  *
4084  * This is called when unregistering network devices.
4085  *
4086  * Any protocol or device that holds a reference should register
4087  * for netdevice notification, and cleanup and put back the
4088  * reference if they receive an UNREGISTER event.
4089  * We can get stuck here if buggy protocols don't correctly
4090  * call dev_put.
4091  */
4092 static void netdev_wait_allrefs(struct net_device *dev)
4093 {
4094         unsigned long rebroadcast_time, warning_time;
4095
4096         rebroadcast_time = warning_time = jiffies;
4097         while (atomic_read(&dev->refcnt) != 0) {
4098                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4099                         rtnl_lock();
4100
4101                         /* Rebroadcast unregister notification */
4102                         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4103
4104                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4105                                      &dev->state)) {
4106                                 /* We must not have linkwatch events
4107                                  * pending on unregister. If this
4108                                  * happens, we simply run the queue
4109                                  * unscheduled, resulting in a noop
4110                                  * for this device.
4111                                  */
4112                                 linkwatch_run_queue();
4113                         }
4114
4115                         __rtnl_unlock();
4116
4117                         rebroadcast_time = jiffies;
4118                 }
4119
4120                 msleep(250);
4121
4122                 if (time_after(jiffies, warning_time + 10 * HZ)) {
4123                         printk(KERN_EMERG "unregister_netdevice: "
4124                                "waiting for %s to become free. Usage "
4125                                "count = %d\n",
4126                                dev->name, atomic_read(&dev->refcnt));
4127                         warning_time = jiffies;
4128                 }
4129         }
4130 }
4131
4132 /* The sequence is:
4133  *
4134  *      rtnl_lock();
4135  *      ...
4136  *      register_netdevice(x1);
4137  *      register_netdevice(x2);
4138  *      ...
4139  *      unregister_netdevice(y1);
4140  *      unregister_netdevice(y2);
4141  *      ...
4142  *      rtnl_unlock();
4143  *      free_netdev(y1);
4144  *      free_netdev(y2);
4145  *
4146  * We are invoked by rtnl_unlock().
4147  * This allows us to deal with problems:
4148  * 1) We can delete sysfs objects which invoke hotplug
4149  *    without deadlocking with linkwatch via keventd.
4150  * 2) Since we run with the RTNL semaphore not held, we can sleep
4151  *    safely in order to wait for the netdev refcnt to drop to zero.
4152  *
4153  * We must not return until all unregister events added during
4154  * the interval the lock was held have been completed.
4155  */
4156 void netdev_run_todo(void)
4157 {
4158         struct list_head list;
4159
4160         /* Snapshot list, allow later requests */
4161         list_replace_init(&net_todo_list, &list);
4162
4163         __rtnl_unlock();
4164
4165         while (!list_empty(&list)) {
4166                 struct net_device *dev
4167                         = list_entry(list.next, struct net_device, todo_list);
4168                 list_del(&dev->todo_list);
4169
4170                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4171                         printk(KERN_ERR "network todo '%s' but state %d\n",
4172                                dev->name, dev->reg_state);
4173                         dump_stack();
4174                         continue;
4175                 }
4176
4177                 dev->reg_state = NETREG_UNREGISTERED;
4178
4179                 on_each_cpu(flush_backlog, dev, 1);
4180
4181                 netdev_wait_allrefs(dev);
4182
4183                 /* paranoia */
4184                 BUG_ON(atomic_read(&dev->refcnt));
4185                 WARN_ON(dev->ip_ptr);
4186                 WARN_ON(dev->ip6_ptr);
4187                 WARN_ON(dev->dn_ptr);
4188
4189                 if (dev->destructor)
4190                         dev->destructor(dev);
4191
4192                 /* Free network device */
4193                 kobject_put(&dev->dev.kobj);
4194         }
4195 }
4196
4197 static struct net_device_stats *internal_stats(struct net_device *dev)
4198 {
4199         return &dev->stats;
4200 }
4201
4202 static void netdev_init_one_queue(struct net_device *dev,
4203                                   struct netdev_queue *queue,
4204                                   void *_unused)
4205 {
4206         queue->dev = dev;
4207 }
4208
4209 static void netdev_init_queues(struct net_device *dev)
4210 {
4211         netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4212         netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4213         spin_lock_init(&dev->tx_global_lock);
4214 }
4215
4216 /**
4217  *      alloc_netdev_mq - allocate network device
4218  *      @sizeof_priv:   size of private data to allocate space for
4219  *      @name:          device name format string
4220  *      @setup:         callback to initialize device
4221  *      @queue_count:   the number of subqueues to allocate
4222  *
4223  *      Allocates a struct net_device with private data area for driver use
4224  *      and performs basic initialization.  Also allocates subquue structs
4225  *      for each queue on the device at the end of the netdevice.
4226  */
4227 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4228                 void (*setup)(struct net_device *), unsigned int queue_count)
4229 {
4230         struct netdev_queue *tx;
4231         struct net_device *dev;
4232         size_t alloc_size;
4233         void *p;
4234
4235         BUG_ON(strlen(name) >= sizeof(dev->name));
4236
4237         alloc_size = sizeof(struct net_device);
4238         if (sizeof_priv) {
4239                 /* ensure 32-byte alignment of private area */
4240                 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4241                 alloc_size += sizeof_priv;
4242         }
4243         /* ensure 32-byte alignment of whole construct */
4244         alloc_size += NETDEV_ALIGN_CONST;
4245
4246         p = kzalloc(alloc_size, GFP_KERNEL);
4247         if (!p) {
4248                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4249                 return NULL;
4250         }
4251
4252         tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4253         if (!tx) {
4254                 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4255                        "tx qdiscs.\n");
4256                 kfree(p);
4257                 return NULL;
4258         }
4259
4260         dev = (struct net_device *)
4261                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4262         dev->padded = (char *)dev - (char *)p;
4263         dev_net_set(dev, &init_net);
4264
4265         dev->_tx = tx;
4266         dev->num_tx_queues = queue_count;
4267         dev->real_num_tx_queues = queue_count;
4268
4269         if (sizeof_priv) {
4270                 dev->priv = ((char *)dev +
4271                              ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4272                               & ~NETDEV_ALIGN_CONST));
4273         }
4274
4275         dev->gso_max_size = GSO_MAX_SIZE;
4276
4277         netdev_init_queues(dev);
4278
4279         dev->get_stats = internal_stats;
4280         netpoll_netdev_init(dev);
4281         setup(dev);
4282         strcpy(dev->name, name);
4283         return dev;
4284 }
4285 EXPORT_SYMBOL(alloc_netdev_mq);
4286
4287 /**
4288  *      free_netdev - free network device
4289  *      @dev: device
4290  *
4291  *      This function does the last stage of destroying an allocated device
4292  *      interface. The reference to the device object is released.
4293  *      If this is the last reference then it will be freed.
4294  */
4295 void free_netdev(struct net_device *dev)
4296 {
4297         release_net(dev_net(dev));
4298
4299         kfree(dev->_tx);
4300
4301         /*  Compatibility with error handling in drivers */
4302         if (dev->reg_state == NETREG_UNINITIALIZED) {
4303                 kfree((char *)dev - dev->padded);
4304                 return;
4305         }
4306
4307         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4308         dev->reg_state = NETREG_RELEASED;
4309
4310         /* will free via device release */
4311         put_device(&dev->dev);
4312 }
4313
/*
 * Synchronize with packet receive processing by waiting in
 * synchronize_rcu().  Flagged with might_sleep(), so it must not be
 * called from a context that cannot sleep.
 */
void synchronize_net(void)
{
        might_sleep();
        synchronize_rcu();
}
4320
/**
 *      unregister_netdevice - remove device from the kernel
 *      @dev: device
 *
 *      Shuts down a device interface and removes it from the kernel
 *      tables.  The remainder of the unregister work is queued and
 *      completed after the rtnl semaphore has been released.
 *
 *      Callers must hold the rtnl semaphore.  You may want
 *      unregister_netdev() instead of this.
 */
void unregister_netdevice(struct net_device *dev)
{
        ASSERT_RTNL();

        rollback_registered(dev);

        /* Finish processing unregister after unlock */
        net_set_todo(dev);
}
4340
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      Shuts down a device interface and removes it from the kernel
 *      tables.
 *
 *      This is a wrapper around unregister_netdevice() that takes and
 *      releases the rtnl semaphore itself.  In general you want to use
 *      this and not unregister_netdevice().
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();
        unregister_netdevice(dev);
        rtnl_unlock();
}
4358
4359 EXPORT_SYMBOL(unregister_netdev);
4360
4361 /**
4362  *      dev_change_net_namespace - move device to different nethost namespace
4363  *      @dev: device
4364  *      @net: network namespace
4365  *      @pat: If not NULL name pattern to try if the current device name
4366  *            is already taken in the destination network namespace.
4367  *
4368  *      This function shuts down a device interface and moves it
4369  *      to a new network namespace. On success 0 is returned, on
4370  *      a failure a netagive errno code is returned.
4371  *
4372  *      Callers must hold the rtnl semaphore.
4373  */
4374
4375 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4376 {
4377         char buf[IFNAMSIZ];
4378         const char *destname;
4379         int err;
4380
4381         ASSERT_RTNL();
4382
4383         /* Don't allow namespace local devices to be moved. */
4384         err = -EINVAL;
4385         if (dev->features & NETIF_F_NETNS_LOCAL)
4386                 goto out;
4387
4388         /* Ensure the device has been registrered */
4389         err = -EINVAL;
4390         if (dev->reg_state != NETREG_REGISTERED)
4391                 goto out;
4392
4393         /* Get out if there is nothing todo */
4394         err = 0;
4395         if (net_eq(dev_net(dev), net))
4396                 goto out;
4397
4398         /* Pick the destination device name, and ensure
4399          * we can use it in the destination network namespace.
4400          */
4401         err = -EEXIST;
4402         destname = dev->name;
4403         if (__dev_get_by_name(net, destname)) {
4404                 /* We get here if we can't use the current device name */
4405                 if (!pat)
4406                         goto out;
4407                 if (!dev_valid_name(pat))
4408                         goto out;
4409                 if (strchr(pat, '%')) {
4410                         if (__dev_alloc_name(net, pat, buf) < 0)
4411                                 goto out;
4412                         destname = buf;
4413                 } else
4414                         destname = pat;
4415                 if (__dev_get_by_name(net, destname))
4416                         goto out;
4417         }
4418
4419         /*
4420          * And now a mini version of register_netdevice unregister_netdevice.
4421          */
4422
4423         /* If device is running close it first. */
4424         dev_close(dev);
4425
4426         /* And unlink it from device chain */
4427         err = -ENODEV;
4428         unlist_netdevice(dev);
4429
4430         synchronize_net();
4431
4432         /* Shutdown queueing discipline. */
4433         dev_shutdown(dev);
4434
4435         /* Notify protocols, that we are about to destroy
4436            this device. They should clean all the things.
4437         */
4438         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4439
4440         /*
4441          *      Flush the unicast and multicast chains
4442          */
4443         dev_addr_discard(dev);
4444
4445         /* Actually switch the network namespace */
4446         dev_net_set(dev, net);
4447
4448         /* Assign the new device name */
4449         if (destname != dev->name)
4450                 strcpy(dev->name, destname);
4451
4452         /* If there is an ifindex conflict assign a new one */
4453         if (__dev_get_by_index(net, dev->ifindex)) {
4454                 int iflink = (dev->iflink == dev->ifindex);
4455                 dev->ifindex = dev_new_index(net);
4456                 if (iflink)
4457                         dev->iflink = dev->ifindex;
4458         }
4459
4460         /* Fixup kobjects */
4461         netdev_unregister_kobject(dev);
4462         err = netdev_register_kobject(dev);
4463         WARN_ON(err);
4464
4465         /* Add the device back in the hashes */
4466         list_netdevice(dev);
4467
4468         /* Notify protocols, that a new device appeared. */
4469         call_netdevice_notifiers(NETDEV_REGISTER, dev);
4470
4471         synchronize_net();
4472         err = 0;
4473 out:
4474         return err;
4475 }
4476
4477 static int dev_cpu_callback(struct notifier_block *nfb,
4478                             unsigned long action,
4479                             void *ocpu)
4480 {
4481         struct sk_buff **list_skb;
4482         struct Qdisc **list_net;
4483         struct sk_buff *skb;
4484         unsigned int cpu, oldcpu = (unsigned long)ocpu;
4485         struct softnet_data *sd, *oldsd;
4486
4487         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4488                 return NOTIFY_OK;
4489
4490         local_irq_disable();
4491         cpu = smp_processor_id();
4492         sd = &per_cpu(softnet_data, cpu);
4493         oldsd = &per_cpu(softnet_data, oldcpu);
4494
4495         /* Find end of our completion_queue. */
4496         list_skb = &sd->completion_queue;
4497         while (*list_skb)
4498                 list_skb = &(*list_skb)->next;
4499         /* Append completion queue from offline CPU. */
4500         *list_skb = oldsd->completion_queue;
4501         oldsd->completion_queue = NULL;
4502
4503         /* Find end of our output_queue. */
4504         list_net = &sd->output_queue;
4505         while (*list_net)
4506                 list_net = &(*list_net)->next_sched;
4507         /* Append output queue from offline CPU. */
4508         *list_net = oldsd->output_queue;
4509         oldsd->output_queue = NULL;
4510
4511         raise_softirq_irqoff(NET_TX_SOFTIRQ);
4512         local_irq_enable();
4513
4514         /* Process offline CPU's input_pkt_queue */
4515         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4516                 netif_rx(skb);
4517
4518         return NOTIFY_OK;
4519 }
4520
4521 #ifdef CONFIG_NET_DMA
4522 /**
4523  * net_dma_rebalance - try to maintain one DMA channel per CPU
4524  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4525  *
4526  * This is called when the number of channels allocated to the net_dma client
4527  * changes.  The net_dma client tries to have one DMA channel per CPU.
4528  */
4529
4530 static void net_dma_rebalance(struct net_dma *net_dma)
4531 {
4532         unsigned int cpu, i, n, chan_idx;
4533         struct dma_chan *chan;
4534
4535         if (cpus_empty(net_dma->channel_mask)) {
4536                 for_each_online_cpu(cpu)
4537                         rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4538                 return;
4539         }
4540
4541         i = 0;
4542         cpu = first_cpu(cpu_online_map);
4543
4544         for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4545                 chan = net_dma->channels[chan_idx];
4546
4547                 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4548                    + (i < (num_online_cpus() %
4549                         cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4550
4551                 while(n) {
4552                         per_cpu(softnet_data, cpu).net_dma = chan;
4553                         cpu = next_cpu(cpu, cpu_online_map);
4554                         n--;
4555                 }
4556                 i++;
4557         }
4558 }
4559
4560 /**
4561  * netdev_dma_event - event callback for the net_dma_client
4562  * @client: should always be net_dma_client
4563  * @chan: DMA channel for the event
4564  * @state: DMA state to be handled
4565  */
4566 static enum dma_state_client
4567 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4568         enum dma_state state)
4569 {
4570         int i, found = 0, pos = -1;
4571         struct net_dma *net_dma =
4572                 container_of(client, struct net_dma, client);
4573         enum dma_state_client ack = DMA_DUP; /* default: take no action */
4574
4575         spin_lock(&net_dma->lock);
4576         switch (state) {
4577         case DMA_RESOURCE_AVAILABLE:
4578                 for (i = 0; i < nr_cpu_ids; i++)
4579                         if (net_dma->channels[i] == chan) {
4580                                 found = 1;
4581                                 break;
4582                         } else if (net_dma->channels[i] == NULL && pos < 0)
4583                                 pos = i;
4584
4585                 if (!found && pos >= 0) {
4586                         ack = DMA_ACK;
4587                         net_dma->channels[pos] = chan;
4588                         cpu_set(pos, net_dma->channel_mask);
4589                         net_dma_rebalance(net_dma);
4590                 }
4591                 break;
4592         case DMA_RESOURCE_REMOVED:
4593                 for (i = 0; i < nr_cpu_ids; i++)
4594                         if (net_dma->channels[i] == chan) {
4595                                 found = 1;
4596                                 pos = i;
4597                                 break;
4598                         }
4599
4600                 if (found) {
4601                         ack = DMA_ACK;
4602                         cpu_clear(pos, net_dma->channel_mask);
4603                         net_dma->channels[i] = NULL;
4604                         net_dma_rebalance(net_dma);
4605                 }
4606                 break;
4607         default:
4608                 break;
4609         }
4610         spin_unlock(&net_dma->lock);
4611
4612         return ack;
4613 }
4614
4615 /**
4616  * netdev_dma_regiser - register the networking subsystem as a DMA client
4617  */
4618 static int __init netdev_dma_register(void)
4619 {
4620         net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
4621                                                                 GFP_KERNEL);
4622         if (unlikely(!net_dma.channels)) {
4623                 printk(KERN_NOTICE
4624                                 "netdev_dma: no memory for net_dma.channels\n");
4625                 return -ENOMEM;
4626         }
4627         spin_lock_init(&net_dma.lock);
4628         dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4629         dma_async_client_register(&net_dma.client);
4630         dma_async_client_chan_request(&net_dma.client);
4631         return 0;
4632 }
4633
4634 #else
4635 static int __init netdev_dma_register(void) { return -ENODEV; }
4636 #endif /* CONFIG_NET_DMA */
4637
4638 /**
4639  *      netdev_compute_feature - compute conjunction of two feature sets
4640  *      @all: first feature set
4641  *      @one: second feature set
4642  *
4643  *      Computes a new feature set after adding a device with feature set
4644  *      @one to the master device with current feature set @all.  Returns
4645  *      the new feature set.
4646  */
4647 int netdev_compute_features(unsigned long all, unsigned long one)
4648 {
4649         /* if device needs checksumming, downgrade to hw checksumming */
4650         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4651                 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4652
4653         /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4654         if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4655                 all ^= NETIF_F_HW_CSUM
4656                         | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4657
4658         if (one & NETIF_F_GSO)
4659                 one |= NETIF_F_GSO_SOFTWARE;
4660         one |= NETIF_F_GSO;
4661
4662         /* If even one device supports robust GSO, enable it for all. */
4663         if (one & NETIF_F_GSO_ROBUST)
4664                 all |= NETIF_F_GSO_ROBUST;
4665
4666         all &= one | NETIF_F_LLTX;
4667
4668         if (!(all & NETIF_F_ALL_CSUM))
4669                 all &= ~NETIF_F_SG;
4670         if (!(all & NETIF_F_SG))
4671                 all &= ~NETIF_F_GSO_MASK;
4672
4673         return all;
4674 }
4675 EXPORT_SYMBOL(netdev_compute_features);
4676
4677 static struct hlist_head *netdev_create_hash(void)
4678 {
4679         int i;
4680         struct hlist_head *hash;
4681
4682         hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4683         if (hash != NULL)
4684                 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4685                         INIT_HLIST_HEAD(&hash[i]);
4686
4687         return hash;
4688 }
4689
4690 /* Initialize per network namespace state */
4691 static int __net_init netdev_init(struct net *net)
4692 {
4693         INIT_LIST_HEAD(&net->dev_base_head);
4694
4695         net->dev_name_head = netdev_create_hash();
4696         if (net->dev_name_head == NULL)
4697                 goto err_name;
4698
4699         net->dev_index_head = netdev_create_hash();
4700         if (net->dev_index_head == NULL)
4701                 goto err_idx;
4702
4703         return 0;
4704
4705 err_idx:
4706         kfree(net->dev_name_head);
4707 err_name:
4708         return -ENOMEM;
4709 }
4710
4711 char *netdev_drivername(struct net_device *dev, char *buffer, int len)
4712 {
4713         struct device_driver *driver;
4714         struct device *parent;
4715
4716         if (len <= 0 || !buffer)
4717                 return buffer;
4718         buffer[0] = 0;
4719
4720         parent = dev->dev.parent;
4721
4722         if (!parent)
4723                 return buffer;
4724
4725         driver = parent->driver;
4726         if (driver && driver->name)
4727                 strlcpy(buffer, driver->name, len);
4728         return buffer;
4729 }
4730
4731 static void __net_exit netdev_exit(struct net *net)
4732 {
4733         kfree(net->dev_name_head);
4734         kfree(net->dev_index_head);
4735 }
4736
4737 static struct pernet_operations __net_initdata netdev_net_ops = {
4738         .init = netdev_init,
4739         .exit = netdev_exit,
4740 };
4741
4742 static void __net_exit default_device_exit(struct net *net)
4743 {
4744         struct net_device *dev, *next;
4745         /*
4746          * Push all migratable of the network devices back to the
4747          * initial network namespace
4748          */
4749         rtnl_lock();
4750         for_each_netdev_safe(net, dev, next) {
4751                 int err;
4752                 char fb_name[IFNAMSIZ];
4753
4754                 /* Ignore unmoveable devices (i.e. loopback) */
4755                 if (dev->features & NETIF_F_NETNS_LOCAL)
4756                         continue;
4757
4758                 /* Push remaing network devices to init_net */
4759                 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4760                 err = dev_change_net_namespace(dev, &init_net, fb_name);
4761                 if (err) {
4762                         printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4763                                 __func__, dev->name, err);
4764                         BUG();
4765                 }
4766         }
4767         rtnl_unlock();
4768 }
4769
4770 static struct pernet_operations __net_initdata default_device_ops = {
4771         .exit = default_device_exit,
4772 };
4773
4774 /*
4775  *      Initialize the DEV module. At boot time this walks the device list and
4776  *      unhooks any devices that fail to initialise (normally hardware not
4777  *      present) and leaves us with a valid list of present and active devices.
4778  *
4779  */
4780
4781 /*
4782  *       This is called single threaded during boot, so no need
4783  *       to take the rtnl semaphore.
4784  */
4785 static int __init net_dev_init(void)
4786 {
4787         int i, rc = -ENOMEM;
4788
4789         BUG_ON(!dev_boot_phase);
4790
4791         if (dev_proc_init())
4792                 goto out;
4793
4794         if (netdev_kobject_init())
4795                 goto out;
4796
4797         INIT_LIST_HEAD(&ptype_all);
4798         for (i = 0; i < PTYPE_HASH_SIZE; i++)
4799                 INIT_LIST_HEAD(&ptype_base[i]);
4800
4801         if (register_pernet_subsys(&netdev_net_ops))
4802                 goto out;
4803
4804         if (register_pernet_device(&default_device_ops))
4805                 goto out;
4806
4807         /*
4808          *      Initialise the packet receive queues.
4809          */
4810
4811         for_each_possible_cpu(i) {
4812                 struct softnet_data *queue;
4813
4814                 queue = &per_cpu(softnet_data, i);
4815                 skb_queue_head_init(&queue->input_pkt_queue);
4816                 queue->completion_queue = NULL;
4817                 INIT_LIST_HEAD(&queue->poll_list);
4818
4819                 queue->backlog.poll = process_backlog;
4820                 queue->backlog.weight = weight_p;
4821         }
4822
4823         netdev_dma_register();
4824
4825         dev_boot_phase = 0;
4826
4827         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4828         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
4829
4830         hotcpu_notifier(dev_cpu_callback, 0);
4831         dst_init();
4832         dev_mcast_init();
4833         rc = 0;
4834 out:
4835         return rc;
4836 }
4837
4838 subsys_initcall(net_dev_init);
4839
4840 EXPORT_SYMBOL(__dev_get_by_index);
4841 EXPORT_SYMBOL(__dev_get_by_name);
4842 EXPORT_SYMBOL(__dev_remove_pack);
4843 EXPORT_SYMBOL(dev_valid_name);
4844 EXPORT_SYMBOL(dev_add_pack);
4845 EXPORT_SYMBOL(dev_alloc_name);
4846 EXPORT_SYMBOL(dev_close);
4847 EXPORT_SYMBOL(dev_get_by_flags);
4848 EXPORT_SYMBOL(dev_get_by_index);
4849 EXPORT_SYMBOL(dev_get_by_name);
4850 EXPORT_SYMBOL(dev_open);
4851 EXPORT_SYMBOL(dev_queue_xmit);
4852 EXPORT_SYMBOL(dev_remove_pack);
4853 EXPORT_SYMBOL(dev_set_allmulti);
4854 EXPORT_SYMBOL(dev_set_promiscuity);
4855 EXPORT_SYMBOL(dev_change_flags);
4856 EXPORT_SYMBOL(dev_set_mtu);
4857 EXPORT_SYMBOL(dev_set_mac_address);
4858 EXPORT_SYMBOL(free_netdev);
4859 EXPORT_SYMBOL(netdev_boot_setup_check);
4860 EXPORT_SYMBOL(netdev_set_master);
4861 EXPORT_SYMBOL(netdev_state_change);
4862 EXPORT_SYMBOL(netif_receive_skb);
4863 EXPORT_SYMBOL(netif_rx);
4864 EXPORT_SYMBOL(register_gifconf);
4865 EXPORT_SYMBOL(register_netdevice);
4866 EXPORT_SYMBOL(register_netdevice_notifier);
4867 EXPORT_SYMBOL(skb_checksum_help);
4868 EXPORT_SYMBOL(synchronize_net);
4869 EXPORT_SYMBOL(unregister_netdevice);
4870 EXPORT_SYMBOL(unregister_netdevice_notifier);
4871 EXPORT_SYMBOL(net_enable_timestamp);
4872 EXPORT_SYMBOL(net_disable_timestamp);
4873 EXPORT_SYMBOL(dev_get_flags);
4874
4875 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
4876 EXPORT_SYMBOL(br_handle_frame_hook);
4877 EXPORT_SYMBOL(br_fdb_get_hook);
4878 EXPORT_SYMBOL(br_fdb_put_hook);
4879 #endif
4880
4881 #ifdef CONFIG_KMOD
4882 EXPORT_SYMBOL(dev_load);
4883 #endif
4884
4885 EXPORT_PER_CPU_SYMBOL(softnet_data);