git.oblomov.eu Git - linux-2.6/blob - net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/ethtool.h>
  94 #include <linux/notifier.h>
  95 #include <linux/skbuff.h>
  96 #include <net/net_namespace.h>
  97 #include <net/sock.h>
  98 #include <linux/rtnetlink.h>
  99 #include <linux/proc_fs.h>
 100 #include <linux/seq_file.h>
 101 #include <linux/stat.h>
 102 #include <linux/if_bridge.h>
 103 #include <linux/if_macvlan.h>
 104 #include <net/dst.h>
 105 #include <net/pkt_sched.h>
 106 #include <net/checksum.h>
 107 #include <linux/highmem.h>
 108 #include <linux/init.h>
 109 #include <linux/kmod.h>
 110 #include <linux/module.h>
 111 #include <linux/netpoll.h>
 112 #include <linux/rcupdate.h>
 113 #include <linux/delay.h>
 114 #include <net/wext.h>
 115 #include <net/iw_handler.h>
 116 #include <asm/current.h>
 117 #include <linux/audit.h>
 118 #include <linux/dmaengine.h>
 119 #include <linux/err.h>
 120 #include <linux/ctype.h>
 121 #include <linux/if_arp.h>
 122 #include <linux/if_vlan.h>
 123 #include <linux/ip.h>
 124 #include <net/ip.h>
 125 #include <linux/ipv6.h>
 126 #include <linux/in.h>
 127 #include <linux/jhash.h>
 128 #include <linux/random.h>
 129
 130 #include "net-sysfs.h"
 131
/*
 * Instead of increasing this, you should create a hash table.
 * NOTE(review): the users of this limit are outside this part of the
 * file; presumably it caps the number of skbs held for GRO - confirm.
 */
#define MAX_GRO_SKBS 8

/*
 * This should be increased if a protocol with a bigger head is added.
 * Defined as MAX_HEADER plus 128 bytes.
 */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
 137
 138 /*
 139  *      The list of packet types we will receive (as opposed to discard)
 140  *      and the routines to invoke.
 141  *
 142  *      Why 16. Because with 16 the only overlap we get on a hash of the
 143  *      low nibble of the protocol value is RARP/SNAP/X.25.
 144  *
 145  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 146  *             sure which should go first, but I bet it won't make much
 147  *             difference if we are running VLANs.  The good news is that
 148  *             this protocol won't be in the list unless compiled in, so
 149  *             the average user (w/out VLANs) will not be adversely affected.
 150  *             --BLG
 151  *
 152  *              0800    IP
 153  *              8100    802.1Q VLAN
 154  *              0001    802.3
 155  *              0002    AX.25
 156  *              0004    802.2
 157  *              8035    RARP
 158  *              0005    SNAP
 159  *              0805    X.25
 160  *              0806    ARP
 161  *              8137    IPX
 162  *              0009    Localtalk
 163  *              86DD    IPv6
 164  */
 165
/* Number of buckets in ptype_base[], and the mask that selects a bucket. */
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

/* Taken by dev_add_pack() and __dev_remove_pack() around list updates. */
static DEFINE_SPINLOCK(ptype_lock);
/*
 * Handlers for a specific protocol; the bucket is the host-order protocol
 * value masked with PTYPE_HASH_MASK.
 */
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
/* Handlers registered with type ETH_P_ALL. */
static struct list_head ptype_all __read_mostly;        /* Taps */
 172
 173 /*
 174  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 175  * semaphore.
 176  *
 177  * Pure readers hold dev_base_lock for reading.
 178  *
 179  * Writers must hold the rtnl semaphore while they loop through the
 180  * dev_base_head list, and hold dev_base_lock for writing when they do the
 181  * actual updates.  This allows pure readers to access the list even
 182  * while a writer is preparing to update it.
 183  *
 184  * To put it another way, dev_base_lock is held for writing only to
 185  * protect against pure readers; the rtnl semaphore provides the
 186  * protection against other writers.
 187  *
 188  * See, for example usages, register_netdevice() and
 189  * unregister_netdevice(), which must be called with the rtnl
 190  * semaphore held.
 191  */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

/*
 * Size of the per-namespace device hash tables: NETDEV_HASHBITS index bits
 * give NETDEV_HASHENTRIES buckets.  dev_name_hash() and dev_index_hash()
 * mask with (1 << NETDEV_HASHBITS) - 1 to pick a bucket.
 */
#define NETDEV_HASHBITS 8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 198
 199 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 200 {
 201         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 202         return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 203 }
 204
 205 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 206 {
 207         return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 208 }
 209
 210 /* Device list insertion */
 211 static int list_netdevice(struct net_device *dev)
 212 {
 213         struct net *net = dev_net(dev);
 214
 215         ASSERT_RTNL();
 216
 217         write_lock_bh(&dev_base_lock);
 218         list_add_tail(&dev->dev_list, &net->dev_base_head);
 219         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 220         hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 221         write_unlock_bh(&dev_base_lock);
 222         return 0;
 223 }
 224
 225 /* Device list removal */
 226 static void unlist_netdevice(struct net_device *dev)
 227 {
 228         ASSERT_RTNL();
 229
 230         /* Unlink dev from the device chain */
 231         write_lock_bh(&dev_base_lock);
 232         list_del(&dev->dev_list);
 233         hlist_del(&dev->name_hlist);
 234         hlist_del(&dev->index_hlist);
 235         write_unlock_bh(&dev_base_lock);
 236 }
 237
/*
 *	Our notifier list
 *
 *	NOTE(review): declared with RAW_NOTIFIER_HEAD; raw chains presumably
 *	leave serialisation to the callers - confirm which lock is relied on
 *	(the call sites are not in this part of the file).
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 *
 *	One struct softnet_data instance is defined per CPU.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
 250
 251 #ifdef CONFIG_LOCKDEP
 252 /*
 253  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 254  * according to dev->type
 255  */
 256 static const unsigned short netdev_lock_type[] =
 257         {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 258          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 259          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 260          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 261          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 262          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 263          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 264          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 265          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 266          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 267          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 268          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 269          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 270          ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
 271          ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
 272
 273 static const char *netdev_lock_name[] =
 274         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 275          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 276          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 277          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 278          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 279          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 280          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 281          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 282          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 283          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 284          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 285          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 286          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 287          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
 288          "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
 289
/*
 * One lock class key per netdev_lock_type[] entry: the first array is used
 * by netdev_set_xmit_lockdep_class(), the second by
 * netdev_set_addr_lockdep_class().  Both are indexed by netdev_lock_pos().
 */
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 292
 293 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 294 {
 295         int i;
 296
 297         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 298                 if (netdev_lock_type[i] == dev_type)
 299                         return i;
 300         /* the last key is used by default */
 301         return ARRAY_SIZE(netdev_lock_type) - 1;
 302 }
 303
 304 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 305                                                  unsigned short dev_type)
 306 {
 307         int i;
 308
 309         i = netdev_lock_pos(dev_type);
 310         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 311                                    netdev_lock_name[i]);
 312 }
 313
 314 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 315 {
 316         int i;
 317
 318         i = netdev_lock_pos(dev->type);
 319         lockdep_set_class_and_name(&dev->addr_list_lock,
 320                                    &netdev_addr_lock_key[i],
 321                                    netdev_lock_name[i]);
 322 }
 323 #else
/* Without CONFIG_LOCKDEP there is no lock class to set: both are no-ops. */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
                                                 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
 331 #endif
 332
 333 /*******************************************************************************
 334
 335                 Protocol management and registration routines
 336
 337 *******************************************************************************/
 338
 339 /*
 340  *      Add a protocol ID to the list. Now that the input handler is
 341  *      smarter we can dispense with all the messy stuff that used to be
 342  *      here.
 343  *
 344  *      BEWARE!!! Protocol handlers, mangling input packets,
 345  *      MUST BE last in hash buckets and checking protocol handlers
 346  *      MUST start from promiscuous ptype_all chain in net_bh.
 347  *      It is true now, do not change it.
 348  *      Explanation follows: if protocol handler, mangling packet, will
 349  *      be the first on list, it is not able to sense, that packet
 350  *      is cloned and should be copied-on-write, so that it will
 351  *      change it and subsequent readers will get broken packet.
 352  *                                                      --ANK (980803)
 353  */
 354
 355 /**
 356  *      dev_add_pack - add packet handler
 357  *      @pt: packet type declaration
 358  *
 359  *      Add a protocol handler to the networking stack. The passed &packet_type
 360  *      is linked into kernel lists and may not be freed until it has been
 361  *      removed from the kernel lists.
 362  *
 363  *      This call does not sleep therefore it can not
 364  *      guarantee all CPU's that are in middle of receiving packets
 365  *      will see the new packet type (until the next received packet).
 366  */
 367
 368 void dev_add_pack(struct packet_type *pt)
 369 {
 370         int hash;
 371
 372         spin_lock_bh(&ptype_lock);
 373         if (pt->type == htons(ETH_P_ALL))
 374                 list_add_rcu(&pt->list, &ptype_all);
 375         else {
 376                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 377                 list_add_rcu(&pt->list, &ptype_base[hash]);
 378         }
 379         spin_unlock_bh(&ptype_lock);
 380 }
 381
 382 /**
 383  *      __dev_remove_pack        - remove packet handler
 384  *      @pt: packet type declaration
 385  *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists, but it may only be freed or reused once the
 *	condition described below has been met.
 390  *
 391  *      The packet type might still be in use by receivers
 392  *      and must not be freed until after all the CPU's have gone
 393  *      through a quiescent state.
 394  */
 395 void __dev_remove_pack(struct packet_type *pt)
 396 {
 397         struct list_head *head;
 398         struct packet_type *pt1;
 399
 400         spin_lock_bh(&ptype_lock);
 401
 402         if (pt->type == htons(ETH_P_ALL))
 403                 head = &ptype_all;
 404         else
 405                 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 406
 407         list_for_each_entry(pt1, head, list) {
 408                 if (pt == pt1) {
 409                         list_del_rcu(&pt->list);
 410                         goto out;
 411                 }
 412         }
 413
 414         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 415 out:
 416         spin_unlock_bh(&ptype_lock);
 417 }
 418 /**
 419  *      dev_remove_pack  - remove packet handler
 420  *      @pt: packet type declaration
 421  *
 422  *      Remove a protocol handler that was previously added to the kernel
 423  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 424  *      from the kernel lists and can be freed or reused once this function
 425  *      returns.
 426  *
 427  *      This call sleeps to guarantee that no CPU is looking at the packet
 428  *      type after return.
 429  */
 430 void dev_remove_pack(struct packet_type *pt)
 431 {
 432         __dev_remove_pack(pt);
 433
 434         synchronize_net();
 435 }
 436
 437 /******************************************************************************
 438
 439                       Device Boot-time Settings Routines
 440
 441 *******************************************************************************/
 442
/*
 * Boot time configuration table.
 * Filled by netdev_boot_setup_add(); a slot whose name is empty or starts
 * with a space is treated as unused.
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 445
 446 /**
 447  *      netdev_boot_setup_add   - add new setup entry
 448  *      @name: name of the device
 449  *      @map: configured settings for the device
 450  *
 451  *      Adds new setup entry to the dev_boot_setup list.  The function
 452  *      returns 0 on error and 1 on success.  This is a generic routine to
 453  *      all netdevices.
 454  */
 455 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 456 {
 457         struct netdev_boot_setup *s;
 458         int i;
 459
 460         s = dev_boot_setup;
 461         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 462                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 463                         memset(s[i].name, 0, sizeof(s[i].name));
 464                         strlcpy(s[i].name, name, IFNAMSIZ);
 465                         memcpy(&s[i].map, map, sizeof(s[i].map));
 466                         break;
 467                 }
 468         }
 469
 470         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 471 }
 472
 473 /**
 474  *      netdev_boot_setup_check - check boot time settings
 475  *      @dev: the netdevice
 476  *
 477  *      Check boot time settings for the device.
 478  *      The found settings are set for the device to be used
 479  *      later in the device probing.
 480  *      Returns 0 if no settings found, 1 if they are.
 481  */
 482 int netdev_boot_setup_check(struct net_device *dev)
 483 {
 484         struct netdev_boot_setup *s = dev_boot_setup;
 485         int i;
 486
 487         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 488                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 489                     !strcmp(dev->name, s[i].name)) {
 490                         dev->irq        = s[i].map.irq;
 491                         dev->base_addr  = s[i].map.base_addr;
 492                         dev->mem_start  = s[i].map.mem_start;
 493                         dev->mem_end    = s[i].map.mem_end;
 494                         return 1;
 495                 }
 496         }
 497         return 0;
 498 }
 499
 500
 501 /**
 502  *      netdev_boot_base        - get address from boot time settings
 503  *      @prefix: prefix for network device
 504  *      @unit: id for network device
 505  *
 506  *      Check boot time settings for the base address of device.
 507  *      The found settings are set for the device to be used
 508  *      later in the device probing.
 509  *      Returns 0 if no settings found.
 510  */
 511 unsigned long netdev_boot_base(const char *prefix, int unit)
 512 {
 513         const struct netdev_boot_setup *s = dev_boot_setup;
 514         char name[IFNAMSIZ];
 515         int i;
 516
 517         sprintf(name, "%s%d", prefix, unit);
 518
 519         /*
 520          * If device already registered then return base of 1
 521          * to indicate not to probe for this interface
 522          */
 523         if (__dev_get_by_name(&init_net, name))
 524                 return 1;
 525
 526         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 527                 if (!strcmp(name, s[i].name))
 528                         return s[i].map.base_addr;
 529         return 0;
 530 }
 531
 532 /*
 533  * Saves at boot time configured settings for any netdevice.
 534  */
 535 int __init netdev_boot_setup(char *str)
 536 {
 537         int ints[5];
 538         struct ifmap map;
 539
 540         str = get_options(str, ARRAY_SIZE(ints), ints);
 541         if (!str || !*str)
 542                 return 0;
 543
 544         /* Save settings */
 545         memset(&map, 0, sizeof(map));
 546         if (ints[0] > 0)
 547                 map.irq = ints[1];
 548         if (ints[0] > 1)
 549                 map.base_addr = ints[2];
 550         if (ints[0] > 2)
 551                 map.mem_start = ints[3];
 552         if (ints[0] > 3)
 553                 map.mem_end = ints[4];
 554
 555         /* Add new entry to the list */
 556         return netdev_boot_setup_add(str, &map);
 557 }
 558
 559 __setup("netdev=", netdev_boot_setup);
 560
 561 /*******************************************************************************
 562
 563                             Device Interface Subroutines
 564
 565 *******************************************************************************/
 566
 567 /**
 568  *      __dev_get_by_name       - find a device by its name
 569  *      @net: the applicable net namespace
 570  *      @name: name to find
 571  *
 572  *      Find an interface by name. Must be called under RTNL semaphore
 573  *      or @dev_base_lock. If the name is found a pointer to the device
 574  *      is returned. If the name is not found then %NULL is returned. The
 575  *      reference counters are not incremented so the caller must be
 576  *      careful with locks.
 577  */
 578
 579 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 580 {
 581         struct hlist_node *p;
 582
 583         hlist_for_each(p, dev_name_hash(net, name)) {
 584                 struct net_device *dev
 585                         = hlist_entry(p, struct net_device, name_hlist);
 586                 if (!strncmp(dev->name, name, IFNAMSIZ))
 587                         return dev;
 588         }
 589         return NULL;
 590 }
 591
 592 /**
 593  *      dev_get_by_name         - find a device by its name
 594  *      @net: the applicable net namespace
 595  *      @name: name to find
 596  *
 597  *      Find an interface by name. This can be called from any
 598  *      context and does its own locking. The returned handle has
 599  *      the usage count incremented and the caller must use dev_put() to
 600  *      release it when it is no longer needed. %NULL is returned if no
 601  *      matching device is found.
 602  */
 603
 604 struct net_device *dev_get_by_name(struct net *net, const char *name)
 605 {
 606         struct net_device *dev;
 607
 608         read_lock(&dev_base_lock);
 609         dev = __dev_get_by_name(net, name);
 610         if (dev)
 611                 dev_hold(dev);
 612         read_unlock(&dev_base_lock);
 613         return dev;
 614 }
 615
 616 /**
 617  *      __dev_get_by_index - find a device by its ifindex
 618  *      @net: the applicable net namespace
 619  *      @ifindex: index of device
 620  *
 621  *      Search for an interface by index. Returns %NULL if the device
 622  *      is not found or a pointer to the device. The device has not
 623  *      had its reference counter increased so the caller must be careful
 624  *      about locking. The caller must hold either the RTNL semaphore
 625  *      or @dev_base_lock.
 626  */
 627
 628 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 629 {
 630         struct hlist_node *p;
 631
 632         hlist_for_each(p, dev_index_hash(net, ifindex)) {
 633                 struct net_device *dev
 634                         = hlist_entry(p, struct net_device, index_hlist);
 635                 if (dev->ifindex == ifindex)
 636                         return dev;
 637         }
 638         return NULL;
 639 }
 640
 641
 642 /**
 643  *      dev_get_by_index - find a device by its ifindex
 644  *      @net: the applicable net namespace
 645  *      @ifindex: index of device
 646  *
 647  *      Search for an interface by index. Returns NULL if the device
 648  *      is not found or a pointer to the device. The device returned has
 649  *      had a reference added and the pointer is safe until the user calls
 650  *      dev_put to indicate they have finished with it.
 651  */
 652
 653 struct net_device *dev_get_by_index(struct net *net, int ifindex)
 654 {
 655         struct net_device *dev;
 656
 657         read_lock(&dev_base_lock);
 658         dev = __dev_get_by_index(net, ifindex);
 659         if (dev)
 660                 dev_hold(dev);
 661         read_unlock(&dev_base_lock);
 662         return dev;
 663 }
 664
 665 /**
 666  *      dev_getbyhwaddr - find a device by its hardware address
 667  *      @net: the applicable net namespace
 668  *      @type: media type of device
 669  *      @ha: hardware address
 670  *
 671  *      Search for an interface by MAC address. Returns NULL if the device
 672  *      is not found or a pointer to the device. The caller must hold the
 673  *      rtnl semaphore. The returned device has not had its ref count increased
 674  *      and the caller must therefore be careful about locking
 675  *
 676  *      BUGS:
 677  *      If the API was consistent this would be __dev_get_by_hwaddr
 678  */
 679
 680 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 681 {
 682         struct net_device *dev;
 683
 684         ASSERT_RTNL();
 685
 686         for_each_netdev(net, dev)
 687                 if (dev->type == type &&
 688                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 689                         return dev;
 690
 691         return NULL;
 692 }
 693
 694 EXPORT_SYMBOL(dev_getbyhwaddr);
 695
 696 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 697 {
 698         struct net_device *dev;
 699
 700         ASSERT_RTNL();
 701         for_each_netdev(net, dev)
 702                 if (dev->type == type)
 703                         return dev;
 704
 705         return NULL;
 706 }
 707
 708 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 709
 710 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 711 {
 712         struct net_device *dev;
 713
 714         rtnl_lock();
 715         dev = __dev_getfirstbyhwtype(net, type);
 716         if (dev)
 717                 dev_hold(dev);
 718         rtnl_unlock();
 719         return dev;
 720 }
 721
 722 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 723
 724 /**
 725  *      dev_get_by_flags - find any device with given flags
 726  *      @net: the applicable net namespace
 727  *      @if_flags: IFF_* values
 728  *      @mask: bitmask of bits in if_flags to check
 729  *
 730  *      Search for any interface with the given flags. Returns NULL if a device
 731  *      is not found or a pointer to the device. The device returned has
 732  *      had a reference added and the pointer is safe until the user calls
 733  *      dev_put to indicate they have finished with it.
 734  */
 735
 736 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 737 {
 738         struct net_device *dev, *ret;
 739
 740         ret = NULL;
 741         read_lock(&dev_base_lock);
 742         for_each_netdev(net, dev) {
 743                 if (((dev->flags ^ if_flags) & mask) == 0) {
 744                         dev_hold(dev);
 745                         ret = dev;
 746                         break;
 747                 }
 748         }
 749         read_unlock(&dev_base_lock);
 750         return ret;
 751 }
 752
 753 /**
 754  *      dev_valid_name - check if name is okay for network device
 755  *      @name: name string
 756  *
 *	Network device names need to be valid file names
 *	to allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 760  */
 761 int dev_valid_name(const char *name)
 762 {
 763         if (*name == '\0')
 764                 return 0;
 765         if (strlen(name) >= IFNAMSIZ)
 766                 return 0;
 767         if (!strcmp(name, ".") || !strcmp(name, ".."))
 768                 return 0;
 769
 770         while (*name) {
 771                 if (*name == '/' || isspace(*name))
 772                         return 0;
 773                 name++;
 774         }
 775         return 1;
 776 }
 777
 778 /**
 779  *      __dev_alloc_name - allocate a name for a device
 780  *      @net: network namespace to allocate the device name in
 781  *      @name: name format string
 782  *      @buf:  scratch buffer and result name string
 783  *
 784  *      Passed a format string - eg "lt%d" it will try and find a suitable
 785  *      id. It scans list of devices to build up a free map, then chooses
 786  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 787  *      while allocating the name and adding the device in order to avoid
 788  *      duplicates.
 789  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 790  *      Returns the number of the unit assigned or a negative errno code.
 791  */
 792
 793 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 794 {
 795         int i = 0;
 796         const char *p;
 797         const int max_netdevices = 8*PAGE_SIZE;
 798         unsigned long *inuse;
 799         struct net_device *d;
 800
 801         p = strnchr(name, IFNAMSIZ-1, '%');
 802         if (p) {
 803                 /*
 804                  * Verify the string as this thing may have come from
 805                  * the user.  There must be either one "%d" and no other "%"
 806                  * characters.
 807                  */
 808                 if (p[1] != 'd' || strchr(p + 2, '%'))
 809                         return -EINVAL;
 810
 811                 /* Use one page as a bit array of possible slots */
 812                 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
 813                 if (!inuse)
 814                         return -ENOMEM;
 815
 816                 for_each_netdev(net, d) {
 817                         if (!sscanf(d->name, name, &i))
 818                                 continue;
 819                         if (i < 0 || i >= max_netdevices)
 820                                 continue;
 821
 822                         /*  avoid cases where sscanf is not exact inverse of printf */
 823                         snprintf(buf, IFNAMSIZ, name, i);
 824                         if (!strncmp(buf, d->name, IFNAMSIZ))
 825                                 set_bit(i, inuse);
 826                 }
 827
 828                 i = find_first_zero_bit(inuse, max_netdevices);
 829                 free_page((unsigned long) inuse);
 830         }
 831
 832         snprintf(buf, IFNAMSIZ, name, i);
 833         if (!__dev_get_by_name(net, buf))
 834                 return i;
 835
 836         /* It is possible to run out of possible slots
 837          * when the name is long and there isn't enough space left
 838          * for the digits, or if all bits are used.
 839          */
 840         return -ENFILE;
 841 }
 842
 843 /**
 844  *      dev_alloc_name - allocate a name for a device
 845  *      @dev: device
 846  *      @name: name format string
 847  *
 848  *      Passed a format string - eg "lt%d" it will try and find a suitable
 849  *      id. It scans list of devices to build up a free map, then chooses
 850  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 851  *      while allocating the name and adding the device in order to avoid
 852  *      duplicates.
 853  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 854  *      Returns the number of the unit assigned or a negative errno code.
 855  */
 856
 857 int dev_alloc_name(struct net_device *dev, const char *name)
 858 {
 859         char buf[IFNAMSIZ];
 860         struct net *net;
 861         int ret;
 862
 863         BUG_ON(!dev_net(dev));
 864         net = dev_net(dev);
 865         ret = __dev_alloc_name(net, name, buf);
 866         if (ret >= 0)
 867                 strlcpy(dev->name, buf, IFNAMSIZ);
 868         return ret;
 869 }
 870
 871
 872 /**
 873  *      dev_change_name - change name of a device
 874  *      @dev: device
 875  *      @newname: name (or format string) must be at least IFNAMSIZ
 876  *
 877  *      Change name of a device, can pass format strings "eth%d".
 878  *      for wildcarding.
 879  */
 880 int dev_change_name(struct net_device *dev, const char *newname)
 881 {
 882         char oldname[IFNAMSIZ];
 883         int err = 0;
 884         int ret;
 885         struct net *net;
 886
 887         ASSERT_RTNL();
 888         BUG_ON(!dev_net(dev));
 889
 890         net = dev_net(dev);
 891         if (dev->flags & IFF_UP)
 892                 return -EBUSY;
 893
 894         if (!dev_valid_name(newname))
 895                 return -EINVAL;
 896
 897         if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 898                 return 0;
 899
 900         memcpy(oldname, dev->name, IFNAMSIZ);
 901
 902         if (strchr(newname, '%')) {
 903                 err = dev_alloc_name(dev, newname);
 904                 if (err < 0)
 905                         return err;
 906         }
 907         else if (__dev_get_by_name(net, newname))
 908                 return -EEXIST;
 909         else
 910                 strlcpy(dev->name, newname, IFNAMSIZ);
 911
 912 rollback:
 913         /* For now only devices in the initial network namespace
 914          * are in sysfs.
 915          */
 916         if (net == &init_net) {
 917                 ret = device_rename(&dev->dev, dev->name);
 918                 if (ret) {
 919                         memcpy(dev->name, oldname, IFNAMSIZ);
 920                         return ret;
 921                 }
 922         }
 923
 924         write_lock_bh(&dev_base_lock);
 925         hlist_del(&dev->name_hlist);
 926         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 927         write_unlock_bh(&dev_base_lock);
 928
 929         ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 930         ret = notifier_to_errno(ret);
 931
 932         if (ret) {
 933                 if (err) {
 934                         printk(KERN_ERR
 935                                "%s: name change rollback failed: %d.\n",
 936                                dev->name, ret);
 937                 } else {
 938                         err = ret;
 939                         memcpy(dev->name, oldname, IFNAMSIZ);
 940                         goto rollback;
 941                 }
 942         }
 943
 944         return err;
 945 }
 946
 947 /**
 948  *      dev_set_alias - change ifalias of a device
 949  *      @dev: device
 950  *      @alias: name up to IFALIASZ
 951  *      @len: limit of bytes to copy from info
 952  *
 953  *      Set ifalias for a device,
 954  */
 955 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 956 {
 957         ASSERT_RTNL();
 958
 959         if (len >= IFALIASZ)
 960                 return -EINVAL;
 961
 962         if (!len) {
 963                 if (dev->ifalias) {
 964                         kfree(dev->ifalias);
 965                         dev->ifalias = NULL;
 966                 }
 967                 return 0;
 968         }
 969
 970         dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
 971         if (!dev->ifalias)
 972                 return -ENOMEM;
 973
 974         strlcpy(dev->ifalias, alias, len+1);
 975         return len;
 976 }
 977
 978
 979 /**
 980  *      netdev_features_change - device changes features
 981  *      @dev: device to cause notification
 982  *
 983  *      Called to indicate a device has changed features.
 984  */
 985 void netdev_features_change(struct net_device *dev)
 986 {
 987         call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
 988 }
 989 EXPORT_SYMBOL(netdev_features_change);
 990
 991 /**
 992  *      netdev_state_change - device changes state
 993  *      @dev: device to cause notification
 994  *
 995  *      Called to indicate a device has changed state. This function calls
 996  *      the notifier chains for netdev_chain and sends a NEWLINK message
 997  *      to the routing socket.
 998  */
 999 void netdev_state_change(struct net_device *dev)
1000 {
1001         if (dev->flags & IFF_UP) {
1002                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1003                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1004         }
1005 }
1006
1007 void netdev_bonding_change(struct net_device *dev)
1008 {
1009         call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1010 }
1011 EXPORT_SYMBOL(netdev_bonding_change);
1012
1013 /**
1014  *      dev_load        - load a network module
1015  *      @net: the applicable net namespace
1016  *      @name: name of interface
1017  *
1018  *      If a network interface is not present and the process has suitable
1019  *      privileges this function loads the module. If module loading is not
1020  *      available in this kernel then it becomes a nop.
1021  */
1022
1023 void dev_load(struct net *net, const char *name)
1024 {
1025         struct net_device *dev;
1026
1027         read_lock(&dev_base_lock);
1028         dev = __dev_get_by_name(net, name);
1029         read_unlock(&dev_base_lock);
1030
1031         if (!dev && capable(CAP_SYS_MODULE))
1032                 request_module("%s", name);
1033 }
1034
1035 /**
1036  *      dev_open        - prepare an interface for use.
1037  *      @dev:   device to open
1038  *
1039  *      Takes a device from down to up state. The device's private open
1040  *      function is invoked and then the multicast lists are loaded. Finally
1041  *      the device is moved into the up state and a %NETDEV_UP message is
1042  *      sent to the netdev notifier chain.
1043  *
1044  *      Calling this function on an active interface is a nop. On a failure
1045  *      a negative errno code is returned.
1046  */
1047 int dev_open(struct net_device *dev)
1048 {
1049         const struct net_device_ops *ops = dev->netdev_ops;
1050         int ret = 0;
1051
1052         ASSERT_RTNL();
1053
1054         /*
1055          *      Is it already up?
1056          */
1057
1058         if (dev->flags & IFF_UP)
1059                 return 0;
1060
1061         /*
1062          *      Is it even present?
1063          */
1064         if (!netif_device_present(dev))
1065                 return -ENODEV;
1066
1067         /*
1068          *      Call device private open method
1069          */
1070         set_bit(__LINK_STATE_START, &dev->state);
1071
1072         if (ops->ndo_validate_addr)
1073                 ret = ops->ndo_validate_addr(dev);
1074
1075         if (!ret && ops->ndo_open)
1076                 ret = ops->ndo_open(dev);
1077
1078         /*
1079          *      If it went open OK then:
1080          */
1081
1082         if (ret)
1083                 clear_bit(__LINK_STATE_START, &dev->state);
1084         else {
1085                 /*
1086                  *      Set the flags.
1087                  */
1088                 dev->flags |= IFF_UP;
1089
1090                 /*
1091                  *      Enable NET_DMA
1092                  */
1093                 net_dmaengine_get();
1094
1095                 /*
1096                  *      Initialize multicasting status
1097                  */
1098                 dev_set_rx_mode(dev);
1099
1100                 /*
1101                  *      Wakeup transmit queue engine
1102                  */
1103                 dev_activate(dev);
1104
1105                 /*
1106                  *      ... and announce new interface.
1107                  */
1108                 call_netdevice_notifiers(NETDEV_UP, dev);
1109         }
1110
1111         return ret;
1112 }
1113
1114 /**
1115  *      dev_close - shutdown an interface.
1116  *      @dev: device to shutdown
1117  *
1118  *      This function moves an active device into down state. A
1119  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1120  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1121  *      chain.
1122  */
1123 int dev_close(struct net_device *dev)
1124 {
1125         const struct net_device_ops *ops = dev->netdev_ops;
1126         ASSERT_RTNL();
1127
1128         might_sleep();
1129
1130         if (!(dev->flags & IFF_UP))
1131                 return 0;
1132
1133         /*
1134          *      Tell people we are going down, so that they can
1135          *      prepare to death, when device is still operating.
1136          */
1137         call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1138
1139         clear_bit(__LINK_STATE_START, &dev->state);
1140
1141         /* Synchronize to scheduled poll. We cannot touch poll list,
1142          * it can be even on different cpu. So just clear netif_running().
1143          *
1144          * dev->stop() will invoke napi_disable() on all of it's
1145          * napi_struct instances on this device.
1146          */
1147         smp_mb__after_clear_bit(); /* Commit netif_running(). */
1148
1149         dev_deactivate(dev);
1150
1151         /*
1152          *      Call the device specific close. This cannot fail.
1153          *      Only if device is UP
1154          *
1155          *      We allow it to be called even after a DETACH hot-plug
1156          *      event.
1157          */
1158         if (ops->ndo_stop)
1159                 ops->ndo_stop(dev);
1160
1161         /*
1162          *      Device is now down.
1163          */
1164
1165         dev->flags &= ~IFF_UP;
1166
1167         /*
1168          * Tell people we are down
1169          */
1170         call_netdevice_notifiers(NETDEV_DOWN, dev);
1171
1172         /*
1173          *      Shutdown NET_DMA
1174          */
1175         net_dmaengine_put();
1176
1177         return 0;
1178 }
1179
1180
1181 /**
1182  *      dev_disable_lro - disable Large Receive Offload on a device
1183  *      @dev: device
1184  *
1185  *      Disable Large Receive Offload (LRO) on a net device.  Must be
1186  *      called under RTNL.  This is needed if received packets may be
1187  *      forwarded to another interface.
1188  */
1189 void dev_disable_lro(struct net_device *dev)
1190 {
1191         if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1192             dev->ethtool_ops->set_flags) {
1193                 u32 flags = dev->ethtool_ops->get_flags(dev);
1194                 if (flags & ETH_FLAG_LRO) {
1195                         flags &= ~ETH_FLAG_LRO;
1196                         dev->ethtool_ops->set_flags(dev, flags);
1197                 }
1198         }
1199         WARN_ON(dev->features & NETIF_F_LRO);
1200 }
1201 EXPORT_SYMBOL(dev_disable_lro);
1202
1203
1204 static int dev_boot_phase = 1;
1205
1206 /*
1207  *      Device change register/unregister. These are not inline or static
1208  *      as we export them to the world.
1209  */
1210
1211 /**
1212  *      register_netdevice_notifier - register a network notifier block
1213  *      @nb: notifier
1214  *
1215  *      Register a notifier to be called when network device events occur.
1216  *      The notifier passed is linked into the kernel structures and must
1217  *      not be reused until it has been unregistered. A negative errno code
1218  *      is returned on a failure.
1219  *
1220  *      When registered all registration and up events are replayed
1221  *      to the new notifier to allow device to have a race free
1222  *      view of the network device list.
1223  */
1224
1225 int register_netdevice_notifier(struct notifier_block *nb)
1226 {
1227         struct net_device *dev;
1228         struct net_device *last;
1229         struct net *net;
1230         int err;
1231
1232         rtnl_lock();
1233         err = raw_notifier_chain_register(&netdev_chain, nb);
1234         if (err)
1235                 goto unlock;
1236         if (dev_boot_phase)
1237                 goto unlock;
1238         for_each_net(net) {
1239                 for_each_netdev(net, dev) {
1240                         err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1241                         err = notifier_to_errno(err);
1242                         if (err)
1243                                 goto rollback;
1244
1245                         if (!(dev->flags & IFF_UP))
1246                                 continue;
1247
1248                         nb->notifier_call(nb, NETDEV_UP, dev);
1249                 }
1250         }
1251
1252 unlock:
1253         rtnl_unlock();
1254         return err;
1255
1256 rollback:
1257         last = dev;
1258         for_each_net(net) {
1259                 for_each_netdev(net, dev) {
1260                         if (dev == last)
1261                                 break;
1262
1263                         if (dev->flags & IFF_UP) {
1264                                 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1265                                 nb->notifier_call(nb, NETDEV_DOWN, dev);
1266                         }
1267                         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1268                 }
1269         }
1270
1271         raw_notifier_chain_unregister(&netdev_chain, nb);
1272         goto unlock;
1273 }
1274
1275 /**
1276  *      unregister_netdevice_notifier - unregister a network notifier block
1277  *      @nb: notifier
1278  *
1279  *      Unregister a notifier previously registered by
1280  *      register_netdevice_notifier(). The notifier is unlinked into the
1281  *      kernel structures and may then be reused. A negative errno code
1282  *      is returned on a failure.
1283  */
1284
1285 int unregister_netdevice_notifier(struct notifier_block *nb)
1286 {
1287         int err;
1288
1289         rtnl_lock();
1290         err = raw_notifier_chain_unregister(&netdev_chain, nb);
1291         rtnl_unlock();
1292         return err;
1293 }
1294
1295 /**
1296  *      call_netdevice_notifiers - call all network notifier blocks
1297  *      @val: value passed unmodified to notifier function
1298  *      @dev: net_device pointer passed unmodified to notifier function
1299  *
1300  *      Call all network notifier blocks.  Parameters and return value
1301  *      are as for raw_notifier_call_chain().
1302  */
1303
1304 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1305 {
1306         return raw_notifier_call_chain(&netdev_chain, val, dev);
1307 }
1308
1309 /* When > 0 there are consumers of rx skb time stamps */
1310 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1311
1312 void net_enable_timestamp(void)
1313 {
1314         atomic_inc(&netstamp_needed);
1315 }
1316
1317 void net_disable_timestamp(void)
1318 {
1319         atomic_dec(&netstamp_needed);
1320 }
1321
1322 static inline void net_timestamp(struct sk_buff *skb)
1323 {
1324         if (atomic_read(&netstamp_needed))
1325                 __net_timestamp(skb);
1326         else
1327                 skb->tstamp.tv64 = 0;
1328 }
1329
1330 /*
1331  *      Support routine. Sends outgoing frames to any network
1332  *      taps currently in use.
1333  */
1334
1335 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1336 {
1337         struct packet_type *ptype;
1338
1339 #ifdef CONFIG_NET_CLS_ACT
1340         if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1341                 net_timestamp(skb);
1342 #else
1343         net_timestamp(skb);
1344 #endif
1345
1346         rcu_read_lock();
1347         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1348                 /* Never send packets back to the socket
1349                  * they originated from - MvS (miquels@drinkel.ow.org)
1350                  */
1351                 if ((ptype->dev == dev || !ptype->dev) &&
1352                     (ptype->af_packet_priv == NULL ||
1353                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1354                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1355                         if (!skb2)
1356                                 break;
1357
1358                         /* skb->nh should be correctly
1359                            set by sender, so that the second statement is
1360                            just protection against buggy protocols.
1361                          */
1362                         skb_reset_mac_header(skb2);
1363
1364                         if (skb_network_header(skb2) < skb2->data ||
1365                             skb2->network_header > skb2->tail) {
1366                                 if (net_ratelimit())
1367                                         printk(KERN_CRIT "protocol %04x is "
1368                                                "buggy, dev %s\n",
1369                                                skb2->protocol, dev->name);
1370                                 skb_reset_network_header(skb2);
1371                         }
1372
1373                         skb2->transport_header = skb2->network_header;
1374                         skb2->pkt_type = PACKET_OUTGOING;
1375                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1376                 }
1377         }
1378         rcu_read_unlock();
1379 }
1380
1381
1382 static inline void __netif_reschedule(struct Qdisc *q)
1383 {
1384         struct softnet_data *sd;
1385         unsigned long flags;
1386
1387         local_irq_save(flags);
1388         sd = &__get_cpu_var(softnet_data);
1389         q->next_sched = sd->output_queue;
1390         sd->output_queue = q;
1391         raise_softirq_irqoff(NET_TX_SOFTIRQ);
1392         local_irq_restore(flags);
1393 }
1394
1395 void __netif_schedule(struct Qdisc *q)
1396 {
1397         if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1398                 __netif_reschedule(q);
1399 }
1400 EXPORT_SYMBOL(__netif_schedule);
1401
1402 void dev_kfree_skb_irq(struct sk_buff *skb)
1403 {
1404         if (atomic_dec_and_test(&skb->users)) {
1405                 struct softnet_data *sd;
1406                 unsigned long flags;
1407
1408                 local_irq_save(flags);
1409                 sd = &__get_cpu_var(softnet_data);
1410                 skb->next = sd->completion_queue;
1411                 sd->completion_queue = skb;
1412                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1413                 local_irq_restore(flags);
1414         }
1415 }
1416 EXPORT_SYMBOL(dev_kfree_skb_irq);
1417
/*
 *	Free @skb from any context: uses dev_kfree_skb_irq() when in hard
 *	irq context or with interrupts disabled, dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled())
		dev_kfree_skb(skb);
	else
		dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1426
1427
1428 /**
1429  * netif_device_detach - mark device as removed
1430  * @dev: network device
1431  *
1432  * Mark device as removed from system and therefore no longer available.
1433  */
1434 void netif_device_detach(struct net_device *dev)
1435 {
1436         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1437             netif_running(dev)) {
1438                 netif_tx_stop_all_queues(dev);
1439         }
1440 }
1441 EXPORT_SYMBOL(netif_device_detach);
1442
1443 /**
1444  * netif_device_attach - mark device as attached
1445  * @dev: network device
1446  *
1447  * Mark device as attached from system and restart if needed.
1448  */
1449 void netif_device_attach(struct net_device *dev)
1450 {
1451         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1452             netif_running(dev)) {
1453                 netif_tx_wake_all_queues(dev);
1454                 __netdev_watchdog_up(dev);
1455         }
1456 }
1457 EXPORT_SYMBOL(netif_device_attach);
1458
1459 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1460 {
1461         return ((features & NETIF_F_GEN_CSUM) ||
1462                 ((features & NETIF_F_IP_CSUM) &&
1463                  protocol == htons(ETH_P_IP)) ||
1464                 ((features & NETIF_F_IPV6_CSUM) &&
1465                  protocol == htons(ETH_P_IPV6)) ||
1466                 ((features & NETIF_F_FCOE_CRC) &&
1467                  protocol == htons(ETH_P_FCOE)));
1468 }
1469
1470 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1471 {
1472         if (can_checksum_protocol(dev->features, skb->protocol))
1473                 return true;
1474
1475         if (skb->protocol == htons(ETH_P_8021Q)) {
1476                 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1477                 if (can_checksum_protocol(dev->features & dev->vlan_features,
1478                                           veh->h_vlan_encapsulated_proto))
1479                         return true;
1480         }
1481
1482         return false;
1483 }
1484
1485 /*
1486  * Invalidate hardware checksum when packet is to be mangled, and
1487  * complete checksum manually on outgoing path.
1488  */
1489 int skb_checksum_help(struct sk_buff *skb)
1490 {
1491         __wsum csum;
1492         int ret = 0, offset;
1493
1494         if (skb->ip_summed == CHECKSUM_COMPLETE)
1495                 goto out_set_summed;
1496
1497         if (unlikely(skb_shinfo(skb)->gso_size)) {
1498                 /* Let GSO fix up the checksum. */
1499                 goto out_set_summed;
1500         }
1501
1502         offset = skb->csum_start - skb_headroom(skb);
1503         BUG_ON(offset >= skb_headlen(skb));
1504         csum = skb_checksum(skb, offset, skb->len - offset, 0);
1505
1506         offset += skb->csum_offset;
1507         BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1508
1509         if (skb_cloned(skb) &&
1510             !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1511                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1512                 if (ret)
1513                         goto out;
1514         }
1515
1516         *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1517 out_set_summed:
1518         skb->ip_summed = CHECKSUM_NONE;
1519 out:
1520         return ret;
1521 }
1522
1523 /**
1524  *      skb_gso_segment - Perform segmentation on skb.
1525  *      @skb: buffer to segment
1526  *      @features: features for the output path (see dev->features)
1527  *
1528  *      This function segments the given skb and returns a list of segments.
1529  *
1530  *      It may return NULL if the skb requires no segmentation.  This is
1531  *      only possible when GSO is used for verifying header integrity.
1532  */
1533 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1534 {
1535         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1536         struct packet_type *ptype;
1537         __be16 type = skb->protocol;
1538         int err;
1539
1540         skb_reset_mac_header(skb);
1541         skb->mac_len = skb->network_header - skb->mac_header;
1542         __skb_pull(skb, skb->mac_len);
1543
1544         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1545                 struct net_device *dev = skb->dev;
1546                 struct ethtool_drvinfo info = {};
1547
1548                 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1549                         dev->ethtool_ops->get_drvinfo(dev, &info);
1550
1551                 WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1552                         "ip_summed=%d",
1553                      info.driver, dev ? dev->features : 0L,
1554                      skb->sk ? skb->sk->sk_route_caps : 0L,
1555                      skb->len, skb->data_len, skb->ip_summed);
1556
1557                 if (skb_header_cloned(skb) &&
1558                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1559                         return ERR_PTR(err);
1560         }
1561
1562         rcu_read_lock();
1563         list_for_each_entry_rcu(ptype,
1564                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1565                 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1566                         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1567                                 err = ptype->gso_send_check(skb);
1568                                 segs = ERR_PTR(err);
1569                                 if (err || skb_gso_ok(skb, features))
1570                                         break;
1571                                 __skb_push(skb, (skb->data -
1572                                                  skb_network_header(skb)));
1573                         }
1574                         segs = ptype->gso_segment(skb, features);
1575                         break;
1576                 }
1577         }
1578         rcu_read_unlock();
1579
1580         __skb_push(skb, skb->data - skb_mac_header(skb));
1581
1582         return segs;
1583 }
1584
1585 EXPORT_SYMBOL(skb_gso_segment);
1586
/*
 * Take action when hardware reception checksum errors are detected:
 * log the device name (or "<unknown>" without a device) and dump the
 * stack, subject to net_ratelimit().
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
		dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1599
/* Actually, we should eliminate this check as soon as we know that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 *
 * Returns 1 when CONFIG_HIGHMEM is set, the device lacks
 * NETIF_F_HIGHDMA and one of the skb's page fragments lives in high
 * memory; returns 0 otherwise.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
		if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
			return 1;
	}

#endif
	return 0;
}
1620
/*
 * Private data kept in skb->cb of an skb that carries a list of GSO
 * segments in skb->next.
 */
struct dev_gso_cb {
	/* destructor the skb had before dev_gso_skb_destructor was set */
	void (*destructor)(struct sk_buff *skb);
};

/* View skb->cb as a struct dev_gso_cb */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1626
1627 static void dev_gso_skb_destructor(struct sk_buff *skb)
1628 {
1629         struct dev_gso_cb *cb;
1630
1631         do {
1632                 struct sk_buff *nskb = skb->next;
1633
1634                 skb->next = nskb->next;
1635                 nskb->next = NULL;
1636                 kfree_skb(nskb);
1637         } while (skb->next);
1638
1639         cb = DEV_GSO_CB(skb);
1640         if (cb->destructor)
1641                 cb->destructor(skb);
1642 }
1643
1644 /**
1645  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1646  *      @skb: buffer to segment
1647  *
1648  *      This function segments the given skb and stores the list of segments
1649  *      in skb->next.
1650  */
1651 static int dev_gso_segment(struct sk_buff *skb)
1652 {
1653         struct net_device *dev = skb->dev;
1654         struct sk_buff *segs;
1655         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1656                                          NETIF_F_SG : 0);
1657
1658         segs = skb_gso_segment(skb, features);
1659
1660         /* Verifying header integrity only. */
1661         if (!segs)
1662                 return 0;
1663
1664         if (IS_ERR(segs))
1665                 return PTR_ERR(segs);
1666
1667         skb->next = segs;
1668         DEV_GSO_CB(skb)->destructor = skb->destructor;
1669         skb->destructor = dev_gso_skb_destructor;
1670
1671         return 0;
1672 }
1673
1674 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1675                         struct netdev_queue *txq)
1676 {
1677         const struct net_device_ops *ops = dev->netdev_ops;
1678         int rc;
1679
1680         if (likely(!skb->next)) {
1681                 if (!list_empty(&ptype_all))
1682                         dev_queue_xmit_nit(skb, dev);
1683
1684                 if (netif_needs_gso(dev, skb)) {
1685                         if (unlikely(dev_gso_segment(skb)))
1686                                 goto out_kfree_skb;
1687                         if (skb->next)
1688                                 goto gso;
1689                 }
1690
1691                 /*
1692                  * If device doesnt need skb->dst, release it right now while
1693                  * its hot in this cpu cache
1694                  */
1695                 if ((dev->priv_flags & IFF_XMIT_DST_RELEASE) && skb->dst) {
1696                         dst_release(skb->dst);
1697                         skb->dst = NULL;
1698                 }
1699                 rc = ops->ndo_start_xmit(skb, dev);
1700                 /*
1701                  * TODO: if skb_orphan() was called by
1702                  * dev->hard_start_xmit() (for example, the unmodified
1703                  * igb driver does that; bnx2 doesn't), then
1704                  * skb_tx_software_timestamp() will be unable to send
1705                  * back the time stamp.
1706                  *
1707                  * How can this be prevented? Always create another
1708                  * reference to the socket before calling
1709                  * dev->hard_start_xmit()? Prevent that skb_orphan()
1710                  * does anything in dev->hard_start_xmit() by clearing
1711                  * the skb destructor before the call and restoring it
1712                  * afterwards, then doing the skb_orphan() ourselves?
1713                  */
1714                 return rc;
1715         }
1716
1717 gso:
1718         do {
1719                 struct sk_buff *nskb = skb->next;
1720
1721                 skb->next = nskb->next;
1722                 nskb->next = NULL;
1723                 rc = ops->ndo_start_xmit(nskb, dev);
1724                 if (unlikely(rc)) {
1725                         nskb->next = skb->next;
1726                         skb->next = nskb;
1727                         return rc;
1728                 }
1729                 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1730                         return NETDEV_TX_BUSY;
1731         } while (skb->next);
1732
1733         skb->destructor = DEV_GSO_CB(skb)->destructor;
1734
1735 out_kfree_skb:
1736         kfree_skb(skb);
1737         return 0;
1738 }
1739
1740 static u32 skb_tx_hashrnd;
1741
1742 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1743 {
1744         u32 hash;
1745
1746         if (skb_rx_queue_recorded(skb)) {
1747                 hash = skb_get_rx_queue(skb);
1748                 while (unlikely (hash >= dev->real_num_tx_queues))
1749                         hash -= dev->real_num_tx_queues;
1750                 return hash;
1751         }
1752
1753         if (skb->sk && skb->sk->sk_hash)
1754                 hash = skb->sk->sk_hash;
1755         else
1756                 hash = skb->protocol;
1757
1758         hash = jhash_1word(hash, skb_tx_hashrnd);
1759
1760         return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1761 }
1762 EXPORT_SYMBOL(skb_tx_hash);
1763
1764 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1765                                         struct sk_buff *skb)
1766 {
1767         const struct net_device_ops *ops = dev->netdev_ops;
1768         u16 queue_index = 0;
1769
1770         if (ops->ndo_select_queue)
1771                 queue_index = ops->ndo_select_queue(dev, skb);
1772         else if (dev->real_num_tx_queues > 1)
1773                 queue_index = skb_tx_hash(dev, skb);
1774
1775         skb_set_queue_mapping(skb, queue_index);
1776         return netdev_get_tx_queue(dev, queue_index);
1777 }
1778
1779 /**
1780  *      dev_queue_xmit - transmit a buffer
1781  *      @skb: buffer to transmit
1782  *
1783  *      Queue a buffer for transmission to a network device. The caller must
1784  *      have set the device and priority and built the buffer before calling
1785  *      this function. The function can be called from an interrupt.
1786  *
1787  *      A negative errno code is returned on a failure. A success does not
1788  *      guarantee the frame will be transmitted as it may be dropped due
1789  *      to congestion or traffic shaping.
1790  *
1791  * -----------------------------------------------------------------------------------
1792  *      I notice this method can also return errors from the queue disciplines,
1793  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1794  *      be positive.
1795  *
1796  *      Regardless of the return value, the skb is consumed, so it is currently
1797  *      difficult to retry a send to this method.  (You can bump the ref count
1798  *      before sending to hold a reference for retry if you are careful.)
1799  *
1800  *      When calling this method, interrupts MUST be enabled.  This is because
1801  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1802  *          --BLG
1803  */
1804 int dev_queue_xmit(struct sk_buff *skb)
1805 {
1806         struct net_device *dev = skb->dev;
1807         struct netdev_queue *txq;
1808         struct Qdisc *q;
1809         int rc = -ENOMEM;
1810
1811         /* GSO will handle the following emulations directly. */
1812         if (netif_needs_gso(dev, skb))
1813                 goto gso;
1814
1815         if (skb_shinfo(skb)->frag_list &&
1816             !(dev->features & NETIF_F_FRAGLIST) &&
1817             __skb_linearize(skb))
1818                 goto out_kfree_skb;
1819
1820         /* Fragmented skb is linearized if device does not support SG,
1821          * or if at least one of fragments is in highmem and device
1822          * does not support DMA from it.
1823          */
1824         if (skb_shinfo(skb)->nr_frags &&
1825             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1826             __skb_linearize(skb))
1827                 goto out_kfree_skb;
1828
1829         /* If packet is not checksummed and device does not support
1830          * checksumming for this protocol, complete checksumming here.
1831          */
1832         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1833                 skb_set_transport_header(skb, skb->csum_start -
1834                                               skb_headroom(skb));
1835                 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1836                         goto out_kfree_skb;
1837         }
1838
1839 gso:
1840         /* Disable soft irqs for various locks below. Also
1841          * stops preemption for RCU.
1842          */
1843         rcu_read_lock_bh();
1844
1845         txq = dev_pick_tx(dev, skb);
1846         q = rcu_dereference(txq->qdisc);
1847
1848 #ifdef CONFIG_NET_CLS_ACT
1849         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1850 #endif
1851         if (q->enqueue) {
1852                 spinlock_t *root_lock = qdisc_lock(q);
1853
1854                 spin_lock(root_lock);
1855
1856                 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1857                         kfree_skb(skb);
1858                         rc = NET_XMIT_DROP;
1859                 } else {
1860                         rc = qdisc_enqueue_root(skb, q);
1861                         qdisc_run(q);
1862                 }
1863                 spin_unlock(root_lock);
1864
1865                 goto out;
1866         }
1867
1868         /* The device has no queue. Common case for software devices:
1869            loopback, all the sorts of tunnels...
1870
1871            Really, it is unlikely that netif_tx_lock protection is necessary
1872            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1873            counters.)
1874            However, it is possible, that they rely on protection
1875            made by us here.
1876
1877            Check this and shot the lock. It is not prone from deadlocks.
1878            Either shot noqueue qdisc, it is even simpler 8)
1879          */
1880         if (dev->flags & IFF_UP) {
1881                 int cpu = smp_processor_id(); /* ok because BHs are off */
1882
1883                 if (txq->xmit_lock_owner != cpu) {
1884
1885                         HARD_TX_LOCK(dev, txq, cpu);
1886
1887                         if (!netif_tx_queue_stopped(txq)) {
1888                                 rc = 0;
1889                                 if (!dev_hard_start_xmit(skb, dev, txq)) {
1890                                         HARD_TX_UNLOCK(dev, txq);
1891                                         goto out;
1892                                 }
1893                         }
1894                         HARD_TX_UNLOCK(dev, txq);
1895                         if (net_ratelimit())
1896                                 printk(KERN_CRIT "Virtual device %s asks to "
1897                                        "queue packet!\n", dev->name);
1898                 } else {
1899                         /* Recursion is detected! It is possible,
1900                          * unfortunately */
1901                         if (net_ratelimit())
1902                                 printk(KERN_CRIT "Dead loop on virtual device "
1903                                        "%s, fix it urgently!\n", dev->name);
1904                 }
1905         }
1906
1907         rc = -ENETDOWN;
1908         rcu_read_unlock_bh();
1909
1910 out_kfree_skb:
1911         kfree_skb(skb);
1912         return rc;
1913 out:
1914         rcu_read_unlock_bh();
1915         return rc;
1916 }
1917
1918
1919 /*=======================================================================
1920                         Receiver routines
1921   =======================================================================*/
1922
1923 int netdev_max_backlog __read_mostly = 1000;
1924 int netdev_budget __read_mostly = 300;
1925 int weight_p __read_mostly = 64;            /* old backlog weight */
1926
1927 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1928
1929
1930 /**
1931  *      netif_rx        -       post buffer to the network code
1932  *      @skb: buffer to post
1933  *
1934  *      This function receives a packet from a device driver and queues it for
1935  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1936  *      may be dropped during processing for congestion control or by the
1937  *      protocol layers.
1938  *
1939  *      return values:
1940  *      NET_RX_SUCCESS  (no congestion)
1941  *      NET_RX_DROP     (packet was dropped)
1942  *
1943  */
1944
1945 int netif_rx(struct sk_buff *skb)
1946 {
1947         struct softnet_data *queue;
1948         unsigned long flags;
1949
1950         /* if netpoll wants it, pretend we never saw it */
1951         if (netpoll_rx(skb))
1952                 return NET_RX_DROP;
1953
1954         if (!skb->tstamp.tv64)
1955                 net_timestamp(skb);
1956
1957         /*
1958          * The code is rearranged so that the path is the most
1959          * short when CPU is congested, but is still operating.
1960          */
1961         local_irq_save(flags);
1962         queue = &__get_cpu_var(softnet_data);
1963
1964         __get_cpu_var(netdev_rx_stat).total++;
1965         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1966                 if (queue->input_pkt_queue.qlen) {
1967 enqueue:
1968                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1969                         local_irq_restore(flags);
1970                         return NET_RX_SUCCESS;
1971                 }
1972
1973                 napi_schedule(&queue->backlog);
1974                 goto enqueue;
1975         }
1976
1977         __get_cpu_var(netdev_rx_stat).dropped++;
1978         local_irq_restore(flags);
1979
1980         kfree_skb(skb);
1981         return NET_RX_DROP;
1982 }
1983
/*
 * Wrapper around netif_rx() that runs with preemption disabled and, if a
 * softirq is pending once the packet has been queued, runs do_softirq()
 * before re-enabling preemption.  Returns the netif_rx() result.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret;

	preempt_disable();

	ret = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
1998
1999 static void net_tx_action(struct softirq_action *h)
2000 {
2001         struct softnet_data *sd = &__get_cpu_var(softnet_data);
2002
2003         if (sd->completion_queue) {
2004                 struct sk_buff *clist;
2005
2006                 local_irq_disable();
2007                 clist = sd->completion_queue;
2008                 sd->completion_queue = NULL;
2009                 local_irq_enable();
2010
2011                 while (clist) {
2012                         struct sk_buff *skb = clist;
2013                         clist = clist->next;
2014
2015                         WARN_ON(atomic_read(&skb->users));
2016                         __kfree_skb(skb);
2017                 }
2018         }
2019
2020         if (sd->output_queue) {
2021                 struct Qdisc *head;
2022
2023                 local_irq_disable();
2024                 head = sd->output_queue;
2025                 sd->output_queue = NULL;
2026                 local_irq_enable();
2027
2028                 while (head) {
2029                         struct Qdisc *q = head;
2030                         spinlock_t *root_lock;
2031
2032                         head = head->next_sched;
2033
2034                         root_lock = qdisc_lock(q);
2035                         if (spin_trylock(root_lock)) {
2036                                 smp_mb__before_clear_bit();
2037                                 clear_bit(__QDISC_STATE_SCHED,
2038                                           &q->state);
2039                                 qdisc_run(q);
2040                                 spin_unlock(root_lock);
2041                         } else {
2042                                 if (!test_bit(__QDISC_STATE_DEACTIVATED,
2043                                               &q->state)) {
2044                                         __netif_reschedule(q);
2045                                 } else {
2046                                         smp_mb__before_clear_bit();
2047                                         clear_bit(__QDISC_STATE_SCHED,
2048                                                   &q->state);
2049                                 }
2050                         }
2051                 }
2052         }
2053 }
2054
2055 static inline int deliver_skb(struct sk_buff *skb,
2056                               struct packet_type *pt_prev,
2057                               struct net_device *orig_dev)
2058 {
2059         atomic_inc(&skb->users);
2060         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2061 }
2062
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks are defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If the bridge module is loaded, call the bridging hook.
 * The hook returns NULL if the packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;

/*
 * Give the bridge a chance to take @skb.
 *
 * Loopback packets and packets whose device has no bridge port are
 * returned untouched.  Otherwise any handler pending in *pt_prev is
 * delivered first (its result stored in *ret, *pt_prev cleared) and the
 * skb is passed to br_handle_frame_hook(), whose result is returned.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2096
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Give macvlan a chance to take @skb.
 *
 * Packets whose device has no macvlan_port are returned untouched.
 * Otherwise any handler pending in *pt_prev is delivered first (its
 * result stored in *ret, *pt_prev cleared) and the result of
 * macvlan_handle_frame_hook() is returned.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	struct packet_type *pending;

	if (skb->dev->macvlan_port == NULL)
		return skb;

	pending = *pt_prev;
	if (pending) {
		*ret = deliver_skb(skb, pending, orig_dev);
		*pt_prev = NULL;
	}

	return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2118
2119 #ifdef CONFIG_NET_CLS_ACT
2120 /* TODO: Maybe we should just force sch_ingress to be compiled in
2121  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2122  * a compare and 2 stores extra right now if we dont have it on
2123  * but have CONFIG_NET_CLS_ACT
2124  * NOTE: This doesnt stop any functionality; if you dont have
2125  * the ingress scheduler, you just cant add policies on ingress.
2126  *
2127  */
2128 static int ing_filter(struct sk_buff *skb)
2129 {
2130         struct net_device *dev = skb->dev;
2131         u32 ttl = G_TC_RTTL(skb->tc_verd);
2132         struct netdev_queue *rxq;
2133         int result = TC_ACT_OK;
2134         struct Qdisc *q;
2135
2136         if (MAX_RED_LOOP < ttl++) {
2137                 printk(KERN_WARNING
2138                        "Redir loop detected Dropping packet (%d->%d)\n",
2139                        skb->iif, dev->ifindex);
2140                 return TC_ACT_SHOT;
2141         }
2142
2143         skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2144         skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2145
2146         rxq = &dev->rx_queue;
2147
2148         q = rxq->qdisc;
2149         if (q != &noop_qdisc) {
2150                 spin_lock(qdisc_lock(q));
2151                 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2152                         result = qdisc_enqueue_root(skb, q);
2153                 spin_unlock(qdisc_lock(q));
2154         }
2155
2156         return result;
2157 }
2158
2159 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2160                                          struct packet_type **pt_prev,
2161                                          int *ret, struct net_device *orig_dev)
2162 {
2163         if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2164                 goto out;
2165
2166         if (*pt_prev) {
2167                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2168                 *pt_prev = NULL;
2169         } else {
2170                 /* Huh? Why does turning on AF_PACKET affect this? */
2171                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2172         }
2173
2174         switch (ing_filter(skb)) {
2175         case TC_ACT_SHOT:
2176         case TC_ACT_STOLEN:
2177                 kfree_skb(skb);
2178                 return NULL;
2179         }
2180
2181 out:
2182         skb->tc_verd = 0;
2183         return skb;
2184 }
2185 #endif
2186
2187 /*
2188  *      netif_nit_deliver - deliver received packets to network taps
2189  *      @skb: buffer
2190  *
2191  *      This function is used to deliver incoming packets to network
2192  *      taps. It should be used when the normal netif_receive_skb path
2193  *      is bypassed, for example because of VLAN acceleration.
2194  */
2195 void netif_nit_deliver(struct sk_buff *skb)
2196 {
2197         struct packet_type *ptype;
2198
2199         if (list_empty(&ptype_all))
2200                 return;
2201
2202         skb_reset_network_header(skb);
2203         skb_reset_transport_header(skb);
2204         skb->mac_len = skb->network_header - skb->mac_header;
2205
2206         rcu_read_lock();
2207         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2208                 if (!ptype->dev || ptype->dev == skb->dev)
2209                         deliver_skb(skb, ptype, skb->dev);
2210         }
2211         rcu_read_unlock();
2212 }
2213
2214 /**
2215  *      netif_receive_skb - process receive buffer from network
2216  *      @skb: buffer to process
2217  *
2218  *      netif_receive_skb() is the main receive data processing function.
2219  *      It always succeeds. The buffer may be dropped during processing
2220  *      for congestion control or by the protocol layers.
2221  *
2222  *      This function may only be called from softirq context and interrupts
2223  *      should be enabled.
2224  *
2225  *      Return values (usually ignored):
2226  *      NET_RX_SUCCESS: no congestion
2227  *      NET_RX_DROP: packet was dropped
2228  */
2229 int netif_receive_skb(struct sk_buff *skb)
2230 {
2231         struct packet_type *ptype, *pt_prev;
2232         struct net_device *orig_dev;
2233         struct net_device *null_or_orig;
2234         int ret = NET_RX_DROP;
2235         __be16 type;
2236
2237         if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2238                 return NET_RX_SUCCESS;
2239
2240         /* if we've gotten here through NAPI, check netpoll */
2241         if (netpoll_receive_skb(skb))
2242                 return NET_RX_DROP;
2243
2244         if (!skb->tstamp.tv64)
2245                 net_timestamp(skb);
2246
2247         if (!skb->iif)
2248                 skb->iif = skb->dev->ifindex;
2249
2250         null_or_orig = NULL;
2251         orig_dev = skb->dev;
2252         if (orig_dev->master) {
2253                 if (skb_bond_should_drop(skb))
2254                         null_or_orig = orig_dev; /* deliver only exact match */
2255                 else
2256                         skb->dev = orig_dev->master;
2257         }
2258
2259         __get_cpu_var(netdev_rx_stat).total++;
2260
2261         skb_reset_network_header(skb);
2262         skb_reset_transport_header(skb);
2263         skb->mac_len = skb->network_header - skb->mac_header;
2264
2265         pt_prev = NULL;
2266
2267         rcu_read_lock();
2268
2269 #ifdef CONFIG_NET_CLS_ACT
2270         if (skb->tc_verd & TC_NCLS) {
2271                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2272                 goto ncls;
2273         }
2274 #endif
2275
2276         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2277                 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2278                     ptype->dev == orig_dev) {
2279                         if (pt_prev)
2280                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2281                         pt_prev = ptype;
2282                 }
2283         }
2284
2285 #ifdef CONFIG_NET_CLS_ACT
2286         skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2287         if (!skb)
2288                 goto out;
2289 ncls:
2290 #endif
2291
2292         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2293         if (!skb)
2294                 goto out;
2295         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2296         if (!skb)
2297                 goto out;
2298
2299         skb_orphan(skb);
2300
2301         type = skb->protocol;
2302         list_for_each_entry_rcu(ptype,
2303                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2304                 if (ptype->type == type &&
2305                     (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2306                      ptype->dev == orig_dev)) {
2307                         if (pt_prev)
2308                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2309                         pt_prev = ptype;
2310                 }
2311         }
2312
2313         if (pt_prev) {
2314                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2315         } else {
2316                 kfree_skb(skb);
2317                 /* Jamal, now you will not able to escape explaining
2318                  * me how you were going to use this. :-)
2319                  */
2320                 ret = NET_RX_DROP;
2321         }
2322
2323 out:
2324         rcu_read_unlock();
2325         return ret;
2326 }
2327
2328 /* Network device is going away, flush any packets still pending  */
2329 static void flush_backlog(void *arg)
2330 {
2331         struct net_device *dev = arg;
2332         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2333         struct sk_buff *skb, *tmp;
2334
2335         skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2336                 if (skb->dev == dev) {
2337                         __skb_unlink(skb, &queue->input_pkt_queue);
2338                         kfree_skb(skb);
2339                 }
2340 }
2341
2342 static int napi_gro_complete(struct sk_buff *skb)
2343 {
2344         struct packet_type *ptype;
2345         __be16 type = skb->protocol;
2346         struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2347         int err = -ENOENT;
2348
2349         if (NAPI_GRO_CB(skb)->count == 1) {
2350                 skb_shinfo(skb)->gso_size = 0;
2351                 goto out;
2352         }
2353
2354         rcu_read_lock();
2355         list_for_each_entry_rcu(ptype, head, list) {
2356                 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2357                         continue;
2358
2359                 err = ptype->gro_complete(skb);
2360                 break;
2361         }
2362         rcu_read_unlock();
2363
2364         if (err) {
2365                 WARN_ON(&ptype->list == head);
2366                 kfree_skb(skb);
2367                 return NET_RX_SUCCESS;
2368         }
2369
2370 out:
2371         return netif_receive_skb(skb);
2372 }
2373
2374 void napi_gro_flush(struct napi_struct *napi)
2375 {
2376         struct sk_buff *skb, *next;
2377
2378         for (skb = napi->gro_list; skb; skb = next) {
2379                 next = skb->next;
2380                 skb->next = NULL;
2381                 napi_gro_complete(skb);
2382         }
2383
2384         napi->gro_count = 0;
2385         napi->gro_list = NULL;
2386 }
2387 EXPORT_SYMBOL(napi_gro_flush);
2388
2389 void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
2390 {
2391         unsigned int offset = skb_gro_offset(skb);
2392
2393         hlen += offset;
2394         if (unlikely(skb_headlen(skb) ||
2395                      skb_shinfo(skb)->frags[0].size < hlen ||
2396                      PageHighMem(skb_shinfo(skb)->frags[0].page)))
2397                 return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
2398
2399         return page_address(skb_shinfo(skb)->frags[0].page) +
2400                skb_shinfo(skb)->frags[0].page_offset + offset;
2401 }
2402 EXPORT_SYMBOL(skb_gro_header);
2403
2404 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2405 {
2406         struct sk_buff **pp = NULL;
2407         struct packet_type *ptype;
2408         __be16 type = skb->protocol;
2409         struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2410         int same_flow;
2411         int mac_len;
2412         int ret;
2413
2414         if (!(skb->dev->features & NETIF_F_GRO))
2415                 goto normal;
2416
2417         if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
2418                 goto normal;
2419
2420         rcu_read_lock();
2421         list_for_each_entry_rcu(ptype, head, list) {
2422                 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2423                         continue;
2424
2425                 skb_set_network_header(skb, skb_gro_offset(skb));
2426                 mac_len = skb->network_header - skb->mac_header;
2427                 skb->mac_len = mac_len;
2428                 NAPI_GRO_CB(skb)->same_flow = 0;
2429                 NAPI_GRO_CB(skb)->flush = 0;
2430                 NAPI_GRO_CB(skb)->free = 0;
2431
2432                 pp = ptype->gro_receive(&napi->gro_list, skb);
2433                 break;
2434         }
2435         rcu_read_unlock();
2436
2437         if (&ptype->list == head)
2438                 goto normal;
2439
2440         same_flow = NAPI_GRO_CB(skb)->same_flow;
2441         ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2442
2443         if (pp) {
2444                 struct sk_buff *nskb = *pp;
2445
2446                 *pp = nskb->next;
2447                 nskb->next = NULL;
2448                 napi_gro_complete(nskb);
2449                 napi->gro_count--;
2450         }
2451
2452         if (same_flow)
2453                 goto ok;
2454
2455         if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2456                 goto normal;
2457
2458         napi->gro_count++;
2459         NAPI_GRO_CB(skb)->count = 1;
2460         skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2461         skb->next = napi->gro_list;
2462         napi->gro_list = skb;
2463         ret = GRO_HELD;
2464
2465 pull:
2466         if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
2467                 if (napi->gro_list == skb)
2468                         napi->gro_list = skb->next;
2469                 ret = GRO_DROP;
2470         }
2471
2472 ok:
2473         return ret;
2474
2475 normal:
2476         ret = GRO_NORMAL;
2477         goto pull;
2478 }
2479 EXPORT_SYMBOL(dev_gro_receive);
2480
2481 static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2482 {
2483         struct sk_buff *p;
2484
2485         if (netpoll_rx_on(skb))
2486                 return GRO_NORMAL;
2487
2488         for (p = napi->gro_list; p; p = p->next) {
2489                 NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2490                         && !compare_ether_header(skb_mac_header(p),
2491                                                  skb_gro_mac_header(skb));
2492                 NAPI_GRO_CB(p)->flush = 0;
2493         }
2494
2495         return dev_gro_receive(napi, skb);
2496 }
2497
2498 int napi_skb_finish(int ret, struct sk_buff *skb)
2499 {
2500         int err = NET_RX_SUCCESS;
2501
2502         switch (ret) {
2503         case GRO_NORMAL:
2504                 return netif_receive_skb(skb);
2505
2506         case GRO_DROP:
2507                 err = NET_RX_DROP;
2508                 /* fall through */
2509
2510         case GRO_MERGED_FREE:
2511                 kfree_skb(skb);
2512                 break;
2513         }
2514
2515         return err;
2516 }
2517 EXPORT_SYMBOL(napi_skb_finish);
2518
/*
 * GRO entry point for a complete skb: reset the GRO offset, run the skb
 * through __napi_gro_receive() and act on the verdict with
 * napi_skb_finish().
 */
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	int verdict;

	skb_gro_reset_offset(skb);
	verdict = __napi_gro_receive(napi, skb);

	return napi_skb_finish(verdict, skb);
}
EXPORT_SYMBOL(napi_gro_receive);
2526
2527 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2528 {
2529         __skb_pull(skb, skb_headlen(skb));
2530         skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2531
2532         napi->skb = skb;
2533 }
2534 EXPORT_SYMBOL(napi_reuse_skb);
2535
2536 struct sk_buff *napi_get_frags(struct napi_struct *napi)
2537 {
2538         struct net_device *dev = napi->dev;
2539         struct sk_buff *skb = napi->skb;
2540
2541         if (!skb) {
2542                 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2543                 if (!skb)
2544                         goto out;
2545
2546                 skb_reserve(skb, NET_IP_ALIGN);
2547
2548                 napi->skb = skb;
2549         }
2550
2551 out:
2552         return skb;
2553 }
2554 EXPORT_SYMBOL(napi_get_frags);
2555
2556 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2557 {
2558         int err = NET_RX_SUCCESS;
2559
2560         switch (ret) {
2561         case GRO_NORMAL:
2562         case GRO_HELD:
2563                 skb->protocol = eth_type_trans(skb, napi->dev);
2564
2565                 if (ret == GRO_NORMAL)
2566                         return netif_receive_skb(skb);
2567
2568                 skb_gro_pull(skb, -ETH_HLEN);
2569                 break;
2570
2571         case GRO_DROP:
2572                 err = NET_RX_DROP;
2573                 /* fall through */
2574
2575         case GRO_MERGED_FREE:
2576                 napi_reuse_skb(napi, skb);
2577                 break;
2578         }
2579
2580         return err;
2581 }
2582 EXPORT_SYMBOL(napi_frags_finish);
2583
2584 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2585 {
2586         struct sk_buff *skb = napi->skb;
2587         struct ethhdr *eth;
2588
2589         napi->skb = NULL;
2590
2591         skb_reset_mac_header(skb);
2592         skb_gro_reset_offset(skb);
2593
2594         eth = skb_gro_header(skb, sizeof(*eth));
2595         if (!eth) {
2596                 napi_reuse_skb(napi, skb);
2597                 skb = NULL;
2598                 goto out;
2599         }
2600
2601         skb_gro_pull(skb, sizeof(*eth));
2602
2603         /*
2604          * This works because the only protocols we care about don't require
2605          * special handling.  We'll fix it up properly at the end.
2606          */
2607         skb->protocol = eth->h_proto;
2608
2609 out:
2610         return skb;
2611 }
2612 EXPORT_SYMBOL(napi_frags_skb);
2613
2614 int napi_gro_frags(struct napi_struct *napi)
2615 {
2616         struct sk_buff *skb = napi_frags_skb(napi);
2617
2618         if (!skb)
2619                 return NET_RX_DROP;
2620
2621         return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2622 }
2623 EXPORT_SYMBOL(napi_gro_frags);
2624
2625 static int process_backlog(struct napi_struct *napi, int quota)
2626 {
2627         int work = 0;
2628         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2629         unsigned long start_time = jiffies;
2630
2631         napi->weight = weight_p;
2632         do {
2633                 struct sk_buff *skb;
2634
2635                 local_irq_disable();
2636                 skb = __skb_dequeue(&queue->input_pkt_queue);
2637                 if (!skb) {
2638                         __napi_complete(napi);
2639                         local_irq_enable();
2640                         break;
2641                 }
2642                 local_irq_enable();
2643
2644                 netif_receive_skb(skb);
2645         } while (++work < quota && jiffies == start_time);
2646
2647         return work;
2648 }
2649
2650 /**
2651  * __napi_schedule - schedule for receive
2652  * @n: entry to schedule
2653  *
2654  * The entry's receive function will be scheduled to run
2655  */
2656 void __napi_schedule(struct napi_struct *n)
2657 {
2658         unsigned long flags;
2659
2660         local_irq_save(flags);
2661         list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2662         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2663         local_irq_restore(flags);
2664 }
2665 EXPORT_SYMBOL(__napi_schedule);
2666
2667 void __napi_complete(struct napi_struct *n)
2668 {
2669         BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2670         BUG_ON(n->gro_list);
2671
2672         list_del(&n->poll_list);
2673         smp_mb__before_clear_bit();
2674         clear_bit(NAPI_STATE_SCHED, &n->state);
2675 }
2676 EXPORT_SYMBOL(__napi_complete);
2677
2678 void napi_complete(struct napi_struct *n)
2679 {
2680         unsigned long flags;
2681
2682         /*
2683          * don't let napi dequeue from the cpu poll list
2684          * just in case its running on a different cpu
2685          */
2686         if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2687                 return;
2688
2689         napi_gro_flush(n);
2690         local_irq_save(flags);
2691         __napi_complete(n);
2692         local_irq_restore(flags);
2693 }
2694 EXPORT_SYMBOL(napi_complete);
2695
2696 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2697                     int (*poll)(struct napi_struct *, int), int weight)
2698 {
2699         INIT_LIST_HEAD(&napi->poll_list);
2700         napi->gro_count = 0;
2701         napi->gro_list = NULL;
2702         napi->skb = NULL;
2703         napi->poll = poll;
2704         napi->weight = weight;
2705         list_add(&napi->dev_list, &dev->napi_list);
2706         napi->dev = dev;
2707 #ifdef CONFIG_NETPOLL
2708         spin_lock_init(&napi->poll_lock);
2709         napi->poll_owner = -1;
2710 #endif
2711         set_bit(NAPI_STATE_SCHED, &napi->state);
2712 }
2713 EXPORT_SYMBOL(netif_napi_add);
2714
2715 void netif_napi_del(struct napi_struct *napi)
2716 {
2717         struct sk_buff *skb, *next;
2718
2719         list_del_init(&napi->dev_list);
2720         napi_free_frags(napi);
2721
2722         for (skb = napi->gro_list; skb; skb = next) {
2723                 next = skb->next;
2724                 skb->next = NULL;
2725                 kfree_skb(skb);
2726         }
2727
2728         napi->gro_list = NULL;
2729         napi->gro_count = 0;
2730 }
2731 EXPORT_SYMBOL(netif_napi_del);
2732
2733
2734 static void net_rx_action(struct softirq_action *h)
2735 {
2736         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2737         unsigned long time_limit = jiffies + 2;
2738         int budget = netdev_budget;
2739         void *have;
2740
2741         local_irq_disable();
2742
2743         while (!list_empty(list)) {
2744                 struct napi_struct *n;
2745                 int work, weight;
2746
2747                 /* If softirq window is exhuasted then punt.
2748                  * Allow this to run for 2 jiffies since which will allow
2749                  * an average latency of 1.5/HZ.
2750                  */
2751                 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2752                         goto softnet_break;
2753
2754                 local_irq_enable();
2755
2756                 /* Even though interrupts have been re-enabled, this
2757                  * access is safe because interrupts can only add new
2758                  * entries to the tail of this list, and only ->poll()
2759                  * calls can remove this head entry from the list.
2760                  */
2761                 n = list_entry(list->next, struct napi_struct, poll_list);
2762
2763                 have = netpoll_poll_lock(n);
2764
2765                 weight = n->weight;
2766
2767                 /* This NAPI_STATE_SCHED test is for avoiding a race
2768                  * with netpoll's poll_napi().  Only the entity which
2769                  * obtains the lock and sees NAPI_STATE_SCHED set will
2770                  * actually make the ->poll() call.  Therefore we avoid
2771                  * accidently calling ->poll() when NAPI is not scheduled.
2772                  */
2773                 work = 0;
2774                 if (test_bit(NAPI_STATE_SCHED, &n->state))
2775                         work = n->poll(n, weight);
2776
2777                 WARN_ON_ONCE(work > weight);
2778
2779                 budget -= work;
2780
2781                 local_irq_disable();
2782
2783                 /* Drivers must not modify the NAPI state if they
2784                  * consume the entire weight.  In such cases this code
2785                  * still "owns" the NAPI instance and therefore can
2786                  * move the instance around on the list at-will.
2787                  */
2788                 if (unlikely(work == weight)) {
2789                         if (unlikely(napi_disable_pending(n)))
2790                                 __napi_complete(n);
2791                         else
2792                                 list_move_tail(&n->poll_list, list);
2793                 }
2794
2795                 netpoll_poll_unlock(have);
2796         }
2797 out:
2798         local_irq_enable();
2799
2800 #ifdef CONFIG_NET_DMA
2801         /*
2802          * There may not be any more sk_buffs coming right now, so push
2803          * any pending DMA copies to hardware
2804          */
2805         dma_issue_pending_all();
2806 #endif
2807
2808         return;
2809
2810 softnet_break:
2811         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2812         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2813         goto out;
2814 }
2815
2816 static gifconf_func_t * gifconf_list [NPROTO];
2817
2818 /**
2819  *      register_gifconf        -       register a SIOCGIF handler
2820  *      @family: Address family
2821  *      @gifconf: Function handler
2822  *
2823  *      Register protocol dependent address dumping routines. The handler
2824  *      that is passed must not be freed or reused until it has been replaced
2825  *      by another handler.
2826  */
2827 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2828 {
2829         if (family >= NPROTO)
2830                 return -EINVAL;
2831         gifconf_list[family] = gifconf;
2832         return 0;
2833 }
2834
2835
2836 /*
2837  *      Map an interface index to its name (SIOCGIFNAME)
2838  */
2839
2840 /*
2841  *      We need this ioctl for efficient implementation of the
2842  *      if_indextoname() function required by the IPv6 API.  Without
2843  *      it, we would have to search all the interfaces to find a
2844  *      match.  --pb
2845  */
2846
2847 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2848 {
2849         struct net_device *dev;
2850         struct ifreq ifr;
2851
2852         /*
2853          *      Fetch the caller's info block.
2854          */
2855
2856         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2857                 return -EFAULT;
2858
2859         read_lock(&dev_base_lock);
2860         dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2861         if (!dev) {
2862                 read_unlock(&dev_base_lock);
2863                 return -ENODEV;
2864         }
2865
2866         strcpy(ifr.ifr_name, dev->name);
2867         read_unlock(&dev_base_lock);
2868
2869         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2870                 return -EFAULT;
2871         return 0;
2872 }
2873
2874 /*
2875  *      Perform a SIOCGIFCONF call. This structure will change
2876  *      size eventually, and there is nothing I can do about it.
2877  *      Thus we will need a 'compatibility mode'.
2878  */
2879
2880 static int dev_ifconf(struct net *net, char __user *arg)
2881 {
2882         struct ifconf ifc;
2883         struct net_device *dev;
2884         char __user *pos;
2885         int len;
2886         int total;
2887         int i;
2888
2889         /*
2890          *      Fetch the caller's info block.
2891          */
2892
2893         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2894                 return -EFAULT;
2895
2896         pos = ifc.ifc_buf;
2897         len = ifc.ifc_len;
2898
2899         /*
2900          *      Loop over the interfaces, and write an info block for each.
2901          */
2902
2903         total = 0;
2904         for_each_netdev(net, dev) {
2905                 for (i = 0; i < NPROTO; i++) {
2906                         if (gifconf_list[i]) {
2907                                 int done;
2908                                 if (!pos)
2909                                         done = gifconf_list[i](dev, NULL, 0);
2910                                 else
2911                                         done = gifconf_list[i](dev, pos + total,
2912                                                                len - total);
2913                                 if (done < 0)
2914                                         return -EFAULT;
2915                                 total += done;
2916                         }
2917                 }
2918         }
2919
2920         /*
2921          *      All done.  Write the updated control block back to the caller.
2922          */
2923         ifc.ifc_len = total;
2924
2925         /*
2926          *      Both BSD and Solaris return 0 here, so we do too.
2927          */
2928         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2929 }
2930
2931 #ifdef CONFIG_PROC_FS
2932 /*
2933  *      This is invoked by the /proc filesystem handler to display a device
2934  *      in detail.
2935  */
2936 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2937         __acquires(dev_base_lock)
2938 {
2939         struct net *net = seq_file_net(seq);
2940         loff_t off;
2941         struct net_device *dev;
2942
2943         read_lock(&dev_base_lock);
2944         if (!*pos)
2945                 return SEQ_START_TOKEN;
2946
2947         off = 1;
2948         for_each_netdev(net, dev)
2949                 if (off++ == *pos)
2950                         return dev;
2951
2952         return NULL;
2953 }
2954
2955 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2956 {
2957         struct net *net = seq_file_net(seq);
2958         ++*pos;
2959         return v == SEQ_START_TOKEN ?
2960                 first_net_device(net) : next_net_device((struct net_device *)v);
2961 }
2962
2963 void dev_seq_stop(struct seq_file *seq, void *v)
2964         __releases(dev_base_lock)
2965 {
2966         read_unlock(&dev_base_lock);
2967 }
2968
2969 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2970 {
2971         const struct net_device_stats *stats = dev_get_stats(dev);
2972
2973         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2974                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2975                    dev->name, stats->rx_bytes, stats->rx_packets,
2976                    stats->rx_errors,
2977                    stats->rx_dropped + stats->rx_missed_errors,
2978                    stats->rx_fifo_errors,
2979                    stats->rx_length_errors + stats->rx_over_errors +
2980                     stats->rx_crc_errors + stats->rx_frame_errors,
2981                    stats->rx_compressed, stats->multicast,
2982                    stats->tx_bytes, stats->tx_packets,
2983                    stats->tx_errors, stats->tx_dropped,
2984                    stats->tx_fifo_errors, stats->collisions,
2985                    stats->tx_carrier_errors +
2986                     stats->tx_aborted_errors +
2987                     stats->tx_window_errors +
2988                     stats->tx_heartbeat_errors,
2989                    stats->tx_compressed);
2990 }
2991
2992 /*
2993  *      Called from the PROCfs module. This now uses the new arbitrary sized
2994  *      /proc/net interface to create /proc/net/dev
2995  */
2996 static int dev_seq_show(struct seq_file *seq, void *v)
2997 {
2998         if (v == SEQ_START_TOKEN)
2999                 seq_puts(seq, "Inter-|   Receive                            "
3000                               "                    |  Transmit\n"
3001                               " face |bytes    packets errs drop fifo frame "
3002                               "compressed multicast|bytes    packets errs "
3003                               "drop fifo colls carrier compressed\n");
3004         else
3005                 dev_seq_printf_stats(seq, v);
3006         return 0;
3007 }
3008
3009 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3010 {
3011         struct netif_rx_stats *rc = NULL;
3012
3013         while (*pos < nr_cpu_ids)
3014                 if (cpu_online(*pos)) {
3015                         rc = &per_cpu(netdev_rx_stat, *pos);
3016                         break;
3017                 } else
3018                         ++*pos;
3019         return rc;
3020 }
3021
3022 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3023 {
3024         return softnet_get_online(pos);
3025 }
3026
3027 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3028 {
3029         ++*pos;
3030         return softnet_get_online(pos);
3031 }
3032
/* Nothing to release: softnet_seq_start() takes no lock. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
3036
3037 static int softnet_seq_show(struct seq_file *seq, void *v)
3038 {
3039         struct netif_rx_stats *s = v;
3040
3041         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3042                    s->total, s->dropped, s->time_squeeze, 0,
3043                    0, 0, 0, 0, /* was fastroute */
3044                    s->cpu_collision );
3045         return 0;
3046 }
3047
3048 static const struct seq_operations dev_seq_ops = {
3049         .start = dev_seq_start,
3050         .next  = dev_seq_next,
3051         .stop  = dev_seq_stop,
3052         .show  = dev_seq_show,
3053 };
3054
3055 static int dev_seq_open(struct inode *inode, struct file *file)
3056 {
3057         return seq_open_net(inode, file, &dev_seq_ops,
3058                             sizeof(struct seq_net_private));
3059 }
3060
3061 static const struct file_operations dev_seq_fops = {
3062         .owner   = THIS_MODULE,
3063         .open    = dev_seq_open,
3064         .read    = seq_read,
3065         .llseek  = seq_lseek,
3066         .release = seq_release_net,
3067 };
3068
3069 static const struct seq_operations softnet_seq_ops = {
3070         .start = softnet_seq_start,
3071         .next  = softnet_seq_next,
3072         .stop  = softnet_seq_stop,
3073         .show  = softnet_seq_show,
3074 };
3075
3076 static int softnet_seq_open(struct inode *inode, struct file *file)
3077 {
3078         return seq_open(file, &softnet_seq_ops);
3079 }
3080
3081 static const struct file_operations softnet_seq_fops = {
3082         .owner   = THIS_MODULE,
3083         .open    = softnet_seq_open,
3084         .read    = seq_read,
3085         .llseek  = seq_lseek,
3086         .release = seq_release,
3087 };
3088
3089 static void *ptype_get_idx(loff_t pos)
3090 {
3091         struct packet_type *pt = NULL;
3092         loff_t i = 0;
3093         int t;
3094
3095         list_for_each_entry_rcu(pt, &ptype_all, list) {
3096                 if (i == pos)
3097                         return pt;
3098                 ++i;
3099         }
3100
3101         for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3102                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3103                         if (i == pos)
3104                                 return pt;
3105                         ++i;
3106                 }
3107         }
3108         return NULL;
3109 }
3110
3111 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3112         __acquires(RCU)
3113 {
3114         rcu_read_lock();
3115         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3116 }
3117
3118 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3119 {
3120         struct packet_type *pt;
3121         struct list_head *nxt;
3122         int hash;
3123
3124         ++*pos;
3125         if (v == SEQ_START_TOKEN)
3126                 return ptype_get_idx(0);
3127
3128         pt = v;
3129         nxt = pt->list.next;
3130         if (pt->type == htons(ETH_P_ALL)) {
3131                 if (nxt != &ptype_all)
3132                         goto found;
3133                 hash = 0;
3134                 nxt = ptype_base[0].next;
3135         } else
3136                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3137
3138         while (nxt == &ptype_base[hash]) {
3139                 if (++hash >= PTYPE_HASH_SIZE)
3140                         return NULL;
3141                 nxt = ptype_base[hash].next;
3142         }
3143 found:
3144         return list_entry(nxt, struct packet_type, list);
3145 }
3146
3147 static void ptype_seq_stop(struct seq_file *seq, void *v)
3148         __releases(RCU)
3149 {
3150         rcu_read_unlock();
3151 }
3152
3153 static int ptype_seq_show(struct seq_file *seq, void *v)
3154 {
3155         struct packet_type *pt = v;
3156
3157         if (v == SEQ_START_TOKEN)
3158                 seq_puts(seq, "Type Device      Function\n");
3159         else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3160                 if (pt->type == htons(ETH_P_ALL))
3161                         seq_puts(seq, "ALL ");
3162                 else
3163                         seq_printf(seq, "%04x", ntohs(pt->type));
3164
3165                 seq_printf(seq, " %-8s %pF\n",
3166                            pt->dev ? pt->dev->name : "", pt->func);
3167         }
3168
3169         return 0;
3170 }
3171
3172 static const struct seq_operations ptype_seq_ops = {
3173         .start = ptype_seq_start,
3174         .next  = ptype_seq_next,
3175         .stop  = ptype_seq_stop,
3176         .show  = ptype_seq_show,
3177 };
3178
3179 static int ptype_seq_open(struct inode *inode, struct file *file)
3180 {
3181         return seq_open_net(inode, file, &ptype_seq_ops,
3182                         sizeof(struct seq_net_private));
3183 }
3184
3185 static const struct file_operations ptype_seq_fops = {
3186         .owner   = THIS_MODULE,
3187         .open    = ptype_seq_open,
3188         .read    = seq_read,
3189         .llseek  = seq_lseek,
3190         .release = seq_release_net,
3191 };
3192
3193
3194 static int __net_init dev_proc_net_init(struct net *net)
3195 {
3196         int rc = -ENOMEM;
3197
3198         if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3199                 goto out;
3200         if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3201                 goto out_dev;
3202         if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3203                 goto out_softnet;
3204
3205         if (wext_proc_init(net))
3206                 goto out_ptype;
3207         rc = 0;
3208 out:
3209         return rc;
3210 out_ptype:
3211         proc_net_remove(net, "ptype");
3212 out_softnet:
3213         proc_net_remove(net, "softnet_stat");
3214 out_dev:
3215         proc_net_remove(net, "dev");
3216         goto out;
3217 }
3218
3219 static void __net_exit dev_proc_net_exit(struct net *net)
3220 {
3221         wext_proc_exit(net);
3222
3223         proc_net_remove(net, "ptype");
3224         proc_net_remove(net, "softnet_stat");
3225         proc_net_remove(net, "dev");
3226 }
3227
3228 static struct pernet_operations __net_initdata dev_proc_ops = {
3229         .init = dev_proc_net_init,
3230         .exit = dev_proc_net_exit,
3231 };
3232
3233 static int __init dev_proc_init(void)
3234 {
3235         return register_pernet_subsys(&dev_proc_ops);
3236 }
3237 #else
3238 #define dev_proc_init() 0
3239 #endif  /* CONFIG_PROC_FS */
3240
3241
3242 /**
3243  *      netdev_set_master       -       set up master/slave pair
3244  *      @slave: slave device
3245  *      @master: new master device
3246  *
3247  *      Changes the master device of the slave. Pass %NULL to break the
3248  *      bonding. The caller must hold the RTNL semaphore. On a failure
3249  *      a negative errno code is returned. On success the reference counts
3250  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3251  *      function returns zero.
3252  */
3253 int netdev_set_master(struct net_device *slave, struct net_device *master)
3254 {
3255         struct net_device *old = slave->master;
3256
3257         ASSERT_RTNL();
3258
3259         if (master) {
3260                 if (old)
3261                         return -EBUSY;
3262                 dev_hold(master);
3263         }
3264
3265         slave->master = master;
3266
3267         synchronize_net();
3268
3269         if (old)
3270                 dev_put(old);
3271
3272         if (master)
3273                 slave->flags |= IFF_SLAVE;
3274         else
3275                 slave->flags &= ~IFF_SLAVE;
3276
3277         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3278         return 0;
3279 }
3280
3281 static void dev_change_rx_flags(struct net_device *dev, int flags)
3282 {
3283         const struct net_device_ops *ops = dev->netdev_ops;
3284
3285         if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3286                 ops->ndo_change_rx_flags(dev, flags);
3287 }
3288
3289 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3290 {
3291         unsigned short old_flags = dev->flags;
3292         uid_t uid;
3293         gid_t gid;
3294
3295         ASSERT_RTNL();
3296
3297         dev->flags |= IFF_PROMISC;
3298         dev->promiscuity += inc;
3299         if (dev->promiscuity == 0) {
3300                 /*
3301                  * Avoid overflow.
3302                  * If inc causes overflow, untouch promisc and return error.
3303                  */
3304                 if (inc < 0)
3305                         dev->flags &= ~IFF_PROMISC;
3306                 else {
3307                         dev->promiscuity -= inc;
3308                         printk(KERN_WARNING "%s: promiscuity touches roof, "
3309                                 "set promiscuity failed, promiscuity feature "
3310                                 "of device might be broken.\n", dev->name);
3311                         return -EOVERFLOW;
3312                 }
3313         }
3314         if (dev->flags != old_flags) {
3315                 printk(KERN_INFO "device %s %s promiscuous mode\n",
3316                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3317                                                                "left");
3318                 if (audit_enabled) {
3319                         current_uid_gid(&uid, &gid);
3320                         audit_log(current->audit_context, GFP_ATOMIC,
3321                                 AUDIT_ANOM_PROMISCUOUS,
3322                                 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3323                                 dev->name, (dev->flags & IFF_PROMISC),
3324                                 (old_flags & IFF_PROMISC),
3325                                 audit_get_loginuid(current),
3326                                 uid, gid,
3327                                 audit_get_sessionid(current));
3328                 }
3329
3330                 dev_change_rx_flags(dev, IFF_PROMISC);
3331         }
3332         return 0;
3333 }
3334
3335 /**
3336  *      dev_set_promiscuity     - update promiscuity count on a device
3337  *      @dev: device
3338  *      @inc: modifier
3339  *
3340  *      Add or remove promiscuity from a device. While the count in the device
3341  *      remains above zero the interface remains promiscuous. Once it hits zero
3342  *      the device reverts back to normal filtering operation. A negative inc
3343  *      value is used to drop promiscuity on the device.
3344  *      Return 0 if successful or a negative errno code on error.
3345  */
3346 int dev_set_promiscuity(struct net_device *dev, int inc)
3347 {
3348         unsigned short old_flags = dev->flags;
3349         int err;
3350
3351         err = __dev_set_promiscuity(dev, inc);
3352         if (err < 0)
3353                 return err;
3354         if (dev->flags != old_flags)
3355                 dev_set_rx_mode(dev);
3356         return err;
3357 }
3358
3359 /**
3360  *      dev_set_allmulti        - update allmulti count on a device
3361  *      @dev: device
3362  *      @inc: modifier
3363  *
3364  *      Add or remove reception of all multicast frames to a device. While the
3365  *      count in the device remains above zero the interface remains listening
3366  *      to all interfaces. Once it hits zero the device reverts back to normal
3367  *      filtering operation. A negative @inc value is used to drop the counter
3368  *      when releasing a resource needing all multicasts.
3369  *      Return 0 if successful or a negative errno code on error.
3370  */
3371
3372 int dev_set_allmulti(struct net_device *dev, int inc)
3373 {
3374         unsigned short old_flags = dev->flags;
3375
3376         ASSERT_RTNL();
3377
3378         dev->flags |= IFF_ALLMULTI;
3379         dev->allmulti += inc;
3380         if (dev->allmulti == 0) {
3381                 /*
3382                  * Avoid overflow.
3383                  * If inc causes overflow, untouch allmulti and return error.
3384                  */
3385                 if (inc < 0)
3386                         dev->flags &= ~IFF_ALLMULTI;
3387                 else {
3388                         dev->allmulti -= inc;
3389                         printk(KERN_WARNING "%s: allmulti touches roof, "
3390                                 "set allmulti failed, allmulti feature of "
3391                                 "device might be broken.\n", dev->name);
3392                         return -EOVERFLOW;
3393                 }
3394         }
3395         if (dev->flags ^ old_flags) {
3396                 dev_change_rx_flags(dev, IFF_ALLMULTI);
3397                 dev_set_rx_mode(dev);
3398         }
3399         return 0;
3400 }
3401
3402 /*
3403  *      Upload unicast and multicast address lists to device and
3404  *      configure RX filtering. When the device doesn't support unicast
3405  *      filtering it is put in promiscuous mode while unicast addresses
3406  *      are present.
3407  */
3408 void __dev_set_rx_mode(struct net_device *dev)
3409 {
3410         const struct net_device_ops *ops = dev->netdev_ops;
3411
3412         /* dev_open will call this function so the list will stay sane. */
3413         if (!(dev->flags&IFF_UP))
3414                 return;
3415
3416         if (!netif_device_present(dev))
3417                 return;
3418
3419         if (ops->ndo_set_rx_mode)
3420                 ops->ndo_set_rx_mode(dev);
3421         else {
3422                 /* Unicast addresses changes may only happen under the rtnl,
3423                  * therefore calling __dev_set_promiscuity here is safe.
3424                  */
3425                 if (dev->uc_count > 0 && !dev->uc_promisc) {
3426                         __dev_set_promiscuity(dev, 1);
3427                         dev->uc_promisc = 1;
3428                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3429                         __dev_set_promiscuity(dev, -1);
3430                         dev->uc_promisc = 0;
3431                 }
3432
3433                 if (ops->ndo_set_multicast_list)
3434                         ops->ndo_set_multicast_list(dev);
3435         }
3436 }
3437
/*
 * Locked wrapper: run __dev_set_rx_mode() between netif_addr_lock_bh()
 * and netif_addr_unlock_bh() on @dev.
 */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_addr_unlock_bh(dev);
}
3444
3445 /* hw addresses list handling functions */
3446
3447 static int __hw_addr_add(struct list_head *list, unsigned char *addr,
3448                          int addr_len, unsigned char addr_type)
3449 {
3450         struct netdev_hw_addr *ha;
3451         int alloc_size;
3452
3453         if (addr_len > MAX_ADDR_LEN)
3454                 return -EINVAL;
3455
3456         alloc_size = sizeof(*ha);
3457         if (alloc_size < L1_CACHE_BYTES)
3458                 alloc_size = L1_CACHE_BYTES;
3459         ha = kmalloc(alloc_size, GFP_ATOMIC);
3460         if (!ha)
3461                 return -ENOMEM;
3462         memcpy(ha->addr, addr, addr_len);
3463         ha->type = addr_type;
3464         list_add_tail_rcu(&ha->list, list);
3465         return 0;
3466 }
3467
3468 static void ha_rcu_free(struct rcu_head *head)
3469 {
3470         struct netdev_hw_addr *ha;
3471
3472         ha = container_of(head, struct netdev_hw_addr, rcu_head);
3473         kfree(ha);
3474 }
3475
3476 static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr,
3477                             int addr_len, unsigned char addr_type,
3478                             int ignore_index)
3479 {
3480         struct netdev_hw_addr *ha;
3481         int i = 0;
3482
3483         list_for_each_entry(ha, list, list) {
3484                 if (i++ != ignore_index &&
3485                     !memcmp(ha->addr, addr, addr_len) &&
3486                     (ha->type == addr_type || !addr_type)) {
3487                         list_del_rcu(&ha->list);
3488                         call_rcu(&ha->rcu_head, ha_rcu_free);
3489                         return 0;
3490                 }
3491         }
3492         return -ENOENT;
3493 }
3494
3495 static int __hw_addr_add_multiple_ii(struct list_head *to_list,
3496                                      struct list_head *from_list,
3497                                      int addr_len, unsigned char addr_type,
3498                                      int ignore_index)
3499 {
3500         int err;
3501         struct netdev_hw_addr *ha, *ha2;
3502         unsigned char type;
3503
3504         list_for_each_entry(ha, from_list, list) {
3505                 type = addr_type ? addr_type : ha->type;
3506                 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3507                 if (err)
3508                         goto unroll;
3509         }
3510         return 0;
3511
3512 unroll:
3513         list_for_each_entry(ha2, from_list, list) {
3514                 if (ha2 == ha)
3515                         break;
3516                 type = addr_type ? addr_type : ha2->type;
3517                 __hw_addr_del_ii(to_list, ha2->addr, addr_len, type,
3518                                  ignore_index);
3519         }
3520         return err;
3521 }
3522
3523 static void __hw_addr_del_multiple_ii(struct list_head *to_list,
3524                                       struct list_head *from_list,
3525                                       int addr_len, unsigned char addr_type,
3526                                       int ignore_index)
3527 {
3528         struct netdev_hw_addr *ha;
3529         unsigned char type;
3530
3531         list_for_each_entry(ha, from_list, list) {
3532                 type = addr_type ? addr_type : ha->type;
3533                 __hw_addr_del_ii(to_list, ha->addr, addr_len, addr_type,
3534                                  ignore_index);
3535         }
3536 }
3537
3538 static void __hw_addr_flush(struct list_head *list)
3539 {
3540         struct netdev_hw_addr *ha, *tmp;
3541
3542         list_for_each_entry_safe(ha, tmp, list, list) {
3543                 list_del_rcu(&ha->list);
3544                 call_rcu(&ha->rcu_head, ha_rcu_free);
3545         }
3546 }
3547
3548 /* Device addresses handling functions */
3549
3550 static void dev_addr_flush(struct net_device *dev)
3551 {
3552         /* rtnl_mutex must be held here */
3553
3554         __hw_addr_flush(&dev->dev_addr_list);
3555         dev->dev_addr = NULL;
3556 }
3557
3558 static int dev_addr_init(struct net_device *dev)
3559 {
3560         unsigned char addr[MAX_ADDR_LEN];
3561         struct netdev_hw_addr *ha;
3562         int err;
3563
3564         /* rtnl_mutex must be held here */
3565
3566         INIT_LIST_HEAD(&dev->dev_addr_list);
3567         memset(addr, 0, sizeof(*addr));
3568         err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr),
3569                             NETDEV_HW_ADDR_T_LAN);
3570         if (!err) {
3571                 /*
3572                  * Get the first (previously created) address from the list
3573                  * and set dev_addr pointer to this location.
3574                  */
3575                 ha = list_first_entry(&dev->dev_addr_list,
3576                                       struct netdev_hw_addr, list);
3577                 dev->dev_addr = ha->addr;
3578         }
3579         return err;
3580 }
3581
3582 /**
3583  *      dev_addr_add    - Add a device address
3584  *      @dev: device
3585  *      @addr: address to add
3586  *      @addr_type: address type
3587  *
3588  *      Add a device address to the device or increase the reference count if
3589  *      it already exists.
3590  *
3591  *      The caller must hold the rtnl_mutex.
3592  */
3593 int dev_addr_add(struct net_device *dev, unsigned char *addr,
3594                  unsigned char addr_type)
3595 {
3596         int err;
3597
3598         ASSERT_RTNL();
3599
3600         err = __hw_addr_add(&dev->dev_addr_list, addr, dev->addr_len,
3601                             addr_type);
3602         if (!err)
3603                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3604         return err;
3605 }
3606 EXPORT_SYMBOL(dev_addr_add);
3607
3608 /**
3609  *      dev_addr_del    - Release a device address.
3610  *      @dev: device
3611  *      @addr: address to delete
3612  *      @addr_type: address type
3613  *
3614  *      Release reference to a device address and remove it from the device
3615  *      if the reference count drops to zero.
3616  *
3617  *      The caller must hold the rtnl_mutex.
3618  */
3619 int dev_addr_del(struct net_device *dev, unsigned char *addr,
3620                  unsigned char addr_type)
3621 {
3622         int err;
3623
3624         ASSERT_RTNL();
3625
3626         err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len,
3627                                addr_type, 0);
3628         if (!err)
3629                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3630         return err;
3631 }
3632 EXPORT_SYMBOL(dev_addr_del);
3633
3634 /**
3635  *      dev_addr_add_multiple   - Add device addresses from another device
3636  *      @to_dev: device to which addresses will be added
3637  *      @from_dev: device from which addresses will be added
3638  *      @addr_type: address type - 0 means type will be used from from_dev
3639  *
3640  *      Add device addresses of the one device to another.
3641  **
3642  *      The caller must hold the rtnl_mutex.
3643  */
3644 int dev_addr_add_multiple(struct net_device *to_dev,
3645                           struct net_device *from_dev,
3646                           unsigned char addr_type)
3647 {
3648         int err;
3649
3650         ASSERT_RTNL();
3651
3652         if (from_dev->addr_len != to_dev->addr_len)
3653                 return -EINVAL;
3654         err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
3655                                         &from_dev->dev_addr_list,
3656                                         to_dev->addr_len, addr_type, 0);
3657         if (!err)
3658                 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3659         return err;
3660 }
3661 EXPORT_SYMBOL(dev_addr_add_multiple);
3662
3663 /**
3664  *      dev_addr_del_multiple   - Delete device addresses by another device
3665  *      @to_dev: device where the addresses will be deleted
3666  *      @from_dev: device by which addresses the addresses will be deleted
3667  *      @addr_type: address type - 0 means type will used from from_dev
3668  *
3669  *      Deletes addresses in to device by the list of addresses in from device.
3670  *
3671  *      The caller must hold the rtnl_mutex.
3672  */
3673 int dev_addr_del_multiple(struct net_device *to_dev,
3674                           struct net_device *from_dev,
3675                           unsigned char addr_type)
3676 {
3677         ASSERT_RTNL();
3678
3679         if (from_dev->addr_len != to_dev->addr_len)
3680                 return -EINVAL;
3681         __hw_addr_del_multiple_ii(&to_dev->dev_addr_list,
3682                                   &from_dev->dev_addr_list,
3683                                   to_dev->addr_len, addr_type, 0);
3684         call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3685         return 0;
3686 }
3687 EXPORT_SYMBOL(dev_addr_del_multiple);
3688
3689 /* unicast and multicast addresses handling functions */
3690
3691 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3692                       void *addr, int alen, int glbl)
3693 {
3694         struct dev_addr_list *da;
3695
3696         for (; (da = *list) != NULL; list = &da->next) {
3697                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3698                     alen == da->da_addrlen) {
3699                         if (glbl) {
3700                                 int old_glbl = da->da_gusers;
3701                                 da->da_gusers = 0;
3702                                 if (old_glbl == 0)
3703                                         break;
3704                         }
3705                         if (--da->da_users)
3706                                 return 0;
3707
3708                         *list = da->next;
3709                         kfree(da);
3710                         (*count)--;
3711                         return 0;
3712                 }
3713         }
3714         return -ENOENT;
3715 }
3716
3717 int __dev_addr_add(struct dev_addr_list **list, int *count,
3718                    void *addr, int alen, int glbl)
3719 {
3720         struct dev_addr_list *da;
3721
3722         for (da = *list; da != NULL; da = da->next) {
3723                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3724                     da->da_addrlen == alen) {
3725                         if (glbl) {
3726                                 int old_glbl = da->da_gusers;
3727                                 da->da_gusers = 1;
3728                                 if (old_glbl)
3729                                         return 0;
3730                         }
3731                         da->da_users++;
3732                         return 0;
3733                 }
3734         }
3735
3736         da = kzalloc(sizeof(*da), GFP_ATOMIC);
3737         if (da == NULL)
3738                 return -ENOMEM;
3739         memcpy(da->da_addr, addr, alen);
3740         da->da_addrlen = alen;
3741         da->da_users = 1;
3742         da->da_gusers = glbl ? 1 : 0;
3743         da->next = *list;
3744         *list = da;
3745         (*count)++;
3746         return 0;
3747 }
3748
3749 /**
3750  *      dev_unicast_delete      - Release secondary unicast address.
3751  *      @dev: device
3752  *      @addr: address to delete
3753  *      @alen: length of @addr
3754  *
3755  *      Release reference to a secondary unicast address and remove it
3756  *      from the device if the reference count drops to zero.
3757  *
3758  *      The caller must hold the rtnl_mutex.
3759  */
3760 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3761 {
3762         int err;
3763
3764         ASSERT_RTNL();
3765
3766         netif_addr_lock_bh(dev);
3767         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3768         if (!err)
3769                 __dev_set_rx_mode(dev);
3770         netif_addr_unlock_bh(dev);
3771         return err;
3772 }
3773 EXPORT_SYMBOL(dev_unicast_delete);
3774
3775 /**
3776  *      dev_unicast_add         - add a secondary unicast address
3777  *      @dev: device
3778  *      @addr: address to add
3779  *      @alen: length of @addr
3780  *
3781  *      Add a secondary unicast address to the device or increase
3782  *      the reference count if it already exists.
3783  *
3784  *      The caller must hold the rtnl_mutex.
3785  */
3786 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3787 {
3788         int err;
3789
3790         ASSERT_RTNL();
3791
3792         netif_addr_lock_bh(dev);
3793         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3794         if (!err)
3795                 __dev_set_rx_mode(dev);
3796         netif_addr_unlock_bh(dev);
3797         return err;
3798 }
3799 EXPORT_SYMBOL(dev_unicast_add);
3800
3801 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3802                     struct dev_addr_list **from, int *from_count)
3803 {
3804         struct dev_addr_list *da, *next;
3805         int err = 0;
3806
3807         da = *from;
3808         while (da != NULL) {
3809                 next = da->next;
3810                 if (!da->da_synced) {
3811                         err = __dev_addr_add(to, to_count,
3812                                              da->da_addr, da->da_addrlen, 0);
3813                         if (err < 0)
3814                                 break;
3815                         da->da_synced = 1;
3816                         da->da_users++;
3817                 } else if (da->da_users == 1) {
3818                         __dev_addr_delete(to, to_count,
3819                                           da->da_addr, da->da_addrlen, 0);
3820                         __dev_addr_delete(from, from_count,
3821                                           da->da_addr, da->da_addrlen, 0);
3822                 }
3823                 da = next;
3824         }
3825         return err;
3826 }
3827
3828 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3829                        struct dev_addr_list **from, int *from_count)
3830 {
3831         struct dev_addr_list *da, *next;
3832
3833         da = *from;
3834         while (da != NULL) {
3835                 next = da->next;
3836                 if (da->da_synced) {
3837                         __dev_addr_delete(to, to_count,
3838                                           da->da_addr, da->da_addrlen, 0);
3839                         da->da_synced = 0;
3840                         __dev_addr_delete(from, from_count,
3841                                           da->da_addr, da->da_addrlen, 0);
3842                 }
3843                 da = next;
3844         }
3845 }
3846
3847 /**
3848  *      dev_unicast_sync - Synchronize device's unicast list to another device
3849  *      @to: destination device
3850  *      @from: source device
3851  *
3852  *      Add newly added addresses to the destination device and release
3853  *      addresses that have no users left. The source device must be
3854  *      locked by netif_tx_lock_bh.
3855  *
3856  *      This function is intended to be called from the dev->set_rx_mode
3857  *      function of layered software devices.
3858  */
3859 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3860 {
3861         int err = 0;
3862
3863         netif_addr_lock_bh(to);
3864         err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3865                               &from->uc_list, &from->uc_count);
3866         if (!err)
3867                 __dev_set_rx_mode(to);
3868         netif_addr_unlock_bh(to);
3869         return err;
3870 }
3871 EXPORT_SYMBOL(dev_unicast_sync);
3872
3873 /**
3874  *      dev_unicast_unsync - Remove synchronized addresses from the destination device
3875  *      @to: destination device
3876  *      @from: source device
3877  *
3878  *      Remove all addresses that were added to the destination device by
3879  *      dev_unicast_sync(). This function is intended to be called from the
3880  *      dev->stop function of layered software devices.
3881  */
3882 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3883 {
3884         netif_addr_lock_bh(from);
3885         netif_addr_lock(to);
3886
3887         __dev_addr_unsync(&to->uc_list, &to->uc_count,
3888                           &from->uc_list, &from->uc_count);
3889         __dev_set_rx_mode(to);
3890
3891         netif_addr_unlock(to);
3892         netif_addr_unlock_bh(from);
3893 }
3894 EXPORT_SYMBOL(dev_unicast_unsync);
3895
3896 static void __dev_addr_discard(struct dev_addr_list **list)
3897 {
3898         struct dev_addr_list *tmp;
3899
3900         while (*list != NULL) {
3901                 tmp = *list;
3902                 *list = tmp->next;
3903                 if (tmp->da_users > tmp->da_gusers)
3904                         printk("__dev_addr_discard: address leakage! "
3905                                "da_users=%d\n", tmp->da_users);
3906                 kfree(tmp);
3907         }
3908 }
3909
3910 static void dev_addr_discard(struct net_device *dev)
3911 {
3912         netif_addr_lock_bh(dev);
3913
3914         __dev_addr_discard(&dev->uc_list);
3915         dev->uc_count = 0;
3916
3917         __dev_addr_discard(&dev->mc_list);
3918         dev->mc_count = 0;
3919
3920         netif_addr_unlock_bh(dev);
3921 }
3922
3923 /**
3924  *      dev_get_flags - get flags reported to userspace
3925  *      @dev: device
3926  *
3927  *      Get the combination of flag bits exported through APIs to userspace.
3928  */
3929 unsigned dev_get_flags(const struct net_device *dev)
3930 {
3931         unsigned flags;
3932
3933         flags = (dev->flags & ~(IFF_PROMISC |
3934                                 IFF_ALLMULTI |
3935                                 IFF_RUNNING |
3936                                 IFF_LOWER_UP |
3937                                 IFF_DORMANT)) |
3938                 (dev->gflags & (IFF_PROMISC |
3939                                 IFF_ALLMULTI));
3940
3941         if (netif_running(dev)) {
3942                 if (netif_oper_up(dev))
3943                         flags |= IFF_RUNNING;
3944                 if (netif_carrier_ok(dev))
3945                         flags |= IFF_LOWER_UP;
3946                 if (netif_dormant(dev))
3947                         flags |= IFF_DORMANT;
3948         }
3949
3950         return flags;
3951 }
3952
3953 /**
3954  *      dev_change_flags - change device settings
3955  *      @dev: device
3956  *      @flags: device state flags
3957  *
3958  *      Change settings on device based state flags. The flags are
3959  *      in the userspace exported format.
3960  */
3961 int dev_change_flags(struct net_device *dev, unsigned flags)
3962 {
3963         int ret, changes;
3964         int old_flags = dev->flags;
3965
3966         ASSERT_RTNL();
3967
3968         /*
3969          *      Set the flags on our device.
3970          */
3971
3972         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3973                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3974                                IFF_AUTOMEDIA)) |
3975                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3976                                     IFF_ALLMULTI));
3977
3978         /*
3979          *      Load in the correct multicast list now the flags have changed.
3980          */
3981
3982         if ((old_flags ^ flags) & IFF_MULTICAST)
3983                 dev_change_rx_flags(dev, IFF_MULTICAST);
3984
3985         dev_set_rx_mode(dev);
3986
3987         /*
3988          *      Have we downed the interface. We handle IFF_UP ourselves
3989          *      according to user attempts to set it, rather than blindly
3990          *      setting it.
3991          */
3992
3993         ret = 0;
3994         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
3995                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3996
3997                 if (!ret)
3998                         dev_set_rx_mode(dev);
3999         }
4000
4001         if (dev->flags & IFF_UP &&
4002             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
4003                                           IFF_VOLATILE)))
4004                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
4005
4006         if ((flags ^ dev->gflags) & IFF_PROMISC) {
4007                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
4008                 dev->gflags ^= IFF_PROMISC;
4009                 dev_set_promiscuity(dev, inc);
4010         }
4011
4012         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4013            is important. Some (broken) drivers set IFF_PROMISC, when
4014            IFF_ALLMULTI is requested not asking us and not reporting.
4015          */
4016         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4017                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
4018                 dev->gflags ^= IFF_ALLMULTI;
4019                 dev_set_allmulti(dev, inc);
4020         }
4021
4022         /* Exclude state transition flags, already notified */
4023         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4024         if (changes)
4025                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4026
4027         return ret;
4028 }
4029
4030 /**
4031  *      dev_set_mtu - Change maximum transfer unit
4032  *      @dev: device
4033  *      @new_mtu: new transfer unit
4034  *
4035  *      Change the maximum transfer size of the network device.
4036  */
4037 int dev_set_mtu(struct net_device *dev, int new_mtu)
4038 {
4039         const struct net_device_ops *ops = dev->netdev_ops;
4040         int err;
4041
4042         if (new_mtu == dev->mtu)
4043                 return 0;
4044
4045         /*      MTU must be positive.    */
4046         if (new_mtu < 0)
4047                 return -EINVAL;
4048
4049         if (!netif_device_present(dev))
4050                 return -ENODEV;
4051
4052         err = 0;
4053         if (ops->ndo_change_mtu)
4054                 err = ops->ndo_change_mtu(dev, new_mtu);
4055         else
4056                 dev->mtu = new_mtu;
4057
4058         if (!err && dev->flags & IFF_UP)
4059                 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4060         return err;
4061 }
4062
4063 /**
4064  *      dev_set_mac_address - Change Media Access Control Address
4065  *      @dev: device
4066  *      @sa: new address
4067  *
4068  *      Change the hardware (MAC) address of the device
4069  */
4070 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4071 {
4072         const struct net_device_ops *ops = dev->netdev_ops;
4073         int err;
4074
4075         if (!ops->ndo_set_mac_address)
4076                 return -EOPNOTSUPP;
4077         if (sa->sa_family != dev->type)
4078                 return -EINVAL;
4079         if (!netif_device_present(dev))
4080                 return -ENODEV;
4081         err = ops->ndo_set_mac_address(dev, sa);
4082         if (!err)
4083                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4084         return err;
4085 }
4086
4087 /*
4088  *      Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
4089  */
4090 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4091 {
4092         int err;
4093         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4094
4095         if (!dev)
4096                 return -ENODEV;
4097
4098         switch (cmd) {
4099                 case SIOCGIFFLAGS:      /* Get interface flags */
4100                         ifr->ifr_flags = dev_get_flags(dev);
4101                         return 0;
4102
4103                 case SIOCGIFMETRIC:     /* Get the metric on the interface
4104                                            (currently unused) */
4105                         ifr->ifr_metric = 0;
4106                         return 0;
4107
4108                 case SIOCGIFMTU:        /* Get the MTU of a device */
4109                         ifr->ifr_mtu = dev->mtu;
4110                         return 0;
4111
4112                 case SIOCGIFHWADDR:
4113                         if (!dev->addr_len)
4114                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4115                         else
4116                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4117                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4118                         ifr->ifr_hwaddr.sa_family = dev->type;
4119                         return 0;
4120
4121                 case SIOCGIFSLAVE:
4122                         err = -EINVAL;
4123                         break;
4124
4125                 case SIOCGIFMAP:
4126                         ifr->ifr_map.mem_start = dev->mem_start;
4127                         ifr->ifr_map.mem_end   = dev->mem_end;
4128                         ifr->ifr_map.base_addr = dev->base_addr;
4129                         ifr->ifr_map.irq       = dev->irq;
4130                         ifr->ifr_map.dma       = dev->dma;
4131                         ifr->ifr_map.port      = dev->if_port;
4132                         return 0;
4133
4134                 case SIOCGIFINDEX:
4135                         ifr->ifr_ifindex = dev->ifindex;
4136                         return 0;
4137
4138                 case SIOCGIFTXQLEN:
4139                         ifr->ifr_qlen = dev->tx_queue_len;
4140                         return 0;
4141
4142                 default:
4143                         /* dev_ioctl() should ensure this case
4144                          * is never reached
4145                          */
4146                         WARN_ON(1);
4147                         err = -EINVAL;
4148                         break;
4149
4150         }
4151         return err;
4152 }
4153
4154 /*
4155  *      Perform the SIOCxIFxxx calls, inside rtnl_lock()
4156  */
4157 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4158 {
4159         int err;
4160         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4161         const struct net_device_ops *ops;
4162
4163         if (!dev)
4164                 return -ENODEV;
4165
4166         ops = dev->netdev_ops;
4167
4168         switch (cmd) {
4169                 case SIOCSIFFLAGS:      /* Set interface flags */
4170                         return dev_change_flags(dev, ifr->ifr_flags);
4171
4172                 case SIOCSIFMETRIC:     /* Set the metric on the interface
4173                                            (currently unused) */
4174                         return -EOPNOTSUPP;
4175
4176                 case SIOCSIFMTU:        /* Set the MTU of a device */
4177                         return dev_set_mtu(dev, ifr->ifr_mtu);
4178
4179                 case SIOCSIFHWADDR:
4180                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4181
4182                 case SIOCSIFHWBROADCAST:
4183                         if (ifr->ifr_hwaddr.sa_family != dev->type)
4184                                 return -EINVAL;
4185                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4186                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4187                         call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4188                         return 0;
4189
4190                 case SIOCSIFMAP:
4191                         if (ops->ndo_set_config) {
4192                                 if (!netif_device_present(dev))
4193                                         return -ENODEV;
4194                                 return ops->ndo_set_config(dev, &ifr->ifr_map);
4195                         }
4196                         return -EOPNOTSUPP;
4197
4198                 case SIOCADDMULTI:
4199                         if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4200                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4201                                 return -EINVAL;
4202                         if (!netif_device_present(dev))
4203                                 return -ENODEV;
4204                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4205                                           dev->addr_len, 1);
4206
4207                 case SIOCDELMULTI:
4208                         if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4209                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4210                                 return -EINVAL;
4211                         if (!netif_device_present(dev))
4212                                 return -ENODEV;
4213                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4214                                              dev->addr_len, 1);
4215
4216                 case SIOCSIFTXQLEN:
4217                         if (ifr->ifr_qlen < 0)
4218                                 return -EINVAL;
4219                         dev->tx_queue_len = ifr->ifr_qlen;
4220                         return 0;
4221
4222                 case SIOCSIFNAME:
4223                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4224                         return dev_change_name(dev, ifr->ifr_newname);
4225
4226                 /*
4227                  *      Unknown or private ioctl
4228                  */
4229
4230                 default:
4231                         if ((cmd >= SIOCDEVPRIVATE &&
4232                             cmd <= SIOCDEVPRIVATE + 15) ||
4233                             cmd == SIOCBONDENSLAVE ||
4234                             cmd == SIOCBONDRELEASE ||
4235                             cmd == SIOCBONDSETHWADDR ||
4236                             cmd == SIOCBONDSLAVEINFOQUERY ||
4237                             cmd == SIOCBONDINFOQUERY ||
4238                             cmd == SIOCBONDCHANGEACTIVE ||
4239                             cmd == SIOCGMIIPHY ||
4240                             cmd == SIOCGMIIREG ||
4241                             cmd == SIOCSMIIREG ||
4242                             cmd == SIOCBRADDIF ||
4243                             cmd == SIOCBRDELIF ||
4244                             cmd == SIOCSHWTSTAMP ||
4245                             cmd == SIOCWANDEV) {
4246                                 err = -EOPNOTSUPP;
4247                                 if (ops->ndo_do_ioctl) {
4248                                         if (netif_device_present(dev))
4249                                                 err = ops->ndo_do_ioctl(dev, ifr, cmd);
4250                                         else
4251                                                 err = -ENODEV;
4252                                 }
4253                         } else
4254                                 err = -EINVAL;
4255
4256         }
4257         return err;
4258 }
4259
4260 /*
4261  *      This function handles all "interface"-type I/O control requests. The actual
4262  *      'doing' part of this is dev_ifsioc above.
4263  */
4264
4265 /**
4266  *      dev_ioctl       -       network device ioctl
4267  *      @net: the applicable net namespace
4268  *      @cmd: command to issue
4269  *      @arg: pointer to a struct ifreq in user space
4270  *
4271  *      Issue ioctl functions to devices. This is normally called by the
4272  *      user space syscall interfaces but can sometimes be useful for
4273  *      other purposes. The return value is the return from the syscall if
4274  *      positive or a negative errno code on error.
4275  */
4276
4277 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4278 {
4279         struct ifreq ifr;
4280         int ret;
4281         char *colon;
4282
4283         /* One special case: SIOCGIFCONF takes ifconf argument
4284            and requires shared lock, because it sleeps writing
4285            to user space.
4286          */
4287
4288         if (cmd == SIOCGIFCONF) {
4289                 rtnl_lock();
4290                 ret = dev_ifconf(net, (char __user *) arg);
4291                 rtnl_unlock();
4292                 return ret;
4293         }
4294         if (cmd == SIOCGIFNAME)
4295                 return dev_ifname(net, (struct ifreq __user *)arg);
4296
4297         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4298                 return -EFAULT;
4299
4300         ifr.ifr_name[IFNAMSIZ-1] = 0;
4301
4302         colon = strchr(ifr.ifr_name, ':');
4303         if (colon)
4304                 *colon = 0;
4305
4306         /*
4307          *      See which interface the caller is talking about.
4308          */
4309
4310         switch (cmd) {
4311                 /*
4312                  *      These ioctl calls:
4313                  *      - can be done by all.
4314                  *      - atomic and do not require locking.
4315                  *      - return a value
4316                  */
4317                 case SIOCGIFFLAGS:
4318                 case SIOCGIFMETRIC:
4319                 case SIOCGIFMTU:
4320                 case SIOCGIFHWADDR:
4321                 case SIOCGIFSLAVE:
4322                 case SIOCGIFMAP:
4323                 case SIOCGIFINDEX:
4324                 case SIOCGIFTXQLEN:
4325                         dev_load(net, ifr.ifr_name);
4326                         read_lock(&dev_base_lock);
4327                         ret = dev_ifsioc_locked(net, &ifr, cmd);
4328                         read_unlock(&dev_base_lock);
4329                         if (!ret) {
4330                                 if (colon)
4331                                         *colon = ':';
4332                                 if (copy_to_user(arg, &ifr,
4333                                                  sizeof(struct ifreq)))
4334                                         ret = -EFAULT;
4335                         }
4336                         return ret;
4337
4338                 case SIOCETHTOOL:
4339                         dev_load(net, ifr.ifr_name);
4340                         rtnl_lock();
4341                         ret = dev_ethtool(net, &ifr);
4342                         rtnl_unlock();
4343                         if (!ret) {
4344                                 if (colon)
4345                                         *colon = ':';
4346                                 if (copy_to_user(arg, &ifr,
4347                                                  sizeof(struct ifreq)))
4348                                         ret = -EFAULT;
4349                         }
4350                         return ret;
4351
4352                 /*
4353                  *      These ioctl calls:
4354                  *      - require superuser power.
4355                  *      - require strict serialization.
4356                  *      - return a value
4357                  */
4358                 case SIOCGMIIPHY:
4359                 case SIOCGMIIREG:
4360                 case SIOCSIFNAME:
4361                         if (!capable(CAP_NET_ADMIN))
4362                                 return -EPERM;
4363                         dev_load(net, ifr.ifr_name);
4364                         rtnl_lock();
4365                         ret = dev_ifsioc(net, &ifr, cmd);
4366                         rtnl_unlock();
4367                         if (!ret) {
4368                                 if (colon)
4369                                         *colon = ':';
4370                                 if (copy_to_user(arg, &ifr,
4371                                                  sizeof(struct ifreq)))
4372                                         ret = -EFAULT;
4373                         }
4374                         return ret;
4375
4376                 /*
4377                  *      These ioctl calls:
4378                  *      - require superuser power.
4379                  *      - require strict serialization.
4380                  *      - do not return a value
4381                  */
4382                 case SIOCSIFFLAGS:
4383                 case SIOCSIFMETRIC:
4384                 case SIOCSIFMTU:
4385                 case SIOCSIFMAP:
4386                 case SIOCSIFHWADDR:
4387                 case SIOCSIFSLAVE:
4388                 case SIOCADDMULTI:
4389                 case SIOCDELMULTI:
4390                 case SIOCSIFHWBROADCAST:
4391                 case SIOCSIFTXQLEN:
4392                 case SIOCSMIIREG:
4393                 case SIOCBONDENSLAVE:
4394                 case SIOCBONDRELEASE:
4395                 case SIOCBONDSETHWADDR:
4396                 case SIOCBONDCHANGEACTIVE:
4397                 case SIOCBRADDIF:
4398                 case SIOCBRDELIF:
4399                 case SIOCSHWTSTAMP:
4400                         if (!capable(CAP_NET_ADMIN))
4401                                 return -EPERM;
4402                         /* fall through */
4403                 case SIOCBONDSLAVEINFOQUERY:
4404                 case SIOCBONDINFOQUERY:
4405                         dev_load(net, ifr.ifr_name);
4406                         rtnl_lock();
4407                         ret = dev_ifsioc(net, &ifr, cmd);
4408                         rtnl_unlock();
4409                         return ret;
4410
4411                 case SIOCGIFMEM:
4412                         /* Get the per device memory space. We can add this but
4413                          * currently do not support it */
4414                 case SIOCSIFMEM:
4415                         /* Set the per device memory buffer space.
4416                          * Not applicable in our case */
4417                 case SIOCSIFLINK:
4418                         return -EINVAL;
4419
4420                 /*
4421                  *      Unknown or private ioctl.
4422                  */
4423                 default:
4424                         if (cmd == SIOCWANDEV ||
4425                             (cmd >= SIOCDEVPRIVATE &&
4426                              cmd <= SIOCDEVPRIVATE + 15)) {
4427                                 dev_load(net, ifr.ifr_name);
4428                                 rtnl_lock();
4429                                 ret = dev_ifsioc(net, &ifr, cmd);
4430                                 rtnl_unlock();
4431                                 if (!ret && copy_to_user(arg, &ifr,
4432                                                          sizeof(struct ifreq)))
4433                                         ret = -EFAULT;
4434                                 return ret;
4435                         }
4436                         /* Take care of Wireless Extensions */
4437                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4438                                 return wext_handle_ioctl(net, &ifr, cmd, arg);
4439                         return -EINVAL;
4440         }
4441 }
4442
4443
/**
 *      dev_new_index   -       allocate an ifindex
 *      @net: the applicable net namespace
 *
 *      Returns the next interface index for which __dev_get_by_index()
 *      finds no device in @net.  Candidates come from a function-static
 *      counter that restarts at 1 whenever it stops being positive.  The
 *      caller must hold the rtnl semaphore or the dev_base_lock to be
 *      sure the returned value remains unique.
 */
static int dev_new_index(struct net *net)
{
        static int ifindex;

        do {
                ifindex++;
                /* Never hand out zero or a negative index. */
                if (ifindex <= 0)
                        ifindex = 1;
        } while (__dev_get_by_index(net, ifindex));

        return ifindex;
}
4462
4463 /* Delayed registration/unregisteration */
4464 static LIST_HEAD(net_todo_list);
4465
4466 static void net_set_todo(struct net_device *dev)
4467 {
4468         list_add_tail(&dev->todo_list, &net_todo_list);
4469 }
4470
4471 static void rollback_registered(struct net_device *dev)
4472 {
4473         BUG_ON(dev_boot_phase);
4474         ASSERT_RTNL();
4475
4476         /* Some devices call without registering for initialization unwind. */
4477         if (dev->reg_state == NETREG_UNINITIALIZED) {
4478                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4479                                   "was registered\n", dev->name, dev);
4480
4481                 WARN_ON(1);
4482                 return;
4483         }
4484
4485         BUG_ON(dev->reg_state != NETREG_REGISTERED);
4486
4487         /* If device is running, close it first. */
4488         dev_close(dev);
4489
4490         /* And unlink it from device chain. */
4491         unlist_netdevice(dev);
4492
4493         dev->reg_state = NETREG_UNREGISTERING;
4494
4495         synchronize_net();
4496
4497         /* Shutdown queueing discipline. */
4498         dev_shutdown(dev);
4499
4500
4501         /* Notify protocols, that we are about to destroy
4502            this device. They should clean all the things.
4503         */
4504         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4505
4506         /*
4507          *      Flush the unicast and multicast chains
4508          */
4509         dev_addr_discard(dev);
4510
4511         if (dev->netdev_ops->ndo_uninit)
4512                 dev->netdev_ops->ndo_uninit(dev);
4513
4514         /* Notifier chain MUST detach us from master device. */
4515         WARN_ON(dev->master);
4516
4517         /* Remove entries from kobject tree */
4518         netdev_unregister_kobject(dev);
4519
4520         synchronize_net();
4521
4522         dev_put(dev);
4523 }
4524
4525 static void __netdev_init_queue_locks_one(struct net_device *dev,
4526                                           struct netdev_queue *dev_queue,
4527                                           void *_unused)
4528 {
4529         spin_lock_init(&dev_queue->_xmit_lock);
4530         netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4531         dev_queue->xmit_lock_owner = -1;
4532 }
4533
4534 static void netdev_init_queue_locks(struct net_device *dev)
4535 {
4536         netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4537         __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4538 }
4539
4540 unsigned long netdev_fix_features(unsigned long features, const char *name)
4541 {
4542         /* Fix illegal SG+CSUM combinations. */
4543         if ((features & NETIF_F_SG) &&
4544             !(features & NETIF_F_ALL_CSUM)) {
4545                 if (name)
4546                         printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4547                                "checksum feature.\n", name);
4548                 features &= ~NETIF_F_SG;
4549         }
4550
4551         /* TSO requires that SG is present as well. */
4552         if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4553                 if (name)
4554                         printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4555                                "SG feature.\n", name);
4556                 features &= ~NETIF_F_TSO;
4557         }
4558
4559         if (features & NETIF_F_UFO) {
4560                 if (!(features & NETIF_F_GEN_CSUM)) {
4561                         if (name)
4562                                 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4563                                        "since no NETIF_F_HW_CSUM feature.\n",
4564                                        name);
4565                         features &= ~NETIF_F_UFO;
4566                 }
4567
4568                 if (!(features & NETIF_F_SG)) {
4569                         if (name)
4570                                 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4571                                        "since no NETIF_F_SG feature.\n", name);
4572                         features &= ~NETIF_F_UFO;
4573                 }
4574         }
4575
4576         return features;
4577 }
4578 EXPORT_SYMBOL(netdev_fix_features);
4579
/*
 * netdev_resync_ops - refresh the legacy per-device method pointers
 * @dev: device whose netdev_ops may have been (re-)assigned
 *
 * Some devices need to (re-)set their netdev_ops inside ->init() or
 * similar.  When that happens the compat pointers kept directly in the
 * net_device have to be copied from the ops table again.  Without
 * CONFIG_COMPAT_NET_DEV_OPS this function does nothing.
 */
void netdev_resync_ops(struct net_device *dev)
{
#ifdef CONFIG_COMPAT_NET_DEV_OPS
        const struct net_device_ops *nops = dev->netdev_ops;

        /* set-up, tear-down and open */
        dev->init = nops->ndo_init;
        dev->uninit = nops->ndo_uninit;
        dev->open = nops->ndo_open;

        /* receive flags, multicast list and addresses */
        dev->change_rx_flags = nops->ndo_change_rx_flags;
        dev->set_rx_mode = nops->ndo_set_rx_mode;
        dev->set_multicast_list = nops->ndo_set_multicast_list;
        dev->set_mac_address = nops->ndo_set_mac_address;
        dev->validate_addr = nops->ndo_validate_addr;

        /* ioctl, configuration, neighbour setup, timeout and statistics */
        dev->do_ioctl = nops->ndo_do_ioctl;
        dev->set_config = nops->ndo_set_config;
        dev->change_mtu = nops->ndo_change_mtu;
        dev->neigh_setup = nops->ndo_neigh_setup;
        dev->tx_timeout = nops->ndo_tx_timeout;
        dev->get_stats = nops->ndo_get_stats;

        /* VLAN receive hooks */
        dev->vlan_rx_register = nops->ndo_vlan_rx_register;
        dev->vlan_rx_add_vid = nops->ndo_vlan_rx_add_vid;
        dev->vlan_rx_kill_vid = nops->ndo_vlan_rx_kill_vid;
#ifdef CONFIG_NET_POLL_CONTROLLER
        dev->poll_controller = nops->ndo_poll_controller;
#endif
#endif
}
EXPORT_SYMBOL(netdev_resync_ops);
4612
4613 /**
4614  *      register_netdevice      - register a network device
4615  *      @dev: device to register
4616  *
4617  *      Take a completed network device structure and add it to the kernel
4618  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4619  *      chain. 0 is returned on success. A negative errno code is returned
4620  *      on a failure to set up the device, or if the name is a duplicate.
4621  *
4622  *      Callers must hold the rtnl semaphore. You may want
4623  *      register_netdev() instead of this.
4624  *
4625  *      BUGS:
4626  *      The locking appears insufficient to guarantee two parallel registers
4627  *      will not get the same name.
4628  */
4629
4630 int register_netdevice(struct net_device *dev)
4631 {
4632         struct hlist_head *head;
4633         struct hlist_node *p;
4634         int ret;
4635         struct net *net = dev_net(dev);
4636
4637         BUG_ON(dev_boot_phase);
4638         ASSERT_RTNL();
4639
4640         might_sleep();
4641
4642         /* When net_device's are persistent, this will be fatal. */
4643         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4644         BUG_ON(!net);
4645
4646         spin_lock_init(&dev->addr_list_lock);
4647         netdev_set_addr_lockdep_class(dev);
4648         netdev_init_queue_locks(dev);
4649
4650         dev->iflink = -1;
4651
4652 #ifdef CONFIG_COMPAT_NET_DEV_OPS
4653         /* Netdevice_ops API compatibility support.
4654          * This is temporary until all network devices are converted.
4655          */
4656         if (dev->netdev_ops) {
4657                 netdev_resync_ops(dev);
4658         } else {
4659                 char drivername[64];
4660                 pr_info("%s (%s): not using net_device_ops yet\n",
4661                         dev->name, netdev_drivername(dev, drivername, 64));
4662
4663                 /* This works only because net_device_ops and the
4664                    compatibility structure are the same. */
4665                 dev->netdev_ops = (void *) &(dev->init);
4666         }
4667 #endif
4668
4669         /* Init, if this function is available */
4670         if (dev->netdev_ops->ndo_init) {
4671                 ret = dev->netdev_ops->ndo_init(dev);
4672                 if (ret) {
4673                         if (ret > 0)
4674                                 ret = -EIO;
4675                         goto out;
4676                 }
4677         }
4678
4679         if (!dev_valid_name(dev->name)) {
4680                 ret = -EINVAL;
4681                 goto err_uninit;
4682         }
4683
4684         dev->ifindex = dev_new_index(net);
4685         if (dev->iflink == -1)
4686                 dev->iflink = dev->ifindex;
4687
4688         /* Check for existence of name */
4689         head = dev_name_hash(net, dev->name);
4690         hlist_for_each(p, head) {
4691                 struct net_device *d
4692                         = hlist_entry(p, struct net_device, name_hlist);
4693                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4694                         ret = -EEXIST;
4695                         goto err_uninit;
4696                 }
4697         }
4698
4699         /* Fix illegal checksum combinations */
4700         if ((dev->features & NETIF_F_HW_CSUM) &&
4701             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4702                 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4703                        dev->name);
4704                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4705         }
4706
4707         if ((dev->features & NETIF_F_NO_CSUM) &&
4708             (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4709                 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4710                        dev->name);
4711                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4712         }
4713
4714         dev->features = netdev_fix_features(dev->features, dev->name);
4715
4716         /* Enable software GSO if SG is supported. */
4717         if (dev->features & NETIF_F_SG)
4718                 dev->features |= NETIF_F_GSO;
4719
4720         netdev_initialize_kobject(dev);
4721         ret = netdev_register_kobject(dev);
4722         if (ret)
4723                 goto err_uninit;
4724         dev->reg_state = NETREG_REGISTERED;
4725
4726         /*
4727          *      Default initial state at registry is that the
4728          *      device is present.
4729          */
4730
4731         set_bit(__LINK_STATE_PRESENT, &dev->state);
4732
4733         dev_init_scheduler(dev);
4734         dev_hold(dev);
4735         list_netdevice(dev);
4736
4737         /* Notify protocols, that a new device appeared. */
4738         ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4739         ret = notifier_to_errno(ret);
4740         if (ret) {
4741                 rollback_registered(dev);
4742                 dev->reg_state = NETREG_UNREGISTERED;
4743         }
4744
4745 out:
4746         return ret;
4747
4748 err_uninit:
4749         if (dev->netdev_ops->ndo_uninit)
4750                 dev->netdev_ops->ndo_uninit(dev);
4751         goto out;
4752 }
4753
4754 /**
4755  *      init_dummy_netdev       - init a dummy network device for NAPI
4756  *      @dev: device to init
4757  *
4758  *      This takes a network device structure and initialize the minimum
4759  *      amount of fields so it can be used to schedule NAPI polls without
4760  *      registering a full blown interface. This is to be used by drivers
4761  *      that need to tie several hardware interfaces to a single NAPI
4762  *      poll scheduler due to HW limitations.
4763  */
4764 int init_dummy_netdev(struct net_device *dev)
4765 {
4766         /* Clear everything. Note we don't initialize spinlocks
4767          * are they aren't supposed to be taken by any of the
4768          * NAPI code and this dummy netdev is supposed to be
4769          * only ever used for NAPI polls
4770          */
4771         memset(dev, 0, sizeof(struct net_device));
4772
4773         /* make sure we BUG if trying to hit standard
4774          * register/unregister code path
4775          */
4776         dev->reg_state = NETREG_DUMMY;
4777
4778         /* initialize the ref count */
4779         atomic_set(&dev->refcnt, 1);
4780
4781         /* NAPI wants this */
4782         INIT_LIST_HEAD(&dev->napi_list);
4783
4784         /* a dummy interface is started by default */
4785         set_bit(__LINK_STATE_PRESENT, &dev->state);
4786         set_bit(__LINK_STATE_START, &dev->state);
4787
4788         return 0;
4789 }
4790 EXPORT_SYMBOL_GPL(init_dummy_netdev);
4791
4792
4793 /**
4794  *      register_netdev - register a network device
4795  *      @dev: device to register
4796  *
4797  *      Take a completed network device structure and add it to the kernel
4798  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4799  *      chain. 0 is returned on success. A negative errno code is returned
4800  *      on a failure to set up the device, or if the name is a duplicate.
4801  *
4802  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
4803  *      and expands the device name if you passed a format string to
4804  *      alloc_netdev.
4805  */
4806 int register_netdev(struct net_device *dev)
4807 {
4808         int err;
4809
4810         rtnl_lock();
4811
4812         /*
4813          * If the name is a format string the caller wants us to do a
4814          * name allocation.
4815          */
4816         if (strchr(dev->name, '%')) {
4817                 err = dev_alloc_name(dev, dev->name);
4818                 if (err < 0)
4819                         goto out;
4820         }
4821
4822         err = register_netdevice(dev);
4823 out:
4824         rtnl_unlock();
4825         return err;
4826 }
4827 EXPORT_SYMBOL(register_netdev);
4828
4829 /*
4830  * netdev_wait_allrefs - wait until all references are gone.
4831  *
4832  * This is called when unregistering network devices.
4833  *
4834  * Any protocol or device that holds a reference should register
4835  * for netdevice notification, and cleanup and put back the
4836  * reference if they receive an UNREGISTER event.
4837  * We can get stuck here if buggy protocols don't correctly
4838  * call dev_put.
4839  */
4840 static void netdev_wait_allrefs(struct net_device *dev)
4841 {
4842         unsigned long rebroadcast_time, warning_time;
4843
4844         rebroadcast_time = warning_time = jiffies;
4845         while (atomic_read(&dev->refcnt) != 0) {
4846                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4847                         rtnl_lock();
4848
4849                         /* Rebroadcast unregister notification */
4850                         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4851
4852                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4853                                      &dev->state)) {
4854                                 /* We must not have linkwatch events
4855                                  * pending on unregister. If this
4856                                  * happens, we simply run the queue
4857                                  * unscheduled, resulting in a noop
4858                                  * for this device.
4859                                  */
4860                                 linkwatch_run_queue();
4861                         }
4862
4863                         __rtnl_unlock();
4864
4865                         rebroadcast_time = jiffies;
4866                 }
4867
4868                 msleep(250);
4869
4870                 if (time_after(jiffies, warning_time + 10 * HZ)) {
4871                         printk(KERN_EMERG "unregister_netdevice: "
4872                                "waiting for %s to become free. Usage "
4873                                "count = %d\n",
4874                                dev->name, atomic_read(&dev->refcnt));
4875                         warning_time = jiffies;
4876                 }
4877         }
4878 }
4879
4880 /* The sequence is:
4881  *
4882  *      rtnl_lock();
4883  *      ...
4884  *      register_netdevice(x1);
4885  *      register_netdevice(x2);
4886  *      ...
4887  *      unregister_netdevice(y1);
4888  *      unregister_netdevice(y2);
4889  *      ...
4890  *      rtnl_unlock();
4891  *      free_netdev(y1);
4892  *      free_netdev(y2);
4893  *
4894  * We are invoked by rtnl_unlock().
4895  * This allows us to deal with problems:
4896  * 1) We can delete sysfs objects which invoke hotplug
4897  *    without deadlocking with linkwatch via keventd.
4898  * 2) Since we run with the RTNL semaphore not held, we can sleep
4899  *    safely in order to wait for the netdev refcnt to drop to zero.
4900  *
4901  * We must not return until all unregister events added during
4902  * the interval the lock was held have been completed.
4903  */
4904 void netdev_run_todo(void)
4905 {
4906         struct list_head list;
4907
4908         /* Snapshot list, allow later requests */
4909         list_replace_init(&net_todo_list, &list);
4910
4911         __rtnl_unlock();
4912
4913         while (!list_empty(&list)) {
4914                 struct net_device *dev
4915                         = list_entry(list.next, struct net_device, todo_list);
4916                 list_del(&dev->todo_list);
4917
4918                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4919                         printk(KERN_ERR "network todo '%s' but state %d\n",
4920                                dev->name, dev->reg_state);
4921                         dump_stack();
4922                         continue;
4923                 }
4924
4925                 dev->reg_state = NETREG_UNREGISTERED;
4926
4927                 on_each_cpu(flush_backlog, dev, 1);
4928
4929                 netdev_wait_allrefs(dev);
4930
4931                 /* paranoia */
4932                 BUG_ON(atomic_read(&dev->refcnt));
4933                 WARN_ON(dev->ip_ptr);
4934                 WARN_ON(dev->ip6_ptr);
4935                 WARN_ON(dev->dn_ptr);
4936
4937                 if (dev->destructor)
4938                         dev->destructor(dev);
4939
4940                 /* Free network device */
4941                 kobject_put(&dev->dev.kobj);
4942         }
4943 }
4944
4945 /**
4946  *      dev_get_stats   - get network device statistics
4947  *      @dev: device to get statistics from
4948  *
4949  *      Get network statistics from device. The device driver may provide
4950  *      its own method by setting dev->netdev_ops->get_stats; otherwise
4951  *      the internal statistics structure is used.
4952  */
4953 const struct net_device_stats *dev_get_stats(struct net_device *dev)
4954 {
4955         const struct net_device_ops *ops = dev->netdev_ops;
4956
4957         if (ops->ndo_get_stats)
4958                 return ops->ndo_get_stats(dev);
4959         else {
4960                 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
4961                 struct net_device_stats *stats = &dev->stats;
4962                 unsigned int i;
4963                 struct netdev_queue *txq;
4964
4965                 for (i = 0; i < dev->num_tx_queues; i++) {
4966                         txq = netdev_get_tx_queue(dev, i);
4967                         tx_bytes   += txq->tx_bytes;
4968                         tx_packets += txq->tx_packets;
4969                         tx_dropped += txq->tx_dropped;
4970                 }
4971                 if (tx_bytes || tx_packets || tx_dropped) {
4972                         stats->tx_bytes   = tx_bytes;
4973                         stats->tx_packets = tx_packets;
4974                         stats->tx_dropped = tx_dropped;
4975                 }
4976                 return stats;
4977         }
4978 }
4979 EXPORT_SYMBOL(dev_get_stats);
4980
4981 static void netdev_init_one_queue(struct net_device *dev,
4982                                   struct netdev_queue *queue,
4983                                   void *_unused)
4984 {
4985         queue->dev = dev;
4986 }
4987
4988 static void netdev_init_queues(struct net_device *dev)
4989 {
4990         netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4991         netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4992         spin_lock_init(&dev->tx_global_lock);
4993 }
4994
4995 /**
4996  *      alloc_netdev_mq - allocate network device
4997  *      @sizeof_priv:   size of private data to allocate space for
4998  *      @name:          device name format string
4999  *      @setup:         callback to initialize device
5000  *      @queue_count:   the number of subqueues to allocate
5001  *
5002  *      Allocates a struct net_device with private data area for driver use
5003  *      and performs basic initialization.  Also allocates subquue structs
5004  *      for each queue on the device at the end of the netdevice.
5005  */
5006 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5007                 void (*setup)(struct net_device *), unsigned int queue_count)
5008 {
5009         struct netdev_queue *tx;
5010         struct net_device *dev;
5011         size_t alloc_size;
5012         void *p;
5013
5014         BUG_ON(strlen(name) >= sizeof(dev->name));
5015
5016         alloc_size = sizeof(struct net_device);
5017         if (sizeof_priv) {
5018                 /* ensure 32-byte alignment of private area */
5019                 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
5020                 alloc_size += sizeof_priv;
5021         }
5022         /* ensure 32-byte alignment of whole construct */
5023         alloc_size += NETDEV_ALIGN_CONST;
5024
5025         p = kzalloc(alloc_size, GFP_KERNEL);
5026         if (!p) {
5027                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5028                 return NULL;
5029         }
5030
5031         tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5032         if (!tx) {
5033                 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5034                        "tx qdiscs.\n");
5035                 goto free_p;
5036         }
5037
5038         dev = (struct net_device *)
5039                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
5040         dev->padded = (char *)dev - (char *)p;
5041
5042         if (dev_addr_init(dev))
5043                 goto free_tx;
5044
5045         dev_net_set(dev, &init_net);
5046
5047         dev->_tx = tx;
5048         dev->num_tx_queues = queue_count;
5049         dev->real_num_tx_queues = queue_count;
5050
5051         dev->gso_max_size = GSO_MAX_SIZE;
5052
5053         netdev_init_queues(dev);
5054
5055         INIT_LIST_HEAD(&dev->napi_list);
5056         dev->priv_flags = IFF_XMIT_DST_RELEASE;
5057         setup(dev);
5058         strcpy(dev->name, name);
5059         return dev;
5060
5061 free_tx:
5062         kfree(tx);
5063
5064 free_p:
5065         kfree(p);
5066         return NULL;
5067 }
5068 EXPORT_SYMBOL(alloc_netdev_mq);
5069
5070 /**
5071  *      free_netdev - free network device
5072  *      @dev: device
5073  *
5074  *      This function does the last stage of destroying an allocated device
5075  *      interface. The reference to the device object is released.
5076  *      If this is the last reference then it will be freed.
5077  */
5078 void free_netdev(struct net_device *dev)
5079 {
5080         struct napi_struct *p, *n;
5081
5082         release_net(dev_net(dev));
5083
5084         kfree(dev->_tx);
5085
5086         /* Flush device addresses */
5087         dev_addr_flush(dev);
5088
5089         list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5090                 netif_napi_del(p);
5091
5092         /*  Compatibility with error handling in drivers */
5093         if (dev->reg_state == NETREG_UNINITIALIZED) {
5094                 kfree((char *)dev - dev->padded);
5095                 return;
5096         }
5097
5098         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5099         dev->reg_state = NETREG_RELEASED;
5100
5101         /* will free via device release */
5102         put_device(&dev->dev);
5103 }
5104
/**
 *      synchronize_net -  Synchronize with packet receive processing
 *
 *      Wait for packets currently being received to be done.
 *      Does not block later packets from starting.
 *
 *      Implemented with synchronize_rcu(); annotated with might_sleep().
 */
void synchronize_net(void)
{
        might_sleep();

        synchronize_rcu();
}
5116
/**
 *      unregister_netdevice - remove device from the kernel
 *      @dev: device
 *
 *      This function shuts down a device interface and removes it
 *      from the kernel tables.  The device is then placed on the todo
 *      list so that the remaining work is done after the unlock.
 *
 *      Callers must hold the rtnl semaphore.  You may want
 *      unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
        ASSERT_RTNL();

        /* Undo the registration now ... */
        rollback_registered(dev);

        /* ... and finish processing the unregister after unlock. */
        net_set_todo(dev);
}
5136
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      This function shuts down a device interface and removes it
 *      from the kernel tables.
 *
 *      This is just a wrapper for unregister_netdevice that takes
 *      the rtnl semaphore around the call.  In general you want to
 *      use this and not unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();

        unregister_netdevice(dev);

        rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);
5156
5157 /**
5158  *      dev_change_net_namespace - move device to different nethost namespace
5159  *      @dev: device
5160  *      @net: network namespace
5161  *      @pat: If not NULL name pattern to try if the current device name
5162  *            is already taken in the destination network namespace.
5163  *
5164  *      This function shuts down a device interface and moves it
5165  *      to a new network namespace. On success 0 is returned, on
5166  *      a failure a netagive errno code is returned.
5167  *
5168  *      Callers must hold the rtnl semaphore.
5169  */
5170
5171 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5172 {
5173         char buf[IFNAMSIZ];
5174         const char *destname;
5175         int err;
5176
5177         ASSERT_RTNL();
5178
5179         /* Don't allow namespace local devices to be moved. */
5180         err = -EINVAL;
5181         if (dev->features & NETIF_F_NETNS_LOCAL)
5182                 goto out;
5183
5184 #ifdef CONFIG_SYSFS
5185         /* Don't allow real devices to be moved when sysfs
5186          * is enabled.
5187          */
5188         err = -EINVAL;
5189         if (dev->dev.parent)
5190                 goto out;
5191 #endif
5192
5193         /* Ensure the device has been registrered */
5194         err = -EINVAL;
5195         if (dev->reg_state != NETREG_REGISTERED)
5196                 goto out;
5197
5198         /* Get out if there is nothing todo */
5199         err = 0;
5200         if (net_eq(dev_net(dev), net))
5201                 goto out;
5202
5203         /* Pick the destination device name, and ensure
5204          * we can use it in the destination network namespace.
5205          */
5206         err = -EEXIST;
5207         destname = dev->name;
5208         if (__dev_get_by_name(net, destname)) {
5209                 /* We get here if we can't use the current device name */
5210                 if (!pat)
5211                         goto out;
5212                 if (!dev_valid_name(pat))
5213                         goto out;
5214                 if (strchr(pat, '%')) {
5215                         if (__dev_alloc_name(net, pat, buf) < 0)
5216                                 goto out;
5217                         destname = buf;
5218                 } else
5219                         destname = pat;
5220                 if (__dev_get_by_name(net, destname))
5221                         goto out;
5222         }
5223
5224         /*
5225          * And now a mini version of register_netdevice unregister_netdevice.
5226          */
5227
5228         /* If device is running close it first. */
5229         dev_close(dev);
5230
5231         /* And unlink it from device chain */
5232         err = -ENODEV;
5233         unlist_netdevice(dev);
5234
5235         synchronize_net();
5236
5237         /* Shutdown queueing discipline. */
5238         dev_shutdown(dev);
5239
5240         /* Notify protocols, that we are about to destroy
5241            this device. They should clean all the things.
5242         */
5243         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5244
5245         /*
5246          *      Flush the unicast and multicast chains
5247          */
5248         dev_addr_discard(dev);
5249
5250         netdev_unregister_kobject(dev);
5251
5252         /* Actually switch the network namespace */
5253         dev_net_set(dev, net);
5254
5255         /* Assign the new device name */
5256         if (destname != dev->name)
5257                 strcpy(dev->name, destname);
5258
5259         /* If there is an ifindex conflict assign a new one */
5260         if (__dev_get_by_index(net, dev->ifindex)) {
5261                 int iflink = (dev->iflink == dev->ifindex);
5262                 dev->ifindex = dev_new_index(net);
5263                 if (iflink)
5264                         dev->iflink = dev->ifindex;
5265         }
5266
5267         /* Fixup kobjects */
5268         err = netdev_register_kobject(dev);
5269         WARN_ON(err);
5270
5271         /* Add the device back in the hashes */
5272         list_netdevice(dev);
5273
5274         /* Notify protocols, that a new device appeared. */
5275         call_netdevice_notifiers(NETDEV_REGISTER, dev);
5276
5277         synchronize_net();
5278         err = 0;
5279 out:
5280         return err;
5281 }
5282
5283 static int dev_cpu_callback(struct notifier_block *nfb,
5284                             unsigned long action,
5285                             void *ocpu)
5286 {
5287         struct sk_buff **list_skb;
5288         struct Qdisc **list_net;
5289         struct sk_buff *skb;
5290         unsigned int cpu, oldcpu = (unsigned long)ocpu;
5291         struct softnet_data *sd, *oldsd;
5292
5293         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5294                 return NOTIFY_OK;
5295
5296         local_irq_disable();
5297         cpu = smp_processor_id();
5298         sd = &per_cpu(softnet_data, cpu);
5299         oldsd = &per_cpu(softnet_data, oldcpu);
5300
5301         /* Find end of our completion_queue. */
5302         list_skb = &sd->completion_queue;
5303         while (*list_skb)
5304                 list_skb = &(*list_skb)->next;
5305         /* Append completion queue from offline CPU. */
5306         *list_skb = oldsd->completion_queue;
5307         oldsd->completion_queue = NULL;
5308
5309         /* Find end of our output_queue. */
5310         list_net = &sd->output_queue;
5311         while (*list_net)
5312                 list_net = &(*list_net)->next_sched;
5313         /* Append output queue from offline CPU. */
5314         *list_net = oldsd->output_queue;
5315         oldsd->output_queue = NULL;
5316
5317         raise_softirq_irqoff(NET_TX_SOFTIRQ);
5318         local_irq_enable();
5319
5320         /* Process offline CPU's input_pkt_queue */
5321         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5322                 netif_rx(skb);
5323
5324         return NOTIFY_OK;
5325 }
5326
5327
5328 /**
5329  *      netdev_increment_features - increment feature set by one
5330  *      @all: current feature set
5331  *      @one: new feature set
5332  *      @mask: mask feature set
5333  *
5334  *      Computes a new feature set after adding a device with feature set
5335  *      @one to the master device with current feature set @all.  Will not
5336  *      enable anything that is off in @mask. Returns the new feature set.
5337  */
5338 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5339                                         unsigned long mask)
5340 {
5341         /* If device needs checksumming, downgrade to it. */
5342         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5343                 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5344         else if (mask & NETIF_F_ALL_CSUM) {
5345                 /* If one device supports v4/v6 checksumming, set for all. */
5346                 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5347                     !(all & NETIF_F_GEN_CSUM)) {
5348                         all &= ~NETIF_F_ALL_CSUM;
5349                         all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5350                 }
5351
5352                 /* If one device supports hw checksumming, set for all. */
5353                 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5354                         all &= ~NETIF_F_ALL_CSUM;
5355                         all |= NETIF_F_HW_CSUM;
5356                 }
5357         }
5358
5359         one |= NETIF_F_ALL_CSUM;
5360
5361         one |= all & NETIF_F_ONE_FOR_ALL;
5362         all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5363         all |= one & mask & NETIF_F_ONE_FOR_ALL;
5364
5365         return all;
5366 }
5367 EXPORT_SYMBOL(netdev_increment_features);
5368
5369 static struct hlist_head *netdev_create_hash(void)
5370 {
5371         int i;
5372         struct hlist_head *hash;
5373
5374         hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5375         if (hash != NULL)
5376                 for (i = 0; i < NETDEV_HASHENTRIES; i++)
5377                         INIT_HLIST_HEAD(&hash[i]);
5378
5379         return hash;
5380 }
5381
5382 /* Initialize per network namespace state */
5383 static int __net_init netdev_init(struct net *net)
5384 {
5385         INIT_LIST_HEAD(&net->dev_base_head);
5386
5387         net->dev_name_head = netdev_create_hash();
5388         if (net->dev_name_head == NULL)
5389                 goto err_name;
5390
5391         net->dev_index_head = netdev_create_hash();
5392         if (net->dev_index_head == NULL)
5393                 goto err_idx;
5394
5395         return 0;
5396
5397 err_idx:
5398         kfree(net->dev_name_head);
5399 err_name:
5400         return -ENOMEM;
5401 }
5402
5403 /**
5404  *      netdev_drivername - network driver for the device
5405  *      @dev: network device
5406  *      @buffer: buffer for resulting name
5407  *      @len: size of buffer
5408  *
5409  *      Determine network driver for device.
5410  */
5411 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5412 {
5413         const struct device_driver *driver;
5414         const struct device *parent;
5415
5416         if (len <= 0 || !buffer)
5417                 return buffer;
5418         buffer[0] = 0;
5419
5420         parent = dev->dev.parent;
5421
5422         if (!parent)
5423                 return buffer;
5424
5425         driver = parent->driver;
5426         if (driver && driver->name)
5427                 strlcpy(buffer, driver->name, len);
5428         return buffer;
5429 }
5430
5431 static void __net_exit netdev_exit(struct net *net)
5432 {
5433         kfree(net->dev_name_head);
5434         kfree(net->dev_index_head);
5435 }
5436
5437 static struct pernet_operations __net_initdata netdev_net_ops = {
5438         .init = netdev_init,
5439         .exit = netdev_exit,
5440 };
5441
5442 static void __net_exit default_device_exit(struct net *net)
5443 {
5444         struct net_device *dev;
5445         /*
5446          * Push all migratable of the network devices back to the
5447          * initial network namespace
5448          */
5449         rtnl_lock();
5450 restart:
5451         for_each_netdev(net, dev) {
5452                 int err;
5453                 char fb_name[IFNAMSIZ];
5454
5455                 /* Ignore unmoveable devices (i.e. loopback) */
5456                 if (dev->features & NETIF_F_NETNS_LOCAL)
5457                         continue;
5458
5459                 /* Delete virtual devices */
5460                 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
5461                         dev->rtnl_link_ops->dellink(dev);
5462                         goto restart;
5463                 }
5464
5465                 /* Push remaing network devices to init_net */
5466                 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5467                 err = dev_change_net_namespace(dev, &init_net, fb_name);
5468                 if (err) {
5469                         printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5470                                 __func__, dev->name, err);
5471                         BUG();
5472                 }
5473                 goto restart;
5474         }
5475         rtnl_unlock();
5476 }
5477
5478 static struct pernet_operations __net_initdata default_device_ops = {
5479         .exit = default_device_exit,
5480 };
5481
5482 /*
5483  *      Initialize the DEV module. At boot time this walks the device list and
5484  *      unhooks any devices that fail to initialise (normally hardware not
5485  *      present) and leaves us with a valid list of present and active devices.
5486  *
5487  */
5488
5489 /*
5490  *       This is called single threaded during boot, so no need
5491  *       to take the rtnl semaphore.
5492  */
5493 static int __init net_dev_init(void)
5494 {
5495         int i, rc = -ENOMEM;
5496
5497         BUG_ON(!dev_boot_phase);
5498
5499         if (dev_proc_init())
5500                 goto out;
5501
5502         if (netdev_kobject_init())
5503                 goto out;
5504
5505         INIT_LIST_HEAD(&ptype_all);
5506         for (i = 0; i < PTYPE_HASH_SIZE; i++)
5507                 INIT_LIST_HEAD(&ptype_base[i]);
5508
5509         if (register_pernet_subsys(&netdev_net_ops))
5510                 goto out;
5511
5512         /*
5513          *      Initialise the packet receive queues.
5514          */
5515
5516         for_each_possible_cpu(i) {
5517                 struct softnet_data *queue;
5518
5519                 queue = &per_cpu(softnet_data, i);
5520                 skb_queue_head_init(&queue->input_pkt_queue);
5521                 queue->completion_queue = NULL;
5522                 INIT_LIST_HEAD(&queue->poll_list);
5523
5524                 queue->backlog.poll = process_backlog;
5525                 queue->backlog.weight = weight_p;
5526                 queue->backlog.gro_list = NULL;
5527                 queue->backlog.gro_count = 0;
5528         }
5529
5530         dev_boot_phase = 0;
5531
5532         /* The loopback device is special if any other network devices
5533          * is present in a network namespace the loopback device must
5534          * be present. Since we now dynamically allocate and free the
5535          * loopback device ensure this invariant is maintained by
5536          * keeping the loopback device as the first device on the
5537          * list of network devices.  Ensuring the loopback devices
5538          * is the first device that appears and the last network device
5539          * that disappears.
5540          */
5541         if (register_pernet_device(&loopback_net_ops))
5542                 goto out;
5543
5544         if (register_pernet_device(&default_device_ops))
5545                 goto out;
5546
5547         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5548         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5549
5550         hotcpu_notifier(dev_cpu_callback, 0);
5551         dst_init();
5552         dev_mcast_init();
5553         rc = 0;
5554 out:
5555         return rc;
5556 }
5557
5558 subsys_initcall(net_dev_init);
5559
5560 static int __init initialize_hashrnd(void)
5561 {
5562         get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5563         return 0;
5564 }
5565
5566 late_initcall_sync(initialize_hashrnd);
5567
/* Symbols exported from this file, kept in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_load);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Bridge hooks are exported only when bridging is built in or modular. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
EXPORT_SYMBOL(br_handle_frame_hook);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);