2  * Common framework for low-level network console, dump, and debugger code
 
   4  * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 
   6  * based on the netconsole code from:
 
   8  * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 
   9  * Copyright (C) 2002  Red Hat, Inc.
 
  12 #include <linux/smp_lock.h>
 
  13 #include <linux/netdevice.h>
 
  14 #include <linux/etherdevice.h>
 
  15 #include <linux/string.h>
 
  16 #include <linux/if_arp.h>
 
  17 #include <linux/inetdevice.h>
 
  18 #include <linux/inet.h>
 
  19 #include <linux/interrupt.h>
 
  20 #include <linux/netpoll.h>
 
  21 #include <linux/sched.h>
 
  22 #include <linux/delay.h>
 
  23 #include <linux/rcupdate.h>
 
  24 #include <linux/workqueue.h>
 
  27 #include <asm/unaligned.h>
 
  30  * We maintain a small pool of fully-sized skbs, to make sure the
 
  31  * message gets out even in extreme OOM situations.
 
  /* Payload portion of MAX_SKB_SIZE, in bytes. */
  34 #define MAX_UDP_CHUNK 1460
 
  /*
   * Value of queue_depth that netpoll_queue() tests for before queueing.
   * NOTE(review): the definition of MAX_SKBS is elided from this excerpt.
   */
  36 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
 
  /* Value that npinfo->tries is initialised and reset to. */
  37 #define MAX_RETRIES 20000
 
  /* Taken (irqsave) by refill_skbs() and find_skb() around skb pool work. */
  39 static DEFINE_SPINLOCK(skb_list_lock);
 
  /* presumably the head of the pre-allocated skb pool - TODO confirm. */
  41 static struct sk_buff *skbs;
 
  /* Protects queue_head and queue_tail of the deferred-transmit queue. */
  43 static DEFINE_SPINLOCK(queue_lock);
 
  /* Compared against MAX_QUEUE_DEPTH in netpoll_queue(). */
  44 static int queue_depth;
 
  /* Singly linked (via skb->next) deferred-transmit queue. */
  45 static struct sk_buff *queue_head, *queue_tail;
 
  /*
   * Counter raised and lowered by poll_napi() and netpoll_set_trap(), and
   * read by netpoll_trap() and __netpoll_rx().
   */
  47 static atomic_t trapped;
 
  /* Bits stored in netpoll_info.rx_flags. */
  49 #define NETPOLL_RX_ENABLED  1
 
  50 #define NETPOLL_RX_DROP     2
 
  /* Buffer size used by refill_skbs(): payload plus UDP, IP and Ethernet headers. */
  52 #define MAX_SKB_SIZE \
 
  53                 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
 
  54                                 sizeof(struct iphdr) + sizeof(struct ethhdr))
 
  /* Forward declaration; used by netpoll_poll() and find_skb(). */
  56 static void zap_completion_queue(void);
 
  /*
   * queue_process - work handler for the deferred-transmit queue.
   * @p: work argument (not referenced in the visible lines)
   *
   * Visible behaviour: with queue_lock held and interrupts disabled,
   * queue_head is advanced to skb->next and the case skb == queue_tail is
   * tested; the lock is then released.
   *
   * NOTE(review): the surrounding loop, the handling of the tail case and
   * whatever is done with the dequeued skb are elided from this excerpt.
   */
  58 static void queue_process(void *p)
 
  64                 spin_lock_irqsave(&queue_lock, flags);
 
  67                 queue_head = skb->next;
 
  68                 if (skb == queue_tail)
 
  73                 spin_unlock_irqrestore(&queue_lock, flags);
 
  /* Work item bound to queue_process(); netpoll_queue() schedules it. */
  79 static DECLARE_WORK(send_queue, queue_process, NULL);
 
  /*
   * netpoll_queue - append an skb to the deferred-transmit queue.
   * @skb: packet to queue
   *
   * Visible behaviour: queue_depth is compared with MAX_QUEUE_DEPTH before
   * the lock is taken; under queue_lock (interrupts disabled) the skb is
   * linked after queue_tail; finally the send_queue work item is scheduled
   * so that queue_process() runs later.
   *
   * NOTE(review): what happens when the depth limit is hit, and how an
   * empty queue is handled, are elided from this excerpt.
   */
  81 void netpoll_queue(struct sk_buff *skb)
 
  85         if (queue_depth == MAX_QUEUE_DEPTH) {
 
  90         spin_lock_irqsave(&queue_lock, flags);
 
  94                 queue_tail->next = skb;
 
  97         spin_unlock_irqrestore(&queue_lock, flags);
 
  99         schedule_work(&send_queue);
 
 /*
  * checksum_udp - check the UDP checksum of a received packet.
  * @skb:   received packet
  * @uh:    UDP header inside @skb
  * @ulen:  UDP length fed into the pseudo-header sum
  * @saddr: source IPv4 address for the pseudo-header
  * @daddr: destination IPv4 address for the pseudo-header
  *
  * Visible steps, in order: a zero uh->check or an ip_summed value of
  * CHECKSUM_UNNECESSARY is tested first; a pseudo-header sum is then built
  * with csum_tcpudp_nofold(); for CHECKSUM_HW that sum is added to
  * skb->csum and folded; otherwise the result of
  * __skb_checksum_complete() is returned.
  *
  * NOTE(review): the return statements of the first two branches are
  * elided. The caller, __netpoll_rx(), tests the result for non-zero.
  */
 102 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 
 103                              unsigned short ulen, u32 saddr, u32 daddr)
 
 107         if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
 
 110         psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
 
 112         if (skb->ip_summed == CHECKSUM_HW &&
 
 113             !(u16)csum_fold(csum_add(psum, skb->csum)))
 
 118         return __skb_checksum_complete(skb);
 
 122  * Check whether delayed processing was scheduled for our NIC. If so,
 
 123  * we attempt to grab the poll lock and use ->poll() to pump the card.
 
 124  * If this fails, either we've recursed in ->poll() or it's already
 
 125  * running on another CPU.
 
 127  * Note: we don't mask interrupts with this lock because we're using
 
 128  * trylock here and interrupts are already disabled in the softirq
 
 129  * case. Further, we test the poll_owner to avoid recursion on UP
 
 130  * systems where the lock doesn't exist.
 
 132  * In cases where there is bi-directional communications, reading only
 
 133  * one message at a time can lead to packets being dropped by the
 
 134  * network adapter, forcing superfluous retries and possibly timeouts.
 
 135  * Thus, we set our budget to greater than 1.
 
 /*
  * poll_napi - run the device ->poll() routine on behalf of netpoll.
  * @np: netpoll instance whose device is polled
  *
  * Polling happens only when __LINK_STATE_RX_SCHED is set in the device
  * state, the current CPU is not already npinfo->poll_owner, and
  * poll_lock can be taken with spin_trylock(). While ->poll() runs,
  * NETPOLL_RX_DROP is set in rx_flags and the trapped counter is raised;
  * both are undone before poll_lock is released.
  *
  * NOTE(review): the declaration and initial value of budget are elided
  * from this excerpt.
  */
 137 static void poll_napi(struct netpoll *np)
 
 139         struct netpoll_info *npinfo = np->dev->npinfo;
 
 142         if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
 
 143             npinfo->poll_owner != smp_processor_id() &&
 
 144             spin_trylock(&npinfo->poll_lock)) {
 
 145                 npinfo->rx_flags |= NETPOLL_RX_DROP;
 
 146                 atomic_inc(&trapped);
 
 148                 np->dev->poll(np->dev, &budget);
 
 150                 atomic_dec(&trapped);
 
 151                 npinfo->rx_flags &= ~NETPOLL_RX_DROP;
 
 152                 spin_unlock(&npinfo->poll_lock);
 
 /*
  * netpoll_poll - pump pending work on the netpoll device.
  * @np: netpoll instance
  *
  * The first test covers a missing device, a device that is not running
  * and a device without a poll_controller method (the statement it guards
  * is elided). The visible work is a call to dev->poll_controller()
  * followed later by zap_completion_queue().
  *
  * NOTE(review): lines between those two calls are elided from this
  * excerpt.
  */
 156 void netpoll_poll(struct netpoll *np)
 
 158         if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
 
 161         /* Process pending work on NIC */
 
 162         np->dev->poll_controller(np->dev);
 
 166         zap_completion_queue();
 
 /*
  * refill_skbs - top up the pool of pre-allocated skbs.
  *
  * With skb_list_lock held and interrupts disabled, allocates buffers of
  * MAX_SKB_SIZE bytes with GFP_ATOMIC while nr_skbs is below MAX_SKBS.
  *
  * NOTE(review): how each new skb is stored, how nr_skbs is updated and
  * the allocation-failure path are elided from this excerpt.
  */
 169 static void refill_skbs(void)
 
 174         spin_lock_irqsave(&skb_list_lock, flags);
 
 175         while (nr_skbs < MAX_SKBS) {
 
 176                 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
 
 184         spin_unlock_irqrestore(&skb_list_lock, flags);
 
 /*
  * zap_completion_queue - empty this CPU's softnet completion queue.
  *
  * Takes the per-CPU softnet_data via get_cpu_var(). If its
  * completion_queue is non-empty, the whole list is detached with local
  * interrupts disabled (the queue pointer is set to NULL) and then walked
  * with interrupts restored. The per-CPU reference is dropped with
  * put_cpu_var() at the end.
  *
  * NOTE(review): the condition that selects which skbs are passed to
  * dev_kfree_skb_any(), and what is done with the others, is elided from
  * this excerpt.
  */
 187 static void zap_completion_queue(void)
 
 190         struct softnet_data *sd = &get_cpu_var(softnet_data);
 
 192         if (sd->completion_queue) {
 
 193                 struct sk_buff *clist;
 
 195                 local_irq_save(flags);
 
 196                 clist = sd->completion_queue;
 
 197                 sd->completion_queue = NULL;
 
 198                 local_irq_restore(flags);
 
 200                 while (clist != NULL) {
 
 201                         struct sk_buff *skb = clist;
 
 204                                 dev_kfree_skb_any(skb); /* put this one back */
 
 210         put_cpu_var(softnet_data);
 
 /*
  * find_skb - obtain an skb for a netpoll transmit.
  * @np:      netpoll instance (not referenced in the visible lines)
  * @len:     size passed to alloc_skb()
  * @reserve: headroom passed to skb_reserve()
  *
  * Visible steps: the completion queue is zapped first; nr_skbs is
  * compared with MAX_SKBS; a GFP_ATOMIC allocation of @len bytes is
  * attempted; a section runs under skb_list_lock; a message is logged
  * when count reaches 1000000 while once is still set; the skb user
  * count is set to 1 and @reserve bytes of headroom are reserved.
  *
  * NOTE(review): the retry loop, the code inside the locked section
  * (presumably a fallback to the pre-allocated pool - TODO confirm) and
  * the return statement are elided from this excerpt.
  */
 213 static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
 
 215         int once = 1, count = 0;
 
 217         struct sk_buff *skb = NULL;
 
 219         zap_completion_queue();
 
 221         if (nr_skbs < MAX_SKBS)
 
 224         skb = alloc_skb(len, GFP_ATOMIC);
 
 227                 spin_lock_irqsave(&skb_list_lock, flags);
 
 234                 spin_unlock_irqrestore(&skb_list_lock, flags);
 
 239                 if (once && (count == 1000000)) {
 
 240                         printk("out of netpoll skbs!\n");
 
 247         atomic_set(&skb->users, 1);
 
 248         skb_reserve(skb, reserve);
 
 /*
  * netpoll_send_skb - hand an skb to the driver transmit routine.
  * @np:  netpoll instance providing the device
  * @skb: packet to transmit
  *
  * Visible steps: a missing np, missing device or non-running device is
  * tested first. A recursion check follows: the current CPU is compared
  * with npinfo->poll_owner and with dev->xmit_lock_owner. In the loop,
  * xmit_lock is taken and xmit_lock_owner set to the current CPU; if the
  * device queue is stopped the lock is released again; otherwise
  * hard_start_xmit() is called and the lock released. npinfo->tries is
  * reset to MAX_RETRIES in one branch and the loop continues while
  * npinfo->tries is positive.
  *
  * NOTE(review): the bodies of the early checks, the handling of status,
  * and the places where tries is decremented are elided from this
  * excerpt.
  */
 252 static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 255         struct netpoll_info *npinfo;
 
 257         if (!np || !np->dev || !netif_running(np->dev)) {
 
 262         npinfo = np->dev->npinfo;
 
 264         /* avoid recursion */
 
 265         if (npinfo->poll_owner == smp_processor_id() ||
 
 266             np->dev->xmit_lock_owner == smp_processor_id()) {
 
 276                 spin_lock(&np->dev->xmit_lock);
 
 277                 np->dev->xmit_lock_owner = smp_processor_id();
 
 280                  * network drivers do not expect to be called if the queue is
 
 283                 if (netif_queue_stopped(np->dev)) {
 
 284                         np->dev->xmit_lock_owner = -1;
 
 285                         spin_unlock(&np->dev->xmit_lock);
 
 291                 status = np->dev->hard_start_xmit(skb, np->dev);
 
 292                 np->dev->xmit_lock_owner = -1;
 
 293                 spin_unlock(&np->dev->xmit_lock);
 
 297                         npinfo->tries = MAX_RETRIES; /* reset */
 
 304         } while (npinfo->tries > 0);
 
 /*
  * netpoll_send_udp - wrap a message in UDP/IPv4/Ethernet and send it.
  * @np:  netpoll instance supplying ports, addresses and MACs
  * @msg: payload bytes
  * @len: payload length in bytes
  *
  * Length bookkeeping: udp_len is @len plus the UDP header; ip_len and
  * eth_len are udp_len plus the IP header; total_len adds ETH_HLEN and
  * NET_IP_ALIGN. The skb comes from find_skb() with total_len - @len
  * bytes reserved, so the payload is copied in first and the headers are
  * pushed in front of it, innermost first: UDP, then IP, then Ethernet.
  *
  * Ports and IP addresses taken from @np go through htons()/htonl()
  * before being stored in the headers. IP header fields are written with
  * put_unaligned(). The finished frame is passed to netpoll_send_skb().
  *
  * NOTE(review): the find_skb() failure path and several header fields
  * are elided from this excerpt.
  */
 307 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
 309         int total_len, eth_len, ip_len, udp_len;
 
 315         udp_len = len + sizeof(*udph);
 
 316         ip_len = eth_len = udp_len + sizeof(*iph);
 
 317         total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
 
 319         skb = find_skb(np, total_len, total_len - len);
 
 323         memcpy(skb->data, msg, len);
 
 326         udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
 
 327         udph->source = htons(np->local_port);
 
 328         udph->dest = htons(np->remote_port);
 
 329         udph->len = htons(udp_len);
 
 332         iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
 
 334         /* iph->version = 4; iph->ihl = 5; */
 
 335         put_unaligned(0x45, (unsigned char *)iph);
 
 337         put_unaligned(htons(ip_len), &(iph->tot_len));
 
 341         iph->protocol = IPPROTO_UDP;
 
 343         put_unaligned(htonl(np->local_ip), &(iph->saddr));
 
 344         put_unaligned(htonl(np->remote_ip), &(iph->daddr));
 
 345         iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
 
 347         eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
 
 349         eth->h_proto = htons(ETH_P_IP);
 
 350         memcpy(eth->h_source, np->local_mac, 6);
 
 351         memcpy(eth->h_dest, np->remote_mac, 6);
 
 355         netpoll_send_skb(np, skb);
 
 /*
  * arp_reply - answer an ARP request on behalf of the netpoll client.
  * @skb: received ARP frame
  *
  * Visible steps:
  *  - the client is considered only when npinfo->rx_np is set and bound
  *    to the receiving device;
  *  - devices flagged IFF_NOARP are tested for;
  *  - the ARP header plus address fields must be pullable;
  *  - ar_hrd is compared with ARPHRD_ETHER and ARPHRD_IEEE802, ar_pro
  *    with ETH_P_IP and ar_op with ARPOP_REQUEST;
  *  - the sender IP (sip) and target IP (tip) are copied out of the
  *    request;
  *  - tip is compared with the client local_ip and tested for loopback
  *    and multicast;
  *  - a reply skb is obtained from find_skb() with LL_RESERVED_SPACE of
  *    headroom, and the link-layer header is built through the device
  *    hard_header method using np->remote_mac and np->local_mac;
  *  - the reply carries the device address and tip as sender, and
  *    np->remote_mac and sip as target;
  *  - the reply is transmitted with netpoll_send_skb().
  *
  * NOTE(review): the statements guarded by each test (presumably early
  * returns - TODO confirm) and a few field assignments are elided from
  * this excerpt.
  */
 358 static void arp_reply(struct sk_buff *skb)
 
 360         struct netpoll_info *npinfo = skb->dev->npinfo;
 
 362         unsigned char *arp_ptr;
 
 363         int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
 
 365         struct sk_buff *send_skb;
 
 366         struct netpoll *np = NULL;
 
 368         if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
 
 373         /* No arp on this interface */
 
 374         if (skb->dev->flags & IFF_NOARP)
 
 377         if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
 
 378                                  (2 * skb->dev->addr_len) +
 
 382         skb->h.raw = skb->nh.raw = skb->data;
 
 385         if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
 
 386              arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
 
 387             arp->ar_pro != htons(ETH_P_IP) ||
 
 388             arp->ar_op != htons(ARPOP_REQUEST))
 
 391         arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
 
 392         memcpy(&sip, arp_ptr, 4);
 
 393         arp_ptr += 4 + skb->dev->addr_len;
 
 394         memcpy(&tip, arp_ptr, 4);
 
 396         /* Should we ignore arp? */
 
 397         if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
 
 400         size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
 
 401         send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
 
 402                             LL_RESERVED_SPACE(np->dev));
 
 407         send_skb->nh.raw = send_skb->data;
 
 408         arp = (struct arphdr *) skb_put(send_skb, size);
 
 409         send_skb->dev = skb->dev;
 
 410         send_skb->protocol = htons(ETH_P_ARP);
 
 412         /* Fill the device header for the ARP frame */
 
 414         if (np->dev->hard_header &&
 
 415             np->dev->hard_header(send_skb, skb->dev, ptype,
 
 416                                        np->remote_mac, np->local_mac,
 
 417                                        send_skb->len) < 0) {
 
 423          * Fill out the arp protocol part.
 
 425          * we only support ethernet device type,
 
 426          * which (according to RFC 1390) should always equal 1 (Ethernet).
 
 429         arp->ar_hrd = htons(np->dev->type);
 
 430         arp->ar_pro = htons(ETH_P_IP);
 
 431         arp->ar_hln = np->dev->addr_len;
 
 433         arp->ar_op = htons(type);
 
 435         arp_ptr=(unsigned char *)(arp + 1);
 
 436         memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
 
 437         arp_ptr += np->dev->addr_len;
 
 438         memcpy(arp_ptr, &tip, 4);
 
 440         memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
 
 441         arp_ptr += np->dev->addr_len;
 
 442         memcpy(arp_ptr, &sip, 4);
 
 444         netpoll_send_skb(np, send_skb);
 
 /*
  * __netpoll_rx - inspect a received skb on behalf of the netpoll client.
  * @skb: received packet; the client is skb->dev->npinfo->rx_np
  *
  * Visible checks, in order:
  *  - the device type is compared with ARPHRD_ETHER;
  *  - ARP frames are singled out while the trapped counter is non-zero;
  *  - the Ethernet protocol is compared with ETH_P_IP and the packet
  *    type with PACKET_OTHERHOST;
  *  - IP header tests: pullable, ihl below 5, version other than 4,
  *    header checksum, tot_len against skb->len and the header length,
  *    protocol against IPPROTO_UDP;
  *  - the UDP checksum is checked through checksum_udp();
  *  - local_ip, remote_ip and local_port of the client are each compared
  *    with the packet only when they are non-zero, using host byte order;
  *  - np->rx_hook() is called with the source port and a length of ulen
  *    minus the UDP header size.
  *
  * NOTE(review): the action taken when a check fails, the ARP handling
  * call, the middle argument of rx_hook() and all return values are
  * elided from this excerpt. The final test of trapped suggests that the
  * result depends on the trap state - TODO confirm.
  */
 447 int __netpoll_rx(struct sk_buff *skb)
 
 449         int proto, len, ulen;
 
 452         struct netpoll *np = skb->dev->npinfo->rx_np;
 
 456         if (skb->dev->type != ARPHRD_ETHER)
 
 459         /* check if netpoll clients need ARP */
 
 460         if (skb->protocol == __constant_htons(ETH_P_ARP) &&
 
 461             atomic_read(&trapped)) {
 
 466         proto = ntohs(eth_hdr(skb)->h_proto);
 
 467         if (proto != ETH_P_IP)
 
 469         if (skb->pkt_type == PACKET_OTHERHOST)
 
 474         iph = (struct iphdr *)skb->data;
 
 475         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 
 477         if (iph->ihl < 5 || iph->version != 4)
 
 479         if (!pskb_may_pull(skb, iph->ihl*4))
 
 481         if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
 
 484         len = ntohs(iph->tot_len);
 
 485         if (skb->len < len || len < iph->ihl*4)
 
 488         if (iph->protocol != IPPROTO_UDP)
 
 492         uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
 
 493         ulen = ntohs(uh->len);
 
 497         if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
 
 499         if (np->local_ip && np->local_ip != ntohl(iph->daddr))
 
 501         if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
 
 503         if (np->local_port && np->local_port != ntohs(uh->dest))
 
 506         np->rx_hook(np, ntohs(uh->source),
 
 508                     ulen - sizeof(struct udphdr));
 
 514         if (atomic_read(&trapped)) {
 
 /*
  * netpoll_parse_options - fill a netpoll instance from an option string.
  * @np:  netpoll instance to fill in; np->name prefixes every log line
  * @opt: option string, scanned left to right
  *
  * Fields appear in this order, each located with strchr() on its
  * delimiter:
  *  - local port, decimal, ended by an at-sign;
  *  - local IP, ended by a slash, parsed with in_aton() and stored in
  *    host order;
  *  - device name, ended by a comma, copied with strlcpy();
  *  - remote port, decimal, ended by an at-sign;
  *  - remote IP, ended by a slash, stored in host order;
  *  - remote MAC, six hexadecimal bytes separated by colons.
  * Parsed values are reported with printk at KERN_INFO, and a parse
  * failure is reported with the offending position.
  *
  * NOTE(review): the conditions that make individual fields optional,
  * the jumps taken when a delimiter is missing and the return values are
  * elided from this excerpt.
  */
 522 int netpoll_parse_options(struct netpoll *np, char *opt)
 
 524         char *cur=opt, *delim;
 
 527                 if ((delim = strchr(cur, '@')) == NULL)
 
 530                 np->local_port=simple_strtol(cur, NULL, 10);
 
 534         printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
 
 537                 if ((delim = strchr(cur, '/')) == NULL)
 
 540                 np->local_ip=ntohl(in_aton(cur));
 
 543                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
 
 544                        np->name, HIPQUAD(np->local_ip));
 
 549                 /* parse out dev name */
 
 550                 if ((delim = strchr(cur, ',')) == NULL)
 
 553                 strlcpy(np->dev_name, cur, sizeof(np->dev_name));
 
 558         printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
 
 562                 if ((delim = strchr(cur, '@')) == NULL)
 
 565                 np->remote_port=simple_strtol(cur, NULL, 10);
 
 569         printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
 
 572         if ((delim = strchr(cur, '/')) == NULL)
 
 575         np->remote_ip=ntohl(in_aton(cur));
 
 578         printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
 
 579                        np->name, HIPQUAD(np->remote_ip));
 
 584                 if ((delim = strchr(cur, ':')) == NULL)
 
 587                 np->remote_mac[0]=simple_strtol(cur, NULL, 16);
 
 589                 if ((delim = strchr(cur, ':')) == NULL)
 
 592                 np->remote_mac[1]=simple_strtol(cur, NULL, 16);
 
 594                 if ((delim = strchr(cur, ':')) == NULL)
 
 597                 np->remote_mac[2]=simple_strtol(cur, NULL, 16);
 
 599                 if ((delim = strchr(cur, ':')) == NULL)
 
 602                 np->remote_mac[3]=simple_strtol(cur, NULL, 16);
 
 604                 if ((delim = strchr(cur, ':')) == NULL)
 
 607                 np->remote_mac[4]=simple_strtol(cur, NULL, 16);
 
 609                 np->remote_mac[5]=simple_strtol(cur, NULL, 16);
 
 612         printk(KERN_INFO "%s: remote ethernet address "
 
 613                "%02x:%02x:%02x:%02x:%02x:%02x\n",
 
 625         printk(KERN_INFO "%s: couldn't parse config at %s!\n",
 
 /*
  * netpoll_setup - bind a netpoll instance to its network device.
  * @np: netpoll instance; np->dev_name selects the device
  *
  * Steps visible in this excerpt:
  *  - look the device up with dev_get_by_name() and log an error when it
  *    does not exist;
  *  - either allocate (GFP_KERNEL) and initialise a netpoll_info
  *    (rx_flags, rx_np, poll_lock, poll_owner = -1, tries = MAX_RETRIES,
  *    rx_lock) or reuse ndev->npinfo; the selecting condition is elided;
  *  - reject devices without a poll_controller method;
  *  - if the device is not running, raise IFF_UP through
  *    dev_change_flags() and wait for carrier until 4*HZ jiffies have
  *    passed; carrier that shows up before HZ/10 jiffies have passed is
  *    reported as untrustworthy;
  *  - copy the device address into np->local_mac when that is all zero;
  *  - take ifa_local of the first entry on in_dev->ifa_list as
  *    np->local_ip (guard elided; presumably only when no local IP was
  *    configured - TODO confirm);
  *  - set NETPOLL_RX_ENABLED in rx_flags under rx_lock (guard elided);
  *  - publish npinfo through ndev->npinfo as the last step.
  *
  * NOTE(review): return values, error unwinding, the skb pool fill and
  * the synchronisation after publishing npinfo are elided from this
  * excerpt.
  */
 630 int netpoll_setup(struct netpoll *np)
 
 632         struct net_device *ndev = NULL;
 
 633         struct in_device *in_dev;
 
 634         struct netpoll_info *npinfo;
 
 638                 ndev = dev_get_by_name(np->dev_name);
 
 640                 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
 
 641                        np->name, np->dev_name);
 
 647                 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
 
 651                 npinfo->rx_flags = 0;
 
 652                 npinfo->rx_np = NULL;
 
 653                 spin_lock_init(&npinfo->poll_lock);
 
 654                 npinfo->poll_owner = -1;
 
 655                 npinfo->tries = MAX_RETRIES;
 
 656                 spin_lock_init(&npinfo->rx_lock);
 
 658                 npinfo = ndev->npinfo;
 
 660         if (!ndev->poll_controller) {
 
 661                 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 
 662                        np->name, np->dev_name);
 
 666         if (!netif_running(ndev)) {
 
 667                 unsigned long atmost, atleast;
 
 669                 printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
 
 670                        np->name, np->dev_name);
 
 673                 if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
 
 674                         printk(KERN_ERR "%s: failed to open %s\n",
 
 675                                np->name, np->dev_name);
 
 681                 atleast = jiffies + HZ/10;
 
 682                 atmost = jiffies + 4*HZ;
 
 683                 while (!netif_carrier_ok(ndev)) {
 
 684                         if (time_after(jiffies, atmost)) {
 
 686                                        "%s: timeout waiting for carrier\n",
 
 693                 /* If carrier appears to come up instantly, we don't
 
 694                  * trust it and pause so that we don't pump all our
 
 695                  * queued console messages into the bitbucket.
 
 698                 if (time_before(jiffies, atleast)) {
 
 699                         printk(KERN_NOTICE "%s: carrier detect appears"
 
 700                                " untrustworthy, waiting 4 seconds\n",
 
 706         if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
 
 707                 memcpy(np->local_mac, ndev->dev_addr, 6);
 
 711                 in_dev = __in_dev_get_rcu(ndev);
 
 713                 if (!in_dev || !in_dev->ifa_list) {
 
 715                         printk(KERN_ERR "%s: no IP address for %s, aborting\n",
 
 716                                np->name, np->dev_name);
 
 720                 np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
 
 722                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
 
 723                        np->name, HIPQUAD(np->local_ip));
 
 727                 spin_lock_irqsave(&npinfo->rx_lock, flags);
 
 728                 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
 
 730                 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
 733         /* fill up the skb queue */
 
 736         /* last thing to do is link it to the net device structure */
 
 737         ndev->npinfo = npinfo;
 
 739         /* avoid racing with NAPI reading npinfo */
 
 /*
  * netpoll_cleanup - detach a netpoll instance from its device.
  * @np: netpoll instance being torn down
  *
  * Visible behaviour: when the device npinfo exists and its rx_np is this
  * instance, rx_np is cleared and NETPOLL_RX_ENABLED is removed from
  * rx_flags, both under rx_lock with interrupts disabled.
  *
  * NOTE(review): the outer guard and any release of the device reference
  * are elided from this excerpt.
  */
 752 void netpoll_cleanup(struct netpoll *np)
 
 754         struct netpoll_info *npinfo;
 
 758                 npinfo = np->dev->npinfo;
 
 759                 if (npinfo && npinfo->rx_np == np) {
 
 760                         spin_lock_irqsave(&npinfo->rx_lock, flags);
 
 761                         npinfo->rx_np = NULL;
 
 762                         npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
 
 763                         spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
 /*
  * netpoll_trap - report the current trap count.
  *
  * Returns the value of the trapped counter read with atomic_read().
  */
 771 int netpoll_trap(void)
 
 773         return atomic_read(&trapped);
 
 /*
  * netpoll_set_trap - raise or lower the trap count.
  * @trap: selects between incrementing and decrementing the counter
  *
  * NOTE(review): the branch that picks atomic_inc() or atomic_dec() is
  * elided from this excerpt; presumably a non-zero @trap increments -
  * TODO confirm.
  */
 776 void netpoll_set_trap(int trap)
 
 779                 atomic_inc(&trapped);
 
 781                 atomic_dec(&trapped);
 
 /* Entry points of this file exported with EXPORT_SYMBOL. */
 784 EXPORT_SYMBOL(netpoll_set_trap);
 
 785 EXPORT_SYMBOL(netpoll_trap);
 
 786 EXPORT_SYMBOL(netpoll_parse_options);
 
 787 EXPORT_SYMBOL(netpoll_setup);
 
 788 EXPORT_SYMBOL(netpoll_cleanup);
 
 789 EXPORT_SYMBOL(netpoll_send_udp);
 
 790 EXPORT_SYMBOL(netpoll_poll);
 
 791 EXPORT_SYMBOL(netpoll_queue);