2  * Common framework for low-level network console, dump, and debugger code
 
   4  * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 
   6  * based on the netconsole code from:
 
   8  * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 
   9  * Copyright (C) 2002  Red Hat, Inc.
 
  12 #include <linux/smp_lock.h>
 
  13 #include <linux/netdevice.h>
 
  14 #include <linux/etherdevice.h>
 
  15 #include <linux/string.h>
 
  16 #include <linux/if_arp.h>
 
  17 #include <linux/inetdevice.h>
 
  18 #include <linux/inet.h>
 
  19 #include <linux/interrupt.h>
 
  20 #include <linux/netpoll.h>
 
  21 #include <linux/sched.h>
 
  22 #include <linux/delay.h>
 
  23 #include <linux/rcupdate.h>
 
  24 #include <linux/workqueue.h>
 
  27 #include <asm/unaligned.h>
 
  30  * We maintain a small pool of fully-sized skbs, to make sure the
 
  31  * message gets out even in extreme OOM situations.
 
  34 #define MAX_UDP_CHUNK 1460
 
  36 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
 
  37 #define MAX_RETRIES 20000
 
  39 static DEFINE_SPINLOCK(skb_list_lock);
 
  41 static struct sk_buff *skbs;
 
  43 static DEFINE_SPINLOCK(queue_lock);
 
  44 static int queue_depth;
 
  45 static struct sk_buff *queue_head, *queue_tail;
 
  47 static atomic_t trapped;
 
  49 #define NETPOLL_RX_ENABLED  1
 
  50 #define NETPOLL_RX_DROP     2
 
  52 #define MAX_SKB_SIZE \
 
  53                 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
 
  54                                 sizeof(struct iphdr) + sizeof(struct ethhdr))
 
  56 static void zap_completion_queue(void);
 
  57 static void arp_reply(struct sk_buff *skb);
 
  59 static void queue_process(void *p)
 
  65                 spin_lock_irqsave(&queue_lock, flags);
 
  68                 queue_head = skb->next;
 
  69                 if (skb == queue_tail)
 
  74                 spin_unlock_irqrestore(&queue_lock, flags);
 
  80 static DECLARE_WORK(send_queue, queue_process, NULL);
 
  82 void netpoll_queue(struct sk_buff *skb)
 
  86         if (queue_depth == MAX_QUEUE_DEPTH) {
 
  91         spin_lock_irqsave(&queue_lock, flags);
 
  95                 queue_tail->next = skb;
 
  98         spin_unlock_irqrestore(&queue_lock, flags);
 
 100         schedule_work(&send_queue);
 
 103 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 
 104                              unsigned short ulen, u32 saddr, u32 daddr)
 
 108         if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
 
 111         psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
 
 113         if (skb->ip_summed == CHECKSUM_HW &&
 
 114             !(u16)csum_fold(csum_add(psum, skb->csum)))
 
 119         return __skb_checksum_complete(skb);
 
 123  * Check whether delayed processing was scheduled for our NIC. If so,
 
 124  * we attempt to grab the poll lock and use ->poll() to pump the card.
 
 125  * If this fails, either we've recursed in ->poll() or it's already
 
 126  * running on another CPU.
 
 128  * Note: we don't mask interrupts with this lock because we're using
 
 129  * trylock here and interrupts are already disabled in the softirq
 
 130  * case. Further, we test the poll_owner to avoid recursion on UP
 
 131  * systems where the lock doesn't exist.
 
 133  * In cases where there is bi-directional communication, reading only
 
 134  * one message at a time can lead to packets being dropped by the
 
 135  * network adapter, forcing superfluous retries and possibly timeouts.
 
 136  * Thus, we set our budget to greater than 1.
 
 138 static void poll_napi(struct netpoll *np)
 
 140         struct netpoll_info *npinfo = np->dev->npinfo;
 
 143         if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
 
 144             npinfo->poll_owner != smp_processor_id() &&
 
 145             spin_trylock(&npinfo->poll_lock)) {
 
 146                 npinfo->rx_flags |= NETPOLL_RX_DROP;
 
 147                 atomic_inc(&trapped);
 
 149                 np->dev->poll(np->dev, &budget);
 
 151                 atomic_dec(&trapped);
 
 152                 npinfo->rx_flags &= ~NETPOLL_RX_DROP;
 
 153                 spin_unlock(&npinfo->poll_lock);
 
 157 static void service_arp_queue(struct netpoll_info *npi)
 
 164         skb = skb_dequeue(&npi->arp_tx);
 
 166         while (skb != NULL) {
 
 168                 skb = skb_dequeue(&npi->arp_tx);
 
 173 void netpoll_poll(struct netpoll *np)
 
 175         if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
 
 178         /* Process pending work on NIC */
 
 179         np->dev->poll_controller(np->dev);
 
 183         service_arp_queue(np->dev->npinfo);
 
 185         zap_completion_queue();
 
 188 static void refill_skbs(void)
 
 193         spin_lock_irqsave(&skb_list_lock, flags);
 
 194         while (nr_skbs < MAX_SKBS) {
 
 195                 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
 
 203         spin_unlock_irqrestore(&skb_list_lock, flags);
 
 206 static void zap_completion_queue(void)
 
 209         struct softnet_data *sd = &get_cpu_var(softnet_data);
 
 211         if (sd->completion_queue) {
 
 212                 struct sk_buff *clist;
 
 214                 local_irq_save(flags);
 
 215                 clist = sd->completion_queue;
 
 216                 sd->completion_queue = NULL;
 
 217                 local_irq_restore(flags);
 
 219                 while (clist != NULL) {
 
 220                         struct sk_buff *skb = clist;
 
 223                                 dev_kfree_skb_any(skb); /* put this one back */
 
 229         put_cpu_var(softnet_data);
 
 232 static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
 
 234         int once = 1, count = 0;
 
 236         struct sk_buff *skb = NULL;
 
 238         zap_completion_queue();
 
 240         if (nr_skbs < MAX_SKBS)
 
 243         skb = alloc_skb(len, GFP_ATOMIC);
 
 246                 spin_lock_irqsave(&skb_list_lock, flags);
 
 253                 spin_unlock_irqrestore(&skb_list_lock, flags);
 
 258                 if (once && (count == 1000000)) {
 
 259                         printk("out of netpoll skbs!\n");
 
 266         atomic_set(&skb->users, 1);
 
 267         skb_reserve(skb, reserve);
 
 271 static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 274         struct netpoll_info *npinfo;
 
 276         if (!np || !np->dev || !netif_running(np->dev)) {
 
 281         npinfo = np->dev->npinfo;
 
 283         /* avoid recursion */
 
 284         if (npinfo->poll_owner == smp_processor_id() ||
 
 285             np->dev->xmit_lock_owner == smp_processor_id()) {
 
 295                 netif_tx_lock(np->dev);
 
 298                  * network drivers do not expect to be called if the queue is
 
 301                 status = NETDEV_TX_BUSY;
 
 302                 if (!netif_queue_stopped(np->dev))
 
 303                         status = np->dev->hard_start_xmit(skb, np->dev);
 
 305                 netif_tx_unlock(np->dev);
 
 309                         npinfo->tries = MAX_RETRIES; /* reset */
 
 316         } while (npinfo->tries > 0);
 
 319 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
 321         int total_len, eth_len, ip_len, udp_len;
 
 327         udp_len = len + sizeof(*udph);
 
 328         ip_len = eth_len = udp_len + sizeof(*iph);
 
 329         total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
 
 331         skb = find_skb(np, total_len, total_len - len);
 
 335         memcpy(skb->data, msg, len);
 
 338         udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
 
 339         udph->source = htons(np->local_port);
 
 340         udph->dest = htons(np->remote_port);
 
 341         udph->len = htons(udp_len);
 
 344         iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
 
 346         /* iph->version = 4; iph->ihl = 5; */
 
 347         put_unaligned(0x45, (unsigned char *)iph);
 
 349         put_unaligned(htons(ip_len), &(iph->tot_len));
 
 353         iph->protocol = IPPROTO_UDP;
 
 355         put_unaligned(htonl(np->local_ip), &(iph->saddr));
 
 356         put_unaligned(htonl(np->remote_ip), &(iph->daddr));
 
 357         iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
 
 359         eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
 
 361         eth->h_proto = htons(ETH_P_IP);
 
 362         memcpy(eth->h_source, np->local_mac, 6);
 
 363         memcpy(eth->h_dest, np->remote_mac, 6);
 
 367         netpoll_send_skb(np, skb);
 
 370 static void arp_reply(struct sk_buff *skb)
 
 372         struct netpoll_info *npinfo = skb->dev->npinfo;
 
 374         unsigned char *arp_ptr;
 
 375         int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
 
 377         struct sk_buff *send_skb;
 
 378         struct netpoll *np = NULL;
 
 380         if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
 
 385         /* No arp on this interface */
 
 386         if (skb->dev->flags & IFF_NOARP)
 
 389         if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
 
 390                                  (2 * skb->dev->addr_len) +
 
 394         skb->h.raw = skb->nh.raw = skb->data;
 
 397         if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
 
 398              arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
 
 399             arp->ar_pro != htons(ETH_P_IP) ||
 
 400             arp->ar_op != htons(ARPOP_REQUEST))
 
 403         arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
 
 404         memcpy(&sip, arp_ptr, 4);
 
 405         arp_ptr += 4 + skb->dev->addr_len;
 
 406         memcpy(&tip, arp_ptr, 4);
 
 408         /* Should we ignore arp? */
 
 409         if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
 
 412         size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
 
 413         send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
 
 414                             LL_RESERVED_SPACE(np->dev));
 
 419         send_skb->nh.raw = send_skb->data;
 
 420         arp = (struct arphdr *) skb_put(send_skb, size);
 
 421         send_skb->dev = skb->dev;
 
 422         send_skb->protocol = htons(ETH_P_ARP);
 
 424         /* Fill the device header for the ARP frame */
 
 426         if (np->dev->hard_header &&
 
 427             np->dev->hard_header(send_skb, skb->dev, ptype,
 
 428                                        np->remote_mac, np->local_mac,
 
 429                                        send_skb->len) < 0) {
 
 435          * Fill out the arp protocol part.
 
 437          * we only support ethernet device type,
 
 438          * which (according to RFC 1390) should always equal 1 (Ethernet).
 
 441         arp->ar_hrd = htons(np->dev->type);
 
 442         arp->ar_pro = htons(ETH_P_IP);
 
 443         arp->ar_hln = np->dev->addr_len;
 
 445         arp->ar_op = htons(type);
 
 447         arp_ptr=(unsigned char *)(arp + 1);
 
 448         memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
 
 449         arp_ptr += np->dev->addr_len;
 
 450         memcpy(arp_ptr, &tip, 4);
 
 452         memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
 
 453         arp_ptr += np->dev->addr_len;
 
 454         memcpy(arp_ptr, &sip, 4);
 
 456         netpoll_send_skb(np, send_skb);
 
 459 int __netpoll_rx(struct sk_buff *skb)
 
 461         int proto, len, ulen;
 
 464         struct netpoll_info *npi = skb->dev->npinfo;
 
 465         struct netpoll *np = npi->rx_np;
 
 470         if (skb->dev->type != ARPHRD_ETHER)
 
 473         /* check if netpoll clients need ARP */
 
 474         if (skb->protocol == __constant_htons(ETH_P_ARP) &&
 
 475             atomic_read(&trapped)) {
 
 476                 skb_queue_tail(&npi->arp_tx, skb);
 
 480         proto = ntohs(eth_hdr(skb)->h_proto);
 
 481         if (proto != ETH_P_IP)
 
 483         if (skb->pkt_type == PACKET_OTHERHOST)
 
 488         iph = (struct iphdr *)skb->data;
 
 489         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 
 491         if (iph->ihl < 5 || iph->version != 4)
 
 493         if (!pskb_may_pull(skb, iph->ihl*4))
 
 495         if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
 
 498         len = ntohs(iph->tot_len);
 
 499         if (skb->len < len || len < iph->ihl*4)
 
 502         if (iph->protocol != IPPROTO_UDP)
 
 506         uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
 
 507         ulen = ntohs(uh->len);
 
 511         if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
 
 513         if (np->local_ip && np->local_ip != ntohl(iph->daddr))
 
 515         if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
 
 517         if (np->local_port && np->local_port != ntohs(uh->dest))
 
 520         np->rx_hook(np, ntohs(uh->source),
 
 522                     ulen - sizeof(struct udphdr));
 
 528         if (atomic_read(&trapped)) {
 
 536 int netpoll_parse_options(struct netpoll *np, char *opt)
 
 538         char *cur=opt, *delim;
 
 541                 if ((delim = strchr(cur, '@')) == NULL)
 
 544                 np->local_port=simple_strtol(cur, NULL, 10);
 
 548         printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
 
 551                 if ((delim = strchr(cur, '/')) == NULL)
 
 554                 np->local_ip=ntohl(in_aton(cur));
 
 557                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
 
 558                        np->name, HIPQUAD(np->local_ip));
 
 563                 /* parse out dev name */
 
 564                 if ((delim = strchr(cur, ',')) == NULL)
 
 567                 strlcpy(np->dev_name, cur, sizeof(np->dev_name));
 
 572         printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
 
 576                 if ((delim = strchr(cur, '@')) == NULL)
 
 579                 np->remote_port=simple_strtol(cur, NULL, 10);
 
 583         printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
 
 586         if ((delim = strchr(cur, '/')) == NULL)
 
 589         np->remote_ip=ntohl(in_aton(cur));
 
 592         printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
 
 593                        np->name, HIPQUAD(np->remote_ip));
 
 598                 if ((delim = strchr(cur, ':')) == NULL)
 
 601                 np->remote_mac[0]=simple_strtol(cur, NULL, 16);
 
 603                 if ((delim = strchr(cur, ':')) == NULL)
 
 606                 np->remote_mac[1]=simple_strtol(cur, NULL, 16);
 
 608                 if ((delim = strchr(cur, ':')) == NULL)
 
 611                 np->remote_mac[2]=simple_strtol(cur, NULL, 16);
 
 613                 if ((delim = strchr(cur, ':')) == NULL)
 
 616                 np->remote_mac[3]=simple_strtol(cur, NULL, 16);
 
 618                 if ((delim = strchr(cur, ':')) == NULL)
 
 621                 np->remote_mac[4]=simple_strtol(cur, NULL, 16);
 
 623                 np->remote_mac[5]=simple_strtol(cur, NULL, 16);
 
 626         printk(KERN_INFO "%s: remote ethernet address "
 
 627                "%02x:%02x:%02x:%02x:%02x:%02x\n",
 
 639         printk(KERN_INFO "%s: couldn't parse config at %s!\n",
 
 644 int netpoll_setup(struct netpoll *np)
 
 646         struct net_device *ndev = NULL;
 
 647         struct in_device *in_dev;
 
 648         struct netpoll_info *npinfo;
 
 652                 ndev = dev_get_by_name(np->dev_name);
 
 654                 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
 
 655                        np->name, np->dev_name);
 
 661                 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
 
 665                 npinfo->rx_flags = 0;
 
 666                 npinfo->rx_np = NULL;
 
 667                 spin_lock_init(&npinfo->poll_lock);
 
 668                 npinfo->poll_owner = -1;
 
 669                 npinfo->tries = MAX_RETRIES;
 
 670                 spin_lock_init(&npinfo->rx_lock);
 
 671                 skb_queue_head_init(&npinfo->arp_tx);
 
 673                 npinfo = ndev->npinfo;
 
 675         if (!ndev->poll_controller) {
 
 676                 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 
 677                        np->name, np->dev_name);
 
 681         if (!netif_running(ndev)) {
 
 682                 unsigned long atmost, atleast;
 
 684                 printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
 
 685                        np->name, np->dev_name);
 
 688                 if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
 
 689                         printk(KERN_ERR "%s: failed to open %s\n",
 
 690                                np->name, np->dev_name);
 
 696                 atleast = jiffies + HZ/10;
 
 697                 atmost = jiffies + 4*HZ;
 
 698                 while (!netif_carrier_ok(ndev)) {
 
 699                         if (time_after(jiffies, atmost)) {
 
 701                                        "%s: timeout waiting for carrier\n",
 
 708                 /* If carrier appears to come up instantly, we don't
 
 709                  * trust it and pause so that we don't pump all our
 
 710                  * queued console messages into the bitbucket.
 
 713                 if (time_before(jiffies, atleast)) {
 
 714                         printk(KERN_NOTICE "%s: carrier detect appears"
 
 715                                " untrustworthy, waiting 4 seconds\n",
 
 721         if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
 
 722                 memcpy(np->local_mac, ndev->dev_addr, 6);
 
 726                 in_dev = __in_dev_get_rcu(ndev);
 
 728                 if (!in_dev || !in_dev->ifa_list) {
 
 730                         printk(KERN_ERR "%s: no IP address for %s, aborting\n",
 
 731                                np->name, np->dev_name);
 
 735                 np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
 
 737                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
 
 738                        np->name, HIPQUAD(np->local_ip));
 
 742                 spin_lock_irqsave(&npinfo->rx_lock, flags);
 
 743                 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
 
 745                 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
 748         /* fill up the skb queue */
 
 751         /* last thing to do is link it to the net device structure */
 
 752         ndev->npinfo = npinfo;
 
 754         /* avoid racing with NAPI reading npinfo */
 
 767 void netpoll_cleanup(struct netpoll *np)
 
 769         struct netpoll_info *npinfo;
 
 773                 npinfo = np->dev->npinfo;
 
 774                 if (npinfo && npinfo->rx_np == np) {
 
 775                         spin_lock_irqsave(&npinfo->rx_lock, flags);
 
 776                         npinfo->rx_np = NULL;
 
 777                         npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
 
 778                         spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
 786 int netpoll_trap(void)
 
 788         return atomic_read(&trapped);
 
 791 void netpoll_set_trap(int trap)
 
 794                 atomic_inc(&trapped);
 
 796                 atomic_dec(&trapped);
 
 799 EXPORT_SYMBOL(netpoll_set_trap);
 
 800 EXPORT_SYMBOL(netpoll_trap);
 
 801 EXPORT_SYMBOL(netpoll_parse_options);
 
 802 EXPORT_SYMBOL(netpoll_setup);
 
 803 EXPORT_SYMBOL(netpoll_cleanup);
 
 804 EXPORT_SYMBOL(netpoll_send_udp);
 
 805 EXPORT_SYMBOL(netpoll_poll);
 
 806 EXPORT_SYMBOL(netpoll_queue);