2  * This is a module which is used for queueing IPv6 packets and
 
   3  * communicating with userspace via netlink.
 
   5  * (C) 2001 Fernando Anton, this code is GPL.
 
   6  *     IPv64 Project - Work based on the IPv64 draft by Arturo Azcorra.
 
   7  *     Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
 
   8  *     Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
 
   9  *     email: fanton@it.uc3m.es
 
  11  * This program is free software; you can redistribute it and/or modify
 
  12  * it under the terms of the GNU General Public License version 2 as
 
  13  * published by the Free Software Foundation.
 
  15  * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
 
  17  *             HEAVILY based on ipqueue.c by James Morris. It's just
 
  18  *             a slightly modified version of it, so he's nearly the
 
  20  *             Few changes needed, mainly the hard_routing code and
 
  21  *             the netlink socket protocol (we're NETLINK_IP6_FW).
 
  22  * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
 
  23  * 2005-02-04: Added /proc counter for dropped packets; fixed so
 
  24  *             packets aren't delivered to user space if they're going
 
  27 #include <linux/module.h>
 
  28 #include <linux/skbuff.h>
 
  29 #include <linux/init.h>
 
  30 #include <linux/ipv6.h>
 
  31 #include <linux/notifier.h>
 
  32 #include <linux/netdevice.h>
 
  33 #include <linux/netfilter.h>
 
  34 #include <linux/netlink.h>
 
  35 #include <linux/spinlock.h>
 
  36 #include <linux/sysctl.h>
 
  37 #include <linux/proc_fs.h>
 
  38 #include <linux/mutex.h>
 
  41 #include <net/ip6_route.h>
 
  42 #include <linux/netfilter_ipv4/ip_queue.h>
 
  43 #include <linux/netfilter_ipv4/ip_tables.h>
 
  44 #include <linux/netfilter_ipv6/ip6_tables.h>
 
  /* Initial value of queue_maxlen, the limit checked before enqueueing. */
  46 #define IPQ_QMAX_DEFAULT 1024
 
  /* Name handed to proc_net_create()/proc_net_remove() for the status entry. */
  47 #define IPQ_PROC_FS_NAME "ip6_queue"
 
  /* Used as .ctl_name of the sysctl entry in ipq_table. */
  48 #define NET_IPQ_QMAX 2088
 
  /* Used as .procname of the sysctl entry that exposes queue_maxlen. */
  49 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
 
  /*
   * One queued packet.  Only the first member is visible in this listing;
   * other code in the file dereferences entry->skb and entry->info, so
   * those members presumably follow -- TODO confirm against the full file.
   */
  51 struct ipq_queue_entry {
 
  /* Link into queue_list (used with list_add()/list_del()). */
  52         struct list_head list;
 
  /*
   * Predicate used when searching the queue: invoked as cmpfn(entry, data);
   * a non-zero result selects that entry (see __ipq_find_entry).
   */
  57 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
  /* Current copy mode; ipq_enqueue_packet() tests it against IPQ_COPY_NONE. */
  59 static unsigned char copy_mode = IPQ_COPY_NONE;
 
  /* Queue length limit; compared with queue_total and exposed via sysctl. */
  60 static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
 
  /* Reader/writer lock taken (with _bh variants) around queue and mode state. */
  61 static DEFINE_RWLOCK(queue_lock);
 
  /* Byte limit for payload copied to userspace; 0 means "whole packet". */
  63 static unsigned int copy_range;
 
  /* Compared against queue_maxlen; presumably the current entry count. */
  64 static unsigned int queue_total;
 
  /* Drop counter; presumably counts queue-full drops -- TODO confirm. */
  65 static unsigned int queue_dropped = 0;
 
  /* Incremented in ipq_enqueue_packet() next to the netlink_unicast() call. */
  66 static unsigned int queue_user_dropped = 0;
 
  /* Kernel netlink socket, created in ip6_queue_init(). */
  67 static struct sock *ipqnl;
 
  /* Head of the list of queued entries. */
  68 static LIST_HEAD(queue_list);
 
  /* Held by ipq_rcv_sk() while it drains the socket receive queue. */
  69 static DEFINE_MUTEX(ipqnl_mutex);
 
  /*
   * Hand a dequeued packet back to netfilter with the given verdict by
   * calling nf_reinject() on the entry's skb and info.
   * NOTE(review): the other lines of this function are elided in this
   * listing, so what happens to the entry afterwards is not shown here.
   */
  72 ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
 
  75         nf_reinject(entry->skb, entry->info, verdict);
 
  /*
   * Insert an entry at the head of queue_list.  The only visible caller
   * invokes this with queue_lock write-held.
   */
  81 __ipq_enqueue_entry(struct ipq_queue_entry *entry)
 
  83        list_add(&entry->list, &queue_list);
 
  88  * Find and return a queued entry matched by cmpfn, or return the last
 
  89  * entry if cmpfn is NULL.
 
  /*
   * Walk queue_list from the tail towards the head and stop at the first
   * entry for which cmpfn is NULL or cmpfn(entry, data) is non-zero.
   */
  91 static inline struct ipq_queue_entry *
 
  92 __ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
 
  /* Reverse iteration: entries are added at the head, so the tail is oldest. */
  96         list_for_each_prev(p, &queue_list) {
 
  /*
   * The cast treats the list_head pointer as the entry itself, which relies
   * on 'list' being the first member of struct ipq_queue_entry.
   */
  97                 struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
 
  99                 if (!cmpfn || cmpfn(entry, data))
 
 /* Unlink an entry from queue_list; no locking is done here. */
 106 __ipq_dequeue_entry(struct ipq_queue_entry *entry)
 
 108         list_del(&entry->list);
 
 /*
  * Look up an entry with __ipq_find_entry() and unlink it from the queue.
  * NOTE(review): the handling of a failed lookup is on elided lines.
  */
 112 static inline struct ipq_queue_entry *
 
 113 __ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
 
 115         struct ipq_queue_entry *entry;
 
 117         entry = __ipq_find_entry(cmpfn, data);
 
 121         __ipq_dequeue_entry(entry);
 
 /*
  * Drain the queue: repeatedly dequeue an entry (NULL cmpfn matches any
  * entry) and issue the given verdict for it until none is returned.
  */
 127 __ipq_flush(int verdict)
 
 129         struct ipq_queue_entry *entry;
 
 131         while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
 
 132                 ipq_issue_verdict(entry, verdict);
 
 /*
  * Set the copy mode and range.  Most of the body is elided in this
  * listing; only the IPQ_COPY_PACKET case label and a range check remain.
  */
 136 __ipq_set_mode(unsigned char mode, unsigned int range)
 
 147         case IPQ_COPY_PACKET:
 
 /* Range check against 0xFFFF; the action taken is on an elided line. */
 150                 if (copy_range > 0xFFFF)
 
 165         net_disable_timestamp();
 
 166         __ipq_set_mode(IPQ_COPY_NONE, 0);
 
 167         __ipq_flush(NF_DROP);
 
 /*
  * Locked wrapper: runs __ipq_find_dequeue_entry() with queue_lock held
  * for writing (bottom halves disabled).
  */
 170 static struct ipq_queue_entry *
 
 171 ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
 
 173         struct ipq_queue_entry *entry;
 
 175         write_lock_bh(&queue_lock);
 
 176         entry = __ipq_find_dequeue_entry(cmpfn, data);
 
 177         write_unlock_bh(&queue_lock);
 
 /* Locked wrapper: runs __ipq_flush(verdict) with queue_lock write-held. */
 182 ipq_flush(int verdict)
 
 184         write_lock_bh(&queue_lock);
 
 185         __ipq_flush(verdict);
 
 186         write_unlock_bh(&queue_lock);
 
 /*
  * Build the IPQM_PACKET netlink message describing a queued packet.
  * The message size depends on the current copy mode; metadata is always
  * filled in and, per the skb_copy_bits() call, packet bytes are copied
  * into pmsg->payload.  An error code may be stored through errp.
  * NOTE(review): several lines (declarations, switch header, labels,
  * returns) are elided in this listing.
  */
 189 static struct sk_buff *
 
 190 ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 
 192         unsigned char *old_tail;
 
 196         struct ipq_packet_msg *pmsg;
 
 197         struct nlmsghdr *nlh;
 
 /* copy_range is read under the read lock while sizing the message. */
 199         read_lock_bh(&queue_lock);
 
 /* Metadata-only message: no packet bytes are accounted for. */
 204                 size = NLMSG_SPACE(sizeof(*pmsg));
 
 208         case IPQ_COPY_PACKET:
 
 /*
  * If the checksum is still pending (CHECKSUM_HW), have it computed by
  * skb_checksum_help(); a non-zero result is stored in *errp and the lock
  * is released on the failure path.
  */
 209                 if (entry->skb->ip_summed == CHECKSUM_HW &&
 
 210                     (*errp = skb_checksum_help(entry->skb,
 
 211                                                entry->info->outdev == NULL))) {
 
 212                         read_unlock_bh(&queue_lock);
 
 /* copy_range of 0, or larger than the packet, means copy everything. */
 215                 if (copy_range == 0 || copy_range > entry->skb->len)
 
 216                         data_len = entry->skb->len;
 
 218                         data_len = copy_range;
 
 220                 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
 
 225                 read_unlock_bh(&queue_lock);
 
 229         read_unlock_bh(&queue_lock);
 
 /* GFP_ATOMIC: the allocation must not sleep. */
 231         skb = alloc_skb(size, GFP_ATOMIC);
 
 236         nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
 
 237         pmsg = NLMSG_DATA(nlh);
 
 238         memset(pmsg, 0, sizeof(*pmsg));
 
 /* The entry pointer doubles as the packet id (matched later by id_cmp). */
 240         pmsg->packet_id       = (unsigned long )entry;
 
 241         pmsg->data_len        = data_len;
 
 242         pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
 
 243         pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
 
 244         pmsg->mark            = entry->skb->nfmark;
 
 245         pmsg->hook            = entry->info->hook;
 
 246         pmsg->hw_protocol     = entry->skb->protocol;
 
 /*
  * Device names: copied when the device is present, empty otherwise.
  * NOTE(review): strcpy() is unbounded; this assumes the destination
  * arrays are at least as large as net_device.name -- confirm.
  */
 248         if (entry->info->indev)
 
 249                 strcpy(pmsg->indev_name, entry->info->indev->name);
 
 251                 pmsg->indev_name[0] = '\0';
 
 253         if (entry->info->outdev)
 
 254                 strcpy(pmsg->outdev_name, entry->info->outdev->name);
 
 256                 pmsg->outdev_name[0] = '\0';
 
 /* Hardware details are filled in only for packets with an input device. */
 258         if (entry->info->indev && entry->skb->dev) {
 
 259                 pmsg->hw_type = entry->skb->dev->type;
 
 260                 if (entry->skb->dev->hard_header_parse)
 
 262                                 entry->skb->dev->hard_header_parse(entry->skb,
 
 /* Copy data_len bytes of the packet; a non-zero return is an error. */
 267                 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
 
 /* Final message length: bytes added to skb since old_tail was recorded. */
 270         nlh->nlmsg_len = skb->tail - old_tail;
 
 277         printk(KERN_ERR "ip6_queue: error creating packet message\n");
 
 /*
  * Queue handler callback (installed as nfqh.outfn).  Allocates a queue
  * entry, builds the netlink message for it, sends that message to
  * peer_pid and adds the entry to the queue.
  * NOTE(review): labels, returns and several statements are elided in
  * this listing, so the exact error paths cannot be confirmed from here.
  */
 282 ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, 
 
 283                    unsigned int queuenum, void *data)
 
 285         int status = -EINVAL;
 
 286         struct sk_buff *nskb;
 
 287         struct ipq_queue_entry *entry;
 
 /* Checked before any allocation; the action is on an elided line. */
 289         if (copy_mode == IPQ_COPY_NONE)
 
 /* GFP_ATOMIC: the allocation must not sleep. */
 292         entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 
 294                 printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
 
 /* Built before taking the write lock; status may receive an error code. */
 301         nskb = ipq_build_packet_message(entry, &status);
 
 305         write_lock_bh(&queue_lock);
 
 308                 goto err_out_free_nskb; 
 
 /* Queue is at its limit: warn and leave via the nskb-freeing path. */
 310         if (queue_total >= queue_maxlen) {
 
 314                         printk (KERN_WARNING "ip6_queue: fill at %d entries, "
 
 315                                 "dropping packet(s).  Dropped: %d\n", queue_total,
 
 317                 goto err_out_free_nskb;
 
 320         /* netlink_unicast will either free the nskb or attach it to a socket */ 
 
 321         status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
 
 /* Presumably counted only when the send failed -- condition is elided. */
 323                 queue_user_dropped++;
 
 327         __ipq_enqueue_entry(entry);
 
 329         write_unlock_bh(&queue_lock);
 
 336         write_unlock_bh(&queue_lock);
 
 /*
  * Replace the queued packet's contents with the payload supplied in a
  * verdict message.  The skb is resized to v->data_len (trimmed, or grown
  * with a reallocation when tailroom is insufficient), made writable, and
  * overwritten with v->payload.
  * NOTE(review): return statements and some branches are elided here.
  */
 344 ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 
 347         struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
 
 /* The replacement must be at least as large as an IPv6 header. */
 349         if (v->data_len < sizeof(*user_iph))
 
 /* Size change between the new payload and the current packet. */
 351         diff = v->data_len - e->skb->len;
 
 /* Resize to the new length (presumably the shrinking case). */
 353                 skb_trim(e->skb, v->data_len);
 
 /* Upper bound on the replacement size; the action is on an elided line. */
 355                 if (v->data_len > 0xFFFF)
 
 /* Not enough tailroom: copy into a new, larger skb. */
 357                 if (diff > skb_tailroom(e->skb)) {
 
 358                         struct sk_buff *newskb;
 
 360                         newskb = skb_copy_expand(e->skb,
 
 361                                                  skb_headroom(e->skb),
 
 364                         if (newskb == NULL) {
 
 365                                 printk(KERN_WARNING "ip6_queue: OOM "
 
 366                                       "in mangle, dropping packet\n");
 
 /* The copy is charged to the original socket via skb_set_owner_w(). */
 370                                 skb_set_owner_w(newskb, e->skb->sk);
 
 374                 skb_put(e->skb, diff);
 
 /* Ensure the first v->data_len bytes can be modified in place. */
 376         if (!skb_make_writable(&e->skb, v->data_len))
 
 378         memcpy(e->skb->data, v->payload, v->data_len);
 
 /* Contents changed, so any previously computed checksum state is reset. */
 379         e->skb->ip_summed = CHECKSUM_NONE;
 
 /*
  * ipq_cmpfn that matches an entry by packet id.  The id is the entry's
  * address cast to unsigned long, the same value that
  * ipq_build_packet_message() stores in pmsg->packet_id.
  */
 385 id_cmp(struct ipq_queue_entry *e, unsigned long id)
 
 387         return (id == (unsigned long )e);
 
 /*
  * Apply a verdict received from userspace: find and dequeue the entry
  * whose id matches vmsg->id, optionally replace its payload, then issue
  * the verdict.  len is the number of payload bytes following the message.
  * NOTE(review): return statements and else-branches are elided here.
  */
 391 ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 
 393         struct ipq_queue_entry *entry;
 
 /* Verdict range check; the action is on an elided line. */
 395         if (vmsg->value > NF_MAX_VERDICT)
 
 398         entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
 
 402                 int verdict = vmsg->value;
 
 /*
  * Mangling is attempted only when a payload is present and its declared
  * length equals the received length exactly; otherwise it is skipped.
  */
 404                 if (vmsg->data_len && vmsg->data_len == len)
 
 405                         if (ipq_mangle_ipv6(vmsg, entry) < 0)
 
 408                 ipq_issue_verdict(entry, verdict);
 
 /* Locked wrapper: runs __ipq_set_mode() with queue_lock write-held. */
 414 ipq_set_mode(unsigned char mode, unsigned int range)
 
 418         write_lock_bh(&queue_lock);
 
 419         status = __ipq_set_mode(mode, range);
 
 420         write_unlock_bh(&queue_lock);
 
 /*
  * Dispatch a message from the userspace peer by type: mode messages go
  * to ipq_set_mode(), verdict messages to ipq_set_verdict().
  * NOTE(review): the switch header and case labels are elided here.
  */
 425 ipq_receive_peer(struct ipq_peer_msg *pmsg,
 
 426                  unsigned char type, unsigned int len)
 
 /* The message must be at least as large as the fixed peer header. */
 430         if (len < sizeof(*pmsg))
 
 435                 status = ipq_set_mode(pmsg->msg.mode.value,
 
 436                                       pmsg->msg.mode.range);
 
 /* Verdict range is checked here before ipq_set_verdict() is called. */
 440                 if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
 
 /* Bytes beyond the fixed header are passed on as the payload length. */
 443                         status = ipq_set_verdict(&pmsg->msg.verdict,
 
 444                                                  len - sizeof(*pmsg));
 
 /*
  * ipq_cmpfn that tests whether an entry's input or output device has
  * the given interface index.  The return statements are elided here;
  * presumably a match returns non-zero -- TODO confirm.
  */
 453 dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
 
 455         if (entry->info->indev)
 
 456                 if (entry->info->indev->ifindex == ifindex)
 
 459         if (entry->info->outdev)
 
 460                 if (entry->info->outdev->ifindex == ifindex)
 
 /*
  * Drop every queued packet associated with the given interface: entries
  * matching dev_cmp are dequeued one at a time and given NF_DROP.
  */
 467 ipq_dev_drop(int ifindex)
 
 469         struct ipq_queue_entry *entry;
 
 471         while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
 
 472                 ipq_issue_verdict(entry, NF_DROP);
 
 /*
  * Acknowledge the current message with error code err and return from
  * the enclosing function.  Expects 'skb' and 'nlh' to be in scope and
  * the enclosing function to return void.
  */
 475 #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
 
 /*
  * Validate one netlink message received from userspace and pass it to
  * ipq_receive_peer().  Failures are reported with RCV_SKB_FAIL(); a
  * successful message is acknowledged only when NLM_F_ACK was requested.
  * NOTE(review): some statements (returns, peer_pid handling) are elided.
  */
 478 ipq_rcv_skb(struct sk_buff *skb)
 
 480         int status, type, pid, flags, nlmsglen, skblen;
 
 481         struct nlmsghdr *nlh;
 
 /* Buffer too short to hold a netlink header. */
 484         if (skblen < sizeof(*nlh))
 
 487         nlh = (struct nlmsghdr *)skb->data;
 
 488         nlmsglen = nlh->nlmsg_len;
 
 /* Declared length must cover the header and fit inside the buffer. */
 489         if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
 
 492         pid = nlh->nlmsg_pid;
 
 493         flags = nlh->nlmsg_flags;
 
 /* Require a valid sender pid and a plain, non-multipart request. */
 495         if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
 
 496                 RCV_SKB_FAIL(-EINVAL);
 
 /*
  * NOTE(review): MSG_TRUNC is a MSG_* constant tested against
  * nlmsg_flags (NLM_F_* values) -- confirm this is intentional.
  */
 498         if (flags & MSG_TRUNC)
 
 499                 RCV_SKB_FAIL(-ECOMM);
 
 501         type = nlh->nlmsg_type;
 
 /* Reject message types outside the range handled here. */
 502         if (type < NLMSG_NOOP || type >= IPQM_MAX)
 
 503                 RCV_SKB_FAIL(-EINVAL);
 
 /* Types at or below IPQM_BASE get separate handling (elided line). */
 505         if (type <= IPQM_BASE)
 
 /* Permission check on the sender. */
 508         if (security_netlink_recv(skb))
 
 509                 RCV_SKB_FAIL(-EPERM);   
 
 /* peer_pid is examined under the write lock. */
 511         write_lock_bh(&queue_lock);
 
 /* A different pid than the registered peer is refused with -EBUSY. */
 514                 if (peer_pid != pid) {
 
 515                         write_unlock_bh(&queue_lock);
 
 516                         RCV_SKB_FAIL(-EBUSY);
 
 /* Presumably the branch that registers a new peer -- condition elided. */
 519                 net_enable_timestamp();
 
 523         write_unlock_bh(&queue_lock);
 
 /* Pass the payload and its length (message length minus header). */
 525         status = ipq_receive_peer(NLMSG_DATA(nlh), type,
 
 526                                   nlmsglen - NLMSG_LENGTH(0));
 
 528                 RCV_SKB_FAIL(status);
 
 530         if (flags & NLM_F_ACK)
 
 531                 netlink_ack(skb, nlh, 0);
 
 /*
  * Netlink input callback (passed to netlink_kernel_create()).  With
  * ipqnl_mutex held, dequeues as many skbs as were queued on entry to the
  * loop.  What is done with each skb is on elided lines.
  */
 536 ipq_rcv_sk(struct sock *sk, int len)
 
 541         mutex_lock(&ipqnl_mutex);
 
 /* Queue length is sampled once, so the loop is bounded by that count. */
 543         for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
 
 544                 skb = skb_dequeue(&sk->sk_receive_queue);
 
 549         mutex_unlock(&ipqnl_mutex);
 
 /*
  * Netdevice notifier callback: on NETDEV_DOWN, drops all queued packets
  * that reference the device.  ptr is used as a struct net_device pointer.
  */
 553 ipq_rcv_dev_event(struct notifier_block *this,
 
 554                   unsigned long event, void *ptr)
 
 556         struct net_device *dev = ptr;
 
 558         /* Drop any packets associated with the downed device */
 
 559         if (event == NETDEV_DOWN)
 
 560                 ipq_dev_drop(dev->ifindex);
 
 /* Notifier block registered with register_netdevice_notifier() at init. */
 564 static struct notifier_block ipq_dev_notifier = {
 
 565         .notifier_call  = ipq_rcv_dev_event,
 
 /*
  * Netlink notifier callback: reacts to NETLINK_URELEASE events for the
  * NETLINK_IP6_FW protocol with a non-zero pid.  Under the write lock it
  * compares the releasing pid with peer_pid; the action taken on a match
  * is on an elided line.
  */
 569 ipq_rcv_nl_event(struct notifier_block *this,
 
 570                  unsigned long event, void *ptr)
 
 572         struct netlink_notify *n = ptr;
 
 574         if (event == NETLINK_URELEASE &&
 
 575             n->protocol == NETLINK_IP6_FW && n->pid) {
 
 576                 write_lock_bh(&queue_lock);
 
 577                 if (n->pid == peer_pid)
 
 579                 write_unlock_bh(&queue_lock);
 
 /* Notifier block registered with netlink_register_notifier() at init. */
 584 static struct notifier_block ipq_nl_notifier = {
 
 585         .notifier_call  = ipq_rcv_nl_event,
 
 /* Handle returned by register_sysctl_table(); needed to unregister. */
 588 static struct ctl_table_header *ipq_sysctl_header;
 
 /*
  * Leaf sysctl table: a single entry exposing queue_maxlen.
  * NOTE(review): queue_maxlen is unsigned int while proc_dointvec handles
  * signed ints -- confirm negative writes are acceptable or rejected.
  */
 590 static ctl_table ipq_table[] = {
 
 592                 .ctl_name       = NET_IPQ_QMAX,
 
 593                 .procname       = NET_IPQ_QMAX_NAME,
 
 594                 .data           = &queue_maxlen,
 
 595                 .maxlen         = sizeof(queue_maxlen),
 
 597                 .proc_handler   = proc_dointvec
 
 /*
  * Intermediate sysctl directory table, identified by NET_IPV6.  Its other
  * fields are elided in this listing; presumably it links to ipq_table.
  */
 602 static ctl_table ipq_dir_table[] = {
 
 604                 .ctl_name       = NET_IPV6,
 
 /*
  * Root sysctl table passed to register_sysctl_table(); its child is
  * ipq_dir_table.  Other fields are elided in this listing.
  */
 612 static ctl_table ipq_root_table[] = {
 
 617                 .child          = ipq_dir_table
 
 /*
  * /proc read handler (registered via proc_net_create()).  Formats the
  * queue status into buffer with sprintf() while holding the read lock,
  * then points *start at buffer + offset.
  * NOTE(review): part of the format string, the sprintf() arguments and
  * the length/offset adjustments are elided in this listing.
  */
 623 ipq_get_info(char *buffer, char **start, off_t offset, int length)
 
 /* Read lock so the reported values are sampled together. */
 627         read_lock_bh(&queue_lock);
 
 629         len = sprintf(buffer,
 
 633                       "Queue length      : %u\n"
 
 634                       "Queue max. length : %u\n"
 
 635                       "Queue dropped     : %u\n"
 
 636                       "Netfilter dropped : %u\n",
 
 645         read_unlock_bh(&queue_lock);
 
 647         *start = buffer + offset;
 
 /*
  * Queue handler registered for PF_INET6 in ip6_queue_init(); packets are
  * delivered to ipq_enqueue_packet().  Other fields are elided here.
  */
 656 static struct nf_queue_handler nfqh = {
 
 658         .outfn  = &ipq_enqueue_packet,
 
 /*
  * Module initialisation.  In order: registers the netlink notifier,
  * creates the kernel netlink socket, creates the /proc entry, registers
  * the netdevice notifier and the sysctl table, and finally registers the
  * PF_INET6 queue handler.  The lines after the handler registration undo
  * these steps in reverse order on failure.
  * NOTE(review): condition lines, labels and returns are elided here.
  */
 661 static int __init ip6_queue_init(void)
 
 663         int status = -ENOMEM;
 
 664         struct proc_dir_entry *proc;
 
 666         netlink_register_notifier(&ipq_nl_notifier);
 
 667         ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
 
 670                 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
 
 671                 goto cleanup_netlink_notifier;
 
 674         proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
 
 676                 proc->owner = THIS_MODULE;
 
 678                 printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
 
 682         register_netdevice_notifier(&ipq_dev_notifier);
 
 683         ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
 
 685         status = nf_register_queue_handler(PF_INET6, &nfqh);
 
 687                 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
 
 /* Error unwinding starts here, in reverse order of setup. */
 693         unregister_sysctl_table(ipq_sysctl_header);
 
 694         unregister_netdevice_notifier(&ipq_dev_notifier);
 
 695         proc_net_remove(IPQ_PROC_FS_NAME);
 
 698         sock_release(ipqnl->sk_socket);
 
 /*
  * Lock/unlock pair with nothing in between: presumably waits for any
  * ipq_rcv_sk() call still holding ipqnl_mutex to finish -- TODO confirm.
  */
 699         mutex_lock(&ipqnl_mutex);
 
 700         mutex_unlock(&ipqnl_mutex);
 
 702 cleanup_netlink_notifier:
 
 703         netlink_unregister_notifier(&ipq_nl_notifier);
 
 /*
  * Module teardown.  Unregisters the queue handler first, then removes
  * the sysctl table, netdevice notifier and /proc entry, releases the
  * netlink socket, and finally unregisters the netlink notifier.
  * NOTE(review): some lines between these calls are elided here.
  */
 707 static void __exit ip6_queue_fini(void)
 
 709         nf_unregister_queue_handlers(&nfqh);
 
 713         unregister_sysctl_table(ipq_sysctl_header);
 
 714         unregister_netdevice_notifier(&ipq_dev_notifier);
 
 715         proc_net_remove(IPQ_PROC_FS_NAME);
 
 717         sock_release(ipqnl->sk_socket);
 
 /*
  * Lock/unlock pair with nothing in between: presumably waits for any
  * ipq_rcv_sk() call still holding ipqnl_mutex to finish -- TODO confirm.
  */
 718         mutex_lock(&ipqnl_mutex);
 
 719         mutex_unlock(&ipqnl_mutex);
 
 721         netlink_unregister_notifier(&ipq_nl_notifier);
 
 /* Module metadata. */
 724 MODULE_DESCRIPTION("IPv6 packet queue handler");
 
 725 MODULE_LICENSE("GPL");
 
 /* Entry and exit points: ip6_queue_init() on load, ip6_queue_fini() on unload. */
 727 module_init(ip6_queue_init);
 
 728 module_exit(ip6_queue_fini);