iwlwifi: correct API command overlap
[linux-2.6] / drivers / net / tun.c
1 /*
2  *  TUN - Universal TUN/TAP device driver.
3  *  Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  *  GNU General Public License for more details.
14  *
15  *  $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $
16  */
17
18 /*
19  *  Changes:
20  *
21  *  Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
22  *    Add TUNSETLINK ioctl to set the link encapsulation
23  *
24  *  Mark Smith <markzzzsmith@yahoo.com.au>
25  *    Use random_ether_addr() for tap MAC address.
26  *
27  *  Harald Roelle <harald.roelle@ifi.lmu.de>  2004/04/20
28  *    Fixes in packet dropping, queue length setting and queue wakeup.
29  *    Increased default tx queue length.
30  *    Added ethtool API.
31  *    Minor cleanups
32  *
33  *  Daniel Podlejski <underley@underley.eu.org>
34  *    Modifications for 2.3.99-pre5 kernel.
35  */
36
37 #define DRV_NAME        "tun"
38 #define DRV_VERSION     "1.6"
39 #define DRV_DESCRIPTION "Universal TUN/TAP device driver"
40 #define DRV_COPYRIGHT   "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
41
42 #include <linux/module.h>
43 #include <linux/errno.h>
44 #include <linux/kernel.h>
45 #include <linux/major.h>
46 #include <linux/slab.h>
47 #include <linux/smp_lock.h>
48 #include <linux/poll.h>
49 #include <linux/fcntl.h>
50 #include <linux/init.h>
51 #include <linux/skbuff.h>
52 #include <linux/netdevice.h>
53 #include <linux/etherdevice.h>
54 #include <linux/miscdevice.h>
55 #include <linux/ethtool.h>
56 #include <linux/rtnetlink.h>
57 #include <linux/if.h>
58 #include <linux/if_arp.h>
59 #include <linux/if_ether.h>
60 #include <linux/if_tun.h>
61 #include <linux/crc32.h>
62 #include <linux/nsproxy.h>
63 #include <linux/virtio_net.h>
64 #include <net/net_namespace.h>
65 #include <net/netns/generic.h>
66 #include <net/rtnetlink.h>
67 #include <net/sock.h>
68
69 #include <asm/system.h>
70 #include <asm/uaccess.h>
71
72 /* Uncomment to enable debugging */
73 /* #define TUN_DEBUG 1 */
74
#ifdef TUN_DEBUG
static int debug;

/*
 * Debug print helpers.  DBG expects a local variable named 'tun' to be in
 * scope; DBG1 is keyed off the file-level 'debug' variable.  Both are
 * wrapped in do { } while (0) so that they behave as a single statement
 * and cannot capture a following 'else'.
 */
#define DBG(a...)  do { if (tun->debug) printk(a); } while (0)
#define DBG1(a...) do { if (debug == 2) printk(a); } while (0)
#else
/* Debugging disabled: both helpers expand to nothing. */
#define DBG( a... )
#define DBG1( a... )
#endif
84
85 #define FLT_EXACT_COUNT 8
86 struct tap_filter {
87         unsigned int    count;    /* Number of addrs. Zero means disabled */
88         u32             mask[2];  /* Mask of the hashed addrs */
89         unsigned char   addr[FLT_EXACT_COUNT][ETH_ALEN];
90 };
91
92 struct tun_file {
93         atomic_t count;
94         struct tun_struct *tun;
95         struct net *net;
96         wait_queue_head_t       read_wait;
97 };
98
99 struct tun_sock;
100
101 struct tun_struct {
102         struct tun_file         *tfile;
103         unsigned int            flags;
104         uid_t                   owner;
105         gid_t                   group;
106
107         struct sk_buff_head     readq;
108
109         struct net_device       *dev;
110         struct fasync_struct    *fasync;
111
112         struct tap_filter       txflt;
113         struct sock             *sk;
114         struct socket           socket;
115
116 #ifdef TUN_DEBUG
117         int debug;
118 #endif
119 };
120
121 struct tun_sock {
122         struct sock             sk;
123         struct tun_struct       *tun;
124 };
125
126 static inline struct tun_sock *tun_sk(struct sock *sk)
127 {
128         return container_of(sk, struct tun_sock, sk);
129 }
130
131 static int tun_attach(struct tun_struct *tun, struct file *file)
132 {
133         struct tun_file *tfile = file->private_data;
134         const struct cred *cred = current_cred();
135         int err;
136
137         ASSERT_RTNL();
138
139         /* Check permissions */
140         if (((tun->owner != -1 && cred->euid != tun->owner) ||
141              (tun->group != -1 && !in_egroup_p(tun->group))) &&
142                 !capable(CAP_NET_ADMIN))
143                 return -EPERM;
144
145         netif_tx_lock_bh(tun->dev);
146
147         err = -EINVAL;
148         if (tfile->tun)
149                 goto out;
150
151         err = -EBUSY;
152         if (tun->tfile)
153                 goto out;
154
155         err = 0;
156         tfile->tun = tun;
157         tun->tfile = tfile;
158         dev_hold(tun->dev);
159         atomic_inc(&tfile->count);
160
161 out:
162         netif_tx_unlock_bh(tun->dev);
163         return err;
164 }
165
166 static void __tun_detach(struct tun_struct *tun)
167 {
168         struct tun_file *tfile = tun->tfile;
169
170         /* Detach from net device */
171         netif_tx_lock_bh(tun->dev);
172         tfile->tun = NULL;
173         tun->tfile = NULL;
174         netif_tx_unlock_bh(tun->dev);
175
176         /* Drop read queue */
177         skb_queue_purge(&tun->readq);
178
179         /* Drop the extra count on the net device */
180         dev_put(tun->dev);
181 }
182
/* RTNL-taking wrapper around __tun_detach(). */
static void tun_detach(struct tun_struct *tun)
{
	rtnl_lock();
	{
		__tun_detach(tun);
	}
	rtnl_unlock();
}
189
190 static struct tun_struct *__tun_get(struct tun_file *tfile)
191 {
192         struct tun_struct *tun = NULL;
193
194         if (atomic_inc_not_zero(&tfile->count))
195                 tun = tfile->tun;
196
197         return tun;
198 }
199
200 static struct tun_struct *tun_get(struct file *file)
201 {
202         return __tun_get(file->private_data);
203 }
204
205 static void tun_put(struct tun_struct *tun)
206 {
207         struct tun_file *tfile = tun->tfile;
208
209         if (atomic_dec_and_test(&tfile->count))
210                 tun_detach(tfile->tun);
211 }
212
213 /* TAP filterting */
214 static void addr_hash_set(u32 *mask, const u8 *addr)
215 {
216         int n = ether_crc(ETH_ALEN, addr) >> 26;
217         mask[n >> 5] |= (1 << (n & 31));
218 }
219
220 static unsigned int addr_hash_test(const u32 *mask, const u8 *addr)
221 {
222         int n = ether_crc(ETH_ALEN, addr) >> 26;
223         return mask[n >> 5] & (1 << (n & 31));
224 }
225
226 static int update_filter(struct tap_filter *filter, void __user *arg)
227 {
228         struct { u8 u[ETH_ALEN]; } *addr;
229         struct tun_filter uf;
230         int err, alen, n, nexact;
231
232         if (copy_from_user(&uf, arg, sizeof(uf)))
233                 return -EFAULT;
234
235         if (!uf.count) {
236                 /* Disabled */
237                 filter->count = 0;
238                 return 0;
239         }
240
241         alen = ETH_ALEN * uf.count;
242         addr = kmalloc(alen, GFP_KERNEL);
243         if (!addr)
244                 return -ENOMEM;
245
246         if (copy_from_user(addr, arg + sizeof(uf), alen)) {
247                 err = -EFAULT;
248                 goto done;
249         }
250
251         /* The filter is updated without holding any locks. Which is
252          * perfectly safe. We disable it first and in the worst
253          * case we'll accept a few undesired packets. */
254         filter->count = 0;
255         wmb();
256
257         /* Use first set of addresses as an exact filter */
258         for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
259                 memcpy(filter->addr[n], addr[n].u, ETH_ALEN);
260
261         nexact = n;
262
263         /* The rest is hashed */
264         memset(filter->mask, 0, sizeof(filter->mask));
265         for (; n < uf.count; n++)
266                 addr_hash_set(filter->mask, addr[n].u);
267
268         /* For ALLMULTI just set the mask to all ones.
269          * This overrides the mask populated above. */
270         if ((uf.flags & TUN_FLT_ALLMULTI))
271                 memset(filter->mask, ~0, sizeof(filter->mask));
272
273         /* Now enable the filter */
274         wmb();
275         filter->count = nexact;
276
277         /* Return the number of exact filters */
278         err = nexact;
279
280 done:
281         kfree(addr);
282         return err;
283 }
284
285 /* Returns: 0 - drop, !=0 - accept */
286 static int run_filter(struct tap_filter *filter, const struct sk_buff *skb)
287 {
288         /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect
289          * at this point. */
290         struct ethhdr *eh = (struct ethhdr *) skb->data;
291         int i;
292
293         /* Exact match */
294         for (i = 0; i < filter->count; i++)
295                 if (!compare_ether_addr(eh->h_dest, filter->addr[i]))
296                         return 1;
297
298         /* Inexact match (multicast only) */
299         if (is_multicast_ether_addr(eh->h_dest))
300                 return addr_hash_test(filter->mask, eh->h_dest);
301
302         return 0;
303 }
304
305 /*
306  * Checks whether the packet is accepted or not.
307  * Returns: 0 - drop, !=0 - accept
308  */
309 static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
310 {
311         if (!filter->count)
312                 return 1;
313
314         return run_filter(filter, skb);
315 }
316
317 /* Network device part of the driver */
318
319 static const struct ethtool_ops tun_ethtool_ops;
320
321 /* Net device detach from fd. */
322 static void tun_net_uninit(struct net_device *dev)
323 {
324         struct tun_struct *tun = netdev_priv(dev);
325         struct tun_file *tfile = tun->tfile;
326
327         /* Inform the methods they need to stop using the dev.
328          */
329         if (tfile) {
330                 wake_up_all(&tfile->read_wait);
331                 if (atomic_dec_and_test(&tfile->count))
332                         __tun_detach(tun);
333         }
334 }
335
/* ndo_open: start the transmit queue.  Always succeeds. */
static int tun_net_open(struct net_device *dev)
{
	netif_start_queue(dev);

	return 0;
}
342
/* ndo_stop: stop the transmit queue.  Always succeeds. */
static int tun_net_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
349
350 /* Net device start xmit */
351 static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
352 {
353         struct tun_struct *tun = netdev_priv(dev);
354
355         DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
356
357         /* Drop packet if interface is not attached */
358         if (!tun->tfile)
359                 goto drop;
360
361         /* Drop if the filter does not like it.
362          * This is a noop if the filter is disabled.
363          * Filter can be enabled only for the TAP devices. */
364         if (!check_filter(&tun->txflt, skb))
365                 goto drop;
366
367         if (skb_queue_len(&tun->readq) >= dev->tx_queue_len) {
368                 if (!(tun->flags & TUN_ONE_QUEUE)) {
369                         /* Normal queueing mode. */
370                         /* Packet scheduler handles dropping of further packets. */
371                         netif_stop_queue(dev);
372
373                         /* We won't see all dropped packets individually, so overrun
374                          * error is more appropriate. */
375                         dev->stats.tx_fifo_errors++;
376                 } else {
377                         /* Single queue mode.
378                          * Driver handles dropping of all packets itself. */
379                         goto drop;
380                 }
381         }
382
383         /* Enqueue packet */
384         skb_queue_tail(&tun->readq, skb);
385         dev->trans_start = jiffies;
386
387         /* Notify and wake up reader process */
388         if (tun->flags & TUN_FASYNC)
389                 kill_fasync(&tun->fasync, SIGIO, POLL_IN);
390         wake_up_interruptible(&tun->tfile->read_wait);
391         return 0;
392
393 drop:
394         dev->stats.tx_dropped++;
395         kfree_skb(skb);
396         return 0;
397 }
398
/*
 * ndo_set_multicast_list: intentionally empty.
 *
 * The callback concerns multicast filtering on the _rx_ path and is
 * unrelated to the _tx_ path.  On rx everything user space hands us is
 * accepted.
 */
static void tun_net_mclist(struct net_device *dev)
{
}
408
409 #define MIN_MTU 68
410 #define MAX_MTU 65535
411
412 static int
413 tun_net_change_mtu(struct net_device *dev, int new_mtu)
414 {
415         if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU)
416                 return -EINVAL;
417         dev->mtu = new_mtu;
418         return 0;
419 }
420
421 static const struct net_device_ops tun_netdev_ops = {
422         .ndo_uninit             = tun_net_uninit,
423         .ndo_open               = tun_net_open,
424         .ndo_stop               = tun_net_close,
425         .ndo_start_xmit         = tun_net_xmit,
426         .ndo_change_mtu         = tun_net_change_mtu,
427 };
428
429 static const struct net_device_ops tap_netdev_ops = {
430         .ndo_uninit             = tun_net_uninit,
431         .ndo_open               = tun_net_open,
432         .ndo_stop               = tun_net_close,
433         .ndo_start_xmit         = tun_net_xmit,
434         .ndo_change_mtu         = tun_net_change_mtu,
435         .ndo_set_multicast_list = tun_net_mclist,
436         .ndo_set_mac_address    = eth_mac_addr,
437         .ndo_validate_addr      = eth_validate_addr,
438 };
439
440 /* Initialize net device. */
441 static void tun_net_init(struct net_device *dev)
442 {
443         struct tun_struct *tun = netdev_priv(dev);
444
445         switch (tun->flags & TUN_TYPE_MASK) {
446         case TUN_TUN_DEV:
447                 dev->netdev_ops = &tun_netdev_ops;
448
449                 /* Point-to-Point TUN Device */
450                 dev->hard_header_len = 0;
451                 dev->addr_len = 0;
452                 dev->mtu = 1500;
453
454                 /* Zero header length */
455                 dev->type = ARPHRD_NONE;
456                 dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
457                 dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
458                 break;
459
460         case TUN_TAP_DEV:
461                 dev->netdev_ops = &tap_netdev_ops;
462                 /* Ethernet TAP Device */
463                 ether_setup(dev);
464
465                 random_ether_addr(dev->dev_addr);
466
467                 dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
468                 break;
469         }
470 }
471
472 /* Character device part */
473
474 /* Poll */
475 static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
476 {
477         struct tun_file *tfile = file->private_data;
478         struct tun_struct *tun = __tun_get(tfile);
479         struct sock *sk = tun->sk;
480         unsigned int mask = 0;
481
482         if (!tun)
483                 return POLLERR;
484
485         DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
486
487         poll_wait(file, &tfile->read_wait, wait);
488
489         if (!skb_queue_empty(&tun->readq))
490                 mask |= POLLIN | POLLRDNORM;
491
492         if (sock_writeable(sk) ||
493             (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
494              sock_writeable(sk)))
495                 mask |= POLLOUT | POLLWRNORM;
496
497         if (tun->dev->reg_state != NETREG_REGISTERED)
498                 mask = POLLERR;
499
500         tun_put(tun);
501         return mask;
502 }
503
504 /* prepad is the amount to reserve at front.  len is length after that.
505  * linear is a hint as to how much to copy (usually headers). */
506 static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
507                                             size_t prepad, size_t len,
508                                             size_t linear, int noblock)
509 {
510         struct sock *sk = tun->sk;
511         struct sk_buff *skb;
512         int err;
513
514         /* Under a page?  Don't bother with paged skb. */
515         if (prepad + len < PAGE_SIZE)
516                 linear = len;
517
518         skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
519                                    &err);
520         if (!skb)
521                 return ERR_PTR(err);
522
523         skb_reserve(skb, prepad);
524         skb_put(skb, linear);
525         skb->data_len = len - linear;
526         skb->len += len - linear;
527
528         return skb;
529 }
530
531 /* Get packet from user space buffer */
532 static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
533                                        struct iovec *iv, size_t count,
534                                        int noblock)
535 {
536         struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
537         struct sk_buff *skb;
538         size_t len = count, align = 0;
539         struct virtio_net_hdr gso = { 0 };
540
541         if (!(tun->flags & TUN_NO_PI)) {
542                 if ((len -= sizeof(pi)) > count)
543                         return -EINVAL;
544
545                 if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
546                         return -EFAULT;
547         }
548
549         if (tun->flags & TUN_VNET_HDR) {
550                 if ((len -= sizeof(gso)) > count)
551                         return -EINVAL;
552
553                 if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
554                         return -EFAULT;
555
556                 if (gso.hdr_len > len)
557                         return -EINVAL;
558         }
559
560         if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
561                 align = NET_IP_ALIGN;
562                 if (unlikely(len < ETH_HLEN))
563                         return -EINVAL;
564         }
565
566         skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
567         if (IS_ERR(skb)) {
568                 if (PTR_ERR(skb) != -EAGAIN)
569                         tun->dev->stats.rx_dropped++;
570                 return PTR_ERR(skb);
571         }
572
573         if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
574                 tun->dev->stats.rx_dropped++;
575                 kfree_skb(skb);
576                 return -EFAULT;
577         }
578
579         if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
580                 if (!skb_partial_csum_set(skb, gso.csum_start,
581                                           gso.csum_offset)) {
582                         tun->dev->stats.rx_frame_errors++;
583                         kfree_skb(skb);
584                         return -EINVAL;
585                 }
586         } else if (tun->flags & TUN_NOCHECKSUM)
587                 skb->ip_summed = CHECKSUM_UNNECESSARY;
588
589         switch (tun->flags & TUN_TYPE_MASK) {
590         case TUN_TUN_DEV:
591                 if (tun->flags & TUN_NO_PI) {
592                         switch (skb->data[0] & 0xf0) {
593                         case 0x40:
594                                 pi.proto = htons(ETH_P_IP);
595                                 break;
596                         case 0x60:
597                                 pi.proto = htons(ETH_P_IPV6);
598                                 break;
599                         default:
600                                 tun->dev->stats.rx_dropped++;
601                                 kfree_skb(skb);
602                                 return -EINVAL;
603                         }
604                 }
605
606                 skb_reset_mac_header(skb);
607                 skb->protocol = pi.proto;
608                 skb->dev = tun->dev;
609                 break;
610         case TUN_TAP_DEV:
611                 skb->protocol = eth_type_trans(skb, tun->dev);
612                 break;
613         };
614
615         if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
616                 pr_debug("GSO!\n");
617                 switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
618                 case VIRTIO_NET_HDR_GSO_TCPV4:
619                         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
620                         break;
621                 case VIRTIO_NET_HDR_GSO_TCPV6:
622                         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
623                         break;
624                 default:
625                         tun->dev->stats.rx_frame_errors++;
626                         kfree_skb(skb);
627                         return -EINVAL;
628                 }
629
630                 if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
631                         skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
632
633                 skb_shinfo(skb)->gso_size = gso.gso_size;
634                 if (skb_shinfo(skb)->gso_size == 0) {
635                         tun->dev->stats.rx_frame_errors++;
636                         kfree_skb(skb);
637                         return -EINVAL;
638                 }
639
640                 /* Header must be checked, and gso_segs computed. */
641                 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
642                 skb_shinfo(skb)->gso_segs = 0;
643         }
644
645         netif_rx_ni(skb);
646
647         tun->dev->stats.rx_packets++;
648         tun->dev->stats.rx_bytes += len;
649
650         return count;
651 }
652
653 static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
654                               unsigned long count, loff_t pos)
655 {
656         struct file *file = iocb->ki_filp;
657         struct tun_struct *tun = file->private_data;
658         ssize_t result;
659
660         if (!tun)
661                 return -EBADFD;
662
663         DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
664
665         result = tun_get_user(tun, (struct iovec *)iv, iov_length(iv, count),
666                               file->f_flags & O_NONBLOCK);
667
668         tun_put(tun);
669         return result;
670 }
671
672 /* Put packet to the user space buffer */
673 static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
674                                        struct sk_buff *skb,
675                                        struct iovec *iv, int len)
676 {
677         struct tun_pi pi = { 0, skb->protocol };
678         ssize_t total = 0;
679
680         if (!(tun->flags & TUN_NO_PI)) {
681                 if ((len -= sizeof(pi)) < 0)
682                         return -EINVAL;
683
684                 if (len < skb->len) {
685                         /* Packet will be striped */
686                         pi.flags |= TUN_PKT_STRIP;
687                 }
688
689                 if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi)))
690                         return -EFAULT;
691                 total += sizeof(pi);
692         }
693
694         if (tun->flags & TUN_VNET_HDR) {
695                 struct virtio_net_hdr gso = { 0 }; /* no info leak */
696                 if ((len -= sizeof(gso)) < 0)
697                         return -EINVAL;
698
699                 if (skb_is_gso(skb)) {
700                         struct skb_shared_info *sinfo = skb_shinfo(skb);
701
702                         /* This is a hint as to how much should be linear. */
703                         gso.hdr_len = skb_headlen(skb);
704                         gso.gso_size = sinfo->gso_size;
705                         if (sinfo->gso_type & SKB_GSO_TCPV4)
706                                 gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
707                         else if (sinfo->gso_type & SKB_GSO_TCPV6)
708                                 gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
709                         else
710                                 BUG();
711                         if (sinfo->gso_type & SKB_GSO_TCP_ECN)
712                                 gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
713                 } else
714                         gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
715
716                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
717                         gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
718                         gso.csum_start = skb->csum_start - skb_headroom(skb);
719                         gso.csum_offset = skb->csum_offset;
720                 } /* else everything is zero */
721
722                 if (unlikely(memcpy_toiovec(iv, (void *)&gso, sizeof(gso))))
723                         return -EFAULT;
724                 total += sizeof(gso);
725         }
726
727         len = min_t(int, skb->len, len);
728
729         skb_copy_datagram_iovec(skb, 0, iv, len);
730         total += len;
731
732         tun->dev->stats.tx_packets++;
733         tun->dev->stats.tx_bytes += len;
734
735         return total;
736 }
737
738 static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
739                             unsigned long count, loff_t pos)
740 {
741         struct file *file = iocb->ki_filp;
742         struct tun_file *tfile = file->private_data;
743         struct tun_struct *tun = __tun_get(tfile);
744         DECLARE_WAITQUEUE(wait, current);
745         struct sk_buff *skb;
746         ssize_t len, ret = 0;
747
748         if (!tun)
749                 return -EBADFD;
750
751         DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
752
753         len = iov_length(iv, count);
754         if (len < 0) {
755                 ret = -EINVAL;
756                 goto out;
757         }
758
759         add_wait_queue(&tfile->read_wait, &wait);
760         while (len) {
761                 current->state = TASK_INTERRUPTIBLE;
762
763                 /* Read frames from the queue */
764                 if (!(skb=skb_dequeue(&tun->readq))) {
765                         if (file->f_flags & O_NONBLOCK) {
766                                 ret = -EAGAIN;
767                                 break;
768                         }
769                         if (signal_pending(current)) {
770                                 ret = -ERESTARTSYS;
771                                 break;
772                         }
773                         if (tun->dev->reg_state != NETREG_REGISTERED) {
774                                 ret = -EIO;
775                                 break;
776                         }
777
778                         /* Nothing to read, let's sleep */
779                         schedule();
780                         continue;
781                 }
782                 netif_wake_queue(tun->dev);
783
784                 ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
785                 kfree_skb(skb);
786                 break;
787         }
788
789         current->state = TASK_RUNNING;
790         remove_wait_queue(&tfile->read_wait, &wait);
791
792 out:
793         tun_put(tun);
794         return ret;
795 }
796
797 static void tun_setup(struct net_device *dev)
798 {
799         struct tun_struct *tun = netdev_priv(dev);
800
801         skb_queue_head_init(&tun->readq);
802
803         tun->owner = -1;
804         tun->group = -1;
805
806         dev->ethtool_ops = &tun_ethtool_ops;
807         dev->destructor = free_netdev;
808 }
809
810 /* Trivial set of netlink ops to allow deleting tun or tap
811  * device with netlink.
812  */
813 static int tun_validate(struct nlattr *tb[], struct nlattr *data[])
814 {
815         return -EINVAL;
816 }
817
818 static struct rtnl_link_ops tun_link_ops __read_mostly = {
819         .kind           = DRV_NAME,
820         .priv_size      = sizeof(struct tun_struct),
821         .setup          = tun_setup,
822         .validate       = tun_validate,
823 };
824
825 static void tun_sock_write_space(struct sock *sk)
826 {
827         struct tun_struct *tun;
828
829         if (!sock_writeable(sk))
830                 return;
831
832         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
833                 wake_up_interruptible_sync(sk->sk_sleep);
834
835         if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
836                 return;
837
838         tun = container_of(sk, struct tun_sock, sk)->tun;
839         kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
840 }
841
842 static void tun_sock_destruct(struct sock *sk)
843 {
844         dev_put(container_of(sk, struct tun_sock, sk)->tun->dev);
845 }
846
847 static struct proto tun_proto = {
848         .name           = "tun",
849         .owner          = THIS_MODULE,
850         .obj_size       = sizeof(struct tun_sock),
851 };
852
853 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
854 {
855         struct sock *sk;
856         struct tun_struct *tun;
857         struct net_device *dev;
858         struct tun_file *tfile = file->private_data;
859         int err;
860
861         dev = __dev_get_by_name(net, ifr->ifr_name);
862         if (dev) {
863                 if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
864                         tun = netdev_priv(dev);
865                 else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
866                         tun = netdev_priv(dev);
867                 else
868                         return -EINVAL;
869
870                 err = tun_attach(tun, file);
871                 if (err < 0)
872                         return err;
873         }
874         else {
875                 char *name;
876                 unsigned long flags = 0;
877
878                 err = -EINVAL;
879
880                 if (!capable(CAP_NET_ADMIN))
881                         return -EPERM;
882
883                 /* Set dev type */
884                 if (ifr->ifr_flags & IFF_TUN) {
885                         /* TUN device */
886                         flags |= TUN_TUN_DEV;
887                         name = "tun%d";
888                 } else if (ifr->ifr_flags & IFF_TAP) {
889                         /* TAP device */
890                         flags |= TUN_TAP_DEV;
891                         name = "tap%d";
892                 } else
893                         goto failed;
894
895                 if (*ifr->ifr_name)
896                         name = ifr->ifr_name;
897
898                 dev = alloc_netdev(sizeof(struct tun_struct), name,
899                                    tun_setup);
900                 if (!dev)
901                         return -ENOMEM;
902
903                 dev_net_set(dev, net);
904                 dev->rtnl_link_ops = &tun_link_ops;
905
906                 tun = netdev_priv(dev);
907                 tun->dev = dev;
908                 tun->flags = flags;
909                 tun->txflt.count = 0;
910
911                 err = -ENOMEM;
912                 sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
913                 if (!sk)
914                         goto err_free_dev;
915
916                 /* This ref count is for tun->sk. */
917                 dev_hold(dev);
918                 sock_init_data(&tun->socket, sk);
919                 sk->sk_write_space = tun_sock_write_space;
920                 sk->sk_destruct = tun_sock_destruct;
921                 sk->sk_sndbuf = INT_MAX;
922                 sk->sk_sleep = &tfile->read_wait;
923
924                 tun->sk = sk;
925                 container_of(sk, struct tun_sock, sk)->tun = tun;
926
927                 tun_net_init(dev);
928
929                 if (strchr(dev->name, '%')) {
930                         err = dev_alloc_name(dev, dev->name);
931                         if (err < 0)
932                                 goto err_free_sk;
933                 }
934
935                 err = -EINVAL;
936                 err = register_netdevice(tun->dev);
937                 if (err < 0)
938                         goto err_free_dev;
939
940                 err = tun_attach(tun, file);
941                 if (err < 0)
942                         goto err_free_dev;
943         }
944
945         DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);
946
947         if (ifr->ifr_flags & IFF_NO_PI)
948                 tun->flags |= TUN_NO_PI;
949         else
950                 tun->flags &= ~TUN_NO_PI;
951
952         if (ifr->ifr_flags & IFF_ONE_QUEUE)
953                 tun->flags |= TUN_ONE_QUEUE;
954         else
955                 tun->flags &= ~TUN_ONE_QUEUE;
956
957         if (ifr->ifr_flags & IFF_VNET_HDR)
958                 tun->flags |= TUN_VNET_HDR;
959         else
960                 tun->flags &= ~TUN_VNET_HDR;
961
962         /* Make sure persistent devices do not get stuck in
963          * xoff state.
964          */
965         if (netif_running(tun->dev))
966                 netif_wake_queue(tun->dev);
967
968         strcpy(ifr->ifr_name, tun->dev->name);
969         return 0;
970
971  err_free_sk:
972         sock_put(sk);
973  err_free_dev:
974         free_netdev(dev);
975  failed:
976         return err;
977 }
978
979 static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
980 {
981         struct tun_struct *tun = tun_get(file);
982
983         if (!tun)
984                 return -EBADFD;
985
986         DBG(KERN_INFO "%s: tun_get_iff\n", tun->dev->name);
987
988         strcpy(ifr->ifr_name, tun->dev->name);
989
990         ifr->ifr_flags = 0;
991
992         if (ifr->ifr_flags & TUN_TUN_DEV)
993                 ifr->ifr_flags |= IFF_TUN;
994         else
995                 ifr->ifr_flags |= IFF_TAP;
996
997         if (tun->flags & TUN_NO_PI)
998                 ifr->ifr_flags |= IFF_NO_PI;
999
1000         if (tun->flags & TUN_ONE_QUEUE)
1001                 ifr->ifr_flags |= IFF_ONE_QUEUE;
1002
1003         if (tun->flags & TUN_VNET_HDR)
1004                 ifr->ifr_flags |= IFF_VNET_HDR;
1005
1006         tun_put(tun);
1007         return 0;
1008 }
1009
1010 /* This is like a cut-down ethtool ops, except done via tun fd so no
1011  * privs required. */
1012 static int set_offload(struct net_device *dev, unsigned long arg)
1013 {
1014         unsigned int old_features, features;
1015
1016         old_features = dev->features;
1017         /* Unset features, set them as we chew on the arg. */
1018         features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST
1019                                     |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6));
1020
1021         if (arg & TUN_F_CSUM) {
1022                 features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
1023                 arg &= ~TUN_F_CSUM;
1024
1025                 if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
1026                         if (arg & TUN_F_TSO_ECN) {
1027                                 features |= NETIF_F_TSO_ECN;
1028                                 arg &= ~TUN_F_TSO_ECN;
1029                         }
1030                         if (arg & TUN_F_TSO4)
1031                                 features |= NETIF_F_TSO;
1032                         if (arg & TUN_F_TSO6)
1033                                 features |= NETIF_F_TSO6;
1034                         arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
1035                 }
1036         }
1037
1038         /* This gives the user a way to test for new features in future by
1039          * trying to set them. */
1040         if (arg)
1041                 return -EINVAL;
1042
1043         dev->features = features;
1044         if (old_features != dev->features)
1045                 netdev_features_change(dev);
1046
1047         return 0;
1048 }
1049
1050 static int tun_chr_ioctl(struct inode *inode, struct file *file,
1051                          unsigned int cmd, unsigned long arg)
1052 {
1053         struct tun_file *tfile = file->private_data;
1054         struct tun_struct *tun;
1055         void __user* argp = (void __user*)arg;
1056         struct ifreq ifr;
1057         int sndbuf;
1058         int ret;
1059
1060         if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
1061                 if (copy_from_user(&ifr, argp, sizeof ifr))
1062                         return -EFAULT;
1063
1064         if (cmd == TUNGETFEATURES) {
1065                 /* Currently this just means: "what IFF flags are valid?".
1066                  * This is needed because we never checked for invalid flags on
1067                  * TUNSETIFF. */
1068                 return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
1069                                 IFF_VNET_HDR,
1070                                 (unsigned int __user*)argp);
1071         }
1072
1073         tun = __tun_get(tfile);
1074         if (cmd == TUNSETIFF && !tun) {
1075                 int err;
1076
1077                 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1078
1079                 rtnl_lock();
1080                 err = tun_set_iff(tfile->net, file, &ifr);
1081                 rtnl_unlock();
1082
1083                 if (err)
1084                         return err;
1085
1086                 if (copy_to_user(argp, &ifr, sizeof(ifr)))
1087                         return -EFAULT;
1088                 return 0;
1089         }
1090
1091
1092         if (!tun)
1093                 return -EBADFD;
1094
1095         DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);
1096
1097         ret = 0;
1098         switch (cmd) {
1099         case TUNGETIFF:
1100                 ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr);
1101                 if (ret)
1102                         break;
1103
1104                 if (copy_to_user(argp, &ifr, sizeof(ifr)))
1105                         ret = -EFAULT;
1106                 break;
1107
1108         case TUNSETNOCSUM:
1109                 /* Disable/Enable checksum */
1110                 if (arg)
1111                         tun->flags |= TUN_NOCHECKSUM;
1112                 else
1113                         tun->flags &= ~TUN_NOCHECKSUM;
1114
1115                 DBG(KERN_INFO "%s: checksum %s\n",
1116                     tun->dev->name, arg ? "disabled" : "enabled");
1117                 break;
1118
1119         case TUNSETPERSIST:
1120                 /* Disable/Enable persist mode */
1121                 if (arg)
1122                         tun->flags |= TUN_PERSIST;
1123                 else
1124                         tun->flags &= ~TUN_PERSIST;
1125
1126                 DBG(KERN_INFO "%s: persist %s\n",
1127                     tun->dev->name, arg ? "enabled" : "disabled");
1128                 break;
1129
1130         case TUNSETOWNER:
1131                 /* Set owner of the device */
1132                 tun->owner = (uid_t) arg;
1133
1134                 DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner);
1135                 break;
1136
1137         case TUNSETGROUP:
1138                 /* Set group of the device */
1139                 tun->group= (gid_t) arg;
1140
1141                 DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
1142                 break;
1143
1144         case TUNSETLINK:
1145                 /* Only allow setting the type when the interface is down */
1146                 rtnl_lock();
1147                 if (tun->dev->flags & IFF_UP) {
1148                         DBG(KERN_INFO "%s: Linktype set failed because interface is up\n",
1149                                 tun->dev->name);
1150                         ret = -EBUSY;
1151                 } else {
1152                         tun->dev->type = (int) arg;
1153                         DBG(KERN_INFO "%s: linktype set to %d\n", tun->dev->name, tun->dev->type);
1154                         ret = 0;
1155                 }
1156                 rtnl_unlock();
1157                 break;
1158
1159 #ifdef TUN_DEBUG
1160         case TUNSETDEBUG:
1161                 tun->debug = arg;
1162                 break;
1163 #endif
1164         case TUNSETOFFLOAD:
1165                 rtnl_lock();
1166                 ret = set_offload(tun->dev, arg);
1167                 rtnl_unlock();
1168                 break;
1169
1170         case TUNSETTXFILTER:
1171                 /* Can be set only for TAPs */
1172                 ret = -EINVAL;
1173                 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
1174                         break;
1175                 rtnl_lock();
1176                 ret = update_filter(&tun->txflt, (void __user *)arg);
1177                 rtnl_unlock();
1178                 break;
1179
1180         case SIOCGIFHWADDR:
1181                 /* Get hw addres */
1182                 memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
1183                 ifr.ifr_hwaddr.sa_family = tun->dev->type;
1184                 if (copy_to_user(argp, &ifr, sizeof ifr))
1185                         ret = -EFAULT;
1186                 break;
1187
1188         case SIOCSIFHWADDR:
1189                 /* Set hw address */
1190                 DBG(KERN_DEBUG "%s: set hw address: %pM\n",
1191                         tun->dev->name, ifr.ifr_hwaddr.sa_data);
1192
1193                 rtnl_lock();
1194                 ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
1195                 rtnl_unlock();
1196                 break;
1197
1198         case TUNGETSNDBUF:
1199                 sndbuf = tun->sk->sk_sndbuf;
1200                 if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
1201                         ret = -EFAULT;
1202                 break;
1203
1204         case TUNSETSNDBUF:
1205                 if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
1206                         ret = -EFAULT;
1207                         break;
1208                 }
1209
1210                 tun->sk->sk_sndbuf = sndbuf;
1211                 break;
1212
1213         default:
1214                 ret = -EINVAL;
1215                 break;
1216         };
1217
1218         tun_put(tun);
1219         return ret;
1220 }
1221
1222 static int tun_chr_fasync(int fd, struct file *file, int on)
1223 {
1224         struct tun_struct *tun = tun_get(file);
1225         int ret;
1226
1227         if (!tun)
1228                 return -EBADFD;
1229
1230         DBG(KERN_INFO "%s: tun_chr_fasync %d\n", tun->dev->name, on);
1231
1232         lock_kernel();
1233         if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
1234                 goto out;
1235
1236         if (on) {
1237                 ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
1238                 if (ret)
1239                         goto out;
1240                 tun->flags |= TUN_FASYNC;
1241         } else
1242                 tun->flags &= ~TUN_FASYNC;
1243         ret = 0;
1244 out:
1245         unlock_kernel();
1246         tun_put(tun);
1247         return ret;
1248 }
1249
1250 static int tun_chr_open(struct inode *inode, struct file * file)
1251 {
1252         struct tun_file *tfile;
1253         cycle_kernel_lock();
1254         DBG1(KERN_INFO "tunX: tun_chr_open\n");
1255
1256         tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
1257         if (!tfile)
1258                 return -ENOMEM;
1259         atomic_set(&tfile->count, 0);
1260         tfile->tun = NULL;
1261         tfile->net = get_net(current->nsproxy->net_ns);
1262         init_waitqueue_head(&tfile->read_wait);
1263         file->private_data = tfile;
1264         return 0;
1265 }
1266
1267 static int tun_chr_close(struct inode *inode, struct file *file)
1268 {
1269         struct tun_file *tfile = file->private_data;
1270         struct tun_struct *tun = __tun_get(tfile);
1271
1272
1273         if (tun) {
1274                 DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
1275
1276                 rtnl_lock();
1277                 __tun_detach(tun);
1278
1279                 /* If desireable, unregister the netdevice. */
1280                 if (!(tun->flags & TUN_PERSIST)) {
1281                         sock_put(tun->sk);
1282                         unregister_netdevice(tun->dev);
1283                 }
1284
1285                 rtnl_unlock();
1286         }
1287
1288         put_net(tfile->net);
1289         kfree(tfile);
1290
1291         return 0;
1292 }
1293
1294 static const struct file_operations tun_fops = {
1295         .owner  = THIS_MODULE,
1296         .llseek = no_llseek,
1297         .read  = do_sync_read,
1298         .aio_read  = tun_chr_aio_read,
1299         .write = do_sync_write,
1300         .aio_write = tun_chr_aio_write,
1301         .poll   = tun_chr_poll,
1302         .ioctl  = tun_chr_ioctl,
1303         .open   = tun_chr_open,
1304         .release = tun_chr_close,
1305         .fasync = tun_chr_fasync
1306 };
1307
1308 static struct miscdevice tun_miscdev = {
1309         .minor = TUN_MINOR,
1310         .name = "tun",
1311         .fops = &tun_fops,
1312 };
1313
1314 /* ethtool interface */
1315
1316 static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1317 {
1318         cmd->supported          = 0;
1319         cmd->advertising        = 0;
1320         cmd->speed              = SPEED_10;
1321         cmd->duplex             = DUPLEX_FULL;
1322         cmd->port               = PORT_TP;
1323         cmd->phy_address        = 0;
1324         cmd->transceiver        = XCVR_INTERNAL;
1325         cmd->autoneg            = AUTONEG_DISABLE;
1326         cmd->maxtxpkt           = 0;
1327         cmd->maxrxpkt           = 0;
1328         return 0;
1329 }
1330
1331 static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1332 {
1333         struct tun_struct *tun = netdev_priv(dev);
1334
1335         strcpy(info->driver, DRV_NAME);
1336         strcpy(info->version, DRV_VERSION);
1337         strcpy(info->fw_version, "N/A");
1338
1339         switch (tun->flags & TUN_TYPE_MASK) {
1340         case TUN_TUN_DEV:
1341                 strcpy(info->bus_info, "tun");
1342                 break;
1343         case TUN_TAP_DEV:
1344                 strcpy(info->bus_info, "tap");
1345                 break;
1346         }
1347 }
1348
1349 static u32 tun_get_msglevel(struct net_device *dev)
1350 {
1351 #ifdef TUN_DEBUG
1352         struct tun_struct *tun = netdev_priv(dev);
1353         return tun->debug;
1354 #else
1355         return -EOPNOTSUPP;
1356 #endif
1357 }
1358
1359 static void tun_set_msglevel(struct net_device *dev, u32 value)
1360 {
1361 #ifdef TUN_DEBUG
1362         struct tun_struct *tun = netdev_priv(dev);
1363         tun->debug = value;
1364 #endif
1365 }
1366
1367 static u32 tun_get_link(struct net_device *dev)
1368 {
1369         struct tun_struct *tun = netdev_priv(dev);
1370         return !!tun->tfile;
1371 }
1372
1373 static u32 tun_get_rx_csum(struct net_device *dev)
1374 {
1375         struct tun_struct *tun = netdev_priv(dev);
1376         return (tun->flags & TUN_NOCHECKSUM) == 0;
1377 }
1378
1379 static int tun_set_rx_csum(struct net_device *dev, u32 data)
1380 {
1381         struct tun_struct *tun = netdev_priv(dev);
1382         if (data)
1383                 tun->flags &= ~TUN_NOCHECKSUM;
1384         else
1385                 tun->flags |= TUN_NOCHECKSUM;
1386         return 0;
1387 }
1388
1389 static const struct ethtool_ops tun_ethtool_ops = {
1390         .get_settings   = tun_get_settings,
1391         .get_drvinfo    = tun_get_drvinfo,
1392         .get_msglevel   = tun_get_msglevel,
1393         .set_msglevel   = tun_set_msglevel,
1394         .get_link       = tun_get_link,
1395         .get_rx_csum    = tun_get_rx_csum,
1396         .set_rx_csum    = tun_set_rx_csum
1397 };
1398
1399
1400 static int __init tun_init(void)
1401 {
1402         int ret = 0;
1403
1404         printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
1405         printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);
1406
1407         ret = rtnl_link_register(&tun_link_ops);
1408         if (ret) {
1409                 printk(KERN_ERR "tun: Can't register link_ops\n");
1410                 goto err_linkops;
1411         }
1412
1413         ret = misc_register(&tun_miscdev);
1414         if (ret) {
1415                 printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
1416                 goto err_misc;
1417         }
1418         return  0;
1419 err_misc:
1420         rtnl_link_unregister(&tun_link_ops);
1421 err_linkops:
1422         return ret;
1423 }
1424
1425 static void tun_cleanup(void)
1426 {
1427         misc_deregister(&tun_miscdev);
1428         rtnl_link_unregister(&tun_link_ops);
1429 }
1430
1431 module_init(tun_init);
1432 module_exit(tun_cleanup);
1433 MODULE_DESCRIPTION(DRV_DESCRIPTION);
1434 MODULE_AUTHOR(DRV_COPYRIGHT);
1435 MODULE_LICENSE("GPL");
1436 MODULE_ALIAS_MISCDEV(TUN_MINOR);