Merge branch 'kvm-updates-2.6.27' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6] / drivers / net / virtio_net.c
1 /* A simple network driver using virtio.
2  *
3  * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19 //#define DEBUG
20 #include <linux/netdevice.h>
21 #include <linux/etherdevice.h>
22 #include <linux/module.h>
23 #include <linux/virtio.h>
24 #include <linux/virtio_net.h>
25 #include <linux/scatterlist.h>
26
27 static int napi_weight = 128;
28 module_param(napi_weight, int, 0444);
29
30 static int csum = 1, gso = 1;
31 module_param(csum, bool, 0444);
32 module_param(gso, bool, 0444);
33
34 /* FIXME: MTU in config. */
35 #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
36
37 struct virtnet_info
38 {
39         struct virtio_device *vdev;
40         struct virtqueue *rvq, *svq;
41         struct net_device *dev;
42         struct napi_struct napi;
43
44         /* The skb we couldn't send because buffers were full. */
45         struct sk_buff *last_xmit_skb;
46
47         /* If we need to free in a timer, this is it. */
48         struct timer_list xmit_free_timer;
49
50         /* Number of input buffers, and max we've ever had. */
51         unsigned int num, max;
52
53         /* For cleaning up after transmission. */
54         struct tasklet_struct tasklet;
55         bool free_in_tasklet;
56
57         /* Receive & send queues. */
58         struct sk_buff_head recv;
59         struct sk_buff_head send;
60 };
61
62 static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
63 {
64         return (struct virtio_net_hdr *)skb->cb;
65 }
66
67 static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
68 {
69         sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
70 }
71
72 static void skb_xmit_done(struct virtqueue *svq)
73 {
74         struct virtnet_info *vi = svq->vdev->priv;
75
76         /* Suppress further interrupts. */
77         svq->vq_ops->disable_cb(svq);
78
79         /* We were probably waiting for more output buffers. */
80         netif_wake_queue(vi->dev);
81
82         /* Make sure we re-xmit last_xmit_skb: if there are no more packets
83          * queued, start_xmit won't be called. */
84         tasklet_schedule(&vi->tasklet);
85 }
86
87 static void receive_skb(struct net_device *dev, struct sk_buff *skb,
88                         unsigned len)
89 {
90         struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
91
92         if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
93                 pr_debug("%s: short packet %i\n", dev->name, len);
94                 dev->stats.rx_length_errors++;
95                 goto drop;
96         }
97         len -= sizeof(struct virtio_net_hdr);
98         BUG_ON(len > MAX_PACKET_LEN);
99
100         skb_trim(skb, len);
101
102         dev->stats.rx_bytes += skb->len;
103         dev->stats.rx_packets++;
104
105         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
106                 pr_debug("Needs csum!\n");
107                 if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
108                         goto frame_err;
109         }
110
111         skb->protocol = eth_type_trans(skb, dev);
112         pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
113                  ntohs(skb->protocol), skb->len, skb->pkt_type);
114
115         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
116                 pr_debug("GSO!\n");
117                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
118                 case VIRTIO_NET_HDR_GSO_TCPV4:
119                         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
120                         break;
121                 case VIRTIO_NET_HDR_GSO_UDP:
122                         skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
123                         break;
124                 case VIRTIO_NET_HDR_GSO_TCPV6:
125                         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
126                         break;
127                 default:
128                         if (net_ratelimit())
129                                 printk(KERN_WARNING "%s: bad gso type %u.\n",
130                                        dev->name, hdr->gso_type);
131                         goto frame_err;
132                 }
133
134                 if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
135                         skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
136
137                 skb_shinfo(skb)->gso_size = hdr->gso_size;
138                 if (skb_shinfo(skb)->gso_size == 0) {
139                         if (net_ratelimit())
140                                 printk(KERN_WARNING "%s: zero gso size.\n",
141                                        dev->name);
142                         goto frame_err;
143                 }
144
145                 /* Header must be checked, and gso_segs computed. */
146                 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
147                 skb_shinfo(skb)->gso_segs = 0;
148         }
149
150         netif_receive_skb(skb);
151         return;
152
153 frame_err:
154         dev->stats.rx_frame_errors++;
155 drop:
156         dev_kfree_skb(skb);
157 }
158
159 static void try_fill_recv(struct virtnet_info *vi)
160 {
161         struct sk_buff *skb;
162         struct scatterlist sg[2+MAX_SKB_FRAGS];
163         int num, err;
164
165         sg_init_table(sg, 2+MAX_SKB_FRAGS);
166         for (;;) {
167                 skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
168                 if (unlikely(!skb))
169                         break;
170
171                 skb_put(skb, MAX_PACKET_LEN);
172                 vnet_hdr_to_sg(sg, skb);
173                 num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
174                 skb_queue_head(&vi->recv, skb);
175
176                 err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
177                 if (err) {
178                         skb_unlink(skb, &vi->recv);
179                         kfree_skb(skb);
180                         break;
181                 }
182                 vi->num++;
183         }
184         if (unlikely(vi->num > vi->max))
185                 vi->max = vi->num;
186         vi->rvq->vq_ops->kick(vi->rvq);
187 }
188
189 static void skb_recv_done(struct virtqueue *rvq)
190 {
191         struct virtnet_info *vi = rvq->vdev->priv;
192         /* Schedule NAPI, Suppress further interrupts if successful. */
193         if (netif_rx_schedule_prep(vi->dev, &vi->napi)) {
194                 rvq->vq_ops->disable_cb(rvq);
195                 __netif_rx_schedule(vi->dev, &vi->napi);
196         }
197 }
198
199 static int virtnet_poll(struct napi_struct *napi, int budget)
200 {
201         struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
202         struct sk_buff *skb = NULL;
203         unsigned int len, received = 0;
204
205 again:
206         while (received < budget &&
207                (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
208                 __skb_unlink(skb, &vi->recv);
209                 receive_skb(vi->dev, skb, len);
210                 vi->num--;
211                 received++;
212         }
213
214         /* FIXME: If we oom and completely run out of inbufs, we need
215          * to start a timer trying to fill more. */
216         if (vi->num < vi->max / 2)
217                 try_fill_recv(vi);
218
219         /* Out of packets? */
220         if (received < budget) {
221                 netif_rx_complete(vi->dev, napi);
222                 if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
223                     && napi_schedule_prep(napi)) {
224                         vi->rvq->vq_ops->disable_cb(vi->rvq);
225                         __netif_rx_schedule(vi->dev, napi);
226                         goto again;
227                 }
228         }
229
230         return received;
231 }
232
233 static void free_old_xmit_skbs(struct virtnet_info *vi)
234 {
235         struct sk_buff *skb;
236         unsigned int len;
237
238         while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
239                 pr_debug("Sent skb %p\n", skb);
240                 __skb_unlink(skb, &vi->send);
241                 vi->dev->stats.tx_bytes += skb->len;
242                 vi->dev->stats.tx_packets++;
243                 kfree_skb(skb);
244         }
245 }
246
247 /* If the virtio transport doesn't always notify us when all in-flight packets
248  * are consumed, we fall back to using this function on a timer to free them. */
249 static void xmit_free(unsigned long data)
250 {
251         struct virtnet_info *vi = (void *)data;
252
253         netif_tx_lock(vi->dev);
254
255         free_old_xmit_skbs(vi);
256
257         if (!skb_queue_empty(&vi->send))
258                 mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
259
260         netif_tx_unlock(vi->dev);
261 }
262
263 static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
264 {
265         int num, err;
266         struct scatterlist sg[2+MAX_SKB_FRAGS];
267         struct virtio_net_hdr *hdr;
268         const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
269
270         sg_init_table(sg, 2+MAX_SKB_FRAGS);
271
272         pr_debug("%s: xmit %p " MAC_FMT "\n", vi->dev->name, skb,
273                  dest[0], dest[1], dest[2],
274                  dest[3], dest[4], dest[5]);
275
276         /* Encode metadata header at front. */
277         hdr = skb_vnet_hdr(skb);
278         if (skb->ip_summed == CHECKSUM_PARTIAL) {
279                 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
280                 hdr->csum_start = skb->csum_start - skb_headroom(skb);
281                 hdr->csum_offset = skb->csum_offset;
282         } else {
283                 hdr->flags = 0;
284                 hdr->csum_offset = hdr->csum_start = 0;
285         }
286
287         if (skb_is_gso(skb)) {
288                 hdr->hdr_len = skb_transport_header(skb) - skb->data;
289                 hdr->gso_size = skb_shinfo(skb)->gso_size;
290                 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
291                         hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
292                 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
293                         hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
294                 else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
295                         hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
296                 else
297                         BUG();
298                 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
299                         hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
300         } else {
301                 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
302                 hdr->gso_size = hdr->hdr_len = 0;
303         }
304
305         vnet_hdr_to_sg(sg, skb);
306         num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
307
308         err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
309         if (!err && !vi->free_in_tasklet)
310                 mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
311
312         return err;
313 }
314
315 static void xmit_tasklet(unsigned long data)
316 {
317         struct virtnet_info *vi = (void *)data;
318
319         netif_tx_lock_bh(vi->dev);
320         if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) {
321                 vi->svq->vq_ops->kick(vi->svq);
322                 vi->last_xmit_skb = NULL;
323         }
324         if (vi->free_in_tasklet)
325                 free_old_xmit_skbs(vi);
326         netif_tx_unlock_bh(vi->dev);
327 }
328
329 static int start_xmit(struct sk_buff *skb, struct net_device *dev)
330 {
331         struct virtnet_info *vi = netdev_priv(dev);
332
333 again:
334         /* Free up any pending old buffers before queueing new ones. */
335         free_old_xmit_skbs(vi);
336
337         /* If we has a buffer left over from last time, send it now. */
338         if (unlikely(vi->last_xmit_skb)) {
339                 if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
340                         /* Drop this skb: we only queue one. */
341                         vi->dev->stats.tx_dropped++;
342                         kfree_skb(skb);
343                         skb = NULL;
344                         goto stop_queue;
345                 }
346                 vi->last_xmit_skb = NULL;
347         }
348
349         /* Put new one in send queue and do transmit */
350         if (likely(skb)) {
351                 __skb_queue_head(&vi->send, skb);
352                 if (xmit_skb(vi, skb) != 0) {
353                         vi->last_xmit_skb = skb;
354                         skb = NULL;
355                         goto stop_queue;
356                 }
357         }
358 done:
359         vi->svq->vq_ops->kick(vi->svq);
360         return NETDEV_TX_OK;
361
362 stop_queue:
363         pr_debug("%s: virtio not prepared to send\n", dev->name);
364         netif_stop_queue(dev);
365
366         /* Activate callback for using skbs: if this returns false it
367          * means some were used in the meantime. */
368         if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
369                 vi->svq->vq_ops->disable_cb(vi->svq);
370                 netif_start_queue(dev);
371                 goto again;
372         }
373         goto done;
374 }
375
#ifdef CONFIG_NET_POLL_CONTROLLER
/* poll_controller hook: just schedule our NAPI context. */
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *info = netdev_priv(dev);
	struct napi_struct *napi = &info->napi;

	napi_schedule(napi);
}
#endif
384
385 static int virtnet_open(struct net_device *dev)
386 {
387         struct virtnet_info *vi = netdev_priv(dev);
388
389         napi_enable(&vi->napi);
390
391         /* If all buffers were filled by other side before we napi_enabled, we
392          * won't get another interrupt, so process any outstanding packets
393          * now.  virtnet_poll wants re-enable the queue, so we disable here.
394          * We synchronize against interrupts via NAPI_STATE_SCHED */
395         if (netif_rx_schedule_prep(dev, &vi->napi)) {
396                 vi->rvq->vq_ops->disable_cb(vi->rvq);
397                 __netif_rx_schedule(dev, &vi->napi);
398         }
399         return 0;
400 }
401
402 static int virtnet_close(struct net_device *dev)
403 {
404         struct virtnet_info *vi = netdev_priv(dev);
405
406         napi_disable(&vi->napi);
407
408         return 0;
409 }
410
411 static int virtnet_probe(struct virtio_device *vdev)
412 {
413         int err;
414         struct net_device *dev;
415         struct virtnet_info *vi;
416
417         /* Allocate ourselves a network device with room for our info */
418         dev = alloc_etherdev(sizeof(struct virtnet_info));
419         if (!dev)
420                 return -ENOMEM;
421
422         /* Set up network device as normal. */
423         dev->open = virtnet_open;
424         dev->stop = virtnet_close;
425         dev->hard_start_xmit = start_xmit;
426         dev->features = NETIF_F_HIGHDMA;
427 #ifdef CONFIG_NET_POLL_CONTROLLER
428         dev->poll_controller = virtnet_netpoll;
429 #endif
430         SET_NETDEV_DEV(dev, &vdev->dev);
431
432         /* Do we support "hardware" checksums? */
433         if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
434                 /* This opens up the world of extra features. */
435                 dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
436                 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
437                         dev->features |= NETIF_F_TSO | NETIF_F_UFO
438                                 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
439                 }
440                 /* Individual feature bits: what can host handle? */
441                 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
442                         dev->features |= NETIF_F_TSO;
443                 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
444                         dev->features |= NETIF_F_TSO6;
445                 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
446                         dev->features |= NETIF_F_TSO_ECN;
447                 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
448                         dev->features |= NETIF_F_UFO;
449         }
450
451         /* Configuration may specify what MAC to use.  Otherwise random. */
452         if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
453                 vdev->config->get(vdev,
454                                   offsetof(struct virtio_net_config, mac),
455                                   dev->dev_addr, dev->addr_len);
456         } else
457                 random_ether_addr(dev->dev_addr);
458
459         /* Set up our device-specific information */
460         vi = netdev_priv(dev);
461         netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
462         vi->dev = dev;
463         vi->vdev = vdev;
464         vdev->priv = vi;
465
466         /* If they give us a callback when all buffers are done, we don't need
467          * the timer. */
468         vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
469
470         /* We expect two virtqueues, receive then send. */
471         vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
472         if (IS_ERR(vi->rvq)) {
473                 err = PTR_ERR(vi->rvq);
474                 goto free;
475         }
476
477         vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
478         if (IS_ERR(vi->svq)) {
479                 err = PTR_ERR(vi->svq);
480                 goto free_recv;
481         }
482
483         /* Initialize our empty receive and send queues. */
484         skb_queue_head_init(&vi->recv);
485         skb_queue_head_init(&vi->send);
486
487         tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi);
488
489         if (!vi->free_in_tasklet)
490                 setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi);
491
492         err = register_netdev(dev);
493         if (err) {
494                 pr_debug("virtio_net: registering device failed\n");
495                 goto free_send;
496         }
497
498         /* Last of all, set up some receive buffers. */
499         try_fill_recv(vi);
500
501         /* If we didn't even get one input buffer, we're useless. */
502         if (vi->num == 0) {
503                 err = -ENOMEM;
504                 goto unregister;
505         }
506
507         pr_debug("virtnet: registered device %s\n", dev->name);
508         return 0;
509
510 unregister:
511         unregister_netdev(dev);
512 free_send:
513         vdev->config->del_vq(vi->svq);
514 free_recv:
515         vdev->config->del_vq(vi->rvq);
516 free:
517         free_netdev(dev);
518         return err;
519 }
520
521 static void virtnet_remove(struct virtio_device *vdev)
522 {
523         struct virtnet_info *vi = vdev->priv;
524         struct sk_buff *skb;
525
526         /* Stop all the virtqueues. */
527         vdev->config->reset(vdev);
528
529         if (!vi->free_in_tasklet)
530                 del_timer_sync(&vi->xmit_free_timer);
531
532         /* Free our skbs in send and recv queues, if any. */
533         while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
534                 kfree_skb(skb);
535                 vi->num--;
536         }
537         __skb_queue_purge(&vi->send);
538
539         BUG_ON(vi->num != 0);
540
541         vdev->config->del_vq(vi->svq);
542         vdev->config->del_vq(vi->rvq);
543         unregister_netdev(vi->dev);
544         free_netdev(vi->dev);
545 }
546
547 static struct virtio_device_id id_table[] = {
548         { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
549         { 0 },
550 };
551
552 static unsigned int features[] = {
553         VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
554         VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
555         VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
556         VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY,
557 };
558
559 static struct virtio_driver virtio_net = {
560         .feature_table = features,
561         .feature_table_size = ARRAY_SIZE(features),
562         .driver.name =  KBUILD_MODNAME,
563         .driver.owner = THIS_MODULE,
564         .id_table =     id_table,
565         .probe =        virtnet_probe,
566         .remove =       __devexit_p(virtnet_remove),
567 };
568
569 static int __init init(void)
570 {
571         return register_virtio_driver(&virtio_net);
572 }
573
574 static void __exit fini(void)
575 {
576         unregister_virtio_driver(&virtio_net);
577 }
578 module_init(init);
579 module_exit(fini);
580
581 MODULE_DEVICE_TABLE(virtio, id_table);
582 MODULE_DESCRIPTION("Virtio network driver");
583 MODULE_LICENSE("GPL");