1 /****************************************************************************
2 * Driver for Solarflare Solarstorm network controllers and boards
3 * Copyright 2005-2006 Fen Systems Ltd.
4 * Copyright 2005-2008 Solarflare Communications Inc.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
11 #include <linux/pci.h>
12 #include <linux/tcp.h>
15 #include <linux/if_ether.h>
16 #include <linux/highmem.h>
17 #include "net_driver.h"
21 #include "workarounds.h"
24 * TX descriptor ring full threshold
26 * The tx_queue descriptor ring fill-level must fall below this value
27 * before we restart the netif queue
29 #define EFX_NETDEV_TX_THRESHOLD(_tx_queue) \
30 (_tx_queue->efx->type->txd_ring_mask / 2u)
32 /* We want to be able to nest calls to netif_stop_queue(), since each
33 * channel can have an individual stop on the queue.
35 void efx_stop_queue(struct efx_nic *efx)
37 spin_lock_bh(&efx->netif_stop_lock);
38 EFX_TRACE(efx, "stop TX queue\n");
40 atomic_inc(&efx->netif_stop_count);
41 netif_stop_queue(efx->net_dev);
43 spin_unlock_bh(&efx->netif_stop_lock);
46 /* Wake netif's TX queue
47 * We want to be able to nest calls to netif_stop_queue(), since each
48 * channel can have an individual stop on the queue.
50 inline void efx_wake_queue(struct efx_nic *efx)
53 if (atomic_dec_and_lock(&efx->netif_stop_count,
54 &efx->netif_stop_lock)) {
55 EFX_TRACE(efx, "waking TX queue\n");
56 netif_wake_queue(efx->net_dev);
57 spin_unlock(&efx->netif_stop_lock);
62 static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
63 struct efx_tx_buffer *buffer)
65 if (buffer->unmap_len) {
66 struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
67 if (buffer->unmap_single)
68 pci_unmap_single(pci_dev, buffer->unmap_addr,
69 buffer->unmap_len, PCI_DMA_TODEVICE);
71 pci_unmap_page(pci_dev, buffer->unmap_addr,
72 buffer->unmap_len, PCI_DMA_TODEVICE);
73 buffer->unmap_len = 0;
74 buffer->unmap_single = 0;
78 dev_kfree_skb_any((struct sk_buff *) buffer->skb);
80 EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
81 "complete\n", tx_queue->queue, read_ptr);
87 * Add a socket buffer to a TX queue
89 * This maps all fragments of a socket buffer for DMA and adds them to
90 * the TX queue. The queue's insert pointer will be incremented by
91 * the number of fragments in the socket buffer.
93 * If any DMA mapping fails, any mapped fragments will be unmapped,
94 * the queue's insert pointer will be restored to its original value.
96 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
97 * You must hold netif_tx_lock() to call this function.
99 static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
100 const struct sk_buff *skb)
102 struct efx_nic *efx = tx_queue->efx;
103 struct pci_dev *pci_dev = efx->pci_dev;
104 struct efx_tx_buffer *buffer;
105 skb_frag_t *fragment;
108 unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
109 dma_addr_t dma_addr, unmap_addr = 0;
110 unsigned int dma_len;
111 unsigned unmap_single;
113 int rc = NETDEV_TX_OK;
115 EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
117 /* Get size of the initial fragment */
118 len = skb_headlen(skb);
120 fill_level = tx_queue->insert_count - tx_queue->old_read_count;
121 q_space = efx->type->txd_ring_mask - 1 - fill_level;
123 /* Map for DMA. Use pci_map_single rather than pci_map_page
124 * since this is more efficient on machines with sparse
128 dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
130 /* Process all fragments */
132 if (unlikely(pci_dma_mapping_error(dma_addr)))
135 /* Store fields for marking in the per-fragment final
138 unmap_addr = dma_addr;
140 /* Add to TX queue, splitting across DMA boundaries */
142 if (unlikely(q_space-- <= 0)) {
143 /* It might be that completions have
144 * happened since the xmit path last
145 * checked. Update the xmit path's
146 * copy of read_count.
149 /* This memory barrier protects the
150 * change of stopped from the access
153 tx_queue->old_read_count =
154 *(volatile unsigned *)
155 &tx_queue->read_count;
156 fill_level = (tx_queue->insert_count
157 - tx_queue->old_read_count);
158 q_space = (efx->type->txd_ring_mask - 1 -
160 if (unlikely(q_space-- <= 0))
166 insert_ptr = (tx_queue->insert_count &
167 efx->type->txd_ring_mask);
168 buffer = &tx_queue->buffer[insert_ptr];
169 EFX_BUG_ON_PARANOID(buffer->skb);
170 EFX_BUG_ON_PARANOID(buffer->len);
171 EFX_BUG_ON_PARANOID(buffer->continuation != 1);
172 EFX_BUG_ON_PARANOID(buffer->unmap_len);
174 dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
175 if (likely(dma_len > len))
178 misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
179 if (misalign && dma_len + misalign > 512)
180 dma_len = 512 - misalign;
182 /* Fill out per descriptor fields */
183 buffer->len = dma_len;
184 buffer->dma_addr = dma_addr;
187 ++tx_queue->insert_count;
190 /* Transfer ownership of the unmapping to the final buffer */
191 buffer->unmap_addr = unmap_addr;
192 buffer->unmap_single = unmap_single;
193 buffer->unmap_len = unmap_len;
196 /* Get address and size of next fragment */
197 if (i >= skb_shinfo(skb)->nr_frags)
199 fragment = &skb_shinfo(skb)->frags[i];
200 len = fragment->size;
201 page = fragment->page;
202 page_offset = fragment->page_offset;
206 dma_addr = pci_map_page(pci_dev, page, page_offset, len,
210 /* Transfer ownership of the skb to the final buffer */
212 buffer->continuation = 0;
214 /* Pass off to hardware */
215 falcon_push_buffers(tx_queue);
220 EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
221 "fragments for DMA\n", tx_queue->queue, skb->len,
222 skb_shinfo(skb)->nr_frags + 1);
224 /* Mark the packet as transmitted, and free the SKB ourselves */
225 dev_kfree_skb_any((struct sk_buff *)skb);
231 if (tx_queue->stopped == 1)
235 /* Work backwards until we hit the original insert pointer value */
236 while (tx_queue->insert_count != tx_queue->write_count) {
237 --tx_queue->insert_count;
238 insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
239 buffer = &tx_queue->buffer[insert_ptr];
240 efx_dequeue_buffer(tx_queue, buffer);
244 /* Free the fragment we were mid-way through pushing */
246 pci_unmap_page(pci_dev, unmap_addr, unmap_len,
252 /* Remove packets from the TX queue
254 * This removes packets from the TX queue, up to and including the
257 static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
260 struct efx_nic *efx = tx_queue->efx;
261 unsigned int stop_index, read_ptr;
262 unsigned int mask = tx_queue->efx->type->txd_ring_mask;
264 stop_index = (index + 1) & mask;
265 read_ptr = tx_queue->read_count & mask;
267 while (read_ptr != stop_index) {
268 struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
269 if (unlikely(buffer->len == 0)) {
270 EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
271 "completion id %x\n", tx_queue->queue,
273 efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
277 efx_dequeue_buffer(tx_queue, buffer);
278 buffer->continuation = 1;
281 ++tx_queue->read_count;
282 read_ptr = tx_queue->read_count & mask;
286 /* Initiate a packet transmission on the specified TX queue.
287 * Note that returning anything other than NETDEV_TX_OK will cause the
288 * OS to free the skb.
290 * This function is split out from efx_hard_start_xmit to allow the
291 * loopback test to direct packets via specific TX queues. It is
292 * therefore a non-static inline, so as not to penalise performance
293 * for non-loopback transmissions.
295 * Context: netif_tx_lock held
297 inline int efx_xmit(struct efx_nic *efx,
298 struct efx_tx_queue *tx_queue, struct sk_buff *skb)
302 /* Map fragments for DMA and add to TX queue */
303 rc = efx_enqueue_skb(tx_queue, skb);
304 if (unlikely(rc != NETDEV_TX_OK))
307 /* Update last TX timer */
308 efx->net_dev->trans_start = jiffies;
314 /* Initiate a packet transmission. We use one channel per CPU
315 * (sharing when we have more CPUs than channels). On Falcon, the TX
316 * completion events will be directed back to the CPU that transmitted
317 * the packet, which should be cache-efficient.
319 * Context: non-blocking.
320 * Note that returning anything other than NETDEV_TX_OK will cause the
321 * OS to free the skb.
323 int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
325 struct efx_nic *efx = net_dev->priv;
326 return efx_xmit(efx, &efx->tx_queue[0], skb);
329 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
332 struct efx_nic *efx = tx_queue->efx;
334 EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);
336 efx_dequeue_buffers(tx_queue, index);
338 /* See if we need to restart the netif queue. This barrier
339 * separates the update of read_count from the test of
342 if (unlikely(tx_queue->stopped)) {
343 fill_level = tx_queue->insert_count - tx_queue->read_count;
344 if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
345 EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx));
347 /* Do this under netif_tx_lock(), to avoid racing
348 * with efx_xmit(). */
349 netif_tx_lock(efx->net_dev);
350 if (tx_queue->stopped) {
351 tx_queue->stopped = 0;
354 netif_tx_unlock(efx->net_dev);
359 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
361 struct efx_nic *efx = tx_queue->efx;
362 unsigned int txq_size;
365 EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);
367 /* Allocate software ring */
368 txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
369 tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
370 if (!tx_queue->buffer) {
374 for (i = 0; i <= efx->type->txd_ring_mask; ++i)
375 tx_queue->buffer[i].continuation = 1;
377 /* Allocate hardware ring */
378 rc = falcon_probe_tx(tx_queue);
385 kfree(tx_queue->buffer);
386 tx_queue->buffer = NULL;
393 int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
395 EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
397 tx_queue->insert_count = 0;
398 tx_queue->write_count = 0;
399 tx_queue->read_count = 0;
400 tx_queue->old_read_count = 0;
401 BUG_ON(tx_queue->stopped);
403 /* Set up TX descriptor ring */
404 return falcon_init_tx(tx_queue);
407 void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
409 struct efx_tx_buffer *buffer;
411 if (!tx_queue->buffer)
414 /* Free any buffers left in the ring */
415 while (tx_queue->read_count != tx_queue->write_count) {
416 buffer = &tx_queue->buffer[tx_queue->read_count &
417 tx_queue->efx->type->txd_ring_mask];
418 efx_dequeue_buffer(tx_queue, buffer);
419 buffer->continuation = 1;
422 ++tx_queue->read_count;
426 void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
428 EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);
430 /* Flush TX queue, remove descriptor ring */
431 falcon_fini_tx(tx_queue);
433 efx_release_tx_buffers(tx_queue);
435 /* Release queue's stop on port, if any */
436 if (tx_queue->stopped) {
437 tx_queue->stopped = 0;
438 efx_wake_queue(tx_queue->efx);
442 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
444 EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
445 falcon_remove_tx(tx_queue);
447 kfree(tx_queue->buffer);
448 tx_queue->buffer = NULL;