2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_vlan.h>
37 #include <linux/tcp.h>
38 #include <linux/dma-mapping.h>
44 #include "firmware_exports.h"
48 #define SGE_RX_SM_BUF_SIZE 1536
50 #define SGE_RX_COPY_THRES 256
51 #define SGE_RX_PULL_LEN 128
53 #define SGE_PG_RSVD SMP_CACHE_BYTES
55 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
56 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
59 #define FL0_PG_CHUNK_SIZE 2048
60 #define FL0_PG_ORDER 0
61 #define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
62 #define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
63 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
64 #define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
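/*
 * For example, with 4 KB pages FL0 carves each page into two 2 KB chunks,
 * while FL1 uses order-1 (8 KB) allocations that hold a single 8 KB chunk.
 * With 64 KB pages FL1 instead uses 16 KB chunks, four per page.
 */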
66 #define SGE_RX_DROP_THRES 16
67 #define RX_RECLAIM_PERIOD (HZ/4)
70 * Max number of Rx buffers we replenish at a time.
72 #define MAX_RX_REFILL 16U
74 * Period of the Tx buffer reclaim timer. This timer does not need to run
75 * frequently as Tx buffers are usually reclaimed by new Tx packets.
77 #define TX_RECLAIM_PERIOD (HZ / 4)
78 #define TX_RECLAIM_TIMER_CHUNK 64U
79 #define TX_RECLAIM_CHUNK 16U
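/*
 * In other words, both reclaim timers fire every quarter of a second; the
 * timer path reclaims up to 64 descriptors per run, while the xmit path
 * reclaims at most 16 at a time.
 */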
81 /* WR size in bytes */
82 #define WR_LEN (WR_FLITS * 8)
85 * Types of Tx queues in each queue set. Order here matters, do not change.
87 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
89 /* Values for sge_txq.flags */
91 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
92 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
96 __be64 flit[TX_DESC_FLITS];
106 struct tx_sw_desc { /* SW state per Tx descriptor */
108 u8 eop; /* set if last descriptor for packet */
109 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
110 u8 fragidx; /* first page fragment associated with descriptor */
111 s8 sflit; /* start flit of first SGL entry in descriptor */
114 struct rx_sw_desc { /* SW state per Rx descriptor */
117 struct fl_pg_chunk pg_chunk;
119 DECLARE_PCI_UNMAP_ADDR(dma_addr);
122 struct rsp_desc { /* response queue descriptor */
123 struct rss_header rss_hdr;
131 * Holds unmapping information for Tx packets that need deferred unmapping.
132 * This structure lives at skb->head and must be allocated by callers.
134 struct deferred_unmap_info {
135 struct pci_dev *pdev;
136 dma_addr_t addr[MAX_SKB_FRAGS + 1];
140 * Maps a number of flits to the number of Tx descriptors that can hold them.
143 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
145 * HW allows up to 4 descriptors to be combined into a WR.
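 *
 * For example, in the SGE_NUM_GENBITS == 2 case the table below implies
 * WR_FLITS == 15: 1-15 flits fit in a single descriptor, and e.g. 20 flits
 * map to 1 + (20 - 2) / 14 = 2 descriptors.
 */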
147 static u8 flit_desc_map[] = {
149 #if SGE_NUM_GENBITS == 1
150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
151 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
152 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
153 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
154 #elif SGE_NUM_GENBITS == 2
155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
157 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
158 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
160 # error "SGE_NUM_GENBITS must be 1 or 2"
164 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
166 return container_of(q, struct sge_qset, fl[qidx]);
169 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
171 return container_of(q, struct sge_qset, rspq);
174 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
176 return container_of(q, struct sge_qset, txq[qidx]);
180 * refill_rspq - replenish an SGE response queue
181 * @adapter: the adapter
182 * @q: the response queue to replenish
183 * @credits: how many new responses to make available
185 * Replenishes a response queue by making the supplied number of responses
188 static inline void refill_rspq(struct adapter *adapter,
189 const struct sge_rspq *q, unsigned int credits)
192 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
193 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
197 * need_skb_unmap - does the platform need unmapping of sk_buffs?
199 * Returns true if the platform needs sk_buff unmapping. The result is a
200 * compile-time constant, so the compiler optimizes away unnecessary code.
202 static inline int need_skb_unmap(void)
205 * This structure is used to tell if the platform needs buffer
206 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
209 DECLARE_PCI_UNMAP_ADDR(addr);
212 return sizeof(struct dummy) != 0;
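/*
 * When DECLARE_PCI_UNMAP_ADDR expands to nothing, struct dummy is empty and
 * (as a GCC extension) has size 0, so this returns a compile-time 0 and any
 * unmapping code guarded by it is dropped by the compiler.
 */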
216 * unmap_skb - unmap a packet main body and its page fragments
218 * @q: the Tx queue containing Tx descriptors for the packet
219 * @cidx: index of Tx descriptor
220 * @pdev: the PCI device
222 * Unmap the main body of an sk_buff and its page fragments, if any.
223 * Because of the fairly complicated structure of our SGLs and the desire
224 * to conserve space for metadata, the information necessary to unmap an
225 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
226 * descriptors (the physical addresses of the various data buffers), and
227 * the SW descriptor state (assorted indices). The send functions
228 * initialize the indices for the first packet descriptor so we can unmap
229 * the buffers held in the first Tx descriptor here, and we have enough
230 * information at this point to set the state for the next Tx descriptor.
232 * Note that it is possible to clean up the first descriptor of a packet
233 * before the send routines have written the next descriptors, but this
234 * race does not cause any problem. We just end up writing the unmapping
235 * info for the descriptor first.
237 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
238 unsigned int cidx, struct pci_dev *pdev)
240 const struct sg_ent *sgp;
241 struct tx_sw_desc *d = &q->sdesc[cidx];
242 int nfrags, frag_idx, curflit, j = d->addr_idx;
244 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
245 frag_idx = d->fragidx;
247 if (frag_idx == 0 && skb_headlen(skb)) {
248 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
249 skb_headlen(skb), PCI_DMA_TODEVICE);
253 curflit = d->sflit + 1 + j;
254 nfrags = skb_shinfo(skb)->nr_frags;
256 while (frag_idx < nfrags && curflit < WR_FLITS) {
257 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
258 skb_shinfo(skb)->frags[frag_idx].size,
269 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
270 d = cidx + 1 == q->size ? q->sdesc : d + 1;
271 d->fragidx = frag_idx;
273 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
278 * free_tx_desc - reclaims Tx descriptors and their buffers
279 * @adapter: the adapter
280 * @q: the Tx queue to reclaim descriptors from
281 * @n: the number of descriptors to reclaim
283 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
284 * Tx buffers. Called with the Tx queue lock held.
286 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
289 struct tx_sw_desc *d;
290 struct pci_dev *pdev = adapter->pdev;
291 unsigned int cidx = q->cidx;
293 const int need_unmap = need_skb_unmap() &&
294 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
298 if (d->skb) { /* an SGL is present */
300 unmap_skb(d->skb, q, cidx, pdev);
305 if (++cidx == q->size) {
314 * reclaim_completed_tx - reclaims completed Tx descriptors
315 * @adapter: the adapter
316 * @q: the Tx queue to reclaim completed descriptors from
317 * @chunk: maximum number of descriptors to reclaim
319 * Reclaims Tx descriptors that the SGE has indicated it has processed,
320 * and frees the associated buffers if possible. Called with the Tx
323 static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
327 unsigned int reclaim = q->processed - q->cleaned;
329 reclaim = min(chunk, reclaim);
331 free_tx_desc(adapter, q, reclaim);
332 q->cleaned += reclaim;
333 q->in_use -= reclaim;
335 return q->processed - q->cleaned;
339 * should_restart_tx - are there enough resources to restart a Tx queue?
342 * Checks if there are enough descriptors to restart a suspended Tx queue.
344 static inline int should_restart_tx(const struct sge_txq *q)
346 unsigned int r = q->processed - q->cleaned;
348 return q->in_use - r < (q->size >> 1);
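/*
 * r counts descriptors the HW has finished with but that we have not yet
 * reclaimed, so the queue is restartable once fewer than half of the ring's
 * descriptors are genuinely outstanding.
 */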
351 static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
352 struct rx_sw_desc *d)
354 if (q->use_pages && d->pg_chunk.page) {
355 (*d->pg_chunk.p_cnt)--;
356 if (!*d->pg_chunk.p_cnt)
358 pci_unmap_addr(&d->pg_chunk, mapping),
359 q->alloc_size, PCI_DMA_FROMDEVICE);
361 put_page(d->pg_chunk.page);
362 d->pg_chunk.page = NULL;
364 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
365 q->buf_size, PCI_DMA_FROMDEVICE);
372 * free_rx_bufs - free the Rx buffers on an SGE free list
373 * @pdev: the PCI device associated with the adapter
374 * @rxq: the SGE free list to clean up
376 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
377 * this queue should be stopped before calling this function.
379 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
381 unsigned int cidx = q->cidx;
383 while (q->credits--) {
384 struct rx_sw_desc *d = &q->sdesc[cidx];
387 clear_rx_desc(pdev, q, d);
388 if (++cidx == q->size)
392 if (q->pg_chunk.page) {
393 __free_pages(q->pg_chunk.page, q->order);
394 q->pg_chunk.page = NULL;
399 * add_one_rx_buf - add a packet buffer to a free-buffer list
400 * @va: buffer start VA
401 * @len: the buffer length
402 * @d: the HW Rx descriptor to write
403 * @sd: the SW Rx descriptor to write
404 * @gen: the generation bit value
405 * @pdev: the PCI device associated with the adapter
407 * Add a buffer of the given length to the supplied HW and SW Rx
410 static inline int add_one_rx_buf(void *va, unsigned int len,
411 struct rx_desc *d, struct rx_sw_desc *sd,
412 unsigned int gen, struct pci_dev *pdev)
416 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
417 if (unlikely(pci_dma_mapping_error(pdev, mapping)))
420 pci_unmap_addr_set(sd, dma_addr, mapping);
422 d->addr_lo = cpu_to_be32(mapping);
423 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
425 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
426 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
430 static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
433 d->addr_lo = cpu_to_be32(mapping);
434 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
436 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
437 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
441 static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
442 struct rx_sw_desc *sd, gfp_t gfp,
445 if (!q->pg_chunk.page) {
448 q->pg_chunk.page = alloc_pages(gfp, order);
449 if (unlikely(!q->pg_chunk.page))
451 q->pg_chunk.va = page_address(q->pg_chunk.page);
452 q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
454 q->pg_chunk.offset = 0;
455 mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
456 0, q->alloc_size, PCI_DMA_FROMDEVICE);
457 pci_unmap_addr_set(&q->pg_chunk, mapping, mapping);
459 sd->pg_chunk = q->pg_chunk;
461 prefetch(sd->pg_chunk.p_cnt);
463 q->pg_chunk.offset += q->buf_size;
464 if (q->pg_chunk.offset == (PAGE_SIZE << order))
465 q->pg_chunk.page = NULL;
467 q->pg_chunk.va += q->buf_size;
468 get_page(q->pg_chunk.page);
471 if (sd->pg_chunk.offset == 0)
472 *sd->pg_chunk.p_cnt = 1;
474 *sd->pg_chunk.p_cnt += 1;
479 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
481 if (q->pend_cred >= q->credits / 4) {
483 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
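/*
 * Free-list credits are returned in batches: the doorbell above is rung only
 * once the pending credits reach a quarter of those currently on the list.
 */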
488 * refill_fl - refill an SGE free-buffer list
489 * @adapter: the adapter
490 * @q: the free-list to refill
491 * @n: the number of new buffers to allocate
492 * @gfp: the gfp flags for allocating new buffers
494 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
495 * allocated with the supplied gfp flags. The caller must ensure that
496 * @n does not exceed the queue's capacity.
498 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
500 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
501 struct rx_desc *d = &q->desc[q->pidx];
502 unsigned int count = 0;
509 if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
511 nomem: q->alloc_failed++;
514 mapping = pci_unmap_addr(&sd->pg_chunk, mapping) +
516 pci_unmap_addr_set(sd, dma_addr, mapping);
518 add_one_rx_chunk(mapping, d, q->gen);
519 pci_dma_sync_single_for_device(adap->pdev, mapping,
520 q->buf_size - SGE_PG_RSVD,
525 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
530 buf_start = skb->data;
531 err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
534 clear_rx_desc(adap->pdev, q, sd);
541 if (++q->pidx == q->size) {
551 q->pend_cred += count;
557 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
559 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
560 GFP_ATOMIC | __GFP_COMP);
564 * recycle_rx_buf - recycle a receive buffer
565 * @adapter: the adapter
566 * @q: the SGE free list
567 * @idx: index of buffer to recycle
569 * Recycles the specified buffer on the given free list by adding it at
570 * the next available slot on the list.
572 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
575 struct rx_desc *from = &q->desc[idx];
576 struct rx_desc *to = &q->desc[q->pidx];
578 q->sdesc[q->pidx] = q->sdesc[idx];
579 to->addr_lo = from->addr_lo; /* already big endian */
580 to->addr_hi = from->addr_hi; /* likewise */
582 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
583 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
585 if (++q->pidx == q->size) {
596 * alloc_ring - allocate resources for an SGE descriptor ring
597 * @pdev: the PCI device
598 * @nelem: the number of descriptors
599 * @elem_size: the size of each descriptor
600 * @sw_size: the size of the SW state associated with each ring element
601 * @phys: the physical address of the allocated ring
602 * @metadata: address of the array holding the SW state for the ring
604 * Allocates resources for an SGE descriptor ring, such as Tx queues,
605 * free buffer lists, or response queues. Each SGE ring requires
606 * space for its HW descriptors plus, optionally, space for the SW state
607 * associated with each HW entry (the metadata). The function returns
608 * three values: the virtual address for the HW ring (the return value
609 * of the function), the physical address of the HW ring, and the address
612 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
613 size_t sw_size, dma_addr_t * phys, void *metadata)
615 size_t len = nelem * elem_size;
617 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
621 if (sw_size && metadata) {
622 s = kcalloc(nelem, sw_size, GFP_KERNEL);
625 dma_free_coherent(&pdev->dev, len, p, *phys);
628 *(void **)metadata = s;
635 * t3_reset_qset - reset an SGE qset
638 * Reset the qset structure.
639 * The NAPI structure is preserved in the event of
640 * the qset's reincarnation, for example during EEH recovery.
642 static void t3_reset_qset(struct sge_qset *q)
645 !(q->adap->flags & NAPI_INIT)) {
646 memset(q, 0, sizeof(*q));
651 memset(&q->rspq, 0, sizeof(q->rspq));
652 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
653 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
655 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
656 q->rx_reclaim_timer.function = NULL;
658 napi_free_frags(&q->napi);
663 * free_qset - free the resources of an SGE queue set
664 * @adapter: the adapter owning the queue set
667 * Release the HW and SW resources associated with an SGE queue set, such
668 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
669 * queue set must be quiesced prior to calling this.
671 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
674 struct pci_dev *pdev = adapter->pdev;
676 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
678 spin_lock_irq(&adapter->sge.reg_lock);
679 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
680 spin_unlock_irq(&adapter->sge.reg_lock);
681 free_rx_bufs(pdev, &q->fl[i]);
682 kfree(q->fl[i].sdesc);
683 dma_free_coherent(&pdev->dev,
685 sizeof(struct rx_desc), q->fl[i].desc,
689 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
690 if (q->txq[i].desc) {
691 spin_lock_irq(&adapter->sge.reg_lock);
692 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
693 spin_unlock_irq(&adapter->sge.reg_lock);
694 if (q->txq[i].sdesc) {
695 free_tx_desc(adapter, &q->txq[i],
697 kfree(q->txq[i].sdesc);
699 dma_free_coherent(&pdev->dev,
701 sizeof(struct tx_desc),
702 q->txq[i].desc, q->txq[i].phys_addr);
703 __skb_queue_purge(&q->txq[i].sendq);
707 spin_lock_irq(&adapter->sge.reg_lock);
708 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
709 spin_unlock_irq(&adapter->sge.reg_lock);
710 dma_free_coherent(&pdev->dev,
711 q->rspq.size * sizeof(struct rsp_desc),
712 q->rspq.desc, q->rspq.phys_addr);
719 * init_qset_cntxt - initialize an SGE queue set context info
721 * @id: the queue set id
723 * Initializes the TIDs and context ids for the queues of a queue set.
725 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
727 qs->rspq.cntxt_id = id;
728 qs->fl[0].cntxt_id = 2 * id;
729 qs->fl[1].cntxt_id = 2 * id + 1;
730 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
731 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
732 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
733 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
734 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
738 * sgl_len - calculates the size of an SGL of the given capacity
739 * @n: the number of SGL entries
741 * Calculates the number of flits needed for a scatter/gather list that
742 * can hold the given number of entries.
744 static inline unsigned int sgl_len(unsigned int n)
746 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
747 return (3 * n) / 2 + (n & 1);
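/*
 * Each struct sg_ent packs two addresses and two lengths into 3 flits, so a
 * pair of entries costs 3 flits and a trailing odd entry costs 2; e.g.
 * sgl_len(3) == (3 * 3) / 2 + 1 == 5.
 */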
751 * flits_to_desc - returns the num of Tx descriptors for the given flits
752 * @n: the number of flits
754 * Calculates the number of Tx descriptors needed for the supplied number
757 static inline unsigned int flits_to_desc(unsigned int n)
759 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
760 return flit_desc_map[n];
764 * get_packet - return the next ingress packet buffer from a free list
765 * @adap: the adapter that received the packet
766 * @fl: the SGE free list holding the packet
767 * @len: the packet length including any SGE padding
768 * @drop_thres: # of remaining buffers before we start dropping packets
770 * Get the next packet from a free list and complete setup of the
771 * sk_buff. If the packet is small we make a copy and recycle the
772 * original buffer, otherwise we use the original buffer itself. If a
773 * positive drop threshold is supplied packets are dropped and their
774 * buffers recycled if (a) the number of remaining buffers is under the
775 * threshold and the packet is too big to copy, or (b) the packet should
776 * be copied but there is no memory for the copy.
778 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
779 unsigned int len, unsigned int drop_thres)
781 struct sk_buff *skb = NULL;
782 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
784 prefetch(sd->skb->data);
787 if (len <= SGE_RX_COPY_THRES) {
788 skb = alloc_skb(len, GFP_ATOMIC);
789 if (likely(skb != NULL)) {
791 pci_dma_sync_single_for_cpu(adap->pdev,
792 pci_unmap_addr(sd, dma_addr), len,
794 memcpy(skb->data, sd->skb->data, len);
795 pci_dma_sync_single_for_device(adap->pdev,
796 pci_unmap_addr(sd, dma_addr), len,
798 } else if (!drop_thres)
801 recycle_rx_buf(adap, fl, fl->cidx);
805 if (unlikely(fl->credits < drop_thres) &&
806 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
807 GFP_ATOMIC | __GFP_COMP) == 0)
811 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
812 fl->buf_size, PCI_DMA_FROMDEVICE);
815 __refill_fl(adap, fl);
820 * get_packet_pg - return the next ingress packet buffer from a free list
821 * @adap: the adapter that received the packet
822 * @fl: the SGE free list holding the packet
823 * @len: the packet length including any SGE padding
824 * @drop_thres: # of remaining buffers before we start dropping packets
826 * Get the next packet from a free list populated with page chunks.
827 * If the packet is small we make a copy and recycle the original buffer,
828 * otherwise we attach the original buffer as a page fragment to a fresh
829 * sk_buff. If a positive drop threshold is supplied packets are dropped
830 * and their buffers recycled if (a) the number of remaining buffers is
831 * under the threshold and the packet is too big to copy, or (b) there's
834 * Note: this function is similar to @get_packet but deals with Rx buffers
835 * that are page chunks rather than sk_buffs.
837 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
838 struct sge_rspq *q, unsigned int len,
839 unsigned int drop_thres)
841 struct sk_buff *newskb, *skb;
842 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
844 dma_addr_t dma_addr = pci_unmap_addr(sd, dma_addr);
846 newskb = skb = q->pg_skb;
847 if (!skb && (len <= SGE_RX_COPY_THRES)) {
848 newskb = alloc_skb(len, GFP_ATOMIC);
849 if (likely(newskb != NULL)) {
850 __skb_put(newskb, len);
851 pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
853 memcpy(newskb->data, sd->pg_chunk.va, len);
854 pci_dma_sync_single_for_device(adap->pdev, dma_addr,
857 } else if (!drop_thres)
861 recycle_rx_buf(adap, fl, fl->cidx);
866 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
869 prefetch(sd->pg_chunk.p_cnt);
872 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
874 if (unlikely(!newskb)) {
880 pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
882 (*sd->pg_chunk.p_cnt)--;
883 if (!*sd->pg_chunk.p_cnt)
884 pci_unmap_page(adap->pdev,
885 pci_unmap_addr(&sd->pg_chunk, mapping),
889 __skb_put(newskb, SGE_RX_PULL_LEN);
890 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
891 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
892 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
893 len - SGE_RX_PULL_LEN);
895 newskb->data_len = len - SGE_RX_PULL_LEN;
896 newskb->truesize += newskb->data_len;
898 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
900 sd->pg_chunk.offset, len);
902 newskb->data_len += len;
903 newskb->truesize += len;
908 * We do not refill FLs here, we let the caller do it to overlap a
915 * get_imm_packet - return the next ingress packet buffer from a response
916 * @resp: the response descriptor containing the packet data
918 * Return a packet containing the immediate data of the given response.
920 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
922 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
925 __skb_put(skb, IMMED_PKT_SIZE);
926 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
932 * calc_tx_descs - calculate the number of Tx descriptors for a packet
935 * Returns the number of Tx descriptors needed for the given Ethernet
936 * packet. Ethernet packets require the addition of WR and CPL headers.
938 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
942 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
945 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
946 if (skb_shinfo(skb)->gso_size)
948 return flits_to_desc(flits);
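/*
 * Example: a non-TSO packet with linear data plus two page fragments needs
 * sgl_len(3) + 2 = 7 flits, which flits_to_desc() maps to a single Tx
 * descriptor; enabling LSO adds one more flit and still fits in one.
 */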
952 * make_sgl - populate a scatter/gather list for a packet
954 * @sgp: the SGL to populate
955 * @start: start address of skb main body data to include in the SGL
956 * @len: length of skb main body data to include in the SGL
957 * @pdev: the PCI device
959 * Generates a scatter/gather list for the buffers that make up a packet
960 * and returns the SGL size in 8-byte words. The caller must size the SGL
963 static inline unsigned int make_sgl(const struct sk_buff *skb,
964 struct sg_ent *sgp, unsigned char *start,
965 unsigned int len, struct pci_dev *pdev)
968 unsigned int i, j = 0, nfrags;
971 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
972 sgp->len[0] = cpu_to_be32(len);
973 sgp->addr[0] = cpu_to_be64(mapping);
977 nfrags = skb_shinfo(skb)->nr_frags;
978 for (i = 0; i < nfrags; i++) {
979 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
981 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
982 frag->size, PCI_DMA_TODEVICE);
983 sgp->len[j] = cpu_to_be32(frag->size);
984 sgp->addr[j] = cpu_to_be64(mapping);
991 return ((nfrags + (len != 0)) * 3) / 2 + j;
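/*
 * The value returned above equals sgl_len(nfrags + (len != 0)), i.e. the
 * number of flits the SGL occupies; j ends up as (entry count) & 1.
 */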
995 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
999 * Ring the doorbell if a Tx queue is asleep. There is a natural race
1000 * where the HW may go to sleep just after we check; in that case the
1001 * interrupt handler will detect the outstanding TX packet
1002 * and ring the doorbell for us.
1004 * When GTS is disabled we unconditionally ring the doorbell.
1006 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
1009 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1010 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1011 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1012 t3_write_reg(adap, A_SG_KDOORBELL,
1013 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1016 wmb(); /* write descriptors before telling HW */
1017 t3_write_reg(adap, A_SG_KDOORBELL,
1018 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1022 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
1024 #if SGE_NUM_GENBITS == 2
1025 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
1030 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1031 * @ndesc: number of Tx descriptors spanned by the SGL
1032 * @skb: the packet corresponding to the WR
1033 * @d: first Tx descriptor to be written
1034 * @pidx: index of above descriptors
1035 * @q: the SGE Tx queue
1037 * @flits: number of flits to the start of the SGL in the first descriptor
1038 * @sgl_flits: the SGL size in flits
1039 * @gen: the Tx descriptor generation
1040 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1041 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1043 * Write a work request header and an associated SGL. If the SGL is
1044 * small enough to fit into one Tx descriptor it has already been written
1045 * and we just need to write the WR header. Otherwise we distribute the
1046 * SGL across the number of descriptors it spans.
1048 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1049 struct tx_desc *d, unsigned int pidx,
1050 const struct sge_txq *q,
1051 const struct sg_ent *sgl,
1052 unsigned int flits, unsigned int sgl_flits,
1053 unsigned int gen, __be32 wr_hi,
1056 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
1057 struct tx_sw_desc *sd = &q->sdesc[pidx];
1060 if (need_skb_unmap()) {
1066 if (likely(ndesc == 1)) {
1068 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1069 V_WR_SGLSFLT(flits)) | wr_hi;
1071 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1072 V_WR_GEN(gen)) | wr_lo;
1075 unsigned int ogen = gen;
1076 const u64 *fp = (const u64 *)sgl;
1077 struct work_request_hdr *wp = wrp;
1079 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1080 V_WR_SGLSFLT(flits)) | wr_hi;
1083 unsigned int avail = WR_FLITS - flits;
1085 if (avail > sgl_flits)
1087 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1097 if (++pidx == q->size) {
1105 wrp = (struct work_request_hdr *)d;
1106 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1107 V_WR_SGLSFLT(1)) | wr_hi;
1108 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1110 V_WR_GEN(gen)) | wr_lo;
1115 wrp->wr_hi |= htonl(F_WR_EOP);
1117 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1118 wr_gen2((struct tx_desc *)wp, ogen);
1119 WARN_ON(ndesc != 0);
1124 * write_tx_pkt_wr - write a TX_PKT work request
1125 * @adap: the adapter
1126 * @skb: the packet to send
1127 * @pi: the egress interface
1128 * @pidx: index of the first Tx descriptor to write
1129 * @gen: the generation value to use
1131 * @ndesc: number of descriptors the packet will occupy
1132 * @compl: the value of the COMPL bit to use
1134 * Generate a TX_PKT work request to send the supplied packet.
1136 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1137 const struct port_info *pi,
1138 unsigned int pidx, unsigned int gen,
1139 struct sge_txq *q, unsigned int ndesc,
1142 unsigned int flits, sgl_flits, cntrl, tso_info;
1143 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1144 struct tx_desc *d = &q->desc[pidx];
1145 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1147 cpl->len = htonl(skb->len);
1148 cntrl = V_TXPKT_INTF(pi->port_id);
1150 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1151 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1153 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1156 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1159 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1160 hdr->cntrl = htonl(cntrl);
1161 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1162 CPL_ETH_II : CPL_ETH_II_VLAN;
1163 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1164 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1165 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1166 hdr->lso_info = htonl(tso_info);
1169 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1170 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1171 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1172 cpl->cntrl = htonl(cntrl);
1174 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1175 q->sdesc[pidx].skb = NULL;
1177 skb_copy_from_linear_data(skb, &d->flit[2],
1180 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1182 flits = (skb->len + 7) / 8 + 2;
1183 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1184 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1185 | F_WR_SOP | F_WR_EOP | compl);
1187 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1188 V_WR_TID(q->token));
1197 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1198 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1200 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1201 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1202 htonl(V_WR_TID(q->token)));
1205 static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1206 struct sge_qset *qs, struct sge_txq *q)
1208 netif_tx_stop_queue(txq);
1209 set_bit(TXQ_ETH, &qs->txq_stopped);
1214 * t3_eth_xmit - add a packet to the Ethernet Tx queue
1216 * @dev: the egress net device
1218 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1220 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1223 unsigned int ndesc, pidx, credits, gen, compl;
1224 const struct port_info *pi = netdev_priv(dev);
1225 struct adapter *adap = pi->adapter;
1226 struct netdev_queue *txq;
1227 struct sge_qset *qs;
1231 * The chip min packet length is 9 octets but play safe and reject
1232 * anything shorter than an Ethernet header.
1234 if (unlikely(skb->len < ETH_HLEN)) {
1236 return NETDEV_TX_OK;
1239 qidx = skb_get_queue_mapping(skb);
1241 q = &qs->txq[TXQ_ETH];
1242 txq = netdev_get_tx_queue(dev, qidx);
1244 spin_lock(&q->lock);
1245 reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1247 credits = q->size - q->in_use;
1248 ndesc = calc_tx_descs(skb);
1250 if (unlikely(credits < ndesc)) {
1251 t3_stop_tx_queue(txq, qs, q);
1252 dev_err(&adap->pdev->dev,
1253 "%s: Tx ring %u full while queue awake!\n",
1254 dev->name, q->cntxt_id & 7);
1255 spin_unlock(&q->lock);
1256 return NETDEV_TX_BUSY;
1260 if (unlikely(credits - ndesc < q->stop_thres)) {
1261 t3_stop_tx_queue(txq, qs, q);
1263 if (should_restart_tx(q) &&
1264 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1266 netif_tx_wake_queue(txq);
1271 q->unacked += ndesc;
1272 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1276 if (q->pidx >= q->size) {
1281 /* update port statistics */
1282 if (skb->ip_summed == CHECKSUM_COMPLETE)
1283 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1284 if (skb_shinfo(skb)->gso_size)
1285 qs->port_stats[SGE_PSTAT_TSO]++;
1286 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1287 qs->port_stats[SGE_PSTAT_VLANINS]++;
1289 dev->trans_start = jiffies;
1290 spin_unlock(&q->lock);
1293 * We do not use Tx completion interrupts to free DMAd Tx packets.
1294 * This is good for performance but means that we rely on new Tx
1295 * packets arriving to run the destructors of completed packets,
1296 * which open up space in their sockets' send queues. Sometimes
1297 * we do not get such new packets causing Tx to stall. A single
1298 * UDP transmitter is a good example of this situation. We have
1299 * a clean up timer that periodically reclaims completed packets
1300 * but it doesn't run often enough (nor do we want it to) to prevent
1301 * lengthy stalls. A solution to this problem is to run the
1302 * destructor early, after the packet is queued but before it's DMAd.
1303 * A downside is that we lie to socket memory accounting, but the amount
1304 * of extra memory is reasonable (limited by the number of Tx
1305 * descriptors), the packets do actually get freed quickly by new
1306 * packets almost always, and for protocols like TCP that wait for
1307 * acks to really free up the data the extra memory is even less.
1308 * On the positive side we run the destructors on the sending CPU
1309 * rather than on a potentially different completing CPU, usually a
1310 * good thing. We also run them without holding our Tx queue lock,
1311 * unlike what reclaim_completed_tx() would otherwise do.
1313 * Run the destructor before telling the DMA engine about the packet
1314 * to make sure it doesn't complete and get freed prematurely.
1316 if (likely(!skb_shared(skb)))
1319 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1320 check_ring_tx_db(adap, q);
1321 return NETDEV_TX_OK;
1325 * write_imm - write a packet into a Tx descriptor as immediate data
1326 * @d: the Tx descriptor to write
1328 * @len: the length of packet data to write as immediate data
1329 * @gen: the generation bit value to write
1331 * Writes a packet as immediate data into a Tx descriptor. The packet
1332 * contains a work request at its beginning. We must write the packet
1333 * carefully so the SGE doesn't read it accidentally before it's written
1336 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1337 unsigned int len, unsigned int gen)
1339 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1340 struct work_request_hdr *to = (struct work_request_hdr *)d;
1342 if (likely(!skb->data_len))
1343 memcpy(&to[1], &from[1], len - sizeof(*from));
1345 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1347 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1348 V_WR_BCNTLFLT(len & 7));
1350 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1351 V_WR_LEN((len + 7) / 8));
1357 * check_desc_avail - check descriptor availability on a send queue
1358 * @adap: the adapter
1359 * @q: the send queue
1360 * @skb: the packet needing the descriptors
1361 * @ndesc: the number of Tx descriptors needed
1362 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1364 * Checks if the requested number of Tx descriptors is available on an
1365 * SGE send queue. If the queue is already suspended or not enough
1366 * descriptors are available the packet is queued for later transmission.
1367 * Must be called with the Tx queue locked.
1369 * Returns 0 if enough descriptors are available, 1 if there aren't
1370 * enough descriptors and the packet has been queued, and 2 if the caller
1371 * needs to retry because there weren't enough descriptors at the
1372 * beginning of the call but some freed up in the meantime.
1374 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1375 struct sk_buff *skb, unsigned int ndesc,
1378 if (unlikely(!skb_queue_empty(&q->sendq))) {
1379 addq_exit:__skb_queue_tail(&q->sendq, skb);
1382 if (unlikely(q->size - q->in_use < ndesc)) {
1383 struct sge_qset *qs = txq_to_qset(q, qid);
1385 set_bit(qid, &qs->txq_stopped);
1386 smp_mb__after_clear_bit();
1388 if (should_restart_tx(q) &&
1389 test_and_clear_bit(qid, &qs->txq_stopped))
1399 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1400 * @q: the SGE control Tx queue
1402 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1403 * that send only immediate data (presently just the control queues) and
1404 * thus do not have any sk_buffs to release.
1406 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1408 unsigned int reclaim = q->processed - q->cleaned;
1410 q->in_use -= reclaim;
1411 q->cleaned += reclaim;
1414 static inline int immediate(const struct sk_buff *skb)
1416 return skb->len <= WR_LEN;
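/*
 * A packet qualifies as immediate data when the entire packet, WR header
 * included, fits within WR_LEN bytes, i.e. within the flits of one Tx
 * descriptor.
 */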
1420 * ctrl_xmit - send a packet through an SGE control Tx queue
1421 * @adap: the adapter
1422 * @q: the control queue
1425 * Send a packet through an SGE control Tx queue. Packets sent through
1426 * a control queue must fit entirely as immediate data in a single Tx
1427 * descriptor and have no page fragments.
1429 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1430 struct sk_buff *skb)
1433 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1435 if (unlikely(!immediate(skb))) {
1438 return NET_XMIT_SUCCESS;
1441 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1442 wrp->wr_lo = htonl(V_WR_TID(q->token));
1444 spin_lock(&q->lock);
1445 again:reclaim_completed_tx_imm(q);
1447 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1448 if (unlikely(ret)) {
1450 spin_unlock(&q->lock);
1456 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1459 if (++q->pidx >= q->size) {
1463 spin_unlock(&q->lock);
1465 t3_write_reg(adap, A_SG_KDOORBELL,
1466 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1467 return NET_XMIT_SUCCESS;
1471 * restart_ctrlq - restart a suspended control queue
1472 * @qs: the queue set containing the control queue
1474 * Resumes transmission on a suspended Tx control queue.
1476 static void restart_ctrlq(unsigned long data)
1478 struct sk_buff *skb;
1479 struct sge_qset *qs = (struct sge_qset *)data;
1480 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1482 spin_lock(&q->lock);
1483 again:reclaim_completed_tx_imm(q);
1485 while (q->in_use < q->size &&
1486 (skb = __skb_dequeue(&q->sendq)) != NULL) {
1488 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1490 if (++q->pidx >= q->size) {
1497 if (!skb_queue_empty(&q->sendq)) {
1498 set_bit(TXQ_CTRL, &qs->txq_stopped);
1499 smp_mb__after_clear_bit();
1501 if (should_restart_tx(q) &&
1502 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1507 spin_unlock(&q->lock);
1509 t3_write_reg(qs->adap, A_SG_KDOORBELL,
1510 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1514 * Send a management message through control queue 0
1516 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1520 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1527 * deferred_unmap_destructor - unmap a packet when it is freed
1530 * This is the packet destructor used for Tx packets that need to remain
1531 * mapped until they are freed rather than until their Tx descriptors are
1534 static void deferred_unmap_destructor(struct sk_buff *skb)
1537 const dma_addr_t *p;
1538 const struct skb_shared_info *si;
1539 const struct deferred_unmap_info *dui;
1541 dui = (struct deferred_unmap_info *)skb->head;
1544 if (skb->tail - skb->transport_header)
1545 pci_unmap_single(dui->pdev, *p++,
1546 skb->tail - skb->transport_header,
1549 si = skb_shinfo(skb);
1550 for (i = 0; i < si->nr_frags; i++)
1551 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1555 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1556 const struct sg_ent *sgl, int sgl_flits)
1559 struct deferred_unmap_info *dui;
1561 dui = (struct deferred_unmap_info *)skb->head;
1563 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1564 *p++ = be64_to_cpu(sgl->addr[0]);
1565 *p++ = be64_to_cpu(sgl->addr[1]);
1568 *p = be64_to_cpu(sgl->addr[0]);
1572 * write_ofld_wr - write an offload work request
1573 * @adap: the adapter
1574 * @skb: the packet to send
1576 * @pidx: index of the first Tx descriptor to write
1577 * @gen: the generation value to use
1578 * @ndesc: number of descriptors the packet will occupy
1580 * Write an offload work request to send the supplied packet. The packet
1581 * data already carry the work request with most fields populated.
1583 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1584 struct sge_txq *q, unsigned int pidx,
1585 unsigned int gen, unsigned int ndesc)
1587 unsigned int sgl_flits, flits;
1588 struct work_request_hdr *from;
1589 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1590 struct tx_desc *d = &q->desc[pidx];
1592 if (immediate(skb)) {
1593 q->sdesc[pidx].skb = NULL;
1594 write_imm(d, skb, skb->len, gen);
1598 /* Only TX_DATA builds SGLs */
1600 from = (struct work_request_hdr *)skb->data;
1601 memcpy(&d->flit[1], &from[1],
1602 skb_transport_offset(skb) - sizeof(*from));
1604 flits = skb_transport_offset(skb) / 8;
1605 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1606 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1607 skb->tail - skb->transport_header,
1609 if (need_skb_unmap()) {
1610 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1611 skb->destructor = deferred_unmap_destructor;
1614 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1615 gen, from->wr_hi, from->wr_lo);
1619 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1622 * Returns the number of Tx descriptors needed for the given offload
1623 * packet. These packets are already fully constructed.
1625 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1627 unsigned int flits, cnt;
1629 if (skb->len <= WR_LEN)
1630 return 1; /* packet fits as immediate data */
1632 flits = skb_transport_offset(skb) / 8; /* headers */
1633 cnt = skb_shinfo(skb)->nr_frags;
1634 if (skb->tail != skb->transport_header)
1636 return flits_to_desc(flits + sgl_len(cnt));
1640 * ofld_xmit - send a packet through an offload queue
1641 * @adap: the adapter
1642 * @q: the Tx offload queue
1645 * Send an offload packet through an SGE offload queue.
1647 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1648 struct sk_buff *skb)
1651 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1653 spin_lock(&q->lock);
1654 again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1656 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1657 if (unlikely(ret)) {
1659 skb->priority = ndesc; /* save for restart */
1660 spin_unlock(&q->lock);
1670 if (q->pidx >= q->size) {
1674 spin_unlock(&q->lock);
1676 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1677 check_ring_tx_db(adap, q);
1678 return NET_XMIT_SUCCESS;
1682 * restart_offloadq - restart a suspended offload queue
1683 * @qs: the queue set containing the offload queue
1685 * Resumes transmission on a suspended Tx offload queue.
1687 static void restart_offloadq(unsigned long data)
1689 struct sk_buff *skb;
1690 struct sge_qset *qs = (struct sge_qset *)data;
1691 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1692 const struct port_info *pi = netdev_priv(qs->netdev);
1693 struct adapter *adap = pi->adapter;
1695 spin_lock(&q->lock);
1696 again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1698 while ((skb = skb_peek(&q->sendq)) != NULL) {
1699 unsigned int gen, pidx;
1700 unsigned int ndesc = skb->priority;
1702 if (unlikely(q->size - q->in_use < ndesc)) {
1703 set_bit(TXQ_OFLD, &qs->txq_stopped);
1704 smp_mb__after_clear_bit();
1706 if (should_restart_tx(q) &&
1707 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1717 if (q->pidx >= q->size) {
1721 __skb_unlink(skb, &q->sendq);
1722 spin_unlock(&q->lock);
1724 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1725 spin_lock(&q->lock);
1727 spin_unlock(&q->lock);
1730 set_bit(TXQ_RUNNING, &q->flags);
1731 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1734 t3_write_reg(adap, A_SG_KDOORBELL,
1735 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1739 * queue_set - return the queue set a packet should use
1742 * Maps a packet to the SGE queue set it should use. The desired queue
1743 * set is carried in bits 1-3 in the packet's priority.
1745 static inline int queue_set(const struct sk_buff *skb)
1747 return skb->priority >> 1;
1751 * is_ctrl_pkt - return whether an offload packet is a control packet
1754 * Determines whether an offload packet should use an OFLD or a CTRL
1755 * Tx queue. This is indicated by bit 0 in the packet's priority.
1757 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1759 return skb->priority & 1;
1763 * t3_offload_tx - send an offload packet
1764 * @tdev: the offload device to send to
1767 * Sends an offload packet. We use the packet priority to select the
1768 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1769 * should be sent as regular or control, bits 1-3 select the queue set.
1771 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1773 struct adapter *adap = tdev2adap(tdev);
1774 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1776 if (unlikely(is_ctrl_pkt(skb)))
1777 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1779 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
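/*
 * Example: skb->priority == 5 (binary 101) selects queue set 2 (5 >> 1), and
 * because bit 0 is set the packet goes out on that set's control queue.
 */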
1783 * offload_enqueue - add an offload packet to an SGE offload receive queue
1784 * @q: the SGE response queue
1787 * Add a new offload packet to an SGE response queue's offload packet
1788 * queue. If the packet is the first on the queue it schedules the RX
1789 * softirq to process the queue.
1791 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1793 int was_empty = skb_queue_empty(&q->rx_queue);
1795 __skb_queue_tail(&q->rx_queue, skb);
1798 struct sge_qset *qs = rspq_to_qset(q);
1800 napi_schedule(&qs->napi);
1805 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1806 * @tdev: the offload device that will be receiving the packets
1807 * @q: the SGE response queue that assembled the bundle
1808 * @skbs: the partial bundle
1809 * @n: the number of packets in the bundle
1811 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1813 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1815 struct sk_buff *skbs[], int n)
1818 q->offload_bundles++;
1819 tdev->recv(tdev, skbs, n);
1824 * ofld_poll - NAPI handler for offload packets in interrupt mode
1825 * @dev: the network device doing the polling
1826 * @budget: polling budget
1828 * The NAPI handler for offload packets when a response queue is serviced
1829 * by the hard interrupt handler, i.e., when it's operating in non-polling
1830 * mode. Creates small packet batches and sends them through the offload
1831 * receive handler. Batches need to be of modest size as we do prefetches
1832 * on the packets in each.
1834 static int ofld_poll(struct napi_struct *napi, int budget)
1836 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1837 struct sge_rspq *q = &qs->rspq;
1838 struct adapter *adapter = qs->adap;
1841 while (work_done < budget) {
1842 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1843 struct sk_buff_head queue;
1846 spin_lock_irq(&q->lock);
1847 __skb_queue_head_init(&queue);
1848 skb_queue_splice_init(&q->rx_queue, &queue);
1849 if (skb_queue_empty(&queue)) {
1850 napi_complete(napi);
1851 spin_unlock_irq(&q->lock);
1854 spin_unlock_irq(&q->lock);
1857 skb_queue_walk_safe(&queue, skb, tmp) {
1858 if (work_done >= budget)
1862 __skb_unlink(skb, &queue);
1863 prefetch(skb->data);
1864 skbs[ngathered] = skb;
1865 if (++ngathered == RX_BUNDLE_SIZE) {
1866 q->offload_bundles++;
1867 adapter->tdev.recv(&adapter->tdev, skbs,
1872 if (!skb_queue_empty(&queue)) {
1873 /* splice remaining packets back onto Rx queue */
1874 spin_lock_irq(&q->lock);
1875 skb_queue_splice(&queue, &q->rx_queue);
1876 spin_unlock_irq(&q->lock);
1878 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1885 * rx_offload - process a received offload packet
1886 * @tdev: the offload device receiving the packet
1887 * @rq: the response queue that received the packet
1889 * @rx_gather: a gather list of packets if we are building a bundle
1890 * @gather_idx: index of the next available slot in the bundle
1892 * Process an ingress offload packet and add it to the offload ingress
1893 * queue. Returns the index of the next available slot in the bundle.
1895 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1896 struct sk_buff *skb, struct sk_buff *rx_gather[],
1897 unsigned int gather_idx)
1899 skb_reset_mac_header(skb);
1900 skb_reset_network_header(skb);
1901 skb_reset_transport_header(skb);
1904 rx_gather[gather_idx++] = skb;
1905 if (gather_idx == RX_BUNDLE_SIZE) {
1906 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1908 rq->offload_bundles++;
1911 offload_enqueue(rq, skb);
1917 * restart_tx - check whether to restart suspended Tx queues
1918 * @qs: the queue set to resume
1920 * Restarts suspended Tx queues of an SGE queue set if they have enough
1921 * free resources to resume operation.
1923 static void restart_tx(struct sge_qset *qs)
1925 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1926 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1927 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1928 qs->txq[TXQ_ETH].restarts++;
1929 if (netif_running(qs->netdev))
1930 netif_tx_wake_queue(qs->tx_q);
1933 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1934 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1935 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1936 qs->txq[TXQ_OFLD].restarts++;
1937 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1939 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1940 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1941 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1942 qs->txq[TXQ_CTRL].restarts++;
1943 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1948 * cxgb3_arp_process - process an ARP request probing a private IP address
1949 * @adapter: the adapter
1950 * @skb: the skbuff containing the ARP request
1952 * Check if the ARP request is probing the private IP address
1953 * dedicated to iSCSI, and generate an ARP reply if so.
1955 static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1957 struct net_device *dev = skb->dev;
1958 struct port_info *pi;
1960 unsigned char *arp_ptr;
1967 skb_reset_network_header(skb);
1970 if (arp->ar_op != htons(ARPOP_REQUEST))
1973 arp_ptr = (unsigned char *)(arp + 1);
1975 arp_ptr += dev->addr_len;
1976 memcpy(&sip, arp_ptr, sizeof(sip));
1977 arp_ptr += sizeof(sip);
1978 arp_ptr += dev->addr_len;
1979 memcpy(&tip, arp_ptr, sizeof(tip));
1981 pi = netdev_priv(dev);
1982 if (tip != pi->iscsi_ipv4addr)
1985 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1986 dev->dev_addr, sha);
1990 static inline int is_arp(struct sk_buff *skb)
1992 return skb->protocol == htons(ETH_P_ARP);
1996 * rx_eth - process an ingress ethernet packet
1997 * @adap: the adapter
1998 * @rq: the response queue that received the packet
2000 * @pad: amount of padding at the start of the buffer
2002 * Process an ingress ethernet packet and deliver it to the stack.
2003 * The padding is 2 if the packet was delivered in an Rx buffer and 0
2004 * if it was immediate data in a response.
2006 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
2007 struct sk_buff *skb, int pad, int lro)
2009 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
2010 struct sge_qset *qs = rspq_to_qset(rq);
2011 struct port_info *pi;
2013 skb_pull(skb, sizeof(*p) + pad);
2014 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
2015 pi = netdev_priv(skb->dev);
2016 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
2017 p->csum == htons(0xffff) && !p->fragment) {
2018 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2019 skb->ip_summed = CHECKSUM_UNNECESSARY;
2021 skb->ip_summed = CHECKSUM_NONE;
2022 skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
2024 if (unlikely(p->vlan_valid)) {
2025 struct vlan_group *grp = pi->vlan_grp;
2027 qs->port_stats[SGE_PSTAT_VLANEX]++;
2030 vlan_gro_receive(&qs->napi, grp,
2031 ntohs(p->vlan), skb);
2033 if (unlikely(pi->iscsi_ipv4addr &&
2035 unsigned short vtag = ntohs(p->vlan) &
2037 skb->dev = vlan_group_get_device(grp,
2039 cxgb3_arp_process(adap, skb);
2041 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
2045 dev_kfree_skb_any(skb);
2046 } else if (rq->polling) {
2048 napi_gro_receive(&qs->napi, skb);
2050 if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
2051 cxgb3_arp_process(adap, skb);
2052 netif_receive_skb(skb);
2058 static inline int is_eth_tcp(u32 rss)
2060 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2064 * lro_add_page - add a page chunk to an LRO session
2065 * @adap: the adapter
2066 * @qs: the associated queue set
2067 * @fl: the free list containing the page chunk to add
2068 * @len: packet length
2069 * @complete: Indicates the last fragment of a frame
2071 * Add a received packet contained in a page chunk to an existing LRO
2074 static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2075 struct sge_fl *fl, int len, int complete)
2077 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2078 struct sk_buff *skb = NULL;
2079 struct cpl_rx_pkt *cpl;
2080 struct skb_frag_struct *rx_frag;
2085 skb = napi_get_frags(&qs->napi);
2091 pci_dma_sync_single_for_cpu(adap->pdev,
2092 pci_unmap_addr(sd, dma_addr),
2093 fl->buf_size - SGE_PG_RSVD,
2094 PCI_DMA_FROMDEVICE);
2096 (*sd->pg_chunk.p_cnt)--;
2097 if (!*sd->pg_chunk.p_cnt)
2098 pci_unmap_page(adap->pdev,
2099 pci_unmap_addr(&sd->pg_chunk, mapping),
2101 PCI_DMA_FROMDEVICE);
2104 put_page(sd->pg_chunk.page);
2110 rx_frag = skb_shinfo(skb)->frags;
2111 nr_frags = skb_shinfo(skb)->nr_frags;
2114 offset = 2 + sizeof(struct cpl_rx_pkt);
2115 qs->lro_va = sd->pg_chunk.va + 2;
2119 prefetch(qs->lro_va);
2121 rx_frag += nr_frags;
2122 rx_frag->page = sd->pg_chunk.page;
2123 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2124 rx_frag->size = len;
2127 skb->data_len += len;
2128 skb->truesize += len;
2129 skb_shinfo(skb)->nr_frags++;
2134 skb->ip_summed = CHECKSUM_UNNECESSARY;
2137 if (unlikely(cpl->vlan_valid)) {
2138 struct net_device *dev = qs->netdev;
2139 struct port_info *pi = netdev_priv(dev);
2140 struct vlan_group *grp = pi->vlan_grp;
2142 if (likely(grp != NULL)) {
2143 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan));
2147 napi_gro_frags(&qs->napi);
2151 * handle_rsp_cntrl_info - handles control information in a response
2152 * @qs: the queue set corresponding to the response
2153 * @flags: the response control flags
2155 * Handles the control information of an SGE response, such as GTS
2156 * indications and completion credits for the queue set's Tx queues.
2157 * HW coalesces credits; we don't do any extra SW coalescing.
2159 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2161 unsigned int credits;
2164 if (flags & F_RSPD_TXQ0_GTS)
2165 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2168 credits = G_RSPD_TXQ0_CR(flags);
2170 qs->txq[TXQ_ETH].processed += credits;
2172 credits = G_RSPD_TXQ2_CR(flags);
2174 qs->txq[TXQ_CTRL].processed += credits;
2177 if (flags & F_RSPD_TXQ1_GTS)
2178 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2180 credits = G_RSPD_TXQ1_CR(flags);
2182 qs->txq[TXQ_OFLD].processed += credits;
2186 * check_ring_db - check if we need to ring any doorbells
2187 * @adapter: the adapter
2188 * @qs: the queue set whose Tx queues are to be examined
2189 * @sleeping: indicates which Tx queue sent GTS
2191 * Checks if some of a queue set's Tx queues need to ring their doorbells
2192 * to resume transmission after idling while they still have unprocessed
2195 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2196 unsigned int sleeping)
2198 if (sleeping & F_RSPD_TXQ0_GTS) {
2199 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2201 if (txq->cleaned + txq->in_use != txq->processed &&
2202 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2203 set_bit(TXQ_RUNNING, &txq->flags);
2204 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2205 V_EGRCNTX(txq->cntxt_id));
2209 if (sleeping & F_RSPD_TXQ1_GTS) {
2210 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2212 if (txq->cleaned + txq->in_use != txq->processed &&
2213 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2214 set_bit(TXQ_RUNNING, &txq->flags);
2215 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2216 V_EGRCNTX(txq->cntxt_id));
2222 * is_new_response - check if a response is newly written
2223 * @r: the response descriptor
2224 * @q: the response queue
2226 * Returns true if a response descriptor contains a yet unprocessed response.
2229 static inline int is_new_response(const struct rsp_desc *r,
2230 const struct sge_rspq *q)
2232 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
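/*
 * Generation-bit scheme: the SGE alternates the GEN2 bit it writes on
 * successive passes through the response ring, and the driver flips its
 * expected value (q->gen) whenever q->cidx wraps.  Entries left over from
 * the previous pass therefore carry the stale generation and fail the
 * comparison above, which is how unconsumed ring slots are told apart from
 * freshly written responses without reading a HW producer index.
 */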
2235 static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2238 q->rx_recycle_buf = 0;
2241 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2242 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2243 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2244 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2245 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2247 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2248 #define NOMEM_INTR_DELAY 2500
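/* 2500 ticks of 0.1 us each, i.e. a holdoff of roughly 250 us */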
2251 * process_responses - process responses from an SGE response queue
2252 * @adap: the adapter
2253 * @qs: the queue set to which the response queue belongs
2254 * @budget: how many responses can be processed in this round
2256 * Process responses from an SGE response queue up to the supplied budget.
2257 * Responses include received packets as well as credits and other events
2258 * for the queues that belong to the response queue's queue set.
2259 * A negative budget is effectively unlimited.
2261 * Additionally choose the interrupt holdoff time for the next interrupt
2262 * on this queue. If the system is under memory shortage use a fairly
2263 * long delay to help recovery.
2265 static int process_responses(struct adapter *adap, struct sge_qset *qs,
2268 struct sge_rspq *q = &qs->rspq;
2269 struct rsp_desc *r = &q->desc[q->cidx];
2270 int budget_left = budget;
2271 unsigned int sleeping = 0;
2272 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2275 q->next_holdoff = q->holdoff_tmr;
2277 while (likely(budget_left && is_new_response(r, q))) {
2278 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2279 struct sk_buff *skb = NULL;
2280 u32 len, flags = ntohl(r->flags);
2281 __be32 rss_hi = *(const __be32 *)r,
2282 rss_lo = r->rss_hdr.rss_hash_val;
2284 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2286 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2287 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2291 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2292 skb->data[0] = CPL_ASYNC_NOTIF;
2293 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2295 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2296 skb = get_imm_packet(r);
2297 if (unlikely(!skb)) {
2299 q->next_holdoff = NOMEM_INTR_DELAY;
2301 /* consume one credit since we tried */
2307 } else if ((len = ntohl(r->len_cq)) != 0) {
2310 lro &= eth && is_eth_tcp(rss_hi);
2312 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2313 if (fl->use_pages) {
2314 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2317 #if L1_CACHE_BYTES < 128
2318 prefetch(addr + L1_CACHE_BYTES);
2320 __refill_fl(adap, fl);
2322 lro_add_page(adap, qs, fl,
2324 flags & F_RSPD_EOP);
2328 skb = get_packet_pg(adap, fl, q,
2331 SGE_RX_DROP_THRES : 0);
2334 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2335 eth ? SGE_RX_DROP_THRES : 0);
2336 if (unlikely(!skb)) {
2340 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2343 if (++fl->cidx == fl->size)
2348 if (flags & RSPD_CTRL_MASK) {
2349 sleeping |= flags & RSPD_GTS_MASK;
2350 handle_rsp_cntrl_info(qs, flags);
2354 if (unlikely(++q->cidx == q->size)) {
2361 if (++q->credits >= (q->size / 4)) {
2362 refill_rspq(adap, q, q->credits);
2366 packet_complete = flags &
2367 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2368 F_RSPD_ASYNC_NOTIF);
2370 if (skb != NULL && packet_complete) {
2372 rx_eth(adap, q, skb, ethpad, lro);
2375 /* Preserve the RSS info in csum & priority */
2377 skb->priority = rss_lo;
2378 ngathered = rx_offload(&adap->tdev, q, skb,
2383 if (flags & F_RSPD_EOP)
2384 clear_rspq_bufstate(q);
2389 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2392 check_ring_db(adap, qs, sleeping);
2394 smp_mb(); /* commit Tx queue .processed updates */
2395 if (unlikely(qs->txq_stopped != 0))
2398 budget -= budget_left;
2402 static inline int is_pure_response(const struct rsp_desc *r)
2404 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2406 return (n | r->len_cq) == 0;
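/*
 * A pure response carries no packet data at all: no async notification, no
 * immediate data and no free-list buffer (len_cq == 0).  It only returns Tx
 * credits and GTS state, which is why it is cheap enough to be handled
 * directly in hard interrupt context by process_pure_responses().
 */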
2410 * napi_rx_handler - the NAPI handler for Rx processing
2411 * @napi: the napi instance
2412 * @budget: how many packets we can process in this round
2414 * Handler for new data events when using NAPI.
2416 static int napi_rx_handler(struct napi_struct *napi, int budget)
2418 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2419 struct adapter *adap = qs->adap;
2420 int work_done = process_responses(adap, qs, budget);
2422 if (likely(work_done < budget)) {
2423 napi_complete(napi);
2426 * Because we don't atomically flush the following
2427 * write it is possible that in very rare cases it can
2428 * reach the device in a way that races with a new
2429 * response being written plus an error interrupt
2430 * causing the NAPI interrupt handler below to return
2431 * unhandled status to the OS. To protect against
2432 * this would require flushing the write and doing
2433 * both the write and the flush with interrupts off.
2434 * Way too expensive and unjustifiable given the
2435 * rarity of the race.
2437 * The race cannot happen at all with MSI-X.
2439 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2440 V_NEWTIMER(qs->rspq.next_holdoff) |
2441 V_NEWINDEX(qs->rspq.cidx));
2447 * Returns true if the device is already scheduled for polling.
2449 static inline int napi_is_scheduled(struct napi_struct *napi)
2451 return test_bit(NAPI_STATE_SCHED, &napi->state);
2455 * process_pure_responses - process pure responses from a response queue
2456 * @adap: the adapter
2457 * @qs: the queue set owning the response queue
2458 * @r: the first pure response to process
2460 * A simpler version of process_responses() that handles only pure (i.e.,
2461 * non data-carrying) responses. Such responses are too lightweight to
2462 * justify calling a softirq under NAPI, so we handle them specially in
2463 * the interrupt handler. The function is called with a pointer to a
2464 * response, which the caller must ensure is a valid pure response.
2466 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2468 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2471 struct sge_rspq *q = &qs->rspq;
2472 unsigned int sleeping = 0;
2475 u32 flags = ntohl(r->flags);
2478 if (unlikely(++q->cidx == q->size)) {
2485 if (flags & RSPD_CTRL_MASK) {
2486 sleeping |= flags & RSPD_GTS_MASK;
2487 handle_rsp_cntrl_info(qs, flags);
2491 if (++q->credits >= (q->size / 4)) {
2492 refill_rspq(adap, q, q->credits);
2495 } while (is_new_response(r, q) && is_pure_response(r));
2498 check_ring_db(adap, qs, sleeping);
2500 smp_mb(); /* commit Tx queue .processed updates */
2501 if (unlikely(qs->txq_stopped != 0))
2504 return is_new_response(r, q);
2508 * handle_responses - decide what to do with new responses in NAPI mode
2509 * @adap: the adapter
2510 * @q: the response queue
2512 * This is used by the NAPI interrupt handlers to decide what to do with
2513 * new SGE responses. If there are no new responses it returns -1. If
2514 * there are new responses and they are pure (i.e., non-data carrying)
2515 * it handles them straight in hard interrupt context as they are very
2516 * cheap and don't deliver any packets. Finally, if there are any data
2517 * signaling responses it schedules the NAPI handler. Returns 1 if it
2518 * schedules NAPI, 0 if all new responses were pure.
2520 * The caller must ascertain NAPI is not already running.
2522 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2524 struct sge_qset *qs = rspq_to_qset(q);
2525 struct rsp_desc *r = &q->desc[q->cidx];
2527 if (!is_new_response(r, q))
2529 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2530 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2531 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2534 napi_schedule(&qs->napi);
2539 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2540 * (i.e., response queue serviced in hard interrupt).
2542 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2544 struct sge_qset *qs = cookie;
2545 struct adapter *adap = qs->adap;
2546 struct sge_rspq *q = &qs->rspq;
2548 spin_lock(&q->lock);
2549 if (process_responses(adap, qs, -1) == 0)
2550 q->unhandled_irqs++;
2551 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2552 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2553 spin_unlock(&q->lock);
2558 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2559 * (i.e., response queue serviced by NAPI polling).
2561 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2563 struct sge_qset *qs = cookie;
2564 struct sge_rspq *q = &qs->rspq;
2566 spin_lock(&q->lock);
2568 if (handle_responses(qs->adap, q) < 0)
2569 q->unhandled_irqs++;
2570 spin_unlock(&q->lock);
2575 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2576 * SGE response queues as well as error and other async events as they all use
2577 * the same MSI vector. We use one SGE response queue per port in this mode
2578 * and protect all response queues with queue 0's lock.
2580 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2582 int new_packets = 0;
2583 struct adapter *adap = cookie;
2584 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2586 spin_lock(&q->lock);
2588 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2589 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2590 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2594 if (adap->params.nports == 2 &&
2595 process_responses(adap, &adap->sge.qs[1], -1)) {
2596 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2598 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2599 V_NEWTIMER(q1->next_holdoff) |
2600 V_NEWINDEX(q1->cidx));
2604 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2605 q->unhandled_irqs++;
2607 spin_unlock(&q->lock);
2611 static int rspq_check_napi(struct sge_qset *qs)
2613 struct sge_rspq *q = &qs->rspq;
2615 if (!napi_is_scheduled(&qs->napi) &&
2616 is_new_response(&q->desc[q->cidx], q)) {
2617 napi_schedule(&qs->napi);
2624 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2625 * by NAPI polling). Handles data events from SGE response queues as well as
2626 * error and other async events as they all use the same MSI vector. We use
2627 * one SGE response queue per port in this mode and protect all response
2628 * queues with queue 0's lock.
2630 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2633 struct adapter *adap = cookie;
2634 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2636 spin_lock(&q->lock);
2638 new_packets = rspq_check_napi(&adap->sge.qs[0]);
2639 if (adap->params.nports == 2)
2640 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2641 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2642 q->unhandled_irqs++;
2644 spin_unlock(&q->lock);
2649 * A helper function that processes responses and issues GTS.
2651 static inline int process_responses_gts(struct adapter *adap,
2652 struct sge_rspq *rq)
2656 work = process_responses(adap, rspq_to_qset(rq), -1);
2657 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2658 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2663 * The legacy INTx interrupt handler. This needs to handle data events from
2664 * SGE response queues as well as error and other async events as they all use
2665 * the same interrupt pin. We use one SGE response queue per port in this mode
2666 * and protect all response queues with queue 0's lock.
2668 static irqreturn_t t3_intr(int irq, void *cookie)
2670 int work_done, w0, w1;
2671 struct adapter *adap = cookie;
2672 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2673 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2675 spin_lock(&q0->lock);
2677 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2678 w1 = adap->params.nports == 2 &&
2679 is_new_response(&q1->desc[q1->cidx], q1);
2681 if (likely(w0 | w1)) {
2682 t3_write_reg(adap, A_PL_CLI, 0);
2683 t3_read_reg(adap, A_PL_CLI); /* flush */
2686 process_responses_gts(adap, q0);
2689 process_responses_gts(adap, q1);
2691 work_done = w0 | w1;
2693 work_done = t3_slow_intr_handler(adap);
2695 spin_unlock(&q0->lock);
2696 return IRQ_RETVAL(work_done != 0);
2700 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2701 * Handles data events from SGE response queues as well as error and other
2702 * async events as they all use the same interrupt pin. We use one SGE
2703 * response queue per port in this mode and protect all response queues with
2706 static irqreturn_t t3b_intr(int irq, void *cookie)
2709 struct adapter *adap = cookie;
2710 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2712 t3_write_reg(adap, A_PL_CLI, 0);
2713 map = t3_read_reg(adap, A_SG_DATA_INTR);
2715 if (unlikely(!map)) /* shared interrupt, most likely */
2718 spin_lock(&q0->lock);
2720 if (unlikely(map & F_ERRINTR))
2721 t3_slow_intr_handler(adap);
2723 if (likely(map & 1))
2724 process_responses_gts(adap, q0);
2727 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2729 spin_unlock(&q0->lock);
2734 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2735 * Handles data events from SGE response queues as well as error and other
2736 * async events as they all use the same interrupt pin. We use one SGE
2737 * response queue per port in this mode and protect all response queues with
2740 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2743 struct adapter *adap = cookie;
2744 struct sge_qset *qs0 = &adap->sge.qs[0];
2745 struct sge_rspq *q0 = &qs0->rspq;
2747 t3_write_reg(adap, A_PL_CLI, 0);
2748 map = t3_read_reg(adap, A_SG_DATA_INTR);
2750 if (unlikely(!map)) /* shared interrupt, most likely */
2753 spin_lock(&q0->lock);
2755 if (unlikely(map & F_ERRINTR))
2756 t3_slow_intr_handler(adap);
2758 if (likely(map & 1))
2759 napi_schedule(&qs0->napi);
2762 napi_schedule(&adap->sge.qs[1].napi);
2764 spin_unlock(&q0->lock);
2769 * t3_intr_handler - select the top-level interrupt handler
2770 * @adap: the adapter
2771 * @polling: whether using NAPI to service response queues
2773 * Selects the top-level interrupt handler based on the type of interrupts
2774 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2777 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2779 if (adap->flags & USING_MSIX)
2780 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2781 if (adap->flags & USING_MSI)
2782 return polling ? t3_intr_msi_napi : t3_intr_msi;
2783 if (adap->params.rev > 0)
2784 return polling ? t3b_intr_napi : t3b_intr;
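/*
 * Illustrative only -- a sketch of how the selected handler might be
 * registered by the driver's setup code (the exact IRQ flags, device name
 * and cookie shown here are assumptions, not taken from this file):
 *
 *	err = request_irq(adap->pdev->irq,
 *			  t3_intr_handler(adap, adap->sge.qs[0].rspq.polling),
 *			  (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
 *			  dev_name(&adap->pdev->dev), adap);
 */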
2788 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2789 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2790 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2791 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2793 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2794 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2798 * t3_sge_err_intr_handler - SGE async event interrupt handler
2799 * @adapter: the adapter
2801 * Interrupt handler for SGE asynchronous (non-data) events.
2803 void t3_sge_err_intr_handler(struct adapter *adapter)
2805 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2808 if (status & SGE_PARERR)
2809 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2810 status & SGE_PARERR);
2811 if (status & SGE_FRAMINGERR)
2812 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2813 status & SGE_FRAMINGERR);
2815 if (status & F_RSPQCREDITOVERFOW)
2816 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2818 if (status & F_RSPQDISABLED) {
2819 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2822 "packet delivered to disabled response queue "
2823 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2826 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2827 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2828 status & F_HIPIODRBDROPERR ? "high" : "low");
2830 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2831 if (status & SGE_FATALERR)
2832 t3_fatal_err(adapter);
2836 * sge_timer_tx - perform periodic maintenance of an SGE qset
2837 * @data: the SGE queue set to maintain
2839 * Runs periodically from a timer to perform maintenance of an SGE queue
2840 * set: it cleans up any completed Tx descriptors that may still be
2842 * pending.
2843 * Normal descriptor cleanup happens when new packets are added to a Tx
2844 * queue so this timer is relatively infrequent and does any cleanup only
2845 * if the Tx queue has not seen any new packets in a while. We make a
2846 * best effort attempt to reclaim descriptors, in that we don't wait
2847 * around if we cannot get a queue's lock (which most likely is because
2848 * someone else is queueing new packets and so will also handle the clean
2849 * up). Since control queues use immediate data exclusively we don't
2850 * bother cleaning them up here.
2853 static void sge_timer_tx(unsigned long data)
2855 struct sge_qset *qs = (struct sge_qset *)data;
2856 struct port_info *pi = netdev_priv(qs->netdev);
2857 struct adapter *adap = pi->adapter;
2858 unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
2859 unsigned long next_period;
2861 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2862 tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
2863 TX_RECLAIM_TIMER_CHUNK);
2864 spin_unlock(&qs->txq[TXQ_ETH].lock);
2866 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2867 tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
2868 TX_RECLAIM_TIMER_CHUNK);
2869 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2872 next_period = TX_RECLAIM_PERIOD >>
2873 (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2874 TX_RECLAIM_TIMER_CHUNK);
2875 mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
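/*
 * The reclaim period adapts to the count returned above: TX_RECLAIM_PERIOD
 * is halved once per TX_RECLAIM_TIMER_CHUNK (64) descriptors reported by
 * reclaim_completed_tx().  With HZ == 1000, for example, a quiet queue set
 * is revisited every 250 jiffies while one reporting 128 descriptors is
 * revisited after about 62.
 */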
2879 * sge_timer_rx - perform periodic maintenance of an SGE qset
2880 * @data: the SGE queue set to maintain
2882 * a) Replenishes Rx queues that have run out due to memory shortage.
2883 * Normally new Rx buffers are added when existing ones are consumed but
2884 * when out of memory a queue can become empty. We try to add only a few
2885 * buffers here; the queue will be replenished fully as these new buffers
2886 * are used up if memory shortage has subsided.
2888 * b) Returns coalesced response queue credits in case a response queue is starved.
2892 static void sge_timer_rx(unsigned long data)
2895 struct sge_qset *qs = (struct sge_qset *)data;
2896 struct port_info *pi = netdev_priv(qs->netdev);
2897 struct adapter *adap = pi->adapter;
2900 lock = adap->params.rev > 0 ?
2901 &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;
2903 if (!spin_trylock_irq(lock))
2906 if (napi_is_scheduled(&qs->napi))
2909 if (adap->params.rev < 4) {
2910 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2912 if (status & (1 << qs->rspq.cntxt_id)) {
2914 if (qs->rspq.credits) {
2916 refill_rspq(adap, &qs->rspq, 1);
2917 qs->rspq.restarted++;
2918 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2919 1 << qs->rspq.cntxt_id);
2924 if (qs->fl[0].credits < qs->fl[0].size)
2925 __refill_fl(adap, &qs->fl[0]);
2926 if (qs->fl[1].credits < qs->fl[1].size)
2927 __refill_fl(adap, &qs->fl[1]);
2930 spin_unlock_irq(lock);
2932 mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
2936 * t3_update_qset_coalesce - update coalescing settings for a queue set
2937 * @qs: the SGE queue set
2938 * @p: new queue set parameters
2940 * Update the coalescing settings for an SGE queue set. Nothing is done
2941 * if the queue set is not initialized yet.
2943 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2945 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2946 qs->rspq.polling = p->polling;
2947 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
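/*
 * The holdoff timer is programmed in units of 0.1 us, hence the "* 10"
 * above: the default coalesce_usecs of 5 from t3_sge_prep() becomes 50
 * ticks, and a requested value of 0 us is clamped to the 1-tick minimum.
 */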
2951 * t3_sge_alloc_qset - initialize an SGE queue set
2952 * @adapter: the adapter
2953 * @id: the queue set id
2954 * @nports: how many Ethernet ports will be using this queue set
2955 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2956 * @p: configuration parameters for this queue set
2957 * @ntxq: number of Tx queues for the queue set
2958 * @netdev: net device associated with this queue set
2959 * @netdevq: net device TX queue associated with this queue set
2961 * Allocate resources and initialize an SGE queue set. A queue set
2962 * comprises a response queue, two Rx free-buffer queues, and up to 3
2963 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2964 * queue, offload queue, and control queue.
2966 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2967 int irq_vec_idx, const struct qset_params *p,
2968 int ntxq, struct net_device *dev,
2969 struct netdev_queue *netdevq)
2971 int i, avail, ret = -ENOMEM;
2972 struct sge_qset *q = &adapter->sge.qs[id];
2974 init_qset_cntxt(q, id);
2975 setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q);
2976 setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q);
2978 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2979 sizeof(struct rx_desc),
2980 sizeof(struct rx_sw_desc),
2981 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2985 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2986 sizeof(struct rx_desc),
2987 sizeof(struct rx_sw_desc),
2988 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2992 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2993 sizeof(struct rsp_desc), 0,
2994 &q->rspq.phys_addr, NULL);
2998 for (i = 0; i < ntxq; ++i) {
3000 * The control queue always uses immediate data so does not
3001 * need to keep track of any sk_buffs.
3003 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
3005 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
3006 sizeof(struct tx_desc), sz,
3007 &q->txq[i].phys_addr,
3009 if (!q->txq[i].desc)
3013 q->txq[i].size = p->txq_size[i];
3014 spin_lock_init(&q->txq[i].lock);
3015 skb_queue_head_init(&q->txq[i].sendq);
3018 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
3020 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
3023 q->fl[0].gen = q->fl[1].gen = 1;
3024 q->fl[0].size = p->fl_size;
3025 q->fl[1].size = p->jumbo_size;
3028 q->rspq.size = p->rspq_size;
3029 spin_lock_init(&q->rspq.lock);
3030 skb_queue_head_init(&q->rspq.rx_queue);
3032 q->txq[TXQ_ETH].stop_thres = nports *
3033 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
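/*
 * stop_thres is the low-water mark for stopping the Ethernet Tx queue:
 * enough descriptors are kept in reserve for each port sharing the queue
 * set to post one maximally fragmented packet (an SGL spanning
 * MAX_SKB_FRAGS + 1 buffers plus a few header flits).
 */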
3035 #if FL0_PG_CHUNK_SIZE > 0
3036 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
3038 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3040 #if FL1_PG_CHUNK_SIZE > 0
3041 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
3043 q->fl[1].buf_size = is_offload(adapter) ?
3044 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3045 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
3048 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
3049 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
3050 q->fl[0].order = FL0_PG_ORDER;
3051 q->fl[1].order = FL1_PG_ORDER;
3052 q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3053 q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
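/*
 * With 4 KB pages, for example, FL0 carves two 2 KB chunks from each
 * order-0 page while FL1 allocates order-1 (8 KB) pages and hands each out
 * as a single jumbo chunk; page sizes above 8 KB keep both lists at order 0
 * and carve all chunks out of single pages.
 */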
3055 spin_lock_irq(&adapter->sge.reg_lock);
3057 /* FL threshold comparison uses < */
3058 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
3059 q->rspq.phys_addr, q->rspq.size,
3060 q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
3064 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
3065 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
3066 q->fl[i].phys_addr, q->fl[i].size,
3067 q->fl[i].buf_size - SGE_PG_RSVD,
3068 p->cong_thres, 1, 0);
3073 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3074 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
3075 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
3081 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
3082 USE_GTS, SGE_CNTXT_OFLD, id,
3083 q->txq[TXQ_OFLD].phys_addr,
3084 q->txq[TXQ_OFLD].size, 0, 1, 0);
3090 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
3092 q->txq[TXQ_CTRL].phys_addr,
3093 q->txq[TXQ_CTRL].size,
3094 q->txq[TXQ_CTRL].token, 1, 0);
3099 spin_unlock_irq(&adapter->sge.reg_lock);
3104 t3_update_qset_coalesce(q, p);
3106 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3107 GFP_KERNEL | __GFP_COMP);
3109 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3112 if (avail < q->fl[0].size)
3113 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3116 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3117 GFP_KERNEL | __GFP_COMP);
3118 if (avail < q->fl[1].size)
3119 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3121 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
3123 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
3124 V_NEWTIMER(q->rspq.holdoff_tmr));
3129 spin_unlock_irq(&adapter->sge.reg_lock);
3131 t3_free_qset(adapter, q);
3136 * t3_start_sge_timers - start SGE timer callbacks
3137 * @adap: the adapter
3139 * Starts each SGE queue set's timer callbacks
3141 void t3_start_sge_timers(struct adapter *adap)
3145 for (i = 0; i < SGE_QSETS; ++i) {
3146 struct sge_qset *q = &adap->sge.qs[i];
3148 if (q->tx_reclaim_timer.function)
3149 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3151 if (q->rx_reclaim_timer.function)
3152 mod_timer(&q->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
3157 * t3_stop_sge_timers - stop SGE timer callbacks
3158 * @adap: the adapter
3160 * Stops each SGE queue set's timer callbacks
3162 void t3_stop_sge_timers(struct adapter *adap)
3166 for (i = 0; i < SGE_QSETS; ++i) {
3167 struct sge_qset *q = &adap->sge.qs[i];
3169 if (q->tx_reclaim_timer.function)
3170 del_timer_sync(&q->tx_reclaim_timer);
3171 if (q->rx_reclaim_timer.function)
3172 del_timer_sync(&q->rx_reclaim_timer);
3177 * t3_free_sge_resources - free SGE resources
3178 * @adap: the adapter
3180 * Frees resources used by the SGE queue sets.
3182 void t3_free_sge_resources(struct adapter *adap)
3186 for (i = 0; i < SGE_QSETS; ++i)
3187 t3_free_qset(adap, &adap->sge.qs[i]);
3191 * t3_sge_start - enable SGE
3192 * @adap: the adapter
3194 * Enables the SGE for DMAs. This is the last step in starting packet transfers.
3197 void t3_sge_start(struct adapter *adap)
3199 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3203 * t3_sge_stop - disable SGE operation
3204 * @adap: the adapter
3206 * Disables the DMA engine. This can be called in emergencies (e.g.,
3207 * from error interrupts) or from normal process context. In the latter
3208 * case it also disables any pending queue restart tasklets. Note that
3209 * if it is called in interrupt context it cannot disable the restart
3210 * tasklets as it cannot wait; however, the tasklets will have no effect
3211 * since the doorbells are disabled and the driver will call this again
3212 * later from process context, at which time the tasklets will be stopped
3213 * if they are still running.
3215 void t3_sge_stop(struct adapter *adap)
3217 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3218 if (!in_interrupt()) {
3221 for (i = 0; i < SGE_QSETS; ++i) {
3222 struct sge_qset *qs = &adap->sge.qs[i];
3224 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3225 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3231 * t3_sge_init - initialize SGE
3232 * @adap: the adapter
3233 * @p: the SGE parameters
3235 * Performs SGE initialization needed every time after a chip reset.
3236 * We do not initialize any of the queue sets here, instead the driver
3237 * top-level must request those individually. We also do not enable DMA
3238 * here, that should be done after the queues have been set up.
3240 void t3_sge_init(struct adapter *adap, struct sge_params *p)
3242 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3244 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3245 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3246 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3247 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
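/*
 * The host page size is programmed as PAGE_SHIFT - 11, i.e. 1 for 4 KB
 * pages and 5 for 64 KB pages.
 */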
3248 #if SGE_NUM_GENBITS == 1
3249 ctrl |= F_EGRGENCTRL;
3251 if (adap->params.rev > 0) {
3252 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3253 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3255 t3_write_reg(adap, A_SG_CONTROL, ctrl);
3256 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3257 V_LORCQDRBTHRSH(512));
3258 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3259 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3260 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3261 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3262 adap->params.rev < T3_REV_C ? 1000 : 500);
3263 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3264 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3265 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3266 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3267 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3271 * t3_sge_prep - one-time SGE initialization
3272 * @adap: the associated adapter
3273 * @p: SGE parameters
3275 * Performs one-time initialization of SGE SW state. Includes determining
3276 * defaults for the assorted SGE parameters, which admins can change until
3277 * they are used to initialize the SGE.
3279 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3283 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3284 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3286 for (i = 0; i < SGE_QSETS; ++i) {
3287 struct qset_params *q = p->qset + i;
3289 q->polling = adap->params.rev > 0;
3290 q->coalesce_usecs = 5;
3291 q->rspq_size = 1024;
3293 q->jumbo_size = 512;
3294 q->txq_size[TXQ_ETH] = 1024;
3295 q->txq_size[TXQ_OFLD] = 1024;
3296 q->txq_size[TXQ_CTRL] = 256;
3300 spin_lock_init(&adap->sge.reg_lock);
3304 * t3_get_desc - dump an SGE descriptor for debugging purposes
3305 * @qs: the queue set
3306 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3307 * @idx: the descriptor index in the queue
3308 * @data: where to dump the descriptor contents
3310 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3311 * size of the descriptor.
3313 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3314 unsigned char *data)
3320 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3322 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3323 return sizeof(struct tx_desc);
3327 if (!qs->rspq.desc || idx >= qs->rspq.size)
3329 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3330 return sizeof(struct rsp_desc);
3334 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3336 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3337 return sizeof(struct rx_desc);
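/*
 * Illustrative only -- a sketch of how a debug path might use t3_get_desc()
 * to dump the first descriptor of a queue set's Ethernet Tx queue (the
 * buffer sizing and surrounding context are assumptions, not taken from
 * this file):
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len = t3_get_desc(qs, TXQ_ETH, 0, buf);
 *
 *	if (len > 0)
 *		print_hex_dump(KERN_DEBUG, "txd: ", DUMP_PREFIX_OFFSET,
 *			       16, 1, buf, len, 0);
 */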