[SCSI] include linux/scatterlist.h in scsi_eh.h
[linux-2.6] / drivers / infiniband / hw / ipath / ipath_driver.c
1 /*
2  * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include <linux/spinlock.h>
35 #include <linux/idr.h>
36 #include <linux/pci.h>
37 #include <linux/io.h>
38 #include <linux/delay.h>
39 #include <linux/netdevice.h>
40 #include <linux/vmalloc.h>
41
42 #include "ipath_kernel.h"
43 #include "ipath_verbs.h"
44 #include "ipath_common.h"
45
46 static void ipath_update_pio_bufs(struct ipath_devdata *);
47
/*
 * Build the "infinipath<N>" name for a unit number.
 *
 * The string is written into one function-static buffer, so each call
 * overwrites the result of the previous call; callers that need the
 * name to survive another call must copy it.
 */
const char *ipath_get_unit_name(int unit)
{
        static char unit_name[16];

        snprintf(unit_name, sizeof(unit_name), "infinipath%u",
                 (unsigned) unit);
        return unit_name;
}
54
55 #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
56 #define PFX IPATH_DRV_NAME ": "
57
58 /*
59  * The size has to be longer than this string, so we can append
60  * board/chip information to it in the init code.
61  */
/*
 * NOTE(review): the array below is const and sized by its initializer,
 * so nothing can be appended to it in place; the comment above looks
 * stale -- confirm.
 */
62 const char ib_ipath_version[] = IPATH_IDSTR "\n";
63
/*
 * unit_table maps a unit number to its struct ipath_devdata and
 * ipath_dev_list links every allocated devdata; both are updated under
 * ipath_devs_lock (see ipath_alloc_devdata() and ipath_free_devdata()).
 */
64 static struct idr unit_table;
65 DEFINE_SPINLOCK(ipath_devs_lock);
66 LIST_HEAD(ipath_dev_list);
67
/* wait queue that ipath_wait_linkstate() sleeps on */
68 wait_queue_head_t ipath_state_wait;
69
/* debug print mask, exposed as the "debug" module parameter below */
70 unsigned ipath_debug = __IPATH_INFO;
71
72 module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
73 MODULE_PARM_DESC(debug, "mask for debug prints");
74 EXPORT_SYMBOL_GPL(ipath_debug);
75
76 MODULE_LICENSE("GPL");
77 MODULE_AUTHOR("QLogic <support@pathscale.com>");
78 MODULE_DESCRIPTION("QLogic InfiniPath driver");
79
/*
 * Printable names for the status values, indexed by the numeric value
 * itself.  The "LState..." entries are placeholders for values marked
 * unused, present only to keep the table densely indexed.
 */
const char *ipath_ibcstatus_str[] = {
        [0x0] = "Disabled",
        [0x1] = "LinkUp",
        [0x2] = "PollActive",
        [0x3] = "PollQuiet",
        [0x4] = "SleepDelay",
        [0x5] = "SleepQuiet",
        [0x6] = "LState6",              /* unused */
        [0x7] = "LState7",              /* unused */
        [0x8] = "CfgDebounce",
        [0x9] = "CfgRcvfCfg",
        [0xA] = "CfgWaitRmt",
        [0xB] = "CfgIdle",
        [0xC] = "RecovRetrain",
        [0xD] = "LState0xD",            /* unused */
        [0xE] = "RecovWaitRmt",
        [0xF] = "RecovIdle",
};
98
99 static void __devexit ipath_remove_one(struct pci_dev *);
100 static int __devinit ipath_init_one(struct pci_dev *,
101                                     const struct pci_device_id *);
102
103 /* Only needed for registration, nothing else needs this info */
/*
 * NOTE(review): ipath_init_one() also switches on the two device IDs
 * below to pick the chip-specific function table.
 */
104 #define PCI_VENDOR_ID_PATHSCALE 0x1fc1
105 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd
106 #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
107
108 /* Number of seconds before our card status check...  */
/* ipath_init_one() schedules status_work HZ * STATUS_TIMEOUT jiffies out */
109 #define STATUS_TIMEOUT 60
110
/* PCI IDs this driver binds to; the all-zero entry terminates the table */
111 static const struct pci_device_id ipath_pci_tbl[] = {
112         { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
113         { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
114         { 0, }
115 };
116
117 MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
118
/* ties the ID table to the probe (ipath_init_one) and remove callbacks */
119 static struct pci_driver ipath_driver = {
120         .name = IPATH_DRV_NAME,
121         .probe = ipath_init_one,
122         .remove = __devexit_p(ipath_remove_one),
123         .id_table = ipath_pci_tbl,
124 };
125
126 static void ipath_check_status(struct work_struct *work)
127 {
128         struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
129                                                 status_work.work);
130
131         /*
132          * If we don't have any interrupts, let the user know and
133          * don't bother checking again.
134          */
135         if (dd->ipath_int_counter == 0)
136                 dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
137 }
138
139 static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
140                              u32 *bar0, u32 *bar1)
141 {
142         int ret;
143
144         ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
145         if (ret)
146                 ipath_dev_err(dd, "failed to read bar0 before enable: "
147                               "error %d\n", -ret);
148
149         ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
150         if (ret)
151                 ipath_dev_err(dd, "failed to read bar1 before enable: "
152                               "error %d\n", -ret);
153
154         ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
155 }
156
157 static void ipath_free_devdata(struct pci_dev *pdev,
158                                struct ipath_devdata *dd)
159 {
160         unsigned long flags;
161
162         pci_set_drvdata(pdev, NULL);
163
164         if (dd->ipath_unit != -1) {
165                 spin_lock_irqsave(&ipath_devs_lock, flags);
166                 idr_remove(&unit_table, dd->ipath_unit);
167                 list_del(&dd->ipath_list);
168                 spin_unlock_irqrestore(&ipath_devs_lock, flags);
169         }
170         vfree(dd);
171 }
172
173 static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
174 {
175         unsigned long flags;
176         struct ipath_devdata *dd;
177         int ret;
178
179         if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
180                 dd = ERR_PTR(-ENOMEM);
181                 goto bail;
182         }
183
184         dd = vmalloc(sizeof(*dd));
185         if (!dd) {
186                 dd = ERR_PTR(-ENOMEM);
187                 goto bail;
188         }
189         memset(dd, 0, sizeof(*dd));
190         dd->ipath_unit = -1;
191
192         spin_lock_irqsave(&ipath_devs_lock, flags);
193
194         ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
195         if (ret < 0) {
196                 printk(KERN_ERR IPATH_DRV_NAME
197                        ": Could not allocate unit ID: error %d\n", -ret);
198                 ipath_free_devdata(pdev, dd);
199                 dd = ERR_PTR(ret);
200                 goto bail_unlock;
201         }
202
203         dd->pcidev = pdev;
204         pci_set_drvdata(pdev, dd);
205
206         INIT_DELAYED_WORK(&dd->status_work, ipath_check_status);
207
208         list_add(&dd->ipath_list, &ipath_dev_list);
209
210 bail_unlock:
211         spin_unlock_irqrestore(&ipath_devs_lock, flags);
212
213 bail:
214         return dd;
215 }
216
217 static inline struct ipath_devdata *__ipath_lookup(int unit)
218 {
219         return idr_find(&unit_table, unit);
220 }
221
222 struct ipath_devdata *ipath_lookup(int unit)
223 {
224         struct ipath_devdata *dd;
225         unsigned long flags;
226
227         spin_lock_irqsave(&ipath_devs_lock, flags);
228         dd = __ipath_lookup(unit);
229         spin_unlock_irqrestore(&ipath_devs_lock, flags);
230
231         return dd;
232 }
233
234 int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
235 {
236         int nunits, npresent, nup;
237         struct ipath_devdata *dd;
238         unsigned long flags;
239         u32 maxports;
240
241         nunits = npresent = nup = maxports = 0;
242
243         spin_lock_irqsave(&ipath_devs_lock, flags);
244
245         list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
246                 nunits++;
247                 if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
248                         npresent++;
249                 if (dd->ipath_lid &&
250                     !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
251                                          | IPATH_LINKUNK)))
252                         nup++;
253                 if (dd->ipath_cfgports > maxports)
254                         maxports = dd->ipath_cfgports;
255         }
256
257         spin_unlock_irqrestore(&ipath_devs_lock, flags);
258
259         if (npresentp)
260                 *npresentp = npresent;
261         if (nupp)
262                 *nupp = nup;
263         if (maxportsp)
264                 *maxportsp = maxports;
265
266         return nunits;
267 }
268
269 /*
270  * These next two routines are placeholders in case we don't have per-arch
271  * code for controlling write combining.  If explicit control of write
272  * combining is not available, performance will probably be awful.
273  */
274
/*
 * Weak fallback: reports that write combining cannot be enabled.  A
 * non-weak definition elsewhere takes precedence at link time.
 */
__attribute__((weak)) int ipath_enable_wc(struct ipath_devdata *dd)
{
        return -EOPNOTSUPP;
}
279
/*
 * Weak fallback: nothing to undo when write combining was never
 * enabled by the matching weak ipath_enable_wc().
 */
__attribute__((weak)) void ipath_disable_wc(struct ipath_devdata *dd)
{
        /* intentionally empty */
}
283
284 /*
285  * Perform a PIO buffer bandwidth write test, to verify proper system
286  * configuration.  Even when all the setup calls work, occasionally
287  * BIOS or other issues can prevent write combining from working, or
288  * can cause other bandwidth problems to the chip.
289  *
290  * This test simply writes the same buffer over and over again, and
291  * measures close to the peak bandwidth to the chip (not testing
292  * data bandwidth to the wire).   On chips that use an address-based
293  * trigger to send packets to the wire, this is easy.  On chips that
294  * use a count to trigger, we want to make sure that the packet doesn't
295  * go out on the wire, or trigger flow control checks.
296  */
297 static void ipath_verify_pioperf(struct ipath_devdata *dd)
298 {
299         u32 pbnum, cnt, lcnt;
300         u32 __iomem *piobuf;
301         u32 *addr;
302         u64 msecs, emsecs;
303
304         piobuf = ipath_getpiobuf(dd, &pbnum);
305         if (!piobuf) {
306                 dev_info(&dd->pcidev->dev,
307                         "No PIObufs for checking perf, skipping\n");
308                 return;
309         }
310
311         /*
312          * Enough to give us a reasonable test, less than piobuf size, and
313          * likely multiple of store buffer length.
314          */
315         cnt = 1024;
316
317         addr = vmalloc(cnt);
318         if (!addr) {
319                 dev_info(&dd->pcidev->dev,
320                         "Couldn't get memory for checking PIO perf,"
321                         " skipping\n");
322                 goto done;
323         }
324
325         preempt_disable();  /* we want reasonably accurate elapsed time */
326         msecs = 1 + jiffies_to_msecs(jiffies);
327         for (lcnt = 0; lcnt < 10000U; lcnt++) {
328                 /* wait until we cross msec boundary */
329                 if (jiffies_to_msecs(jiffies) >= msecs)
330                         break;
331                 udelay(1);
332         }
333
334         writeq(0, piobuf); /* length 0, no dwords actually sent */
335         ipath_flush_wc();
336
337         /*
338          * this is only roughly accurate, since even with preempt we
339          * still take interrupts that could take a while.   Running for
340          * >= 5 msec seems to get us "close enough" to accurate values
341          */
342         msecs = jiffies_to_msecs(jiffies);
343         for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
344                 __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
345                 emsecs = jiffies_to_msecs(jiffies) - msecs;
346         }
347
348         /* 1 GiB/sec, slightly over IB SDR line rate */
349         if (lcnt < (emsecs * 1024U))
350                 ipath_dev_err(dd,
351                         "Performance problem: bandwidth to PIO buffers is "
352                         "only %u MiB/sec\n",
353                         lcnt / (u32) emsecs);
354         else
355                 ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
356                         lcnt / (u32) emsecs);
357
358         preempt_enable();
359
360         vfree(addr);
361
362 done:
363         /* disarm piobuf, so it's available again */
364         ipath_disarm_piobufs(dd, pbnum, 1);
365 }
366
367 static int __devinit ipath_init_one(struct pci_dev *pdev,
368                                     const struct pci_device_id *ent)
369 {
370         int ret, len, j;
371         struct ipath_devdata *dd;
372         unsigned long long addr;
373         u32 bar0 = 0, bar1 = 0;
374
375         dd = ipath_alloc_devdata(pdev);
376         if (IS_ERR(dd)) {
377                 ret = PTR_ERR(dd);
378                 printk(KERN_ERR IPATH_DRV_NAME
379                        ": Could not allocate devdata: error %d\n", -ret);
380                 goto bail;
381         }
382
383         ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
384
385         ret = pci_enable_device(pdev);
386         if (ret) {
387                 /* This can happen iff:
388                  *
389                  * We did a chip reset, and then failed to reprogram the
390                  * BAR, or the chip reset due to an internal error.  We then
391                  * unloaded the driver and reloaded it.
392                  *
393                  * Both reset cases set the BAR back to initial state.  For
394                  * the latter case, the AER sticky error bit at offset 0x718
395                  * should be set, but the Linux kernel doesn't yet know
396                  * about that, it appears.  If the original BAR was retained
397                  * in the kernel data structures, this may be OK.
398                  */
399                 ipath_dev_err(dd, "enable unit %d failed: error %d\n",
400                               dd->ipath_unit, -ret);
401                 goto bail_devdata;
402         }
403         addr = pci_resource_start(pdev, 0);
404         len = pci_resource_len(pdev, 0);
405         ipath_cdbg(VERBOSE, "regbase (0) %llx len %d pdev->irq %d, vend %x/%x "
406                    "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
407                    ent->device, ent->driver_data);
408
409         read_bars(dd, pdev, &bar0, &bar1);
410
411         if (!bar1 && !(bar0 & ~0xf)) {
412                 if (addr) {
413                         dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
414                                  "rewriting as %llx\n", addr);
415                         ret = pci_write_config_dword(
416                                 pdev, PCI_BASE_ADDRESS_0, addr);
417                         if (ret) {
418                                 ipath_dev_err(dd, "rewrite of BAR0 "
419                                               "failed: err %d\n", -ret);
420                                 goto bail_disable;
421                         }
422                         ret = pci_write_config_dword(
423                                 pdev, PCI_BASE_ADDRESS_1, addr >> 32);
424                         if (ret) {
425                                 ipath_dev_err(dd, "rewrite of BAR1 "
426                                               "failed: err %d\n", -ret);
427                                 goto bail_disable;
428                         }
429                 } else {
430                         ipath_dev_err(dd, "BAR is 0 (probable RESET), "
431                                       "not usable until reboot\n");
432                         ret = -ENODEV;
433                         goto bail_disable;
434                 }
435         }
436
437         ret = pci_request_regions(pdev, IPATH_DRV_NAME);
438         if (ret) {
439                 dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
440                          "err %d\n", dd->ipath_unit, -ret);
441                 goto bail_disable;
442         }
443
444         ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
445         if (ret) {
446                 /*
447                  * if the 64 bit setup fails, try 32 bit.  Some systems
448                  * do not setup 64 bit maps on systems with 2GB or less
449                  * memory installed.
450                  */
451                 ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
452                 if (ret) {
453                         dev_info(&pdev->dev,
454                                 "Unable to set DMA mask for unit %u: %d\n",
455                                 dd->ipath_unit, ret);
456                         goto bail_regions;
457                 }
458                 else {
459                         ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
460                         ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
461                         if (ret)
462                                 dev_info(&pdev->dev,
463                                         "Unable to set DMA consistent mask "
464                                         "for unit %u: %d\n",
465                                         dd->ipath_unit, ret);
466
467                 }
468         }
469         else {
470                 ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
471                 if (ret)
472                         dev_info(&pdev->dev,
473                                 "Unable to set DMA consistent mask "
474                                 "for unit %u: %d\n",
475                                 dd->ipath_unit, ret);
476         }
477
478         pci_set_master(pdev);
479
480         /*
481          * Save BARs to rewrite after device reset.  Save all 64 bits of
482          * BAR, just in case.
483          */
484         dd->ipath_pcibar0 = addr;
485         dd->ipath_pcibar1 = addr >> 32;
486         dd->ipath_deviceid = ent->device;       /* save for later use */
487         dd->ipath_vendorid = ent->vendor;
488
489         /* setup the chip-specific functions, as early as possible. */
490         switch (ent->device) {
491         case PCI_DEVICE_ID_INFINIPATH_HT:
492 #ifdef CONFIG_HT_IRQ
493                 ipath_init_iba6110_funcs(dd);
494                 break;
495 #else
496                 ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
497                               "CONFIG_HT_IRQ is not enabled\n", ent->device);
498                 return -ENODEV;
499 #endif
500         case PCI_DEVICE_ID_INFINIPATH_PE800:
501 #ifdef CONFIG_PCI_MSI
502                 ipath_init_iba6120_funcs(dd);
503                 break;
504 #else
505                 ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
506                               "CONFIG_PCI_MSI is not enabled\n", ent->device);
507                 return -ENODEV;
508 #endif
509         default:
510                 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
511                               "failing\n", ent->device);
512                 return -ENODEV;
513         }
514
515         for (j = 0; j < 6; j++) {
516                 if (!pdev->resource[j].start)
517                         continue;
518                 ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
519                            j, (unsigned long long)pdev->resource[j].start,
520                            (unsigned long long)pdev->resource[j].end,
521                            (unsigned long long)pci_resource_len(pdev, j));
522         }
523
524         if (!addr) {
525                 ipath_dev_err(dd, "No valid address in BAR 0!\n");
526                 ret = -ENODEV;
527                 goto bail_regions;
528         }
529
530         dd->ipath_pcirev = pdev->revision;
531
532 #if defined(__powerpc__)
533         /* There isn't a generic way to specify writethrough mappings */
534         dd->ipath_kregbase = __ioremap(addr, len,
535                 (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
536 #else
537         dd->ipath_kregbase = ioremap_nocache(addr, len);
538 #endif
539
540         if (!dd->ipath_kregbase) {
541                 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
542                           addr);
543                 ret = -ENOMEM;
544                 goto bail_iounmap;
545         }
546         dd->ipath_kregend = (u64 __iomem *)
547                 ((void __iomem *)dd->ipath_kregbase + len);
548         dd->ipath_physaddr = addr;      /* used for io_remap, etc. */
549         /* for user mmap */
550         ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
551                    addr, dd->ipath_kregbase);
552
553         /*
554          * clear ipath_flags here instead of in ipath_init_chip as it is set
555          * by ipath_setup_htconfig.
556          */
557         dd->ipath_flags = 0;
558         dd->ipath_lli_counter = 0;
559         dd->ipath_lli_errors = 0;
560
561         if (dd->ipath_f_bus(dd, pdev))
562                 ipath_dev_err(dd, "Failed to setup config space; "
563                               "continuing anyway\n");
564
565         /*
566          * set up our interrupt handler; IRQF_SHARED probably not needed,
567          * since MSI interrupts shouldn't be shared but won't  hurt for now.
568          * check 0 irq after we return from chip-specific bus setup, since
569          * that can affect this due to setup
570          */
571         if (!dd->ipath_irq)
572                 ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
573                               "work\n");
574         else {
575                 ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
576                                   IPATH_DRV_NAME, dd);
577                 if (ret) {
578                         ipath_dev_err(dd, "Couldn't setup irq handler, "
579                                       "irq=%d: %d\n", dd->ipath_irq, ret);
580                         goto bail_iounmap;
581                 }
582         }
583
584         ret = ipath_init_chip(dd, 0);   /* do the chip-specific init */
585         if (ret)
586                 goto bail_irqsetup;
587
588         ret = ipath_enable_wc(dd);
589
590         if (ret) {
591                 ipath_dev_err(dd, "Write combining not enabled "
592                               "(err %d): performance may be poor\n",
593                               -ret);
594                 ret = 0;
595         }
596
597         ipath_verify_pioperf(dd);
598
599         ipath_device_create_group(&pdev->dev, dd);
600         ipathfs_add_device(dd);
601         ipath_user_add(dd);
602         ipath_diag_add(dd);
603         ipath_register_ib_device(dd);
604
605         /* Check that card status in STATUS_TIMEOUT seconds. */
606         schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT);
607
608         goto bail;
609
610 bail_irqsetup:
611         if (pdev->irq) free_irq(pdev->irq, dd);
612
613 bail_iounmap:
614         iounmap((volatile void __iomem *) dd->ipath_kregbase);
615
616 bail_regions:
617         pci_release_regions(pdev);
618
619 bail_disable:
620         pci_disable_device(pdev);
621
622 bail_devdata:
623         ipath_free_devdata(pdev, dd);
624
625 bail:
626         return ret;
627 }
628
629 static void __devexit cleanup_device(struct ipath_devdata *dd)
630 {
631         int port;
632
633         if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
634                 /* can't do anything more with chip; needs re-init */
635                 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
636                 if (dd->ipath_kregbase) {
637                         /*
638                          * if we haven't already cleaned up before these are
639                          * to ensure any register reads/writes "fail" until
640                          * re-init
641                          */
642                         dd->ipath_kregbase = NULL;
643                         dd->ipath_uregbase = 0;
644                         dd->ipath_sregbase = 0;
645                         dd->ipath_cregbase = 0;
646                         dd->ipath_kregsize = 0;
647                 }
648                 ipath_disable_wc(dd);
649         }
650
651         if (dd->ipath_pioavailregs_dma) {
652                 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
653                                   (void *) dd->ipath_pioavailregs_dma,
654                                   dd->ipath_pioavailregs_phys);
655                 dd->ipath_pioavailregs_dma = NULL;
656         }
657         if (dd->ipath_dummy_hdrq) {
658                 dma_free_coherent(&dd->pcidev->dev,
659                         dd->ipath_pd[0]->port_rcvhdrq_size,
660                         dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
661                 dd->ipath_dummy_hdrq = NULL;
662         }
663
664         if (dd->ipath_pageshadow) {
665                 struct page **tmpp = dd->ipath_pageshadow;
666                 dma_addr_t *tmpd = dd->ipath_physshadow;
667                 int i, cnt = 0;
668
669                 ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
670                            "locked\n");
671                 for (port = 0; port < dd->ipath_cfgports; port++) {
672                         int port_tidbase = port * dd->ipath_rcvtidcnt;
673                         int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
674                         for (i = port_tidbase; i < maxtid; i++) {
675                                 if (!tmpp[i])
676                                         continue;
677                                 pci_unmap_page(dd->pcidev, tmpd[i],
678                                         PAGE_SIZE, PCI_DMA_FROMDEVICE);
679                                 ipath_release_user_pages(&tmpp[i], 1);
680                                 tmpp[i] = NULL;
681                                 cnt++;
682                         }
683                 }
684                 if (cnt) {
685                         ipath_stats.sps_pageunlocks += cnt;
686                         ipath_cdbg(VERBOSE, "There were still %u expTID "
687                                    "entries locked\n", cnt);
688                 }
689                 if (ipath_stats.sps_pagelocks ||
690                     ipath_stats.sps_pageunlocks)
691                         ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
692                                    "unlocked via ipath_m{un}lock\n",
693                                    (unsigned long long)
694                                    ipath_stats.sps_pagelocks,
695                                    (unsigned long long)
696                                    ipath_stats.sps_pageunlocks);
697
698                 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
699                            dd->ipath_pageshadow);
700                 tmpp = dd->ipath_pageshadow;
701                 dd->ipath_pageshadow = NULL;
702                 vfree(tmpp);
703         }
704
705         /*
706          * free any resources still in use (usually just kernel ports)
707          * at unload; we do for portcnt, not cfgports, because cfgports
708          * could have changed while we were loaded.
709          */
710         for (port = 0; port < dd->ipath_portcnt; port++) {
711                 struct ipath_portdata *pd = dd->ipath_pd[port];
712                 dd->ipath_pd[port] = NULL;
713                 ipath_free_pddata(dd, pd);
714         }
715         kfree(dd->ipath_pd);
716         /*
717          * debuggability, in case some cleanup path tries to use it
718          * after this
719          */
720         dd->ipath_pd = NULL;
721 }
722
723 static void __devexit ipath_remove_one(struct pci_dev *pdev)
724 {
725         struct ipath_devdata *dd = pci_get_drvdata(pdev);
726
727         ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
728
729         /*
730          * disable the IB link early, to be sure no new packets arrive, which
731          * complicates the shutdown process
732          */
733         ipath_shutdown_device(dd);
734
735         cancel_delayed_work(&dd->status_work);
736         flush_scheduled_work();
737
738         if (dd->verbs_dev)
739                 ipath_unregister_ib_device(dd->verbs_dev);
740
741         ipath_diag_remove(dd);
742         ipath_user_remove(dd);
743         ipathfs_remove_device(dd);
744         ipath_device_remove_group(&pdev->dev, dd);
745
746         ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
747                    "unit %u\n", dd, (u32) dd->ipath_unit);
748
749         cleanup_device(dd);
750
751         /*
752          * turn off rcv, send, and interrupts for all ports, all drivers
753          * should also hard reset the chip here?
754          * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
755          * for all versions of the driver, if they were allocated
756          */
757         if (dd->ipath_irq) {
758                 ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
759                            dd->ipath_unit, dd->ipath_irq);
760                 dd->ipath_f_free_irq(dd);
761         } else
762                 ipath_dbg("irq is 0, not doing free_irq "
763                           "for unit %u\n", dd->ipath_unit);
764         /*
765          * we check for NULL here, because it's outside
766          * the kregbase check, and we need to call it
767          * after the free_irq.  Thus it's possible that
768          * the function pointers were never initialized.
769          */
770         if (dd->ipath_f_cleanup)
771                 /* clean up chip-specific stuff */
772                 dd->ipath_f_cleanup(dd);
773
774         ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
775         iounmap((volatile void __iomem *) dd->ipath_kregbase);
776         pci_release_regions(pdev);
777         ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
778         pci_disable_device(pdev);
779
780         ipath_free_devdata(pdev, dd);
781 }
782
783 /* general driver use */
784 DEFINE_MUTEX(ipath_mutex);
785
/*
 * NOTE(review): no user of this lock is visible in this part of the
 * file; presumably it guards PIO buffer availability state -- confirm.
 */
786 static DEFINE_SPINLOCK(ipath_pioavail_lock);
787
788 /**
789  * ipath_disarm_piobufs - cancel a range of PIO buffers
790  * @dd: the infinipath device
791  * @first: the first PIO buffer to cancel
792  * @cnt: the number of PIO buffers to cancel
793  *
794  * cancel a range of PIO buffers, used when they might be armed, but
795  * not triggered.  Used at init to ensure buffer state, and also user
796  * process close, in case it died while writing to a PIO buffer
797  * Also after errors.
798  */
799 void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
800                           unsigned cnt)
801 {
802         unsigned i, last = first + cnt;
803         u64 sendctrl, sendorig;
804
805         ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
806         sendorig = dd->ipath_sendctrl;
807         for (i = first; i < last; i++) {
808                 sendctrl = sendorig  | INFINIPATH_S_DISARM |
809                         (i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
810                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
811                                  sendctrl);
812         }
813
814         /*
815          * Write it again with current value, in case ipath_sendctrl changed
816          * while we were looping; no critical bits that would require
817          * locking.
818          *
819          * disable PIOAVAILUPD, then re-enable, reading scratch in
820          * between.  This seems to avoid a chip timing race that causes
821          * pioavail updates to memory to stop.
822          */
823         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
824                          sendorig & ~INFINIPATH_S_PIOBUFAVAILUPD);
825         sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
826         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
827                          dd->ipath_sendctrl);
828 }
829
830 /**
831  * ipath_wait_linkstate - wait for an IB link state change to occur
832  * @dd: the infinipath device
833  * @state: the state to wait for
834  * @msecs: the number of milliseconds to wait
835  *
836  * wait up to msecs milliseconds for IB link state change to occur for
837  * now, take the easy polling route.  Currently used only by
838  * ipath_set_linkstate.  Returns 0 if state reached, otherwise
839  * -ETIMEDOUT state can have multiple states set, for any of several
840  * transitions.
841  */
842 static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
843                                 int msecs)
844 {
845         dd->ipath_state_wanted = state;
846         wait_event_interruptible_timeout(ipath_state_wait,
847                                          (dd->ipath_flags & state),
848                                          msecs_to_jiffies(msecs));
849         dd->ipath_state_wanted = 0;
850
851         if (!(dd->ipath_flags & state)) {
852                 u64 val;
853                 ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
854                            " ms\n",
855                            /* test INIT ahead of DOWN, both can be set */
856                            (state & IPATH_LINKINIT) ? "INIT" :
857                            ((state & IPATH_LINKDOWN) ? "DOWN" :
858                             ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
859                            msecs);
860                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
861                 ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
862                            (unsigned long long) ipath_read_kreg64(
863                                    dd, dd->ipath_kregs->kr_ibcctrl),
864                            (unsigned long long) val,
865                            ipath_ibcstatus_str[val & 0xf]);
866         }
867         return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
868 }
869
870 /*
871  * Decode the error status into strings, deciding whether to always
872  * print * it or not depending on "normal packet errors" vs everything
873  * else.   Return 1 if "real" errors, otherwise 0 if only packet
874  * errors, so caller can decide what to print with the string.
875  */
876 int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
877 {
878         int iserr = 1;
879         *buf = '\0';
880         if (err & INFINIPATH_E_PKTERRS) {
881                 if (!(err & ~INFINIPATH_E_PKTERRS))
882                         iserr = 0; // if only packet errors.
883                 if (ipath_debug & __IPATH_ERRPKTDBG) {
884                         if (err & INFINIPATH_E_REBP)
885                                 strlcat(buf, "EBP ", blen);
886                         if (err & INFINIPATH_E_RVCRC)
887                                 strlcat(buf, "VCRC ", blen);
888                         if (err & INFINIPATH_E_RICRC) {
889                                 strlcat(buf, "CRC ", blen);
890                                 // clear for check below, so only once
891                                 err &= INFINIPATH_E_RICRC;
892                         }
893                         if (err & INFINIPATH_E_RSHORTPKTLEN)
894                                 strlcat(buf, "rshortpktlen ", blen);
895                         if (err & INFINIPATH_E_SDROPPEDDATAPKT)
896                                 strlcat(buf, "sdroppeddatapkt ", blen);
897                         if (err & INFINIPATH_E_SPKTLEN)
898                                 strlcat(buf, "spktlen ", blen);
899                 }
900                 if ((err & INFINIPATH_E_RICRC) &&
901                         !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
902                         strlcat(buf, "CRC ", blen);
903                 if (!iserr)
904                         goto done;
905         }
906         if (err & INFINIPATH_E_RHDRLEN)
907                 strlcat(buf, "rhdrlen ", blen);
908         if (err & INFINIPATH_E_RBADTID)
909                 strlcat(buf, "rbadtid ", blen);
910         if (err & INFINIPATH_E_RBADVERSION)
911                 strlcat(buf, "rbadversion ", blen);
912         if (err & INFINIPATH_E_RHDR)
913                 strlcat(buf, "rhdr ", blen);
914         if (err & INFINIPATH_E_RLONGPKTLEN)
915                 strlcat(buf, "rlongpktlen ", blen);
916         if (err & INFINIPATH_E_RMAXPKTLEN)
917                 strlcat(buf, "rmaxpktlen ", blen);
918         if (err & INFINIPATH_E_RMINPKTLEN)
919                 strlcat(buf, "rminpktlen ", blen);
920         if (err & INFINIPATH_E_SMINPKTLEN)
921                 strlcat(buf, "sminpktlen ", blen);
922         if (err & INFINIPATH_E_RFORMATERR)
923                 strlcat(buf, "rformaterr ", blen);
924         if (err & INFINIPATH_E_RUNSUPVL)
925                 strlcat(buf, "runsupvl ", blen);
926         if (err & INFINIPATH_E_RUNEXPCHAR)
927                 strlcat(buf, "runexpchar ", blen);
928         if (err & INFINIPATH_E_RIBFLOW)
929                 strlcat(buf, "ribflow ", blen);
930         if (err & INFINIPATH_E_SUNDERRUN)
931                 strlcat(buf, "sunderrun ", blen);
932         if (err & INFINIPATH_E_SPIOARMLAUNCH)
933                 strlcat(buf, "spioarmlaunch ", blen);
934         if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
935                 strlcat(buf, "sunexperrpktnum ", blen);
936         if (err & INFINIPATH_E_SDROPPEDSMPPKT)
937                 strlcat(buf, "sdroppedsmppkt ", blen);
938         if (err & INFINIPATH_E_SMAXPKTLEN)
939                 strlcat(buf, "smaxpktlen ", blen);
940         if (err & INFINIPATH_E_SUNSUPVL)
941                 strlcat(buf, "sunsupVL ", blen);
942         if (err & INFINIPATH_E_INVALIDADDR)
943                 strlcat(buf, "invalidaddr ", blen);
944         if (err & INFINIPATH_E_RRCVEGRFULL)
945                 strlcat(buf, "rcvegrfull ", blen);
946         if (err & INFINIPATH_E_RRCVHDRFULL)
947                 strlcat(buf, "rcvhdrfull ", blen);
948         if (err & INFINIPATH_E_IBSTATUSCHANGED)
949                 strlcat(buf, "ibcstatuschg ", blen);
950         if (err & INFINIPATH_E_RIBLOSTLINK)
951                 strlcat(buf, "riblostlink ", blen);
952         if (err & INFINIPATH_E_HARDWARE)
953                 strlcat(buf, "hardware ", blen);
954         if (err & INFINIPATH_E_RESET)
955                 strlcat(buf, "reset ", blen);
956 done:
957         return iserr;
958 }
959
960 /**
961  * get_rhf_errstring - decode RHF errors
962  * @err: the err number
963  * @msg: the output buffer
964  * @len: the length of the output buffer
965  *
966  * only used one place now, may want more later
967  */
968 static void get_rhf_errstring(u32 err, char *msg, size_t len)
969 {
970         /* if no errors, and so don't need to check what's first */
971         *msg = '\0';
972
973         if (err & INFINIPATH_RHF_H_ICRCERR)
974                 strlcat(msg, "icrcerr ", len);
975         if (err & INFINIPATH_RHF_H_VCRCERR)
976                 strlcat(msg, "vcrcerr ", len);
977         if (err & INFINIPATH_RHF_H_PARITYERR)
978                 strlcat(msg, "parityerr ", len);
979         if (err & INFINIPATH_RHF_H_LENERR)
980                 strlcat(msg, "lenerr ", len);
981         if (err & INFINIPATH_RHF_H_MTUERR)
982                 strlcat(msg, "mtuerr ", len);
983         if (err & INFINIPATH_RHF_H_IHDRERR)
984                 /* infinipath hdr checksum error */
985                 strlcat(msg, "ipathhdrerr ", len);
986         if (err & INFINIPATH_RHF_H_TIDERR)
987                 strlcat(msg, "tiderr ", len);
988         if (err & INFINIPATH_RHF_H_MKERR)
989                 /* bad port, offset, etc. */
990                 strlcat(msg, "invalid ipathhdr ", len);
991         if (err & INFINIPATH_RHF_H_IBERR)
992                 strlcat(msg, "iberr ", len);
993         if (err & INFINIPATH_RHF_L_SWA)
994                 strlcat(msg, "swA ", len);
995         if (err & INFINIPATH_RHF_L_SWB)
996                 strlcat(msg, "swB ", len);
997 }
998
999 /**
1000  * ipath_get_egrbuf - get an eager buffer
1001  * @dd: the infinipath device
1002  * @bufnum: the eager buffer to get
1003  * @err: unused
1004  *
1005  * must only be called if ipath_pd[port] is known to be allocated
1006  */
1007 static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
1008                                      int err)
1009 {
1010         return dd->ipath_port0_skbinfo ?
1011                 (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
1012 }
1013
1014 /**
1015  * ipath_alloc_skb - allocate an skb and buffer with possible constraints
1016  * @dd: the infinipath device
1017  * @gfp_mask: the sk_buff SFP mask
1018  */
1019 struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
1020                                 gfp_t gfp_mask)
1021 {
1022         struct sk_buff *skb;
1023         u32 len;
1024
1025         /*
1026          * Only fully supported way to handle this is to allocate lots
1027          * extra, align as needed, and then do skb_reserve().  That wastes
1028          * a lot of memory...  I'll have to hack this into infinipath_copy
1029          * also.
1030          */
1031
1032         /*
1033          * We need 2 extra bytes for ipath_ether data sent in the
1034          * key header.  In order to keep everything dword aligned,
1035          * we'll reserve 4 bytes.
1036          */
1037         len = dd->ipath_ibmaxlen + 4;
1038
1039         if (dd->ipath_flags & IPATH_4BYTE_TID) {
1040                 /* We need a 2KB multiple alignment, and there is no way
1041                  * to do it except to allocate extra and then skb_reserve
1042                  * enough to bring it up to the right alignment.
1043                  */
1044                 len += 2047;
1045         }
1046
1047         skb = __dev_alloc_skb(len, gfp_mask);
1048         if (!skb) {
1049                 ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
1050                               len);
1051                 goto bail;
1052         }
1053
1054         skb_reserve(skb, 4);
1055
1056         if (dd->ipath_flags & IPATH_4BYTE_TID) {
1057                 u32 una = (unsigned long)skb->data & 2047;
1058                 if (una)
1059                         skb_reserve(skb, 2048 - una);
1060         }
1061
1062 bail:
1063         return skb;
1064 }
1065
1066 static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
1067                              u32 eflags,
1068                              u32 l,
1069                              u32 etail,
1070                              u64 *rc)
1071 {
1072         char emsg[128];
1073         struct ipath_message_header *hdr;
1074
1075         get_rhf_errstring(eflags, emsg, sizeof emsg);
1076         hdr = (struct ipath_message_header *)&rc[1];
1077         ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
1078                    "tlen=%x opcode=%x egridx=%x: %s\n",
1079                    eflags, l,
1080                    ipath_hdrget_rcv_type((__le32 *) rc),
1081                    ipath_hdrget_length_in_bytes((__le32 *) rc),
1082                    be32_to_cpu(hdr->bth[0]) >> 24,
1083                    etail, emsg);
1084
1085         /* Count local link integrity errors. */
1086         if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
1087                 u8 n = (dd->ipath_ibcctrl >>
1088                         INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
1089                         INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
1090
1091                 if (++dd->ipath_lli_counter > n) {
1092                         dd->ipath_lli_counter = 0;
1093                         dd->ipath_lli_errors++;
1094                 }
1095         }
1096 }
1097
1098 /*
1099  * ipath_kreceive - receive a packet
1100  * @dd: the infinipath device
1101  *
1102  * called from interrupt handler for errors or receive interrupt
1103  */
1104 void ipath_kreceive(struct ipath_devdata *dd)
1105 {
1106         u64 *rc;
1107         void *ebuf;
1108         const u32 rsize = dd->ipath_rcvhdrentsize;      /* words */
1109         const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
1110         u32 etail = -1, l, hdrqtail;
1111         struct ipath_message_header *hdr;
1112         u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0;
1113         static u64 totcalls;    /* stats, may eventually remove */
1114
1115         if (!dd->ipath_hdrqtailptr) {
1116                 ipath_dev_err(dd,
1117                               "hdrqtailptr not set, can't do receives\n");
1118                 goto bail;
1119         }
1120
1121         l = dd->ipath_port0head;
1122         hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr);
1123         if (l == hdrqtail)
1124                 goto bail;
1125
1126 reloop:
1127         for (i = 0; l != hdrqtail; i++) {
1128                 u32 qp;
1129                 u8 *bthbytes;
1130
1131                 rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2));
1132                 hdr = (struct ipath_message_header *)&rc[1];
1133                 /*
1134                  * could make a network order version of IPATH_KD_QP, and
1135                  * do the obvious shift before masking to speed this up.
1136                  */
1137                 qp = ntohl(hdr->bth[1]) & 0xffffff;
1138                 bthbytes = (u8 *) hdr->bth;
1139
1140                 eflags = ipath_hdrget_err_flags((__le32 *) rc);
1141                 etype = ipath_hdrget_rcv_type((__le32 *) rc);
1142                 /* total length */
1143                 tlen = ipath_hdrget_length_in_bytes((__le32 *) rc);
1144                 ebuf = NULL;
1145                 if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
1146                         /*
1147                          * it turns out that the chips uses an eager buffer
1148                          * for all non-expected packets, whether it "needs"
1149                          * one or not.  So always get the index, but don't
1150                          * set ebuf (so we try to copy data) unless the
1151                          * length requires it.
1152                          */
1153                         etail = ipath_hdrget_index((__le32 *) rc);
1154                         if (tlen > sizeof(*hdr) ||
1155                             etype == RCVHQ_RCV_TYPE_NON_KD)
1156                                 ebuf = ipath_get_egrbuf(dd, etail, 0);
1157                 }
1158
1159                 /*
1160                  * both tiderr and ipathhdrerr are set for all plain IB
1161                  * packets; only ipathhdrerr should be set.
1162                  */
1163
1164                 if (etype != RCVHQ_RCV_TYPE_NON_KD && etype !=
1165                     RCVHQ_RCV_TYPE_ERROR && ipath_hdrget_ipath_ver(
1166                             hdr->iph.ver_port_tid_offset) !=
1167                     IPS_PROTO_VERSION) {
1168                         ipath_cdbg(PKT, "Bad InfiniPath protocol version "
1169                                    "%x\n", etype);
1170                 }
1171
1172                 if (unlikely(eflags))
1173                         ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
1174                 else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
1175                         ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
1176                         if (dd->ipath_lli_counter)
1177                                 dd->ipath_lli_counter--;
1178                         ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
1179                                    "qp=%x), len %x; ignored\n",
1180                                    etype, bthbytes[0], qp, tlen);
1181                 }
1182                 else if (etype == RCVHQ_RCV_TYPE_EAGER)
1183                         ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
1184                                    "qp=%x), len %x; ignored\n",
1185                                    etype, bthbytes[0], qp, tlen);
1186                 else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
1187                         ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
1188                                   be32_to_cpu(hdr->bth[0]) & 0xff);
1189                 else {
1190                         /*
1191                          * error packet, type of error  unknown.
1192                          * Probably type 3, but we don't know, so don't
1193                          * even try to print the opcode, etc.
1194                          */
1195                         ipath_dbg("Error Pkt, but no eflags! egrbuf %x, "
1196                                   "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; "
1197                                   "hdr %llx %llx %llx %llx %llx\n",
1198                                   etail, tlen, (unsigned long) rc, l,
1199                                   (unsigned long long) rc[0],
1200                                   (unsigned long long) rc[1],
1201                                   (unsigned long long) rc[2],
1202                                   (unsigned long long) rc[3],
1203                                   (unsigned long long) rc[4],
1204                                   (unsigned long long) rc[5]);
1205                 }
1206                 l += rsize;
1207                 if (l >= maxcnt)
1208                         l = 0;
1209                 if (etype != RCVHQ_RCV_TYPE_EXPECTED)
1210                     updegr = 1;
1211                 /*
1212                  * update head regs on last packet, and every 16 packets.
1213                  * Reduce bus traffic, while still trying to prevent
1214                  * rcvhdrq overflows, for when the queue is nearly full
1215                  */
1216                 if (l == hdrqtail || (i && !(i&0xf))) {
1217                         u64 lval;
1218                         if (l == hdrqtail)
1219                                 /* request IBA6120 interrupt only on last */
1220                                 lval = dd->ipath_rhdrhead_intr_off | l;
1221                         else
1222                                 lval = l;
1223                         (void)ipath_write_ureg(dd, ur_rcvhdrhead, lval, 0);
1224                         if (updegr) {
1225                                 (void)ipath_write_ureg(dd, ur_rcvegrindexhead,
1226                                                        etail, 0);
1227                                 updegr = 0;
1228                         }
1229                 }
1230         }
1231
1232         if (!dd->ipath_rhdrhead_intr_off && !reloop) {
1233                 /* IBA6110 workaround; we can have a race clearing chip
1234                  * interrupt with another interrupt about to be delivered,
1235                  * and can clear it before it is delivered on the GPIO
1236                  * workaround.  By doing the extra check here for the
1237                  * in-memory tail register updating while we were doing
1238                  * earlier packets, we "almost" guarantee we have covered
1239                  * that case.
1240                  */
1241                 u32 hqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
1242                 if (hqtail != hdrqtail) {
1243                         hdrqtail = hqtail;
1244                         reloop = 1; /* loop 1 extra time at most */
1245                         goto reloop;
1246                 }
1247         }
1248
1249         pkttot += i;
1250
1251         dd->ipath_port0head = l;
1252
1253         if (pkttot > ipath_stats.sps_maxpkts_call)
1254                 ipath_stats.sps_maxpkts_call = pkttot;
1255         ipath_stats.sps_port0pkts += pkttot;
1256         ipath_stats.sps_avgpkts_call =
1257                 ipath_stats.sps_port0pkts / ++totcalls;
1258
1259 bail:;
1260 }
1261
1262 /**
1263  * ipath_update_pio_bufs - update shadow copy of the PIO availability map
1264  * @dd: the infinipath device
1265  *
1266  * called whenever our local copy indicates we have run out of send buffers
1267  * NOTE: This can be called from interrupt context by some code
1268  * and from non-interrupt context by ipath_getpiobuf().
1269  */
1270
1271 static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1272 {
1273         unsigned long flags;
1274         int i;
1275         const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
1276
1277         /* If the generation (check) bits have changed, then we update the
1278          * busy bit for the corresponding PIO buffer.  This algorithm will
1279          * modify positions to the value they already have in some cases
1280          * (i.e., no change), but it's faster than changing only the bits
1281          * that have changed.
1282          *
1283          * We would like to do this atomicly, to avoid spinlocks in the
1284          * critical send path, but that's not really possible, given the
1285          * type of changes, and that this routine could be called on
1286          * multiple cpu's simultaneously, so we lock in this routine only,
1287          * to avoid conflicting updates; all we change is the shadow, and
1288          * it's a single 64 bit memory location, so by definition the update
1289          * is atomic in terms of what other cpu's can see in testing the
1290          * bits.  The spin_lock overhead isn't too bad, since it only
1291          * happens when all buffers are in use, so only cpu overhead, not
1292          * latency or bandwidth is affected.
1293          */
1294 #define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL
1295         if (!dd->ipath_pioavailregs_dma) {
1296                 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
1297                 return;
1298         }
1299         if (ipath_debug & __IPATH_VERBDBG) {
1300                 /* only if packet debug and verbose */
1301                 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1302                 unsigned long *shadow = dd->ipath_pioavailshadow;
1303
1304                 ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
1305                            "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
1306                            "s3=%lx\n",
1307                            (unsigned long long) le64_to_cpu(dma[0]),
1308                            shadow[0],
1309                            (unsigned long long) le64_to_cpu(dma[1]),
1310                            shadow[1],
1311                            (unsigned long long) le64_to_cpu(dma[2]),
1312                            shadow[2],
1313                            (unsigned long long) le64_to_cpu(dma[3]),
1314                            shadow[3]);
1315                 if (piobregs > 4)
1316                         ipath_cdbg(
1317                                 PKT, "2nd group, dma4=%llx shad4=%lx, "
1318                                 "d5=%llx s5=%lx, d6=%llx s6=%lx, "
1319                                 "d7=%llx s7=%lx\n",
1320                                 (unsigned long long) le64_to_cpu(dma[4]),
1321                                 shadow[4],
1322                                 (unsigned long long) le64_to_cpu(dma[5]),
1323                                 shadow[5],
1324                                 (unsigned long long) le64_to_cpu(dma[6]),
1325                                 shadow[6],
1326                                 (unsigned long long) le64_to_cpu(dma[7]),
1327                                 shadow[7]);
1328         }
1329         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1330         for (i = 0; i < piobregs; i++) {
1331                 u64 pchbusy, pchg, piov, pnew;
1332                 /*
1333                  * Chip Errata: bug 6641; even and odd qwords>3 are swapped
1334                  */
1335                 if (i > 3) {
1336                         if (i & 1)
1337                                 piov = le64_to_cpu(
1338                                         dd->ipath_pioavailregs_dma[i - 1]);
1339                         else
1340                                 piov = le64_to_cpu(
1341                                         dd->ipath_pioavailregs_dma[i + 1]);
1342                 } else
1343                         piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
1344                 pchg = _IPATH_ALL_CHECKBITS &
1345                         ~(dd->ipath_pioavailshadow[i] ^ piov);
1346                 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
1347                 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
1348                         pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
1349                         pnew |= piov & pchbusy;
1350                         dd->ipath_pioavailshadow[i] = pnew;
1351                 }
1352         }
1353         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1354 }
1355
1356 /**
1357  * ipath_setrcvhdrsize - set the receive header size
1358  * @dd: the infinipath device
1359  * @rhdrsize: the receive header size
1360  *
1361  * called from user init code, and also layered driver init
1362  */
1363 int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
1364 {
1365         int ret = 0;
1366
1367         if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
1368                 if (dd->ipath_rcvhdrsize != rhdrsize) {
1369                         dev_info(&dd->pcidev->dev,
1370                                  "Error: can't set protocol header "
1371                                  "size %u, already %u\n",
1372                                  rhdrsize, dd->ipath_rcvhdrsize);
1373                         ret = -EAGAIN;
1374                 } else
1375                         ipath_cdbg(VERBOSE, "Reuse same protocol header "
1376                                    "size %u\n", dd->ipath_rcvhdrsize);
1377         } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
1378                                (sizeof(u64) / sizeof(u32)))) {
1379                 ipath_dbg("Error: can't set protocol header size %u "
1380                           "(> max %u)\n", rhdrsize,
1381                           dd->ipath_rcvhdrentsize -
1382                           (u32) (sizeof(u64) / sizeof(u32)));
1383                 ret = -EOVERFLOW;
1384         } else {
1385                 dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
1386                 dd->ipath_rcvhdrsize = rhdrsize;
1387                 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
1388                                  dd->ipath_rcvhdrsize);
1389                 ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
1390                            dd->ipath_rcvhdrsize);
1391         }
1392         return ret;
1393 }
1394
1395 /**
1396  * ipath_getpiobuf - find an available pio buffer
1397  * @dd: the infinipath device
1398  * @pbufnum: the buffer number is placed here
1399  *
1400  * do appropriate marking as busy, etc.
1401  * returns buffer number if one found (>=0), negative number is error.
1402  * Used by ipath_layer_send
1403  */
1404 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
1405 {
1406         int i, j, starti, updated = 0;
1407         unsigned piobcnt, iter;
1408         unsigned long flags;
1409         unsigned long *shadow = dd->ipath_pioavailshadow;
1410         u32 __iomem *buf;
1411
1412         piobcnt = (unsigned)(dd->ipath_piobcnt2k
1413                              + dd->ipath_piobcnt4k);
1414         starti = dd->ipath_lastport_piobuf;
1415         iter = piobcnt - starti;
1416         if (dd->ipath_upd_pio_shadow) {
1417                 /*
1418                  * Minor optimization.  If we had no buffers on last call,
1419                  * start out by doing the update; continue and do scan even
1420                  * if no buffers were updated, to be paranoid
1421                  */
1422                 ipath_update_pio_bufs(dd);
1423                 /* we scanned here, don't do it at end of scan */
1424                 updated = 1;
1425                 i = starti;
1426         } else
1427                 i = dd->ipath_lastpioindex;
1428
1429 rescan:
1430         /*
1431          * while test_and_set_bit() is atomic, we do that and then the
1432          * change_bit(), and the pair is not.  See if this is the cause
1433          * of the remaining armlaunch errors.
1434          */
1435         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1436         for (j = 0; j < iter; j++, i++) {
1437                 if (i >= piobcnt)
1438                         i = starti;
1439                 /*
1440                  * To avoid bus lock overhead, we first find a candidate
1441                  * buffer, then do the test and set, and continue if that
1442                  * fails.
1443                  */
1444                 if (test_bit((2 * i) + 1, shadow) ||
1445                     test_and_set_bit((2 * i) + 1, shadow))
1446                         continue;
1447                 /* flip generation bit */
1448                 change_bit(2 * i, shadow);
1449                 break;
1450         }
1451         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1452
1453         if (j == iter) {
1454                 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1455
1456                 /*
1457                  * first time through; shadow exhausted, but may be real
1458                  * buffers available, so go see; if any updated, rescan
1459                  * (once)
1460                  */
1461                 if (!updated) {
1462                         ipath_update_pio_bufs(dd);
1463                         updated = 1;
1464                         i = starti;
1465                         goto rescan;
1466                 }
1467                 dd->ipath_upd_pio_shadow = 1;
1468                 /*
1469                  * not atomic, but if we lose one once in a while, that's OK
1470                  */
1471                 ipath_stats.sps_nopiobufs++;
1472                 if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1473                         ipath_dbg(
1474                                 "%u pio sends with no bufavail; dmacopy: "
1475                                 "%llx %llx %llx %llx; shadow:  "
1476                                 "%lx %lx %lx %lx\n",
1477                                 dd->ipath_consec_nopiobuf,
1478                                 (unsigned long long) le64_to_cpu(dma[0]),
1479                                 (unsigned long long) le64_to_cpu(dma[1]),
1480                                 (unsigned long long) le64_to_cpu(dma[2]),
1481                                 (unsigned long long) le64_to_cpu(dma[3]),
1482                                 shadow[0], shadow[1], shadow[2],
1483                                 shadow[3]);
1484                         /*
1485                          * 4 buffers per byte, 4 registers above, cover rest
1486                          * below
1487                          */
1488                         if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1489                             (sizeof(shadow[0]) * 4 * 4))
1490                                 ipath_dbg("2nd group: dmacopy: %llx %llx "
1491                                           "%llx %llx; shadow: %lx %lx "
1492                                           "%lx %lx\n",
1493                                           (unsigned long long)
1494                                           le64_to_cpu(dma[4]),
1495                                           (unsigned long long)
1496                                           le64_to_cpu(dma[5]),
1497                                           (unsigned long long)
1498                                           le64_to_cpu(dma[6]),
1499                                           (unsigned long long)
1500                                           le64_to_cpu(dma[7]),
1501                                           shadow[4], shadow[5],
1502                                           shadow[6], shadow[7]);
1503                 }
1504                 buf = NULL;
1505                 goto bail;
1506         }
1507
1508         /*
1509          * set next starting place.  Since it's just an optimization,
1510          * it doesn't matter who wins on this, so no locking
1511          */
1512         dd->ipath_lastpioindex = i + 1;
1513         if (dd->ipath_upd_pio_shadow)
1514                 dd->ipath_upd_pio_shadow = 0;
1515         if (dd->ipath_consec_nopiobuf)
1516                 dd->ipath_consec_nopiobuf = 0;
1517         if (i < dd->ipath_piobcnt2k)
1518                 buf = (u32 __iomem *) (dd->ipath_pio2kbase +
1519                                        i * dd->ipath_palign);
1520         else
1521                 buf = (u32 __iomem *)
1522                         (dd->ipath_pio4kbase +
1523                          (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
1524         ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
1525                    i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf);
1526         if (pbufnum)
1527                 *pbufnum = i;
1528
1529 bail:
1530         return buf;
1531 }
1532
1533 /**
1534  * ipath_create_rcvhdrq - create a receive header queue
1535  * @dd: the infinipath device
1536  * @pd: the port data
1537  *
1538  * this must be contiguous memory (from an i/o perspective), and must be
1539  * DMA'able (which means for some systems, it will go through an IOMMU,
1540  * or be forced into a low address range).
1541  */
1542 int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1543                          struct ipath_portdata *pd)
1544 {
1545         int ret = 0;
1546
1547         if (!pd->port_rcvhdrq) {
1548                 dma_addr_t phys_hdrqtail;
1549                 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
1550                 int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1551                                 sizeof(u32), PAGE_SIZE);
1552
1553                 pd->port_rcvhdrq = dma_alloc_coherent(
1554                         &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
1555                         gfp_flags);
1556
1557                 if (!pd->port_rcvhdrq) {
1558                         ipath_dev_err(dd, "attempt to allocate %d bytes "
1559                                       "for port %u rcvhdrq failed\n",
1560                                       amt, pd->port_port);
1561                         ret = -ENOMEM;
1562                         goto bail;
1563                 }
1564                 pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
1565                         &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
1566                 if (!pd->port_rcvhdrtail_kvaddr) {
1567                         ipath_dev_err(dd, "attempt to allocate 1 page "
1568                                       "for port %u rcvhdrqtailaddr failed\n",
1569                                       pd->port_port);
1570                         ret = -ENOMEM;
1571                         dma_free_coherent(&dd->pcidev->dev, amt,
1572                                           pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
1573                         pd->port_rcvhdrq = NULL;
1574                         goto bail;
1575                 }
1576                 pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
1577
1578                 pd->port_rcvhdrq_size = amt;
1579
1580                 ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
1581                            "for port %u rcvhdr Q\n",
1582                            amt >> PAGE_SHIFT, pd->port_rcvhdrq,
1583                            (unsigned long) pd->port_rcvhdrq_phys,
1584                            (unsigned long) pd->port_rcvhdrq_size,
1585                            pd->port_port);
1586
1587                 ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
1588                            pd->port_port,
1589                            (unsigned long long) phys_hdrqtail);
1590         }
1591         else
1592                 ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
1593                            "hdrtailaddr@%p %llx physical\n",
1594                            pd->port_port, pd->port_rcvhdrq,
1595                            (unsigned long long) pd->port_rcvhdrq_phys,
1596                            pd->port_rcvhdrtail_kvaddr, (unsigned long long)
1597                            pd->port_rcvhdrqtailaddr_phys);
1598
1599         /* clear for security and sanity on each use */
1600         memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
1601         memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
1602
1603         /*
1604          * tell chip each time we init it, even if we are re-using previous
1605          * memory (we zero the register at process close)
1606          */
1607         ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
1608                               pd->port_port, pd->port_rcvhdrqtailaddr_phys);
1609         ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
1610                               pd->port_port, pd->port_rcvhdrq_phys);
1611
1612         ret = 0;
1613 bail:
1614         return ret;
1615 }
1616
1617 int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id,
1618                            u64 bits_to_wait_for, u64 * valp)
1619 {
1620         unsigned long timeout;
1621         u64 lastval, val;
1622         int ret;
1623
1624         lastval = ipath_read_kreg64(dd, reg_id);
1625         /* wait a ridiculously long time */
1626         timeout = jiffies + msecs_to_jiffies(5);
1627         do {
1628                 val = ipath_read_kreg64(dd, reg_id);
1629                 /* set so they have something, even on failures. */
1630                 *valp = val;
1631                 if ((val & bits_to_wait_for) == bits_to_wait_for) {
1632                         ret = 0;
1633                         break;
1634                 }
1635                 if (val != lastval)
1636                         ipath_cdbg(VERBOSE, "Changed from %llx to %llx, "
1637                                    "waiting for %llx bits\n",
1638                                    (unsigned long long) lastval,
1639                                    (unsigned long long) val,
1640                                    (unsigned long long) bits_to_wait_for);
1641                 cond_resched();
1642                 if (time_after(jiffies, timeout)) {
1643                         ipath_dbg("Didn't get bits %llx in register 0x%x, "
1644                                   "got %llx\n",
1645                                   (unsigned long long) bits_to_wait_for,
1646                                   reg_id, (unsigned long long) *valp);
1647                         ret = -ENODEV;
1648                         break;
1649                 }
1650         } while (1);
1651
1652         return ret;
1653 }
1654
1655 /**
1656  * ipath_waitfor_mdio_cmdready - wait for last command to complete
1657  * @dd: the infinipath device
1658  *
1659  * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go
1660  * away indicating the last command has completed.  It doesn't return data
1661  */
1662 int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
1663 {
1664         unsigned long timeout;
1665         u64 val;
1666         int ret;
1667
1668         /* wait a ridiculously long time */
1669         timeout = jiffies + msecs_to_jiffies(5);
1670         do {
1671                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio);
1672                 if (!(val & IPATH_MDIO_CMDVALID)) {
1673                         ret = 0;
1674                         break;
1675                 }
1676                 cond_resched();
1677                 if (time_after(jiffies, timeout)) {
1678                         ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n",
1679                                   (unsigned long long) val);
1680                         ret = -ENODEV;
1681                         break;
1682                 }
1683         } while (1);
1684
1685         return ret;
1686 }
1687
1688
1689 /*
1690  * Flush all sends that might be in the ready to send state, as well as any
1691  * that are in the process of being sent.   Used whenever we need to be
1692  * sure the send side is idle.  Cleans up all buffer state by canceling
1693  * all pio buffers, and issuing an abort, which cleans up anything in the
1694  * launch fifo.  The cancel is superfluous on some chip versions, but
1695  * it's safer to always do it.
1696  * PIOAvail bits are updated by the chip as if normal send had happened.
1697  */
1698 void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
1699 {
1700         ipath_dbg("Cancelling all in-progress send buffers\n");
1701         dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */
1702         /*
1703          * the abort bit is auto-clearing.  We read scratch to be sure
1704          * that cancels and the abort have taken effect in the chip.
1705          */
1706         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1707                 INFINIPATH_S_ABORT);
1708         ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1709         ipath_disarm_piobufs(dd, 0,
1710                 (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
1711         if (restore_sendctrl) /* else done by caller later */
1712                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1713                                  dd->ipath_sendctrl);
1714
1715         /* and again, be sure all have hit the chip */
1716         ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1717 }
1718
1719
1720 static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1721 {
1722         static const char *what[4] = {
1723                 [0] = "DOWN",
1724                 [INFINIPATH_IBCC_LINKCMD_INIT] = "INIT",
1725                 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
1726                 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
1727         };
1728         int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
1729                         INFINIPATH_IBCC_LINKCMD_MASK;
1730
1731         ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
1732                    "is %s\n", dd->ipath_unit,
1733                    what[linkcmd],
1734                    ipath_ibcstatus_str[
1735                            (ipath_read_kreg64
1736                             (dd, dd->ipath_kregs->kr_ibcstatus) >>
1737                             INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1738                            INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
1739         /* flush all queued sends when going to DOWN or INIT, to be sure that
1740          * they don't block MAD packets */
1741         if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT)
1742                 ipath_cancel_sends(dd, 1);
1743
1744         ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1745                          dd->ipath_ibcctrl | which);
1746 }
1747
1748 int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
1749 {
1750         u32 lstate;
1751         int ret;
1752
1753         switch (newstate) {
1754         case IPATH_IB_LINKDOWN:
1755                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
1756                                     INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1757                 /* don't wait */
1758                 ret = 0;
1759                 goto bail;
1760
1761         case IPATH_IB_LINKDOWN_SLEEP:
1762                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
1763                                     INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1764                 /* don't wait */
1765                 ret = 0;
1766                 goto bail;
1767
1768         case IPATH_IB_LINKDOWN_DISABLE:
1769                 ipath_set_ib_lstate(dd,
1770                                     INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
1771                                     INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1772                 /* don't wait */
1773                 ret = 0;
1774                 goto bail;
1775
1776         case IPATH_IB_LINKINIT:
1777                 if (dd->ipath_flags & IPATH_LINKINIT) {
1778                         ret = 0;
1779                         goto bail;
1780                 }
1781                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
1782                                     INFINIPATH_IBCC_LINKCMD_SHIFT);
1783                 lstate = IPATH_LINKINIT;
1784                 break;
1785
1786         case IPATH_IB_LINKARM:
1787                 if (dd->ipath_flags & IPATH_LINKARMED) {
1788                         ret = 0;
1789                         goto bail;
1790                 }
1791                 if (!(dd->ipath_flags &
1792                       (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
1793                         ret = -EINVAL;
1794                         goto bail;
1795                 }
1796                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
1797                                     INFINIPATH_IBCC_LINKCMD_SHIFT);
1798                 /*
1799                  * Since the port can transition to ACTIVE by receiving
1800                  * a non VL 15 packet, wait for either state.
1801                  */
1802                 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
1803                 break;
1804
1805         case IPATH_IB_LINKACTIVE:
1806                 if (dd->ipath_flags & IPATH_LINKACTIVE) {
1807                         ret = 0;
1808                         goto bail;
1809                 }
1810                 if (!(dd->ipath_flags & IPATH_LINKARMED)) {
1811                         ret = -EINVAL;
1812                         goto bail;
1813                 }
1814                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
1815                                     INFINIPATH_IBCC_LINKCMD_SHIFT);
1816                 lstate = IPATH_LINKACTIVE;
1817                 break;
1818
1819         case IPATH_IB_LINK_LOOPBACK:
1820                 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
1821                 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
1822                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1823                                  dd->ipath_ibcctrl);
1824                 ret = 0;
1825                 goto bail; // no state change to wait for
1826
1827         case IPATH_IB_LINK_EXTERNAL:
1828                 dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
1829                 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
1830                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1831                                  dd->ipath_ibcctrl);
1832                 ret = 0;
1833                 goto bail; // no state change to wait for
1834
1835         default:
1836                 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
1837                 ret = -EINVAL;
1838                 goto bail;
1839         }
1840         ret = ipath_wait_linkstate(dd, lstate, 2000);
1841
1842 bail:
1843         return ret;
1844 }
1845
1846 /**
1847  * ipath_set_mtu - set the MTU
1848  * @dd: the infinipath device
1849  * @arg: the new MTU
1850  *
1851  * we can handle "any" incoming size, the issue here is whether we
1852  * need to restrict our outgoing size.   For now, we don't do any
1853  * sanity checking on this, and we don't deal with what happens to
1854  * programs that are already running when the size changes.
1855  * NOTE: changing the MTU will usually cause the IBC to go back to
1856  * link initialize (IPATH_IBSTATE_INIT) state...
1857  */
1858 int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
1859 {
1860         u32 piosize;
1861         int changed = 0;
1862         int ret;
1863
1864         /*
1865          * mtu is IB data payload max.  It's the largest power of 2 less
1866          * than piosize (or even larger, since it only really controls the
1867          * largest we can receive; we can send the max of the mtu and
1868          * piosize).  We check that it's one of the valid IB sizes.
1869          */
1870         if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
1871             arg != 4096) {
1872                 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
1873                 ret = -EINVAL;
1874                 goto bail;
1875         }
1876         if (dd->ipath_ibmtu == arg) {
1877                 ret = 0;        /* same as current */
1878                 goto bail;
1879         }
1880
1881         piosize = dd->ipath_ibmaxlen;
1882         dd->ipath_ibmtu = arg;
1883
1884         if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
1885                 /* Only if it's not the initial value (or reset to it) */
1886                 if (piosize != dd->ipath_init_ibmaxlen) {
1887                         dd->ipath_ibmaxlen = piosize;
1888                         changed = 1;
1889                 }
1890         } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
1891                 piosize = arg + IPATH_PIO_MAXIBHDR;
1892                 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
1893                            "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
1894                            arg);
1895                 dd->ipath_ibmaxlen = piosize;
1896                 changed = 1;
1897         }
1898
1899         if (changed) {
1900                 /*
1901                  * set the IBC maxpktlength to the size of our pio
1902                  * buffers in words
1903                  */
1904                 u64 ibc = dd->ipath_ibcctrl;
1905                 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
1906                          INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
1907
1908                 piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
1909                 dd->ipath_ibmaxlen = piosize;
1910                 piosize /= sizeof(u32); /* in words */
1911                 /*
1912                  * for ICRC, which we only send in diag test pkt mode, and
1913                  * we don't need to worry about that for mtu
1914                  */
1915                 piosize += 1;
1916
1917                 ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
1918                 dd->ipath_ibcctrl = ibc;
1919                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1920                                  dd->ipath_ibcctrl);
1921                 dd->ipath_f_tidtemplate(dd);
1922         }
1923
1924         ret = 0;
1925
1926 bail:
1927         return ret;
1928 }
1929
1930 int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
1931 {
1932         dd->ipath_lid = arg;
1933         dd->ipath_lmc = lmc;
1934
1935         return 0;
1936 }
1937
1938
1939 /**
1940  * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
1941  * @dd: the infinipath device
1942  * @regno: the register number to write
1943  * @port: the port containing the register
1944  * @value: the value to write
1945  *
1946  * Registers that vary with the chip implementation constants (port)
1947  * use this routine.
1948  */
1949 void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
1950                           unsigned port, u64 value)
1951 {
1952         u16 where;
1953
1954         if (port < dd->ipath_portcnt &&
1955             (regno == dd->ipath_kregs->kr_rcvhdraddr ||
1956              regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
1957                 where = regno + port;
1958         else
1959                 where = -1;
1960
1961         ipath_write_kreg(dd, where, value);
1962 }
1963
/*
 * What follows handles the "obviously simple" task of overriding the
 * state of the LEDs, which normally show the link's physical and logical
 * status.  It gets complicated because hardware mappings differ, because
 * the board-dependent routine is called from interrupts, and because the
 * LEDs are also required to _flash_.
 */

/* The blink frequency lives in bits 8..15 of the override value */
#define LED_OVER_FREQ_SHIFT 8
#define LED_OVER_FREQ_MASK (0xFF << LED_OVER_FREQ_SHIFT)

/* A "non-zero" value, so the override is forced, with both LEDs off */
#define LED_OVER_BOTH_OFF 8
1975
1976 static void ipath_run_led_override(unsigned long opaque)
1977 {
1978         struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
1979         int timeoff;
1980         int pidx;
1981         u64 lstate, ltstate, val;
1982
1983         if (!(dd->ipath_flags & IPATH_INITTED))
1984                 return;
1985
1986         pidx = dd->ipath_led_override_phase++ & 1;
1987         dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
1988         timeoff = dd->ipath_led_override_timeoff;
1989
1990         /*
1991          * below potentially restores the LED values per current status,
1992          * should also possibly setup the traffic-blink register,
1993          * but leave that to per-chip functions.
1994          */
1995         val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
1996         ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1997                   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
1998         lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
1999                  INFINIPATH_IBCS_LINKSTATE_MASK;
2000
2001         dd->ipath_f_setextled(dd, lstate, ltstate);
2002         mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
2003 }
2004
2005 void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
2006 {
2007         int timeoff, freq;
2008
2009         if (!(dd->ipath_flags & IPATH_INITTED))
2010                 return;
2011
2012         /* First check if we are blinking. If not, use 1HZ polling */
2013         timeoff = HZ;
2014         freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
2015
2016         if (freq) {
2017                 /* For blink, set each phase from one nybble of val */
2018                 dd->ipath_led_override_vals[0] = val & 0xF;
2019                 dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
2020                 timeoff = (HZ << 4)/freq;
2021         } else {
2022                 /* Non-blink set both phases the same. */
2023                 dd->ipath_led_override_vals[0] = val & 0xF;
2024                 dd->ipath_led_override_vals[1] = val & 0xF;
2025         }
2026         dd->ipath_led_override_timeoff = timeoff;
2027
2028         /*
2029          * If the timer has not already been started, do so. Use a "quick"
2030          * timeout so the function will be called soon, to look at our request.
2031          */
2032         if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
2033                 /* Need to start timer */
2034                 init_timer(&dd->ipath_led_override_timer);
2035                 dd->ipath_led_override_timer.function =
2036                                                  ipath_run_led_override;
2037                 dd->ipath_led_override_timer.data = (unsigned long) dd;
2038                 dd->ipath_led_override_timer.expires = jiffies + 1;
2039                 add_timer(&dd->ipath_led_override_timer);
2040         } else {
2041                 atomic_dec(&dd->ipath_led_override_timer_active);
2042         }
2043 }
2044
2045 /**
2046  * ipath_shutdown_device - shut down a device
2047  * @dd: the infinipath device
2048  *
2049  * This is called to make the device quiet when we are about to
2050  * unload the driver, and also when the device is administratively
2051  * disabled.   It does not free any data structures.
2052  * Everything it does has to be setup again by ipath_init_chip(dd,1)
2053  */
2054 void ipath_shutdown_device(struct ipath_devdata *dd)
2055 {
2056         ipath_dbg("Shutting down the device\n");
2057
2058         dd->ipath_flags |= IPATH_LINKUNK;
2059         dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
2060                              IPATH_LINKINIT | IPATH_LINKARMED |
2061                              IPATH_LINKACTIVE);
2062         *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
2063                                 IPATH_STATUS_IB_READY);
2064
2065         /* mask interrupts, but not errors */
2066         ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
2067
2068         dd->ipath_rcvctrl = 0;
2069         ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
2070                          dd->ipath_rcvctrl);
2071
2072         /*
2073          * gracefully stop all sends allowing any in progress to trickle out
2074          * first.
2075          */
2076         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL);
2077         /* flush it */
2078         ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2079         /*
2080          * enough for anything that's going to trickle out to have actually
2081          * done so.
2082          */
2083         udelay(5);
2084
2085         ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
2086                             INFINIPATH_IBCC_LINKINITCMD_SHIFT);
2087         ipath_cancel_sends(dd, 0);
2088
2089         signal_ib_event(dd, IB_EVENT_PORT_ERR);
2090
2091         /* disable IBC */
2092         dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
2093         ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
2094                          dd->ipath_control | INFINIPATH_C_FREEZEMODE);
2095
2096         /*
2097          * clear SerdesEnable and turn the leds off; do this here because
2098          * we are unloading, so don't count on interrupts to move along
2099          * Turn the LEDs off explictly for the same reason.
2100          */
2101         dd->ipath_f_quiet_serdes(dd);
2102
2103         if (dd->ipath_stats_timer_active) {
2104                 del_timer_sync(&dd->ipath_stats_timer);
2105                 dd->ipath_stats_timer_active = 0;
2106         }
2107
2108         /*
2109          * clear all interrupts and errors, so that the next time the driver
2110          * is loaded or device is enabled, we know that whatever is set
2111          * happened while we were unloaded
2112          */
2113         ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
2114                          ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
2115         ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
2116         ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
2117
2118         ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
2119         ipath_update_eeprom_log(dd);
2120 }
2121
2122 /**
2123  * ipath_free_pddata - free a port's allocated data
2124  * @dd: the infinipath device
2125  * @pd: the portdata structure
2126  *
2127  * free up any allocated data for a port
2128  * This should not touch anything that would affect a simultaneous
2129  * re-allocation of port data, because it is called after ipath_mutex
2130  * is released (and can be called from reinit as well).
2131  * It should never change any chip state, or global driver state.
2132  * (The only exception to global state is freeing the port0 port0_skbs.)
2133  */
2134 void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
2135 {
2136         if (!pd)
2137                 return;
2138
2139         if (pd->port_rcvhdrq) {
2140                 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
2141                            "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
2142                            (unsigned long) pd->port_rcvhdrq_size);
2143                 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
2144                                   pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
2145                 pd->port_rcvhdrq = NULL;
2146                 if (pd->port_rcvhdrtail_kvaddr) {
2147                         dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
2148                                          pd->port_rcvhdrtail_kvaddr,
2149                                          pd->port_rcvhdrqtailaddr_phys);
2150                         pd->port_rcvhdrtail_kvaddr = NULL;
2151                 }
2152         }
2153         if (pd->port_port && pd->port_rcvegrbuf) {
2154                 unsigned e;
2155
2156                 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
2157                         void *base = pd->port_rcvegrbuf[e];
2158                         size_t size = pd->port_rcvegrbuf_size;
2159
2160                         ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
2161                                    "chunk %u/%u\n", base,
2162                                    (unsigned long) size,
2163                                    e, pd->port_rcvegrbuf_chunks);
2164                         dma_free_coherent(&dd->pcidev->dev, size,
2165                                 base, pd->port_rcvegrbuf_phys[e]);
2166                 }
2167                 kfree(pd->port_rcvegrbuf);
2168                 pd->port_rcvegrbuf = NULL;
2169                 kfree(pd->port_rcvegrbuf_phys);
2170                 pd->port_rcvegrbuf_phys = NULL;
2171                 pd->port_rcvegrbuf_chunks = 0;
2172         } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
2173                 unsigned e;
2174                 struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
2175
2176                 dd->ipath_port0_skbinfo = NULL;
2177                 ipath_cdbg(VERBOSE, "free closed port %d "
2178                            "ipath_port0_skbinfo @ %p\n", pd->port_port,
2179                            skbinfo);
2180                 for (e = 0; e < dd->ipath_rcvegrcnt; e++)
2181                 if (skbinfo[e].skb) {
2182                         pci_unmap_single(dd->pcidev, skbinfo[e].phys,
2183                                          dd->ipath_ibmaxlen,
2184                                          PCI_DMA_FROMDEVICE);
2185                         dev_kfree_skb(skbinfo[e].skb);
2186                 }
2187                 vfree(skbinfo);
2188         }
2189         kfree(pd->port_tid_pg_list);
2190         vfree(pd->subport_uregbase);
2191         vfree(pd->subport_rcvegrbuf);
2192         vfree(pd->subport_rcvhdr_base);
2193         kfree(pd);
2194 }
2195
2196 static int __init infinipath_init(void)
2197 {
2198         int ret;
2199
2200         if (ipath_debug & __IPATH_DBG)
2201                 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
2202
2203         /*
2204          * These must be called before the driver is registered with
2205          * the PCI subsystem.
2206          */
2207         idr_init(&unit_table);
2208         if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
2209                 ret = -ENOMEM;
2210                 goto bail;
2211         }
2212
2213         ret = pci_register_driver(&ipath_driver);
2214         if (ret < 0) {
2215                 printk(KERN_ERR IPATH_DRV_NAME
2216                        ": Unable to register driver: error %d\n", -ret);
2217                 goto bail_unit;
2218         }
2219
2220         ret = ipath_driver_create_group(&ipath_driver.driver);
2221         if (ret < 0) {
2222                 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create driver "
2223                        "sysfs entries: error %d\n", -ret);
2224                 goto bail_pci;
2225         }
2226
2227         ret = ipath_init_ipathfs();
2228         if (ret < 0) {
2229                 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
2230                        "ipathfs: error %d\n", -ret);
2231                 goto bail_group;
2232         }
2233
2234         goto bail;
2235
2236 bail_group:
2237         ipath_driver_remove_group(&ipath_driver.driver);
2238
2239 bail_pci:
2240         pci_unregister_driver(&ipath_driver);
2241
2242 bail_unit:
2243         idr_destroy(&unit_table);
2244
2245 bail:
2246         return ret;
2247 }
2248
/*
 * infinipath_cleanup - module unload entry point
 *
 * Undoes the setup performed by infinipath_init() in the reverse order:
 * calls ipath_exit_ipathfs(), removes the driver sysfs group, unregisters
 * the PCI driver, and finally destroys the unit-number idr table.
 */
2249 static void __exit infinipath_cleanup(void)
2250 {
2251         ipath_exit_ipathfs();
2252
2253         ipath_driver_remove_group(&ipath_driver.driver);
2254
2255         ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
2256         pci_unregister_driver(&ipath_driver);
2257
2258         idr_destroy(&unit_table);
2259 }
2260
2260
2261 /**
2262  * ipath_reset_device - reset the chip if possible
2263  * @unit: the device to reset
2264  *
2265  * Whether or not reset is successful, we attempt to re-initialize the chip
2266  * (that is, much like a driver unload/reload).  We clear the INITTED flag
2267  * so that the various entry points will fail until we reinitialize.  For
2268  * now, we only allow this if no user ports are open that use chip resources
2269  */
2270 int ipath_reset_device(int unit)
2271 {
2272         int ret, i;
2273         struct ipath_devdata *dd = ipath_lookup(unit);
2274
2275         if (!dd) {
2276                 ret = -ENODEV;
2277                 goto bail;
2278         }
2279
2280         if (atomic_read(&dd->ipath_led_override_timer_active)) {
2281                 /* Need to stop LED timer, _then_ shut off LEDs */
2282                 del_timer_sync(&dd->ipath_led_override_timer);
2283                 atomic_set(&dd->ipath_led_override_timer_active, 0);
2284         }
2285
2286         /* Shut off LEDs after we are sure timer is not running */
2287         dd->ipath_led_override = LED_OVER_BOTH_OFF;
2288         dd->ipath_f_setextled(dd, 0, 0);
2289
2290         dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
2291
2292         if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
2293                 dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
2294                          "not initialized or not present\n", unit);
2295                 ret = -ENXIO;
2296                 goto bail;
2297         }
2298
2299         if (dd->ipath_pd)
2300                 for (i = 1; i < dd->ipath_cfgports; i++) {
2301                         if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
2302                                 ipath_dbg("unit %u port %d is in use "
2303                                           "(PID %u cmd %s), can't reset\n",
2304                                           unit, i,
2305                                           dd->ipath_pd[i]->port_pid,
2306                                           dd->ipath_pd[i]->port_comm);
2307                                 ret = -EBUSY;
2308                                 goto bail;
2309                         }
2310                 }
2311
2312         dd->ipath_flags &= ~IPATH_INITTED;
2313         ret = dd->ipath_f_reset(dd);
2314         if (ret != 1)
2315                 ipath_dbg("reset was not successful\n");
2316         ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
2317                   unit);
2318         ret = ipath_init_chip(dd, 1);
2319         if (ret)
2320                 ipath_dev_err(dd, "Reinitialize unit %u after "
2321                               "reset failed with %d\n", unit, ret);
2322         else
2323                 dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
2324                          "resetting\n", unit);
2325
2326 bail:
2327         return ret;
2328 }
2329
2330 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
2331 {
2332         u64 val;
2333         if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
2334                 return -1;
2335         }
2336         if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
2337                 dd->ipath_rx_pol_inv = new_pol_inv;
2338                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
2339                 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
2340                          INFINIPATH_XGXS_RX_POL_SHIFT);
2341                 val |= ((u64)dd->ipath_rx_pol_inv) <<
2342                         INFINIPATH_XGXS_RX_POL_SHIFT;
2343                 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
2344         }
2345         return 0;
2346 }
/*
 * Register infinipath_init() and infinipath_cleanup() as this module's
 * load and unload entry points.
 */
2347 module_init(infinipath_init);
2348 module_exit(infinipath_cleanup);