Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shaggy...
[linux-2.6] / drivers / infiniband / hw / ipath / ipath_driver.c
1 /*
2  * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include <linux/spinlock.h>
35 #include <linux/idr.h>
36 #include <linux/pci.h>
37 #include <linux/delay.h>
38 #include <linux/netdevice.h>
39 #include <linux/vmalloc.h>
40
41 #include "ipath_kernel.h"
42 #include "ipath_verbs.h"
43 #include "ipath_common.h"
44
45 static void ipath_update_pio_bufs(struct ipath_devdata *);
46
/*
 * Build the "infinipath<unit>" name for a unit number.
 *
 * The string is formatted into a single function-static buffer, so the
 * returned pointer is only valid until the next call overwrites it.
 */
const char *ipath_get_unit_name(int unit)
{
        static char unit_name[16];

        snprintf(unit_name, sizeof(unit_name), "infinipath%u", unit);

        return unit_name;
}
53
54 #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
55 #define PFX IPATH_DRV_NAME ": "
56
57 /*
58  * The size has to be longer than this string, so we can append
59  * board/chip information to it in the init code.
60  */
61 const char ib_ipath_version[] = IPATH_IDSTR "\n";
62
63 static struct idr unit_table;
64 DEFINE_SPINLOCK(ipath_devs_lock);
65 LIST_HEAD(ipath_dev_list);
66
67 wait_queue_head_t ipath_state_wait;
68
69 unsigned ipath_debug = __IPATH_INFO;
70
71 module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
72 MODULE_PARM_DESC(debug, "mask for debug prints");
73 EXPORT_SYMBOL_GPL(ipath_debug);
74
75 MODULE_LICENSE("GPL");
76 MODULE_AUTHOR("QLogic <support@pathscale.com>");
77 MODULE_DESCRIPTION("QLogic InfiniPath driver");
78
79 const char *ipath_ibcstatus_str[] = {
80         "Disabled",
81         "LinkUp",
82         "PollActive",
83         "PollQuiet",
84         "SleepDelay",
85         "SleepQuiet",
86         "LState6",              /* unused */
87         "LState7",              /* unused */
88         "CfgDebounce",
89         "CfgRcvfCfg",
90         "CfgWaitRmt",
91         "CfgIdle",
92         "RecovRetrain",
93         "LState0xD",            /* unused */
94         "RecovWaitRmt",
95         "RecovIdle",
96 };
97
98 static void __devexit ipath_remove_one(struct pci_dev *);
99 static int __devinit ipath_init_one(struct pci_dev *,
100                                     const struct pci_device_id *);
101
102 /* Only needed for registration, nothing else needs this info */
103 #define PCI_VENDOR_ID_PATHSCALE 0x1fc1
104 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd
105 #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
106
107 static const struct pci_device_id ipath_pci_tbl[] = {
108         { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
109         { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
110         { 0, }
111 };
112
113 MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
114
115 static struct pci_driver ipath_driver = {
116         .name = IPATH_DRV_NAME,
117         .probe = ipath_init_one,
118         .remove = __devexit_p(ipath_remove_one),
119         .id_table = ipath_pci_tbl,
120 };
121
122
123 static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
124                              u32 *bar0, u32 *bar1)
125 {
126         int ret;
127
128         ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
129         if (ret)
130                 ipath_dev_err(dd, "failed to read bar0 before enable: "
131                               "error %d\n", -ret);
132
133         ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
134         if (ret)
135                 ipath_dev_err(dd, "failed to read bar1 before enable: "
136                               "error %d\n", -ret);
137
138         ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
139 }
140
141 static void ipath_free_devdata(struct pci_dev *pdev,
142                                struct ipath_devdata *dd)
143 {
144         unsigned long flags;
145
146         pci_set_drvdata(pdev, NULL);
147
148         if (dd->ipath_unit != -1) {
149                 spin_lock_irqsave(&ipath_devs_lock, flags);
150                 idr_remove(&unit_table, dd->ipath_unit);
151                 list_del(&dd->ipath_list);
152                 spin_unlock_irqrestore(&ipath_devs_lock, flags);
153         }
154         vfree(dd);
155 }
156
157 static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
158 {
159         unsigned long flags;
160         struct ipath_devdata *dd;
161         int ret;
162
163         if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
164                 dd = ERR_PTR(-ENOMEM);
165                 goto bail;
166         }
167
168         dd = vmalloc(sizeof(*dd));
169         if (!dd) {
170                 dd = ERR_PTR(-ENOMEM);
171                 goto bail;
172         }
173         memset(dd, 0, sizeof(*dd));
174         dd->ipath_unit = -1;
175
176         spin_lock_irqsave(&ipath_devs_lock, flags);
177
178         ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
179         if (ret < 0) {
180                 printk(KERN_ERR IPATH_DRV_NAME
181                        ": Could not allocate unit ID: error %d\n", -ret);
182                 ipath_free_devdata(pdev, dd);
183                 dd = ERR_PTR(ret);
184                 goto bail_unlock;
185         }
186
187         dd->pcidev = pdev;
188         pci_set_drvdata(pdev, dd);
189
190         list_add(&dd->ipath_list, &ipath_dev_list);
191
192 bail_unlock:
193         spin_unlock_irqrestore(&ipath_devs_lock, flags);
194
195 bail:
196         return dd;
197 }
198
199 static inline struct ipath_devdata *__ipath_lookup(int unit)
200 {
201         return idr_find(&unit_table, unit);
202 }
203
204 struct ipath_devdata *ipath_lookup(int unit)
205 {
206         struct ipath_devdata *dd;
207         unsigned long flags;
208
209         spin_lock_irqsave(&ipath_devs_lock, flags);
210         dd = __ipath_lookup(unit);
211         spin_unlock_irqrestore(&ipath_devs_lock, flags);
212
213         return dd;
214 }
215
216 int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
217 {
218         int nunits, npresent, nup;
219         struct ipath_devdata *dd;
220         unsigned long flags;
221         u32 maxports;
222
223         nunits = npresent = nup = maxports = 0;
224
225         spin_lock_irqsave(&ipath_devs_lock, flags);
226
227         list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
228                 nunits++;
229                 if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
230                         npresent++;
231                 if (dd->ipath_lid &&
232                     !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
233                                          | IPATH_LINKUNK)))
234                         nup++;
235                 if (dd->ipath_cfgports > maxports)
236                         maxports = dd->ipath_cfgports;
237         }
238
239         spin_unlock_irqrestore(&ipath_devs_lock, flags);
240
241         if (npresentp)
242                 *npresentp = npresent;
243         if (nupp)
244                 *nupp = nup;
245         if (maxportsp)
246                 *maxportsp = maxports;
247
248         return nunits;
249 }
250
251 /*
252  * These next two routines are placeholders in case we don't have per-arch
253  * code for controlling write combining.  If explicit control of write
254  * combining is not available, performance will probably be awful.
255  */
256
/*
 * Weak fallback: with no stronger definition linked in, enabling write
 * combining always fails with -EOPNOTSUPP.
 */
int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
{
        return -EOPNOTSUPP;
}
261
/* Weak fallback: nothing was enabled, so there is nothing to undo. */
void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
{
}
265
266 static int __devinit ipath_init_one(struct pci_dev *pdev,
267                                     const struct pci_device_id *ent)
268 {
269         int ret, len, j;
270         struct ipath_devdata *dd;
271         unsigned long long addr;
272         u32 bar0 = 0, bar1 = 0;
273         u8 rev;
274
275         dd = ipath_alloc_devdata(pdev);
276         if (IS_ERR(dd)) {
277                 ret = PTR_ERR(dd);
278                 printk(KERN_ERR IPATH_DRV_NAME
279                        ": Could not allocate devdata: error %d\n", -ret);
280                 goto bail;
281         }
282
283         ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
284
285         read_bars(dd, pdev, &bar0, &bar1);
286
287         ret = pci_enable_device(pdev);
288         if (ret) {
289                 /* This can happen iff:
290                  *
291                  * We did a chip reset, and then failed to reprogram the
292                  * BAR, or the chip reset due to an internal error.  We then
293                  * unloaded the driver and reloaded it.
294                  *
295                  * Both reset cases set the BAR back to initial state.  For
296                  * the latter case, the AER sticky error bit at offset 0x718
297                  * should be set, but the Linux kernel doesn't yet know
298                  * about that, it appears.  If the original BAR was retained
299                  * in the kernel data structures, this may be OK.
300                  */
301                 ipath_dev_err(dd, "enable unit %d failed: error %d\n",
302                               dd->ipath_unit, -ret);
303                 goto bail_devdata;
304         }
305         addr = pci_resource_start(pdev, 0);
306         len = pci_resource_len(pdev, 0);
307         ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %x, vend %x/%x "
308                    "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
309                    ent->device, ent->driver_data);
310
311         read_bars(dd, pdev, &bar0, &bar1);
312
313         if (!bar1 && !(bar0 & ~0xf)) {
314                 if (addr) {
315                         dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
316                                  "rewriting as %llx\n", addr);
317                         ret = pci_write_config_dword(
318                                 pdev, PCI_BASE_ADDRESS_0, addr);
319                         if (ret) {
320                                 ipath_dev_err(dd, "rewrite of BAR0 "
321                                               "failed: err %d\n", -ret);
322                                 goto bail_disable;
323                         }
324                         ret = pci_write_config_dword(
325                                 pdev, PCI_BASE_ADDRESS_1, addr >> 32);
326                         if (ret) {
327                                 ipath_dev_err(dd, "rewrite of BAR1 "
328                                               "failed: err %d\n", -ret);
329                                 goto bail_disable;
330                         }
331                 } else {
332                         ipath_dev_err(dd, "BAR is 0 (probable RESET), "
333                                       "not usable until reboot\n");
334                         ret = -ENODEV;
335                         goto bail_disable;
336                 }
337         }
338
339         ret = pci_request_regions(pdev, IPATH_DRV_NAME);
340         if (ret) {
341                 dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
342                          "err %d\n", dd->ipath_unit, -ret);
343                 goto bail_disable;
344         }
345
346         ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
347         if (ret) {
348                 /*
349                  * if the 64 bit setup fails, try 32 bit.  Some systems
350                  * do not setup 64 bit maps on systems with 2GB or less
351                  * memory installed.
352                  */
353                 ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
354                 if (ret) {
355                         dev_info(&pdev->dev,
356                                 "Unable to set DMA mask for unit %u: %d\n",
357                                 dd->ipath_unit, ret);
358                         goto bail_regions;
359                 }
360                 else {
361                         ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
362                         ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
363                         if (ret)
364                                 dev_info(&pdev->dev,
365                                         "Unable to set DMA consistent mask "
366                                         "for unit %u: %d\n",
367                                         dd->ipath_unit, ret);
368
369                 }
370         }
371         else {
372                 ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
373                 if (ret)
374                         dev_info(&pdev->dev,
375                                 "Unable to set DMA consistent mask "
376                                 "for unit %u: %d\n",
377                                 dd->ipath_unit, ret);
378         }
379
380         pci_set_master(pdev);
381
382         /*
383          * Save BARs to rewrite after device reset.  Save all 64 bits of
384          * BAR, just in case.
385          */
386         dd->ipath_pcibar0 = addr;
387         dd->ipath_pcibar1 = addr >> 32;
388         dd->ipath_deviceid = ent->device;       /* save for later use */
389         dd->ipath_vendorid = ent->vendor;
390
391         /* setup the chip-specific functions, as early as possible. */
392         switch (ent->device) {
393         case PCI_DEVICE_ID_INFINIPATH_HT:
394                 ipath_init_iba6110_funcs(dd);
395                 break;
396         case PCI_DEVICE_ID_INFINIPATH_PE800:
397                 ipath_init_iba6120_funcs(dd);
398                 break;
399         default:
400                 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
401                               "failing\n", ent->device);
402                 return -ENODEV;
403         }
404
405         for (j = 0; j < 6; j++) {
406                 if (!pdev->resource[j].start)
407                         continue;
408                 ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
409                            j, (unsigned long long)pdev->resource[j].start,
410                            (unsigned long long)pdev->resource[j].end,
411                            (unsigned long long)pci_resource_len(pdev, j));
412         }
413
414         if (!addr) {
415                 ipath_dev_err(dd, "No valid address in BAR 0!\n");
416                 ret = -ENODEV;
417                 goto bail_regions;
418         }
419
420         dd->ipath_deviceid = ent->device;       /* save for later use */
421         dd->ipath_vendorid = ent->vendor;
422
423         ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
424         if (ret) {
425                 ipath_dev_err(dd, "Failed to read PCI revision ID unit "
426                               "%u: err %d\n", dd->ipath_unit, -ret);
427                 goto bail_regions;      /* shouldn't ever happen */
428         }
429         dd->ipath_pcirev = rev;
430
431 #if defined(__powerpc__)
432         /* There isn't a generic way to specify writethrough mappings */
433         dd->ipath_kregbase = __ioremap(addr, len,
434                 (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
435 #else
436         dd->ipath_kregbase = ioremap_nocache(addr, len);
437 #endif
438
439         if (!dd->ipath_kregbase) {
440                 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
441                           addr);
442                 ret = -ENOMEM;
443                 goto bail_iounmap;
444         }
445         dd->ipath_kregend = (u64 __iomem *)
446                 ((void __iomem *)dd->ipath_kregbase + len);
447         dd->ipath_physaddr = addr;      /* used for io_remap, etc. */
448         /* for user mmap */
449         ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
450                    addr, dd->ipath_kregbase);
451
452         /*
453          * clear ipath_flags here instead of in ipath_init_chip as it is set
454          * by ipath_setup_htconfig.
455          */
456         dd->ipath_flags = 0;
457         dd->ipath_lli_counter = 0;
458         dd->ipath_lli_errors = 0;
459
460         if (dd->ipath_f_bus(dd, pdev))
461                 ipath_dev_err(dd, "Failed to setup config space; "
462                               "continuing anyway\n");
463
464         /*
465          * set up our interrupt handler; IRQF_SHARED probably not needed,
466          * since MSI interrupts shouldn't be shared but won't  hurt for now.
467          * check 0 irq after we return from chip-specific bus setup, since
468          * that can affect this due to setup
469          */
470         if (!pdev->irq)
471                 ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
472                               "work\n");
473         else {
474                 ret = request_irq(pdev->irq, ipath_intr, IRQF_SHARED,
475                                   IPATH_DRV_NAME, dd);
476                 if (ret) {
477                         ipath_dev_err(dd, "Couldn't setup irq handler, "
478                                       "irq=%u: %d\n", pdev->irq, ret);
479                         goto bail_iounmap;
480                 }
481         }
482
483         ret = ipath_init_chip(dd, 0);   /* do the chip-specific init */
484         if (ret)
485                 goto bail_iounmap;
486
487         ret = ipath_enable_wc(dd);
488
489         if (ret) {
490                 ipath_dev_err(dd, "Write combining not enabled "
491                               "(err %d): performance may be poor\n",
492                               -ret);
493                 ret = 0;
494         }
495
496         ipath_device_create_group(&pdev->dev, dd);
497         ipathfs_add_device(dd);
498         ipath_user_add(dd);
499         ipath_diag_add(dd);
500         ipath_register_ib_device(dd);
501
502         goto bail;
503
504 bail_iounmap:
505         iounmap((volatile void __iomem *) dd->ipath_kregbase);
506
507 bail_regions:
508         pci_release_regions(pdev);
509
510 bail_disable:
511         pci_disable_device(pdev);
512
513 bail_devdata:
514         ipath_free_devdata(pdev, dd);
515
516 bail:
517         return ret;
518 }
519
520 static void __devexit cleanup_device(struct ipath_devdata *dd)
521 {
522         int port;
523
524         ipath_shutdown_device(dd);
525
526         if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
527                 /* can't do anything more with chip; needs re-init */
528                 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
529                 if (dd->ipath_kregbase) {
530                         /*
531                          * if we haven't already cleaned up before these are
532                          * to ensure any register reads/writes "fail" until
533                          * re-init
534                          */
535                         dd->ipath_kregbase = NULL;
536                         dd->ipath_uregbase = 0;
537                         dd->ipath_sregbase = 0;
538                         dd->ipath_cregbase = 0;
539                         dd->ipath_kregsize = 0;
540                 }
541                 ipath_disable_wc(dd);
542         }
543
544         if (dd->ipath_pioavailregs_dma) {
545                 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
546                                   (void *) dd->ipath_pioavailregs_dma,
547                                   dd->ipath_pioavailregs_phys);
548                 dd->ipath_pioavailregs_dma = NULL;
549         }
550         if (dd->ipath_dummy_hdrq) {
551                 dma_free_coherent(&dd->pcidev->dev,
552                         dd->ipath_pd[0]->port_rcvhdrq_size,
553                         dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
554                 dd->ipath_dummy_hdrq = NULL;
555         }
556
557         if (dd->ipath_pageshadow) {
558                 struct page **tmpp = dd->ipath_pageshadow;
559                 dma_addr_t *tmpd = dd->ipath_physshadow;
560                 int i, cnt = 0;
561
562                 ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
563                            "locked\n");
564                 for (port = 0; port < dd->ipath_cfgports; port++) {
565                         int port_tidbase = port * dd->ipath_rcvtidcnt;
566                         int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
567                         for (i = port_tidbase; i < maxtid; i++) {
568                                 if (!tmpp[i])
569                                         continue;
570                                 pci_unmap_page(dd->pcidev, tmpd[i],
571                                         PAGE_SIZE, PCI_DMA_FROMDEVICE);
572                                 ipath_release_user_pages(&tmpp[i], 1);
573                                 tmpp[i] = NULL;
574                                 cnt++;
575                         }
576                 }
577                 if (cnt) {
578                         ipath_stats.sps_pageunlocks += cnt;
579                         ipath_cdbg(VERBOSE, "There were still %u expTID "
580                                    "entries locked\n", cnt);
581                 }
582                 if (ipath_stats.sps_pagelocks ||
583                     ipath_stats.sps_pageunlocks)
584                         ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
585                                    "unlocked via ipath_m{un}lock\n",
586                                    (unsigned long long)
587                                    ipath_stats.sps_pagelocks,
588                                    (unsigned long long)
589                                    ipath_stats.sps_pageunlocks);
590
591                 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
592                            dd->ipath_pageshadow);
593                 vfree(dd->ipath_pageshadow);
594                 dd->ipath_pageshadow = NULL;
595         }
596
597         /*
598          * free any resources still in use (usually just kernel ports)
599          * at unload; we do for portcnt, not cfgports, because cfgports
600          * could have changed while we were loaded.
601          */
602         for (port = 0; port < dd->ipath_portcnt; port++) {
603                 struct ipath_portdata *pd = dd->ipath_pd[port];
604                 dd->ipath_pd[port] = NULL;
605                 ipath_free_pddata(dd, pd);
606         }
607         kfree(dd->ipath_pd);
608         /*
609          * debuggability, in case some cleanup path tries to use it
610          * after this
611          */
612         dd->ipath_pd = NULL;
613 }
614
615 static void __devexit ipath_remove_one(struct pci_dev *pdev)
616 {
617         struct ipath_devdata *dd = pci_get_drvdata(pdev);
618
619         ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
620
621         if (dd->verbs_dev)
622                 ipath_unregister_ib_device(dd->verbs_dev);
623
624         ipath_diag_remove(dd);
625         ipath_user_remove(dd);
626         ipathfs_remove_device(dd);
627         ipath_device_remove_group(&pdev->dev, dd);
628
629         ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
630                    "unit %u\n", dd, (u32) dd->ipath_unit);
631
632         cleanup_device(dd);
633
634         /*
635          * turn off rcv, send, and interrupts for all ports, all drivers
636          * should also hard reset the chip here?
637          * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
638          * for all versions of the driver, if they were allocated
639          */
640         if (pdev->irq) {
641                 ipath_cdbg(VERBOSE,
642                            "unit %u free_irq of irq %x\n",
643                            dd->ipath_unit, pdev->irq);
644                 free_irq(pdev->irq, dd);
645         } else
646                 ipath_dbg("irq is 0, not doing free_irq "
647                           "for unit %u\n", dd->ipath_unit);
648         /*
649          * we check for NULL here, because it's outside
650          * the kregbase check, and we need to call it
651          * after the free_irq.  Thus it's possible that
652          * the function pointers were never initialized.
653          */
654         if (dd->ipath_f_cleanup)
655                 /* clean up chip-specific stuff */
656                 dd->ipath_f_cleanup(dd);
657
658         ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
659         iounmap((volatile void __iomem *) dd->ipath_kregbase);
660         pci_release_regions(pdev);
661         ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
662         pci_disable_device(pdev);
663
664         ipath_free_devdata(pdev, dd);
665 }
666
667 /* general driver use */
668 DEFINE_MUTEX(ipath_mutex);
669
670 static DEFINE_SPINLOCK(ipath_pioavail_lock);
671
672 /**
673  * ipath_disarm_piobufs - cancel a range of PIO buffers
674  * @dd: the infinipath device
675  * @first: the first PIO buffer to cancel
676  * @cnt: the number of PIO buffers to cancel
677  *
678  * cancel a range of PIO buffers, used when they might be armed, but
679  * not triggered.  Used at init to ensure buffer state, and also user
680  * process close, in case it died while writing to a PIO buffer
681  * Also after errors.
682  */
683 void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
684                           unsigned cnt)
685 {
686         unsigned i, last = first + cnt;
687         u64 sendctrl, sendorig;
688
689         ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
690         sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM;
691         for (i = first; i < last; i++) {
692                 sendctrl = sendorig |
693                         (i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
694                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
695                                  sendctrl);
696         }
697
698         /*
699          * Write it again with current value, in case ipath_sendctrl changed
700          * while we were looping; no critical bits that would require
701          * locking.
702          *
703          * Write a 0, and then the original value, reading scratch in
704          * between.  This seems to avoid a chip timing race that causes
705          * pioavail updates to memory to stop.
706          */
707         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
708                          0);
709         sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
710         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
711                          dd->ipath_sendctrl);
712 }
713
714 /**
715  * ipath_wait_linkstate - wait for an IB link state change to occur
716  * @dd: the infinipath device
717  * @state: the state to wait for
718  * @msecs: the number of milliseconds to wait
719  *
720  * wait up to msecs milliseconds for IB link state change to occur for
721  * now, take the easy polling route.  Currently used only by
722  * ipath_set_linkstate.  Returns 0 if state reached, otherwise
723  * -ETIMEDOUT state can have multiple states set, for any of several
724  * transitions.
725  */
726 static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
727                                 int msecs)
728 {
729         dd->ipath_state_wanted = state;
730         wait_event_interruptible_timeout(ipath_state_wait,
731                                          (dd->ipath_flags & state),
732                                          msecs_to_jiffies(msecs));
733         dd->ipath_state_wanted = 0;
734
735         if (!(dd->ipath_flags & state)) {
736                 u64 val;
737                 ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
738                            " ms\n",
739                            /* test INIT ahead of DOWN, both can be set */
740                            (state & IPATH_LINKINIT) ? "INIT" :
741                            ((state & IPATH_LINKDOWN) ? "DOWN" :
742                             ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
743                            msecs);
744                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
745                 ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
746                            (unsigned long long) ipath_read_kreg64(
747                                    dd, dd->ipath_kregs->kr_ibcctrl),
748                            (unsigned long long) val,
749                            ipath_ibcstatus_str[val & 0xf]);
750         }
751         return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
752 }
753
754 void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
755 {
756         *buf = '\0';
757         if (err & INFINIPATH_E_RHDRLEN)
758                 strlcat(buf, "rhdrlen ", blen);
759         if (err & INFINIPATH_E_RBADTID)
760                 strlcat(buf, "rbadtid ", blen);
761         if (err & INFINIPATH_E_RBADVERSION)
762                 strlcat(buf, "rbadversion ", blen);
763         if (err & INFINIPATH_E_RHDR)
764                 strlcat(buf, "rhdr ", blen);
765         if (err & INFINIPATH_E_RLONGPKTLEN)
766                 strlcat(buf, "rlongpktlen ", blen);
767         if (err & INFINIPATH_E_RSHORTPKTLEN)
768                 strlcat(buf, "rshortpktlen ", blen);
769         if (err & INFINIPATH_E_RMAXPKTLEN)
770                 strlcat(buf, "rmaxpktlen ", blen);
771         if (err & INFINIPATH_E_RMINPKTLEN)
772                 strlcat(buf, "rminpktlen ", blen);
773         if (err & INFINIPATH_E_RFORMATERR)
774                 strlcat(buf, "rformaterr ", blen);
775         if (err & INFINIPATH_E_RUNSUPVL)
776                 strlcat(buf, "runsupvl ", blen);
777         if (err & INFINIPATH_E_RUNEXPCHAR)
778                 strlcat(buf, "runexpchar ", blen);
779         if (err & INFINIPATH_E_RIBFLOW)
780                 strlcat(buf, "ribflow ", blen);
781         if (err & INFINIPATH_E_REBP)
782                 strlcat(buf, "EBP ", blen);
783         if (err & INFINIPATH_E_SUNDERRUN)
784                 strlcat(buf, "sunderrun ", blen);
785         if (err & INFINIPATH_E_SPIOARMLAUNCH)
786                 strlcat(buf, "spioarmlaunch ", blen);
787         if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
788                 strlcat(buf, "sunexperrpktnum ", blen);
789         if (err & INFINIPATH_E_SDROPPEDDATAPKT)
790                 strlcat(buf, "sdroppeddatapkt ", blen);
791         if (err & INFINIPATH_E_SDROPPEDSMPPKT)
792                 strlcat(buf, "sdroppedsmppkt ", blen);
793         if (err & INFINIPATH_E_SMAXPKTLEN)
794                 strlcat(buf, "smaxpktlen ", blen);
795         if (err & INFINIPATH_E_SMINPKTLEN)
796                 strlcat(buf, "sminpktlen ", blen);
797         if (err & INFINIPATH_E_SUNSUPVL)
798                 strlcat(buf, "sunsupVL ", blen);
799         if (err & INFINIPATH_E_SPKTLEN)
800                 strlcat(buf, "spktlen ", blen);
801         if (err & INFINIPATH_E_INVALIDADDR)
802                 strlcat(buf, "invalidaddr ", blen);
803         if (err & INFINIPATH_E_RICRC)
804                 strlcat(buf, "CRC ", blen);
805         if (err & INFINIPATH_E_RVCRC)
806                 strlcat(buf, "VCRC ", blen);
807         if (err & INFINIPATH_E_RRCVEGRFULL)
808                 strlcat(buf, "rcvegrfull ", blen);
809         if (err & INFINIPATH_E_RRCVHDRFULL)
810                 strlcat(buf, "rcvhdrfull ", blen);
811         if (err & INFINIPATH_E_IBSTATUSCHANGED)
812                 strlcat(buf, "ibcstatuschg ", blen);
813         if (err & INFINIPATH_E_RIBLOSTLINK)
814                 strlcat(buf, "riblostlink ", blen);
815         if (err & INFINIPATH_E_HARDWARE)
816                 strlcat(buf, "hardware ", blen);
817         if (err & INFINIPATH_E_RESET)
818                 strlcat(buf, "reset ", blen);
819 }
820
821 /**
822  * get_rhf_errstring - decode RHF errors
823  * @err: the err number
824  * @msg: the output buffer
825  * @len: the length of the output buffer
826  *
827  * only used one place now, may want more later
828  */
829 static void get_rhf_errstring(u32 err, char *msg, size_t len)
830 {
831         /* if no errors, and so don't need to check what's first */
832         *msg = '\0';
833
834         if (err & INFINIPATH_RHF_H_ICRCERR)
835                 strlcat(msg, "icrcerr ", len);
836         if (err & INFINIPATH_RHF_H_VCRCERR)
837                 strlcat(msg, "vcrcerr ", len);
838         if (err & INFINIPATH_RHF_H_PARITYERR)
839                 strlcat(msg, "parityerr ", len);
840         if (err & INFINIPATH_RHF_H_LENERR)
841                 strlcat(msg, "lenerr ", len);
842         if (err & INFINIPATH_RHF_H_MTUERR)
843                 strlcat(msg, "mtuerr ", len);
844         if (err & INFINIPATH_RHF_H_IHDRERR)
845                 /* infinipath hdr checksum error */
846                 strlcat(msg, "ipathhdrerr ", len);
847         if (err & INFINIPATH_RHF_H_TIDERR)
848                 strlcat(msg, "tiderr ", len);
849         if (err & INFINIPATH_RHF_H_MKERR)
850                 /* bad port, offset, etc. */
851                 strlcat(msg, "invalid ipathhdr ", len);
852         if (err & INFINIPATH_RHF_H_IBERR)
853                 strlcat(msg, "iberr ", len);
854         if (err & INFINIPATH_RHF_L_SWA)
855                 strlcat(msg, "swA ", len);
856         if (err & INFINIPATH_RHF_L_SWB)
857                 strlcat(msg, "swB ", len);
858 }
859
860 /**
861  * ipath_get_egrbuf - get an eager buffer
862  * @dd: the infinipath device
863  * @bufnum: the eager buffer to get
864  * @err: unused
865  *
866  * must only be called if ipath_pd[port] is known to be allocated
867  */
868 static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
869                                      int err)
870 {
871         return dd->ipath_port0_skbinfo ?
872                 (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
873 }
874
875 /**
876  * ipath_alloc_skb - allocate an skb and buffer with possible constraints
877  * @dd: the infinipath device
878  * @gfp_mask: the sk_buff SFP mask
879  */
880 struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
881                                 gfp_t gfp_mask)
882 {
883         struct sk_buff *skb;
884         u32 len;
885
886         /*
887          * Only fully supported way to handle this is to allocate lots
888          * extra, align as needed, and then do skb_reserve().  That wastes
889          * a lot of memory...  I'll have to hack this into infinipath_copy
890          * also.
891          */
892
893         /*
894          * We need 2 extra bytes for ipath_ether data sent in the
895          * key header.  In order to keep everything dword aligned,
896          * we'll reserve 4 bytes.
897          */
898         len = dd->ipath_ibmaxlen + 4;
899
900         if (dd->ipath_flags & IPATH_4BYTE_TID) {
901                 /* We need a 2KB multiple alignment, and there is no way
902                  * to do it except to allocate extra and then skb_reserve
903                  * enough to bring it up to the right alignment.
904                  */
905                 len += 2047;
906         }
907
908         skb = __dev_alloc_skb(len, gfp_mask);
909         if (!skb) {
910                 ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
911                               len);
912                 goto bail;
913         }
914
915         skb_reserve(skb, 4);
916
917         if (dd->ipath_flags & IPATH_4BYTE_TID) {
918                 u32 una = (unsigned long)skb->data & 2047;
919                 if (una)
920                         skb_reserve(skb, 2048 - una);
921         }
922
923 bail:
924         return skb;
925 }
926
927 static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
928                              u32 eflags,
929                              u32 l,
930                              u32 etail,
931                              u64 *rc)
932 {
933         char emsg[128];
934         struct ipath_message_header *hdr;
935
936         get_rhf_errstring(eflags, emsg, sizeof emsg);
937         hdr = (struct ipath_message_header *)&rc[1];
938         ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
939                    "tlen=%x opcode=%x egridx=%x: %s\n",
940                    eflags, l,
941                    ipath_hdrget_rcv_type((__le32 *) rc),
942                    ipath_hdrget_length_in_bytes((__le32 *) rc),
943                    be32_to_cpu(hdr->bth[0]) >> 24,
944                    etail, emsg);
945
946         /* Count local link integrity errors. */
947         if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
948                 u8 n = (dd->ipath_ibcctrl >>
949                         INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
950                         INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
951
952                 if (++dd->ipath_lli_counter > n) {
953                         dd->ipath_lli_counter = 0;
954                         dd->ipath_lli_errors++;
955                 }
956         }
957 }
958
959 /*
960  * ipath_kreceive - receive a packet
961  * @dd: the infinipath device
962  *
963  * called from interrupt handler for errors or receive interrupt
964  */
965 void ipath_kreceive(struct ipath_devdata *dd)
966 {
967         u64 *rc;
968         void *ebuf;
969         const u32 rsize = dd->ipath_rcvhdrentsize;      /* words */
970         const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
971         u32 etail = -1, l, hdrqtail;
972         struct ipath_message_header *hdr;
973         u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0;
974         static u64 totcalls;    /* stats, may eventually remove */
975
976         if (!dd->ipath_hdrqtailptr) {
977                 ipath_dev_err(dd,
978                               "hdrqtailptr not set, can't do receives\n");
979                 goto bail;
980         }
981
982         /* There is already a thread processing this queue. */
983         if (test_and_set_bit(0, &dd->ipath_rcv_pending))
984                 goto bail;
985
986         l = dd->ipath_port0head;
987         hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr);
988         if (l == hdrqtail)
989                 goto done;
990
991 reloop:
992         for (i = 0; l != hdrqtail; i++) {
993                 u32 qp;
994                 u8 *bthbytes;
995
996                 rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2));
997                 hdr = (struct ipath_message_header *)&rc[1];
998                 /*
999                  * could make a network order version of IPATH_KD_QP, and
1000                  * do the obvious shift before masking to speed this up.
1001                  */
1002                 qp = ntohl(hdr->bth[1]) & 0xffffff;
1003                 bthbytes = (u8 *) hdr->bth;
1004
1005                 eflags = ipath_hdrget_err_flags((__le32 *) rc);
1006                 etype = ipath_hdrget_rcv_type((__le32 *) rc);
1007                 /* total length */
1008                 tlen = ipath_hdrget_length_in_bytes((__le32 *) rc);
1009                 ebuf = NULL;
1010                 if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
1011                         /*
1012                          * it turns out that the chips uses an eager buffer
1013                          * for all non-expected packets, whether it "needs"
1014                          * one or not.  So always get the index, but don't
1015                          * set ebuf (so we try to copy data) unless the
1016                          * length requires it.
1017                          */
1018                         etail = ipath_hdrget_index((__le32 *) rc);
1019                         if (tlen > sizeof(*hdr) ||
1020                             etype == RCVHQ_RCV_TYPE_NON_KD)
1021                                 ebuf = ipath_get_egrbuf(dd, etail, 0);
1022                 }
1023
1024                 /*
1025                  * both tiderr and ipathhdrerr are set for all plain IB
1026                  * packets; only ipathhdrerr should be set.
1027                  */
1028
1029                 if (etype != RCVHQ_RCV_TYPE_NON_KD && etype !=
1030                     RCVHQ_RCV_TYPE_ERROR && ipath_hdrget_ipath_ver(
1031                             hdr->iph.ver_port_tid_offset) !=
1032                     IPS_PROTO_VERSION) {
1033                         ipath_cdbg(PKT, "Bad InfiniPath protocol version "
1034                                    "%x\n", etype);
1035                 }
1036
1037                 if (unlikely(eflags))
1038                         ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
1039                 else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
1040                         ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
1041                         if (dd->ipath_lli_counter)
1042                                 dd->ipath_lli_counter--;
1043                         ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
1044                                    "qp=%x), len %x; ignored\n",
1045                                    etype, bthbytes[0], qp, tlen);
1046                 }
1047                 else if (etype == RCVHQ_RCV_TYPE_EAGER)
1048                         ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
1049                                    "qp=%x), len %x; ignored\n",
1050                                    etype, bthbytes[0], qp, tlen);
1051                 else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
1052                         ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
1053                                   be32_to_cpu(hdr->bth[0]) & 0xff);
1054                 else {
1055                         /*
1056                          * error packet, type of error  unknown.
1057                          * Probably type 3, but we don't know, so don't
1058                          * even try to print the opcode, etc.
1059                          */
1060                         ipath_dbg("Error Pkt, but no eflags! egrbuf %x, "
1061                                   "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; "
1062                                   "hdr %llx %llx %llx %llx %llx\n",
1063                                   etail, tlen, (unsigned long) rc, l,
1064                                   (unsigned long long) rc[0],
1065                                   (unsigned long long) rc[1],
1066                                   (unsigned long long) rc[2],
1067                                   (unsigned long long) rc[3],
1068                                   (unsigned long long) rc[4],
1069                                   (unsigned long long) rc[5]);
1070                 }
1071                 l += rsize;
1072                 if (l >= maxcnt)
1073                         l = 0;
1074                 if (etype != RCVHQ_RCV_TYPE_EXPECTED)
1075                     updegr = 1;
1076                 /*
1077                  * update head regs on last packet, and every 16 packets.
1078                  * Reduce bus traffic, while still trying to prevent
1079                  * rcvhdrq overflows, for when the queue is nearly full
1080                  */
1081                 if (l == hdrqtail || (i && !(i&0xf))) {
1082                         u64 lval;
1083                         if (l == hdrqtail)
1084                                 /* request IBA6120 interrupt only on last */
1085                                 lval = dd->ipath_rhdrhead_intr_off | l;
1086                         else
1087                                 lval = l;
1088                         (void)ipath_write_ureg(dd, ur_rcvhdrhead, lval, 0);
1089                         if (updegr) {
1090                                 (void)ipath_write_ureg(dd, ur_rcvegrindexhead,
1091                                                        etail, 0);
1092                                 updegr = 0;
1093                         }
1094                 }
1095         }
1096
1097         if (!dd->ipath_rhdrhead_intr_off && !reloop) {
1098                 /* IBA6110 workaround; we can have a race clearing chip
1099                  * interrupt with another interrupt about to be delivered,
1100                  * and can clear it before it is delivered on the GPIO
1101                  * workaround.  By doing the extra check here for the
1102                  * in-memory tail register updating while we were doing
1103                  * earlier packets, we "almost" guarantee we have covered
1104                  * that case.
1105                  */
1106                 u32 hqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
1107                 if (hqtail != hdrqtail) {
1108                         hdrqtail = hqtail;
1109                         reloop = 1; /* loop 1 extra time at most */
1110                         goto reloop;
1111                 }
1112         }
1113
1114         pkttot += i;
1115
1116         dd->ipath_port0head = l;
1117
1118         if (pkttot > ipath_stats.sps_maxpkts_call)
1119                 ipath_stats.sps_maxpkts_call = pkttot;
1120         ipath_stats.sps_port0pkts += pkttot;
1121         ipath_stats.sps_avgpkts_call =
1122                 ipath_stats.sps_port0pkts / ++totcalls;
1123
1124 done:
1125         clear_bit(0, &dd->ipath_rcv_pending);
1126         smp_mb__after_clear_bit();
1127
1128 bail:;
1129 }
1130
1131 /**
1132  * ipath_update_pio_bufs - update shadow copy of the PIO availability map
1133  * @dd: the infinipath device
1134  *
1135  * called whenever our local copy indicates we have run out of send buffers
1136  * NOTE: This can be called from interrupt context by some code
1137  * and from non-interrupt context by ipath_getpiobuf().
1138  */
1139
1140 static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1141 {
1142         unsigned long flags;
1143         int i;
1144         const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
1145
1146         /* If the generation (check) bits have changed, then we update the
1147          * busy bit for the corresponding PIO buffer.  This algorithm will
1148          * modify positions to the value they already have in some cases
1149          * (i.e., no change), but it's faster than changing only the bits
1150          * that have changed.
1151          *
1152          * We would like to do this atomicly, to avoid spinlocks in the
1153          * critical send path, but that's not really possible, given the
1154          * type of changes, and that this routine could be called on
1155          * multiple cpu's simultaneously, so we lock in this routine only,
1156          * to avoid conflicting updates; all we change is the shadow, and
1157          * it's a single 64 bit memory location, so by definition the update
1158          * is atomic in terms of what other cpu's can see in testing the
1159          * bits.  The spin_lock overhead isn't too bad, since it only
1160          * happens when all buffers are in use, so only cpu overhead, not
1161          * latency or bandwidth is affected.
1162          */
1163 #define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL
1164         if (!dd->ipath_pioavailregs_dma) {
1165                 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
1166                 return;
1167         }
1168         if (ipath_debug & __IPATH_VERBDBG) {
1169                 /* only if packet debug and verbose */
1170                 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1171                 unsigned long *shadow = dd->ipath_pioavailshadow;
1172
1173                 ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
1174                            "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
1175                            "s3=%lx\n",
1176                            (unsigned long long) le64_to_cpu(dma[0]),
1177                            shadow[0],
1178                            (unsigned long long) le64_to_cpu(dma[1]),
1179                            shadow[1],
1180                            (unsigned long long) le64_to_cpu(dma[2]),
1181                            shadow[2],
1182                            (unsigned long long) le64_to_cpu(dma[3]),
1183                            shadow[3]);
1184                 if (piobregs > 4)
1185                         ipath_cdbg(
1186                                 PKT, "2nd group, dma4=%llx shad4=%lx, "
1187                                 "d5=%llx s5=%lx, d6=%llx s6=%lx, "
1188                                 "d7=%llx s7=%lx\n",
1189                                 (unsigned long long) le64_to_cpu(dma[4]),
1190                                 shadow[4],
1191                                 (unsigned long long) le64_to_cpu(dma[5]),
1192                                 shadow[5],
1193                                 (unsigned long long) le64_to_cpu(dma[6]),
1194                                 shadow[6],
1195                                 (unsigned long long) le64_to_cpu(dma[7]),
1196                                 shadow[7]);
1197         }
1198         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1199         for (i = 0; i < piobregs; i++) {
1200                 u64 pchbusy, pchg, piov, pnew;
1201                 /*
1202                  * Chip Errata: bug 6641; even and odd qwords>3 are swapped
1203                  */
1204                 if (i > 3) {
1205                         if (i & 1)
1206                                 piov = le64_to_cpu(
1207                                         dd->ipath_pioavailregs_dma[i - 1]);
1208                         else
1209                                 piov = le64_to_cpu(
1210                                         dd->ipath_pioavailregs_dma[i + 1]);
1211                 } else
1212                         piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
1213                 pchg = _IPATH_ALL_CHECKBITS &
1214                         ~(dd->ipath_pioavailshadow[i] ^ piov);
1215                 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
1216                 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
1217                         pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
1218                         pnew |= piov & pchbusy;
1219                         dd->ipath_pioavailshadow[i] = pnew;
1220                 }
1221         }
1222         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1223 }
1224
1225 /**
1226  * ipath_setrcvhdrsize - set the receive header size
1227  * @dd: the infinipath device
1228  * @rhdrsize: the receive header size
1229  *
1230  * called from user init code, and also layered driver init
1231  */
1232 int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
1233 {
1234         int ret = 0;
1235
1236         if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
1237                 if (dd->ipath_rcvhdrsize != rhdrsize) {
1238                         dev_info(&dd->pcidev->dev,
1239                                  "Error: can't set protocol header "
1240                                  "size %u, already %u\n",
1241                                  rhdrsize, dd->ipath_rcvhdrsize);
1242                         ret = -EAGAIN;
1243                 } else
1244                         ipath_cdbg(VERBOSE, "Reuse same protocol header "
1245                                    "size %u\n", dd->ipath_rcvhdrsize);
1246         } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
1247                                (sizeof(u64) / sizeof(u32)))) {
1248                 ipath_dbg("Error: can't set protocol header size %u "
1249                           "(> max %u)\n", rhdrsize,
1250                           dd->ipath_rcvhdrentsize -
1251                           (u32) (sizeof(u64) / sizeof(u32)));
1252                 ret = -EOVERFLOW;
1253         } else {
1254                 dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
1255                 dd->ipath_rcvhdrsize = rhdrsize;
1256                 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
1257                                  dd->ipath_rcvhdrsize);
1258                 ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
1259                            dd->ipath_rcvhdrsize);
1260         }
1261         return ret;
1262 }
1263
1264 /**
1265  * ipath_getpiobuf - find an available pio buffer
1266  * @dd: the infinipath device
1267  * @pbufnum: the buffer number is placed here
1268  *
1269  * do appropriate marking as busy, etc.
1270  * returns buffer number if one found (>=0), negative number is error.
1271  * Used by ipath_layer_send
1272  */
1273 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
1274 {
1275         int i, j, starti, updated = 0;
1276         unsigned piobcnt, iter;
1277         unsigned long flags;
1278         unsigned long *shadow = dd->ipath_pioavailshadow;
1279         u32 __iomem *buf;
1280
1281         piobcnt = (unsigned)(dd->ipath_piobcnt2k
1282                              + dd->ipath_piobcnt4k);
1283         starti = dd->ipath_lastport_piobuf;
1284         iter = piobcnt - starti;
1285         if (dd->ipath_upd_pio_shadow) {
1286                 /*
1287                  * Minor optimization.  If we had no buffers on last call,
1288                  * start out by doing the update; continue and do scan even
1289                  * if no buffers were updated, to be paranoid
1290                  */
1291                 ipath_update_pio_bufs(dd);
1292                 /* we scanned here, don't do it at end of scan */
1293                 updated = 1;
1294                 i = starti;
1295         } else
1296                 i = dd->ipath_lastpioindex;
1297
1298 rescan:
1299         /*
1300          * while test_and_set_bit() is atomic, we do that and then the
1301          * change_bit(), and the pair is not.  See if this is the cause
1302          * of the remaining armlaunch errors.
1303          */
1304         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1305         for (j = 0; j < iter; j++, i++) {
1306                 if (i >= piobcnt)
1307                         i = starti;
1308                 /*
1309                  * To avoid bus lock overhead, we first find a candidate
1310                  * buffer, then do the test and set, and continue if that
1311                  * fails.
1312                  */
1313                 if (test_bit((2 * i) + 1, shadow) ||
1314                     test_and_set_bit((2 * i) + 1, shadow))
1315                         continue;
1316                 /* flip generation bit */
1317                 change_bit(2 * i, shadow);
1318                 break;
1319         }
1320         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1321
1322         if (j == iter) {
1323                 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1324
1325                 /*
1326                  * first time through; shadow exhausted, but may be real
1327                  * buffers available, so go see; if any updated, rescan
1328                  * (once)
1329                  */
1330                 if (!updated) {
1331                         ipath_update_pio_bufs(dd);
1332                         updated = 1;
1333                         i = starti;
1334                         goto rescan;
1335                 }
1336                 dd->ipath_upd_pio_shadow = 1;
1337                 /*
1338                  * not atomic, but if we lose one once in a while, that's OK
1339                  */
1340                 ipath_stats.sps_nopiobufs++;
1341                 if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1342                         ipath_dbg(
1343                                 "%u pio sends with no bufavail; dmacopy: "
1344                                 "%llx %llx %llx %llx; shadow:  "
1345                                 "%lx %lx %lx %lx\n",
1346                                 dd->ipath_consec_nopiobuf,
1347                                 (unsigned long long) le64_to_cpu(dma[0]),
1348                                 (unsigned long long) le64_to_cpu(dma[1]),
1349                                 (unsigned long long) le64_to_cpu(dma[2]),
1350                                 (unsigned long long) le64_to_cpu(dma[3]),
1351                                 shadow[0], shadow[1], shadow[2],
1352                                 shadow[3]);
1353                         /*
1354                          * 4 buffers per byte, 4 registers above, cover rest
1355                          * below
1356                          */
1357                         if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1358                             (sizeof(shadow[0]) * 4 * 4))
1359                                 ipath_dbg("2nd group: dmacopy: %llx %llx "
1360                                           "%llx %llx; shadow: %lx %lx "
1361                                           "%lx %lx\n",
1362                                           (unsigned long long)
1363                                           le64_to_cpu(dma[4]),
1364                                           (unsigned long long)
1365                                           le64_to_cpu(dma[5]),
1366                                           (unsigned long long)
1367                                           le64_to_cpu(dma[6]),
1368                                           (unsigned long long)
1369                                           le64_to_cpu(dma[7]),
1370                                           shadow[4], shadow[5],
1371                                           shadow[6], shadow[7]);
1372                 }
1373                 buf = NULL;
1374                 goto bail;
1375         }
1376
1377         /*
1378          * set next starting place.  Since it's just an optimization,
1379          * it doesn't matter who wins on this, so no locking
1380          */
1381         dd->ipath_lastpioindex = i + 1;
1382         if (dd->ipath_upd_pio_shadow)
1383                 dd->ipath_upd_pio_shadow = 0;
1384         if (dd->ipath_consec_nopiobuf)
1385                 dd->ipath_consec_nopiobuf = 0;
1386         if (i < dd->ipath_piobcnt2k)
1387                 buf = (u32 __iomem *) (dd->ipath_pio2kbase +
1388                                        i * dd->ipath_palign);
1389         else
1390                 buf = (u32 __iomem *)
1391                         (dd->ipath_pio4kbase +
1392                          (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
1393         ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
1394                    i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf);
1395         if (pbufnum)
1396                 *pbufnum = i;
1397
1398 bail:
1399         return buf;
1400 }
1401
1402 /**
1403  * ipath_create_rcvhdrq - create a receive header queue
1404  * @dd: the infinipath device
1405  * @pd: the port data
1406  *
1407  * this must be contiguous memory (from an i/o perspective), and must be
1408  * DMA'able (which means for some systems, it will go through an IOMMU,
1409  * or be forced into a low address range).
1410  */
1411 int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1412                          struct ipath_portdata *pd)
1413 {
1414         int ret = 0;
1415
1416         if (!pd->port_rcvhdrq) {
1417                 dma_addr_t phys_hdrqtail;
1418                 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
1419                 int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1420                                 sizeof(u32), PAGE_SIZE);
1421
1422                 pd->port_rcvhdrq = dma_alloc_coherent(
1423                         &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
1424                         gfp_flags);
1425
1426                 if (!pd->port_rcvhdrq) {
1427                         ipath_dev_err(dd, "attempt to allocate %d bytes "
1428                                       "for port %u rcvhdrq failed\n",
1429                                       amt, pd->port_port);
1430                         ret = -ENOMEM;
1431                         goto bail;
1432                 }
1433                 pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
1434                         &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
1435                 if (!pd->port_rcvhdrtail_kvaddr) {
1436                         ipath_dev_err(dd, "attempt to allocate 1 page "
1437                                       "for port %u rcvhdrqtailaddr failed\n",
1438                                       pd->port_port);
1439                         ret = -ENOMEM;
1440                         dma_free_coherent(&dd->pcidev->dev, amt,
1441                                           pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
1442                         pd->port_rcvhdrq = NULL;
1443                         goto bail;
1444                 }
1445                 pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
1446
1447                 pd->port_rcvhdrq_size = amt;
1448
1449                 ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
1450                            "for port %u rcvhdr Q\n",
1451                            amt >> PAGE_SHIFT, pd->port_rcvhdrq,
1452                            (unsigned long) pd->port_rcvhdrq_phys,
1453                            (unsigned long) pd->port_rcvhdrq_size,
1454                            pd->port_port);
1455
1456                 ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
1457                            pd->port_port,
1458                            (unsigned long long) phys_hdrqtail);
1459         }
1460         else
1461                 ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
1462                            "hdrtailaddr@%p %llx physical\n",
1463                            pd->port_port, pd->port_rcvhdrq,
1464                            (unsigned long long) pd->port_rcvhdrq_phys,
1465                            pd->port_rcvhdrtail_kvaddr, (unsigned long long)
1466                            pd->port_rcvhdrqtailaddr_phys);
1467
1468         /* clear for security and sanity on each use */
1469         memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
1470         memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
1471
1472         /*
1473          * tell chip each time we init it, even if we are re-using previous
1474          * memory (we zero the register at process close)
1475          */
1476         ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
1477                               pd->port_port, pd->port_rcvhdrqtailaddr_phys);
1478         ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
1479                               pd->port_port, pd->port_rcvhdrq_phys);
1480
1481         ret = 0;
1482 bail:
1483         return ret;
1484 }
1485
1486 int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id,
1487                            u64 bits_to_wait_for, u64 * valp)
1488 {
1489         unsigned long timeout;
1490         u64 lastval, val;
1491         int ret;
1492
1493         lastval = ipath_read_kreg64(dd, reg_id);
1494         /* wait a ridiculously long time */
1495         timeout = jiffies + msecs_to_jiffies(5);
1496         do {
1497                 val = ipath_read_kreg64(dd, reg_id);
1498                 /* set so they have something, even on failures. */
1499                 *valp = val;
1500                 if ((val & bits_to_wait_for) == bits_to_wait_for) {
1501                         ret = 0;
1502                         break;
1503                 }
1504                 if (val != lastval)
1505                         ipath_cdbg(VERBOSE, "Changed from %llx to %llx, "
1506                                    "waiting for %llx bits\n",
1507                                    (unsigned long long) lastval,
1508                                    (unsigned long long) val,
1509                                    (unsigned long long) bits_to_wait_for);
1510                 cond_resched();
1511                 if (time_after(jiffies, timeout)) {
1512                         ipath_dbg("Didn't get bits %llx in register 0x%x, "
1513                                   "got %llx\n",
1514                                   (unsigned long long) bits_to_wait_for,
1515                                   reg_id, (unsigned long long) *valp);
1516                         ret = -ENODEV;
1517                         break;
1518                 }
1519         } while (1);
1520
1521         return ret;
1522 }
1523
1524 /**
1525  * ipath_waitfor_mdio_cmdready - wait for last command to complete
1526  * @dd: the infinipath device
1527  *
1528  * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go
1529  * away indicating the last command has completed.  It doesn't return data
1530  */
1531 int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
1532 {
1533         unsigned long timeout;
1534         u64 val;
1535         int ret;
1536
1537         /* wait a ridiculously long time */
1538         timeout = jiffies + msecs_to_jiffies(5);
1539         do {
1540                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio);
1541                 if (!(val & IPATH_MDIO_CMDVALID)) {
1542                         ret = 0;
1543                         break;
1544                 }
1545                 cond_resched();
1546                 if (time_after(jiffies, timeout)) {
1547                         ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n",
1548                                   (unsigned long long) val);
1549                         ret = -ENODEV;
1550                         break;
1551                 }
1552         } while (1);
1553
1554         return ret;
1555 }
1556
1557 static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1558 {
1559         static const char *what[4] = {
1560                 [0] = "DOWN",
1561                 [INFINIPATH_IBCC_LINKCMD_INIT] = "INIT",
1562                 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
1563                 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
1564         };
1565         int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
1566                         INFINIPATH_IBCC_LINKCMD_MASK;
1567
1568         ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
1569                    "is %s\n", dd->ipath_unit,
1570                    what[linkcmd],
1571                    ipath_ibcstatus_str[
1572                            (ipath_read_kreg64
1573                             (dd, dd->ipath_kregs->kr_ibcstatus) >>
1574                             INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1575                            INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
1576         /* flush all queued sends when going to DOWN or INIT, to be sure that
1577          * they don't block MAD packets */
1578         if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
1579                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1580                                  INFINIPATH_S_ABORT);
1581                 ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
1582                                     (unsigned)(dd->ipath_piobcnt2k +
1583                                     dd->ipath_piobcnt4k) -
1584                                     dd->ipath_lastport_piobuf);
1585         }
1586
1587         ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1588                          dd->ipath_ibcctrl | which);
1589 }
1590
1591 int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
1592 {
1593         u32 lstate;
1594         int ret;
1595
1596         switch (newstate) {
1597         case IPATH_IB_LINKDOWN:
1598                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
1599                                     INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1600                 /* don't wait */
1601                 ret = 0;
1602                 goto bail;
1603
1604         case IPATH_IB_LINKDOWN_SLEEP:
1605                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
1606                                     INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1607                 /* don't wait */
1608                 ret = 0;
1609                 goto bail;
1610
1611         case IPATH_IB_LINKDOWN_DISABLE:
1612                 ipath_set_ib_lstate(dd,
1613                                     INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
1614                                     INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1615                 /* don't wait */
1616                 ret = 0;
1617                 goto bail;
1618
1619         case IPATH_IB_LINKINIT:
1620                 if (dd->ipath_flags & IPATH_LINKINIT) {
1621                         ret = 0;
1622                         goto bail;
1623                 }
1624                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
1625                                     INFINIPATH_IBCC_LINKCMD_SHIFT);
1626                 lstate = IPATH_LINKINIT;
1627                 break;
1628
1629         case IPATH_IB_LINKARM:
1630                 if (dd->ipath_flags & IPATH_LINKARMED) {
1631                         ret = 0;
1632                         goto bail;
1633                 }
1634                 if (!(dd->ipath_flags &
1635                       (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
1636                         ret = -EINVAL;
1637                         goto bail;
1638                 }
1639                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
1640                                     INFINIPATH_IBCC_LINKCMD_SHIFT);
1641                 /*
1642                  * Since the port can transition to ACTIVE by receiving
1643                  * a non VL 15 packet, wait for either state.
1644                  */
1645                 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
1646                 break;
1647
1648         case IPATH_IB_LINKACTIVE:
1649                 if (dd->ipath_flags & IPATH_LINKACTIVE) {
1650                         ret = 0;
1651                         goto bail;
1652                 }
1653                 if (!(dd->ipath_flags & IPATH_LINKARMED)) {
1654                         ret = -EINVAL;
1655                         goto bail;
1656                 }
1657                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
1658                                     INFINIPATH_IBCC_LINKCMD_SHIFT);
1659                 lstate = IPATH_LINKACTIVE;
1660                 break;
1661
1662         default:
1663                 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
1664                 ret = -EINVAL;
1665                 goto bail;
1666         }
1667         ret = ipath_wait_linkstate(dd, lstate, 2000);
1668
1669 bail:
1670         return ret;
1671 }
1672
1673 /**
1674  * ipath_set_mtu - set the MTU
1675  * @dd: the infinipath device
1676  * @arg: the new MTU
1677  *
1678  * we can handle "any" incoming size, the issue here is whether we
1679  * need to restrict our outgoing size.   For now, we don't do any
1680  * sanity checking on this, and we don't deal with what happens to
1681  * programs that are already running when the size changes.
1682  * NOTE: changing the MTU will usually cause the IBC to go back to
1683  * link initialize (IPATH_IBSTATE_INIT) state...
1684  */
1685 int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
1686 {
1687         u32 piosize;
1688         int changed = 0;
1689         int ret;
1690
1691         /*
1692          * mtu is IB data payload max.  It's the largest power of 2 less
1693          * than piosize (or even larger, since it only really controls the
1694          * largest we can receive; we can send the max of the mtu and
1695          * piosize).  We check that it's one of the valid IB sizes.
1696          */
1697         if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
1698             arg != 4096) {
1699                 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
1700                 ret = -EINVAL;
1701                 goto bail;
1702         }
1703         if (dd->ipath_ibmtu == arg) {
1704                 ret = 0;        /* same as current */
1705                 goto bail;
1706         }
1707
1708         piosize = dd->ipath_ibmaxlen;
1709         dd->ipath_ibmtu = arg;
1710
1711         if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
1712                 /* Only if it's not the initial value (or reset to it) */
1713                 if (piosize != dd->ipath_init_ibmaxlen) {
1714                         dd->ipath_ibmaxlen = piosize;
1715                         changed = 1;
1716                 }
1717         } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
1718                 piosize = arg + IPATH_PIO_MAXIBHDR;
1719                 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
1720                            "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
1721                            arg);
1722                 dd->ipath_ibmaxlen = piosize;
1723                 changed = 1;
1724         }
1725
1726         if (changed) {
1727                 /*
1728                  * set the IBC maxpktlength to the size of our pio
1729                  * buffers in words
1730                  */
1731                 u64 ibc = dd->ipath_ibcctrl;
1732                 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
1733                          INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
1734
1735                 piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
1736                 dd->ipath_ibmaxlen = piosize;
1737                 piosize /= sizeof(u32); /* in words */
1738                 /*
1739                  * for ICRC, which we only send in diag test pkt mode, and
1740                  * we don't need to worry about that for mtu
1741                  */
1742                 piosize += 1;
1743
1744                 ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
1745                 dd->ipath_ibcctrl = ibc;
1746                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1747                                  dd->ipath_ibcctrl);
1748                 dd->ipath_f_tidtemplate(dd);
1749         }
1750
1751         ret = 0;
1752
1753 bail:
1754         return ret;
1755 }
1756
1757 int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
1758 {
1759         dd->ipath_lid = arg;
1760         dd->ipath_lmc = lmc;
1761
1762         return 0;
1763 }
1764
1765 /**
1766  * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
1767  * @dd: the infinipath device
1768  * @regno: the register number to read
1769  * @port: the port containing the register
1770  *
1771  * Registers that vary with the chip implementation constants (port)
1772  * use this routine.
1773  */
1774 u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
1775                            unsigned port)
1776 {
1777         u16 where;
1778
1779         if (port < dd->ipath_portcnt &&
1780             (regno == dd->ipath_kregs->kr_rcvhdraddr ||
1781              regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
1782                 where = regno + port;
1783         else
1784                 where = -1;
1785
1786         return ipath_read_kreg64(dd, where);
1787 }
1788
1789 /**
1790  * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
1791  * @dd: the infinipath device
1792  * @regno: the register number to write
1793  * @port: the port containing the register
1794  * @value: the value to write
1795  *
1796  * Registers that vary with the chip implementation constants (port)
1797  * use this routine.
1798  */
1799 void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
1800                           unsigned port, u64 value)
1801 {
1802         u16 where;
1803
1804         if (port < dd->ipath_portcnt &&
1805             (regno == dd->ipath_kregs->kr_rcvhdraddr ||
1806              regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
1807                 where = regno + port;
1808         else
1809                 where = -1;
1810
1811         ipath_write_kreg(dd, where, value);
1812 }
1813
1814 /**
1815  * ipath_shutdown_device - shut down a device
1816  * @dd: the infinipath device
1817  *
1818  * This is called to make the device quiet when we are about to
1819  * unload the driver, and also when the device is administratively
1820  * disabled.   It does not free any data structures.
1821  * Everything it does has to be setup again by ipath_init_chip(dd,1)
1822  */
1823 void ipath_shutdown_device(struct ipath_devdata *dd)
1824 {
1825         u64 val;
1826
1827         ipath_dbg("Shutting down the device\n");
1828
1829         dd->ipath_flags |= IPATH_LINKUNK;
1830         dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
1831                              IPATH_LINKINIT | IPATH_LINKARMED |
1832                              IPATH_LINKACTIVE);
1833         *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
1834                                 IPATH_STATUS_IB_READY);
1835
1836         /* mask interrupts, but not errors */
1837         ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
1838
1839         dd->ipath_rcvctrl = 0;
1840         ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1841                          dd->ipath_rcvctrl);
1842
1843         /*
1844          * gracefully stop all sends allowing any in progress to trickle out
1845          * first.
1846          */
1847         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL);
1848         /* flush it */
1849         val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1850         /*
1851          * enough for anything that's going to trickle out to have actually
1852          * done so.
1853          */
1854         udelay(5);
1855
1856         /*
1857          * abort any armed or launched PIO buffers that didn't go. (self
1858          * clearing).  Will cause any packet currently being transmitted to
1859          * go out with an EBP, and may also cause a short packet error on
1860          * the receiver.
1861          */
1862         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1863                          INFINIPATH_S_ABORT);
1864
1865         ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
1866                             INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1867
1868         /* disable IBC */
1869         dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
1870         ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
1871                          dd->ipath_control | INFINIPATH_C_FREEZEMODE);
1872
1873         /*
1874          * clear SerdesEnable and turn the leds off; do this here because
1875          * we are unloading, so don't count on interrupts to move along
1876          * Turn the LEDs off explictly for the same reason.
1877          */
1878         dd->ipath_f_quiet_serdes(dd);
1879         dd->ipath_f_setextled(dd, 0, 0);
1880
1881         if (dd->ipath_stats_timer_active) {
1882                 del_timer_sync(&dd->ipath_stats_timer);
1883                 dd->ipath_stats_timer_active = 0;
1884         }
1885
1886         /*
1887          * clear all interrupts and errors, so that the next time the driver
1888          * is loaded or device is enabled, we know that whatever is set
1889          * happened while we were unloaded
1890          */
1891         ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
1892                          ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
1893         ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
1894         ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
1895 }
1896
1897 /**
1898  * ipath_free_pddata - free a port's allocated data
1899  * @dd: the infinipath device
1900  * @pd: the portdata structure
1901  *
1902  * free up any allocated data for a port
1903  * This should not touch anything that would affect a simultaneous
1904  * re-allocation of port data, because it is called after ipath_mutex
1905  * is released (and can be called from reinit as well).
1906  * It should never change any chip state, or global driver state.
1907  * (The only exception to global state is freeing the port0 port0_skbs.)
1908  */
1909 void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
1910 {
1911         if (!pd)
1912                 return;
1913
1914         if (pd->port_rcvhdrq) {
1915                 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
1916                            "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
1917                            (unsigned long) pd->port_rcvhdrq_size);
1918                 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
1919                                   pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
1920                 pd->port_rcvhdrq = NULL;
1921                 if (pd->port_rcvhdrtail_kvaddr) {
1922                         dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
1923                                          pd->port_rcvhdrtail_kvaddr,
1924                                          pd->port_rcvhdrqtailaddr_phys);
1925                         pd->port_rcvhdrtail_kvaddr = NULL;
1926                 }
1927         }
1928         if (pd->port_port && pd->port_rcvegrbuf) {
1929                 unsigned e;
1930
1931                 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
1932                         void *base = pd->port_rcvegrbuf[e];
1933                         size_t size = pd->port_rcvegrbuf_size;
1934
1935                         ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
1936                                    "chunk %u/%u\n", base,
1937                                    (unsigned long) size,
1938                                    e, pd->port_rcvegrbuf_chunks);
1939                         dma_free_coherent(&dd->pcidev->dev, size,
1940                                 base, pd->port_rcvegrbuf_phys[e]);
1941                 }
1942                 kfree(pd->port_rcvegrbuf);
1943                 pd->port_rcvegrbuf = NULL;
1944                 kfree(pd->port_rcvegrbuf_phys);
1945                 pd->port_rcvegrbuf_phys = NULL;
1946                 pd->port_rcvegrbuf_chunks = 0;
1947         } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
1948                 unsigned e;
1949                 struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
1950
1951                 dd->ipath_port0_skbinfo = NULL;
1952                 ipath_cdbg(VERBOSE, "free closed port %d "
1953                            "ipath_port0_skbinfo @ %p\n", pd->port_port,
1954                            skbinfo);
1955                 for (e = 0; e < dd->ipath_rcvegrcnt; e++)
1956                 if (skbinfo[e].skb) {
1957                         pci_unmap_single(dd->pcidev, skbinfo[e].phys,
1958                                          dd->ipath_ibmaxlen,
1959                                          PCI_DMA_FROMDEVICE);
1960                         dev_kfree_skb(skbinfo[e].skb);
1961                 }
1962                 vfree(skbinfo);
1963         }
1964         kfree(pd->port_tid_pg_list);
1965         vfree(pd->subport_uregbase);
1966         vfree(pd->subport_rcvegrbuf);
1967         vfree(pd->subport_rcvhdr_base);
1968         kfree(pd);
1969 }
1970
1971 static int __init infinipath_init(void)
1972 {
1973         int ret;
1974
1975         ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
1976
1977         /*
1978          * These must be called before the driver is registered with
1979          * the PCI subsystem.
1980          */
1981         idr_init(&unit_table);
1982         if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
1983                 ret = -ENOMEM;
1984                 goto bail;
1985         }
1986
1987         ret = pci_register_driver(&ipath_driver);
1988         if (ret < 0) {
1989                 printk(KERN_ERR IPATH_DRV_NAME
1990                        ": Unable to register driver: error %d\n", -ret);
1991                 goto bail_unit;
1992         }
1993
1994         ret = ipath_driver_create_group(&ipath_driver.driver);
1995         if (ret < 0) {
1996                 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create driver "
1997                        "sysfs entries: error %d\n", -ret);
1998                 goto bail_pci;
1999         }
2000
2001         ret = ipath_init_ipathfs();
2002         if (ret < 0) {
2003                 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
2004                        "ipathfs: error %d\n", -ret);
2005                 goto bail_group;
2006         }
2007
2008         goto bail;
2009
2010 bail_group:
2011         ipath_driver_remove_group(&ipath_driver.driver);
2012
2013 bail_pci:
2014         pci_unregister_driver(&ipath_driver);
2015
2016 bail_unit:
2017         idr_destroy(&unit_table);
2018
2019 bail:
2020         return ret;
2021 }
2022
2023 static void __exit infinipath_cleanup(void)
2024 {
2025         ipath_exit_ipathfs();
2026
2027         ipath_driver_remove_group(&ipath_driver.driver);
2028
2029         ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
2030         pci_unregister_driver(&ipath_driver);
2031
2032         idr_destroy(&unit_table);
2033 }
2034
2035 /**
2036  * ipath_reset_device - reset the chip if possible
2037  * @unit: the device to reset
2038  *
2039  * Whether or not reset is successful, we attempt to re-initialize the chip
2040  * (that is, much like a driver unload/reload).  We clear the INITTED flag
2041  * so that the various entry points will fail until we reinitialize.  For
2042  * now, we only allow this if no user ports are open that use chip resources
2043  */
2044 int ipath_reset_device(int unit)
2045 {
2046         int ret, i;
2047         struct ipath_devdata *dd = ipath_lookup(unit);
2048
2049         if (!dd) {
2050                 ret = -ENODEV;
2051                 goto bail;
2052         }
2053
2054         dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
2055
2056         if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
2057                 dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
2058                          "not initialized or not present\n", unit);
2059                 ret = -ENXIO;
2060                 goto bail;
2061         }
2062
2063         if (dd->ipath_pd)
2064                 for (i = 1; i < dd->ipath_cfgports; i++) {
2065                         if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
2066                                 ipath_dbg("unit %u port %d is in use "
2067                                           "(PID %u cmd %s), can't reset\n",
2068                                           unit, i,
2069                                           dd->ipath_pd[i]->port_pid,
2070                                           dd->ipath_pd[i]->port_comm);
2071                                 ret = -EBUSY;
2072                                 goto bail;
2073                         }
2074                 }
2075
2076         dd->ipath_flags &= ~IPATH_INITTED;
2077         ret = dd->ipath_f_reset(dd);
2078         if (ret != 1)
2079                 ipath_dbg("reset was not successful\n");
2080         ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
2081                   unit);
2082         ret = ipath_init_chip(dd, 1);
2083         if (ret)
2084                 ipath_dev_err(dd, "Reinitialize unit %u after "
2085                               "reset failed with %d\n", unit, ret);
2086         else
2087                 dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
2088                          "resetting\n", unit);
2089
2090 bail:
2091         return ret;
2092 }
2093
2094 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
2095 {
2096         u64 val;
2097         if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
2098                 return -1;
2099         }
2100         if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
2101                 dd->ipath_rx_pol_inv = new_pol_inv;
2102                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
2103                 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
2104                          INFINIPATH_XGXS_RX_POL_SHIFT);
2105                 val |= ((u64)dd->ipath_rx_pol_inv) <<
2106                         INFINIPATH_XGXS_RX_POL_SHIFT;
2107                 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
2108         }
2109         return 0;
2110 }
2111 module_init(infinipath_init);
2112 module_exit(infinipath_cleanup);