tg3: Add GRO support.
[linux-2.6] / drivers / block / cciss.c
1 /*
2  *    Disk Array driver for HP Smart Array controllers.
3  *    (C) Copyright 2000, 2007 Hewlett-Packard Development Company, L.P.
4  *
5  *    This program is free software; you can redistribute it and/or modify
6  *    it under the terms of the GNU General Public License as published by
7  *    the Free Software Foundation; version 2 of the License.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  *    General Public License for more details.
13  *
14  *    You should have received a copy of the GNU General Public License
15  *    along with this program; if not, write to the Free Software
16  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17  *    02111-1307, USA.
18  *
19  *    Questions/Comments/Bugfixes to iss_storagedev@hp.com
20  *
21  */
22
23 #include <linux/module.h>
24 #include <linux/interrupt.h>
25 #include <linux/types.h>
26 #include <linux/pci.h>
27 #include <linux/kernel.h>
28 #include <linux/slab.h>
29 #include <linux/delay.h>
30 #include <linux/major.h>
31 #include <linux/fs.h>
32 #include <linux/bio.h>
33 #include <linux/blkpg.h>
34 #include <linux/timer.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/init.h>
38 #include <linux/hdreg.h>
39 #include <linux/spinlock.h>
40 #include <linux/compat.h>
41 #include <linux/blktrace_api.h>
42 #include <asm/uaccess.h>
43 #include <asm/io.h>
44
45 #include <linux/dma-mapping.h>
46 #include <linux/blkdev.h>
47 #include <linux/genhd.h>
48 #include <linux/completion.h>
49 #include <scsi/scsi.h>
50 #include <scsi/sg.h>
51 #include <scsi/scsi_ioctl.h>
52 #include <linux/cdrom.h>
53 #include <linux/scatterlist.h>
54
/*
 * Pack a three-part driver version into one integer: the major number is
 * shifted left by 16, the minor number by 8, and both are OR-ed with the
 * sub-minor number.  Every argument is parenthesized so that an expression
 * argument (for example "a | b") is shifted as a whole instead of being
 * split up by operator precedence.
 */
#define CCISS_DRIVER_VERSION(maj, min, submin) \
        (((maj) << 16) | ((min) << 8) | (submin))
#define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20)
58
59 /* Embedded module documentation macros - see modules.h */
60 MODULE_AUTHOR("Hewlett-Packard Company");
61 MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
62 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
63                         " SA6i P600 P800 P400 P400i E200 E200i E500 P700m"
64                         " Smart Array G2 Series SAS/SATA Controllers");
65 MODULE_VERSION("3.6.20");
66 MODULE_LICENSE("GPL");
67
68 #include "cciss_cmd.h"
69 #include "cciss.h"
70 #include <linux/cciss_ioctl.h>
71
72 /* define the PCI info for the cards we can control */
/*
 * Each explicit entry lists four IDs; by struct pci_device_id field order
 * these are vendor, device, subsystem vendor and subsystem device.  The
 * entry just before the terminator uses PCI_ANY_ID for everything except
 * the HP vendor ID and additionally supplies a class value
 * (PCI_CLASS_STORAGE_RAID << 8) and class mask (0xffff << 8).  The all-zero
 * entry ends the table.
 */
73 static const struct pci_device_id cciss_pci_device_id[] = {
74         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISS,  0x0E11, 0x4070},
75         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4080},
76         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4082},
77         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4083},
78         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x4091},
79         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409A},
80         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409B},
81         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409C},
82         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409D},
83         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSA,     0x103C, 0x3225},
84         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3223},
85         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3234},
86         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3235},
87         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3211},
88         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3212},
89         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3213},
90         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3214},
91         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3215},
92         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3237},
93         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x323D},
94         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3241},
95         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3243},
96         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3245},
97         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3247},
98         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
99         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
100         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
101         {PCI_VENDOR_ID_HP,     PCI_ANY_ID,      PCI_ANY_ID, PCI_ANY_ID,
102                 PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0},
103         {0,}
104 };
105
106 MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
107
108 /*  board_id = Subsystem Device ID & Vendor ID
109  *  product = Marketing Name for the board
110  *  access = Address of the struct of function pointers
111  */
/*
 * In every entry the board_id carries the subsystem device ID in its upper
 * 16 bits and the subsystem vendor ID (0x0E11 or 0x103C) in its lower 16
 * bits, e.g. 0x40700E11 pairs with the {0x0E11, 0x4070} PCI table entry.
 * NOTE(review): the last entry (0xFFFF103C, "Unknown Smart Array") is
 * presumably a fallback for HP boards not listed above - confirm against
 * the code that searches this table.
 */
112 static struct board_type products[] = {
113         {0x40700E11, "Smart Array 5300", &SA5_access},
114         {0x40800E11, "Smart Array 5i", &SA5B_access},
115         {0x40820E11, "Smart Array 532", &SA5B_access},
116         {0x40830E11, "Smart Array 5312", &SA5B_access},
117         {0x409A0E11, "Smart Array 641", &SA5_access},
118         {0x409B0E11, "Smart Array 642", &SA5_access},
119         {0x409C0E11, "Smart Array 6400", &SA5_access},
120         {0x409D0E11, "Smart Array 6400 EM", &SA5_access},
121         {0x40910E11, "Smart Array 6i", &SA5_access},
122         {0x3225103C, "Smart Array P600", &SA5_access},
123         {0x3223103C, "Smart Array P800", &SA5_access},
124         {0x3234103C, "Smart Array P400", &SA5_access},
125         {0x3235103C, "Smart Array P400i", &SA5_access},
126         {0x3211103C, "Smart Array E200i", &SA5_access},
127         {0x3212103C, "Smart Array E200", &SA5_access},
128         {0x3213103C, "Smart Array E200i", &SA5_access},
129         {0x3214103C, "Smart Array E200i", &SA5_access},
130         {0x3215103C, "Smart Array E200i", &SA5_access},
131         {0x3237103C, "Smart Array E500", &SA5_access},
132         {0x323D103C, "Smart Array P700m", &SA5_access},
133         {0x3241103C, "Smart Array P212", &SA5_access},
134         {0x3243103C, "Smart Array P410", &SA5_access},
135         {0x3245103C, "Smart Array P410i", &SA5_access},
136         {0x3247103C, "Smart Array P411", &SA5_access},
137         {0x3249103C, "Smart Array P812", &SA5_access},
138         {0x324A103C, "Smart Array P712m", &SA5_access},
139         {0x324B103C, "Smart Array P711m", &SA5_access},
140         {0xFFFF103C, "Unknown Smart Array", &SA5_access},
141 };
142
143 /* How long to wait (in milliseconds) for board to go into simple mode */
144 #define MAX_CONFIG_WAIT 30000
/*
 * Upper bound on the number of doorbell polls made by the CCISS_SETINTINFO
 * ioctl; each unsuccessful poll there is followed by udelay(1000).
 */
145 #define MAX_IOCTL_CONFIG_WAIT 1000
146
147 /*define how many times we will try a command because of bus resets */
148 #define MAX_CMD_RETRIES 3
149
/* Number of slots in the hba[] controller table below. */
150 #define MAX_CTLR        32
151
152 /* Originally cciss driver only supports 8 major numbers */
153 #define MAX_CTLR_ORIG   8
154
/* Per-controller state pointers; indexed by controller number elsewhere in this file. */
155 static ctlr_info_t *hba[MAX_CTLR];
156
157 static void do_cciss_request(struct request_queue *q);
158 static irqreturn_t do_cciss_intr(int irq, void *dev_id);
159 static int cciss_open(struct block_device *bdev, fmode_t mode);
160 static int cciss_release(struct gendisk *disk, fmode_t mode);
161 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
162                        unsigned int cmd, unsigned long arg);
163 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
164
165 static int cciss_revalidate(struct gendisk *disk);
166 static int rebuild_lun_table(ctlr_info_t *h, int first_time);
167 static int deregister_disk(ctlr_info_t *h, int drv_index,
168                            int clear_all);
169
170 static void cciss_read_capacity(int ctlr, int logvol, int withirq,
171                         sector_t *total_size, unsigned int *block_size);
172 static void cciss_read_capacity_16(int ctlr, int logvol, int withirq,
173                         sector_t *total_size, unsigned int *block_size);
174 static void cciss_geometry_inquiry(int ctlr, int logvol,
175                         int withirq, sector_t total_size,
176                         unsigned int block_size, InquiryData_struct *inq_buff,
177                                    drive_info_struct *drv);
178 static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
179                                            __u32);
180 static void start_io(ctlr_info_t *h);
181 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
182                    unsigned int use_unit_num, unsigned int log_unit,
183                    __u8 page_code, unsigned char *scsi3addr, int cmd_type);
184 static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
185                            unsigned int use_unit_num, unsigned int log_unit,
186                            __u8 page_code, int cmd_type);
187
188 static void fail_all_cmds(unsigned long ctlr);
189
190 #ifdef CONFIG_PROC_FS
191 static void cciss_procinit(int i);
192 #else
/* Stub selected when CONFIG_PROC_FS is not defined: deliberately does nothing. */
193 static void cciss_procinit(int i)
194 {
195 }
196 #endif                          /* CONFIG_PROC_FS */
197
198 #ifdef CONFIG_COMPAT
199 static int cciss_compat_ioctl(struct block_device *, fmode_t,
200                               unsigned, unsigned long);
201 #endif
202
203 static struct block_device_operations cciss_fops = {
204         .owner = THIS_MODULE,
205         .open = cciss_open,
206         .release = cciss_release,
207         .locked_ioctl = cciss_ioctl,
208         .getgeo = cciss_getgeo,
209 #ifdef CONFIG_COMPAT
210         .compat_ioctl = cciss_compat_ioctl,
211 #endif
212         .revalidate_disk = cciss_revalidate,
213 };
214
215 /*
216  * Enqueuing and dequeuing functions for cmdlists.
217  */
218 static inline void addQ(struct hlist_head *list, CommandList_struct *c)
219 {
220         hlist_add_head(&c->list, list);
221 }
222
223 static inline void removeQ(CommandList_struct *c)
224 {
225         if (WARN_ON(hlist_unhashed(&c->list)))
226                 return;
227
228         hlist_del_init(&c->list);
229 }
230
231 #include "cciss_scsi.c"         /* For SCSI tape support */
232
233 #define RAID_UNKNOWN 6
234
235 #ifdef CONFIG_PROC_FS
236
237 /*
238  * Report information about this controller.
239  */
240 #define ENG_GIG 1000000000
241 #define ENG_GIG_FACTOR (ENG_GIG/512)
242 #define ENGAGE_SCSI     "engage scsi"
/*
 * Printable names for the RAID level shown in the proc file, indexed by
 * the drive's raid_level.  Slot 6 is the "UNKNOWN" catch-all.
 */
static const char *raid_label[] = {
        [0] = "0",
        [1] = "4",
        [2] = "1(1+0)",
        [3] = "5",
        [4] = "5+1",
        [5] = "ADG",
        [6] = "UNKNOWN",
};
246
247 static struct proc_dir_entry *proc_cciss;
248
249 static void cciss_seq_show_header(struct seq_file *seq)
250 {
251         ctlr_info_t *h = seq->private;
252
253         seq_printf(seq, "%s: HP %s Controller\n"
254                 "Board ID: 0x%08lx\n"
255                 "Firmware Version: %c%c%c%c\n"
256                 "IRQ: %d\n"
257                 "Logical drives: %d\n"
258                 "Current Q depth: %d\n"
259                 "Current # commands on controller: %d\n"
260                 "Max Q depth since init: %d\n"
261                 "Max # commands on controller since init: %d\n"
262                 "Max SG entries since init: %d\n",
263                 h->devname,
264                 h->product_name,
265                 (unsigned long)h->board_id,
266                 h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
267                 h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT],
268                 h->num_luns,
269                 h->Qdepth, h->commands_outstanding,
270                 h->maxQsinceinit, h->max_outstanding, h->maxSG);
271
272 #ifdef CONFIG_CISS_SCSI_TAPE
273         cciss_seq_tape_report(seq, h->ctlr);
274 #endif /* CONFIG_CISS_SCSI_TAPE */
275 }
276
277 static void *cciss_seq_start(struct seq_file *seq, loff_t *pos)
278 {
279         ctlr_info_t *h = seq->private;
280         unsigned ctlr = h->ctlr;
281         unsigned long flags;
282
283         /* prevent displaying bogus info during configuration
284          * or deconfiguration of a logical volume
285          */
286         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
287         if (h->busy_configuring) {
288                 spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
289                 return ERR_PTR(-EBUSY);
290         }
291         h->busy_configuring = 1;
292         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
293
294         if (*pos == 0)
295                 cciss_seq_show_header(seq);
296
297         return pos;
298 }
299
300 static int cciss_seq_show(struct seq_file *seq, void *v)
301 {
302         sector_t vol_sz, vol_sz_frac;
303         ctlr_info_t *h = seq->private;
304         unsigned ctlr = h->ctlr;
305         loff_t *pos = v;
306         drive_info_struct *drv = &h->drv[*pos];
307
308         if (*pos > h->highest_lun)
309                 return 0;
310
311         if (drv->heads == 0)
312                 return 0;
313
314         vol_sz = drv->nr_blocks;
315         vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR);
316         vol_sz_frac *= 100;
317         sector_div(vol_sz_frac, ENG_GIG_FACTOR);
318
319         if (drv->raid_level > 5)
320                 drv->raid_level = RAID_UNKNOWN;
321         seq_printf(seq, "cciss/c%dd%d:"
322                         "\t%4u.%02uGB\tRAID %s\n",
323                         ctlr, (int) *pos, (int)vol_sz, (int)vol_sz_frac,
324                         raid_label[drv->raid_level]);
325         return 0;
326 }
327
328 static void *cciss_seq_next(struct seq_file *seq, void *v, loff_t *pos)
329 {
330         ctlr_info_t *h = seq->private;
331
332         if (*pos > h->highest_lun)
333                 return NULL;
334         *pos += 1;
335
336         return pos;
337 }
338
339 static void cciss_seq_stop(struct seq_file *seq, void *v)
340 {
341         ctlr_info_t *h = seq->private;
342
343         /* Only reset h->busy_configuring if we succeeded in setting
344          * it during cciss_seq_start. */
345         if (v == ERR_PTR(-EBUSY))
346                 return;
347
348         h->busy_configuring = 0;
349 }
350
351 static struct seq_operations cciss_seq_ops = {
352         .start = cciss_seq_start,
353         .show  = cciss_seq_show,
354         .next  = cciss_seq_next,
355         .stop  = cciss_seq_stop,
356 };
357
358 static int cciss_seq_open(struct inode *inode, struct file *file)
359 {
360         int ret = seq_open(file, &cciss_seq_ops);
361         struct seq_file *seq = file->private_data;
362
363         if (!ret)
364                 seq->private = PDE(inode)->data;
365
366         return ret;
367 }
368
369 static ssize_t
370 cciss_proc_write(struct file *file, const char __user *buf,
371                  size_t length, loff_t *ppos)
372 {
373         int err;
374         char *buffer;
375
376 #ifndef CONFIG_CISS_SCSI_TAPE
377         return -EINVAL;
378 #endif
379
380         if (!buf || length > PAGE_SIZE - 1)
381                 return -EINVAL;
382
383         buffer = (char *)__get_free_page(GFP_KERNEL);
384         if (!buffer)
385                 return -ENOMEM;
386
387         err = -EFAULT;
388         if (copy_from_user(buffer, buf, length))
389                 goto out;
390         buffer[length] = '\0';
391
392 #ifdef CONFIG_CISS_SCSI_TAPE
393         if (strncmp(ENGAGE_SCSI, buffer, sizeof ENGAGE_SCSI - 1) == 0) {
394                 struct seq_file *seq = file->private_data;
395                 ctlr_info_t *h = seq->private;
396                 int rc;
397
398                 rc = cciss_engage_scsi(h->ctlr);
399                 if (rc != 0)
400                         err = -rc;
401                 else
402                         err = length;
403         } else
404 #endif /* CONFIG_CISS_SCSI_TAPE */
405                 err = -EINVAL;
406         /* might be nice to have "disengage" too, but it's not
407            safely possible. (only 1 module use count, lock issues.) */
408
409 out:
410         free_page((unsigned long)buffer);
411         return err;
412 }
413
414 static struct file_operations cciss_proc_fops = {
415         .owner   = THIS_MODULE,
416         .open    = cciss_seq_open,
417         .read    = seq_read,
418         .llseek  = seq_lseek,
419         .release = seq_release,
420         .write   = cciss_proc_write,
421 };
422
423 static void __devinit cciss_procinit(int i)
424 {
425         struct proc_dir_entry *pde;
426
427         if (proc_cciss == NULL)
428                 proc_cciss = proc_mkdir("driver/cciss", NULL);
429         if (!proc_cciss)
430                 return;
431         pde = proc_create_data(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP |
432                                         S_IROTH, proc_cciss,
433                                         &cciss_proc_fops, hba[i]);
434 }
435 #endif                          /* CONFIG_PROC_FS */
436
437 /*
438  * For operations that cannot sleep, a command block is allocated at init,
439  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
440  * which ones are free or in use.  For operations that can wait for kmalloc
441  * to possible sleep, this routine can be called with get_from_pool set to 0.
442  * cmd_free() MUST be called with a got_from_pool set to 0 if cmd_alloc was.
443  */
444 static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
445 {
446         CommandList_struct *c;
447         int i;
448         u64bit temp64;
449         dma_addr_t cmd_dma_handle, err_dma_handle;
450
451         if (!get_from_pool) {
452                 c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
453                         sizeof(CommandList_struct), &cmd_dma_handle);
454                 if (c == NULL)
455                         return NULL;
456                 memset(c, 0, sizeof(CommandList_struct));
457
458                 c->cmdindex = -1;
459
460                 c->err_info = (ErrorInfo_struct *)
461                     pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
462                             &err_dma_handle);
463
464                 if (c->err_info == NULL) {
465                         pci_free_consistent(h->pdev,
466                                 sizeof(CommandList_struct), c, cmd_dma_handle);
467                         return NULL;
468                 }
469                 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
470         } else {                /* get it out of the controllers pool */
471
472                 do {
473                         i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
474                         if (i == h->nr_cmds)
475                                 return NULL;
476                 } while (test_and_set_bit
477                          (i & (BITS_PER_LONG - 1),
478                           h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
479 #ifdef CCISS_DEBUG
480                 printk(KERN_DEBUG "cciss: using command buffer %d\n", i);
481 #endif
482                 c = h->cmd_pool + i;
483                 memset(c, 0, sizeof(CommandList_struct));
484                 cmd_dma_handle = h->cmd_pool_dhandle
485                     + i * sizeof(CommandList_struct);
486                 c->err_info = h->errinfo_pool + i;
487                 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
488                 err_dma_handle = h->errinfo_pool_dhandle
489                     + i * sizeof(ErrorInfo_struct);
490                 h->nr_allocs++;
491
492                 c->cmdindex = i;
493         }
494
495         INIT_HLIST_NODE(&c->list);
496         c->busaddr = (__u32) cmd_dma_handle;
497         temp64.val = (__u64) err_dma_handle;
498         c->ErrDesc.Addr.lower = temp64.val32.lower;
499         c->ErrDesc.Addr.upper = temp64.val32.upper;
500         c->ErrDesc.Len = sizeof(ErrorInfo_struct);
501
502         c->ctlr = h->ctlr;
503         return c;
504 }
505
506 /*
507  * Frees a command block that was previously allocated with cmd_alloc().
508  */
509 static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool)
510 {
511         int i;
512         u64bit temp64;
513
514         if (!got_from_pool) {
515                 temp64.val32.lower = c->ErrDesc.Addr.lower;
516                 temp64.val32.upper = c->ErrDesc.Addr.upper;
517                 pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
518                                     c->err_info, (dma_addr_t) temp64.val);
519                 pci_free_consistent(h->pdev, sizeof(CommandList_struct),
520                                     c, (dma_addr_t) c->busaddr);
521         } else {
522                 i = c - h->cmd_pool;
523                 clear_bit(i & (BITS_PER_LONG - 1),
524                           h->cmd_pool_bits + (i / BITS_PER_LONG));
525                 h->nr_frees++;
526         }
527 }
528
529 static inline ctlr_info_t *get_host(struct gendisk *disk)
530 {
531         return disk->queue->queuedata;
532 }
533
534 static inline drive_info_struct *get_drv(struct gendisk *disk)
535 {
536         return disk->private_data;
537 }
538
539 /*
540  * Open.  Make sure the device is really there.
541  */
542 static int cciss_open(struct block_device *bdev, fmode_t mode)
543 {
544         ctlr_info_t *host = get_host(bdev->bd_disk);
545         drive_info_struct *drv = get_drv(bdev->bd_disk);
546
547 #ifdef CCISS_DEBUG
548         printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name);
549 #endif                          /* CCISS_DEBUG */
550
551         if (host->busy_initializing || drv->busy_configuring)
552                 return -EBUSY;
553         /*
554          * Root is allowed to open raw volume zero even if it's not configured
555          * so array config can still work. Root is also allowed to open any
556          * volume that has a LUN ID, so it can issue IOCTL to reread the
557          * disk information.  I don't think I really like this
558          * but I'm already using way to many device nodes to claim another one
559          * for "raw controller".
560          */
561         if (drv->heads == 0) {
562                 if (MINOR(bdev->bd_dev) != 0) { /* not node 0? */
563                         /* if not node 0 make sure it is a partition = 0 */
564                         if (MINOR(bdev->bd_dev) & 0x0f) {
565                                 return -ENXIO;
566                                 /* if it is, make sure we have a LUN ID */
567                         } else if (drv->LunID == 0) {
568                                 return -ENXIO;
569                         }
570                 }
571                 if (!capable(CAP_SYS_ADMIN))
572                         return -EPERM;
573         }
574         drv->usage_count++;
575         host->usage_count++;
576         return 0;
577 }
578
579 /*
580  * Close.  Sync first.
581  */
582 static int cciss_release(struct gendisk *disk, fmode_t mode)
583 {
584         ctlr_info_t *host = get_host(disk);
585         drive_info_struct *drv = get_drv(disk);
586
587 #ifdef CCISS_DEBUG
588         printk(KERN_DEBUG "cciss_release %s\n", disk->disk_name);
589 #endif                          /* CCISS_DEBUG */
590
591         drv->usage_count--;
592         host->usage_count--;
593         return 0;
594 }
595
596 #ifdef CONFIG_COMPAT
597
598 static int do_ioctl(struct block_device *bdev, fmode_t mode,
599                     unsigned cmd, unsigned long arg)
600 {
601         int ret;
602         lock_kernel();
603         ret = cciss_ioctl(bdev, mode, cmd, arg);
604         unlock_kernel();
605         return ret;
606 }
607
608 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
609                                   unsigned cmd, unsigned long arg);
610 static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
611                                       unsigned cmd, unsigned long arg);
612
613 static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode,
614                               unsigned cmd, unsigned long arg)
615 {
616         switch (cmd) {
617         case CCISS_GETPCIINFO:
618         case CCISS_GETINTINFO:
619         case CCISS_SETINTINFO:
620         case CCISS_GETNODENAME:
621         case CCISS_SETNODENAME:
622         case CCISS_GETHEARTBEAT:
623         case CCISS_GETBUSTYPES:
624         case CCISS_GETFIRMVER:
625         case CCISS_GETDRIVVER:
626         case CCISS_REVALIDVOLS:
627         case CCISS_DEREGDISK:
628         case CCISS_REGNEWDISK:
629         case CCISS_REGNEWD:
630         case CCISS_RESCANDISK:
631         case CCISS_GETLUNINFO:
632                 return do_ioctl(bdev, mode, cmd, arg);
633
634         case CCISS_PASSTHRU32:
635                 return cciss_ioctl32_passthru(bdev, mode, cmd, arg);
636         case CCISS_BIG_PASSTHRU32:
637                 return cciss_ioctl32_big_passthru(bdev, mode, cmd, arg);
638
639         default:
640                 return -ENOIOCTLCMD;
641         }
642 }
643
644 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
645                                   unsigned cmd, unsigned long arg)
646 {
647         IOCTL32_Command_struct __user *arg32 =
648             (IOCTL32_Command_struct __user *) arg;
649         IOCTL_Command_struct arg64;
650         IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64));
651         int err;
652         u32 cp;
653
654         err = 0;
655         err |=
656             copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
657                            sizeof(arg64.LUN_info));
658         err |=
659             copy_from_user(&arg64.Request, &arg32->Request,
660                            sizeof(arg64.Request));
661         err |=
662             copy_from_user(&arg64.error_info, &arg32->error_info,
663                            sizeof(arg64.error_info));
664         err |= get_user(arg64.buf_size, &arg32->buf_size);
665         err |= get_user(cp, &arg32->buf);
666         arg64.buf = compat_ptr(cp);
667         err |= copy_to_user(p, &arg64, sizeof(arg64));
668
669         if (err)
670                 return -EFAULT;
671
672         err = do_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p);
673         if (err)
674                 return err;
675         err |=
676             copy_in_user(&arg32->error_info, &p->error_info,
677                          sizeof(arg32->error_info));
678         if (err)
679                 return -EFAULT;
680         return err;
681 }
682
683 static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
684                                       unsigned cmd, unsigned long arg)
685 {
686         BIG_IOCTL32_Command_struct __user *arg32 =
687             (BIG_IOCTL32_Command_struct __user *) arg;
688         BIG_IOCTL_Command_struct arg64;
689         BIG_IOCTL_Command_struct __user *p =
690             compat_alloc_user_space(sizeof(arg64));
691         int err;
692         u32 cp;
693
694         err = 0;
695         err |=
696             copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
697                            sizeof(arg64.LUN_info));
698         err |=
699             copy_from_user(&arg64.Request, &arg32->Request,
700                            sizeof(arg64.Request));
701         err |=
702             copy_from_user(&arg64.error_info, &arg32->error_info,
703                            sizeof(arg64.error_info));
704         err |= get_user(arg64.buf_size, &arg32->buf_size);
705         err |= get_user(arg64.malloc_size, &arg32->malloc_size);
706         err |= get_user(cp, &arg32->buf);
707         arg64.buf = compat_ptr(cp);
708         err |= copy_to_user(p, &arg64, sizeof(arg64));
709
710         if (err)
711                 return -EFAULT;
712
713         err = do_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p);
714         if (err)
715                 return err;
716         err |=
717             copy_in_user(&arg32->error_info, &p->error_info,
718                          sizeof(arg32->error_info));
719         if (err)
720                 return -EFAULT;
721         return err;
722 }
723 #endif
724
725 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
726 {
727         drive_info_struct *drv = get_drv(bdev->bd_disk);
728
729         if (!drv->cylinders)
730                 return -ENXIO;
731
732         geo->heads = drv->heads;
733         geo->sectors = drv->sectors;
734         geo->cylinders = drv->cylinders;
735         return 0;
736 }
737
738 /*
739  * ioctl
740  */
741 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
742                        unsigned int cmd, unsigned long arg)
743 {
744         struct gendisk *disk = bdev->bd_disk;
745         ctlr_info_t *host = get_host(disk);
746         drive_info_struct *drv = get_drv(disk);
747         int ctlr = host->ctlr;
748         void __user *argp = (void __user *)arg;
749
750 #ifdef CCISS_DEBUG
751         printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg);
752 #endif                          /* CCISS_DEBUG */
753
754         switch (cmd) {
755         case CCISS_GETPCIINFO:
756                 {
757                         cciss_pci_info_struct pciinfo;
758
759                         if (!arg)
760                                 return -EINVAL;
761                         pciinfo.domain = pci_domain_nr(host->pdev->bus);
762                         pciinfo.bus = host->pdev->bus->number;
763                         pciinfo.dev_fn = host->pdev->devfn;
764                         pciinfo.board_id = host->board_id;
765                         if (copy_to_user
766                             (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
767                                 return -EFAULT;
768                         return 0;
769                 }
770         case CCISS_GETINTINFO:
771                 {
772                         cciss_coalint_struct intinfo;
773                         if (!arg)
774                                 return -EINVAL;
775                         intinfo.delay =
776                             readl(&host->cfgtable->HostWrite.CoalIntDelay);
777                         intinfo.count =
778                             readl(&host->cfgtable->HostWrite.CoalIntCount);
779                         if (copy_to_user
780                             (argp, &intinfo, sizeof(cciss_coalint_struct)))
781                                 return -EFAULT;
782                         return 0;
783                 }
784         case CCISS_SETINTINFO:
785                 {
786                         cciss_coalint_struct intinfo;
787                         unsigned long flags;
788                         int i;
789
790                         if (!arg)
791                                 return -EINVAL;
792                         if (!capable(CAP_SYS_ADMIN))
793                                 return -EPERM;
794                         if (copy_from_user
795                             (&intinfo, argp, sizeof(cciss_coalint_struct)))
796                                 return -EFAULT;
797                         if ((intinfo.delay == 0) && (intinfo.count == 0))
798                         {
799 //                      printk("cciss_ioctl: delay and count cannot be 0\n");
800                                 return -EINVAL;
801                         }
802                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
803                         /* Update the field, and then ring the doorbell */
804                         writel(intinfo.delay,
805                                &(host->cfgtable->HostWrite.CoalIntDelay));
806                         writel(intinfo.count,
807                                &(host->cfgtable->HostWrite.CoalIntCount));
808                         writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
809
810                         for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
811                                 if (!(readl(host->vaddr + SA5_DOORBELL)
812                                       & CFGTBL_ChangeReq))
813                                         break;
814                                 /* delay and try again */
815                                 udelay(1000);
816                         }
817                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
818                         if (i >= MAX_IOCTL_CONFIG_WAIT)
819                                 return -EAGAIN;
820                         return 0;
821                 }
822         case CCISS_GETNODENAME:
823                 {
824                         NodeName_type NodeName;
825                         int i;
826
827                         if (!arg)
828                                 return -EINVAL;
829                         for (i = 0; i < 16; i++)
830                                 NodeName[i] =
831                                     readb(&host->cfgtable->ServerName[i]);
832                         if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
833                                 return -EFAULT;
834                         return 0;
835                 }
836         case CCISS_SETNODENAME:
837                 {
838                         NodeName_type NodeName;
839                         unsigned long flags;
840                         int i;
841
842                         if (!arg)
843                                 return -EINVAL;
844                         if (!capable(CAP_SYS_ADMIN))
845                                 return -EPERM;
846
847                         if (copy_from_user
848                             (NodeName, argp, sizeof(NodeName_type)))
849                                 return -EFAULT;
850
851                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
852
853                         /* Update the field, and then ring the doorbell */
854                         for (i = 0; i < 16; i++)
855                                 writeb(NodeName[i],
856                                        &host->cfgtable->ServerName[i]);
857
858                         writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
859
860                         for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
861                                 if (!(readl(host->vaddr + SA5_DOORBELL)
862                                       & CFGTBL_ChangeReq))
863                                         break;
864                                 /* delay and try again */
865                                 udelay(1000);
866                         }
867                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
868                         if (i >= MAX_IOCTL_CONFIG_WAIT)
869                                 return -EAGAIN;
870                         return 0;
871                 }
872
873         case CCISS_GETHEARTBEAT:
874                 {
875                         Heartbeat_type heartbeat;
876
877                         if (!arg)
878                                 return -EINVAL;
879                         heartbeat = readl(&host->cfgtable->HeartBeat);
880                         if (copy_to_user
881                             (argp, &heartbeat, sizeof(Heartbeat_type)))
882                                 return -EFAULT;
883                         return 0;
884                 }
885         case CCISS_GETBUSTYPES:
886                 {
887                         BusTypes_type BusTypes;
888
889                         if (!arg)
890                                 return -EINVAL;
891                         BusTypes = readl(&host->cfgtable->BusTypes);
892                         if (copy_to_user
893                             (argp, &BusTypes, sizeof(BusTypes_type)))
894                                 return -EFAULT;
895                         return 0;
896                 }
897         case CCISS_GETFIRMVER:
898                 {
899                         FirmwareVer_type firmware;
900
901                         if (!arg)
902                                 return -EINVAL;
903                         memcpy(firmware, host->firm_ver, 4);
904
905                         if (copy_to_user
906                             (argp, firmware, sizeof(FirmwareVer_type)))
907                                 return -EFAULT;
908                         return 0;
909                 }
910         case CCISS_GETDRIVVER:
911                 {
912                         DriverVer_type DriverVer = DRIVER_VERSION;
913
914                         if (!arg)
915                                 return -EINVAL;
916
917                         if (copy_to_user
918                             (argp, &DriverVer, sizeof(DriverVer_type)))
919                                 return -EFAULT;
920                         return 0;
921                 }
922
923         case CCISS_DEREGDISK:
924         case CCISS_REGNEWD:
925         case CCISS_REVALIDVOLS:
926                 return rebuild_lun_table(host, 0);
927
928         case CCISS_GETLUNINFO:{
929                         LogvolInfo_struct luninfo;
930
931                         luninfo.LunID = drv->LunID;
932                         luninfo.num_opens = drv->usage_count;
933                         luninfo.num_parts = 0;
934                         if (copy_to_user(argp, &luninfo,
935                                          sizeof(LogvolInfo_struct)))
936                                 return -EFAULT;
937                         return 0;
938                 }
939         case CCISS_PASSTHRU:
940                 {
941                         IOCTL_Command_struct iocommand;
942                         CommandList_struct *c;
943                         char *buff = NULL;
944                         u64bit temp64;
945                         unsigned long flags;
946                         DECLARE_COMPLETION_ONSTACK(wait);
947
948                         if (!arg)
949                                 return -EINVAL;
950
951                         if (!capable(CAP_SYS_RAWIO))
952                                 return -EPERM;
953
954                         if (copy_from_user
955                             (&iocommand, argp, sizeof(IOCTL_Command_struct)))
956                                 return -EFAULT;
957                         if ((iocommand.buf_size < 1) &&
958                             (iocommand.Request.Type.Direction != XFER_NONE)) {
959                                 return -EINVAL;
960                         }
961 #if 0                           /* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */
962                         /* Check kmalloc limits */
963                         if (iocommand.buf_size > 128000)
964                                 return -EINVAL;
965 #endif
966                         if (iocommand.buf_size > 0) {
967                                 buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
968                                 if (buff == NULL)
969                                         return -EFAULT;
970                         }
971                         if (iocommand.Request.Type.Direction == XFER_WRITE) {
972                                 /* Copy the data into the buffer we created */
973                                 if (copy_from_user
974                                     (buff, iocommand.buf, iocommand.buf_size)) {
975                                         kfree(buff);
976                                         return -EFAULT;
977                                 }
978                         } else {
979                                 memset(buff, 0, iocommand.buf_size);
980                         }
981                         if ((c = cmd_alloc(host, 0)) == NULL) {
982                                 kfree(buff);
983                                 return -ENOMEM;
984                         }
985                         // Fill in the command type
986                         c->cmd_type = CMD_IOCTL_PEND;
987                         // Fill in Command Header
988                         c->Header.ReplyQueue = 0;       // unused in simple mode
989                         if (iocommand.buf_size > 0)     // buffer to fill
990                         {
991                                 c->Header.SGList = 1;
992                                 c->Header.SGTotal = 1;
993                         } else  // no buffers to fill
994                         {
995                                 c->Header.SGList = 0;
996                                 c->Header.SGTotal = 0;
997                         }
998                         c->Header.LUN = iocommand.LUN_info;
999                         c->Header.Tag.lower = c->busaddr;       // use the kernel address the cmd block for tag
1000
1001                         // Fill in Request block
1002                         c->Request = iocommand.Request;
1003
1004                         // Fill in the scatter gather information
1005                         if (iocommand.buf_size > 0) {
1006                                 temp64.val = pci_map_single(host->pdev, buff,
1007                                         iocommand.buf_size,
1008                                         PCI_DMA_BIDIRECTIONAL);
1009                                 c->SG[0].Addr.lower = temp64.val32.lower;
1010                                 c->SG[0].Addr.upper = temp64.val32.upper;
1011                                 c->SG[0].Len = iocommand.buf_size;
1012                                 c->SG[0].Ext = 0;       // we are not chaining
1013                         }
1014                         c->waiting = &wait;
1015
1016                         /* Put the request on the tail of the request queue */
1017                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1018                         addQ(&host->reqQ, c);
1019                         host->Qdepth++;
1020                         start_io(host);
1021                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1022
1023                         wait_for_completion(&wait);
1024
1025                         /* unlock the buffers from DMA */
1026                         temp64.val32.lower = c->SG[0].Addr.lower;
1027                         temp64.val32.upper = c->SG[0].Addr.upper;
1028                         pci_unmap_single(host->pdev, (dma_addr_t) temp64.val,
1029                                          iocommand.buf_size,
1030                                          PCI_DMA_BIDIRECTIONAL);
1031
1032                         /* Copy the error information out */
1033                         iocommand.error_info = *(c->err_info);
1034                         if (copy_to_user
1035                             (argp, &iocommand, sizeof(IOCTL_Command_struct))) {
1036                                 kfree(buff);
1037                                 cmd_free(host, c, 0);
1038                                 return -EFAULT;
1039                         }
1040
1041                         if (iocommand.Request.Type.Direction == XFER_READ) {
1042                                 /* Copy the data out of the buffer we created */
1043                                 if (copy_to_user
1044                                     (iocommand.buf, buff, iocommand.buf_size)) {
1045                                         kfree(buff);
1046                                         cmd_free(host, c, 0);
1047                                         return -EFAULT;
1048                                 }
1049                         }
1050                         kfree(buff);
1051                         cmd_free(host, c, 0);
1052                         return 0;
1053                 }
1054         case CCISS_BIG_PASSTHRU:{
1055                         BIG_IOCTL_Command_struct *ioc;
1056                         CommandList_struct *c;
1057                         unsigned char **buff = NULL;
1058                         int *buff_size = NULL;
1059                         u64bit temp64;
1060                         unsigned long flags;
1061                         BYTE sg_used = 0;
1062                         int status = 0;
1063                         int i;
1064                         DECLARE_COMPLETION_ONSTACK(wait);
1065                         __u32 left;
1066                         __u32 sz;
1067                         BYTE __user *data_ptr;
1068
1069                         if (!arg)
1070                                 return -EINVAL;
1071                         if (!capable(CAP_SYS_RAWIO))
1072                                 return -EPERM;
1073                         ioc = (BIG_IOCTL_Command_struct *)
1074                             kmalloc(sizeof(*ioc), GFP_KERNEL);
1075                         if (!ioc) {
1076                                 status = -ENOMEM;
1077                                 goto cleanup1;
1078                         }
1079                         if (copy_from_user(ioc, argp, sizeof(*ioc))) {
1080                                 status = -EFAULT;
1081                                 goto cleanup1;
1082                         }
1083                         if ((ioc->buf_size < 1) &&
1084                             (ioc->Request.Type.Direction != XFER_NONE)) {
1085                                 status = -EINVAL;
1086                                 goto cleanup1;
1087                         }
1088                         /* Check kmalloc limits  using all SGs */
1089                         if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
1090                                 status = -EINVAL;
1091                                 goto cleanup1;
1092                         }
1093                         if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
1094                                 status = -EINVAL;
1095                                 goto cleanup1;
1096                         }
1097                         buff =
1098                             kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
1099                         if (!buff) {
1100                                 status = -ENOMEM;
1101                                 goto cleanup1;
1102                         }
1103                         buff_size = kmalloc(MAXSGENTRIES * sizeof(int),
1104                                                    GFP_KERNEL);
1105                         if (!buff_size) {
1106                                 status = -ENOMEM;
1107                                 goto cleanup1;
1108                         }
1109                         left = ioc->buf_size;
1110                         data_ptr = ioc->buf;
1111                         while (left) {
1112                                 sz = (left >
1113                                       ioc->malloc_size) ? ioc->
1114                                     malloc_size : left;
1115                                 buff_size[sg_used] = sz;
1116                                 buff[sg_used] = kmalloc(sz, GFP_KERNEL);
1117                                 if (buff[sg_used] == NULL) {
1118                                         status = -ENOMEM;
1119                                         goto cleanup1;
1120                                 }
1121                                 if (ioc->Request.Type.Direction == XFER_WRITE) {
1122                                         if (copy_from_user
1123                                             (buff[sg_used], data_ptr, sz)) {
1124                                                 status = -EFAULT;
1125                                                 goto cleanup1;
1126                                         }
1127                                 } else {
1128                                         memset(buff[sg_used], 0, sz);
1129                                 }
1130                                 left -= sz;
1131                                 data_ptr += sz;
1132                                 sg_used++;
1133                         }
1134                         if ((c = cmd_alloc(host, 0)) == NULL) {
1135                                 status = -ENOMEM;
1136                                 goto cleanup1;
1137                         }
1138                         c->cmd_type = CMD_IOCTL_PEND;
1139                         c->Header.ReplyQueue = 0;
1140
1141                         if (ioc->buf_size > 0) {
1142                                 c->Header.SGList = sg_used;
1143                                 c->Header.SGTotal = sg_used;
1144                         } else {
1145                                 c->Header.SGList = 0;
1146                                 c->Header.SGTotal = 0;
1147                         }
1148                         c->Header.LUN = ioc->LUN_info;
1149                         c->Header.Tag.lower = c->busaddr;
1150
1151                         c->Request = ioc->Request;
1152                         if (ioc->buf_size > 0) {
1153                                 int i;
1154                                 for (i = 0; i < sg_used; i++) {
1155                                         temp64.val =
1156                                             pci_map_single(host->pdev, buff[i],
1157                                                     buff_size[i],
1158                                                     PCI_DMA_BIDIRECTIONAL);
1159                                         c->SG[i].Addr.lower =
1160                                             temp64.val32.lower;
1161                                         c->SG[i].Addr.upper =
1162                                             temp64.val32.upper;
1163                                         c->SG[i].Len = buff_size[i];
1164                                         c->SG[i].Ext = 0;       /* we are not chaining */
1165                                 }
1166                         }
1167                         c->waiting = &wait;
1168                         /* Put the request on the tail of the request queue */
1169                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1170                         addQ(&host->reqQ, c);
1171                         host->Qdepth++;
1172                         start_io(host);
1173                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1174                         wait_for_completion(&wait);
1175                         /* unlock the buffers from DMA */
1176                         for (i = 0; i < sg_used; i++) {
1177                                 temp64.val32.lower = c->SG[i].Addr.lower;
1178                                 temp64.val32.upper = c->SG[i].Addr.upper;
1179                                 pci_unmap_single(host->pdev,
1180                                         (dma_addr_t) temp64.val, buff_size[i],
1181                                         PCI_DMA_BIDIRECTIONAL);
1182                         }
1183                         /* Copy the error information out */
1184                         ioc->error_info = *(c->err_info);
1185                         if (copy_to_user(argp, ioc, sizeof(*ioc))) {
1186                                 cmd_free(host, c, 0);
1187                                 status = -EFAULT;
1188                                 goto cleanup1;
1189                         }
1190                         if (ioc->Request.Type.Direction == XFER_READ) {
1191                                 /* Copy the data out of the buffer we created */
1192                                 BYTE __user *ptr = ioc->buf;
1193                                 for (i = 0; i < sg_used; i++) {
1194                                         if (copy_to_user
1195                                             (ptr, buff[i], buff_size[i])) {
1196                                                 cmd_free(host, c, 0);
1197                                                 status = -EFAULT;
1198                                                 goto cleanup1;
1199                                         }
1200                                         ptr += buff_size[i];
1201                                 }
1202                         }
1203                         cmd_free(host, c, 0);
1204                         status = 0;
1205                       cleanup1:
1206                         if (buff) {
1207                                 for (i = 0; i < sg_used; i++)
1208                                         kfree(buff[i]);
1209                                 kfree(buff);
1210                         }
1211                         kfree(buff_size);
1212                         kfree(ioc);
1213                         return status;
1214                 }
1215
1216         /* scsi_cmd_ioctl handles these, below, though some are not */
1217         /* very meaningful for cciss.  SG_IO is the main one people want. */
1218
1219         case SG_GET_VERSION_NUM:
1220         case SG_SET_TIMEOUT:
1221         case SG_GET_TIMEOUT:
1222         case SG_GET_RESERVED_SIZE:
1223         case SG_SET_RESERVED_SIZE:
1224         case SG_EMULATED_HOST:
1225         case SG_IO:
1226         case SCSI_IOCTL_SEND_COMMAND:
1227                 return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
1228
1229         /* scsi_cmd_ioctl would normally handle these, below, but */
1230         /* they aren't a good fit for cciss, as CD-ROMs are */
1231         /* not supported, and we don't have any bus/target/lun */
1232         /* which we present to the kernel. */
1233
1234         case CDROM_SEND_PACKET:
1235         case CDROMCLOSETRAY:
1236         case CDROMEJECT:
1237         case SCSI_IOCTL_GET_IDLUN:
1238         case SCSI_IOCTL_GET_BUS_NUMBER:
1239         default:
1240                 return -ENOTTY;
1241         }
1242 }
1243
1244 static void cciss_check_queues(ctlr_info_t *h)
1245 {
1246         int start_queue = h->next_to_run;
1247         int i;
1248
1249         /* check to see if we have maxed out the number of commands that can
1250          * be placed on the queue.  If so then exit.  We do this check here
1251          * in case the interrupt we serviced was from an ioctl and did not
1252          * free any new commands.
1253          */
1254         if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds)
1255                 return;
1256
1257         /* We have room on the queue for more commands.  Now we need to queue
1258          * them up.  We will also keep track of the next queue to run so
1259          * that every queue gets a chance to be started first.
1260          */
1261         for (i = 0; i < h->highest_lun + 1; i++) {
1262                 int curr_queue = (start_queue + i) % (h->highest_lun + 1);
1263                 /* make sure the disk has been added and the drive is real
1264                  * because this can be called from the middle of init_one.
1265                  */
1266                 if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
1267                         continue;
1268                 blk_start_queue(h->gendisk[curr_queue]->queue);
1269
1270                 /* check to see if we have maxed out the number of commands
1271                  * that can be placed on the queue.
1272                  */
1273                 if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds) {
1274                         if (curr_queue == start_queue) {
1275                                 h->next_to_run =
1276                                     (start_queue + 1) % (h->highest_lun + 1);
1277                                 break;
1278                         } else {
1279                                 h->next_to_run = curr_queue;
1280                                 break;
1281                         }
1282                 }
1283         }
1284 }
1285
1286 static void cciss_softirq_done(struct request *rq)
1287 {
1288         CommandList_struct *cmd = rq->completion_data;
1289         ctlr_info_t *h = hba[cmd->ctlr];
1290         unsigned long flags;
1291         u64bit temp64;
1292         int i, ddir;
1293
1294         if (cmd->Request.Type.Direction == XFER_READ)
1295                 ddir = PCI_DMA_FROMDEVICE;
1296         else
1297                 ddir = PCI_DMA_TODEVICE;
1298
1299         /* command did not need to be retried */
1300         /* unmap the DMA mapping for all the scatter gather elements */
1301         for (i = 0; i < cmd->Header.SGList; i++) {
1302                 temp64.val32.lower = cmd->SG[i].Addr.lower;
1303                 temp64.val32.upper = cmd->SG[i].Addr.upper;
1304                 pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
1305         }
1306
1307 #ifdef CCISS_DEBUG
1308         printk("Done with %p\n", rq);
1309 #endif                          /* CCISS_DEBUG */
1310
1311         if (blk_end_request(rq, (rq->errors == 0) ? 0 : -EIO, blk_rq_bytes(rq)))
1312                 BUG();
1313
1314         spin_lock_irqsave(&h->lock, flags);
1315         cmd_free(h, cmd, 1);
1316         cciss_check_queues(h);
1317         spin_unlock_irqrestore(&h->lock, flags);
1318 }
1319
1320 /* This function gets the serial number of a logical drive via
1321  * inquiry page 0x83.  Serial no. is 16 bytes.  If the serial
1322  * number cannot be had, for whatever reason, 16 bytes of 0xff
1323  * are returned instead.
1324  */
1325 static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
1326                                 unsigned char *serial_no, int buflen)
1327 {
1328 #define PAGE_83_INQ_BYTES 64
1329         int rc;
1330         unsigned char *buf;
1331
1332         if (buflen > 16)
1333                 buflen = 16;
1334         memset(serial_no, 0xff, buflen);
1335         buf = kzalloc(PAGE_83_INQ_BYTES, GFP_KERNEL);
1336         if (!buf)
1337                 return;
1338         memset(serial_no, 0, buflen);
1339         if (withirq)
1340                 rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
1341                         PAGE_83_INQ_BYTES, 1, logvol, 0x83, TYPE_CMD);
1342         else
1343                 rc = sendcmd(CISS_INQUIRY, ctlr, buf,
1344                         PAGE_83_INQ_BYTES, 1, logvol, 0x83, NULL, TYPE_CMD);
1345         if (rc == IO_OK)
1346                 memcpy(serial_no, &buf[8], buflen);
1347         kfree(buf);
1348         return;
1349 }
1350
1351 static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
1352                                 int drv_index)
1353 {
1354         disk->queue = blk_init_queue(do_cciss_request, &h->lock);
1355         sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
1356         disk->major = h->major;
1357         disk->first_minor = drv_index << NWD_SHIFT;
1358         disk->fops = &cciss_fops;
1359         disk->private_data = &h->drv[drv_index];
1360         disk->driverfs_dev = &h->pdev->dev;
1361
1362         /* Set up queue information */
1363         blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
1364
1365         /* This is a hardware imposed limit. */
1366         blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
1367
1368         /* This is a limit in the driver and could be eliminated. */
1369         blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
1370
1371         blk_queue_max_sectors(disk->queue, h->cciss_max_sectors);
1372
1373         blk_queue_softirq_done(disk->queue, cciss_softirq_done);
1374
1375         disk->queue->queuedata = h;
1376
1377         blk_queue_hardsect_size(disk->queue,
1378                                 h->drv[drv_index].block_size);
1379
1380         /* Make sure all queue data is written out before */
1381         /* setting h->drv[drv_index].queue, as setting this */
1382         /* allows the interrupt handler to start the queue */
1383         wmb();
1384         h->drv[drv_index].queue = disk->queue;
1385         add_disk(disk);
1386 }
1387
1388 /* This function will check the usage_count of the drive to be updated/added.
1389  * If the usage_count is zero and it is a heretofore unknown drive, or,
1390  * the drive's capacity, geometry, or serial number has changed,
1391  * then the drive information will be updated and the disk will be
1392  * re-registered with the kernel.  If these conditions don't hold,
1393  * then it will be left alone for the next reboot.  The exception to this
1394  * is disk 0 which will always be left registered with the kernel since it
1395  * is also the controller node.  Any changes to disk 0 will show up on
1396  * the next reboot.
1397  */
1398 static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
1399 {
1400         ctlr_info_t *h = hba[ctlr];
1401         struct gendisk *disk;
1402         InquiryData_struct *inq_buff = NULL;
1403         unsigned int block_size;
1404         sector_t total_size;
1405         unsigned long flags = 0;
1406         int ret = 0;
1407         drive_info_struct *drvinfo;
1408         int was_only_controller_node;
1409
1410         /* Get information about the disk and modify the driver structure */
1411         inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
1412         drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL);
1413         if (inq_buff == NULL || drvinfo == NULL)
1414                 goto mem_msg;
1415
1416         /* See if we're trying to update the "controller node"
1417          * this will happen the when the first logical drive gets
1418          * created by ACU.
1419          */
1420         was_only_controller_node = (drv_index == 0 &&
1421                                 h->drv[0].raid_level == -1);
1422
1423         /* testing to see if 16-byte CDBs are already being used */
1424         if (h->cciss_read == CCISS_READ_16) {
1425                 cciss_read_capacity_16(h->ctlr, drv_index, 1,
1426                         &total_size, &block_size);
1427
1428         } else {
1429                 cciss_read_capacity(ctlr, drv_index, 1,
1430                                     &total_size, &block_size);
1431
1432                 /* if read_capacity returns all F's this volume is >2TB */
1433                 /* in size so we switch to 16-byte CDB's for all */
1434                 /* read/write ops */
1435                 if (total_size == 0xFFFFFFFFULL) {
1436                         cciss_read_capacity_16(ctlr, drv_index, 1,
1437                         &total_size, &block_size);
1438                         h->cciss_read = CCISS_READ_16;
1439                         h->cciss_write = CCISS_WRITE_16;
1440                 } else {
1441                         h->cciss_read = CCISS_READ_10;
1442                         h->cciss_write = CCISS_WRITE_10;
1443                 }
1444         }
1445
1446         cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
1447                                inq_buff, drvinfo);
1448         drvinfo->block_size = block_size;
1449         drvinfo->nr_blocks = total_size + 1;
1450
1451         cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
1452                         sizeof(drvinfo->serial_no));
1453
1454         /* Is it the same disk we already know, and nothing's changed? */
1455         if (h->drv[drv_index].raid_level != -1 &&
1456                 ((memcmp(drvinfo->serial_no,
1457                                 h->drv[drv_index].serial_no, 16) == 0) &&
1458                 drvinfo->block_size == h->drv[drv_index].block_size &&
1459                 drvinfo->nr_blocks == h->drv[drv_index].nr_blocks &&
1460                 drvinfo->heads == h->drv[drv_index].heads &&
1461                 drvinfo->sectors == h->drv[drv_index].sectors &&
1462                 drvinfo->cylinders == h->drv[drv_index].cylinders))
1463                         /* The disk is unchanged, nothing to update */
1464                         goto freeret;
1465
1466         /* If we get here it's not the same disk, or something's changed,
1467          * so we need to deregister it, and re-register it, if it's not
1468          * in use.
1469          * If the disk already exists then deregister it before proceeding
1470          * (unless it's the first disk (for the controller node)).
1471          */
1472         if (h->drv[drv_index].raid_level != -1 && drv_index != 0) {
1473                 printk(KERN_WARNING "disk %d has changed.\n", drv_index);
1474                 spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1475                 h->drv[drv_index].busy_configuring = 1;
1476                 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1477
1478                 /* deregister_disk sets h->drv[drv_index].queue = NULL
1479                  * which keeps the interrupt handler from starting
1480                  * the queue.
1481                  */
1482                 ret = deregister_disk(h, drv_index, 0);
1483                 h->drv[drv_index].busy_configuring = 0;
1484         }
1485
1486         /* If the disk is in use return */
1487         if (ret)
1488                 goto freeret;
1489
1490         /* Save the new information from cciss_geometry_inquiry
1491          * and serial number inquiry.
1492          */
1493         h->drv[drv_index].block_size = drvinfo->block_size;
1494         h->drv[drv_index].nr_blocks = drvinfo->nr_blocks;
1495         h->drv[drv_index].heads = drvinfo->heads;
1496         h->drv[drv_index].sectors = drvinfo->sectors;
1497         h->drv[drv_index].cylinders = drvinfo->cylinders;
1498         h->drv[drv_index].raid_level = drvinfo->raid_level;
1499         memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
1500
1501         ++h->num_luns;
1502         disk = h->gendisk[drv_index];
1503         set_capacity(disk, h->drv[drv_index].nr_blocks);
1504
1505         /* If it's not disk 0 (drv_index != 0)
1506          * or if it was disk 0, but there was previously
1507          * no actual corresponding configured logical drive
1508          * (raid_level == -1) then we want to update the
1509          * logical drive's information.
1510          */
1511         if (drv_index || first_time)
1512                 cciss_add_disk(h, disk, drv_index);
1513
1514 freeret:
1515         kfree(inq_buff);
1516         kfree(drvinfo);
1517         return;
1518 mem_msg:
1519         printk(KERN_ERR "cciss: out of memory\n");
1520         goto freeret;
1521 }
1522
1523 /* This function will find the first index of the controllers drive array
1524  * that has a -1 for the raid_level and will return that index.  This is
1525  * where new drives will be added.  If the index to be returned is greater
1526  * than the highest_lun index for the controller then highest_lun is set
1527  * to this new index.  If there are no available indexes then -1 is returned.
1528  * "controller_node" is used to know if this is a real logical drive, or just
1529  * the controller node, which determines if this counts towards highest_lun.
1530  */
1531 static int cciss_find_free_drive_index(int ctlr, int controller_node)
1532 {
1533         int i;
1534
1535         for (i = 0; i < CISS_MAX_LUN; i++) {
1536                 if (hba[ctlr]->drv[i].raid_level == -1) {
1537                         if (i > hba[ctlr]->highest_lun)
1538                                 if (!controller_node)
1539                                         hba[ctlr]->highest_lun = i;
1540                         return i;
1541                 }
1542         }
1543         return -1;
1544 }
1545
1546 /* cciss_add_gendisk finds a free hba[]->drv structure
1547  * and allocates a gendisk if needed, and sets the lunid
1548  * in the drvinfo structure.   It returns the index into
1549  * the ->drv[] array, or -1 if none are free.
1550  * is_controller_node indicates whether highest_lun should
1551  * count this disk, or if it's only being added to provide
1552  * a means to talk to the controller in case no logical
1553  * drives have yet been configured.
1554  */
1555 static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
1556 {
1557         int drv_index;
1558
1559         drv_index = cciss_find_free_drive_index(h->ctlr, controller_node);
1560         if (drv_index == -1)
1561                 return -1;
1562         /*Check if the gendisk needs to be allocated */
1563         if (!h->gendisk[drv_index]) {
1564                 h->gendisk[drv_index] =
1565                         alloc_disk(1 << NWD_SHIFT);
1566                 if (!h->gendisk[drv_index]) {
1567                         printk(KERN_ERR "cciss%d: could not "
1568                                 "allocate a new disk %d\n",
1569                                 h->ctlr, drv_index);
1570                         return -1;
1571                 }
1572         }
1573         h->drv[drv_index].LunID = lunid;
1574
1575         /* Don't need to mark this busy because nobody */
1576         /* else knows about this disk yet to contend */
1577         /* for access to it. */
1578         h->drv[drv_index].busy_configuring = 0;
1579         wmb();
1580         return drv_index;
1581 }
1582
1583 /* This is for the special case of a controller which
1584  * has no logical drives.  In this case, we still need
1585  * to register a disk so the controller can be accessed
1586  * by the Array Config Utility.
1587  */
1588 static void cciss_add_controller_node(ctlr_info_t *h)
1589 {
1590         struct gendisk *disk;
1591         int drv_index;
1592
1593         if (h->gendisk[0] != NULL) /* already did this? Then bail. */
1594                 return;
1595
1596         drv_index = cciss_add_gendisk(h, 0, 1);
1597         if (drv_index == -1) {
1598                 printk(KERN_WARNING "cciss%d: could not "
1599                         "add disk 0.\n", h->ctlr);
1600                 return;
1601         }
1602         h->drv[drv_index].block_size = 512;
1603         h->drv[drv_index].nr_blocks = 0;
1604         h->drv[drv_index].heads = 0;
1605         h->drv[drv_index].sectors = 0;
1606         h->drv[drv_index].cylinders = 0;
1607         h->drv[drv_index].raid_level = -1;
1608         memset(h->drv[drv_index].serial_no, 0, 16);
1609         disk = h->gendisk[drv_index];
1610         cciss_add_disk(h, disk, drv_index);
1611 }
1612
1613 /* This function will add and remove logical drives from the Logical
1614  * drive array of the controller and maintain persistency of ordering
1615  * so that mount points are preserved until the next reboot.  This allows
1616  * for the removal of logical drives in the middle of the drive array
1617  * without a re-ordering of those drives.
1618  * INPUT
1619  * h            = The controller to perform the operations on
1620  */
1621 static int rebuild_lun_table(ctlr_info_t *h, int first_time)
1622 {
1623         int ctlr = h->ctlr;
1624         int num_luns;
1625         ReportLunData_struct *ld_buff = NULL;
1626         int return_code;
1627         int listlength = 0;
1628         int i;
1629         int drv_found;
1630         int drv_index = 0;
1631         __u32 lunid = 0;
1632         unsigned long flags;
1633
1634         if (!capable(CAP_SYS_RAWIO))
1635                 return -EPERM;
1636
1637         /* Set busy_configuring flag for this operation */
1638         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1639         if (h->busy_configuring) {
1640                 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1641                 return -EBUSY;
1642         }
1643         h->busy_configuring = 1;
1644         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1645
1646         ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
1647         if (ld_buff == NULL)
1648                 goto mem_msg;
1649
1650         return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
1651                                       sizeof(ReportLunData_struct), 0,
1652                                       0, 0, TYPE_CMD);
1653
1654         if (return_code == IO_OK)
1655                 listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
1656         else {  /* reading number of logical volumes failed */
1657                 printk(KERN_WARNING "cciss: report logical volume"
1658                        " command failed\n");
1659                 listlength = 0;
1660                 goto freeret;
1661         }
1662
1663         num_luns = listlength / 8;      /* 8 bytes per entry */
1664         if (num_luns > CISS_MAX_LUN) {
1665                 num_luns = CISS_MAX_LUN;
1666                 printk(KERN_WARNING "cciss: more luns configured"
1667                        " on controller than can be handled by"
1668                        " this driver.\n");
1669         }
1670
1671         if (num_luns == 0)
1672                 cciss_add_controller_node(h);
1673
1674         /* Compare controller drive array to driver's drive array
1675          * to see if any drives are missing on the controller due
1676          * to action of Array Config Utility (user deletes drive)
1677          * and deregister logical drives which have disappeared.
1678          */
1679         for (i = 0; i <= h->highest_lun; i++) {
1680                 int j;
1681                 drv_found = 0;
1682
1683                 /* skip holes in the array from already deleted drives */
1684                 if (h->drv[i].raid_level == -1)
1685                         continue;
1686
1687                 for (j = 0; j < num_luns; j++) {
1688                         memcpy(&lunid, &ld_buff->LUN[j][0], 4);
1689                         lunid = le32_to_cpu(lunid);
1690                         if (h->drv[i].LunID == lunid) {
1691                                 drv_found = 1;
1692                                 break;
1693                         }
1694                 }
1695                 if (!drv_found) {
1696                         /* Deregister it from the OS, it's gone. */
1697                         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1698                         h->drv[i].busy_configuring = 1;
1699                         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1700                         return_code = deregister_disk(h, i, 1);
1701                         h->drv[i].busy_configuring = 0;
1702                 }
1703         }
1704
1705         /* Compare controller drive array to driver's drive array.
1706          * Check for updates in the drive information and any new drives
1707          * on the controller due to ACU adding logical drives, or changing
1708          * a logical drive's size, etc.  Reregister any new/changed drives
1709          */
1710         for (i = 0; i < num_luns; i++) {
1711                 int j;
1712
1713                 drv_found = 0;
1714
1715                 memcpy(&lunid, &ld_buff->LUN[i][0], 4);
1716                 lunid = le32_to_cpu(lunid);
1717
1718                 /* Find if the LUN is already in the drive array
1719                  * of the driver.  If so then update its info
1720                  * if not in use.  If it does not exist then find
1721                  * the first free index and add it.
1722                  */
1723                 for (j = 0; j <= h->highest_lun; j++) {
1724                         if (h->drv[j].raid_level != -1 &&
1725                                 h->drv[j].LunID == lunid) {
1726                                 drv_index = j;
1727                                 drv_found = 1;
1728                                 break;
1729                         }
1730                 }
1731
1732                 /* check if the drive was found already in the array */
1733                 if (!drv_found) {
1734                         drv_index = cciss_add_gendisk(h, lunid, 0);
1735                         if (drv_index == -1)
1736                                 goto freeret;
1737                 }
1738                 cciss_update_drive_info(ctlr, drv_index, first_time);
1739         }               /* end for */
1740
1741 freeret:
1742         kfree(ld_buff);
1743         h->busy_configuring = 0;
1744         /* We return -1 here to tell the ACU that we have registered/updated
1745          * all of the drives that we can and to keep it from calling us
1746          * additional times.
1747          */
1748         return -1;
1749 mem_msg:
1750         printk(KERN_ERR "cciss: out of memory\n");
1751         h->busy_configuring = 0;
1752         goto freeret;
1753 }
1754
1755 /* This function will deregister the disk and it's queue from the
1756  * kernel.  It must be called with the controller lock held and the
1757  * drv structures busy_configuring flag set.  It's parameters are:
1758  *
1759  * disk = This is the disk to be deregistered
1760  * drv  = This is the drive_info_struct associated with the disk to be
1761  *        deregistered.  It contains information about the disk used
1762  *        by the driver.
1763  * clear_all = This flag determines whether or not the disk information
1764  *             is going to be completely cleared out and the highest_lun
1765  *             reset.  Sometimes we want to clear out information about
1766  *             the disk in preparation for re-adding it.  In this case
1767  *             the highest_lun should be left unchanged and the LunID
1768  *             should not be cleared.
1769 */
1770 static int deregister_disk(ctlr_info_t *h, int drv_index,
1771                            int clear_all)
1772 {
1773         int i;
1774         struct gendisk *disk;
1775         drive_info_struct *drv;
1776
1777         if (!capable(CAP_SYS_RAWIO))
1778                 return -EPERM;
1779
1780         drv = &h->drv[drv_index];
1781         disk = h->gendisk[drv_index];
1782
1783         /* make sure logical volume is NOT is use */
1784         if (clear_all || (h->gendisk[0] == disk)) {
1785                 if (drv->usage_count > 1)
1786                         return -EBUSY;
1787         } else if (drv->usage_count > 0)
1788                 return -EBUSY;
1789
1790         /* invalidate the devices and deregister the disk.  If it is disk
1791          * zero do not deregister it but just zero out it's values.  This
1792          * allows us to delete disk zero but keep the controller registered.
1793          */
1794         if (h->gendisk[0] != disk) {
1795                 struct request_queue *q = disk->queue;
1796                 if (disk->flags & GENHD_FL_UP)
1797                         del_gendisk(disk);
1798                 if (q) {
1799                         blk_cleanup_queue(q);
1800                         /* Set drv->queue to NULL so that we do not try
1801                          * to call blk_start_queue on this queue in the
1802                          * interrupt handler
1803                          */
1804                         drv->queue = NULL;
1805                 }
1806                 /* If clear_all is set then we are deleting the logical
1807                  * drive, not just refreshing its info.  For drives
1808                  * other than disk 0 we will call put_disk.  We do not
1809                  * do this for disk 0 as we need it to be able to
1810                  * configure the controller.
1811                  */
1812                 if (clear_all){
1813                         /* This isn't pretty, but we need to find the
1814                          * disk in our array and NULL our the pointer.
1815                          * This is so that we will call alloc_disk if
1816                          * this index is used again later.
1817                          */
1818                         for (i=0; i < CISS_MAX_LUN; i++){
1819                                 if (h->gendisk[i] == disk) {
1820                                         h->gendisk[i] = NULL;
1821                                         break;
1822                                 }
1823                         }
1824                         put_disk(disk);
1825                 }
1826         } else {
1827                 set_capacity(disk, 0);
1828         }
1829
1830         --h->num_luns;
1831         /* zero out the disk size info */
1832         drv->nr_blocks = 0;
1833         drv->block_size = 0;
1834         drv->heads = 0;
1835         drv->sectors = 0;
1836         drv->cylinders = 0;
1837         drv->raid_level = -1;   /* This can be used as a flag variable to
1838                                  * indicate that this element of the drive
1839                                  * array is free.
1840                                  */
1841
1842         if (clear_all) {
1843                 /* check to see if it was the last disk */
1844                 if (drv == h->drv + h->highest_lun) {
1845                         /* if so, find the new hightest lun */
1846                         int i, newhighest = -1;
1847                         for (i = 0; i <= h->highest_lun; i++) {
1848                                 /* if the disk has size > 0, it is available */
1849                                 if (h->drv[i].heads)
1850                                         newhighest = i;
1851                         }
1852                         h->highest_lun = newhighest;
1853                 }
1854
1855                 drv->LunID = 0;
1856         }
1857         return 0;
1858 }
1859
1860 static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,      /* 0: address the controller,
1861                                                                                                                            1: address logical volume log_unit,
1862                                                                                                                            2: periph device address is scsi3addr */
1863                     unsigned int log_unit, __u8 page_code,
1864                     unsigned char *scsi3addr, int cmd_type)
1865 {
1866         ctlr_info_t *h = hba[ctlr];
1867         u64bit buff_dma_handle;
1868         int status = IO_OK;
1869
1870         c->cmd_type = CMD_IOCTL_PEND;
1871         c->Header.ReplyQueue = 0;
1872         if (buff != NULL) {
1873                 c->Header.SGList = 1;
1874                 c->Header.SGTotal = 1;
1875         } else {
1876                 c->Header.SGList = 0;
1877                 c->Header.SGTotal = 0;
1878         }
1879         c->Header.Tag.lower = c->busaddr;
1880
1881         c->Request.Type.Type = cmd_type;
1882         if (cmd_type == TYPE_CMD) {
1883                 switch (cmd) {
1884                 case CISS_INQUIRY:
1885                         /* If the logical unit number is 0 then, this is going
1886                            to controller so It's a physical command
1887                            mode = 0 target = 0.  So we have nothing to write.
1888                            otherwise, if use_unit_num == 1,
1889                            mode = 1(volume set addressing) target = LUNID
1890                            otherwise, if use_unit_num == 2,
1891                            mode = 0(periph dev addr) target = scsi3addr */
1892                         if (use_unit_num == 1) {
1893                                 c->Header.LUN.LogDev.VolId =
1894                                     h->drv[log_unit].LunID;
1895                                 c->Header.LUN.LogDev.Mode = 1;
1896                         } else if (use_unit_num == 2) {
1897                                 memcpy(c->Header.LUN.LunAddrBytes, scsi3addr,
1898                                        8);
1899                                 c->Header.LUN.LogDev.Mode = 0;
1900                         }
1901                         /* are we trying to read a vital product page */
1902                         if (page_code != 0) {
1903                                 c->Request.CDB[1] = 0x01;
1904                                 c->Request.CDB[2] = page_code;
1905                         }
1906                         c->Request.CDBLen = 6;
1907                         c->Request.Type.Attribute = ATTR_SIMPLE;
1908                         c->Request.Type.Direction = XFER_READ;
1909                         c->Request.Timeout = 0;
1910                         c->Request.CDB[0] = CISS_INQUIRY;
1911                         c->Request.CDB[4] = size & 0xFF;
1912                         break;
1913                 case CISS_REPORT_LOG:
1914                 case CISS_REPORT_PHYS:
1915                         /* Talking to controller so It's a physical command
1916                            mode = 00 target = 0.  Nothing to write.
1917                          */
1918                         c->Request.CDBLen = 12;
1919                         c->Request.Type.Attribute = ATTR_SIMPLE;
1920                         c->Request.Type.Direction = XFER_READ;
1921                         c->Request.Timeout = 0;
1922                         c->Request.CDB[0] = cmd;
1923                         c->Request.CDB[6] = (size >> 24) & 0xFF;        //MSB
1924                         c->Request.CDB[7] = (size >> 16) & 0xFF;
1925                         c->Request.CDB[8] = (size >> 8) & 0xFF;
1926                         c->Request.CDB[9] = size & 0xFF;
1927                         break;
1928
1929                 case CCISS_READ_CAPACITY:
1930                         c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
1931                         c->Header.LUN.LogDev.Mode = 1;
1932                         c->Request.CDBLen = 10;
1933                         c->Request.Type.Attribute = ATTR_SIMPLE;
1934                         c->Request.Type.Direction = XFER_READ;
1935                         c->Request.Timeout = 0;
1936                         c->Request.CDB[0] = cmd;
1937                         break;
1938                 case CCISS_READ_CAPACITY_16:
1939                         c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
1940                         c->Header.LUN.LogDev.Mode = 1;
1941                         c->Request.CDBLen = 16;
1942                         c->Request.Type.Attribute = ATTR_SIMPLE;
1943                         c->Request.Type.Direction = XFER_READ;
1944                         c->Request.Timeout = 0;
1945                         c->Request.CDB[0] = cmd;
1946                         c->Request.CDB[1] = 0x10;
1947                         c->Request.CDB[10] = (size >> 24) & 0xFF;
1948                         c->Request.CDB[11] = (size >> 16) & 0xFF;
1949                         c->Request.CDB[12] = (size >> 8) & 0xFF;
1950                         c->Request.CDB[13] = size & 0xFF;
1951                         c->Request.Timeout = 0;
1952                         c->Request.CDB[0] = cmd;
1953                         break;
1954                 case CCISS_CACHE_FLUSH:
1955                         c->Request.CDBLen = 12;
1956                         c->Request.Type.Attribute = ATTR_SIMPLE;
1957                         c->Request.Type.Direction = XFER_WRITE;
1958                         c->Request.Timeout = 0;
1959                         c->Request.CDB[0] = BMIC_WRITE;
1960                         c->Request.CDB[6] = BMIC_CACHE_FLUSH;
1961                         break;
1962                 default:
1963                         printk(KERN_WARNING
1964                                "cciss%d:  Unknown Command 0x%c\n", ctlr, cmd);
1965                         return IO_ERROR;
1966                 }
1967         } else if (cmd_type == TYPE_MSG) {
1968                 switch (cmd) {
1969                 case 0: /* ABORT message */
1970                         c->Request.CDBLen = 12;
1971                         c->Request.Type.Attribute = ATTR_SIMPLE;
1972                         c->Request.Type.Direction = XFER_WRITE;
1973                         c->Request.Timeout = 0;
1974                         c->Request.CDB[0] = cmd;        /* abort */
1975                         c->Request.CDB[1] = 0;  /* abort a command */
1976                         /* buff contains the tag of the command to abort */
1977                         memcpy(&c->Request.CDB[4], buff, 8);
1978                         break;
1979                 case 1: /* RESET message */
1980                         c->Request.CDBLen = 12;
1981                         c->Request.Type.Attribute = ATTR_SIMPLE;
1982                         c->Request.Type.Direction = XFER_WRITE;
1983                         c->Request.Timeout = 0;
1984                         memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
1985                         c->Request.CDB[0] = cmd;        /* reset */
1986                         c->Request.CDB[1] = 0x04;       /* reset a LUN */
1987                         break;
1988                 case 3: /* No-Op message */
1989                         c->Request.CDBLen = 1;
1990                         c->Request.Type.Attribute = ATTR_SIMPLE;
1991                         c->Request.Type.Direction = XFER_WRITE;
1992                         c->Request.Timeout = 0;
1993                         c->Request.CDB[0] = cmd;
1994                         break;
1995                 default:
1996                         printk(KERN_WARNING
1997                                "cciss%d: unknown message type %d\n", ctlr, cmd);
1998                         return IO_ERROR;
1999                 }
2000         } else {
2001                 printk(KERN_WARNING
2002                        "cciss%d: unknown command type %d\n", ctlr, cmd_type);
2003                 return IO_ERROR;
2004         }
2005         /* Fill in the scatter gather information */
2006         if (size > 0) {
2007                 buff_dma_handle.val = (__u64) pci_map_single(h->pdev,
2008                                                              buff, size,
2009                                                              PCI_DMA_BIDIRECTIONAL);
2010                 c->SG[0].Addr.lower = buff_dma_handle.val32.lower;
2011                 c->SG[0].Addr.upper = buff_dma_handle.val32.upper;
2012                 c->SG[0].Len = size;
2013                 c->SG[0].Ext = 0;       /* we are not chaining */
2014         }
2015         return status;
2016 }
2017
2018 static int sendcmd_withirq(__u8 cmd,
2019                            int ctlr,
2020                            void *buff,
2021                            size_t size,
2022                            unsigned int use_unit_num,
2023                            unsigned int log_unit, __u8 page_code, int cmd_type)
2024 {
2025         ctlr_info_t *h = hba[ctlr];
2026         CommandList_struct *c;
2027         u64bit buff_dma_handle;
2028         unsigned long flags;
2029         int return_status;
2030         DECLARE_COMPLETION_ONSTACK(wait);
2031
2032         if ((c = cmd_alloc(h, 0)) == NULL)
2033                 return -ENOMEM;
2034         return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2035                                  log_unit, page_code, NULL, cmd_type);
2036         if (return_status != IO_OK) {
2037                 cmd_free(h, c, 0);
2038                 return return_status;
2039         }
2040       resend_cmd2:
2041         c->waiting = &wait;
2042
2043         /* Put the request on the tail of the queue and send it */
2044         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
2045         addQ(&h->reqQ, c);
2046         h->Qdepth++;
2047         start_io(h);
2048         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
2049
2050         wait_for_completion(&wait);
2051
2052         if (c->err_info->CommandStatus != 0) {  /* an error has occurred */
2053                 switch (c->err_info->CommandStatus) {
2054                 case CMD_TARGET_STATUS:
2055                         printk(KERN_WARNING "cciss: cmd %p has "
2056                                " completed with errors\n", c);
2057                         if (c->err_info->ScsiStatus) {
2058                                 printk(KERN_WARNING "cciss: cmd %p "
2059                                        "has SCSI Status = %x\n",
2060                                        c, c->err_info->ScsiStatus);
2061                         }
2062
2063                         break;
2064                 case CMD_DATA_UNDERRUN:
2065                 case CMD_DATA_OVERRUN:
2066                         /* expected for inquire and report lun commands */
2067                         break;
2068                 case CMD_INVALID:
2069                         printk(KERN_WARNING "cciss: Cmd %p is "
2070                                "reported invalid\n", c);
2071                         return_status = IO_ERROR;
2072                         break;
2073                 case CMD_PROTOCOL_ERR:
2074                         printk(KERN_WARNING "cciss: cmd %p has "
2075                                "protocol error \n", c);
2076                         return_status = IO_ERROR;
2077                         break;
2078                 case CMD_HARDWARE_ERR:
2079                         printk(KERN_WARNING "cciss: cmd %p had "
2080                                " hardware error\n", c);
2081                         return_status = IO_ERROR;
2082                         break;
2083                 case CMD_CONNECTION_LOST:
2084                         printk(KERN_WARNING "cciss: cmd %p had "
2085                                "connection lost\n", c);
2086                         return_status = IO_ERROR;
2087                         break;
2088                 case CMD_ABORTED:
2089                         printk(KERN_WARNING "cciss: cmd %p was "
2090                                "aborted\n", c);
2091                         return_status = IO_ERROR;
2092                         break;
2093                 case CMD_ABORT_FAILED:
2094                         printk(KERN_WARNING "cciss: cmd %p reports "
2095                                "abort failed\n", c);
2096                         return_status = IO_ERROR;
2097                         break;
2098                 case CMD_UNSOLICITED_ABORT:
2099                         printk(KERN_WARNING
2100                                "cciss%d: unsolicited abort %p\n", ctlr, c);
2101                         if (c->retry_count < MAX_CMD_RETRIES) {
2102                                 printk(KERN_WARNING
2103                                        "cciss%d: retrying %p\n", ctlr, c);
2104                                 c->retry_count++;
2105                                 /* erase the old error information */
2106                                 memset(c->err_info, 0,
2107                                        sizeof(ErrorInfo_struct));
2108                                 return_status = IO_OK;
2109                                 INIT_COMPLETION(wait);
2110                                 goto resend_cmd2;
2111                         }
2112                         return_status = IO_ERROR;
2113                         break;
2114                 default:
2115                         printk(KERN_WARNING "cciss: cmd %p returned "
2116                                "unknown status %x\n", c,
2117                                c->err_info->CommandStatus);
2118                         return_status = IO_ERROR;
2119                 }
2120         }
2121         /* unlock the buffers from DMA */
2122         buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2123         buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2124         pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
2125                          c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2126         cmd_free(h, c, 0);
2127         return return_status;
2128 }
2129
2130 static void cciss_geometry_inquiry(int ctlr, int logvol,
2131                                    int withirq, sector_t total_size,
2132                                    unsigned int block_size,
2133                                    InquiryData_struct *inq_buff,
2134                                    drive_info_struct *drv)
2135 {
2136         int return_code;
2137         unsigned long t;
2138
2139         memset(inq_buff, 0, sizeof(InquiryData_struct));
2140         if (withirq)
2141                 return_code = sendcmd_withirq(CISS_INQUIRY, ctlr,
2142                                               inq_buff, sizeof(*inq_buff), 1,
2143                                               logvol, 0xC1, TYPE_CMD);
2144         else
2145                 return_code = sendcmd(CISS_INQUIRY, ctlr, inq_buff,
2146                                       sizeof(*inq_buff), 1, logvol, 0xC1, NULL,
2147                                       TYPE_CMD);
2148         if (return_code == IO_OK) {
2149                 if (inq_buff->data_byte[8] == 0xFF) {
2150                         printk(KERN_WARNING
2151                                "cciss: reading geometry failed, volume "
2152                                "does not support reading geometry\n");
2153                         drv->heads = 255;
2154                         drv->sectors = 32;      // Sectors per track
2155                         drv->cylinders = total_size + 1;
2156                         drv->raid_level = RAID_UNKNOWN;
2157                 } else {
2158                         drv->heads = inq_buff->data_byte[6];
2159                         drv->sectors = inq_buff->data_byte[7];
2160                         drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
2161                         drv->cylinders += inq_buff->data_byte[5];
2162                         drv->raid_level = inq_buff->data_byte[8];
2163                 }
2164                 drv->block_size = block_size;
2165                 drv->nr_blocks = total_size + 1;
2166                 t = drv->heads * drv->sectors;
2167                 if (t > 1) {
2168                         sector_t real_size = total_size + 1;
2169                         unsigned long rem = sector_div(real_size, t);
2170                         if (rem)
2171                                 real_size++;
2172                         drv->cylinders = real_size;
2173                 }
2174         } else {                /* Get geometry failed */
2175                 printk(KERN_WARNING "cciss: reading geometry failed\n");
2176         }
2177         printk(KERN_INFO "      heads=%d, sectors=%d, cylinders=%d\n\n",
2178                drv->heads, drv->sectors, drv->cylinders);
2179 }
2180
2181 static void
2182 cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
2183                     unsigned int *block_size)
2184 {
2185         ReadCapdata_struct *buf;
2186         int return_code;
2187
2188         buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
2189         if (!buf) {
2190                 printk(KERN_WARNING "cciss: out of memory\n");
2191                 return;
2192         }
2193
2194         if (withirq)
2195                 return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
2196                                 ctlr, buf, sizeof(ReadCapdata_struct),
2197                                         1, logvol, 0, TYPE_CMD);
2198         else
2199                 return_code = sendcmd(CCISS_READ_CAPACITY,
2200                                 ctlr, buf, sizeof(ReadCapdata_struct),
2201                                         1, logvol, 0, NULL, TYPE_CMD);
2202         if (return_code == IO_OK) {
2203                 *total_size = be32_to_cpu(*(__be32 *) buf->total_size);
2204                 *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
2205         } else {                /* read capacity command failed */
2206                 printk(KERN_WARNING "cciss: read capacity failed\n");
2207                 *total_size = 0;
2208                 *block_size = BLOCK_SIZE;
2209         }
2210         if (*total_size != 0)
2211                 printk(KERN_INFO "      blocks= %llu block_size= %d\n",
2212                 (unsigned long long)*total_size+1, *block_size);
2213         kfree(buf);
2214 }
2215
2216 static void
2217 cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,                                 unsigned int *block_size)
2218 {
2219         ReadCapdata_struct_16 *buf;
2220         int return_code;
2221
2222         buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
2223         if (!buf) {
2224                 printk(KERN_WARNING "cciss: out of memory\n");
2225                 return;
2226         }
2227
2228         if (withirq) {
2229                 return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
2230                         ctlr, buf, sizeof(ReadCapdata_struct_16),
2231                                 1, logvol, 0, TYPE_CMD);
2232         }
2233         else {
2234                 return_code = sendcmd(CCISS_READ_CAPACITY_16,
2235                         ctlr, buf, sizeof(ReadCapdata_struct_16),
2236                                 1, logvol, 0, NULL, TYPE_CMD);
2237         }
2238         if (return_code == IO_OK) {
2239                 *total_size = be64_to_cpu(*(__be64 *) buf->total_size);
2240                 *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
2241         } else {                /* read capacity command failed */
2242                 printk(KERN_WARNING "cciss: read capacity failed\n");
2243                 *total_size = 0;
2244                 *block_size = BLOCK_SIZE;
2245         }
2246         printk(KERN_INFO "      blocks= %llu block_size= %d\n",
2247                (unsigned long long)*total_size+1, *block_size);
2248         kfree(buf);
2249 }
2250
2251 static int cciss_revalidate(struct gendisk *disk)
2252 {
2253         ctlr_info_t *h = get_host(disk);
2254         drive_info_struct *drv = get_drv(disk);
2255         int logvol;
2256         int FOUND = 0;
2257         unsigned int block_size;
2258         sector_t total_size;
2259         InquiryData_struct *inq_buff = NULL;
2260
2261         for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
2262                 if (h->drv[logvol].LunID == drv->LunID) {
2263                         FOUND = 1;
2264                         break;
2265                 }
2266         }
2267
2268         if (!FOUND)
2269                 return 1;
2270
2271         inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
2272         if (inq_buff == NULL) {
2273                 printk(KERN_WARNING "cciss: out of memory\n");
2274                 return 1;
2275         }
2276         if (h->cciss_read == CCISS_READ_10) {
2277                 cciss_read_capacity(h->ctlr, logvol, 1,
2278                                         &total_size, &block_size);
2279         } else {
2280                 cciss_read_capacity_16(h->ctlr, logvol, 1,
2281                                         &total_size, &block_size);
2282         }
2283         cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
2284                                inq_buff, drv);
2285
2286         blk_queue_hardsect_size(drv->queue, drv->block_size);
2287         set_capacity(disk, drv->nr_blocks);
2288
2289         kfree(inq_buff);
2290         return 0;
2291 }
2292
2293 /*
2294  *   Wait polling for a command to complete.
2295  *   The memory mapped FIFO is polled for the completion.
2296  *   Used only at init time, interrupts from the HBA are disabled.
2297  */
2298 static unsigned long pollcomplete(int ctlr)
2299 {
2300         unsigned long done;
2301         int i;
2302
2303         /* Wait (up to 20 seconds) for a command to complete */
2304
2305         for (i = 20 * HZ; i > 0; i--) {
2306                 done = hba[ctlr]->access.command_completed(hba[ctlr]);
2307                 if (done == FIFO_EMPTY)
2308                         schedule_timeout_uninterruptible(1);
2309                 else
2310                         return done;
2311         }
2312         /* Invalid address to tell caller we ran out of time */
2313         return 1;
2314 }
2315
/*
 * Handle a completion that sendcmd() polled but was not waiting for.
 *
 * cmd is the opcode sendcmd() sent down, ctlr the controller index and
 * complete the unexpected value read from the completion FIFO.
 *
 * Without CONFIG_CISS_SCSI_TAPE: a warning is logged and 0 is returned.
 *
 * With CONFIG_CISS_SCSI_TAPE:
 *   - if cmd is neither CCISS_RESET_MSG nor CCISS_ABORT_MSG, the warning is
 *     logged and 1 is returned;
 *   - otherwise the completion is appended to the controller's scsi_rejects
 *     list and 0 is returned, or 1 is returned if that list is already full.
 *
 * sendcmd() calls BUG() on a non-zero return.
 */
static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete)
{
        /* We get in here if sendcmd() is polling for completions
           and gets some command back that it wasn't expecting --
           something other than that which it just sent down.
           Ordinarily, that shouldn't happen, but it can happen when
           the scsi tape stuff gets into error handling mode, and
           starts using sendcmd() to try to abort commands and
           reset tape drives.  In that case, sendcmd may pick up
           completions of commands that were sent to logical drives
           through the block i/o system, or cciss ioctls completing, etc.
           In that case, we need to save those completions for later
           processing by the interrupt handler.
         */

#ifdef CONFIG_CISS_SCSI_TAPE
        struct sendcmd_reject_list *srl = &hba[ctlr]->scsi_rejects;

        /* If it's not the scsi tape stuff doing error handling, (abort */
        /* or reset) then we don't expect anything weird. */
        if (cmd != CCISS_RESET_MSG && cmd != CCISS_ABORT_MSG) {
#endif
                /* Unconditional when CONFIG_CISS_SCSI_TAPE is not set. */
                printk(KERN_WARNING "cciss cciss%d: SendCmd "
                       "Invalid command list address returned! (%lx)\n",
                       ctlr, complete);
                /* not much we can do. */
#ifdef CONFIG_CISS_SCSI_TAPE
                return 1;
        }

        /* We've sent down an abort or reset, but something else
           has completed */
        /* The list is treated as full at nr_cmds + 2 saved entries. */
        if (srl->ncompletions >= (hba[ctlr]->nr_cmds + 2)) {
                /* Uh oh.  No room to save it for later... */
                printk(KERN_WARNING "cciss%d: Sendcmd: Invalid command addr, "
                       "reject list overflow, command lost!\n", ctlr);
                return 1;
        }
        /* Save it for later */
        srl->complete[srl->ncompletions] = complete;
        srl->ncompletions++;
#endif
        return 0;
}
2360
2361 /*
2362  * Send a command to the controller, and wait for it to complete.
2363  * Only used at init time.
2364  */
2365 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,      /* 0: address the controller,
2366                                                                                                    1: address logical volume log_unit,
2367                                                                                                    2: periph device address is scsi3addr */
2368                    unsigned int log_unit,
2369                    __u8 page_code, unsigned char *scsi3addr, int cmd_type)
2370 {
2371         CommandList_struct *c;
2372         int i;
2373         unsigned long complete;
2374         ctlr_info_t *info_p = hba[ctlr];
2375         u64bit buff_dma_handle;
2376         int status, done = 0;
2377
2378         if ((c = cmd_alloc(info_p, 1)) == NULL) {
2379                 printk(KERN_WARNING "cciss: unable to get memory");
2380                 return IO_ERROR;
2381         }
2382         status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2383                           log_unit, page_code, scsi3addr, cmd_type);
2384         if (status != IO_OK) {
2385                 cmd_free(info_p, c, 1);
2386                 return status;
2387         }
2388       resend_cmd1:
2389         /*
2390          * Disable interrupt
2391          */
2392 #ifdef CCISS_DEBUG
2393         printk(KERN_DEBUG "cciss: turning intr off\n");
2394 #endif                          /* CCISS_DEBUG */
2395         info_p->access.set_intr_mask(info_p, CCISS_INTR_OFF);
2396
2397         /* Make sure there is room in the command FIFO */
2398         /* Actually it should be completely empty at this time */
2399         /* unless we are in here doing error handling for the scsi */
2400         /* tape side of the driver. */
2401         for (i = 200000; i > 0; i--) {
2402                 /* if fifo isn't full go */
2403                 if (!(info_p->access.fifo_full(info_p))) {
2404
2405                         break;
2406                 }
2407                 udelay(10);
2408                 printk(KERN_WARNING "cciss cciss%d: SendCmd FIFO full,"
2409                        " waiting!\n", ctlr);
2410         }
2411         /*
2412          * Send the cmd
2413          */
2414         info_p->access.submit_command(info_p, c);
2415         done = 0;
2416         do {
2417                 complete = pollcomplete(ctlr);
2418
2419 #ifdef CCISS_DEBUG
2420                 printk(KERN_DEBUG "cciss: command completed\n");
2421 #endif                          /* CCISS_DEBUG */
2422
2423                 if (complete == 1) {
2424                         printk(KERN_WARNING
2425                                "cciss cciss%d: SendCmd Timeout out, "
2426                                "No command list address returned!\n", ctlr);
2427                         status = IO_ERROR;
2428                         done = 1;
2429                         break;
2430                 }
2431
2432                 /* This will need to change for direct lookup completions */
2433                 if ((complete & CISS_ERROR_BIT)
2434                     && (complete & ~CISS_ERROR_BIT) == c->busaddr) {
2435                         /* if data overrun or underun on Report command
2436                            ignore it
2437                          */
2438                         if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
2439                              (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
2440                              (c->Request.CDB[0] == CISS_INQUIRY)) &&
2441                             ((c->err_info->CommandStatus ==
2442                               CMD_DATA_OVERRUN) ||
2443                              (c->err_info->CommandStatus == CMD_DATA_UNDERRUN)
2444                             )) {
2445                                 complete = c->busaddr;
2446                         } else {
2447                                 if (c->err_info->CommandStatus ==
2448                                     CMD_UNSOLICITED_ABORT) {
2449                                         printk(KERN_WARNING "cciss%d: "
2450                                                "unsolicited abort %p\n",
2451                                                ctlr, c);
2452                                         if (c->retry_count < MAX_CMD_RETRIES) {
2453                                                 printk(KERN_WARNING
2454                                                        "cciss%d: retrying %p\n",
2455                                                        ctlr, c);
2456                                                 c->retry_count++;
2457                                                 /* erase the old error */
2458                                                 /* information */
2459                                                 memset(c->err_info, 0,
2460                                                        sizeof
2461                                                        (ErrorInfo_struct));
2462                                                 goto resend_cmd1;
2463                                         } else {
2464                                                 printk(KERN_WARNING
2465                                                        "cciss%d: retried %p too "
2466                                                        "many times\n", ctlr, c);
2467                                                 status = IO_ERROR;
2468                                                 goto cleanup1;
2469                                         }
2470                                 } else if (c->err_info->CommandStatus ==
2471                                            CMD_UNABORTABLE) {
2472                                         printk(KERN_WARNING
2473                                                "cciss%d: command could not be aborted.\n",
2474                                                ctlr);
2475                                         status = IO_ERROR;
2476                                         goto cleanup1;
2477                                 }
2478                                 printk(KERN_WARNING "ciss ciss%d: sendcmd"
2479                                        " Error %x \n", ctlr,
2480                                        c->err_info->CommandStatus);
2481                                 printk(KERN_WARNING "ciss ciss%d: sendcmd"
2482                                        " offensive info\n"
2483                                        "  size %x\n   num %x   value %x\n",
2484                                        ctlr,
2485                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2486                                        offense_size,
2487                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2488                                        offense_num,
2489                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2490                                        offense_value);
2491                                 status = IO_ERROR;
2492                                 goto cleanup1;
2493                         }
2494                 }
2495                 /* This will need changing for direct lookup completions */
2496                 if (complete != c->busaddr) {
2497                         if (add_sendcmd_reject(cmd, ctlr, complete) != 0) {
2498                                 BUG();  /* we are pretty much hosed if we get here. */
2499                         }
2500                         continue;
2501                 } else
2502                         done = 1;
2503         } while (!done);
2504
2505       cleanup1:
2506         /* unlock the data buffer from DMA */
2507         buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2508         buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2509         pci_unmap_single(info_p->pdev, (dma_addr_t) buff_dma_handle.val,
2510                          c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2511 #ifdef CONFIG_CISS_SCSI_TAPE
2512         /* if we saved some commands for later, process them now. */
2513         if (info_p->scsi_rejects.ncompletions > 0)
2514                 do_cciss_intr(0, info_p);
2515 #endif
2516         cmd_free(info_p, c, 1);
2517         return status;
2518 }
2519
2520 /*
2521  * Map (physical) PCI mem into (virtual) kernel space
2522  */
2523 static void __iomem *remap_pci_mem(ulong base, ulong size)
2524 {
2525         ulong page_base = ((ulong) base) & PAGE_MASK;
2526         ulong page_offs = ((ulong) base) - page_base;
2527         void __iomem *page_remapped = ioremap(page_base, page_offs + size);
2528
2529         return page_remapped ? (page_remapped + page_offs) : NULL;
2530 }
2531
2532 /*
2533  * Takes jobs of the Q and sends them to the hardware, then puts it on
2534  * the Q to wait for completion.
2535  */
2536 static void start_io(ctlr_info_t *h)
2537 {
2538         CommandList_struct *c;
2539
2540         while (!hlist_empty(&h->reqQ)) {
2541                 c = hlist_entry(h->reqQ.first, CommandList_struct, list);
2542                 /* can't do anything if fifo is full */
2543                 if ((h->access.fifo_full(h))) {
2544                         printk(KERN_WARNING "cciss: fifo full\n");
2545                         break;
2546                 }
2547
2548                 /* Get the first entry from the Request Q */
2549                 removeQ(c);
2550                 h->Qdepth--;
2551
2552                 /* Tell the controller execute command */
2553                 h->access.submit_command(h, c);
2554
2555                 /* Put job onto the completed Q */
2556                 addQ(&h->cmpQ, c);
2557         }
2558 }
2559
2560 /* Assumes that CCISS_LOCK(h->ctlr) is held. */
2561 /* Zeros out the error record and then resends the command back */
2562 /* to the controller */
2563 static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
2564 {
2565         /* erase the old error information */
2566         memset(c->err_info, 0, sizeof(ErrorInfo_struct));
2567
2568         /* add it to software queue and then send it to the controller */
2569         addQ(&h->reqQ, c);
2570         h->Qdepth++;
2571         if (h->Qdepth > h->maxQsinceinit)
2572                 h->maxQsinceinit = h->Qdepth;
2573
2574         start_io(h);
2575 }
2576
/*
 * Pack four status bytes into one word; inverse of macros in scsi.h.
 *
 * Layout, least significant byte first: SCSI status, message byte,
 * host byte, driver byte.  Only the low 8 bits of each argument are used.
 */
static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
        unsigned int msg_byte, unsigned int host_byte,
        unsigned int driver_byte)
{
        unsigned int packed;

        packed = (driver_byte & 0xff) << 24;
        packed |= (host_byte & 0xff) << 16;
        packed |= (msg_byte & 0xff) << 8;
        packed |= scsi_status_byte & 0xff;

        return packed;
}
2587
2588 static inline int evaluate_target_status(CommandList_struct *cmd)
2589 {
2590         unsigned char sense_key;
2591         unsigned char status_byte, msg_byte, host_byte, driver_byte;
2592         int error_value;
2593
2594         /* If we get in here, it means we got "target status", that is, scsi status */
2595         status_byte = cmd->err_info->ScsiStatus;
2596         driver_byte = DRIVER_OK;
2597         msg_byte = cmd->err_info->CommandStatus; /* correct?  seems too device specific */
2598
2599         if (blk_pc_request(cmd->rq))
2600                 host_byte = DID_PASSTHROUGH;
2601         else
2602                 host_byte = DID_OK;
2603
2604         error_value = make_status_bytes(status_byte, msg_byte,
2605                 host_byte, driver_byte);
2606
2607         if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
2608                 if (!blk_pc_request(cmd->rq))
2609                         printk(KERN_WARNING "cciss: cmd %p "
2610                                "has SCSI Status 0x%x\n",
2611                                cmd, cmd->err_info->ScsiStatus);
2612                 return error_value;
2613         }
2614
2615         /* check the sense key */
2616         sense_key = 0xf & cmd->err_info->SenseInfo[2];
2617         /* no status or recovered error */
2618         if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
2619                 error_value = 0;
2620
2621         if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
2622                 if (error_value != 0)
2623                         printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
2624                                " sense key = 0x%x\n", cmd, sense_key);
2625                 return error_value;
2626         }
2627
2628         /* SG_IO or similar, copy sense data back */
2629         if (cmd->rq->sense) {
2630                 if (cmd->rq->sense_len > cmd->err_info->SenseLen)
2631                         cmd->rq->sense_len = cmd->err_info->SenseLen;
2632                 memcpy(cmd->rq->sense, cmd->err_info->SenseInfo,
2633                         cmd->rq->sense_len);
2634         } else
2635                 cmd->rq->sense_len = 0;
2636
2637         return error_value;
2638 }
2639
2640 /* checks the status of the job and calls complete buffers to mark all
2641  * buffers for the completed job. Note that this function does not need
2642  * to hold the hba/queue lock.
2643  */
2644 static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
2645                                     int timeout)
2646 {
2647         int retry_cmd = 0;
2648         struct request *rq = cmd->rq;
2649
2650         rq->errors = 0;
2651
2652         if (timeout)
2653                 rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
2654
2655         if (cmd->err_info->CommandStatus == 0)  /* no error has occurred */
2656                 goto after_error_processing;
2657
2658         switch (cmd->err_info->CommandStatus) {
2659         case CMD_TARGET_STATUS:
2660                 rq->errors = evaluate_target_status(cmd);
2661                 break;
2662         case CMD_DATA_UNDERRUN:
2663                 if (blk_fs_request(cmd->rq)) {
2664                         printk(KERN_WARNING "cciss: cmd %p has"
2665                                " completed with data underrun "
2666                                "reported\n", cmd);
2667                         cmd->rq->data_len = cmd->err_info->ResidualCnt;
2668                 }
2669                 break;
2670         case CMD_DATA_OVERRUN:
2671                 if (blk_fs_request(cmd->rq))
2672                         printk(KERN_WARNING "cciss: cmd %p has"
2673                                " completed with data overrun "
2674                                "reported\n", cmd);
2675                 break;
2676         case CMD_INVALID:
2677                 printk(KERN_WARNING "cciss: cmd %p is "
2678                        "reported invalid\n", cmd);
2679                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2680                         cmd->err_info->CommandStatus, DRIVER_OK,
2681                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2682                 break;
2683         case CMD_PROTOCOL_ERR:
2684                 printk(KERN_WARNING "cciss: cmd %p has "
2685                        "protocol error \n", cmd);
2686                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2687                         cmd->err_info->CommandStatus, DRIVER_OK,
2688                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2689                 break;
2690         case CMD_HARDWARE_ERR:
2691                 printk(KERN_WARNING "cciss: cmd %p had "
2692                        " hardware error\n", cmd);
2693                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2694                         cmd->err_info->CommandStatus, DRIVER_OK,
2695                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2696                 break;
2697         case CMD_CONNECTION_LOST:
2698                 printk(KERN_WARNING "cciss: cmd %p had "
2699                        "connection lost\n", cmd);
2700                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2701                         cmd->err_info->CommandStatus, DRIVER_OK,
2702                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2703                 break;
2704         case CMD_ABORTED:
2705                 printk(KERN_WARNING "cciss: cmd %p was "
2706                        "aborted\n", cmd);
2707                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2708                         cmd->err_info->CommandStatus, DRIVER_OK,
2709                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
2710                 break;
2711         case CMD_ABORT_FAILED:
2712                 printk(KERN_WARNING "cciss: cmd %p reports "
2713                        "abort failed\n", cmd);
2714                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2715                         cmd->err_info->CommandStatus, DRIVER_OK,
2716                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2717                 break;
2718         case CMD_UNSOLICITED_ABORT:
2719                 printk(KERN_WARNING "cciss%d: unsolicited "
2720                        "abort %p\n", h->ctlr, cmd);
2721                 if (cmd->retry_count < MAX_CMD_RETRIES) {
2722                         retry_cmd = 1;
2723                         printk(KERN_WARNING
2724                                "cciss%d: retrying %p\n", h->ctlr, cmd);
2725                         cmd->retry_count++;
2726                 } else
2727                         printk(KERN_WARNING
2728                                "cciss%d: %p retried too "
2729                                "many times\n", h->ctlr, cmd);
2730                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2731                         cmd->err_info->CommandStatus, DRIVER_OK,
2732                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
2733                 break;
2734         case CMD_TIMEOUT:
2735                 printk(KERN_WARNING "cciss: cmd %p timedout\n", cmd);
2736                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2737                         cmd->err_info->CommandStatus, DRIVER_OK,
2738                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2739                 break;
2740         default:
2741                 printk(KERN_WARNING "cciss: cmd %p returned "
2742                        "unknown status %x\n", cmd,
2743                        cmd->err_info->CommandStatus);
2744                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2745                         cmd->err_info->CommandStatus, DRIVER_OK,
2746                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2747         }
2748
2749 after_error_processing:
2750
2751         /* We need to return this command */
2752         if (retry_cmd) {
2753                 resend_cciss_cmd(h, cmd);
2754                 return;
2755         }
2756         cmd->rq->completion_data = cmd;
2757         blk_complete_request(cmd->rq);
2758 }
2759
2760 /*
2761  * Get a request and submit it to the controller.
2762  */
2763 static void do_cciss_request(struct request_queue *q)
2764 {
2765         ctlr_info_t *h = q->queuedata;
2766         CommandList_struct *c;
2767         sector_t start_blk;
2768         int seg;
2769         struct request *creq;
2770         u64bit temp64;
2771         struct scatterlist tmp_sg[MAXSGENTRIES];
2772         drive_info_struct *drv;
2773         int i, dir;
2774
2775         /* We call start_io here in case there is a command waiting on the
2776          * queue that has not been sent.
2777          */
2778         if (blk_queue_plugged(q))
2779                 goto startio;
2780
2781       queue:
2782         creq = elv_next_request(q);
2783         if (!creq)
2784                 goto startio;
2785
2786         BUG_ON(creq->nr_phys_segments > MAXSGENTRIES);
2787
2788         if ((c = cmd_alloc(h, 1)) == NULL)
2789                 goto full;
2790
2791         blkdev_dequeue_request(creq);
2792
2793         spin_unlock_irq(q->queue_lock);
2794
2795         c->cmd_type = CMD_RWREQ;
2796         c->rq = creq;
2797
2798         /* fill in the request */
2799         drv = creq->rq_disk->private_data;
2800         c->Header.ReplyQueue = 0;       // unused in simple mode
2801         /* got command from pool, so use the command block index instead */
2802         /* for direct lookups. */
2803         /* The first 2 bits are reserved for controller error reporting. */
2804         c->Header.Tag.lower = (c->cmdindex << 3);
2805         c->Header.Tag.lower |= 0x04;    /* flag for direct lookup. */
2806         c->Header.LUN.LogDev.VolId = drv->LunID;
2807         c->Header.LUN.LogDev.Mode = 1;
2808         c->Request.CDBLen = 10; // 12 byte commands not in FW yet;
2809         c->Request.Type.Type = TYPE_CMD;        // It is a command.
2810         c->Request.Type.Attribute = ATTR_SIMPLE;
2811         c->Request.Type.Direction =
2812             (rq_data_dir(creq) == READ) ? XFER_READ : XFER_WRITE;
2813         c->Request.Timeout = 0; // Don't time out
2814         c->Request.CDB[0] =
2815             (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
2816         start_blk = creq->sector;
2817 #ifdef CCISS_DEBUG
2818         printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n", (int)creq->sector,
2819                (int)creq->nr_sectors);
2820 #endif                          /* CCISS_DEBUG */
2821
2822         sg_init_table(tmp_sg, MAXSGENTRIES);
2823         seg = blk_rq_map_sg(q, creq, tmp_sg);
2824
2825         /* get the DMA records for the setup */
2826         if (c->Request.Type.Direction == XFER_READ)
2827                 dir = PCI_DMA_FROMDEVICE;
2828         else
2829                 dir = PCI_DMA_TODEVICE;
2830
2831         for (i = 0; i < seg; i++) {
2832                 c->SG[i].Len = tmp_sg[i].length;
2833                 temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
2834                                                   tmp_sg[i].offset,
2835                                                   tmp_sg[i].length, dir);
2836                 c->SG[i].Addr.lower = temp64.val32.lower;
2837                 c->SG[i].Addr.upper = temp64.val32.upper;
2838                 c->SG[i].Ext = 0;       // we are not chaining
2839         }
2840         /* track how many SG entries we are using */
2841         if (seg > h->maxSG)
2842                 h->maxSG = seg;
2843
2844 #ifdef CCISS_DEBUG
2845         printk(KERN_DEBUG "cciss: Submitting %lu sectors in %d segments\n",
2846                creq->nr_sectors, seg);
2847 #endif                          /* CCISS_DEBUG */
2848
2849         c->Header.SGList = c->Header.SGTotal = seg;
2850         if (likely(blk_fs_request(creq))) {
2851                 if(h->cciss_read == CCISS_READ_10) {
2852                         c->Request.CDB[1] = 0;
2853                         c->Request.CDB[2] = (start_blk >> 24) & 0xff;   //MSB
2854                         c->Request.CDB[3] = (start_blk >> 16) & 0xff;
2855                         c->Request.CDB[4] = (start_blk >> 8) & 0xff;
2856                         c->Request.CDB[5] = start_blk & 0xff;
2857                         c->Request.CDB[6] = 0;  // (sect >> 24) & 0xff; MSB
2858                         c->Request.CDB[7] = (creq->nr_sectors >> 8) & 0xff;
2859                         c->Request.CDB[8] = creq->nr_sectors & 0xff;
2860                         c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
2861                 } else {
2862                         u32 upper32 = upper_32_bits(start_blk);
2863
2864                         c->Request.CDBLen = 16;
2865                         c->Request.CDB[1]= 0;
2866                         c->Request.CDB[2]= (upper32 >> 24) & 0xff;      //MSB
2867                         c->Request.CDB[3]= (upper32 >> 16) & 0xff;
2868                         c->Request.CDB[4]= (upper32 >>  8) & 0xff;
2869                         c->Request.CDB[5]= upper32 & 0xff;
2870                         c->Request.CDB[6]= (start_blk >> 24) & 0xff;
2871                         c->Request.CDB[7]= (start_blk >> 16) & 0xff;
2872                         c->Request.CDB[8]= (start_blk >>  8) & 0xff;
2873                         c->Request.CDB[9]= start_blk & 0xff;
2874                         c->Request.CDB[10]= (creq->nr_sectors >>  24) & 0xff;
2875                         c->Request.CDB[11]= (creq->nr_sectors >>  16) & 0xff;
2876                         c->Request.CDB[12]= (creq->nr_sectors >>  8) & 0xff;
2877                         c->Request.CDB[13]= creq->nr_sectors & 0xff;
2878                         c->Request.CDB[14] = c->Request.CDB[15] = 0;
2879                 }
2880         } else if (blk_pc_request(creq)) {
2881                 c->Request.CDBLen = creq->cmd_len;
2882                 memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB);
2883         } else {
2884                 printk(KERN_WARNING "cciss%d: bad request type %d\n", h->ctlr, creq->cmd_type);
2885                 BUG();
2886         }
2887
2888         spin_lock_irq(q->queue_lock);
2889
2890         addQ(&h->reqQ, c);
2891         h->Qdepth++;
2892         if (h->Qdepth > h->maxQsinceinit)
2893                 h->maxQsinceinit = h->Qdepth;
2894
2895         goto queue;
2896 full:
2897         blk_stop_queue(q);
2898 startio:
2899         /* We will already have the driver lock here so not need
2900          * to lock it.
2901          */
2902         start_io(h);
2903 }
2904
2905 static inline unsigned long get_next_completion(ctlr_info_t *h)
2906 {
2907 #ifdef CONFIG_CISS_SCSI_TAPE
2908         /* Any rejects from sendcmd() lying around? Process them first */
2909         if (h->scsi_rejects.ncompletions == 0)
2910                 return h->access.command_completed(h);
2911         else {
2912                 struct sendcmd_reject_list *srl;
2913                 int n;
2914                 srl = &h->scsi_rejects;
2915                 n = --srl->ncompletions;
2916                 /* printk("cciss%d: processing saved reject\n", h->ctlr); */
2917                 printk("p");
2918                 return srl->complete[n];
2919         }
2920 #else
2921         return h->access.command_completed(h);
2922 #endif
2923 }
2924
2925 static inline int interrupt_pending(ctlr_info_t *h)
2926 {
2927 #ifdef CONFIG_CISS_SCSI_TAPE
2928         return (h->access.intr_pending(h)
2929                 || (h->scsi_rejects.ncompletions > 0));
2930 #else
2931         return h->access.intr_pending(h);
2932 #endif
2933 }
2934
2935 static inline long interrupt_not_for_us(ctlr_info_t *h)
2936 {
2937 #ifdef CONFIG_CISS_SCSI_TAPE
2938         return (((h->access.intr_pending(h) == 0) ||
2939                  (h->interrupts_enabled == 0))
2940                 && (h->scsi_rejects.ncompletions == 0));
2941 #else
2942         return (((h->access.intr_pending(h) == 0) ||
2943                  (h->interrupts_enabled == 0)));
2944 #endif
2945 }
2946
2947 static irqreturn_t do_cciss_intr(int irq, void *dev_id)
2948 {
2949         ctlr_info_t *h = dev_id;
2950         CommandList_struct *c;
2951         unsigned long flags;
2952         __u32 a, a1, a2;
2953
2954         if (interrupt_not_for_us(h))
2955                 return IRQ_NONE;
2956         /*
2957          * If there are completed commands in the completion queue,
2958          * we had better do something about it.
2959          */
2960         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
2961         while (interrupt_pending(h)) {
2962                 while ((a = get_next_completion(h)) != FIFO_EMPTY) {
2963                         a1 = a;
2964                         if ((a & 0x04)) {
2965                                 a2 = (a >> 3);
2966                                 if (a2 >= h->nr_cmds) {
2967                                         printk(KERN_WARNING
2968                                                "cciss: controller cciss%d failed, stopping.\n",
2969                                                h->ctlr);
2970                                         fail_all_cmds(h->ctlr);
2971                                         return IRQ_HANDLED;
2972                                 }
2973
2974                                 c = h->cmd_pool + a2;
2975                                 a = c->busaddr;
2976
2977                         } else {
2978                                 struct hlist_node *tmp;
2979
2980                                 a &= ~3;
2981                                 c = NULL;
2982                                 hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
2983                                         if (c->busaddr == a)
2984                                                 break;
2985                                 }
2986                         }
2987                         /*
2988                          * If we've found the command, take it off the
2989                          * completion Q and free it
2990                          */
2991                         if (c && c->busaddr == a) {
2992                                 removeQ(c);
2993                                 if (c->cmd_type == CMD_RWREQ) {
2994                                         complete_command(h, c, 0);
2995                                 } else if (c->cmd_type == CMD_IOCTL_PEND) {
2996                                         complete(c->waiting);
2997                                 }
2998 #                               ifdef CONFIG_CISS_SCSI_TAPE
2999                                 else if (c->cmd_type == CMD_SCSI)
3000                                         complete_scsi_command(c, 0, a1);
3001 #                               endif
3002                                 continue;
3003                         }
3004                 }
3005         }
3006
3007         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
3008         return IRQ_HANDLED;
3009 }
3010
3011 /*
3012  *  We cannot read the structure directly, for portability we must use
3013  *   the io functions.
3014  *   This is for debug only.
3015  */
3016 #ifdef CCISS_DEBUG
3017 static void print_cfg_table(CfgTable_struct *tb)
3018 {
3019         int i;
3020         char temp_name[17];
3021
3022         printk("Controller Configuration information\n");
3023         printk("------------------------------------\n");
3024         for (i = 0; i < 4; i++)
3025                 temp_name[i] = readb(&(tb->Signature[i]));
3026         temp_name[4] = '\0';
3027         printk("   Signature = %s\n", temp_name);
3028         printk("   Spec Number = %d\n", readl(&(tb->SpecValence)));
3029         printk("   Transport methods supported = 0x%x\n",
3030                readl(&(tb->TransportSupport)));
3031         printk("   Transport methods active = 0x%x\n",
3032                readl(&(tb->TransportActive)));
3033         printk("   Requested transport Method = 0x%x\n",
3034                readl(&(tb->HostWrite.TransportRequest)));
3035         printk("   Coalesce Interrupt Delay = 0x%x\n",
3036                readl(&(tb->HostWrite.CoalIntDelay)));
3037         printk("   Coalesce Interrupt Count = 0x%x\n",
3038                readl(&(tb->HostWrite.CoalIntCount)));
3039         printk("   Max outstanding commands = 0x%d\n",
3040                readl(&(tb->CmdsOutMax)));
3041         printk("   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
3042         for (i = 0; i < 16; i++)
3043                 temp_name[i] = readb(&(tb->ServerName[i]));
3044         temp_name[16] = '\0';
3045         printk("   Server Name = %s\n", temp_name);
3046         printk("   Heartbeat Counter = 0x%x\n\n\n", readl(&(tb->HeartBeat)));
3047 }
3048 #endif                          /* CCISS_DEBUG */
3049
3050 static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
3051 {
3052         int i, offset, mem_type, bar_type;
3053         if (pci_bar_addr == PCI_BASE_ADDRESS_0) /* looking for BAR zero? */
3054                 return 0;
3055         offset = 0;
3056         for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3057                 bar_type = pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE;
3058                 if (bar_type == PCI_BASE_ADDRESS_SPACE_IO)
3059                         offset += 4;
3060                 else {
3061                         mem_type = pci_resource_flags(pdev, i) &
3062                             PCI_BASE_ADDRESS_MEM_TYPE_MASK;
3063                         switch (mem_type) {
3064                         case PCI_BASE_ADDRESS_MEM_TYPE_32:
3065                         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
3066                                 offset += 4;    /* 32 bit */
3067                                 break;
3068                         case PCI_BASE_ADDRESS_MEM_TYPE_64:
3069                                 offset += 8;
3070                                 break;
3071                         default:        /* reserved in PCI 2.2 */
3072                                 printk(KERN_WARNING
3073                                        "Base address is invalid\n");
3074                                 return -1;
3075                                 break;
3076                         }
3077                 }
3078                 if (offset == pci_bar_addr - PCI_BASE_ADDRESS_0)
3079                         return i + 1;
3080         }
3081         return -1;
3082 }
3083
3084 /* If MSI/MSI-X is supported by the kernel we will try to enable it on
3085  * controllers that are capable. If not, we use IO-APIC mode.
3086  */
3087
3088 static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
3089                                            struct pci_dev *pdev, __u32 board_id)
3090 {
3091 #ifdef CONFIG_PCI_MSI
3092         int err;
3093         struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1},
3094         {0, 2}, {0, 3}
3095         };
3096
3097         /* Some boards advertise MSI but don't really support it */
3098         if ((board_id == 0x40700E11) ||
3099             (board_id == 0x40800E11) ||
3100             (board_id == 0x40820E11) || (board_id == 0x40830E11))
3101                 goto default_int_mode;
3102
3103         if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
3104                 err = pci_enable_msix(pdev, cciss_msix_entries, 4);
3105                 if (!err) {
3106                         c->intr[0] = cciss_msix_entries[0].vector;
3107                         c->intr[1] = cciss_msix_entries[1].vector;
3108                         c->intr[2] = cciss_msix_entries[2].vector;
3109                         c->intr[3] = cciss_msix_entries[3].vector;
3110                         c->msix_vector = 1;
3111                         return;
3112                 }
3113                 if (err > 0) {
3114                         printk(KERN_WARNING "cciss: only %d MSI-X vectors "
3115                                "available\n", err);
3116                         goto default_int_mode;
3117                 } else {
3118                         printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
3119                                err);
3120                         goto default_int_mode;
3121                 }
3122         }
3123         if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
3124                 if (!pci_enable_msi(pdev)) {
3125                         c->msi_vector = 1;
3126                 } else {
3127                         printk(KERN_WARNING "cciss: MSI init failed\n");
3128                 }
3129         }
3130 default_int_mode:
3131 #endif                          /* CONFIG_PCI_MSI */
3132         /* if we get here we're going to use the default interrupt mode */
3133         c->intr[SIMPLE_MODE_INT] = pdev->irq;
3134         return;
3135 }
3136
3137 static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
3138 {
3139         ushort subsystem_vendor_id, subsystem_device_id, command;
3140         __u32 board_id, scratchpad = 0;
3141         __u64 cfg_offset;
3142         __u32 cfg_base_addr;
3143         __u64 cfg_base_addr_index;
3144         int i, err;
3145
3146         /* check to see if controller has been disabled */
3147         /* BEFORE trying to enable it */
3148         (void)pci_read_config_word(pdev, PCI_COMMAND, &command);
3149         if (!(command & 0x02)) {
3150                 printk(KERN_WARNING
3151                        "cciss: controller appears to be disabled\n");
3152                 return -ENODEV;
3153         }
3154
3155         err = pci_enable_device(pdev);
3156         if (err) {
3157                 printk(KERN_ERR "cciss: Unable to Enable PCI device\n");
3158                 return err;
3159         }
3160
3161         err = pci_request_regions(pdev, "cciss");
3162         if (err) {
3163                 printk(KERN_ERR "cciss: Cannot obtain PCI resources, "
3164                        "aborting\n");
3165                 return err;
3166         }
3167
3168         subsystem_vendor_id = pdev->subsystem_vendor;
3169         subsystem_device_id = pdev->subsystem_device;
3170         board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) |
3171                     subsystem_vendor_id);
3172
3173 #ifdef CCISS_DEBUG
3174         printk("command = %x\n", command);
3175         printk("irq = %x\n", pdev->irq);
3176         printk("board_id = %x\n", board_id);
3177 #endif                          /* CCISS_DEBUG */
3178
3179 /* If the kernel supports MSI/MSI-X we will try to enable that functionality,
3180  * else we use the IO-APIC interrupt assigned to us by system ROM.
3181  */
3182         cciss_interrupt_mode(c, pdev, board_id);
3183
3184         /*
3185          * Memory base addr is first addr , the second points to the config
3186          *   table
3187          */
3188
3189         c->paddr = pci_resource_start(pdev, 0); /* addressing mode bits already removed */
3190 #ifdef CCISS_DEBUG
3191         printk("address 0 = %lx\n", c->paddr);
3192 #endif                          /* CCISS_DEBUG */
3193         c->vaddr = remap_pci_mem(c->paddr, 0x250);
3194
3195         /* Wait for the board to become ready.  (PCI hotplug needs this.)
3196          * We poll for up to 120 secs, once per 100ms. */
3197         for (i = 0; i < 1200; i++) {
3198                 scratchpad = readl(c->vaddr + SA5_SCRATCHPAD_OFFSET);
3199                 if (scratchpad == CCISS_FIRMWARE_READY)
3200                         break;
3201                 set_current_state(TASK_INTERRUPTIBLE);
3202                 schedule_timeout(HZ / 10);      /* wait 100ms */
3203         }
3204         if (scratchpad != CCISS_FIRMWARE_READY) {
3205                 printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
3206                 err = -ENODEV;
3207                 goto err_out_free_res;
3208         }
3209
3210         /* get the address index number */
3211         cfg_base_addr = readl(c->vaddr + SA5_CTCFG_OFFSET);
3212         cfg_base_addr &= (__u32) 0x0000ffff;
3213 #ifdef CCISS_DEBUG
3214         printk("cfg base address = %x\n", cfg_base_addr);
3215 #endif                          /* CCISS_DEBUG */
3216         cfg_base_addr_index = find_PCI_BAR_index(pdev, cfg_base_addr);
3217 #ifdef CCISS_DEBUG
3218         printk("cfg base address index = %llx\n",
3219                 (unsigned long long)cfg_base_addr_index);
3220 #endif                          /* CCISS_DEBUG */
3221         if (cfg_base_addr_index == -1) {
3222                 printk(KERN_WARNING "cciss: Cannot find cfg_base_addr_index\n");
3223                 err = -ENODEV;
3224                 goto err_out_free_res;
3225         }
3226
3227         cfg_offset = readl(c->vaddr + SA5_CTMEM_OFFSET);
3228 #ifdef CCISS_DEBUG
3229         printk("cfg offset = %llx\n", (unsigned long long)cfg_offset);
3230 #endif                          /* CCISS_DEBUG */
3231         c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
3232                                                        cfg_base_addr_index) +
3233                                     cfg_offset, sizeof(CfgTable_struct));
3234         c->board_id = board_id;
3235
3236 #ifdef CCISS_DEBUG
3237         print_cfg_table(c->cfgtable);
3238 #endif                          /* CCISS_DEBUG */
3239
3240         /* Some controllers support Zero Memory Raid (ZMR).
3241          * When configured in ZMR mode the number of supported
3242          * commands drops to 64. So instead of just setting an
3243          * arbitrary value we make the driver a little smarter.
3244          * We read the config table to tell us how many commands
3245          * are supported on the controller then subtract 4 to
3246          * leave a little room for ioctl calls.
3247          */
3248         c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
3249         for (i = 0; i < ARRAY_SIZE(products); i++) {
3250                 if (board_id == products[i].board_id) {
3251                         c->product_name = products[i].product_name;
3252                         c->access = *(products[i].access);
3253                         c->nr_cmds = c->max_commands - 4;
3254                         break;
3255                 }
3256         }
3257         if ((readb(&c->cfgtable->Signature[0]) != 'C') ||
3258             (readb(&c->cfgtable->Signature[1]) != 'I') ||
3259             (readb(&c->cfgtable->Signature[2]) != 'S') ||
3260             (readb(&c->cfgtable->Signature[3]) != 'S')) {
3261                 printk("Does not appear to be a valid CISS config table\n");
3262                 err = -ENODEV;
3263                 goto err_out_free_res;
3264         }
3265         /* We didn't find the controller in our list. We know the
3266          * signature is valid. If it's an HP device let's try to
3267          * bind to the device and fire it up. Otherwise we bail.
3268          */
3269         if (i == ARRAY_SIZE(products)) {
3270                 if (subsystem_vendor_id == PCI_VENDOR_ID_HP) {
3271                         c->product_name = products[i-1].product_name;
3272                         c->access = *(products[i-1].access);
3273                         c->nr_cmds = c->max_commands - 4;
3274                         printk(KERN_WARNING "cciss: This is an unknown "
3275                                 "Smart Array controller.\n"
3276                                 "cciss: Please update to the latest driver "
3277                                 "available from www.hp.com.\n");
3278                 } else {
3279                         printk(KERN_WARNING "cciss: Sorry, I don't know how"
3280                                 " to access the Smart Array controller %08lx\n"
3281                                         , (unsigned long)board_id);
3282                         err = -ENODEV;
3283                         goto err_out_free_res;
3284                 }
3285         }
3286 #ifdef CONFIG_X86
3287         {
3288                 /* Need to enable prefetch in the SCSI core for 6400 in x86 */
3289                 __u32 prefetch;
3290                 prefetch = readl(&(c->cfgtable->SCSI_Prefetch));
3291                 prefetch |= 0x100;
3292                 writel(prefetch, &(c->cfgtable->SCSI_Prefetch));
3293         }
3294 #endif
3295
3296         /* Disabling DMA prefetch and refetch for the P600.
3297          * An ASIC bug may result in accesses to invalid memory addresses.
3298          * We've disabled prefetch for some time now. Testing with XEN
3299          * kernels revealed a bug in the refetch if dom0 resides on a P600.
3300          */
3301         if(board_id == 0x3225103C) {
3302                 __u32 dma_prefetch;
3303                 __u32 dma_refetch;
3304                 dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG);
3305                 dma_prefetch |= 0x8000;
3306                 writel(dma_prefetch, c->vaddr + I2O_DMA1_CFG);
3307                 pci_read_config_dword(pdev, PCI_COMMAND_PARITY, &dma_refetch);
3308                 dma_refetch |= 0x1;
3309                 pci_write_config_dword(pdev, PCI_COMMAND_PARITY, dma_refetch);
3310         }
3311
3312 #ifdef CCISS_DEBUG
3313         printk("Trying to put board into Simple mode\n");
3314 #endif                          /* CCISS_DEBUG */
3315         c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
3316         /* Update the field, and then ring the doorbell */
3317         writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest));
3318         writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL);
3319
3320         /* under certain very rare conditions, this can take awhile.
3321          * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
3322          * as we enter this code.) */
3323         for (i = 0; i < MAX_CONFIG_WAIT; i++) {
3324                 if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
3325                         break;
3326                 /* delay and try again */
3327                 set_current_state(TASK_INTERRUPTIBLE);
3328                 schedule_timeout(10);
3329         }
3330
3331 #ifdef CCISS_DEBUG
3332         printk(KERN_DEBUG "I counter got to %d %x\n", i,
3333                readl(c->vaddr + SA5_DOORBELL));
3334 #endif                          /* CCISS_DEBUG */
3335 #ifdef CCISS_DEBUG
3336         print_cfg_table(c->cfgtable);
3337 #endif                          /* CCISS_DEBUG */
3338
3339         if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
3340                 printk(KERN_WARNING "cciss: unable to get board into"
3341                        " simple mode\n");
3342                 err = -ENODEV;
3343                 goto err_out_free_res;
3344         }
3345         return 0;
3346
3347 err_out_free_res:
3348         /*
3349          * Deliberately omit pci_disable_device(): it does something nasty to
3350          * Smart Array controllers that pci_enable_device does not undo
3351          */
3352         pci_release_regions(pdev);
3353         return err;
3354 }
3355
3356 /* Function to find the first free pointer into our hba[] array
3357  * Returns -1 if no free entries are left.
3358  */
3359 static int alloc_cciss_hba(void)
3360 {
3361         int i;
3362
3363         for (i = 0; i < MAX_CTLR; i++) {
3364                 if (!hba[i]) {
3365                         ctlr_info_t *p;
3366
3367                         p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
3368                         if (!p)
3369                                 goto Enomem;
3370                         hba[i] = p;
3371                         return i;
3372                 }
3373         }
3374         printk(KERN_WARNING "cciss: This driver supports a maximum"
3375                " of %d controllers.\n", MAX_CTLR);
3376         return -1;
3377 Enomem:
3378         printk(KERN_ERR "cciss: out of memory.\n");
3379         return -1;
3380 }
3381
3382 static void free_hba(int i)
3383 {
3384         ctlr_info_t *p = hba[i];
3385         int n;
3386
3387         hba[i] = NULL;
3388         for (n = 0; n < CISS_MAX_LUN; n++)
3389                 put_disk(p->gendisk[n]);
3390         kfree(p);
3391 }
3392
3393 /* Send a message CDB to the firmware. */
3394 static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
3395 {
3396         typedef struct {
3397                 CommandListHeader_struct CommandHeader;
3398                 RequestBlock_struct Request;
3399                 ErrDescriptor_struct ErrorDescriptor;
3400         } Command;
3401         static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
3402         Command *cmd;
3403         dma_addr_t paddr64;
3404         uint32_t paddr32, tag;
3405         void __iomem *vaddr;
3406         int i, err;
3407
3408         vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
3409         if (vaddr == NULL)
3410                 return -ENOMEM;
3411
3412         /* The Inbound Post Queue only accepts 32-bit physical addresses for the
3413            CCISS commands, so they must be allocated from the lower 4GiB of
3414            memory. */
3415         err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
3416         if (err) {
3417                 iounmap(vaddr);
3418                 return -ENOMEM;
3419         }
3420
3421         cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
3422         if (cmd == NULL) {
3423                 iounmap(vaddr);
3424                 return -ENOMEM;
3425         }
3426
3427         /* This must fit, because of the 32-bit consistent DMA mask.  Also,
3428            although there's no guarantee, we assume that the address is at
3429            least 4-byte aligned (most likely, it's page-aligned). */
3430         paddr32 = paddr64;
3431
3432         cmd->CommandHeader.ReplyQueue = 0;
3433         cmd->CommandHeader.SGList = 0;
3434         cmd->CommandHeader.SGTotal = 0;
3435         cmd->CommandHeader.Tag.lower = paddr32;
3436         cmd->CommandHeader.Tag.upper = 0;
3437         memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
3438
3439         cmd->Request.CDBLen = 16;
3440         cmd->Request.Type.Type = TYPE_MSG;
3441         cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
3442         cmd->Request.Type.Direction = XFER_NONE;
3443         cmd->Request.Timeout = 0; /* Don't time out */
3444         cmd->Request.CDB[0] = opcode;
3445         cmd->Request.CDB[1] = type;
3446         memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
3447
3448         cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
3449         cmd->ErrorDescriptor.Addr.upper = 0;
3450         cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
3451
3452         writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
3453
3454         for (i = 0; i < 10; i++) {
3455                 tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
3456                 if ((tag & ~3) == paddr32)
3457                         break;
3458                 schedule_timeout_uninterruptible(HZ);
3459         }
3460
3461         iounmap(vaddr);
3462
3463         /* we leak the DMA buffer here ... no choice since the controller could
3464            still complete the command. */
3465         if (i == 10) {
3466                 printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
3467                         opcode, type);
3468                 return -ETIMEDOUT;
3469         }
3470
3471         pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
3472
3473         if (tag & 2) {
3474                 printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
3475                         opcode, type);
3476                 return -EIO;
3477         }
3478
3479         printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
3480                 opcode, type);
3481         return 0;
3482 }
3483
/* Convenience wrappers around cciss_message(): opcode 1 / type 0 is sent
 * as the soft-reset message, opcode 3 / type 0 as the no-op message. */
#define cciss_soft_reset_controller(p) cciss_message((p), 1, 0)
#define cciss_noop(p) cciss_message((p), 3, 0)
3486
3487 static __devinit int cciss_reset_msi(struct pci_dev *pdev)
3488 {
3489 /* the #defines are stolen from drivers/pci/msi.h. */
3490 #define msi_control_reg(base)           (base + PCI_MSI_FLAGS)
3491 #define PCI_MSIX_FLAGS_ENABLE           (1 << 15)
3492
3493         int pos;
3494         u16 control = 0;
3495
3496         pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
3497         if (pos) {
3498                 pci_read_config_word(pdev, msi_control_reg(pos), &control);
3499                 if (control & PCI_MSI_FLAGS_ENABLE) {
3500                         printk(KERN_INFO "cciss: resetting MSI\n");
3501                         pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
3502                 }
3503         }
3504
3505         pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
3506         if (pos) {
3507                 pci_read_config_word(pdev, msi_control_reg(pos), &control);
3508                 if (control & PCI_MSIX_FLAGS_ENABLE) {
3509                         printk(KERN_INFO "cciss: resetting MSI-X\n");
3510                         pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
3511                 }
3512         }
3513
3514         return 0;
3515 }
3516
3517 /* This does a hard reset of the controller using PCI power management
3518  * states. */
3519 static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
3520 {
3521         u16 pmcsr, saved_config_space[32];
3522         int i, pos;
3523
3524         printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
3525
3526         /* This is very nearly the same thing as
3527
3528            pci_save_state(pci_dev);
3529            pci_set_power_state(pci_dev, PCI_D3hot);
3530            pci_set_power_state(pci_dev, PCI_D0);
3531            pci_restore_state(pci_dev);
3532
3533            but we can't use these nice canned kernel routines on
3534            kexec, because they also check the MSI/MSI-X state in PCI
3535            configuration space and do the wrong thing when it is
3536            set/cleared.  Also, the pci_save/restore_state functions
3537            violate the ordering requirements for restoring the
3538            configuration space from the CCISS document (see the
3539            comment below).  So we roll our own .... */
3540
3541         for (i = 0; i < 32; i++)
3542                 pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
3543
3544         pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
3545         if (pos == 0) {
3546                 printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
3547                 return -ENODEV;
3548         }
3549
3550         /* Quoting from the Open CISS Specification: "The Power
3551          * Management Control/Status Register (CSR) controls the power
3552          * state of the device.  The normal operating state is D0,
3553          * CSR=00h.  The software off state is D3, CSR=03h.  To reset
3554          * the controller, place the interface device in D3 then to
3555          * D0, this causes a secondary PCI reset which will reset the
3556          * controller." */
3557
3558         /* enter the D3hot power management state */
3559         pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
3560         pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3561         pmcsr |= PCI_D3hot;
3562         pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3563
3564         schedule_timeout_uninterruptible(HZ >> 1);
3565
3566         /* enter the D0 power management state */
3567         pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3568         pmcsr |= PCI_D0;
3569         pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3570
3571         schedule_timeout_uninterruptible(HZ >> 1);
3572
3573         /* Restore the PCI configuration space.  The Open CISS
3574          * Specification says, "Restore the PCI Configuration
3575          * Registers, offsets 00h through 60h. It is important to
3576          * restore the command register, 16-bits at offset 04h,
3577          * last. Do not restore the configuration status register,
3578          * 16-bits at offset 06h."  Note that the offset is 2*i. */
3579         for (i = 0; i < 32; i++) {
3580                 if (i == 2 || i == 3)
3581                         continue;
3582                 pci_write_config_word(pdev, 2*i, saved_config_space[i]);
3583         }
3584         wmb();
3585         pci_write_config_word(pdev, 4, saved_config_space[2]);
3586
3587         return 0;
3588 }
3589
3590 /*
3591  *  This is it.  Find all the controllers and register them.  I really hate
3592  *  stealing all these major device numbers.
3593  *  returns the number of block devices registered.
3594  */
3595 static int __devinit cciss_init_one(struct pci_dev *pdev,
3596                                     const struct pci_device_id *ent)
3597 {
3598         int i;
3599         int j = 0;
3600         int rc;
3601         int dac, return_code;
3602         InquiryData_struct *inq_buff = NULL;
3603
3604         if (reset_devices) {
3605                 /* Reset the controller with a PCI power-cycle */
3606                 if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
3607                         return -ENODEV;
3608
3609                 /* Now try to get the controller to respond to a no-op. Some
3610                    devices (notably the HP Smart Array 5i Controller) need
3611                    up to 30 seconds to respond. */
3612                 for (i=0; i<30; i++) {
3613                         if (cciss_noop(pdev) == 0)
3614                                 break;
3615
3616                         schedule_timeout_uninterruptible(HZ);
3617                 }
3618                 if (i == 30) {
3619                         printk(KERN_ERR "cciss: controller seems dead\n");
3620                         return -EBUSY;
3621                 }
3622         }
3623
3624         i = alloc_cciss_hba();
3625         if (i < 0)
3626                 return -1;
3627
3628         hba[i]->busy_initializing = 1;
3629         INIT_HLIST_HEAD(&hba[i]->cmpQ);
3630         INIT_HLIST_HEAD(&hba[i]->reqQ);
3631
3632         if (cciss_pci_init(hba[i], pdev) != 0)
3633                 goto clean1;
3634
3635         sprintf(hba[i]->devname, "cciss%d", i);
3636         hba[i]->ctlr = i;
3637         hba[i]->pdev = pdev;
3638
3639         /* configure PCI DMA stuff */
3640         if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK))
3641                 dac = 1;
3642         else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK))
3643                 dac = 0;
3644         else {
3645                 printk(KERN_ERR "cciss: no suitable DMA available\n");
3646                 goto clean1;
3647         }
3648
3649         /*
3650          * register with the major number, or get a dynamic major number
3651          * by passing 0 as argument.  This is done for greater than
3652          * 8 controller support.
3653          */
3654         if (i < MAX_CTLR_ORIG)
3655                 hba[i]->major = COMPAQ_CISS_MAJOR + i;
3656         rc = register_blkdev(hba[i]->major, hba[i]->devname);
3657         if (rc == -EBUSY || rc == -EINVAL) {
3658                 printk(KERN_ERR
3659                        "cciss:  Unable to get major number %d for %s "
3660                        "on hba %d\n", hba[i]->major, hba[i]->devname, i);
3661                 goto clean1;
3662         } else {
3663                 if (i >= MAX_CTLR_ORIG)
3664                         hba[i]->major = rc;
3665         }
3666
3667         /* make sure the board interrupts are off */
3668         hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
3669         if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
3670                         IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) {
3671                 printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
3672                        hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
3673                 goto clean2;
3674         }
3675
3676         printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
3677                hba[i]->devname, pdev->device, pci_name(pdev),
3678                hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
3679
3680         hba[i]->cmd_pool_bits =
3681             kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
3682                         * sizeof(unsigned long), GFP_KERNEL);
3683         hba[i]->cmd_pool = (CommandList_struct *)
3684             pci_alloc_consistent(hba[i]->pdev,
3685                     hba[i]->nr_cmds * sizeof(CommandList_struct),
3686                     &(hba[i]->cmd_pool_dhandle));
3687         hba[i]->errinfo_pool = (ErrorInfo_struct *)
3688             pci_alloc_consistent(hba[i]->pdev,
3689                     hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3690                     &(hba[i]->errinfo_pool_dhandle));
3691         if ((hba[i]->cmd_pool_bits == NULL)
3692             || (hba[i]->cmd_pool == NULL)
3693             || (hba[i]->errinfo_pool == NULL)) {
3694                 printk(KERN_ERR "cciss: out of memory");
3695                 goto clean4;
3696         }
3697 #ifdef CONFIG_CISS_SCSI_TAPE
3698         hba[i]->scsi_rejects.complete =
3699             kmalloc(sizeof(hba[i]->scsi_rejects.complete[0]) *
3700                     (hba[i]->nr_cmds + 5), GFP_KERNEL);
3701         if (hba[i]->scsi_rejects.complete == NULL) {
3702                 printk(KERN_ERR "cciss: out of memory");
3703                 goto clean4;
3704         }
3705 #endif
3706         spin_lock_init(&hba[i]->lock);
3707
3708         /* Initialize the pdev driver private data.
3709            have it point to hba[i].  */
3710         pci_set_drvdata(pdev, hba[i]);
3711         /* command and error info recs zeroed out before
3712            they are used */
3713         memset(hba[i]->cmd_pool_bits, 0,
3714                DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
3715                         * sizeof(unsigned long));
3716
3717         hba[i]->num_luns = 0;
3718         hba[i]->highest_lun = -1;
3719         for (j = 0; j < CISS_MAX_LUN; j++) {
3720                 hba[i]->drv[j].raid_level = -1;
3721                 hba[i]->drv[j].queue = NULL;
3722                 hba[i]->gendisk[j] = NULL;
3723         }
3724
3725         cciss_scsi_setup(i);
3726
3727         /* Turn the interrupts on so we can service requests */
3728         hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);
3729
3730         /* Get the firmware version */
3731         inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
3732         if (inq_buff == NULL) {
3733                 printk(KERN_ERR "cciss: out of memory\n");
3734                 goto clean4;
3735         }
3736
3737         return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff,
3738                 sizeof(InquiryData_struct), 0, 0 , 0, TYPE_CMD);
3739         if (return_code == IO_OK) {
3740                 hba[i]->firm_ver[0] = inq_buff->data_byte[32];
3741                 hba[i]->firm_ver[1] = inq_buff->data_byte[33];
3742                 hba[i]->firm_ver[2] = inq_buff->data_byte[34];
3743                 hba[i]->firm_ver[3] = inq_buff->data_byte[35];
3744         } else {         /* send command failed */
3745                 printk(KERN_WARNING "cciss: unable to determine firmware"
3746                         " version of controller\n");
3747         }
3748
3749         cciss_procinit(i);
3750
3751         hba[i]->cciss_max_sectors = 2048;
3752
3753         hba[i]->busy_initializing = 0;
3754
3755         rebuild_lun_table(hba[i], 1);
3756         return 1;
3757
3758 clean4:
3759         kfree(inq_buff);
3760 #ifdef CONFIG_CISS_SCSI_TAPE
3761         kfree(hba[i]->scsi_rejects.complete);
3762 #endif
3763         kfree(hba[i]->cmd_pool_bits);
3764         if (hba[i]->cmd_pool)
3765                 pci_free_consistent(hba[i]->pdev,
3766                                     hba[i]->nr_cmds * sizeof(CommandList_struct),
3767                                     hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
3768         if (hba[i]->errinfo_pool)
3769                 pci_free_consistent(hba[i]->pdev,
3770                                     hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3771                                     hba[i]->errinfo_pool,
3772                                     hba[i]->errinfo_pool_dhandle);
3773         free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
3774 clean2:
3775         unregister_blkdev(hba[i]->major, hba[i]->devname);
3776 clean1:
3777         hba[i]->busy_initializing = 0;
3778         /* cleanup any queues that may have been initialized */
3779         for (j=0; j <= hba[i]->highest_lun; j++){
3780                 drive_info_struct *drv = &(hba[i]->drv[j]);
3781                 if (drv->queue)
3782                         blk_cleanup_queue(drv->queue);
3783         }
3784         /*
3785          * Deliberately omit pci_disable_device(): it does something nasty to
3786          * Smart Array controllers that pci_enable_device does not undo
3787          */
3788         pci_release_regions(pdev);
3789         pci_set_drvdata(pdev, NULL);
3790         free_hba(i);
3791         return -1;
3792 }
3793
3794 static void cciss_shutdown(struct pci_dev *pdev)
3795 {
3796         ctlr_info_t *tmp_ptr;
3797         int i;
3798         char flush_buf[4];
3799         int return_code;
3800
3801         tmp_ptr = pci_get_drvdata(pdev);
3802         if (tmp_ptr == NULL)
3803                 return;
3804         i = tmp_ptr->ctlr;
3805         if (hba[i] == NULL)
3806                 return;
3807
3808         /* Turn board interrupts off  and send the flush cache command */
3809         /* sendcmd will turn off interrupt, and send the flush...
3810          * To write all data in the battery backed cache to disks */
3811         memset(flush_buf, 0, 4);
3812         return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
3813                               TYPE_CMD);
3814         if (return_code == IO_OK) {
3815                 printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
3816         } else {
3817                 printk(KERN_WARNING "Error flushing cache on controller %d\n", i);
3818         }
3819         free_irq(hba[i]->intr[2], hba[i]);
3820 }
3821
3822 static void __devexit cciss_remove_one(struct pci_dev *pdev)
3823 {
3824         ctlr_info_t *tmp_ptr;
3825         int i, j;
3826
3827         if (pci_get_drvdata(pdev) == NULL) {
3828                 printk(KERN_ERR "cciss: Unable to remove device \n");
3829                 return;
3830         }
3831         tmp_ptr = pci_get_drvdata(pdev);
3832         i = tmp_ptr->ctlr;
3833         if (hba[i] == NULL) {
3834                 printk(KERN_ERR "cciss: device appears to "
3835                        "already be removed \n");
3836                 return;
3837         }
3838
3839         remove_proc_entry(hba[i]->devname, proc_cciss);
3840         unregister_blkdev(hba[i]->major, hba[i]->devname);
3841
3842         /* remove it from the disk list */
3843         for (j = 0; j < CISS_MAX_LUN; j++) {
3844                 struct gendisk *disk = hba[i]->gendisk[j];
3845                 if (disk) {
3846                         struct request_queue *q = disk->queue;
3847
3848                         if (disk->flags & GENHD_FL_UP)
3849                                 del_gendisk(disk);
3850                         if (q)
3851                                 blk_cleanup_queue(q);
3852                 }
3853         }
3854
3855 #ifdef CONFIG_CISS_SCSI_TAPE
3856         cciss_unregister_scsi(i);       /* unhook from SCSI subsystem */
3857 #endif
3858
3859         cciss_shutdown(pdev);
3860
3861 #ifdef CONFIG_PCI_MSI
3862         if (hba[i]->msix_vector)
3863                 pci_disable_msix(hba[i]->pdev);
3864         else if (hba[i]->msi_vector)
3865                 pci_disable_msi(hba[i]->pdev);
3866 #endif                          /* CONFIG_PCI_MSI */
3867
3868         iounmap(hba[i]->vaddr);
3869
3870         pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct),
3871                             hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
3872         pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3873                             hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
3874         kfree(hba[i]->cmd_pool_bits);
3875 #ifdef CONFIG_CISS_SCSI_TAPE
3876         kfree(hba[i]->scsi_rejects.complete);
3877 #endif
3878         /*
3879          * Deliberately omit pci_disable_device(): it does something nasty to
3880          * Smart Array controllers that pci_enable_device does not undo
3881          */
3882         pci_release_regions(pdev);
3883         pci_set_drvdata(pdev, NULL);
3884         free_hba(i);
3885 }
3886
3887 static struct pci_driver cciss_pci_driver = {
3888         .name = "cciss",
3889         .probe = cciss_init_one,
3890         .remove = __devexit_p(cciss_remove_one),
3891         .id_table = cciss_pci_device_id,        /* id_table */
3892         .shutdown = cciss_shutdown,
3893 };
3894
3895 /*
3896  *  This is it.  Register the PCI driver information for the cards we control
3897  *  the OS will call our registered routines when it finds one of our cards.
3898  */
3899 static int __init cciss_init(void)
3900 {
3901         /*
3902          * The hardware requires that commands are aligned on a 64-bit
3903          * boundary. Given that we use pci_alloc_consistent() to allocate an
3904          * array of them, the size must be a multiple of 8 bytes.
3905          */
3906         BUILD_BUG_ON(sizeof(CommandList_struct) % 8);
3907
3908         printk(KERN_INFO DRIVER_NAME "\n");
3909
3910         /* Register for our PCI devices */
3911         return pci_register_driver(&cciss_pci_driver);
3912 }
3913
3914 static void __exit cciss_cleanup(void)
3915 {
3916         int i;
3917
3918         pci_unregister_driver(&cciss_pci_driver);
3919         /* double check that all controller entrys have been removed */
3920         for (i = 0; i < MAX_CTLR; i++) {
3921                 if (hba[i] != NULL) {
3922                         printk(KERN_WARNING "cciss: had to remove"
3923                                " controller %d\n", i);
3924                         cciss_remove_one(hba[i]->pdev);
3925                 }
3926         }
3927         remove_proc_entry("driver/cciss", NULL);
3928 }
3929
3930 static void fail_all_cmds(unsigned long ctlr)
3931 {
3932         /* If we get here, the board is apparently dead. */
3933         ctlr_info_t *h = hba[ctlr];
3934         CommandList_struct *c;
3935         unsigned long flags;
3936
3937         printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
3938         h->alive = 0;           /* the controller apparently died... */
3939
3940         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
3941
3942         pci_disable_device(h->pdev);    /* Make sure it is really dead. */
3943
3944         /* move everything off the request queue onto the completed queue */
3945         while (!hlist_empty(&h->reqQ)) {
3946                 c = hlist_entry(h->reqQ.first, CommandList_struct, list);
3947                 removeQ(c);
3948                 h->Qdepth--;
3949                 addQ(&h->cmpQ, c);
3950         }
3951
3952         /* Now, fail everything on the completed queue with a HW error */
3953         while (!hlist_empty(&h->cmpQ)) {
3954                 c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
3955                 removeQ(c);
3956                 c->err_info->CommandStatus = CMD_HARDWARE_ERR;
3957                 if (c->cmd_type == CMD_RWREQ) {
3958                         complete_command(h, c, 0);
3959                 } else if (c->cmd_type == CMD_IOCTL_PEND)
3960                         complete(c->waiting);
3961 #ifdef CONFIG_CISS_SCSI_TAPE
3962                 else if (c->cmd_type == CMD_SCSI)
3963                         complete_scsi_command(c, 0, 0);
3964 #endif
3965         }
3966         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
3967         return;
3968 }
3969
/*
 * Module entry/exit registration: cciss_init is the driver's
 * initialisation routine and cciss_cleanup its exit routine.
 */
3970 module_init(cciss_init);
3971 module_exit(cciss_cleanup);