cciss: kernel scan thread for MSA2012
[linux-2.6] / drivers / block / cciss.c
1 /*
2  *    Disk Array driver for HP Smart Array controllers.
3  *    (C) Copyright 2000, 2007 Hewlett-Packard Development Company, L.P.
4  *
5  *    This program is free software; you can redistribute it and/or modify
6  *    it under the terms of the GNU General Public License as published by
7  *    the Free Software Foundation; version 2 of the License.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  *    General Public License for more details.
13  *
14  *    You should have received a copy of the GNU General Public License
15  *    along with this program; if not, write to the Free Software
16  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17  *    02111-1307, USA.
18  *
19  *    Questions/Comments/Bugfixes to iss_storagedev@hp.com
20  *
21  */
22
23 #include <linux/module.h>
24 #include <linux/interrupt.h>
25 #include <linux/types.h>
26 #include <linux/pci.h>
27 #include <linux/kernel.h>
28 #include <linux/slab.h>
29 #include <linux/delay.h>
30 #include <linux/major.h>
31 #include <linux/fs.h>
32 #include <linux/bio.h>
33 #include <linux/blkpg.h>
34 #include <linux/timer.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/init.h>
38 #include <linux/hdreg.h>
39 #include <linux/spinlock.h>
40 #include <linux/compat.h>
41 #include <linux/blktrace_api.h>
42 #include <asm/uaccess.h>
43 #include <asm/io.h>
44
45 #include <linux/dma-mapping.h>
46 #include <linux/blkdev.h>
47 #include <linux/genhd.h>
48 #include <linux/completion.h>
49 #include <scsi/scsi.h>
50 #include <scsi/sg.h>
51 #include <scsi/scsi_ioctl.h>
52 #include <linux/cdrom.h>
53 #include <linux/scatterlist.h>
54 #include <linux/kthread.h>
55
56 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
57 #define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
58 #define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20)
59
60 /* Embedded module documentation macros - see modules.h */
61 MODULE_AUTHOR("Hewlett-Packard Company");
62 MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
63 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
64                         " SA6i P600 P800 P400 P400i E200 E200i E500 P700m"
65                         " Smart Array G2 Series SAS/SATA Controllers");
66 MODULE_VERSION("3.6.20");
67 MODULE_LICENSE("GPL");
68
69 #include "cciss_cmd.h"
70 #include "cciss.h"
71 #include <linux/cciss_ioctl.h>
72
73 /* define the PCI info for the cards we can control */
/*
 * PCI match table exported to the module loader via MODULE_DEVICE_TABLE().
 * The explicit entries list {vendor, device, subsystem vendor, subsystem
 * device}; the subsystem pair is what distinguishes individual boards
 * (compare the board_id values in products[]).
 */
74 static const struct pci_device_id cciss_pci_device_id[] = {
75         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISS,  0x0E11, 0x4070},
76         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4080},
77         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4082},
78         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4083},
79         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x4091},
80         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409A},
81         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409B},
82         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409C},
83         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409D},
84         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSA,     0x103C, 0x3225},
85         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3223},
86         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3234},
87         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3235},
88         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3211},
89         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3212},
90         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3213},
91         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3214},
92         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3215},
93         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3237},
94         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x323D},
95         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3241},
96         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3243},
97         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3245},
98         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3247},
99         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
100         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
101         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
/*
 * Catch-all: any HP device, matched by class rather than by IDs
 * (class PCI_CLASS_STORAGE_RAID, compared under mask 0xffff << 8).
 * NOTE(review): presumably paired with the 0xFFFF103C "Unknown Smart
 * Array" entry in products[] -- confirm in the probe code.
 */
102         {PCI_VENDOR_ID_HP,     PCI_ANY_ID,      PCI_ANY_ID, PCI_ANY_ID,
103                 PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0},
/* All-zero entry terminates the table. */
104         {0,}
105 };
106
107 MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
108
109 /*  board_id = Subsystem Device ID & Vendor ID
110  *  product = Marketing Name for the board
111  *  access = Address of the struct of function pointers
112  */
/*
 * Known boards.  Each board_id carries the PCI subsystem device ID in its
 * upper 16 bits and the subsystem vendor ID (0x0E11 or 0x103C) in its lower
 * 16 bits, mirroring the subsystem columns of cciss_pci_device_id[].
 */
113 static struct board_type products[] = {
114         {0x40700E11, "Smart Array 5300", &SA5_access},
115         {0x40800E11, "Smart Array 5i", &SA5B_access},
116         {0x40820E11, "Smart Array 532", &SA5B_access},
117         {0x40830E11, "Smart Array 5312", &SA5B_access},
118         {0x409A0E11, "Smart Array 641", &SA5_access},
119         {0x409B0E11, "Smart Array 642", &SA5_access},
120         {0x409C0E11, "Smart Array 6400", &SA5_access},
121         {0x409D0E11, "Smart Array 6400 EM", &SA5_access},
122         {0x40910E11, "Smart Array 6i", &SA5_access},
123         {0x3225103C, "Smart Array P600", &SA5_access},
124         {0x3223103C, "Smart Array P800", &SA5_access},
125         {0x3234103C, "Smart Array P400", &SA5_access},
126         {0x3235103C, "Smart Array P400i", &SA5_access},
127         {0x3211103C, "Smart Array E200i", &SA5_access},
128         {0x3212103C, "Smart Array E200", &SA5_access},
129         {0x3213103C, "Smart Array E200i", &SA5_access},
130         {0x3214103C, "Smart Array E200i", &SA5_access},
131         {0x3215103C, "Smart Array E200i", &SA5_access},
132         {0x3237103C, "Smart Array E500", &SA5_access},
133         {0x323D103C, "Smart Array P700m", &SA5_access},
134         {0x3241103C, "Smart Array P212", &SA5_access},
135         {0x3243103C, "Smart Array P410", &SA5_access},
136         {0x3245103C, "Smart Array P410i", &SA5_access},
137         {0x3247103C, "Smart Array P411", &SA5_access},
138         {0x3249103C, "Smart Array P812", &SA5_access},
139         {0x324A103C, "Smart Array P712m", &SA5_access},
140         {0x324B103C, "Smart Array P711m", &SA5_access},
/*
 * NOTE(review): looks like the fallback used for HP boards that only
 * matched the class-based PCI entry -- confirm against the lookup code.
 */
141         {0xFFFF103C, "Unknown Smart Array", &SA5_access},
142 };
143
144 /* How long to wait (in milliseconds) for board to go into simple mode */
145 #define MAX_CONFIG_WAIT 30000
146 #define MAX_IOCTL_CONFIG_WAIT 1000
147
148 /*define how many times we will try a command because of bus resets */
149 #define MAX_CMD_RETRIES 3
150
151 #define MAX_CTLR        32
152
153 /* Originally cciss driver only supports 8 major numbers */
154 #define MAX_CTLR_ORIG   8
155
156 static ctlr_info_t *hba[MAX_CTLR];
157
158 static void do_cciss_request(struct request_queue *q);
159 static irqreturn_t do_cciss_intr(int irq, void *dev_id);
160 static int cciss_open(struct block_device *bdev, fmode_t mode);
161 static int cciss_release(struct gendisk *disk, fmode_t mode);
162 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
163                        unsigned int cmd, unsigned long arg);
164 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
165
166 static int cciss_revalidate(struct gendisk *disk);
167 static int rebuild_lun_table(ctlr_info_t *h, int first_time);
168 static int deregister_disk(ctlr_info_t *h, int drv_index,
169                            int clear_all);
170
171 static void cciss_read_capacity(int ctlr, int logvol, int withirq,
172                         sector_t *total_size, unsigned int *block_size);
173 static void cciss_read_capacity_16(int ctlr, int logvol, int withirq,
174                         sector_t *total_size, unsigned int *block_size);
175 static void cciss_geometry_inquiry(int ctlr, int logvol,
176                         int withirq, sector_t total_size,
177                         unsigned int block_size, InquiryData_struct *inq_buff,
178                                    drive_info_struct *drv);
179 static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
180                                            __u32);
181 static void start_io(ctlr_info_t *h);
182 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
183                    unsigned int use_unit_num, unsigned int log_unit,
184                    __u8 page_code, unsigned char *scsi3addr, int cmd_type);
185 static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
186                            unsigned int use_unit_num, unsigned int log_unit,
187                            __u8 page_code, int cmd_type);
188
189 static void fail_all_cmds(unsigned long ctlr);
190 static int scan_thread(void *data);
191 static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
192
193 #ifdef CONFIG_PROC_FS
194 static void cciss_procinit(int i);
195 #else
196 static void cciss_procinit(int i)
197 {
198 }
199 #endif                          /* CONFIG_PROC_FS */
200
201 #ifdef CONFIG_COMPAT
202 static int cciss_compat_ioctl(struct block_device *, fmode_t,
203                               unsigned, unsigned long);
204 #endif
205
206 static struct block_device_operations cciss_fops = {
207         .owner = THIS_MODULE,
208         .open = cciss_open,
209         .release = cciss_release,
210         .locked_ioctl = cciss_ioctl,
211         .getgeo = cciss_getgeo,
212 #ifdef CONFIG_COMPAT
213         .compat_ioctl = cciss_compat_ioctl,
214 #endif
215         .revalidate_disk = cciss_revalidate,
216 };
217
218 /*
219  * Enqueuing and dequeuing functions for cmdlists.
220  */
221 static inline void addQ(struct hlist_head *list, CommandList_struct *c)
222 {
223         hlist_add_head(&c->list, list);
224 }
225
226 static inline void removeQ(CommandList_struct *c)
227 {
228         if (WARN_ON(hlist_unhashed(&c->list)))
229                 return;
230
231         hlist_del_init(&c->list);
232 }
233
234 #include "cciss_scsi.c"         /* For SCSI tape support */
235
236 #define RAID_UNKNOWN 6
237
238 #ifdef CONFIG_PROC_FS
239
240 /*
241  * Report information about this controller.
242  */
243 #define ENG_GIG 1000000000
244 #define ENG_GIG_FACTOR (ENG_GIG/512)
245 #define ENGAGE_SCSI     "engage scsi"
/*
 * Printable RAID level names, indexed by drive raid_level; the last slot
 * (index RAID_UNKNOWN) is the fallback label.  Both the strings and the
 * pointer array are read-only.
 */
static const char *const raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
        "UNKNOWN"
};
249
250 static struct proc_dir_entry *proc_cciss;
251
252 static void cciss_seq_show_header(struct seq_file *seq)
253 {
254         ctlr_info_t *h = seq->private;
255
256         seq_printf(seq, "%s: HP %s Controller\n"
257                 "Board ID: 0x%08lx\n"
258                 "Firmware Version: %c%c%c%c\n"
259                 "IRQ: %d\n"
260                 "Logical drives: %d\n"
261                 "Current Q depth: %d\n"
262                 "Current # commands on controller: %d\n"
263                 "Max Q depth since init: %d\n"
264                 "Max # commands on controller since init: %d\n"
265                 "Max SG entries since init: %d\n",
266                 h->devname,
267                 h->product_name,
268                 (unsigned long)h->board_id,
269                 h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
270                 h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT],
271                 h->num_luns,
272                 h->Qdepth, h->commands_outstanding,
273                 h->maxQsinceinit, h->max_outstanding, h->maxSG);
274
275 #ifdef CONFIG_CISS_SCSI_TAPE
276         cciss_seq_tape_report(seq, h->ctlr);
277 #endif /* CONFIG_CISS_SCSI_TAPE */
278 }
279
280 static void *cciss_seq_start(struct seq_file *seq, loff_t *pos)
281 {
282         ctlr_info_t *h = seq->private;
283         unsigned ctlr = h->ctlr;
284         unsigned long flags;
285
286         /* prevent displaying bogus info during configuration
287          * or deconfiguration of a logical volume
288          */
289         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
290         if (h->busy_configuring) {
291                 spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
292                 return ERR_PTR(-EBUSY);
293         }
294         h->busy_configuring = 1;
295         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
296
297         if (*pos == 0)
298                 cciss_seq_show_header(seq);
299
300         return pos;
301 }
302
303 static int cciss_seq_show(struct seq_file *seq, void *v)
304 {
305         sector_t vol_sz, vol_sz_frac;
306         ctlr_info_t *h = seq->private;
307         unsigned ctlr = h->ctlr;
308         loff_t *pos = v;
309         drive_info_struct *drv = &h->drv[*pos];
310
311         if (*pos > h->highest_lun)
312                 return 0;
313
314         if (drv->heads == 0)
315                 return 0;
316
317         vol_sz = drv->nr_blocks;
318         vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR);
319         vol_sz_frac *= 100;
320         sector_div(vol_sz_frac, ENG_GIG_FACTOR);
321
322         if (drv->raid_level > 5)
323                 drv->raid_level = RAID_UNKNOWN;
324         seq_printf(seq, "cciss/c%dd%d:"
325                         "\t%4u.%02uGB\tRAID %s\n",
326                         ctlr, (int) *pos, (int)vol_sz, (int)vol_sz_frac,
327                         raid_label[drv->raid_level]);
328         return 0;
329 }
330
331 static void *cciss_seq_next(struct seq_file *seq, void *v, loff_t *pos)
332 {
333         ctlr_info_t *h = seq->private;
334
335         if (*pos > h->highest_lun)
336                 return NULL;
337         *pos += 1;
338
339         return pos;
340 }
341
342 static void cciss_seq_stop(struct seq_file *seq, void *v)
343 {
344         ctlr_info_t *h = seq->private;
345
346         /* Only reset h->busy_configuring if we succeeded in setting
347          * it during cciss_seq_start. */
348         if (v == ERR_PTR(-EBUSY))
349                 return;
350
351         h->busy_configuring = 0;
352 }
353
354 static struct seq_operations cciss_seq_ops = {
355         .start = cciss_seq_start,
356         .show  = cciss_seq_show,
357         .next  = cciss_seq_next,
358         .stop  = cciss_seq_stop,
359 };
360
361 static int cciss_seq_open(struct inode *inode, struct file *file)
362 {
363         int ret = seq_open(file, &cciss_seq_ops);
364         struct seq_file *seq = file->private_data;
365
366         if (!ret)
367                 seq->private = PDE(inode)->data;
368
369         return ret;
370 }
371
372 static ssize_t
373 cciss_proc_write(struct file *file, const char __user *buf,
374                  size_t length, loff_t *ppos)
375 {
376         int err;
377         char *buffer;
378
379 #ifndef CONFIG_CISS_SCSI_TAPE
380         return -EINVAL;
381 #endif
382
383         if (!buf || length > PAGE_SIZE - 1)
384                 return -EINVAL;
385
386         buffer = (char *)__get_free_page(GFP_KERNEL);
387         if (!buffer)
388                 return -ENOMEM;
389
390         err = -EFAULT;
391         if (copy_from_user(buffer, buf, length))
392                 goto out;
393         buffer[length] = '\0';
394
395 #ifdef CONFIG_CISS_SCSI_TAPE
396         if (strncmp(ENGAGE_SCSI, buffer, sizeof ENGAGE_SCSI - 1) == 0) {
397                 struct seq_file *seq = file->private_data;
398                 ctlr_info_t *h = seq->private;
399                 int rc;
400
401                 rc = cciss_engage_scsi(h->ctlr);
402                 if (rc != 0)
403                         err = -rc;
404                 else
405                         err = length;
406         } else
407 #endif /* CONFIG_CISS_SCSI_TAPE */
408                 err = -EINVAL;
409         /* might be nice to have "disengage" too, but it's not
410            safely possible. (only 1 module use count, lock issues.) */
411
412 out:
413         free_page((unsigned long)buffer);
414         return err;
415 }
416
417 static struct file_operations cciss_proc_fops = {
418         .owner   = THIS_MODULE,
419         .open    = cciss_seq_open,
420         .read    = seq_read,
421         .llseek  = seq_lseek,
422         .release = seq_release,
423         .write   = cciss_proc_write,
424 };
425
426 static void __devinit cciss_procinit(int i)
427 {
428         struct proc_dir_entry *pde;
429
430         if (proc_cciss == NULL)
431                 proc_cciss = proc_mkdir("driver/cciss", NULL);
432         if (!proc_cciss)
433                 return;
434         pde = proc_create_data(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP |
435                                         S_IROTH, proc_cciss,
436                                         &cciss_proc_fops, hba[i]);
437 }
438 #endif                          /* CONFIG_PROC_FS */
439
440 /*
441  * For operations that cannot sleep, a command block is allocated at init,
442  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
443  * which ones are free or in use.  For operations that can wait for kmalloc
444  * to possible sleep, this routine can be called with get_from_pool set to 0.
445  * cmd_free() MUST be called with a got_from_pool set to 0 if cmd_alloc was.
446  */
447 static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
448 {
449         CommandList_struct *c;
450         int i;
451         u64bit temp64;
452         dma_addr_t cmd_dma_handle, err_dma_handle;
453
454         if (!get_from_pool) {
455                 c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
456                         sizeof(CommandList_struct), &cmd_dma_handle);
457                 if (c == NULL)
458                         return NULL;
459                 memset(c, 0, sizeof(CommandList_struct));
460
461                 c->cmdindex = -1;
462
463                 c->err_info = (ErrorInfo_struct *)
464                     pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
465                             &err_dma_handle);
466
467                 if (c->err_info == NULL) {
468                         pci_free_consistent(h->pdev,
469                                 sizeof(CommandList_struct), c, cmd_dma_handle);
470                         return NULL;
471                 }
472                 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
473         } else {                /* get it out of the controllers pool */
474
475                 do {
476                         i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
477                         if (i == h->nr_cmds)
478                                 return NULL;
479                 } while (test_and_set_bit
480                          (i & (BITS_PER_LONG - 1),
481                           h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
482 #ifdef CCISS_DEBUG
483                 printk(KERN_DEBUG "cciss: using command buffer %d\n", i);
484 #endif
485                 c = h->cmd_pool + i;
486                 memset(c, 0, sizeof(CommandList_struct));
487                 cmd_dma_handle = h->cmd_pool_dhandle
488                     + i * sizeof(CommandList_struct);
489                 c->err_info = h->errinfo_pool + i;
490                 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
491                 err_dma_handle = h->errinfo_pool_dhandle
492                     + i * sizeof(ErrorInfo_struct);
493                 h->nr_allocs++;
494
495                 c->cmdindex = i;
496         }
497
498         INIT_HLIST_NODE(&c->list);
499         c->busaddr = (__u32) cmd_dma_handle;
500         temp64.val = (__u64) err_dma_handle;
501         c->ErrDesc.Addr.lower = temp64.val32.lower;
502         c->ErrDesc.Addr.upper = temp64.val32.upper;
503         c->ErrDesc.Len = sizeof(ErrorInfo_struct);
504
505         c->ctlr = h->ctlr;
506         return c;
507 }
508
509 /*
510  * Frees a command block that was previously allocated with cmd_alloc().
511  */
512 static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool)
513 {
514         int i;
515         u64bit temp64;
516
517         if (!got_from_pool) {
518                 temp64.val32.lower = c->ErrDesc.Addr.lower;
519                 temp64.val32.upper = c->ErrDesc.Addr.upper;
520                 pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
521                                     c->err_info, (dma_addr_t) temp64.val);
522                 pci_free_consistent(h->pdev, sizeof(CommandList_struct),
523                                     c, (dma_addr_t) c->busaddr);
524         } else {
525                 i = c - h->cmd_pool;
526                 clear_bit(i & (BITS_PER_LONG - 1),
527                           h->cmd_pool_bits + (i / BITS_PER_LONG));
528                 h->nr_frees++;
529         }
530 }
531
532 static inline ctlr_info_t *get_host(struct gendisk *disk)
533 {
534         return disk->queue->queuedata;
535 }
536
537 static inline drive_info_struct *get_drv(struct gendisk *disk)
538 {
539         return disk->private_data;
540 }
541
542 /*
543  * Open.  Make sure the device is really there.
544  */
545 static int cciss_open(struct block_device *bdev, fmode_t mode)
546 {
547         ctlr_info_t *host = get_host(bdev->bd_disk);
548         drive_info_struct *drv = get_drv(bdev->bd_disk);
549
550 #ifdef CCISS_DEBUG
551         printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name);
552 #endif                          /* CCISS_DEBUG */
553
554         if (host->busy_initializing || drv->busy_configuring)
555                 return -EBUSY;
556         /*
557          * Root is allowed to open raw volume zero even if it's not configured
558          * so array config can still work. Root is also allowed to open any
559          * volume that has a LUN ID, so it can issue IOCTL to reread the
560          * disk information.  I don't think I really like this
561          * but I'm already using way to many device nodes to claim another one
562          * for "raw controller".
563          */
564         if (drv->heads == 0) {
565                 if (MINOR(bdev->bd_dev) != 0) { /* not node 0? */
566                         /* if not node 0 make sure it is a partition = 0 */
567                         if (MINOR(bdev->bd_dev) & 0x0f) {
568                                 return -ENXIO;
569                                 /* if it is, make sure we have a LUN ID */
570                         } else if (drv->LunID == 0) {
571                                 return -ENXIO;
572                         }
573                 }
574                 if (!capable(CAP_SYS_ADMIN))
575                         return -EPERM;
576         }
577         drv->usage_count++;
578         host->usage_count++;
579         return 0;
580 }
581
582 /*
583  * Close.  Sync first.
584  */
585 static int cciss_release(struct gendisk *disk, fmode_t mode)
586 {
587         ctlr_info_t *host = get_host(disk);
588         drive_info_struct *drv = get_drv(disk);
589
590 #ifdef CCISS_DEBUG
591         printk(KERN_DEBUG "cciss_release %s\n", disk->disk_name);
592 #endif                          /* CCISS_DEBUG */
593
594         drv->usage_count--;
595         host->usage_count--;
596         return 0;
597 }
598
599 #ifdef CONFIG_COMPAT
600
601 static int do_ioctl(struct block_device *bdev, fmode_t mode,
602                     unsigned cmd, unsigned long arg)
603 {
604         int ret;
605         lock_kernel();
606         ret = cciss_ioctl(bdev, mode, cmd, arg);
607         unlock_kernel();
608         return ret;
609 }
610
611 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
612                                   unsigned cmd, unsigned long arg);
613 static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
614                                       unsigned cmd, unsigned long arg);
615
616 static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode,
617                               unsigned cmd, unsigned long arg)
618 {
619         switch (cmd) {
620         case CCISS_GETPCIINFO:
621         case CCISS_GETINTINFO:
622         case CCISS_SETINTINFO:
623         case CCISS_GETNODENAME:
624         case CCISS_SETNODENAME:
625         case CCISS_GETHEARTBEAT:
626         case CCISS_GETBUSTYPES:
627         case CCISS_GETFIRMVER:
628         case CCISS_GETDRIVVER:
629         case CCISS_REVALIDVOLS:
630         case CCISS_DEREGDISK:
631         case CCISS_REGNEWDISK:
632         case CCISS_REGNEWD:
633         case CCISS_RESCANDISK:
634         case CCISS_GETLUNINFO:
635                 return do_ioctl(bdev, mode, cmd, arg);
636
637         case CCISS_PASSTHRU32:
638                 return cciss_ioctl32_passthru(bdev, mode, cmd, arg);
639         case CCISS_BIG_PASSTHRU32:
640                 return cciss_ioctl32_big_passthru(bdev, mode, cmd, arg);
641
642         default:
643                 return -ENOIOCTLCMD;
644         }
645 }
646
647 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
648                                   unsigned cmd, unsigned long arg)
649 {
650         IOCTL32_Command_struct __user *arg32 =
651             (IOCTL32_Command_struct __user *) arg;
652         IOCTL_Command_struct arg64;
653         IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64));
654         int err;
655         u32 cp;
656
657         err = 0;
658         err |=
659             copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
660                            sizeof(arg64.LUN_info));
661         err |=
662             copy_from_user(&arg64.Request, &arg32->Request,
663                            sizeof(arg64.Request));
664         err |=
665             copy_from_user(&arg64.error_info, &arg32->error_info,
666                            sizeof(arg64.error_info));
667         err |= get_user(arg64.buf_size, &arg32->buf_size);
668         err |= get_user(cp, &arg32->buf);
669         arg64.buf = compat_ptr(cp);
670         err |= copy_to_user(p, &arg64, sizeof(arg64));
671
672         if (err)
673                 return -EFAULT;
674
675         err = do_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p);
676         if (err)
677                 return err;
678         err |=
679             copy_in_user(&arg32->error_info, &p->error_info,
680                          sizeof(arg32->error_info));
681         if (err)
682                 return -EFAULT;
683         return err;
684 }
685
686 static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
687                                       unsigned cmd, unsigned long arg)
688 {
689         BIG_IOCTL32_Command_struct __user *arg32 =
690             (BIG_IOCTL32_Command_struct __user *) arg;
691         BIG_IOCTL_Command_struct arg64;
692         BIG_IOCTL_Command_struct __user *p =
693             compat_alloc_user_space(sizeof(arg64));
694         int err;
695         u32 cp;
696
697         err = 0;
698         err |=
699             copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
700                            sizeof(arg64.LUN_info));
701         err |=
702             copy_from_user(&arg64.Request, &arg32->Request,
703                            sizeof(arg64.Request));
704         err |=
705             copy_from_user(&arg64.error_info, &arg32->error_info,
706                            sizeof(arg64.error_info));
707         err |= get_user(arg64.buf_size, &arg32->buf_size);
708         err |= get_user(arg64.malloc_size, &arg32->malloc_size);
709         err |= get_user(cp, &arg32->buf);
710         arg64.buf = compat_ptr(cp);
711         err |= copy_to_user(p, &arg64, sizeof(arg64));
712
713         if (err)
714                 return -EFAULT;
715
716         err = do_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p);
717         if (err)
718                 return err;
719         err |=
720             copy_in_user(&arg32->error_info, &p->error_info,
721                          sizeof(arg32->error_info));
722         if (err)
723                 return -EFAULT;
724         return err;
725 }
726 #endif
727
728 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
729 {
730         drive_info_struct *drv = get_drv(bdev->bd_disk);
731
732         if (!drv->cylinders)
733                 return -ENXIO;
734
735         geo->heads = drv->heads;
736         geo->sectors = drv->sectors;
737         geo->cylinders = drv->cylinders;
738         return 0;
739 }
740
741 static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
742 {
743         if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
744                         c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
745                 (void)check_for_unit_attention(host, c);
746 }
747 /*
748  * ioctl
749  */
750 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
751                        unsigned int cmd, unsigned long arg)
752 {
753         struct gendisk *disk = bdev->bd_disk;
754         ctlr_info_t *host = get_host(disk);
755         drive_info_struct *drv = get_drv(disk);
756         int ctlr = host->ctlr;
757         void __user *argp = (void __user *)arg;
758
759 #ifdef CCISS_DEBUG
760         printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg);
761 #endif                          /* CCISS_DEBUG */
762
763         switch (cmd) {
764         case CCISS_GETPCIINFO:
765                 {
766                         cciss_pci_info_struct pciinfo;
767
768                         if (!arg)
769                                 return -EINVAL;
770                         pciinfo.domain = pci_domain_nr(host->pdev->bus);
771                         pciinfo.bus = host->pdev->bus->number;
772                         pciinfo.dev_fn = host->pdev->devfn;
773                         pciinfo.board_id = host->board_id;
774                         if (copy_to_user
775                             (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
776                                 return -EFAULT;
777                         return 0;
778                 }
779         case CCISS_GETINTINFO:
780                 {
781                         cciss_coalint_struct intinfo;
782                         if (!arg)
783                                 return -EINVAL;
784                         intinfo.delay =
785                             readl(&host->cfgtable->HostWrite.CoalIntDelay);
786                         intinfo.count =
787                             readl(&host->cfgtable->HostWrite.CoalIntCount);
788                         if (copy_to_user
789                             (argp, &intinfo, sizeof(cciss_coalint_struct)))
790                                 return -EFAULT;
791                         return 0;
792                 }
793         case CCISS_SETINTINFO:
794                 {
795                         cciss_coalint_struct intinfo;
796                         unsigned long flags;
797                         int i;
798
799                         if (!arg)
800                                 return -EINVAL;
801                         if (!capable(CAP_SYS_ADMIN))
802                                 return -EPERM;
803                         if (copy_from_user
804                             (&intinfo, argp, sizeof(cciss_coalint_struct)))
805                                 return -EFAULT;
806                         if ((intinfo.delay == 0) && (intinfo.count == 0))
807                         {
808 //                      printk("cciss_ioctl: delay and count cannot be 0\n");
809                                 return -EINVAL;
810                         }
811                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
812                         /* Update the field, and then ring the doorbell */
813                         writel(intinfo.delay,
814                                &(host->cfgtable->HostWrite.CoalIntDelay));
815                         writel(intinfo.count,
816                                &(host->cfgtable->HostWrite.CoalIntCount));
817                         writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
818
819                         for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
820                                 if (!(readl(host->vaddr + SA5_DOORBELL)
821                                       & CFGTBL_ChangeReq))
822                                         break;
823                                 /* delay and try again */
824                                 udelay(1000);
825                         }
826                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
827                         if (i >= MAX_IOCTL_CONFIG_WAIT)
828                                 return -EAGAIN;
829                         return 0;
830                 }
831         case CCISS_GETNODENAME:
832                 {
833                         NodeName_type NodeName;
834                         int i;
835
836                         if (!arg)
837                                 return -EINVAL;
838                         for (i = 0; i < 16; i++)
839                                 NodeName[i] =
840                                     readb(&host->cfgtable->ServerName[i]);
841                         if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
842                                 return -EFAULT;
843                         return 0;
844                 }
845         case CCISS_SETNODENAME:
846                 {
847                         NodeName_type NodeName;
848                         unsigned long flags;
849                         int i;
850
851                         if (!arg)
852                                 return -EINVAL;
853                         if (!capable(CAP_SYS_ADMIN))
854                                 return -EPERM;
855
856                         if (copy_from_user
857                             (NodeName, argp, sizeof(NodeName_type)))
858                                 return -EFAULT;
859
860                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
861
862                         /* Update the field, and then ring the doorbell */
863                         for (i = 0; i < 16; i++)
864                                 writeb(NodeName[i],
865                                        &host->cfgtable->ServerName[i]);
866
867                         writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
868
869                         for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
870                                 if (!(readl(host->vaddr + SA5_DOORBELL)
871                                       & CFGTBL_ChangeReq))
872                                         break;
873                                 /* delay and try again */
874                                 udelay(1000);
875                         }
876                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
877                         if (i >= MAX_IOCTL_CONFIG_WAIT)
878                                 return -EAGAIN;
879                         return 0;
880                 }
881
882         case CCISS_GETHEARTBEAT:
883                 {
884                         Heartbeat_type heartbeat;
885
886                         if (!arg)
887                                 return -EINVAL;
888                         heartbeat = readl(&host->cfgtable->HeartBeat);
889                         if (copy_to_user
890                             (argp, &heartbeat, sizeof(Heartbeat_type)))
891                                 return -EFAULT;
892                         return 0;
893                 }
894         case CCISS_GETBUSTYPES:
895                 {
896                         BusTypes_type BusTypes;
897
898                         if (!arg)
899                                 return -EINVAL;
900                         BusTypes = readl(&host->cfgtable->BusTypes);
901                         if (copy_to_user
902                             (argp, &BusTypes, sizeof(BusTypes_type)))
903                                 return -EFAULT;
904                         return 0;
905                 }
906         case CCISS_GETFIRMVER:
907                 {
908                         FirmwareVer_type firmware;
909
910                         if (!arg)
911                                 return -EINVAL;
912                         memcpy(firmware, host->firm_ver, 4);
913
914                         if (copy_to_user
915                             (argp, firmware, sizeof(FirmwareVer_type)))
916                                 return -EFAULT;
917                         return 0;
918                 }
919         case CCISS_GETDRIVVER:
920                 {
921                         DriverVer_type DriverVer = DRIVER_VERSION;
922
923                         if (!arg)
924                                 return -EINVAL;
925
926                         if (copy_to_user
927                             (argp, &DriverVer, sizeof(DriverVer_type)))
928                                 return -EFAULT;
929                         return 0;
930                 }
931
932         case CCISS_DEREGDISK:
933         case CCISS_REGNEWD:
934         case CCISS_REVALIDVOLS:
935                 return rebuild_lun_table(host, 0);
936
937         case CCISS_GETLUNINFO:{
938                         LogvolInfo_struct luninfo;
939
940                         luninfo.LunID = drv->LunID;
941                         luninfo.num_opens = drv->usage_count;
942                         luninfo.num_parts = 0;
943                         if (copy_to_user(argp, &luninfo,
944                                          sizeof(LogvolInfo_struct)))
945                                 return -EFAULT;
946                         return 0;
947                 }
948         case CCISS_PASSTHRU:
949                 {
950                         IOCTL_Command_struct iocommand;
951                         CommandList_struct *c;
952                         char *buff = NULL;
953                         u64bit temp64;
954                         unsigned long flags;
955                         DECLARE_COMPLETION_ONSTACK(wait);
956
957                         if (!arg)
958                                 return -EINVAL;
959
960                         if (!capable(CAP_SYS_RAWIO))
961                                 return -EPERM;
962
963                         if (copy_from_user
964                             (&iocommand, argp, sizeof(IOCTL_Command_struct)))
965                                 return -EFAULT;
966                         if ((iocommand.buf_size < 1) &&
967                             (iocommand.Request.Type.Direction != XFER_NONE)) {
968                                 return -EINVAL;
969                         }
970 #if 0                           /* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */
971                         /* Check kmalloc limits */
972                         if (iocommand.buf_size > 128000)
973                                 return -EINVAL;
974 #endif
975                         if (iocommand.buf_size > 0) {
976                                 buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
977                                 if (buff == NULL)
978                                         return -EFAULT;
979                         }
980                         if (iocommand.Request.Type.Direction == XFER_WRITE) {
981                                 /* Copy the data into the buffer we created */
982                                 if (copy_from_user
983                                     (buff, iocommand.buf, iocommand.buf_size)) {
984                                         kfree(buff);
985                                         return -EFAULT;
986                                 }
987                         } else {
988                                 memset(buff, 0, iocommand.buf_size);
989                         }
990                         if ((c = cmd_alloc(host, 0)) == NULL) {
991                                 kfree(buff);
992                                 return -ENOMEM;
993                         }
994                         // Fill in the command type
995                         c->cmd_type = CMD_IOCTL_PEND;
996                         // Fill in Command Header
997                         c->Header.ReplyQueue = 0;       // unused in simple mode
998                         if (iocommand.buf_size > 0)     // buffer to fill
999                         {
1000                                 c->Header.SGList = 1;
1001                                 c->Header.SGTotal = 1;
1002                         } else  // no buffers to fill
1003                         {
1004                                 c->Header.SGList = 0;
1005                                 c->Header.SGTotal = 0;
1006                         }
1007                         c->Header.LUN = iocommand.LUN_info;
1008                         c->Header.Tag.lower = c->busaddr;       // use the kernel address the cmd block for tag
1009
1010                         // Fill in Request block
1011                         c->Request = iocommand.Request;
1012
1013                         // Fill in the scatter gather information
1014                         if (iocommand.buf_size > 0) {
1015                                 temp64.val = pci_map_single(host->pdev, buff,
1016                                         iocommand.buf_size,
1017                                         PCI_DMA_BIDIRECTIONAL);
1018                                 c->SG[0].Addr.lower = temp64.val32.lower;
1019                                 c->SG[0].Addr.upper = temp64.val32.upper;
1020                                 c->SG[0].Len = iocommand.buf_size;
1021                                 c->SG[0].Ext = 0;       // we are not chaining
1022                         }
1023                         c->waiting = &wait;
1024
1025                         /* Put the request on the tail of the request queue */
1026                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1027                         addQ(&host->reqQ, c);
1028                         host->Qdepth++;
1029                         start_io(host);
1030                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1031
1032                         wait_for_completion(&wait);
1033
1034                         /* unlock the buffers from DMA */
1035                         temp64.val32.lower = c->SG[0].Addr.lower;
1036                         temp64.val32.upper = c->SG[0].Addr.upper;
1037                         pci_unmap_single(host->pdev, (dma_addr_t) temp64.val,
1038                                          iocommand.buf_size,
1039                                          PCI_DMA_BIDIRECTIONAL);
1040
1041                         check_ioctl_unit_attention(host, c);
1042
1043                         /* Copy the error information out */
1044                         iocommand.error_info = *(c->err_info);
1045                         if (copy_to_user
1046                             (argp, &iocommand, sizeof(IOCTL_Command_struct))) {
1047                                 kfree(buff);
1048                                 cmd_free(host, c, 0);
1049                                 return -EFAULT;
1050                         }
1051
1052                         if (iocommand.Request.Type.Direction == XFER_READ) {
1053                                 /* Copy the data out of the buffer we created */
1054                                 if (copy_to_user
1055                                     (iocommand.buf, buff, iocommand.buf_size)) {
1056                                         kfree(buff);
1057                                         cmd_free(host, c, 0);
1058                                         return -EFAULT;
1059                                 }
1060                         }
1061                         kfree(buff);
1062                         cmd_free(host, c, 0);
1063                         return 0;
1064                 }
1065         case CCISS_BIG_PASSTHRU:{
1066                         BIG_IOCTL_Command_struct *ioc;
1067                         CommandList_struct *c;
1068                         unsigned char **buff = NULL;
1069                         int *buff_size = NULL;
1070                         u64bit temp64;
1071                         unsigned long flags;
1072                         BYTE sg_used = 0;
1073                         int status = 0;
1074                         int i;
1075                         DECLARE_COMPLETION_ONSTACK(wait);
1076                         __u32 left;
1077                         __u32 sz;
1078                         BYTE __user *data_ptr;
1079
1080                         if (!arg)
1081                                 return -EINVAL;
1082                         if (!capable(CAP_SYS_RAWIO))
1083                                 return -EPERM;
1084                         ioc = (BIG_IOCTL_Command_struct *)
1085                             kmalloc(sizeof(*ioc), GFP_KERNEL);
1086                         if (!ioc) {
1087                                 status = -ENOMEM;
1088                                 goto cleanup1;
1089                         }
1090                         if (copy_from_user(ioc, argp, sizeof(*ioc))) {
1091                                 status = -EFAULT;
1092                                 goto cleanup1;
1093                         }
1094                         if ((ioc->buf_size < 1) &&
1095                             (ioc->Request.Type.Direction != XFER_NONE)) {
1096                                 status = -EINVAL;
1097                                 goto cleanup1;
1098                         }
1099                         /* Check kmalloc limits  using all SGs */
1100                         if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
1101                                 status = -EINVAL;
1102                                 goto cleanup1;
1103                         }
1104                         if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
1105                                 status = -EINVAL;
1106                                 goto cleanup1;
1107                         }
1108                         buff =
1109                             kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
1110                         if (!buff) {
1111                                 status = -ENOMEM;
1112                                 goto cleanup1;
1113                         }
1114                         buff_size = kmalloc(MAXSGENTRIES * sizeof(int),
1115                                                    GFP_KERNEL);
1116                         if (!buff_size) {
1117                                 status = -ENOMEM;
1118                                 goto cleanup1;
1119                         }
1120                         left = ioc->buf_size;
1121                         data_ptr = ioc->buf;
1122                         while (left) {
1123                                 sz = (left >
1124                                       ioc->malloc_size) ? ioc->
1125                                     malloc_size : left;
1126                                 buff_size[sg_used] = sz;
1127                                 buff[sg_used] = kmalloc(sz, GFP_KERNEL);
1128                                 if (buff[sg_used] == NULL) {
1129                                         status = -ENOMEM;
1130                                         goto cleanup1;
1131                                 }
1132                                 if (ioc->Request.Type.Direction == XFER_WRITE) {
1133                                         if (copy_from_user
1134                                             (buff[sg_used], data_ptr, sz)) {
1135                                                 status = -EFAULT;
1136                                                 goto cleanup1;
1137                                         }
1138                                 } else {
1139                                         memset(buff[sg_used], 0, sz);
1140                                 }
1141                                 left -= sz;
1142                                 data_ptr += sz;
1143                                 sg_used++;
1144                         }
1145                         if ((c = cmd_alloc(host, 0)) == NULL) {
1146                                 status = -ENOMEM;
1147                                 goto cleanup1;
1148                         }
1149                         c->cmd_type = CMD_IOCTL_PEND;
1150                         c->Header.ReplyQueue = 0;
1151
1152                         if (ioc->buf_size > 0) {
1153                                 c->Header.SGList = sg_used;
1154                                 c->Header.SGTotal = sg_used;
1155                         } else {
1156                                 c->Header.SGList = 0;
1157                                 c->Header.SGTotal = 0;
1158                         }
1159                         c->Header.LUN = ioc->LUN_info;
1160                         c->Header.Tag.lower = c->busaddr;
1161
1162                         c->Request = ioc->Request;
1163                         if (ioc->buf_size > 0) {
1164                                 int i;
1165                                 for (i = 0; i < sg_used; i++) {
1166                                         temp64.val =
1167                                             pci_map_single(host->pdev, buff[i],
1168                                                     buff_size[i],
1169                                                     PCI_DMA_BIDIRECTIONAL);
1170                                         c->SG[i].Addr.lower =
1171                                             temp64.val32.lower;
1172                                         c->SG[i].Addr.upper =
1173                                             temp64.val32.upper;
1174                                         c->SG[i].Len = buff_size[i];
1175                                         c->SG[i].Ext = 0;       /* we are not chaining */
1176                                 }
1177                         }
1178                         c->waiting = &wait;
1179                         /* Put the request on the tail of the request queue */
1180                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1181                         addQ(&host->reqQ, c);
1182                         host->Qdepth++;
1183                         start_io(host);
1184                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1185                         wait_for_completion(&wait);
1186                         /* unlock the buffers from DMA */
1187                         for (i = 0; i < sg_used; i++) {
1188                                 temp64.val32.lower = c->SG[i].Addr.lower;
1189                                 temp64.val32.upper = c->SG[i].Addr.upper;
1190                                 pci_unmap_single(host->pdev,
1191                                         (dma_addr_t) temp64.val, buff_size[i],
1192                                         PCI_DMA_BIDIRECTIONAL);
1193                         }
1194                         check_ioctl_unit_attention(host, c);
1195                         /* Copy the error information out */
1196                         ioc->error_info = *(c->err_info);
1197                         if (copy_to_user(argp, ioc, sizeof(*ioc))) {
1198                                 cmd_free(host, c, 0);
1199                                 status = -EFAULT;
1200                                 goto cleanup1;
1201                         }
1202                         if (ioc->Request.Type.Direction == XFER_READ) {
1203                                 /* Copy the data out of the buffer we created */
1204                                 BYTE __user *ptr = ioc->buf;
1205                                 for (i = 0; i < sg_used; i++) {
1206                                         if (copy_to_user
1207                                             (ptr, buff[i], buff_size[i])) {
1208                                                 cmd_free(host, c, 0);
1209                                                 status = -EFAULT;
1210                                                 goto cleanup1;
1211                                         }
1212                                         ptr += buff_size[i];
1213                                 }
1214                         }
1215                         cmd_free(host, c, 0);
1216                         status = 0;
1217                       cleanup1:
1218                         if (buff) {
1219                                 for (i = 0; i < sg_used; i++)
1220                                         kfree(buff[i]);
1221                                 kfree(buff);
1222                         }
1223                         kfree(buff_size);
1224                         kfree(ioc);
1225                         return status;
1226                 }
1227
1228         /* scsi_cmd_ioctl handles these, below, though some are not */
1229         /* very meaningful for cciss.  SG_IO is the main one people want. */
1230
1231         case SG_GET_VERSION_NUM:
1232         case SG_SET_TIMEOUT:
1233         case SG_GET_TIMEOUT:
1234         case SG_GET_RESERVED_SIZE:
1235         case SG_SET_RESERVED_SIZE:
1236         case SG_EMULATED_HOST:
1237         case SG_IO:
1238         case SCSI_IOCTL_SEND_COMMAND:
1239                 return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
1240
1241         /* scsi_cmd_ioctl would normally handle these, below, but */
1242         /* they aren't a good fit for cciss, as CD-ROMs are */
1243         /* not supported, and we don't have any bus/target/lun */
1244         /* which we present to the kernel. */
1245
1246         case CDROM_SEND_PACKET:
1247         case CDROMCLOSETRAY:
1248         case CDROMEJECT:
1249         case SCSI_IOCTL_GET_IDLUN:
1250         case SCSI_IOCTL_GET_BUS_NUMBER:
1251         default:
1252                 return -ENOTTY;
1253         }
1254 }
1255
1256 static void cciss_check_queues(ctlr_info_t *h)
1257 {
1258         int start_queue = h->next_to_run;
1259         int i;
1260
1261         /* check to see if we have maxed out the number of commands that can
1262          * be placed on the queue.  If so then exit.  We do this check here
1263          * in case the interrupt we serviced was from an ioctl and did not
1264          * free any new commands.
1265          */
1266         if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds)
1267                 return;
1268
1269         /* We have room on the queue for more commands.  Now we need to queue
1270          * them up.  We will also keep track of the next queue to run so
1271          * that every queue gets a chance to be started first.
1272          */
1273         for (i = 0; i < h->highest_lun + 1; i++) {
1274                 int curr_queue = (start_queue + i) % (h->highest_lun + 1);
1275                 /* make sure the disk has been added and the drive is real
1276                  * because this can be called from the middle of init_one.
1277                  */
1278                 if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
1279                         continue;
1280                 blk_start_queue(h->gendisk[curr_queue]->queue);
1281
1282                 /* check to see if we have maxed out the number of commands
1283                  * that can be placed on the queue.
1284                  */
1285                 if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds) {
1286                         if (curr_queue == start_queue) {
1287                                 h->next_to_run =
1288                                     (start_queue + 1) % (h->highest_lun + 1);
1289                                 break;
1290                         } else {
1291                                 h->next_to_run = curr_queue;
1292                                 break;
1293                         }
1294                 }
1295         }
1296 }
1297
1298 static void cciss_softirq_done(struct request *rq)
1299 {
1300         CommandList_struct *cmd = rq->completion_data;
1301         ctlr_info_t *h = hba[cmd->ctlr];
1302         unsigned int nr_bytes;
1303         unsigned long flags;
1304         u64bit temp64;
1305         int i, ddir;
1306
1307         if (cmd->Request.Type.Direction == XFER_READ)
1308                 ddir = PCI_DMA_FROMDEVICE;
1309         else
1310                 ddir = PCI_DMA_TODEVICE;
1311
1312         /* command did not need to be retried */
1313         /* unmap the DMA mapping for all the scatter gather elements */
1314         for (i = 0; i < cmd->Header.SGList; i++) {
1315                 temp64.val32.lower = cmd->SG[i].Addr.lower;
1316                 temp64.val32.upper = cmd->SG[i].Addr.upper;
1317                 pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
1318         }
1319
1320 #ifdef CCISS_DEBUG
1321         printk("Done with %p\n", rq);
1322 #endif                          /* CCISS_DEBUG */
1323
1324         /*
1325          * Store the full size and set the residual count for pc requests
1326          */
1327         nr_bytes = blk_rq_bytes(rq);
1328         if (blk_pc_request(rq))
1329                 rq->data_len = cmd->err_info->ResidualCnt;
1330
1331         if (blk_end_request(rq, (rq->errors == 0) ? 0 : -EIO, nr_bytes))
1332                 BUG();
1333
1334         spin_lock_irqsave(&h->lock, flags);
1335         cmd_free(h, cmd, 1);
1336         cciss_check_queues(h);
1337         spin_unlock_irqrestore(&h->lock, flags);
1338 }
1339
1340 /* This function gets the serial number of a logical drive via
1341  * inquiry page 0x83.  Serial no. is 16 bytes.  If the serial
1342  * number cannot be had, for whatever reason, 16 bytes of 0xff
1343  * are returned instead.
1344  */
1345 static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
1346                                 unsigned char *serial_no, int buflen)
1347 {
1348 #define PAGE_83_INQ_BYTES 64
1349         int rc;
1350         unsigned char *buf;
1351
1352         if (buflen > 16)
1353                 buflen = 16;
1354         memset(serial_no, 0xff, buflen);
1355         buf = kzalloc(PAGE_83_INQ_BYTES, GFP_KERNEL);
1356         if (!buf)
1357                 return;
1358         memset(serial_no, 0, buflen);
1359         if (withirq)
1360                 rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
1361                         PAGE_83_INQ_BYTES, 1, logvol, 0x83, TYPE_CMD);
1362         else
1363                 rc = sendcmd(CISS_INQUIRY, ctlr, buf,
1364                         PAGE_83_INQ_BYTES, 1, logvol, 0x83, NULL, TYPE_CMD);
1365         if (rc == IO_OK)
1366                 memcpy(serial_no, &buf[8], buflen);
1367         kfree(buf);
1368         return;
1369 }
1370
1371 static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
1372                                 int drv_index)
1373 {
1374         disk->queue = blk_init_queue(do_cciss_request, &h->lock);
1375         sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
1376         disk->major = h->major;
1377         disk->first_minor = drv_index << NWD_SHIFT;
1378         disk->fops = &cciss_fops;
1379         disk->private_data = &h->drv[drv_index];
1380         disk->driverfs_dev = &h->pdev->dev;
1381
1382         /* Set up queue information */
1383         blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
1384
1385         /* This is a hardware imposed limit. */
1386         blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
1387
1388         /* This is a limit in the driver and could be eliminated. */
1389         blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
1390
1391         blk_queue_max_sectors(disk->queue, h->cciss_max_sectors);
1392
1393         blk_queue_softirq_done(disk->queue, cciss_softirq_done);
1394
1395         disk->queue->queuedata = h;
1396
1397         blk_queue_hardsect_size(disk->queue,
1398                                 h->drv[drv_index].block_size);
1399
1400         /* Make sure all queue data is written out before */
1401         /* setting h->drv[drv_index].queue, as setting this */
1402         /* allows the interrupt handler to start the queue */
1403         wmb();
1404         h->drv[drv_index].queue = disk->queue;
1405         add_disk(disk);
1406 }
1407
1408 /* This function will check the usage_count of the drive to be updated/added.
1409  * If the usage_count is zero and it is a heretofore unknown drive, or,
1410  * the drive's capacity, geometry, or serial number has changed,
1411  * then the drive information will be updated and the disk will be
1412  * re-registered with the kernel.  If these conditions don't hold,
1413  * then it will be left alone for the next reboot.  The exception to this
1414  * is disk 0 which will always be left registered with the kernel since it
1415  * is also the controller node.  Any changes to disk 0 will show up on
1416  * the next reboot.
1417  */
1418 static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
1419 {
1420         ctlr_info_t *h = hba[ctlr];
1421         struct gendisk *disk;
1422         InquiryData_struct *inq_buff = NULL;
1423         unsigned int block_size;
1424         sector_t total_size;
1425         unsigned long flags = 0;
1426         int ret = 0;
1427         drive_info_struct *drvinfo;
1428         int was_only_controller_node;
1429
1430         /* Get information about the disk and modify the driver structure */
1431         inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
1432         drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL);
1433         if (inq_buff == NULL || drvinfo == NULL)
1434                 goto mem_msg;
1435
1436         /* See if we're trying to update the "controller node"
1437          * this will happen the when the first logical drive gets
1438          * created by ACU.
1439          */
1440         was_only_controller_node = (drv_index == 0 &&
1441                                 h->drv[0].raid_level == -1);
1442
1443         /* testing to see if 16-byte CDBs are already being used */
1444         if (h->cciss_read == CCISS_READ_16) {
1445                 cciss_read_capacity_16(h->ctlr, drv_index, 1,
1446                         &total_size, &block_size);
1447
1448         } else {
1449                 cciss_read_capacity(ctlr, drv_index, 1,
1450                                     &total_size, &block_size);
1451
1452                 /* if read_capacity returns all F's this volume is >2TB */
1453                 /* in size so we switch to 16-byte CDB's for all */
1454                 /* read/write ops */
1455                 if (total_size == 0xFFFFFFFFULL) {
1456                         cciss_read_capacity_16(ctlr, drv_index, 1,
1457                         &total_size, &block_size);
1458                         h->cciss_read = CCISS_READ_16;
1459                         h->cciss_write = CCISS_WRITE_16;
1460                 } else {
1461                         h->cciss_read = CCISS_READ_10;
1462                         h->cciss_write = CCISS_WRITE_10;
1463                 }
1464         }
1465
1466         cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
1467                                inq_buff, drvinfo);
1468         drvinfo->block_size = block_size;
1469         drvinfo->nr_blocks = total_size + 1;
1470
1471         cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
1472                         sizeof(drvinfo->serial_no));
1473
1474         /* Is it the same disk we already know, and nothing's changed? */
1475         if (h->drv[drv_index].raid_level != -1 &&
1476                 ((memcmp(drvinfo->serial_no,
1477                                 h->drv[drv_index].serial_no, 16) == 0) &&
1478                 drvinfo->block_size == h->drv[drv_index].block_size &&
1479                 drvinfo->nr_blocks == h->drv[drv_index].nr_blocks &&
1480                 drvinfo->heads == h->drv[drv_index].heads &&
1481                 drvinfo->sectors == h->drv[drv_index].sectors &&
1482                 drvinfo->cylinders == h->drv[drv_index].cylinders))
1483                         /* The disk is unchanged, nothing to update */
1484                         goto freeret;
1485
1486         /* If we get here it's not the same disk, or something's changed,
1487          * so we need to * deregister it, and re-register it, if it's not
1488          * in use.
1489          * If the disk already exists then deregister it before proceeding
1490          * (unless it's the first disk (for the controller node).
1491          */
1492         if (h->drv[drv_index].raid_level != -1 && drv_index != 0) {
1493                 printk(KERN_WARNING "disk %d has changed.\n", drv_index);
1494                 spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1495                 h->drv[drv_index].busy_configuring = 1;
1496                 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1497
1498                 /* deregister_disk sets h->drv[drv_index].queue = NULL
1499                  * which keeps the interrupt handler from starting
1500                  * the queue.
1501                  */
1502                 ret = deregister_disk(h, drv_index, 0);
1503                 h->drv[drv_index].busy_configuring = 0;
1504         }
1505
1506         /* If the disk is in use return */
1507         if (ret)
1508                 goto freeret;
1509
1510         /* Save the new information from cciss_geometry_inquiry
1511          * and serial number inquiry.
1512          */
1513         h->drv[drv_index].block_size = drvinfo->block_size;
1514         h->drv[drv_index].nr_blocks = drvinfo->nr_blocks;
1515         h->drv[drv_index].heads = drvinfo->heads;
1516         h->drv[drv_index].sectors = drvinfo->sectors;
1517         h->drv[drv_index].cylinders = drvinfo->cylinders;
1518         h->drv[drv_index].raid_level = drvinfo->raid_level;
1519         memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
1520
1521         ++h->num_luns;
1522         disk = h->gendisk[drv_index];
1523         set_capacity(disk, h->drv[drv_index].nr_blocks);
1524
1525         /* If it's not disk 0 (drv_index != 0)
1526          * or if it was disk 0, but there was previously
1527          * no actual corresponding configured logical drive
1528          * (raid_leve == -1) then we want to update the
1529          * logical drive's information.
1530          */
1531         if (drv_index || first_time)
1532                 cciss_add_disk(h, disk, drv_index);
1533
1534 freeret:
1535         kfree(inq_buff);
1536         kfree(drvinfo);
1537         return;
1538 mem_msg:
1539         printk(KERN_ERR "cciss: out of memory\n");
1540         goto freeret;
1541 }
1542
1543 /* This function will find the first index of the controllers drive array
1544  * that has a -1 for the raid_level and will return that index.  This is
1545  * where new drives will be added.  If the index to be returned is greater
1546  * than the highest_lun index for the controller then highest_lun is set
1547  * to this new index.  If there are no available indexes then -1 is returned.
1548  * "controller_node" is used to know if this is a real logical drive, or just
1549  * the controller node, which determines if this counts towards highest_lun.
1550  */
1551 static int cciss_find_free_drive_index(int ctlr, int controller_node)
1552 {
1553         int i;
1554
1555         for (i = 0; i < CISS_MAX_LUN; i++) {
1556                 if (hba[ctlr]->drv[i].raid_level == -1) {
1557                         if (i > hba[ctlr]->highest_lun)
1558                                 if (!controller_node)
1559                                         hba[ctlr]->highest_lun = i;
1560                         return i;
1561                 }
1562         }
1563         return -1;
1564 }
1565
1566 /* cciss_add_gendisk finds a free hba[]->drv structure
1567  * and allocates a gendisk if needed, and sets the lunid
1568  * in the drvinfo structure.   It returns the index into
1569  * the ->drv[] array, or -1 if none are free.
1570  * is_controller_node indicates whether highest_lun should
1571  * count this disk, or if it's only being added to provide
1572  * a means to talk to the controller in case no logical
1573  * drives have yet been configured.
1574  */
1575 static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
1576 {
1577         int drv_index;
1578
1579         drv_index = cciss_find_free_drive_index(h->ctlr, controller_node);
1580         if (drv_index == -1)
1581                 return -1;
1582         /*Check if the gendisk needs to be allocated */
1583         if (!h->gendisk[drv_index]) {
1584                 h->gendisk[drv_index] =
1585                         alloc_disk(1 << NWD_SHIFT);
1586                 if (!h->gendisk[drv_index]) {
1587                         printk(KERN_ERR "cciss%d: could not "
1588                                 "allocate a new disk %d\n",
1589                                 h->ctlr, drv_index);
1590                         return -1;
1591                 }
1592         }
1593         h->drv[drv_index].LunID = lunid;
1594
1595         /* Don't need to mark this busy because nobody */
1596         /* else knows about this disk yet to contend */
1597         /* for access to it. */
1598         h->drv[drv_index].busy_configuring = 0;
1599         wmb();
1600         return drv_index;
1601 }
1602
1603 /* This is for the special case of a controller which
1604  * has no logical drives.  In this case, we still need
1605  * to register a disk so the controller can be accessed
1606  * by the Array Config Utility.
1607  */
1608 static void cciss_add_controller_node(ctlr_info_t *h)
1609 {
1610         struct gendisk *disk;
1611         int drv_index;
1612
1613         if (h->gendisk[0] != NULL) /* already did this? Then bail. */
1614                 return;
1615
1616         drv_index = cciss_add_gendisk(h, 0, 1);
1617         if (drv_index == -1) {
1618                 printk(KERN_WARNING "cciss%d: could not "
1619                         "add disk 0.\n", h->ctlr);
1620                 return;
1621         }
1622         h->drv[drv_index].block_size = 512;
1623         h->drv[drv_index].nr_blocks = 0;
1624         h->drv[drv_index].heads = 0;
1625         h->drv[drv_index].sectors = 0;
1626         h->drv[drv_index].cylinders = 0;
1627         h->drv[drv_index].raid_level = -1;
1628         memset(h->drv[drv_index].serial_no, 0, 16);
1629         disk = h->gendisk[drv_index];
1630         cciss_add_disk(h, disk, drv_index);
1631 }
1632
1633 /* This function will add and remove logical drives from the Logical
1634  * drive array of the controller and maintain persistency of ordering
1635  * so that mount points are preserved until the next reboot.  This allows
1636  * for the removal of logical drives in the middle of the drive array
1637  * without a re-ordering of those drives.
1638  * INPUT
1639  * h            = The controller to perform the operations on
1640  */
1641 static int rebuild_lun_table(ctlr_info_t *h, int first_time)
1642 {
1643         int ctlr = h->ctlr;
1644         int num_luns;
1645         ReportLunData_struct *ld_buff = NULL;
1646         int return_code;
1647         int listlength = 0;
1648         int i;
1649         int drv_found;
1650         int drv_index = 0;
1651         __u32 lunid = 0;
1652         unsigned long flags;
1653
1654         if (!capable(CAP_SYS_RAWIO))
1655                 return -EPERM;
1656
1657         /* Set busy_configuring flag for this operation */
1658         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1659         if (h->busy_configuring) {
1660                 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1661                 return -EBUSY;
1662         }
1663         h->busy_configuring = 1;
1664         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1665
1666         ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
1667         if (ld_buff == NULL)
1668                 goto mem_msg;
1669
1670         return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
1671                                       sizeof(ReportLunData_struct), 0,
1672                                       0, 0, TYPE_CMD);
1673
1674         if (return_code == IO_OK)
1675                 listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
1676         else {  /* reading number of logical volumes failed */
1677                 printk(KERN_WARNING "cciss: report logical volume"
1678                        " command failed\n");
1679                 listlength = 0;
1680                 goto freeret;
1681         }
1682
1683         num_luns = listlength / 8;      /* 8 bytes per entry */
1684         if (num_luns > CISS_MAX_LUN) {
1685                 num_luns = CISS_MAX_LUN;
1686                 printk(KERN_WARNING "cciss: more luns configured"
1687                        " on controller than can be handled by"
1688                        " this driver.\n");
1689         }
1690
1691         if (num_luns == 0)
1692                 cciss_add_controller_node(h);
1693
1694         /* Compare controller drive array to driver's drive array
1695          * to see if any drives are missing on the controller due
1696          * to action of Array Config Utility (user deletes drive)
1697          * and deregister logical drives which have disappeared.
1698          */
1699         for (i = 0; i <= h->highest_lun; i++) {
1700                 int j;
1701                 drv_found = 0;
1702
1703                 /* skip holes in the array from already deleted drives */
1704                 if (h->drv[i].raid_level == -1)
1705                         continue;
1706
1707                 for (j = 0; j < num_luns; j++) {
1708                         memcpy(&lunid, &ld_buff->LUN[j][0], 4);
1709                         lunid = le32_to_cpu(lunid);
1710                         if (h->drv[i].LunID == lunid) {
1711                                 drv_found = 1;
1712                                 break;
1713                         }
1714                 }
1715                 if (!drv_found) {
1716                         /* Deregister it from the OS, it's gone. */
1717                         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1718                         h->drv[i].busy_configuring = 1;
1719                         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1720                         return_code = deregister_disk(h, i, 1);
1721                         h->drv[i].busy_configuring = 0;
1722                 }
1723         }
1724
1725         /* Compare controller drive array to driver's drive array.
1726          * Check for updates in the drive information and any new drives
1727          * on the controller due to ACU adding logical drives, or changing
1728          * a logical drive's size, etc.  Reregister any new/changed drives
1729          */
1730         for (i = 0; i < num_luns; i++) {
1731                 int j;
1732
1733                 drv_found = 0;
1734
1735                 memcpy(&lunid, &ld_buff->LUN[i][0], 4);
1736                 lunid = le32_to_cpu(lunid);
1737
1738                 /* Find if the LUN is already in the drive array
1739                  * of the driver.  If so then update its info
1740                  * if not in use.  If it does not exist then find
1741                  * the first free index and add it.
1742                  */
1743                 for (j = 0; j <= h->highest_lun; j++) {
1744                         if (h->drv[j].raid_level != -1 &&
1745                                 h->drv[j].LunID == lunid) {
1746                                 drv_index = j;
1747                                 drv_found = 1;
1748                                 break;
1749                         }
1750                 }
1751
1752                 /* check if the drive was found already in the array */
1753                 if (!drv_found) {
1754                         drv_index = cciss_add_gendisk(h, lunid, 0);
1755                         if (drv_index == -1)
1756                                 goto freeret;
1757                 }
1758                 cciss_update_drive_info(ctlr, drv_index, first_time);
1759         }               /* end for */
1760
1761 freeret:
1762         kfree(ld_buff);
1763         h->busy_configuring = 0;
1764         /* We return -1 here to tell the ACU that we have registered/updated
1765          * all of the drives that we can and to keep it from calling us
1766          * additional times.
1767          */
1768         return -1;
1769 mem_msg:
1770         printk(KERN_ERR "cciss: out of memory\n");
1771         h->busy_configuring = 0;
1772         goto freeret;
1773 }
1774
1775 /* This function will deregister the disk and it's queue from the
1776  * kernel.  It must be called with the controller lock held and the
1777  * drv structures busy_configuring flag set.  It's parameters are:
1778  *
1779  * disk = This is the disk to be deregistered
1780  * drv  = This is the drive_info_struct associated with the disk to be
1781  *        deregistered.  It contains information about the disk used
1782  *        by the driver.
1783  * clear_all = This flag determines whether or not the disk information
1784  *             is going to be completely cleared out and the highest_lun
1785  *             reset.  Sometimes we want to clear out information about
1786  *             the disk in preparation for re-adding it.  In this case
1787  *             the highest_lun should be left unchanged and the LunID
1788  *             should not be cleared.
1789 */
1790 static int deregister_disk(ctlr_info_t *h, int drv_index,
1791                            int clear_all)
1792 {
1793         int i;
1794         struct gendisk *disk;
1795         drive_info_struct *drv;
1796
1797         if (!capable(CAP_SYS_RAWIO))
1798                 return -EPERM;
1799
1800         drv = &h->drv[drv_index];
1801         disk = h->gendisk[drv_index];
1802
1803         /* make sure logical volume is NOT is use */
1804         if (clear_all || (h->gendisk[0] == disk)) {
1805                 if (drv->usage_count > 1)
1806                         return -EBUSY;
1807         } else if (drv->usage_count > 0)
1808                 return -EBUSY;
1809
1810         /* invalidate the devices and deregister the disk.  If it is disk
1811          * zero do not deregister it but just zero out it's values.  This
1812          * allows us to delete disk zero but keep the controller registered.
1813          */
1814         if (h->gendisk[0] != disk) {
1815                 struct request_queue *q = disk->queue;
1816                 if (disk->flags & GENHD_FL_UP)
1817                         del_gendisk(disk);
1818                 if (q) {
1819                         blk_cleanup_queue(q);
1820                         /* Set drv->queue to NULL so that we do not try
1821                          * to call blk_start_queue on this queue in the
1822                          * interrupt handler
1823                          */
1824                         drv->queue = NULL;
1825                 }
1826                 /* If clear_all is set then we are deleting the logical
1827                  * drive, not just refreshing its info.  For drives
1828                  * other than disk 0 we will call put_disk.  We do not
1829                  * do this for disk 0 as we need it to be able to
1830                  * configure the controller.
1831                  */
1832                 if (clear_all){
1833                         /* This isn't pretty, but we need to find the
1834                          * disk in our array and NULL our the pointer.
1835                          * This is so that we will call alloc_disk if
1836                          * this index is used again later.
1837                          */
1838                         for (i=0; i < CISS_MAX_LUN; i++){
1839                                 if (h->gendisk[i] == disk) {
1840                                         h->gendisk[i] = NULL;
1841                                         break;
1842                                 }
1843                         }
1844                         put_disk(disk);
1845                 }
1846         } else {
1847                 set_capacity(disk, 0);
1848         }
1849
1850         --h->num_luns;
1851         /* zero out the disk size info */
1852         drv->nr_blocks = 0;
1853         drv->block_size = 0;
1854         drv->heads = 0;
1855         drv->sectors = 0;
1856         drv->cylinders = 0;
1857         drv->raid_level = -1;   /* This can be used as a flag variable to
1858                                  * indicate that this element of the drive
1859                                  * array is free.
1860                                  */
1861
1862         if (clear_all) {
1863                 /* check to see if it was the last disk */
1864                 if (drv == h->drv + h->highest_lun) {
1865                         /* if so, find the new hightest lun */
1866                         int i, newhighest = -1;
1867                         for (i = 0; i <= h->highest_lun; i++) {
1868                                 /* if the disk has size > 0, it is available */
1869                                 if (h->drv[i].heads)
1870                                         newhighest = i;
1871                         }
1872                         h->highest_lun = newhighest;
1873                 }
1874
1875                 drv->LunID = 0;
1876         }
1877         return 0;
1878 }
1879
1880 static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,      /* 0: address the controller,
1881                                                                                                                            1: address logical volume log_unit,
1882                                                                                                                            2: periph device address is scsi3addr */
1883                     unsigned int log_unit, __u8 page_code,
1884                     unsigned char *scsi3addr, int cmd_type)
1885 {
1886         ctlr_info_t *h = hba[ctlr];
1887         u64bit buff_dma_handle;
1888         int status = IO_OK;
1889
1890         c->cmd_type = CMD_IOCTL_PEND;
1891         c->Header.ReplyQueue = 0;
1892         if (buff != NULL) {
1893                 c->Header.SGList = 1;
1894                 c->Header.SGTotal = 1;
1895         } else {
1896                 c->Header.SGList = 0;
1897                 c->Header.SGTotal = 0;
1898         }
1899         c->Header.Tag.lower = c->busaddr;
1900
1901         c->Request.Type.Type = cmd_type;
1902         if (cmd_type == TYPE_CMD) {
1903                 switch (cmd) {
1904                 case CISS_INQUIRY:
1905                         /* If the logical unit number is 0 then, this is going
1906                            to controller so It's a physical command
1907                            mode = 0 target = 0.  So we have nothing to write.
1908                            otherwise, if use_unit_num == 1,
1909                            mode = 1(volume set addressing) target = LUNID
1910                            otherwise, if use_unit_num == 2,
1911                            mode = 0(periph dev addr) target = scsi3addr */
1912                         if (use_unit_num == 1) {
1913                                 c->Header.LUN.LogDev.VolId =
1914                                     h->drv[log_unit].LunID;
1915                                 c->Header.LUN.LogDev.Mode = 1;
1916                         } else if (use_unit_num == 2) {
1917                                 memcpy(c->Header.LUN.LunAddrBytes, scsi3addr,
1918                                        8);
1919                                 c->Header.LUN.LogDev.Mode = 0;
1920                         }
1921                         /* are we trying to read a vital product page */
1922                         if (page_code != 0) {
1923                                 c->Request.CDB[1] = 0x01;
1924                                 c->Request.CDB[2] = page_code;
1925                         }
1926                         c->Request.CDBLen = 6;
1927                         c->Request.Type.Attribute = ATTR_SIMPLE;
1928                         c->Request.Type.Direction = XFER_READ;
1929                         c->Request.Timeout = 0;
1930                         c->Request.CDB[0] = CISS_INQUIRY;
1931                         c->Request.CDB[4] = size & 0xFF;
1932                         break;
1933                 case CISS_REPORT_LOG:
1934                 case CISS_REPORT_PHYS:
1935                         /* Talking to controller so It's a physical command
1936                            mode = 00 target = 0.  Nothing to write.
1937                          */
1938                         c->Request.CDBLen = 12;
1939                         c->Request.Type.Attribute = ATTR_SIMPLE;
1940                         c->Request.Type.Direction = XFER_READ;
1941                         c->Request.Timeout = 0;
1942                         c->Request.CDB[0] = cmd;
1943                         c->Request.CDB[6] = (size >> 24) & 0xFF;        //MSB
1944                         c->Request.CDB[7] = (size >> 16) & 0xFF;
1945                         c->Request.CDB[8] = (size >> 8) & 0xFF;
1946                         c->Request.CDB[9] = size & 0xFF;
1947                         break;
1948
1949                 case CCISS_READ_CAPACITY:
1950                         c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
1951                         c->Header.LUN.LogDev.Mode = 1;
1952                         c->Request.CDBLen = 10;
1953                         c->Request.Type.Attribute = ATTR_SIMPLE;
1954                         c->Request.Type.Direction = XFER_READ;
1955                         c->Request.Timeout = 0;
1956                         c->Request.CDB[0] = cmd;
1957                         break;
1958                 case CCISS_READ_CAPACITY_16:
1959                         c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
1960                         c->Header.LUN.LogDev.Mode = 1;
1961                         c->Request.CDBLen = 16;
1962                         c->Request.Type.Attribute = ATTR_SIMPLE;
1963                         c->Request.Type.Direction = XFER_READ;
1964                         c->Request.Timeout = 0;
1965                         c->Request.CDB[0] = cmd;
1966                         c->Request.CDB[1] = 0x10;
1967                         c->Request.CDB[10] = (size >> 24) & 0xFF;
1968                         c->Request.CDB[11] = (size >> 16) & 0xFF;
1969                         c->Request.CDB[12] = (size >> 8) & 0xFF;
1970                         c->Request.CDB[13] = size & 0xFF;
1971                         c->Request.Timeout = 0;
1972                         c->Request.CDB[0] = cmd;
1973                         break;
1974                 case CCISS_CACHE_FLUSH:
1975                         c->Request.CDBLen = 12;
1976                         c->Request.Type.Attribute = ATTR_SIMPLE;
1977                         c->Request.Type.Direction = XFER_WRITE;
1978                         c->Request.Timeout = 0;
1979                         c->Request.CDB[0] = BMIC_WRITE;
1980                         c->Request.CDB[6] = BMIC_CACHE_FLUSH;
1981                         break;
1982                 default:
1983                         printk(KERN_WARNING
1984                                "cciss%d:  Unknown Command 0x%c\n", ctlr, cmd);
1985                         return IO_ERROR;
1986                 }
1987         } else if (cmd_type == TYPE_MSG) {
1988                 switch (cmd) {
1989                 case 0: /* ABORT message */
1990                         c->Request.CDBLen = 12;
1991                         c->Request.Type.Attribute = ATTR_SIMPLE;
1992                         c->Request.Type.Direction = XFER_WRITE;
1993                         c->Request.Timeout = 0;
1994                         c->Request.CDB[0] = cmd;        /* abort */
1995                         c->Request.CDB[1] = 0;  /* abort a command */
1996                         /* buff contains the tag of the command to abort */
1997                         memcpy(&c->Request.CDB[4], buff, 8);
1998                         break;
1999                 case 1: /* RESET message */
2000                         c->Request.CDBLen = 12;
2001                         c->Request.Type.Attribute = ATTR_SIMPLE;
2002                         c->Request.Type.Direction = XFER_WRITE;
2003                         c->Request.Timeout = 0;
2004                         memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
2005                         c->Request.CDB[0] = cmd;        /* reset */
2006                         c->Request.CDB[1] = 0x04;       /* reset a LUN */
2007                         break;
2008                 case 3: /* No-Op message */
2009                         c->Request.CDBLen = 1;
2010                         c->Request.Type.Attribute = ATTR_SIMPLE;
2011                         c->Request.Type.Direction = XFER_WRITE;
2012                         c->Request.Timeout = 0;
2013                         c->Request.CDB[0] = cmd;
2014                         break;
2015                 default:
2016                         printk(KERN_WARNING
2017                                "cciss%d: unknown message type %d\n", ctlr, cmd);
2018                         return IO_ERROR;
2019                 }
2020         } else {
2021                 printk(KERN_WARNING
2022                        "cciss%d: unknown command type %d\n", ctlr, cmd_type);
2023                 return IO_ERROR;
2024         }
2025         /* Fill in the scatter gather information */
2026         if (size > 0) {
2027                 buff_dma_handle.val = (__u64) pci_map_single(h->pdev,
2028                                                              buff, size,
2029                                                              PCI_DMA_BIDIRECTIONAL);
2030                 c->SG[0].Addr.lower = buff_dma_handle.val32.lower;
2031                 c->SG[0].Addr.upper = buff_dma_handle.val32.upper;
2032                 c->SG[0].Len = size;
2033                 c->SG[0].Ext = 0;       /* we are not chaining */
2034         }
2035         return status;
2036 }
2037
2038 static int sendcmd_withirq(__u8 cmd,
2039                            int ctlr,
2040                            void *buff,
2041                            size_t size,
2042                            unsigned int use_unit_num,
2043                            unsigned int log_unit, __u8 page_code, int cmd_type)
2044 {
2045         ctlr_info_t *h = hba[ctlr];
2046         CommandList_struct *c;
2047         u64bit buff_dma_handle;
2048         unsigned long flags;
2049         int return_status;
2050         DECLARE_COMPLETION_ONSTACK(wait);
2051
2052         if ((c = cmd_alloc(h, 0)) == NULL)
2053                 return -ENOMEM;
2054         return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2055                                  log_unit, page_code, NULL, cmd_type);
2056         if (return_status != IO_OK) {
2057                 cmd_free(h, c, 0);
2058                 return return_status;
2059         }
2060       resend_cmd2:
2061         c->waiting = &wait;
2062
2063         /* Put the request on the tail of the queue and send it */
2064         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
2065         addQ(&h->reqQ, c);
2066         h->Qdepth++;
2067         start_io(h);
2068         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
2069
2070         wait_for_completion(&wait);
2071
2072         if (c->err_info->CommandStatus != 0) {  /* an error has occurred */
2073                 switch (c->err_info->CommandStatus) {
2074                 case CMD_TARGET_STATUS:
2075                         printk(KERN_WARNING "cciss: cmd %p has "
2076                                " completed with errors\n", c);
2077                         if (c->err_info->ScsiStatus) {
2078                                 printk(KERN_WARNING "cciss: cmd %p "
2079                                        "has SCSI Status = %x\n",
2080                                        c, c->err_info->ScsiStatus);
2081                         }
2082
2083                         break;
2084                 case CMD_DATA_UNDERRUN:
2085                 case CMD_DATA_OVERRUN:
2086                         /* expected for inquire and report lun commands */
2087                         break;
2088                 case CMD_INVALID:
2089                         printk(KERN_WARNING "cciss: Cmd %p is "
2090                                "reported invalid\n", c);
2091                         return_status = IO_ERROR;
2092                         break;
2093                 case CMD_PROTOCOL_ERR:
2094                         printk(KERN_WARNING "cciss: cmd %p has "
2095                                "protocol error \n", c);
2096                         return_status = IO_ERROR;
2097                         break;
2098                 case CMD_HARDWARE_ERR:
2099                         printk(KERN_WARNING "cciss: cmd %p had "
2100                                " hardware error\n", c);
2101                         return_status = IO_ERROR;
2102                         break;
2103                 case CMD_CONNECTION_LOST:
2104                         printk(KERN_WARNING "cciss: cmd %p had "
2105                                "connection lost\n", c);
2106                         return_status = IO_ERROR;
2107                         break;
2108                 case CMD_ABORTED:
2109                         printk(KERN_WARNING "cciss: cmd %p was "
2110                                "aborted\n", c);
2111                         return_status = IO_ERROR;
2112                         break;
2113                 case CMD_ABORT_FAILED:
2114                         printk(KERN_WARNING "cciss: cmd %p reports "
2115                                "abort failed\n", c);
2116                         return_status = IO_ERROR;
2117                         break;
2118                 case CMD_UNSOLICITED_ABORT:
2119                         printk(KERN_WARNING
2120                                "cciss%d: unsolicited abort %p\n", ctlr, c);
2121                         if (c->retry_count < MAX_CMD_RETRIES) {
2122                                 printk(KERN_WARNING
2123                                        "cciss%d: retrying %p\n", ctlr, c);
2124                                 c->retry_count++;
2125                                 /* erase the old error information */
2126                                 memset(c->err_info, 0,
2127                                        sizeof(ErrorInfo_struct));
2128                                 return_status = IO_OK;
2129                                 INIT_COMPLETION(wait);
2130                                 goto resend_cmd2;
2131                         }
2132                         return_status = IO_ERROR;
2133                         break;
2134                 default:
2135                         printk(KERN_WARNING "cciss: cmd %p returned "
2136                                "unknown status %x\n", c,
2137                                c->err_info->CommandStatus);
2138                         return_status = IO_ERROR;
2139                 }
2140         }
2141         /* unlock the buffers from DMA */
2142         buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2143         buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2144         pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
2145                          c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2146         cmd_free(h, c, 0);
2147         return return_status;
2148 }
2149
2150 static void cciss_geometry_inquiry(int ctlr, int logvol,
2151                                    int withirq, sector_t total_size,
2152                                    unsigned int block_size,
2153                                    InquiryData_struct *inq_buff,
2154                                    drive_info_struct *drv)
2155 {
2156         int return_code;
2157         unsigned long t;
2158
2159         memset(inq_buff, 0, sizeof(InquiryData_struct));
2160         if (withirq)
2161                 return_code = sendcmd_withirq(CISS_INQUIRY, ctlr,
2162                                               inq_buff, sizeof(*inq_buff), 1,
2163                                               logvol, 0xC1, TYPE_CMD);
2164         else
2165                 return_code = sendcmd(CISS_INQUIRY, ctlr, inq_buff,
2166                                       sizeof(*inq_buff), 1, logvol, 0xC1, NULL,
2167                                       TYPE_CMD);
2168         if (return_code == IO_OK) {
2169                 if (inq_buff->data_byte[8] == 0xFF) {
2170                         printk(KERN_WARNING
2171                                "cciss: reading geometry failed, volume "
2172                                "does not support reading geometry\n");
2173                         drv->heads = 255;
2174                         drv->sectors = 32;      // Sectors per track
2175                         drv->cylinders = total_size + 1;
2176                         drv->raid_level = RAID_UNKNOWN;
2177                 } else {
2178                         drv->heads = inq_buff->data_byte[6];
2179                         drv->sectors = inq_buff->data_byte[7];
2180                         drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
2181                         drv->cylinders += inq_buff->data_byte[5];
2182                         drv->raid_level = inq_buff->data_byte[8];
2183                 }
2184                 drv->block_size = block_size;
2185                 drv->nr_blocks = total_size + 1;
2186                 t = drv->heads * drv->sectors;
2187                 if (t > 1) {
2188                         sector_t real_size = total_size + 1;
2189                         unsigned long rem = sector_div(real_size, t);
2190                         if (rem)
2191                                 real_size++;
2192                         drv->cylinders = real_size;
2193                 }
2194         } else {                /* Get geometry failed */
2195                 printk(KERN_WARNING "cciss: reading geometry failed\n");
2196         }
2197         printk(KERN_INFO "      heads=%d, sectors=%d, cylinders=%d\n\n",
2198                drv->heads, drv->sectors, drv->cylinders);
2199 }
2200
2201 static void
2202 cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
2203                     unsigned int *block_size)
2204 {
2205         ReadCapdata_struct *buf;
2206         int return_code;
2207
2208         buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
2209         if (!buf) {
2210                 printk(KERN_WARNING "cciss: out of memory\n");
2211                 return;
2212         }
2213
2214         if (withirq)
2215                 return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
2216                                 ctlr, buf, sizeof(ReadCapdata_struct),
2217                                         1, logvol, 0, TYPE_CMD);
2218         else
2219                 return_code = sendcmd(CCISS_READ_CAPACITY,
2220                                 ctlr, buf, sizeof(ReadCapdata_struct),
2221                                         1, logvol, 0, NULL, TYPE_CMD);
2222         if (return_code == IO_OK) {
2223                 *total_size = be32_to_cpu(*(__be32 *) buf->total_size);
2224                 *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
2225         } else {                /* read capacity command failed */
2226                 printk(KERN_WARNING "cciss: read capacity failed\n");
2227                 *total_size = 0;
2228                 *block_size = BLOCK_SIZE;
2229         }
2230         if (*total_size != 0)
2231                 printk(KERN_INFO "      blocks= %llu block_size= %d\n",
2232                 (unsigned long long)*total_size+1, *block_size);
2233         kfree(buf);
2234 }
2235
2236 static void
2237 cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,                                 unsigned int *block_size)
2238 {
2239         ReadCapdata_struct_16 *buf;
2240         int return_code;
2241
2242         buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
2243         if (!buf) {
2244                 printk(KERN_WARNING "cciss: out of memory\n");
2245                 return;
2246         }
2247
2248         if (withirq) {
2249                 return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
2250                         ctlr, buf, sizeof(ReadCapdata_struct_16),
2251                                 1, logvol, 0, TYPE_CMD);
2252         }
2253         else {
2254                 return_code = sendcmd(CCISS_READ_CAPACITY_16,
2255                         ctlr, buf, sizeof(ReadCapdata_struct_16),
2256                                 1, logvol, 0, NULL, TYPE_CMD);
2257         }
2258         if (return_code == IO_OK) {
2259                 *total_size = be64_to_cpu(*(__be64 *) buf->total_size);
2260                 *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
2261         } else {                /* read capacity command failed */
2262                 printk(KERN_WARNING "cciss: read capacity failed\n");
2263                 *total_size = 0;
2264                 *block_size = BLOCK_SIZE;
2265         }
2266         printk(KERN_INFO "      blocks= %llu block_size= %d\n",
2267                (unsigned long long)*total_size+1, *block_size);
2268         kfree(buf);
2269 }
2270
2271 static int cciss_revalidate(struct gendisk *disk)
2272 {
2273         ctlr_info_t *h = get_host(disk);
2274         drive_info_struct *drv = get_drv(disk);
2275         int logvol;
2276         int FOUND = 0;
2277         unsigned int block_size;
2278         sector_t total_size;
2279         InquiryData_struct *inq_buff = NULL;
2280
2281         for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
2282                 if (h->drv[logvol].LunID == drv->LunID) {
2283                         FOUND = 1;
2284                         break;
2285                 }
2286         }
2287
2288         if (!FOUND)
2289                 return 1;
2290
2291         inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
2292         if (inq_buff == NULL) {
2293                 printk(KERN_WARNING "cciss: out of memory\n");
2294                 return 1;
2295         }
2296         if (h->cciss_read == CCISS_READ_10) {
2297                 cciss_read_capacity(h->ctlr, logvol, 1,
2298                                         &total_size, &block_size);
2299         } else {
2300                 cciss_read_capacity_16(h->ctlr, logvol, 1,
2301                                         &total_size, &block_size);
2302         }
2303         cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
2304                                inq_buff, drv);
2305
2306         blk_queue_hardsect_size(drv->queue, drv->block_size);
2307         set_capacity(disk, drv->nr_blocks);
2308
2309         kfree(inq_buff);
2310         return 0;
2311 }
2312
2313 /*
2314  *   Wait polling for a command to complete.
2315  *   The memory mapped FIFO is polled for the completion.
2316  *   Used only at init time, interrupts from the HBA are disabled.
2317  */
2318 static unsigned long pollcomplete(int ctlr)
2319 {
2320         unsigned long done;
2321         int i;
2322
2323         /* Wait (up to 20 seconds) for a command to complete */
2324
2325         for (i = 20 * HZ; i > 0; i--) {
2326                 done = hba[ctlr]->access.command_completed(hba[ctlr]);
2327                 if (done == FIFO_EMPTY)
2328                         schedule_timeout_uninterruptible(1);
2329                 else
2330                         return done;
2331         }
2332         /* Invalid address to tell caller we ran out of time */
2333         return 1;
2334 }
2335
2336 static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete)
2337 {
2338         /* We get in here if sendcmd() is polling for completions
2339            and gets some command back that it wasn't expecting --
2340            something other than that which it just sent down.
2341            Ordinarily, that shouldn't happen, but it can happen when
2342            the scsi tape stuff gets into error handling mode, and
2343            starts using sendcmd() to try to abort commands and
2344            reset tape drives.  In that case, sendcmd may pick up
2345            completions of commands that were sent to logical drives
2346            through the block i/o system, or cciss ioctls completing, etc.
2347            In that case, we need to save those completions for later
2348            processing by the interrupt handler.
2349          */
2350
2351 #ifdef CONFIG_CISS_SCSI_TAPE
2352         struct sendcmd_reject_list *srl = &hba[ctlr]->scsi_rejects;
2353
2354         /* If it's not the scsi tape stuff doing error handling, (abort */
2355         /* or reset) then we don't expect anything weird. */
2356         if (cmd != CCISS_RESET_MSG && cmd != CCISS_ABORT_MSG) {
2357 #endif
2358                 printk(KERN_WARNING "cciss cciss%d: SendCmd "
2359                        "Invalid command list address returned! (%lx)\n",
2360                        ctlr, complete);
2361                 /* not much we can do. */
2362 #ifdef CONFIG_CISS_SCSI_TAPE
2363                 return 1;
2364         }
2365
2366         /* We've sent down an abort or reset, but something else
2367            has completed */
2368         if (srl->ncompletions >= (hba[ctlr]->nr_cmds + 2)) {
2369                 /* Uh oh.  No room to save it for later... */
2370                 printk(KERN_WARNING "cciss%d: Sendcmd: Invalid command addr, "
2371                        "reject list overflow, command lost!\n", ctlr);
2372                 return 1;
2373         }
2374         /* Save it for later */
2375         srl->complete[srl->ncompletions] = complete;
2376         srl->ncompletions++;
2377 #endif
2378         return 0;
2379 }
2380
2381 /*
2382  * Send a command to the controller, and wait for it to complete.
2383  * Only used at init time.
2384  */
2385 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,      /* 0: address the controller,
2386                                                                                                    1: address logical volume log_unit,
2387                                                                                                    2: periph device address is scsi3addr */
2388                    unsigned int log_unit,
2389                    __u8 page_code, unsigned char *scsi3addr, int cmd_type)
2390 {
2391         CommandList_struct *c;
2392         int i;
2393         unsigned long complete;
2394         ctlr_info_t *info_p = hba[ctlr];
2395         u64bit buff_dma_handle;
2396         int status, done = 0;
2397
2398         if ((c = cmd_alloc(info_p, 1)) == NULL) {
2399                 printk(KERN_WARNING "cciss: unable to get memory");
2400                 return IO_ERROR;
2401         }
2402         status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2403                           log_unit, page_code, scsi3addr, cmd_type);
2404         if (status != IO_OK) {
2405                 cmd_free(info_p, c, 1);
2406                 return status;
2407         }
2408       resend_cmd1:
2409         /*
2410          * Disable interrupt
2411          */
2412 #ifdef CCISS_DEBUG
2413         printk(KERN_DEBUG "cciss: turning intr off\n");
2414 #endif                          /* CCISS_DEBUG */
2415         info_p->access.set_intr_mask(info_p, CCISS_INTR_OFF);
2416
2417         /* Make sure there is room in the command FIFO */
2418         /* Actually it should be completely empty at this time */
2419         /* unless we are in here doing error handling for the scsi */
2420         /* tape side of the driver. */
2421         for (i = 200000; i > 0; i--) {
2422                 /* if fifo isn't full go */
2423                 if (!(info_p->access.fifo_full(info_p))) {
2424
2425                         break;
2426                 }
2427                 udelay(10);
2428                 printk(KERN_WARNING "cciss cciss%d: SendCmd FIFO full,"
2429                        " waiting!\n", ctlr);
2430         }
2431         /*
2432          * Send the cmd
2433          */
2434         info_p->access.submit_command(info_p, c);
2435         done = 0;
2436         do {
2437                 complete = pollcomplete(ctlr);
2438
2439 #ifdef CCISS_DEBUG
2440                 printk(KERN_DEBUG "cciss: command completed\n");
2441 #endif                          /* CCISS_DEBUG */
2442
2443                 if (complete == 1) {
2444                         printk(KERN_WARNING
2445                                "cciss cciss%d: SendCmd Timeout out, "
2446                                "No command list address returned!\n", ctlr);
2447                         status = IO_ERROR;
2448                         done = 1;
2449                         break;
2450                 }
2451
2452                 /* This will need to change for direct lookup completions */
2453                 if ((complete & CISS_ERROR_BIT)
2454                     && (complete & ~CISS_ERROR_BIT) == c->busaddr) {
2455                         /* if data overrun or underun on Report command
2456                            ignore it
2457                          */
2458                         if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
2459                              (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
2460                              (c->Request.CDB[0] == CISS_INQUIRY)) &&
2461                             ((c->err_info->CommandStatus ==
2462                               CMD_DATA_OVERRUN) ||
2463                              (c->err_info->CommandStatus == CMD_DATA_UNDERRUN)
2464                             )) {
2465                                 complete = c->busaddr;
2466                         } else {
2467                                 if (c->err_info->CommandStatus ==
2468                                     CMD_UNSOLICITED_ABORT) {
2469                                         printk(KERN_WARNING "cciss%d: "
2470                                                "unsolicited abort %p\n",
2471                                                ctlr, c);
2472                                         if (c->retry_count < MAX_CMD_RETRIES) {
2473                                                 printk(KERN_WARNING
2474                                                        "cciss%d: retrying %p\n",
2475                                                        ctlr, c);
2476                                                 c->retry_count++;
2477                                                 /* erase the old error */
2478                                                 /* information */
2479                                                 memset(c->err_info, 0,
2480                                                        sizeof
2481                                                        (ErrorInfo_struct));
2482                                                 goto resend_cmd1;
2483                                         } else {
2484                                                 printk(KERN_WARNING
2485                                                        "cciss%d: retried %p too "
2486                                                        "many times\n", ctlr, c);
2487                                                 status = IO_ERROR;
2488                                                 goto cleanup1;
2489                                         }
2490                                 } else if (c->err_info->CommandStatus ==
2491                                            CMD_UNABORTABLE) {
2492                                         printk(KERN_WARNING
2493                                                "cciss%d: command could not be aborted.\n",
2494                                                ctlr);
2495                                         status = IO_ERROR;
2496                                         goto cleanup1;
2497                                 }
2498                                 printk(KERN_WARNING "ciss ciss%d: sendcmd"
2499                                        " Error %x \n", ctlr,
2500                                        c->err_info->CommandStatus);
2501                                 printk(KERN_WARNING "ciss ciss%d: sendcmd"
2502                                        " offensive info\n"
2503                                        "  size %x\n   num %x   value %x\n",
2504                                        ctlr,
2505                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2506                                        offense_size,
2507                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2508                                        offense_num,
2509                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2510                                        offense_value);
2511                                 status = IO_ERROR;
2512                                 goto cleanup1;
2513                         }
2514                 }
2515                 /* This will need changing for direct lookup completions */
2516                 if (complete != c->busaddr) {
2517                         if (add_sendcmd_reject(cmd, ctlr, complete) != 0) {
2518                                 BUG();  /* we are pretty much hosed if we get here. */
2519                         }
2520                         continue;
2521                 } else
2522                         done = 1;
2523         } while (!done);
2524
2525       cleanup1:
2526         /* unlock the data buffer from DMA */
2527         buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2528         buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2529         pci_unmap_single(info_p->pdev, (dma_addr_t) buff_dma_handle.val,
2530                          c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2531 #ifdef CONFIG_CISS_SCSI_TAPE
2532         /* if we saved some commands for later, process them now. */
2533         if (info_p->scsi_rejects.ncompletions > 0)
2534                 do_cciss_intr(0, info_p);
2535 #endif
2536         cmd_free(info_p, c, 1);
2537         return status;
2538 }
2539
2540 /*
2541  * Map (physical) PCI mem into (virtual) kernel space
2542  */
2543 static void __iomem *remap_pci_mem(ulong base, ulong size)
2544 {
2545         ulong page_base = ((ulong) base) & PAGE_MASK;
2546         ulong page_offs = ((ulong) base) - page_base;
2547         void __iomem *page_remapped = ioremap(page_base, page_offs + size);
2548
2549         return page_remapped ? (page_remapped + page_offs) : NULL;
2550 }
2551
2552 /*
2553  * Takes jobs of the Q and sends them to the hardware, then puts it on
2554  * the Q to wait for completion.
2555  */
2556 static void start_io(ctlr_info_t *h)
2557 {
2558         CommandList_struct *c;
2559
2560         while (!hlist_empty(&h->reqQ)) {
2561                 c = hlist_entry(h->reqQ.first, CommandList_struct, list);
2562                 /* can't do anything if fifo is full */
2563                 if ((h->access.fifo_full(h))) {
2564                         printk(KERN_WARNING "cciss: fifo full\n");
2565                         break;
2566                 }
2567
2568                 /* Get the first entry from the Request Q */
2569                 removeQ(c);
2570                 h->Qdepth--;
2571
2572                 /* Tell the controller execute command */
2573                 h->access.submit_command(h, c);
2574
2575                 /* Put job onto the completed Q */
2576                 addQ(&h->cmpQ, c);
2577         }
2578 }
2579
2580 /* Assumes that CCISS_LOCK(h->ctlr) is held. */
2581 /* Zeros out the error record and then resends the command back */
2582 /* to the controller */
2583 static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
2584 {
2585         /* erase the old error information */
2586         memset(c->err_info, 0, sizeof(ErrorInfo_struct));
2587
2588         /* add it to software queue and then send it to the controller */
2589         addQ(&h->reqQ, c);
2590         h->Qdepth++;
2591         if (h->Qdepth > h->maxQsinceinit)
2592                 h->maxQsinceinit = h->Qdepth;
2593
2594         start_io(h);
2595 }
2596
/*
 * Pack four status bytes into one word (inverse of the macros in scsi.h):
 *   bits  0-7   scsi_status_byte
 *   bits  8-15  msg_byte
 *   bits 16-23  host_byte
 *   bits 24-31  driver_byte
 * Only the low 8 bits of each argument are used.
 */
static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
	unsigned int msg_byte, unsigned int host_byte,
	unsigned int driver_byte)
{
	unsigned int packed;

	/* build from the most significant byte downwards */
	packed = driver_byte & 0xff;
	packed = (packed << 8) | (host_byte & 0xff);
	packed = (packed << 8) | (msg_byte & 0xff);
	packed = (packed << 8) | (scsi_status_byte & 0xff);

	return packed;
}
2607
2608 static inline int evaluate_target_status(ctlr_info_t *h,
2609                         CommandList_struct *cmd, int *retry_cmd)
2610 {
2611         unsigned char sense_key;
2612         unsigned char status_byte, msg_byte, host_byte, driver_byte;
2613         int error_value;
2614
2615         *retry_cmd = 0;
2616         /* If we get in here, it means we got "target status", that is, scsi status */
2617         status_byte = cmd->err_info->ScsiStatus;
2618         driver_byte = DRIVER_OK;
2619         msg_byte = cmd->err_info->CommandStatus; /* correct?  seems too device specific */
2620
2621         if (blk_pc_request(cmd->rq))
2622                 host_byte = DID_PASSTHROUGH;
2623         else
2624                 host_byte = DID_OK;
2625
2626         error_value = make_status_bytes(status_byte, msg_byte,
2627                 host_byte, driver_byte);
2628
2629         if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
2630                 if (!blk_pc_request(cmd->rq))
2631                         printk(KERN_WARNING "cciss: cmd %p "
2632                                "has SCSI Status 0x%x\n",
2633                                cmd, cmd->err_info->ScsiStatus);
2634                 return error_value;
2635         }
2636
2637         /* check the sense key */
2638         sense_key = 0xf & cmd->err_info->SenseInfo[2];
2639         /* no status or recovered error */
2640         if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
2641                 error_value = 0;
2642
2643         if (check_for_unit_attention(h, cmd)) {
2644                 *retry_cmd = !blk_pc_request(cmd->rq);
2645                 return 0;
2646         }
2647
2648         if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
2649                 if (error_value != 0)
2650                         printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
2651                                " sense key = 0x%x\n", cmd, sense_key);
2652                 return error_value;
2653         }
2654
2655         /* SG_IO or similar, copy sense data back */
2656         if (cmd->rq->sense) {
2657                 if (cmd->rq->sense_len > cmd->err_info->SenseLen)
2658                         cmd->rq->sense_len = cmd->err_info->SenseLen;
2659                 memcpy(cmd->rq->sense, cmd->err_info->SenseInfo,
2660                         cmd->rq->sense_len);
2661         } else
2662                 cmd->rq->sense_len = 0;
2663
2664         return error_value;
2665 }
2666
2667 /* checks the status of the job and calls complete buffers to mark all
2668  * buffers for the completed job. Note that this function does not need
2669  * to hold the hba/queue lock.
2670  */
2671 static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
2672                                     int timeout)
2673 {
2674         int retry_cmd = 0;
2675         struct request *rq = cmd->rq;
2676
2677         rq->errors = 0;
2678
2679         if (timeout)
2680                 rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
2681
2682         if (cmd->err_info->CommandStatus == 0)  /* no error has occurred */
2683                 goto after_error_processing;
2684
2685         switch (cmd->err_info->CommandStatus) {
2686         case CMD_TARGET_STATUS:
2687                 rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
2688                 break;
2689         case CMD_DATA_UNDERRUN:
2690                 if (blk_fs_request(cmd->rq)) {
2691                         printk(KERN_WARNING "cciss: cmd %p has"
2692                                " completed with data underrun "
2693                                "reported\n", cmd);
2694                         cmd->rq->data_len = cmd->err_info->ResidualCnt;
2695                 }
2696                 break;
2697         case CMD_DATA_OVERRUN:
2698                 if (blk_fs_request(cmd->rq))
2699                         printk(KERN_WARNING "cciss: cmd %p has"
2700                                " completed with data overrun "
2701                                "reported\n", cmd);
2702                 break;
2703         case CMD_INVALID:
2704                 printk(KERN_WARNING "cciss: cmd %p is "
2705                        "reported invalid\n", cmd);
2706                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2707                         cmd->err_info->CommandStatus, DRIVER_OK,
2708                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2709                 break;
2710         case CMD_PROTOCOL_ERR:
2711                 printk(KERN_WARNING "cciss: cmd %p has "
2712                        "protocol error \n", cmd);
2713                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2714                         cmd->err_info->CommandStatus, DRIVER_OK,
2715                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2716                 break;
2717         case CMD_HARDWARE_ERR:
2718                 printk(KERN_WARNING "cciss: cmd %p had "
2719                        " hardware error\n", cmd);
2720                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2721                         cmd->err_info->CommandStatus, DRIVER_OK,
2722                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2723                 break;
2724         case CMD_CONNECTION_LOST:
2725                 printk(KERN_WARNING "cciss: cmd %p had "
2726                        "connection lost\n", cmd);
2727                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2728                         cmd->err_info->CommandStatus, DRIVER_OK,
2729                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2730                 break;
2731         case CMD_ABORTED:
2732                 printk(KERN_WARNING "cciss: cmd %p was "
2733                        "aborted\n", cmd);
2734                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2735                         cmd->err_info->CommandStatus, DRIVER_OK,
2736                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
2737                 break;
2738         case CMD_ABORT_FAILED:
2739                 printk(KERN_WARNING "cciss: cmd %p reports "
2740                        "abort failed\n", cmd);
2741                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2742                         cmd->err_info->CommandStatus, DRIVER_OK,
2743                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2744                 break;
2745         case CMD_UNSOLICITED_ABORT:
2746                 printk(KERN_WARNING "cciss%d: unsolicited "
2747                        "abort %p\n", h->ctlr, cmd);
2748                 if (cmd->retry_count < MAX_CMD_RETRIES) {
2749                         retry_cmd = 1;
2750                         printk(KERN_WARNING
2751                                "cciss%d: retrying %p\n", h->ctlr, cmd);
2752                         cmd->retry_count++;
2753                 } else
2754                         printk(KERN_WARNING
2755                                "cciss%d: %p retried too "
2756                                "many times\n", h->ctlr, cmd);
2757                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2758                         cmd->err_info->CommandStatus, DRIVER_OK,
2759                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
2760                 break;
2761         case CMD_TIMEOUT:
2762                 printk(KERN_WARNING "cciss: cmd %p timedout\n", cmd);
2763                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2764                         cmd->err_info->CommandStatus, DRIVER_OK,
2765                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2766                 break;
2767         default:
2768                 printk(KERN_WARNING "cciss: cmd %p returned "
2769                        "unknown status %x\n", cmd,
2770                        cmd->err_info->CommandStatus);
2771                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2772                         cmd->err_info->CommandStatus, DRIVER_OK,
2773                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2774         }
2775
2776 after_error_processing:
2777
2778         /* We need to return this command */
2779         if (retry_cmd) {
2780                 resend_cciss_cmd(h, cmd);
2781                 return;
2782         }
2783         cmd->rq->completion_data = cmd;
2784         blk_complete_request(cmd->rq);
2785 }
2786
2787 /*
2788  * Get a request and submit it to the controller.
2789  */
2790 static void do_cciss_request(struct request_queue *q)
2791 {
2792         ctlr_info_t *h = q->queuedata;
2793         CommandList_struct *c;
2794         sector_t start_blk;
2795         int seg;
2796         struct request *creq;
2797         u64bit temp64;
2798         struct scatterlist tmp_sg[MAXSGENTRIES];
2799         drive_info_struct *drv;
2800         int i, dir;
2801
2802         /* We call start_io here in case there is a command waiting on the
2803          * queue that has not been sent.
2804          */
2805         if (blk_queue_plugged(q))
2806                 goto startio;
2807
2808       queue:
2809         creq = elv_next_request(q);
2810         if (!creq)
2811                 goto startio;
2812
2813         BUG_ON(creq->nr_phys_segments > MAXSGENTRIES);
2814
2815         if ((c = cmd_alloc(h, 1)) == NULL)
2816                 goto full;
2817
2818         blkdev_dequeue_request(creq);
2819
2820         spin_unlock_irq(q->queue_lock);
2821
2822         c->cmd_type = CMD_RWREQ;
2823         c->rq = creq;
2824
2825         /* fill in the request */
2826         drv = creq->rq_disk->private_data;
2827         c->Header.ReplyQueue = 0;       // unused in simple mode
2828         /* got command from pool, so use the command block index instead */
2829         /* for direct lookups. */
2830         /* The first 2 bits are reserved for controller error reporting. */
2831         c->Header.Tag.lower = (c->cmdindex << 3);
2832         c->Header.Tag.lower |= 0x04;    /* flag for direct lookup. */
2833         c->Header.LUN.LogDev.VolId = drv->LunID;
2834         c->Header.LUN.LogDev.Mode = 1;
2835         c->Request.CDBLen = 10; // 12 byte commands not in FW yet;
2836         c->Request.Type.Type = TYPE_CMD;        // It is a command.
2837         c->Request.Type.Attribute = ATTR_SIMPLE;
2838         c->Request.Type.Direction =
2839             (rq_data_dir(creq) == READ) ? XFER_READ : XFER_WRITE;
2840         c->Request.Timeout = 0; // Don't time out
2841         c->Request.CDB[0] =
2842             (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
2843         start_blk = creq->sector;
2844 #ifdef CCISS_DEBUG
2845         printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n", (int)creq->sector,
2846                (int)creq->nr_sectors);
2847 #endif                          /* CCISS_DEBUG */
2848
2849         sg_init_table(tmp_sg, MAXSGENTRIES);
2850         seg = blk_rq_map_sg(q, creq, tmp_sg);
2851
2852         /* get the DMA records for the setup */
2853         if (c->Request.Type.Direction == XFER_READ)
2854                 dir = PCI_DMA_FROMDEVICE;
2855         else
2856                 dir = PCI_DMA_TODEVICE;
2857
2858         for (i = 0; i < seg; i++) {
2859                 c->SG[i].Len = tmp_sg[i].length;
2860                 temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
2861                                                   tmp_sg[i].offset,
2862                                                   tmp_sg[i].length, dir);
2863                 c->SG[i].Addr.lower = temp64.val32.lower;
2864                 c->SG[i].Addr.upper = temp64.val32.upper;
2865                 c->SG[i].Ext = 0;       // we are not chaining
2866         }
2867         /* track how many SG entries we are using */
2868         if (seg > h->maxSG)
2869                 h->maxSG = seg;
2870
2871 #ifdef CCISS_DEBUG
2872         printk(KERN_DEBUG "cciss: Submitting %lu sectors in %d segments\n",
2873                creq->nr_sectors, seg);
2874 #endif                          /* CCISS_DEBUG */
2875
2876         c->Header.SGList = c->Header.SGTotal = seg;
2877         if (likely(blk_fs_request(creq))) {
2878                 if(h->cciss_read == CCISS_READ_10) {
2879                         c->Request.CDB[1] = 0;
2880                         c->Request.CDB[2] = (start_blk >> 24) & 0xff;   //MSB
2881                         c->Request.CDB[3] = (start_blk >> 16) & 0xff;
2882                         c->Request.CDB[4] = (start_blk >> 8) & 0xff;
2883                         c->Request.CDB[5] = start_blk & 0xff;
2884                         c->Request.CDB[6] = 0;  // (sect >> 24) & 0xff; MSB
2885                         c->Request.CDB[7] = (creq->nr_sectors >> 8) & 0xff;
2886                         c->Request.CDB[8] = creq->nr_sectors & 0xff;
2887                         c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
2888                 } else {
2889                         u32 upper32 = upper_32_bits(start_blk);
2890
2891                         c->Request.CDBLen = 16;
2892                         c->Request.CDB[1]= 0;
2893                         c->Request.CDB[2]= (upper32 >> 24) & 0xff;      //MSB
2894                         c->Request.CDB[3]= (upper32 >> 16) & 0xff;
2895                         c->Request.CDB[4]= (upper32 >>  8) & 0xff;
2896                         c->Request.CDB[5]= upper32 & 0xff;
2897                         c->Request.CDB[6]= (start_blk >> 24) & 0xff;
2898                         c->Request.CDB[7]= (start_blk >> 16) & 0xff;
2899                         c->Request.CDB[8]= (start_blk >>  8) & 0xff;
2900                         c->Request.CDB[9]= start_blk & 0xff;
2901                         c->Request.CDB[10]= (creq->nr_sectors >>  24) & 0xff;
2902                         c->Request.CDB[11]= (creq->nr_sectors >>  16) & 0xff;
2903                         c->Request.CDB[12]= (creq->nr_sectors >>  8) & 0xff;
2904                         c->Request.CDB[13]= creq->nr_sectors & 0xff;
2905                         c->Request.CDB[14] = c->Request.CDB[15] = 0;
2906                 }
2907         } else if (blk_pc_request(creq)) {
2908                 c->Request.CDBLen = creq->cmd_len;
2909                 memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB);
2910         } else {
2911                 printk(KERN_WARNING "cciss%d: bad request type %d\n", h->ctlr, creq->cmd_type);
2912                 BUG();
2913         }
2914
2915         spin_lock_irq(q->queue_lock);
2916
2917         addQ(&h->reqQ, c);
2918         h->Qdepth++;
2919         if (h->Qdepth > h->maxQsinceinit)
2920                 h->maxQsinceinit = h->Qdepth;
2921
2922         goto queue;
2923 full:
2924         blk_stop_queue(q);
2925 startio:
2926         /* We will already have the driver lock here so not need
2927          * to lock it.
2928          */
2929         start_io(h);
2930 }
2931
2932 static inline unsigned long get_next_completion(ctlr_info_t *h)
2933 {
2934 #ifdef CONFIG_CISS_SCSI_TAPE
2935         /* Any rejects from sendcmd() lying around? Process them first */
2936         if (h->scsi_rejects.ncompletions == 0)
2937                 return h->access.command_completed(h);
2938         else {
2939                 struct sendcmd_reject_list *srl;
2940                 int n;
2941                 srl = &h->scsi_rejects;
2942                 n = --srl->ncompletions;
2943                 /* printk("cciss%d: processing saved reject\n", h->ctlr); */
2944                 printk("p");
2945                 return srl->complete[n];
2946         }
2947 #else
2948         return h->access.command_completed(h);
2949 #endif
2950 }
2951
2952 static inline int interrupt_pending(ctlr_info_t *h)
2953 {
2954 #ifdef CONFIG_CISS_SCSI_TAPE
2955         return (h->access.intr_pending(h)
2956                 || (h->scsi_rejects.ncompletions > 0));
2957 #else
2958         return h->access.intr_pending(h);
2959 #endif
2960 }
2961
2962 static inline long interrupt_not_for_us(ctlr_info_t *h)
2963 {
2964 #ifdef CONFIG_CISS_SCSI_TAPE
2965         return (((h->access.intr_pending(h) == 0) ||
2966                  (h->interrupts_enabled == 0))
2967                 && (h->scsi_rejects.ncompletions == 0));
2968 #else
2969         return (((h->access.intr_pending(h) == 0) ||
2970                  (h->interrupts_enabled == 0)));
2971 #endif
2972 }
2973
2974 static irqreturn_t do_cciss_intr(int irq, void *dev_id)
2975 {
2976         ctlr_info_t *h = dev_id;
2977         CommandList_struct *c;
2978         unsigned long flags;
2979         __u32 a, a1, a2;
2980
2981         if (interrupt_not_for_us(h))
2982                 return IRQ_NONE;
2983         /*
2984          * If there are completed commands in the completion queue,
2985          * we had better do something about it.
2986          */
2987         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
2988         while (interrupt_pending(h)) {
2989                 while ((a = get_next_completion(h)) != FIFO_EMPTY) {
2990                         a1 = a;
2991                         if ((a & 0x04)) {
2992                                 a2 = (a >> 3);
2993                                 if (a2 >= h->nr_cmds) {
2994                                         printk(KERN_WARNING
2995                                                "cciss: controller cciss%d failed, stopping.\n",
2996                                                h->ctlr);
2997                                         fail_all_cmds(h->ctlr);
2998                                         return IRQ_HANDLED;
2999                                 }
3000
3001                                 c = h->cmd_pool + a2;
3002                                 a = c->busaddr;
3003
3004                         } else {
3005                                 struct hlist_node *tmp;
3006
3007                                 a &= ~3;
3008                                 c = NULL;
3009                                 hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
3010                                         if (c->busaddr == a)
3011                                                 break;
3012                                 }
3013                         }
3014                         /*
3015                          * If we've found the command, take it off the
3016                          * completion Q and free it
3017                          */
3018                         if (c && c->busaddr == a) {
3019                                 removeQ(c);
3020                                 if (c->cmd_type == CMD_RWREQ) {
3021                                         complete_command(h, c, 0);
3022                                 } else if (c->cmd_type == CMD_IOCTL_PEND) {
3023                                         complete(c->waiting);
3024                                 }
3025 #                               ifdef CONFIG_CISS_SCSI_TAPE
3026                                 else if (c->cmd_type == CMD_SCSI)
3027                                         complete_scsi_command(c, 0, a1);
3028 #                               endif
3029                                 continue;
3030                         }
3031                 }
3032         }
3033
3034         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
3035         return IRQ_HANDLED;
3036 }
3037
3038 static int scan_thread(void *data)
3039 {
3040         ctlr_info_t *h = data;
3041         int rc;
3042         DECLARE_COMPLETION_ONSTACK(wait);
3043         h->rescan_wait = &wait;
3044
3045         for (;;) {
3046                 rc = wait_for_completion_interruptible(&wait);
3047                 if (kthread_should_stop())
3048                         break;
3049                 if (!rc)
3050                         rebuild_lun_table(h, 0);
3051         }
3052         return 0;
3053 }
3054
3055 static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
3056 {
3057         if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
3058                 return 0;
3059
3060         switch (c->err_info->SenseInfo[12]) {
3061         case STATE_CHANGED:
3062                 printk(KERN_WARNING "cciss%d: a state change "
3063                         "detected, command retried\n", h->ctlr);
3064                 return 1;
3065         break;
3066         case LUN_FAILED:
3067                 printk(KERN_WARNING "cciss%d: LUN failure "
3068                         "detected, action required\n", h->ctlr);
3069                 return 1;
3070         break;
3071         case REPORT_LUNS_CHANGED:
3072                 printk(KERN_WARNING "cciss%d: report LUN data "
3073                         "changed\n", h->ctlr);
3074                 if (h->rescan_wait)
3075                         complete(h->rescan_wait);
3076                 return 1;
3077         break;
3078         case POWER_OR_RESET:
3079                 printk(KERN_WARNING "cciss%d: a power on "
3080                         "or device reset detected\n", h->ctlr);
3081                 return 1;
3082         break;
3083         case UNIT_ATTENTION_CLEARED:
3084                 printk(KERN_WARNING "cciss%d: unit attention "
3085                     "cleared by another initiator\n", h->ctlr);
3086                 return 1;
3087         break;
3088         default:
3089                 printk(KERN_WARNING "cciss%d: unknown "
3090                         "unit attention detected\n", h->ctlr);
3091                                 return 1;
3092         }
3093 }
3094
/*
 *  We cannot read the structure directly, for portability we must use
 *   the io functions.
 *   This is for debug only.
 */
#ifdef CCISS_DEBUG
/*
 * Dump the fields of the controller configuration table tb to the kernel
 * log.  Every field is fetched with readb()/readl().
 */
static void print_cfg_table(CfgTable_struct *tb)
{
	int i;
	char temp_name[17];	/* room for the 16 byte server name + NUL */

	printk("Controller Configuration information\n");
	printk("------------------------------------\n");
	for (i = 0; i < 4; i++)
		temp_name[i] = readb(&(tb->Signature[i]));
	temp_name[4] = '\0';
	printk("   Signature = %s\n", temp_name);
	printk("   Spec Number = %d\n", readl(&(tb->SpecValence)));
	printk("   Transport methods supported = 0x%x\n",
	       readl(&(tb->TransportSupport)));
	printk("   Transport methods active = 0x%x\n",
	       readl(&(tb->TransportActive)));
	printk("   Requested transport Method = 0x%x\n",
	       readl(&(tb->HostWrite.TransportRequest)));
	printk("   Coalesce Interrupt Delay = 0x%x\n",
	       readl(&(tb->HostWrite.CoalIntDelay)));
	printk("   Coalesce Interrupt Count = 0x%x\n",
	       readl(&(tb->HostWrite.CoalIntCount)));
	/* printed in decimal, so no "0x" prefix */
	printk("   Max outstanding commands = %d\n",
	       readl(&(tb->CmdsOutMax)));
	printk("   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
	for (i = 0; i < 16; i++)
		temp_name[i] = readb(&(tb->ServerName[i]));
	temp_name[16] = '\0';
	printk("   Server Name = %s\n", temp_name);
	printk("   Heartbeat Counter = 0x%x\n\n\n", readl(&(tb->HeartBeat)));
}
#endif				/* CCISS_DEBUG */
3133
3134 static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
3135 {
3136         int i, offset, mem_type, bar_type;
3137         if (pci_bar_addr == PCI_BASE_ADDRESS_0) /* looking for BAR zero? */
3138                 return 0;
3139         offset = 0;
3140         for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3141                 bar_type = pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE;
3142                 if (bar_type == PCI_BASE_ADDRESS_SPACE_IO)
3143                         offset += 4;
3144                 else {
3145                         mem_type = pci_resource_flags(pdev, i) &
3146                             PCI_BASE_ADDRESS_MEM_TYPE_MASK;
3147                         switch (mem_type) {
3148                         case PCI_BASE_ADDRESS_MEM_TYPE_32:
3149                         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
3150                                 offset += 4;    /* 32 bit */
3151                                 break;
3152                         case PCI_BASE_ADDRESS_MEM_TYPE_64:
3153                                 offset += 8;
3154                                 break;
3155                         default:        /* reserved in PCI 2.2 */
3156                                 printk(KERN_WARNING
3157                                        "Base address is invalid\n");
3158                                 return -1;
3159                                 break;
3160                         }
3161                 }
3162                 if (offset == pci_bar_addr - PCI_BASE_ADDRESS_0)
3163                         return i + 1;
3164         }
3165         return -1;
3166 }
3167
3168 /* If MSI/MSI-X is supported by the kernel we will try to enable it on
3169  * controllers that are capable. If not, we use IO-APIC mode.
3170  */
3171
3172 static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
3173                                            struct pci_dev *pdev, __u32 board_id)
3174 {
3175 #ifdef CONFIG_PCI_MSI
3176         int err;
3177         struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1},
3178         {0, 2}, {0, 3}
3179         };
3180
3181         /* Some boards advertise MSI but don't really support it */
3182         if ((board_id == 0x40700E11) ||
3183             (board_id == 0x40800E11) ||
3184             (board_id == 0x40820E11) || (board_id == 0x40830E11))
3185                 goto default_int_mode;
3186
3187         if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
3188                 err = pci_enable_msix(pdev, cciss_msix_entries, 4);
3189                 if (!err) {
3190                         c->intr[0] = cciss_msix_entries[0].vector;
3191                         c->intr[1] = cciss_msix_entries[1].vector;
3192                         c->intr[2] = cciss_msix_entries[2].vector;
3193                         c->intr[3] = cciss_msix_entries[3].vector;
3194                         c->msix_vector = 1;
3195                         return;
3196                 }
3197                 if (err > 0) {
3198                         printk(KERN_WARNING "cciss: only %d MSI-X vectors "
3199                                "available\n", err);
3200                         goto default_int_mode;
3201                 } else {
3202                         printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
3203                                err);
3204                         goto default_int_mode;
3205                 }
3206         }
3207         if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
3208                 if (!pci_enable_msi(pdev)) {
3209                         c->msi_vector = 1;
3210                 } else {
3211                         printk(KERN_WARNING "cciss: MSI init failed\n");
3212                 }
3213         }
3214 default_int_mode:
3215 #endif                          /* CONFIG_PCI_MSI */
3216         /* if we get here we're going to use the default interrupt mode */
3217         c->intr[SIMPLE_MODE_INT] = pdev->irq;
3218         return;
3219 }
3220
3221 static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
3222 {
3223         ushort subsystem_vendor_id, subsystem_device_id, command;
3224         __u32 board_id, scratchpad = 0;
3225         __u64 cfg_offset;
3226         __u32 cfg_base_addr;
3227         __u64 cfg_base_addr_index;
3228         int i, err;
3229
3230         /* check to see if controller has been disabled */
3231         /* BEFORE trying to enable it */
3232         (void)pci_read_config_word(pdev, PCI_COMMAND, &command);
3233         if (!(command & 0x02)) {
3234                 printk(KERN_WARNING
3235                        "cciss: controller appears to be disabled\n");
3236                 return -ENODEV;
3237         }
3238
3239         err = pci_enable_device(pdev);
3240         if (err) {
3241                 printk(KERN_ERR "cciss: Unable to Enable PCI device\n");
3242                 return err;
3243         }
3244
3245         err = pci_request_regions(pdev, "cciss");
3246         if (err) {
3247                 printk(KERN_ERR "cciss: Cannot obtain PCI resources, "
3248                        "aborting\n");
3249                 return err;
3250         }
3251
3252         subsystem_vendor_id = pdev->subsystem_vendor;
3253         subsystem_device_id = pdev->subsystem_device;
3254         board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) |
3255                     subsystem_vendor_id);
3256
3257 #ifdef CCISS_DEBUG
3258         printk("command = %x\n", command);
3259         printk("irq = %x\n", pdev->irq);
3260         printk("board_id = %x\n", board_id);
3261 #endif                          /* CCISS_DEBUG */
3262
3263 /* If the kernel supports MSI/MSI-X we will try to enable that functionality,
3264  * else we use the IO-APIC interrupt assigned to us by system ROM.
3265  */
3266         cciss_interrupt_mode(c, pdev, board_id);
3267
3268         /*
3269          * Memory base addr is first addr , the second points to the config
3270          *   table
3271          */
3272
3273         c->paddr = pci_resource_start(pdev, 0); /* addressing mode bits already removed */
3274 #ifdef CCISS_DEBUG
3275         printk("address 0 = %lx\n", c->paddr);
3276 #endif                          /* CCISS_DEBUG */
3277         c->vaddr = remap_pci_mem(c->paddr, 0x250);
3278
3279         /* Wait for the board to become ready.  (PCI hotplug needs this.)
3280          * We poll for up to 120 secs, once per 100ms. */
3281         for (i = 0; i < 1200; i++) {
3282                 scratchpad = readl(c->vaddr + SA5_SCRATCHPAD_OFFSET);
3283                 if (scratchpad == CCISS_FIRMWARE_READY)
3284                         break;
3285                 set_current_state(TASK_INTERRUPTIBLE);
3286                 schedule_timeout(HZ / 10);      /* wait 100ms */
3287         }
3288         if (scratchpad != CCISS_FIRMWARE_READY) {
3289                 printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
3290                 err = -ENODEV;
3291                 goto err_out_free_res;
3292         }
3293
3294         /* get the address index number */
3295         cfg_base_addr = readl(c->vaddr + SA5_CTCFG_OFFSET);
3296         cfg_base_addr &= (__u32) 0x0000ffff;
3297 #ifdef CCISS_DEBUG
3298         printk("cfg base address = %x\n", cfg_base_addr);
3299 #endif                          /* CCISS_DEBUG */
3300         cfg_base_addr_index = find_PCI_BAR_index(pdev, cfg_base_addr);
3301 #ifdef CCISS_DEBUG
3302         printk("cfg base address index = %llx\n",
3303                 (unsigned long long)cfg_base_addr_index);
3304 #endif                          /* CCISS_DEBUG */
3305         if (cfg_base_addr_index == -1) {
3306                 printk(KERN_WARNING "cciss: Cannot find cfg_base_addr_index\n");
3307                 err = -ENODEV;
3308                 goto err_out_free_res;
3309         }
3310
3311         cfg_offset = readl(c->vaddr + SA5_CTMEM_OFFSET);
3312 #ifdef CCISS_DEBUG
3313         printk("cfg offset = %llx\n", (unsigned long long)cfg_offset);
3314 #endif                          /* CCISS_DEBUG */
3315         c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
3316                                                        cfg_base_addr_index) +
3317                                     cfg_offset, sizeof(CfgTable_struct));
3318         c->board_id = board_id;
3319
3320 #ifdef CCISS_DEBUG
3321         print_cfg_table(c->cfgtable);
3322 #endif                          /* CCISS_DEBUG */
3323
3324         /* Some controllers support Zero Memory Raid (ZMR).
3325          * When configured in ZMR mode the number of supported
3326          * commands drops to 64. So instead of just setting an
3327          * arbitrary value we make the driver a little smarter.
3328          * We read the config table to tell us how many commands
3329          * are supported on the controller then subtract 4 to
3330          * leave a little room for ioctl calls.
3331          */
3332         c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
3333         for (i = 0; i < ARRAY_SIZE(products); i++) {
3334                 if (board_id == products[i].board_id) {
3335                         c->product_name = products[i].product_name;
3336                         c->access = *(products[i].access);
3337                         c->nr_cmds = c->max_commands - 4;
3338                         break;
3339                 }
3340         }
3341         if ((readb(&c->cfgtable->Signature[0]) != 'C') ||
3342             (readb(&c->cfgtable->Signature[1]) != 'I') ||
3343             (readb(&c->cfgtable->Signature[2]) != 'S') ||
3344             (readb(&c->cfgtable->Signature[3]) != 'S')) {
3345                 printk("Does not appear to be a valid CISS config table\n");
3346                 err = -ENODEV;
3347                 goto err_out_free_res;
3348         }
3349         /* We didn't find the controller in our list. We know the
3350          * signature is valid. If it's an HP device let's try to
3351          * bind to the device and fire it up. Otherwise we bail.
3352          */
3353         if (i == ARRAY_SIZE(products)) {
3354                 if (subsystem_vendor_id == PCI_VENDOR_ID_HP) {
3355                         c->product_name = products[i-1].product_name;
3356                         c->access = *(products[i-1].access);
3357                         c->nr_cmds = c->max_commands - 4;
3358                         printk(KERN_WARNING "cciss: This is an unknown "
3359                                 "Smart Array controller.\n"
3360                                 "cciss: Please update to the latest driver "
3361                                 "available from www.hp.com.\n");
3362                 } else {
3363                         printk(KERN_WARNING "cciss: Sorry, I don't know how"
3364                                 " to access the Smart Array controller %08lx\n"
3365                                         , (unsigned long)board_id);
3366                         err = -ENODEV;
3367                         goto err_out_free_res;
3368                 }
3369         }
3370 #ifdef CONFIG_X86
3371         {
3372                 /* Need to enable prefetch in the SCSI core for 6400 in x86 */
3373                 __u32 prefetch;
3374                 prefetch = readl(&(c->cfgtable->SCSI_Prefetch));
3375                 prefetch |= 0x100;
3376                 writel(prefetch, &(c->cfgtable->SCSI_Prefetch));
3377         }
3378 #endif
3379
3380         /* Disabling DMA prefetch and refetch for the P600.
3381          * An ASIC bug may result in accesses to invalid memory addresses.
3382          * We've disabled prefetch for some time now. Testing with XEN
3383          * kernels revealed a bug in the refetch if dom0 resides on a P600.
3384          */
3385         if(board_id == 0x3225103C) {
3386                 __u32 dma_prefetch;
3387                 __u32 dma_refetch;
3388                 dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG);
3389                 dma_prefetch |= 0x8000;
3390                 writel(dma_prefetch, c->vaddr + I2O_DMA1_CFG);
3391                 pci_read_config_dword(pdev, PCI_COMMAND_PARITY, &dma_refetch);
3392                 dma_refetch |= 0x1;
3393                 pci_write_config_dword(pdev, PCI_COMMAND_PARITY, dma_refetch);
3394         }
3395
3396 #ifdef CCISS_DEBUG
3397         printk("Trying to put board into Simple mode\n");
3398 #endif                          /* CCISS_DEBUG */
3399         c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
3400         /* Update the field, and then ring the doorbell */
3401         writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest));
3402         writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL);
3403
3404         /* under certain very rare conditions, this can take awhile.
3405          * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
3406          * as we enter this code.) */
3407         for (i = 0; i < MAX_CONFIG_WAIT; i++) {
3408                 if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
3409                         break;
3410                 /* delay and try again */
3411                 set_current_state(TASK_INTERRUPTIBLE);
3412                 schedule_timeout(10);
3413         }
3414
3415 #ifdef CCISS_DEBUG
3416         printk(KERN_DEBUG "I counter got to %d %x\n", i,
3417                readl(c->vaddr + SA5_DOORBELL));
3418 #endif                          /* CCISS_DEBUG */
3419 #ifdef CCISS_DEBUG
3420         print_cfg_table(c->cfgtable);
3421 #endif                          /* CCISS_DEBUG */
3422
3423         if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
3424                 printk(KERN_WARNING "cciss: unable to get board into"
3425                        " simple mode\n");
3426                 err = -ENODEV;
3427                 goto err_out_free_res;
3428         }
3429         return 0;
3430
3431 err_out_free_res:
3432         /*
3433          * Deliberately omit pci_disable_device(): it does something nasty to
3434          * Smart Array controllers that pci_enable_device does not undo
3435          */
3436         pci_release_regions(pdev);
3437         return err;
3438 }
3439
3440 /* Function to find the first free pointer into our hba[] array
3441  * Returns -1 if no free entries are left.
3442  */
3443 static int alloc_cciss_hba(void)
3444 {
3445         int i;
3446
3447         for (i = 0; i < MAX_CTLR; i++) {
3448                 if (!hba[i]) {
3449                         ctlr_info_t *p;
3450
3451                         p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
3452                         if (!p)
3453                                 goto Enomem;
3454                         hba[i] = p;
3455                         return i;
3456                 }
3457         }
3458         printk(KERN_WARNING "cciss: This driver supports a maximum"
3459                " of %d controllers.\n", MAX_CTLR);
3460         return -1;
3461 Enomem:
3462         printk(KERN_ERR "cciss: out of memory.\n");
3463         return -1;
3464 }
3465
3466 static void free_hba(int i)
3467 {
3468         ctlr_info_t *p = hba[i];
3469         int n;
3470
3471         hba[i] = NULL;
3472         for (n = 0; n < CISS_MAX_LUN; n++)
3473                 put_disk(p->gendisk[n]);
3474         kfree(p);
3475 }
3476
3477 /* Send a message CDB to the firmware. */
3478 static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
3479 {
3480         typedef struct {
3481                 CommandListHeader_struct CommandHeader;
3482                 RequestBlock_struct Request;
3483                 ErrDescriptor_struct ErrorDescriptor;
3484         } Command;
3485         static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
3486         Command *cmd;
3487         dma_addr_t paddr64;
3488         uint32_t paddr32, tag;
3489         void __iomem *vaddr;
3490         int i, err;
3491
3492         vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
3493         if (vaddr == NULL)
3494                 return -ENOMEM;
3495
3496         /* The Inbound Post Queue only accepts 32-bit physical addresses for the
3497            CCISS commands, so they must be allocated from the lower 4GiB of
3498            memory. */
3499         err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
3500         if (err) {
3501                 iounmap(vaddr);
3502                 return -ENOMEM;
3503         }
3504
3505         cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
3506         if (cmd == NULL) {
3507                 iounmap(vaddr);
3508                 return -ENOMEM;
3509         }
3510
3511         /* This must fit, because of the 32-bit consistent DMA mask.  Also,
3512            although there's no guarantee, we assume that the address is at
3513            least 4-byte aligned (most likely, it's page-aligned). */
3514         paddr32 = paddr64;
3515
3516         cmd->CommandHeader.ReplyQueue = 0;
3517         cmd->CommandHeader.SGList = 0;
3518         cmd->CommandHeader.SGTotal = 0;
3519         cmd->CommandHeader.Tag.lower = paddr32;
3520         cmd->CommandHeader.Tag.upper = 0;
3521         memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
3522
3523         cmd->Request.CDBLen = 16;
3524         cmd->Request.Type.Type = TYPE_MSG;
3525         cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
3526         cmd->Request.Type.Direction = XFER_NONE;
3527         cmd->Request.Timeout = 0; /* Don't time out */
3528         cmd->Request.CDB[0] = opcode;
3529         cmd->Request.CDB[1] = type;
3530         memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
3531
3532         cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
3533         cmd->ErrorDescriptor.Addr.upper = 0;
3534         cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
3535
3536         writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
3537
3538         for (i = 0; i < 10; i++) {
3539                 tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
3540                 if ((tag & ~3) == paddr32)
3541                         break;
3542                 schedule_timeout_uninterruptible(HZ);
3543         }
3544
3545         iounmap(vaddr);
3546
3547         /* we leak the DMA buffer here ... no choice since the controller could
3548            still complete the command. */
3549         if (i == 10) {
3550                 printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
3551                         opcode, type);
3552                 return -ETIMEDOUT;
3553         }
3554
3555         pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
3556
3557         if (tag & 2) {
3558                 printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
3559                         opcode, type);
3560                 return -EIO;
3561         }
3562
3563         printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
3564                 opcode, type);
3565         return 0;
3566 }
3567
/*
 * Shorthands for cciss_message() with fixed trailing arguments:
 * (1, 0) for a soft controller reset and (3, 0) for a no-op message.
 */
#define cciss_soft_reset_controller(p)  cciss_message((p), 1, 0)
#define cciss_noop(p)                   cciss_message((p), 3, 0)
3570
3571 static __devinit int cciss_reset_msi(struct pci_dev *pdev)
3572 {
3573 /* the #defines are stolen from drivers/pci/msi.h. */
3574 #define msi_control_reg(base)           (base + PCI_MSI_FLAGS)
3575 #define PCI_MSIX_FLAGS_ENABLE           (1 << 15)
3576
3577         int pos;
3578         u16 control = 0;
3579
3580         pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
3581         if (pos) {
3582                 pci_read_config_word(pdev, msi_control_reg(pos), &control);
3583                 if (control & PCI_MSI_FLAGS_ENABLE) {
3584                         printk(KERN_INFO "cciss: resetting MSI\n");
3585                         pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
3586                 }
3587         }
3588
3589         pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
3590         if (pos) {
3591                 pci_read_config_word(pdev, msi_control_reg(pos), &control);
3592                 if (control & PCI_MSIX_FLAGS_ENABLE) {
3593                         printk(KERN_INFO "cciss: resetting MSI-X\n");
3594                         pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
3595                 }
3596         }
3597
3598         return 0;
3599 }
3600
3601 /* This does a hard reset of the controller using PCI power management
3602  * states. */
3603 static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
3604 {
3605         u16 pmcsr, saved_config_space[32];
3606         int i, pos;
3607
3608         printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
3609
3610         /* This is very nearly the same thing as
3611
3612            pci_save_state(pci_dev);
3613            pci_set_power_state(pci_dev, PCI_D3hot);
3614            pci_set_power_state(pci_dev, PCI_D0);
3615            pci_restore_state(pci_dev);
3616
3617            but we can't use these nice canned kernel routines on
3618            kexec, because they also check the MSI/MSI-X state in PCI
3619            configuration space and do the wrong thing when it is
3620            set/cleared.  Also, the pci_save/restore_state functions
3621            violate the ordering requirements for restoring the
3622            configuration space from the CCISS document (see the
3623            comment below).  So we roll our own .... */
3624
3625         for (i = 0; i < 32; i++)
3626                 pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
3627
3628         pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
3629         if (pos == 0) {
3630                 printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
3631                 return -ENODEV;
3632         }
3633
3634         /* Quoting from the Open CISS Specification: "The Power
3635          * Management Control/Status Register (CSR) controls the power
3636          * state of the device.  The normal operating state is D0,
3637          * CSR=00h.  The software off state is D3, CSR=03h.  To reset
3638          * the controller, place the interface device in D3 then to
3639          * D0, this causes a secondary PCI reset which will reset the
3640          * controller." */
3641
3642         /* enter the D3hot power management state */
3643         pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
3644         pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3645         pmcsr |= PCI_D3hot;
3646         pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3647
3648         schedule_timeout_uninterruptible(HZ >> 1);
3649
3650         /* enter the D0 power management state */
3651         pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3652         pmcsr |= PCI_D0;
3653         pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3654
3655         schedule_timeout_uninterruptible(HZ >> 1);
3656
3657         /* Restore the PCI configuration space.  The Open CISS
3658          * Specification says, "Restore the PCI Configuration
3659          * Registers, offsets 00h through 60h. It is important to
3660          * restore the command register, 16-bits at offset 04h,
3661          * last. Do not restore the configuration status register,
3662          * 16-bits at offset 06h."  Note that the offset is 2*i. */
3663         for (i = 0; i < 32; i++) {
3664                 if (i == 2 || i == 3)
3665                         continue;
3666                 pci_write_config_word(pdev, 2*i, saved_config_space[i]);
3667         }
3668         wmb();
3669         pci_write_config_word(pdev, 4, saved_config_space[2]);
3670
3671         return 0;
3672 }
3673
3674 /*
3675  *  This is it.  Find all the controllers and register them.  I really hate
3676  *  stealing all these major device numbers.
3677  *  returns the number of block devices registered.
3678  */
3679 static int __devinit cciss_init_one(struct pci_dev *pdev,
3680                                     const struct pci_device_id *ent)
3681 {
3682         int i;
3683         int j = 0;
3684         int rc;
3685         int dac, return_code;
3686         InquiryData_struct *inq_buff = NULL;
3687
3688         if (reset_devices) {
3689                 /* Reset the controller with a PCI power-cycle */
3690                 if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
3691                         return -ENODEV;
3692
3693                 /* Now try to get the controller to respond to a no-op. Some
3694                    devices (notably the HP Smart Array 5i Controller) need
3695                    up to 30 seconds to respond. */
3696                 for (i=0; i<30; i++) {
3697                         if (cciss_noop(pdev) == 0)
3698                                 break;
3699
3700                         schedule_timeout_uninterruptible(HZ);
3701                 }
3702                 if (i == 30) {
3703                         printk(KERN_ERR "cciss: controller seems dead\n");
3704                         return -EBUSY;
3705                 }
3706         }
3707
3708         i = alloc_cciss_hba();
3709         if (i < 0)
3710                 return -1;
3711
3712         hba[i]->busy_initializing = 1;
3713         INIT_HLIST_HEAD(&hba[i]->cmpQ);
3714         INIT_HLIST_HEAD(&hba[i]->reqQ);
3715
3716         if (cciss_pci_init(hba[i], pdev) != 0)
3717                 goto clean1;
3718
3719         sprintf(hba[i]->devname, "cciss%d", i);
3720         hba[i]->ctlr = i;
3721         hba[i]->pdev = pdev;
3722
3723         /* configure PCI DMA stuff */
3724         if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK))
3725                 dac = 1;
3726         else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK))
3727                 dac = 0;
3728         else {
3729                 printk(KERN_ERR "cciss: no suitable DMA available\n");
3730                 goto clean1;
3731         }
3732
3733         /*
3734          * register with the major number, or get a dynamic major number
3735          * by passing 0 as argument.  This is done for greater than
3736          * 8 controller support.
3737          */
3738         if (i < MAX_CTLR_ORIG)
3739                 hba[i]->major = COMPAQ_CISS_MAJOR + i;
3740         rc = register_blkdev(hba[i]->major, hba[i]->devname);
3741         if (rc == -EBUSY || rc == -EINVAL) {
3742                 printk(KERN_ERR
3743                        "cciss:  Unable to get major number %d for %s "
3744                        "on hba %d\n", hba[i]->major, hba[i]->devname, i);
3745                 goto clean1;
3746         } else {
3747                 if (i >= MAX_CTLR_ORIG)
3748                         hba[i]->major = rc;
3749         }
3750
3751         /* make sure the board interrupts are off */
3752         hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
3753         if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
3754                         IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) {
3755                 printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
3756                        hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
3757                 goto clean2;
3758         }
3759
3760         printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
3761                hba[i]->devname, pdev->device, pci_name(pdev),
3762                hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
3763
3764         hba[i]->cmd_pool_bits =
3765             kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
3766                         * sizeof(unsigned long), GFP_KERNEL);
3767         hba[i]->cmd_pool = (CommandList_struct *)
3768             pci_alloc_consistent(hba[i]->pdev,
3769                     hba[i]->nr_cmds * sizeof(CommandList_struct),
3770                     &(hba[i]->cmd_pool_dhandle));
3771         hba[i]->errinfo_pool = (ErrorInfo_struct *)
3772             pci_alloc_consistent(hba[i]->pdev,
3773                     hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3774                     &(hba[i]->errinfo_pool_dhandle));
3775         if ((hba[i]->cmd_pool_bits == NULL)
3776             || (hba[i]->cmd_pool == NULL)
3777             || (hba[i]->errinfo_pool == NULL)) {
3778                 printk(KERN_ERR "cciss: out of memory");
3779                 goto clean4;
3780         }
3781 #ifdef CONFIG_CISS_SCSI_TAPE
3782         hba[i]->scsi_rejects.complete =
3783             kmalloc(sizeof(hba[i]->scsi_rejects.complete[0]) *
3784                     (hba[i]->nr_cmds + 5), GFP_KERNEL);
3785         if (hba[i]->scsi_rejects.complete == NULL) {
3786                 printk(KERN_ERR "cciss: out of memory");
3787                 goto clean4;
3788         }
3789 #endif
3790         spin_lock_init(&hba[i]->lock);
3791
3792         /* Initialize the pdev driver private data.
3793            have it point to hba[i].  */
3794         pci_set_drvdata(pdev, hba[i]);
3795         /* command and error info recs zeroed out before
3796            they are used */
3797         memset(hba[i]->cmd_pool_bits, 0,
3798                DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
3799                         * sizeof(unsigned long));
3800
3801         hba[i]->num_luns = 0;
3802         hba[i]->highest_lun = -1;
3803         for (j = 0; j < CISS_MAX_LUN; j++) {
3804                 hba[i]->drv[j].raid_level = -1;
3805                 hba[i]->drv[j].queue = NULL;
3806                 hba[i]->gendisk[j] = NULL;
3807         }
3808
3809         cciss_scsi_setup(i);
3810
3811         /* Turn the interrupts on so we can service requests */
3812         hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);
3813
3814         /* Get the firmware version */
3815         inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
3816         if (inq_buff == NULL) {
3817                 printk(KERN_ERR "cciss: out of memory\n");
3818                 goto clean4;
3819         }
3820
3821         return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff,
3822                 sizeof(InquiryData_struct), 0, 0 , 0, TYPE_CMD);
3823         if (return_code == IO_OK) {
3824                 hba[i]->firm_ver[0] = inq_buff->data_byte[32];
3825                 hba[i]->firm_ver[1] = inq_buff->data_byte[33];
3826                 hba[i]->firm_ver[2] = inq_buff->data_byte[34];
3827                 hba[i]->firm_ver[3] = inq_buff->data_byte[35];
3828         } else {         /* send command failed */
3829                 printk(KERN_WARNING "cciss: unable to determine firmware"
3830                         " version of controller\n");
3831         }
3832
3833         cciss_procinit(i);
3834
3835         hba[i]->cciss_max_sectors = 2048;
3836
3837         hba[i]->busy_initializing = 0;
3838
3839         rebuild_lun_table(hba[i], 1);
3840         hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
3841                                 "cciss_scan%02d", i);
3842         if (IS_ERR(hba[i]->cciss_scan_thread))
3843                 return PTR_ERR(hba[i]->cciss_scan_thread);
3844
3845         return 1;
3846
3847 clean4:
3848         kfree(inq_buff);
3849 #ifdef CONFIG_CISS_SCSI_TAPE
3850         kfree(hba[i]->scsi_rejects.complete);
3851 #endif
3852         kfree(hba[i]->cmd_pool_bits);
3853         if (hba[i]->cmd_pool)
3854                 pci_free_consistent(hba[i]->pdev,
3855                                     hba[i]->nr_cmds * sizeof(CommandList_struct),
3856                                     hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
3857         if (hba[i]->errinfo_pool)
3858                 pci_free_consistent(hba[i]->pdev,
3859                                     hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3860                                     hba[i]->errinfo_pool,
3861                                     hba[i]->errinfo_pool_dhandle);
3862         free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
3863 clean2:
3864         unregister_blkdev(hba[i]->major, hba[i]->devname);
3865 clean1:
3866         hba[i]->busy_initializing = 0;
3867         /* cleanup any queues that may have been initialized */
3868         for (j=0; j <= hba[i]->highest_lun; j++){
3869                 drive_info_struct *drv = &(hba[i]->drv[j]);
3870                 if (drv->queue)
3871                         blk_cleanup_queue(drv->queue);
3872         }
3873         /*
3874          * Deliberately omit pci_disable_device(): it does something nasty to
3875          * Smart Array controllers that pci_enable_device does not undo
3876          */
3877         pci_release_regions(pdev);
3878         pci_set_drvdata(pdev, NULL);
3879         free_hba(i);
3880         return -1;
3881 }
3882
3883 static void cciss_shutdown(struct pci_dev *pdev)
3884 {
3885         ctlr_info_t *tmp_ptr;
3886         int i;
3887         char flush_buf[4];
3888         int return_code;
3889
3890         tmp_ptr = pci_get_drvdata(pdev);
3891         if (tmp_ptr == NULL)
3892                 return;
3893         i = tmp_ptr->ctlr;
3894         if (hba[i] == NULL)
3895                 return;
3896
3897         /* Turn board interrupts off  and send the flush cache command */
3898         /* sendcmd will turn off interrupt, and send the flush...
3899          * To write all data in the battery backed cache to disks */
3900         memset(flush_buf, 0, 4);
3901         return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
3902                               TYPE_CMD);
3903         if (return_code == IO_OK) {
3904                 printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
3905         } else {
3906                 printk(KERN_WARNING "Error flushing cache on controller %d\n", i);
3907         }
3908         free_irq(hba[i]->intr[2], hba[i]);
3909 }
3910
3911 static void __devexit cciss_remove_one(struct pci_dev *pdev)
3912 {
3913         ctlr_info_t *tmp_ptr;
3914         int i, j;
3915
3916         if (pci_get_drvdata(pdev) == NULL) {
3917                 printk(KERN_ERR "cciss: Unable to remove device \n");
3918                 return;
3919         }
3920
3921         tmp_ptr = pci_get_drvdata(pdev);
3922         i = tmp_ptr->ctlr;
3923         if (hba[i] == NULL) {
3924                 printk(KERN_ERR "cciss: device appears to "
3925                        "already be removed \n");
3926                 return;
3927         }
3928
3929         kthread_stop(hba[i]->cciss_scan_thread);
3930
3931         remove_proc_entry(hba[i]->devname, proc_cciss);
3932         unregister_blkdev(hba[i]->major, hba[i]->devname);
3933
3934         /* remove it from the disk list */
3935         for (j = 0; j < CISS_MAX_LUN; j++) {
3936                 struct gendisk *disk = hba[i]->gendisk[j];
3937                 if (disk) {
3938                         struct request_queue *q = disk->queue;
3939
3940                         if (disk->flags & GENHD_FL_UP)
3941                                 del_gendisk(disk);
3942                         if (q)
3943                                 blk_cleanup_queue(q);
3944                 }
3945         }
3946
3947 #ifdef CONFIG_CISS_SCSI_TAPE
3948         cciss_unregister_scsi(i);       /* unhook from SCSI subsystem */
3949 #endif
3950
3951         cciss_shutdown(pdev);
3952
3953 #ifdef CONFIG_PCI_MSI
3954         if (hba[i]->msix_vector)
3955                 pci_disable_msix(hba[i]->pdev);
3956         else if (hba[i]->msi_vector)
3957                 pci_disable_msi(hba[i]->pdev);
3958 #endif                          /* CONFIG_PCI_MSI */
3959
3960         iounmap(hba[i]->vaddr);
3961
3962         pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct),
3963                             hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
3964         pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3965                             hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
3966         kfree(hba[i]->cmd_pool_bits);
3967 #ifdef CONFIG_CISS_SCSI_TAPE
3968         kfree(hba[i]->scsi_rejects.complete);
3969 #endif
3970         /*
3971          * Deliberately omit pci_disable_device(): it does something nasty to
3972          * Smart Array controllers that pci_enable_device does not undo
3973          */
3974         pci_release_regions(pdev);
3975         pci_set_drvdata(pdev, NULL);
3976         free_hba(i);
3977 }
3978
3979 static struct pci_driver cciss_pci_driver = {
3980         .name = "cciss",
3981         .probe = cciss_init_one,
3982         .remove = __devexit_p(cciss_remove_one),
3983         .id_table = cciss_pci_device_id,        /* id_table */
3984         .shutdown = cciss_shutdown,
3985 };
3986
3987 /*
3988  *  This is it.  Register the PCI driver information for the cards we control
3989  *  the OS will call our registered routines when it finds one of our cards.
3990  */
3991 static int __init cciss_init(void)
3992 {
3993         /*
3994          * The hardware requires that commands are aligned on a 64-bit
3995          * boundary. Given that we use pci_alloc_consistent() to allocate an
3996          * array of them, the size must be a multiple of 8 bytes.
3997          */
3998         BUILD_BUG_ON(sizeof(CommandList_struct) % 8);
3999
4000         printk(KERN_INFO DRIVER_NAME "\n");
4001
4002         /* Register for our PCI devices */
4003         return pci_register_driver(&cciss_pci_driver);
4004 }
4005
4006 static void __exit cciss_cleanup(void)
4007 {
4008         int i;
4009
4010         pci_unregister_driver(&cciss_pci_driver);
4011         /* double check that all controller entrys have been removed */
4012         for (i = 0; i < MAX_CTLR; i++) {
4013                 if (hba[i] != NULL) {
4014                         printk(KERN_WARNING "cciss: had to remove"
4015                                " controller %d\n", i);
4016                         cciss_remove_one(hba[i]->pdev);
4017                 }
4018         }
4019         remove_proc_entry("driver/cciss", NULL);
4020 }
4021
4022 static void fail_all_cmds(unsigned long ctlr)
4023 {
4024         /* If we get here, the board is apparently dead. */
4025         ctlr_info_t *h = hba[ctlr];
4026         CommandList_struct *c;
4027         unsigned long flags;
4028
4029         printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
4030         h->alive = 0;           /* the controller apparently died... */
4031
4032         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
4033
4034         pci_disable_device(h->pdev);    /* Make sure it is really dead. */
4035
4036         /* move everything off the request queue onto the completed queue */
4037         while (!hlist_empty(&h->reqQ)) {
4038                 c = hlist_entry(h->reqQ.first, CommandList_struct, list);
4039                 removeQ(c);
4040                 h->Qdepth--;
4041                 addQ(&h->cmpQ, c);
4042         }
4043
4044         /* Now, fail everything on the completed queue with a HW error */
4045         while (!hlist_empty(&h->cmpQ)) {
4046                 c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
4047                 removeQ(c);
4048                 c->err_info->CommandStatus = CMD_HARDWARE_ERR;
4049                 if (c->cmd_type == CMD_RWREQ) {
4050                         complete_command(h, c, 0);
4051                 } else if (c->cmd_type == CMD_IOCTL_PEND)
4052                         complete(c->waiting);
4053 #ifdef CONFIG_CISS_SCSI_TAPE
4054                 else if (c->cmd_type == CMD_SCSI)
4055                         complete_scsi_command(c, 0, 0);
4056 #endif
4057         }
4058         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
4059         return;
4060 }
4061
/* Register the module's unload and load routines. */
module_exit(cciss_cleanup);
module_init(cciss_init);