2 * Generic SCSI-3 ALUA SCSI Device Handler
4 * Copyright (C) 2007, 2008 Hannes Reinecke, SUSE Linux Products GmbH.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 #include <scsi/scsi.h>
23 #include <scsi/scsi_eh.h>
24 #include <scsi/scsi_dh.h>
/* Handler name: used as alua_dh.name and as the "%s" prefix of log messages. */
26 #define ALUA_DH_NAME "alua"
/* Version string reported through MODULE_VERSION(). */
27 #define ALUA_DH_VER "1.2"
/*
 * Target port group state codes. alua_rtpg() stores the low four bits of
 * the first byte of the matching descriptor in h->state, and the rest of
 * the handler compares h->state against these values.
 */
29 #define TPGS_STATE_OPTIMIZED 0x0
30 #define TPGS_STATE_NONOPTIMIZED 0x1
31 #define TPGS_STATE_STANDBY 0x2
32 #define TPGS_STATE_UNAVAILABLE 0x3
33 #define TPGS_STATE_OFFLINE 0xe
34 #define TPGS_STATE_TRANSITIONING 0xf
/*
 * Bit flags tested against the second byte of the matching descriptor
 * (valid_states) when alua_rtpg() logs the supported states.
 */
36 #define TPGS_SUPPORT_NONE 0x00
37 #define TPGS_SUPPORT_OPTIMIZED 0x01
38 #define TPGS_SUPPORT_NONOPTIMIZED 0x02
39 #define TPGS_SUPPORT_STANDBY 0x04
40 #define TPGS_SUPPORT_UNAVAILABLE 0x08
41 #define TPGS_SUPPORT_OFFLINE 0x40
42 #define TPGS_SUPPORT_TRANSITION 0x80
/*
 * Values of h->tpgs. alua_std_inquiry() derives the field from bits 5:4
 * of byte 5 of the standard INQUIRY data; UNINITIALIZED is the value set
 * by alua_bus_attach() before any INQUIRY has been evaluated.
 */
44 #define TPGS_MODE_UNINITIALIZED -1
45 #define TPGS_MODE_NONE 0x0
46 #define TPGS_MODE_IMPLICIT 0x1
47 #define TPGS_MODE_EXPLICIT 0x2
/* Size of the embedded inq[] buffer and allocation length of the standard INQUIRY. */
49 #define ALUA_INQUIRY_SIZE 36
/*
 * Timeout and retry count applied to every request in get_alua_req();
 * the retry count also seeds the retry counter in alua_stpg().
 */
50 #define ALUA_FAILOVER_TIMEOUT (60 * HZ)
51 #define ALUA_FAILOVER_RETRIES 5
/*
 * NOTE(review): the enclosing declaration is not visible in this view;
 * these look like members of struct alua_dh_data (accessed as h->inq and
 * h->sense below) - confirm.
 */
/* Receives the standard INQUIRY data; h->buff is compared against it before any free. */
58 unsigned char inq[ALUA_INQUIRY_SIZE];
/* Sense buffer attached to every request through rq->sense. */
61 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
/*
 * Port group switching policy identifiers.
 * NOTE(review): no line visible in this view references them.
 */
65 #define ALUA_POLICY_SWITCH_CURRENT 0
66 #define ALUA_POLICY_SWITCH_ALL 1
/*
 * Return the ALUA private data, which lives in the buf area of the
 * scsi_dh_data attached to @sdev. Hits BUG_ON() when the device has no
 * handler data attached.
 */
68 static inline struct alua_dh_data *get_alua_data(struct scsi_device *sdev)
70 struct scsi_dh_data *scsi_dh_data = sdev->scsi_dh_data;
71 BUG_ON(scsi_dh_data == NULL);
72 return ((struct alua_dh_data *) scsi_dh_data->buf);
/*
 * Replace h->buff with a freshly allocated buffer of @len bytes.
 * Callers treat a non-zero return value as an allocation failure.
 * NOTE(review): the statement releasing the old buffer, the check of the
 * kmalloc() result and the return statements are not visible in this view.
 */
75 static int realloc_buffer(struct alua_dh_data *h, unsigned len)
/* Only a buffer that is not the embedded inq[] array is considered here. */
77 if (h->buff && h->buff != h->inq)
80 h->buff = kmalloc(len, GFP_NOIO);
/*
 * NOTE(review): the guarding condition is not visible; resetting the
 * length to the INQUIRY size looks like the allocation-failure
 * fallback - confirm.
 */
83 h->bufflen = ALUA_INQUIRY_SIZE;
/*
 * Allocate a request on the device's request queue for an ALUA command.
 * @sdev:   device whose request_queue is used
 * @buffer: kernel buffer mapped into the request when @buflen is non-zero
 * @buflen: length of @buffer in bytes; 0 skips the mapping
 * @rw:     direction passed to blk_get_request() (callers use READ or WRITE)
 *
 * The request is marked REQ_TYPE_BLOCK_PC, gets fail-fast flags and uses
 * ALUA_FAILOVER_RETRIES / ALUA_FAILOVER_TIMEOUT. Both failure paths log a
 * message.
 * NOTE(review): the failure return value, the cleanup of a request whose
 * mapping failed and the final return are not visible in this view.
 */
90 static struct request *get_alua_req(struct scsi_device *sdev,
91 void *buffer, unsigned buflen, int rw)
94 struct request_queue *q = sdev->request_queue;
96 rq = blk_get_request(q, rw, GFP_NOIO);
99 sdev_printk(KERN_INFO, sdev,
100 "%s: blk_get_request failed\n", __func__);
104 if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
106 sdev_printk(KERN_INFO, sdev,
107 "%s: blk_rq_map_kern failed\n", __func__);
111 rq->cmd_type = REQ_TYPE_BLOCK_PC;
112 rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
114 rq->retries = ALUA_FAILOVER_RETRIES;
115 rq->timeout = ALUA_FAILOVER_TIMEOUT;
121 * submit_std_inquiry - Issue a standard INQUIRY command
122 * @sdev: sdev the command should be sent to
/*
 * Build a standard INQUIRY that reads ALUA_INQUIRY_SIZE bytes into h->inq
 * and run it through blk_execute_rq().
 * h->sense is zeroed and attached as the sense buffer; h->senselen is
 * cleared before execution and updated from rq->sense_len on the logged
 * failure path.
 * NOTE(review): the check of the get_alua_req() result, the condition
 * guarding the failure branch, the release of the request and the return
 * statement are not visible in this view.
 */
124 static int submit_std_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
127 int err = SCSI_DH_RES_TEMP_UNAVAIL;
129 rq = get_alua_req(sdev, h->inq, ALUA_INQUIRY_SIZE, READ);
133 /* Prepare the command. */
134 rq->cmd[0] = INQUIRY;
/* Allocation length: the size of the inq[] buffer. */
137 rq->cmd[4] = ALUA_INQUIRY_SIZE;
138 rq->cmd_len = COMMAND_SIZE(INQUIRY);
140 rq->sense = h->sense;
141 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
142 rq->sense_len = h->senselen = 0;
144 err = blk_execute_rq(rq->q, NULL, rq, 1);
146 sdev_printk(KERN_INFO, sdev,
147 "%s: std inquiry failed with %x\n",
148 ALUA_DH_NAME, rq->errors);
/* Keep the sense length so callers can inspect h->sense. */
149 h->senselen = rq->sense_len;
158 * submit_vpd_inquiry - Issue an INQUIRY VPD page 0x83 command
159 * @sdev: sdev the command should be sent to
/*
 * Build an INQUIRY that reads up to h->bufflen bytes into h->buff and run
 * it through blk_execute_rq(). Sense handling matches the other submit_*
 * helpers: h->sense is zeroed and attached, h->senselen is cleared and
 * then updated on the logged failure path.
 * NOTE(review): the lines selecting the VPD page, the check of the
 * get_alua_req() result, the failure condition, the release of the
 * request and the return statement are not visible in this view.
 */
161 static int submit_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
164 int err = SCSI_DH_RES_TEMP_UNAVAIL;
166 rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
170 /* Prepare the command. */
171 rq->cmd[0] = INQUIRY;
/*
 * NOTE(review): only one CDB byte is written here, so a bufflen above 255
 * (possible after alua_vpd_inquiry() grows the buffer) would presumably
 * be truncated - confirm.
 */
174 rq->cmd[4] = h->bufflen;
175 rq->cmd_len = COMMAND_SIZE(INQUIRY);
177 rq->sense = h->sense;
178 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
179 rq->sense_len = h->senselen = 0;
181 err = blk_execute_rq(rq->q, NULL, rq, 1);
183 sdev_printk(KERN_INFO, sdev,
184 "%s: evpd inquiry failed with %x\n",
185 ALUA_DH_NAME, rq->errors);
186 h->senselen = rq->sense_len;
195 * submit_rtpg - Issue a REPORT TARGET PORT GROUPS command
196 * @sdev: sdev the command should be sent to
/*
 * Build a MAINTENANCE IN / MI_REPORT_TARGET_PGS command that reads up to
 * h->bufflen bytes into h->buff and run it through blk_execute_rq().
 * h->sense is zeroed and attached as the sense buffer; h->senselen is
 * cleared before execution and updated on the logged failure path.
 * NOTE(review): declared to return unsigned although err is an int that
 * starts as an SCSI_DH_* code and callers compare the result against
 * SCSI_DH_* values. The check of the get_alua_req() result, the failure
 * condition, the release of the request and the return statement are not
 * visible in this view.
 */
198 static unsigned submit_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
201 int err = SCSI_DH_RES_TEMP_UNAVAIL;
203 rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
207 /* Prepare the command. */
208 rq->cmd[0] = MAINTENANCE_IN;
209 rq->cmd[1] = MI_REPORT_TARGET_PGS;
/* Allocation length in CDB bytes 6..9, most significant byte first. */
210 rq->cmd[6] = (h->bufflen >> 24) & 0xff;
211 rq->cmd[7] = (h->bufflen >> 16) & 0xff;
212 rq->cmd[8] = (h->bufflen >> 8) & 0xff;
213 rq->cmd[9] = h->bufflen & 0xff;
214 rq->cmd_len = COMMAND_SIZE(MAINTENANCE_IN);
216 rq->sense = h->sense;
217 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
218 rq->sense_len = h->senselen = 0;
220 err = blk_execute_rq(rq->q, NULL, rq, 1);
222 sdev_printk(KERN_INFO, sdev,
223 "%s: rtpg failed with %x\n",
224 ALUA_DH_NAME, rq->errors);
225 h->senselen = rq->sense_len;
234 * submit_stpg - Issue a SET TARGET PORT GROUPS command
235 * @sdev: sdev the command should be sent to
237 * Currently we're only setting the current target port group state
238 * to 'active/optimized' and letting the array firmware figure out
239 * the states of the remaining groups.
/*
 * Fill h->buff with stpg_len bytes of parameter data that request state
 * active/optimized for port group h->group_id, then send it with a
 * MAINTENANCE OUT / MO_SET_TARGET_PGS command through blk_execute_rq().
 * h->sense is zeroed and attached as the sense buffer; h->senselen is
 * cleared before execution and updated on the logged failure path.
 * NOTE(review): the declaration of stpg_len, the check of the
 * get_alua_req() result, the failure condition, the release of the
 * request and the return statement are not visible in this view. As with
 * submit_rtpg(), the return type is unsigned while err is an int.
 */
241 static unsigned submit_stpg(struct scsi_device *sdev, struct alua_dh_data *h)
244 int err = SCSI_DH_RES_TEMP_UNAVAIL;
247 /* Prepare the data buffer */
248 memset(h->buff, 0, stpg_len);
/* Byte 4: requested state; bytes 6..7: port group id, high byte first. */
249 h->buff[4] = TPGS_STATE_OPTIMIZED & 0x0f;
250 h->buff[6] = (h->group_id >> 8) & 0xff;
251 h->buff[7] = h->group_id & 0xff;
253 rq = get_alua_req(sdev, h->buff, stpg_len, WRITE);
257 /* Prepare the command. */
258 rq->cmd[0] = MAINTENANCE_OUT;
259 rq->cmd[1] = MO_SET_TARGET_PGS;
/* Parameter list length in CDB bytes 6..9, most significant byte first. */
260 rq->cmd[6] = (stpg_len >> 24) & 0xff;
261 rq->cmd[7] = (stpg_len >> 16) & 0xff;
262 rq->cmd[8] = (stpg_len >> 8) & 0xff;
263 rq->cmd[9] = stpg_len & 0xff;
264 rq->cmd_len = COMMAND_SIZE(MAINTENANCE_OUT);
266 rq->sense = h->sense;
267 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
268 rq->sense_len = h->senselen = 0;
270 err = blk_execute_rq(rq->q, NULL, rq, 1);
272 sdev_printk(KERN_INFO, sdev,
273 "%s: stpg failed with %x\n",
274 ALUA_DH_NAME, rq->errors);
275 h->senselen = rq->sense_len;
284 * alua_std_inquiry - Evaluate standard INQUIRY command
285 * @sdev: device to be checked
287 * Just extract the TPGS setting to find out if ALUA
/*
 * Issue the standard INQUIRY via submit_std_inquiry() and record the TPGS
 * mode of the device in h->tpgs. The mode is logged; a value matching
 * none of the explicit/implicit cases is recorded as TPGS_MODE_NONE and
 * yields SCSI_DH_DEV_UNSUPP.
 * NOTE(review): the switch header, the default label, the break and
 * return statements and the trailing printk arguments are not visible in
 * this view.
 */
290 static int alua_std_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
294 err = submit_std_inquiry(sdev, h);
296 if (err != SCSI_DH_OK)
299 /* Check TPGS setting */
/* Two-bit field: bits 5:4 of INQUIRY byte 5. */
300 h->tpgs = (h->inq[5] >> 4) & 0x3;
302 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
303 sdev_printk(KERN_INFO, sdev,
304 "%s: supports implicit and explicit TPGS\n",
307 case TPGS_MODE_EXPLICIT:
308 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
311 case TPGS_MODE_IMPLICIT:
312 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
316 h->tpgs = TPGS_MODE_NONE;
317 sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
319 err = SCSI_DH_DEV_UNSUPP;
327 * alua_vpd_inquiry - Evaluate INQUIRY vpd page 0x83
328 * @sdev: device to be checked
330 * Extract the relative target port and the target port group
331 * descriptor from the list of identifiers.
/*
 * Fetch the VPD data via submit_vpd_inquiry() and scan its descriptors
 * for the relative target port and the target port group of this path,
 * storing them in h->rel_port and h->group_id.
 * When the page is larger than the current buffer the buffer is grown
 * with realloc_buffer(); an allocation failure returns
 * SCSI_DH_DEV_TEMP_BUSY. If no group id was found (h->group_id is still
 * -1), ALUA is disabled for the device: state is set to optimized, tpgs
 * to TPGS_MODE_NONE and SCSI_DH_DEV_UNSUPP is recorded.
 * NOTE(review): the resubmission after growing the buffer, the
 * initialisation and advancing of d, the case labels and the return
 * statements are not visible in this view.
 */
333 static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
340 err = submit_vpd_inquiry(sdev, h);
342 if (err != SCSI_DH_OK)
345 /* Check if vpd page exceeds initial buffer */
/* Page length (bytes 2..3, high byte first) plus the 4-byte header. */
346 len = (h->buff[2] << 8) + h->buff[3] + 4;
347 if (len > h->bufflen) {
348 /* Resubmit with the correct length */
349 if (realloc_buffer(h, len)) {
350 sdev_printk(KERN_WARNING, sdev,
351 "%s: kmalloc buffer failed\n",
353 /* Temporary failure, bypass */
354 return SCSI_DH_DEV_TEMP_BUSY;
360 * Now look for the correct descriptor.
363 while (d < h->buff + len) {
/* Low nibble of descriptor byte 1 selects the descriptor kind. */
364 switch (d[1] & 0xf) {
366 /* Relative target port */
367 h->rel_port = (d[6] << 8) + d[7];
370 /* Target port group */
371 h->group_id = (d[6] << 8) + d[7];
379 if (h->group_id == -1) {
381 * Internal error; TPGS supported but required
382 * VPD identification descriptors not present.
383 * Disable ALUA support
385 sdev_printk(KERN_INFO, sdev,
386 "%s: No target port descriptors found\n",
388 h->state = TPGS_STATE_OPTIMIZED;
389 h->tpgs = TPGS_MODE_NONE;
390 err = SCSI_DH_DEV_UNSUPP;
392 sdev_printk(KERN_INFO, sdev,
393 "%s: port group %02x rel port %02x\n",
394 ALUA_DH_NAME, h->group_id, h->rel_port);
/*
 * Map a TPGS_STATE_* value to a single character; callers print the
 * result with "%c" in their log messages.
 * NOTE(review): the switch header, the characters returned for each case
 * and the default branch are not visible in this view.
 */
400 static char print_alua_state(int state)
403 case TPGS_STATE_OPTIMIZED:
405 case TPGS_STATE_NONOPTIMIZED:
407 case TPGS_STATE_STANDBY:
409 case TPGS_STATE_UNAVAILABLE:
411 case TPGS_STATE_OFFLINE:
413 case TPGS_STATE_TRANSITIONING:
/*
 * Decide how a command that completed with sense data should be handled.
 * @sdev:      device the sense data belongs to
 * @sense_hdr: normalized sense header (sense key, asc, ascq)
 *
 * Returns ADD_TO_MLQUEUE for the conditions below that are to be retried,
 * and SCSI_RETURN_NOT_HANDLED when nothing matches. Installed as
 * alua_dh.check_sense and also called by alua_stpg() and alua_rtpg().
 * NOTE(review): the sense-key case labels and the return values for the
 * standby, unavailable and offline conditions are not visible in this
 * view.
 */
420 static int alua_check_sense(struct scsi_device *sdev,
421 struct scsi_sense_hdr *sense_hdr)
423 switch (sense_hdr->sense_key) {
425 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a)
427 * LUN Not Accessible - ALUA state transition
429 return ADD_TO_MLQUEUE;
430 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0b)
432 * LUN Not Accessible -- Target port in standby state
435 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0c)
437 * LUN Not Accessible -- Target port in unavailable state
440 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x12)
442 * LUN Not Ready -- Offline
447 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
449 * Power On, Reset, or Bus Device Reset, just retry.
451 return ADD_TO_MLQUEUE;
/*
 * NOTE(review): the original comment for this case is not visible; the
 * T10 list names 2Ah/06h ASYMMETRIC ACCESS STATE CHANGED - confirm.
 */
452 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
456 return ADD_TO_MLQUEUE;
458 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
460 * Implicit ALUA state transition failed
462 return ADD_TO_MLQUEUE;
464 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) {
466 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
467 * when switching controllers on targets like
468 * Intel Multi-Flex. We can just retry.
470 return ADD_TO_MLQUEUE;
/* No ALUA-specific handling applies. */
476 return SCSI_RETURN_NOT_HANDLED;
480 * alua_stpg - Evaluate SET TARGET PORT GROUPS
481 * @sdev: the device to be evaluated
482 * @state: the new target group state
484 * Send a SET TARGET PORT GROUPS command to the device.
485 * We only have to test here if we should resubmit the command;
486 * any other error is treated as a failure.
/*
 * Send the STPG via submit_stpg() and evaluate the outcome. When the
 * command failed with SCSI_DH_IO and sense data is present, the sense is
 * normalized and passed to alua_check_sense(); an ADD_TO_MLQUEUE verdict
 * is eligible for a retry while the retry counter (starting at
 * ALUA_FAILOVER_RETRIES) is still positive. Other sense codes are
 * logged. On SCSI_DH_OK the resulting state of the port group is logged.
 * NOTE(review): the retry label and jump, the handling of a failed
 * scsi_normalize_sense(), the update of h->state and the return
 * statement are not visible in this view; no visible line uses @state.
 */
488 static int alua_stpg(struct scsi_device *sdev, int state,
489 struct alua_dh_data *h)
491 struct scsi_sense_hdr sense_hdr;
493 int retry = ALUA_FAILOVER_RETRIES;
496 err = submit_stpg(sdev, h);
497 if (err == SCSI_DH_IO && h->senselen > 0) {
498 err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
502 err = alua_check_sense(sdev, &sense_hdr);
503 if (retry > 0 && err == ADD_TO_MLQUEUE) {
507 sdev_printk(KERN_INFO, sdev,
508 "%s: stpg sense code: %02x/%02x/%02x\n",
509 ALUA_DH_NAME, sense_hdr.sense_key,
510 sense_hdr.asc, sense_hdr.ascq);
513 if (err == SCSI_DH_OK) {
515 sdev_printk(KERN_INFO, sdev,
516 "%s: port group %02x switched to state %c\n",
517 ALUA_DH_NAME, h->group_id,
518 print_alua_state(h->state) );
524 * alua_rtpg - Evaluate REPORT TARGET PORT GROUPS
525 * @sdev: the device to be evaluated.
527 * Evaluate the Target Port Group State.
528 * Returns SCSI_DH_DEV_OFFLINED if the path is
529 * found to be unusable.
/*
 * Issue RTPG via submit_rtpg(), locate the descriptor of this path's
 * port group (h->group_id), record its state in h->state, log it, and
 * derive the result from that state.
 * A command that failed with SCSI_DH_IO and carries sense data is passed
 * through scsi_normalize_sense() and alua_check_sense(); other sense
 * codes are logged. When the reported length exceeds the buffer, the
 * buffer is grown with realloc_buffer(), and an allocation failure
 * returns SCSI_DH_DEV_TEMP_BUSY.
 * NOTE(review): the retry label and jumps, the handling of a failed
 * scsi_normalize_sense(), the switch header, the else branch and the
 * return statements are not visible in this view.
 */
531 static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
533 struct scsi_sense_hdr sense_hdr;
534 int len, k, off, valid_states = 0;
539 err = submit_rtpg(sdev, h);
541 if (err == SCSI_DH_IO && h->senselen > 0) {
542 err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
547 err = alua_check_sense(sdev, &sense_hdr);
548 if (err == ADD_TO_MLQUEUE)
550 sdev_printk(KERN_INFO, sdev,
551 "%s: rtpg sense code %02x/%02x/%02x\n",
552 ALUA_DH_NAME, sense_hdr.sense_key,
553 sense_hdr.asc, sense_hdr.ascq);
556 if (err != SCSI_DH_OK)
/* Returned data length (bytes 0..3, high byte first) plus the 4-byte header. */
559 len = (h->buff[0] << 24) + (h->buff[1] << 16) +
560 (h->buff[2] << 8) + h->buff[3] + 4;
562 if (len > h->bufflen) {
563 /* Resubmit with the correct length */
564 if (realloc_buffer(h, len)) {
565 sdev_printk(KERN_WARNING, sdev,
566 "%s: kmalloc buffer failed\n",__func__);
567 /* Temporary failure, bypass */
568 return SCSI_DH_DEV_TEMP_BUSY;
/*
 * Walk the descriptors that follow the 4-byte header. Bytes 2..3 of a
 * descriptor hold the group id, the low nibble of byte 0 the state and
 * byte 1 the supported-states flags.
 */
573 for (k = 4, ucp = h->buff + 4; k < len; k += off, ucp += off) {
574 if (h->group_id == (ucp[2] << 8) + ucp[3]) {
575 h->state = ucp[0] & 0x0f;
576 valid_states = ucp[1];
/* Advance by the 8 fixed bytes plus 4 bytes for each of the ucp[7] entries. */
578 off = 8 + (ucp[7] * 4);
581 sdev_printk(KERN_INFO, sdev,
582 "%s: port group %02x state %c supports %c%c%c%c%c%c\n",
583 ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
584 valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
585 valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
586 valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
587 valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
588 valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
589 valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
/* Bit test: also true for devices that report implicit and explicit support. */
591 if (h->tpgs & TPGS_MODE_EXPLICIT) {
593 case TPGS_STATE_TRANSITIONING:
594 /* State transition, retry */
597 case TPGS_STATE_OFFLINE:
598 /* Path is offline, fail */
599 err = SCSI_DH_DEV_OFFLINED;
605 /* Only Implicit ALUA support */
606 if (h->state == TPGS_STATE_OPTIMIZED ||
607 h->state == TPGS_STATE_NONOPTIMIZED ||
608 h->state == TPGS_STATE_STANDBY)
609 /* Useable path if active */
612 /* Path unuseable for unavailable/offline */
613 err = SCSI_DH_DEV_OFFLINED;
619 * alua_initialize - Initialize ALUA state
620 * @sdev: the device to be initialized
622 * For the prep_fn to work correctly we have
623 * to initialize the ALUA state for the device.
/*
 * Run the three discovery steps in order: alua_std_inquiry() (TPGS
 * mode), alua_vpd_inquiry() (port group and relative port) and
 * alua_rtpg() (current port group state). Each result is checked
 * against SCSI_DH_OK.
 * NOTE(review): the statements taken on failure and the return statement
 * are not visible in this view.
 */
625 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
629 err = alua_std_inquiry(sdev, h);
630 if (err != SCSI_DH_OK)
633 err = alua_vpd_inquiry(sdev, h);
634 if (err != SCSI_DH_OK)
637 err = alua_rtpg(sdev, h);
638 if (err != SCSI_DH_OK)
646 * alua_activate - activate a path
647 * @sdev: device on the path to be activated
649 * We're currently switching the port group to be activated only and
650 * let the array figure out the rest.
651 * There may be other arrays which require us to switch all port groups
652 * based on a certain policy. But until we actually encounter them it
/*
 * Installed as alua_dh.activate. Refreshes the port group state with
 * alua_rtpg() when a group id is known, then requests a switch to
 * active/optimized through alua_stpg() if the device is in explicit
 * mode and the group is not already optimized.
 * NOTE(review): the statement taken when alua_rtpg() fails and the
 * return statement are not visible in this view.
 */
655 static int alua_activate(struct scsi_device *sdev)
657 struct alua_dh_data *h = get_alua_data(sdev);
658 int err = SCSI_DH_OK;
/* -1 means no target port group descriptor was found for this path. */
660 if (h->group_id != -1) {
661 err = alua_rtpg(sdev, h);
662 if (err != SCSI_DH_OK)
/*
 * NOTE(review): this is an equality test, whereas alua_rtpg() uses
 * "h->tpgs & TPGS_MODE_EXPLICIT". A device reporting both implicit and
 * explicit support (value 0x3) therefore never gets an STPG here -
 * confirm whether that is intended.
 */
666 if (h->tpgs == TPGS_MODE_EXPLICIT && h->state != TPGS_STATE_OPTIMIZED)
667 err = alua_stpg(sdev, TPGS_STATE_OPTIMIZED, h);
674 * alua_prep_fn - request callback
676 * Fail I/O to all paths not in state
677 * active/optimized or active/non-optimized.
/*
 * Installed as alua_dh.prep_fn. Starts from BLKPREP_OK and, when the
 * recorded port group state is neither optimized nor non-optimized,
 * marks the request REQ_QUIET.
 * NOTE(review): the assignment of the failing BLKPREP_* value and the
 * return statement are not visible in this view.
 */
679 static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
681 struct alua_dh_data *h = get_alua_data(sdev);
682 int ret = BLKPREP_OK;
684 if (h->state != TPGS_STATE_OPTIMIZED &&
685 h->state != TPGS_STATE_NONOPTIMIZED) {
687 req->cmd_flags |= REQ_QUIET;
/*
 * Vendor / model string pairs, referenced through alua_dh.devlist.
 * NOTE(review): further entries and the end of the initializer are not
 * visible in this view.
 */
693 static const struct scsi_dh_devlist alua_dev_list[] = {
694 {"HP", "MSA VOLUME" },
702 {"Pillar", "Axiom" },
703 {"Intel", "Multi-Flex"},
/* Declared ahead of their definitions because alua_dh refers to them. */
707 static int alua_bus_attach(struct scsi_device *sdev);
708 static void alua_bus_detach(struct scsi_device *sdev);
/*
 * Handler descriptor passed to scsi_register_device_handler() in
 * alua_init(); it ties the handler name and device list to the callbacks
 * defined in this file.
 */
710 static struct scsi_device_handler alua_dh = {
711 .name = ALUA_DH_NAME,
712 .module = THIS_MODULE,
713 .devlist = alua_dev_list,
714 .attach = alua_bus_attach,
715 .detach = alua_bus_detach,
716 .prep_fn = alua_prep_fn,
717 .check_sense = alua_check_sense,
718 .activate = alua_activate,
722 * alua_bus_attach - Attach device handler
723 * @sdev: device to be attached to
/*
 * Installed as alua_dh.attach. Allocates the handler data, presets the
 * ALUA fields, runs alua_initialize(), takes a module reference and
 * finally publishes the data in sdev->scsi_dh_data under the request
 * queue lock.
 * NOTE(review): the check of the kzalloc() result, the initialisation of
 * the remaining fields, the error labels with their cleanup and the
 * return statements are not visible in this view.
 */
725 static int alua_bus_attach(struct scsi_device *sdev)
727 struct scsi_dh_data *scsi_dh_data;
728 struct alua_dh_data *h;
730 int err = SCSI_DH_OK;
/* One allocation: room for the handler pointer followed by the ALUA private data. */
732 scsi_dh_data = kzalloc(sizeof(struct scsi_device_handler *)
733 + sizeof(*h) , GFP_KERNEL);
735 sdev_printk(KERN_ERR, sdev, "%s: Attach failed\n",
740 scsi_dh_data->scsi_dh = &alua_dh;
741 h = (struct alua_dh_data *) scsi_dh_data->buf;
742 h->tpgs = TPGS_MODE_UNINITIALIZED;
743 h->state = TPGS_STATE_OPTIMIZED;
747 h->bufflen = ALUA_INQUIRY_SIZE;
749 err = alua_initialize(sdev, h);
750 if (err != SCSI_DH_OK)
753 if (!try_module_get(THIS_MODULE))
/* The pointer becomes visible to the rest of the handler only once setup has succeeded. */
756 spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
757 sdev->scsi_dh_data = scsi_dh_data;
758 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
764 sdev_printk(KERN_ERR, sdev, "%s: not attached\n", ALUA_DH_NAME);
769 * alua_bus_detach - Detach device handler
770 * @sdev: device to be detached from
/*
 * Installed as alua_dh.detach. Unhooks the handler data from @sdev under
 * the request queue lock, then drops the module reference taken at
 * attach time and logs the detach.
 * NOTE(review): the statements releasing h->buff and the handler data
 * itself are not visible in this view.
 */
772 static void alua_bus_detach(struct scsi_device *sdev)
774 struct scsi_dh_data *scsi_dh_data;
775 struct alua_dh_data *h;
778 spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
779 scsi_dh_data = sdev->scsi_dh_data;
780 sdev->scsi_dh_data = NULL;
781 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
783 h = (struct alua_dh_data *) scsi_dh_data->buf;
/* Only a buffer distinct from the embedded inq[] array is considered here. */
784 if (h->buff && h->inq != h->buff)
787 module_put(THIS_MODULE);
788 sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", ALUA_DH_NAME);
/*
 * Module entry point (see module_init()): registers alua_dh with the
 * SCSI device handler core and logs an error on the failure path.
 * NOTE(review): the declaration of r, the failure condition and the
 * return statement are not visible in this view. The visible part of
 * the message has no trailing newline.
 */
791 static int __init alua_init(void)
795 r = scsi_register_device_handler(&alua_dh);
797 printk(KERN_ERR "%s: Failed to register scsi device handler",
/* Module exit point (see module_exit()): unregisters alua_dh. */
802 static void __exit alua_exit(void)
804 scsi_unregister_device_handler(&alua_dh);
/* Hook alua_init() / alua_exit() into module load and unload. */
807 module_init(alua_init);
808 module_exit(alua_exit);
/* Module metadata; the version string comes from ALUA_DH_VER. */
810 MODULE_DESCRIPTION("DM Multipath ALUA support");
811 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
812 MODULE_LICENSE("GPL");
813 MODULE_VERSION(ALUA_DH_VER);