2 * Generic SCSI-3 ALUA SCSI Device Handler
4 * Copyright (C) 2007, 2008 Hannes Reinecke, SUSE Linux Products GmbH.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 #include <scsi/scsi.h>
23 #include <scsi/scsi_eh.h>
24 #include <scsi/scsi_dh.h>
/* Handler name (log prefix and .name of the handler) and module version. */
26 #define ALUA_DH_NAME "alua"
27 #define ALUA_DH_VER "1.2"
/*
 * Asymmetric access states. h->state holds the low nibble of a port
 * group descriptor and is compared against these values.
 */
29 #define TPGS_STATE_OPTIMIZED 0x0
30 #define TPGS_STATE_NONOPTIMIZED 0x1
31 #define TPGS_STATE_STANDBY 0x2
32 #define TPGS_STATE_UNAVAILABLE 0x3
33 #define TPGS_STATE_OFFLINE 0xe
34 #define TPGS_STATE_TRANSITIONING 0xf
/*
 * Bit flags tested against the supported-states byte of a port group
 * descriptor when alua_rtpg() logs the capabilities of the group.
 */
36 #define TPGS_SUPPORT_NONE 0x00
37 #define TPGS_SUPPORT_OPTIMIZED 0x01
38 #define TPGS_SUPPORT_NONOPTIMIZED 0x02
39 #define TPGS_SUPPORT_STANDBY 0x04
40 #define TPGS_SUPPORT_UNAVAILABLE 0x08
41 #define TPGS_SUPPORT_OFFLINE 0x40
42 #define TPGS_SUPPORT_TRANSITION 0x80
/*
 * Values of h->tpgs. IMPLICIT and EXPLICIT are bit flags that may be
 * combined; UNINITIALIZED is the value set at attach time before the
 * standard INQUIRY data has been evaluated.
 */
44 #define TPGS_MODE_UNINITIALIZED -1
45 #define TPGS_MODE_NONE 0x0
46 #define TPGS_MODE_IMPLICIT 0x1
47 #define TPGS_MODE_EXPLICIT 0x2
/*
 * Size of the standard INQUIRY buffer (also the initial h->bufflen),
 * followed by the timeout and retry count applied to every request
 * built by get_alua_req().
 */
49 #define ALUA_INQUIRY_SIZE 36
50 #define ALUA_FAILOVER_TIMEOUT (60 * HZ)
51 #define ALUA_FAILOVER_RETRIES 5
/*
 * NOTE(review): the two arrays below look like members of
 * struct alua_dh_data (they are accessed as h->inq and h->sense
 * elsewhere); the enclosing declaration is not visible in this listing.
 */
/* Standard INQUIRY data; h->buff may also point at this array. */
58 unsigned char inq[ALUA_INQUIRY_SIZE];
/* Sense buffer handed to every request as rq->sense. */
61 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
/* Failover policy selectors; not referenced by any code visible here. */
65 #define ALUA_POLICY_SWITCH_CURRENT 0
66 #define ALUA_POLICY_SWITCH_ALL 1
/*
 * get_alua_data - return the ALUA private data attached to a device
 * @sdev: device whose handler data is requested
 *
 * Returns the private area (scsi_dh_data->buf) viewed as
 * struct alua_dh_data. Hits BUG_ON() if no handler data is attached.
 */
68 static inline struct alua_dh_data *get_alua_data(struct scsi_device *sdev)
70 struct scsi_dh_data *scsi_dh_data = sdev->scsi_dh_data;
71 BUG_ON(scsi_dh_data == NULL);
72 return ((struct alua_dh_data *) scsi_dh_data->buf);
/*
 * realloc_buffer - replace h->buff with a newly allocated buffer
 * @h: handler data owning the buffer
 * @len: requested size in bytes
 *
 * h->buff may point at the embedded h->inq array, in which case it is
 * not a separate allocation. Callers treat a non-zero return value as
 * an allocation failure.
 */
75 static int realloc_buffer(struct alua_dh_data *h, unsigned len)
/* Only a buffer distinct from the embedded inq array is handled here. */
77 if (h->buff && h->buff != h->inq)
80 h->buff = kmalloc(len, GFP_NOIO);
/*
 * NOTE(review): presumably the failure path, where the length falls
 * back to the size of the embedded inq array; the surrounding lines are
 * not visible in this listing.
 */
83 h->bufflen = ALUA_INQUIRY_SIZE;
/*
 * get_alua_req - build a block-layer request for an ALUA command
 * @sdev: device the request is created for
 * @buffer: kernel data buffer mapped into the request
 * @buflen: length of @buffer; a length of 0 skips the mapping step
 * @rw: direction passed to blk_get_request() (callers use READ or WRITE)
 *
 * The request is typed REQ_TYPE_BLOCK_PC, flagged fail-fast and given
 * the ALUA retry count and timeout. Failures of blk_get_request() and
 * blk_rq_map_kern() are logged.
 */
90 static struct request *get_alua_req(struct scsi_device *sdev,
91 void *buffer, unsigned buflen, int rw)
94 struct request_queue *q = sdev->request_queue;
/* GFP_NOIO is used for both the request and the buffer mapping. */
96 rq = blk_get_request(q, rw, GFP_NOIO);
99 sdev_printk(KERN_INFO, sdev,
100 "%s: blk_get_request failed\n", __func__);
104 if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
106 sdev_printk(KERN_INFO, sdev,
107 "%s: blk_rq_map_kern failed\n", __func__);
111 rq->cmd_type = REQ_TYPE_BLOCK_PC;
/* The flag list continues on a line not visible in this listing. */
112 rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
114 rq->retries = ALUA_FAILOVER_RETRIES;
115 rq->timeout = ALUA_FAILOVER_TIMEOUT;
121 * submit_std_inquiry - Issue a standard INQUIRY command
122 * @sdev: sdev the command should be sent to
/*
 * Reads ALUA_INQUIRY_SIZE bytes of standard INQUIRY data into h->inq.
 * Sense data is collected in h->sense; h->senselen is cleared before
 * the command runs and updated from the request on the logged error
 * path.
 */
124 static int submit_std_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
127 int err = SCSI_DH_RES_TEMP_UNAVAIL;
129 rq = get_alua_req(sdev, h->inq, ALUA_INQUIRY_SIZE, READ);
133 /* Prepare the command. */
134 rq->cmd[0] = INQUIRY;
/* CDB byte 4 carries the allocation length. */
137 rq->cmd[4] = ALUA_INQUIRY_SIZE;
138 rq->cmd_len = COMMAND_SIZE(INQUIRY);
/* Route sense data into the per-device buffer and start from a clean state. */
140 rq->sense = h->sense;
141 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
142 rq->sense_len = h->senselen = 0;
144 err = blk_execute_rq(rq->q, NULL, rq, 1);
/* Error path: the guarding condition is not visible in this listing. */
146 sdev_printk(KERN_INFO, sdev,
147 "%s: std inquiry failed with %x\n",
148 ALUA_DH_NAME, rq->errors);
149 h->senselen = rq->sense_len;
158 * submit_vpd_inquiry - Issue an INQUIRY VPD page 0x83 command
159 * @sdev: sdev the command should be sent to
/*
 * Reads up to h->bufflen bytes of INQUIRY data into h->buff; the header
 * comment above names VPD page 0x83. Sense data is collected in
 * h->sense / h->senselen as for the other submit helpers.
 */
161 static int submit_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
164 int err = SCSI_DH_RES_TEMP_UNAVAIL;
166 rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
170 /* Prepare the command. */
171 rq->cmd[0] = INQUIRY;
/*
 * NOTE(review): cmd[4] is a single byte. After alua_vpd_inquiry() has
 * enlarged the buffer, h->bufflen can exceed 255 and would be truncated
 * here unless CDB byte 3 is also set on a line not visible in this
 * listing. Confirm against the full source.
 */
174 rq->cmd[4] = h->bufflen;
175 rq->cmd_len = COMMAND_SIZE(INQUIRY);
/* Route sense data into the per-device buffer and start from a clean state. */
177 rq->sense = h->sense;
178 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
179 rq->sense_len = h->senselen = 0;
181 err = blk_execute_rq(rq->q, NULL, rq, 1);
/* Error path: the guarding condition is not visible in this listing. */
183 sdev_printk(KERN_INFO, sdev,
184 "%s: evpd inquiry failed with %x\n",
185 ALUA_DH_NAME, rq->errors);
186 h->senselen = rq->sense_len;
195 * submit_rtpg - Issue a REPORT TARGET PORT GROUPS command
196 * @sdev: sdev the command should be sent to
/*
 * Sends MAINTENANCE IN with the MI_REPORT_TARGET_PGS service action and
 * reads the response into h->buff. Sense data is collected in
 * h->sense / h->senselen.
 * Note that the function is declared unsigned while err is an int.
 */
198 static unsigned submit_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
201 int err = SCSI_DH_RES_TEMP_UNAVAIL;
203 rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
207 /* Prepare the command. */
208 rq->cmd[0] = MAINTENANCE_IN;
209 rq->cmd[1] = MI_REPORT_TARGET_PGS;
/* Allocation length, most significant byte first, in CDB bytes 6..9. */
210 rq->cmd[6] = (h->bufflen >> 24) & 0xff;
211 rq->cmd[7] = (h->bufflen >> 16) & 0xff;
212 rq->cmd[8] = (h->bufflen >> 8) & 0xff;
213 rq->cmd[9] = h->bufflen & 0xff;
214 rq->cmd_len = COMMAND_SIZE(MAINTENANCE_IN);
/* Route sense data into the per-device buffer and start from a clean state. */
216 rq->sense = h->sense;
217 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
218 rq->sense_len = h->senselen = 0;
220 err = blk_execute_rq(rq->q, NULL, rq, 1);
/* Error path: the guarding condition is not visible in this listing. */
222 sdev_printk(KERN_INFO, sdev,
223 "%s: rtpg failed with %x\n",
224 ALUA_DH_NAME, rq->errors);
225 h->senselen = rq->sense_len;
234 * submit_stpg - Issue a SET TARGET PORT GROUPS command
235 * @sdev: sdev the command should be sent to
237 * Currently we're only setting the current target port group state
238 * to 'active/optimized' and let the array firmware figure out
239 * the states of the remaining groups.
/*
 * Sends MAINTENANCE OUT with the MO_SET_TARGET_PGS service action for
 * the port group recorded in h->group_id.
 *
 * The parameter data is assembled in h->buff: byte 4 carries the
 * requested state (always active/optimized) and bytes 6-7 carry the
 * port group id, most significant byte first. Sense data is collected
 * in h->sense / h->senselen.
 *
 * NOTE(review): stpg_len is declared on a line not visible in this
 * listing.
 */
241 static unsigned submit_stpg(struct scsi_device *sdev, struct alua_dh_data *h)
244 int err = SCSI_DH_RES_TEMP_UNAVAIL;
247 /* Prepare the data buffer */
248 memset(h->buff, 0, stpg_len);
249 h->buff[4] = TPGS_STATE_OPTIMIZED & 0x0f;
/*
 * The group id is read elsewhere in this file as a 16-bit value (two
 * full bytes of the VPD and RTPG descriptors), so each byte is masked
 * with 0xff. A 0x0f mask would address the wrong group for any id with
 * bits set outside the low nibble of either byte.
 */
250 h->buff[6] = (h->group_id >> 8) & 0xff;
251 h->buff[7] = h->group_id & 0xff;
253 rq = get_alua_req(sdev, h->buff, stpg_len, WRITE);
257 /* Prepare the command. */
258 rq->cmd[0] = MAINTENANCE_OUT;
259 rq->cmd[1] = MO_SET_TARGET_PGS;
/* Parameter list length, most significant byte first, in CDB bytes 6..9. */
260 rq->cmd[6] = (stpg_len >> 24) & 0xff;
261 rq->cmd[7] = (stpg_len >> 16) & 0xff;
262 rq->cmd[8] = (stpg_len >> 8) & 0xff;
263 rq->cmd[9] = stpg_len & 0xff;
264 rq->cmd_len = COMMAND_SIZE(MAINTENANCE_OUT);
/* Route sense data into the per-device buffer and start from a clean state. */
266 rq->sense = h->sense;
267 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
268 rq->sense_len = h->senselen = 0;
270 err = blk_execute_rq(rq->q, NULL, rq, 1);
/* Error path: the guarding condition is not visible in this listing. */
272 sdev_printk(KERN_INFO, sdev,
273 "%s: stpg failed with %x\n",
274 ALUA_DH_NAME, rq->errors);
275 h->senselen = rq->sense_len;
284 * alua_std_inquiry - Evaluate standard INQUIRY command
285 * @sdev: device to be checked
287 * Just extract the TPGS setting to find out if ALUA
/*
 * Issues the standard INQUIRY and stores the two-bit TPGS field in
 * h->tpgs. The supported mode is logged; on the unsupported path
 * h->tpgs is set to TPGS_MODE_NONE and err to SCSI_DH_DEV_UNSUPP.
 */
290 static int alua_std_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
294 err = submit_std_inquiry(sdev, h);
296 if (err != SCSI_DH_OK)
299 /* Check TPGS setting */
/* Bits 4-5 of INQUIRY byte 5 form the TPGS field. */
300 h->tpgs = (h->inq[5] >> 4) & 0x3;
302 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
303 sdev_printk(KERN_INFO, sdev,
304 "%s: supports implicit and explicit TPGS\n",
307 case TPGS_MODE_EXPLICIT:
308 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
311 case TPGS_MODE_IMPLICIT:
312 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
/* Remaining case (label not visible here): the device has no TPGS support. */
316 h->tpgs = TPGS_MODE_NONE;
317 sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
319 err = SCSI_DH_DEV_UNSUPP;
327 * alua_vpd_inquiry - Evaluate INQUIRY vpd page 0x83
328 * @sdev: device to be checked
330 * Extract the relative target port and the target port group
331 * descriptor from the list of identifiers.
/*
 * Fetches the VPD page into h->buff, enlarging the buffer when the page
 * is longer than the current one, then walks the descriptors to fill
 * h->rel_port and h->group_id. If no port group id was found
 * (h->group_id is still -1), ALUA is disabled for the device and
 * SCSI_DH_DEV_UNSUPP is reported.
 */
333 static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
340 err = submit_vpd_inquiry(sdev, h);
342 if (err != SCSI_DH_OK)
345 /* Check if vpd page exceeds initial buffer */
/* Page length from bytes 2-3 (big-endian) plus the 4 header bytes. */
346 len = (h->buff[2] << 8) + h->buff[3] + 4;
347 if (len > h->bufflen) {
348 /* Resubmit with the correct length */
349 if (realloc_buffer(h, len)) {
350 sdev_printk(KERN_WARNING, sdev,
351 "%s: kmalloc buffer failed\n",
353 /* Temporary failure, bypass */
354 return SCSI_DH_DEV_TEMP_BUSY;
360 * Now look for the correct descriptor.
/* d walks the descriptors; the low nibble of d[1] selects the type. */
363 while (d < h->buff + len) {
364 switch (d[1] & 0xf) {
366 /* Relative target port */
367 h->rel_port = (d[6] << 8) + d[7];
370 /* Target port group */
371 h->group_id = (d[6] << 8) + d[7];
379 if (h->group_id == -1) {
381 * Internal error; TPGS supported but required
382 * VPD identification descriptors not present.
383 * Disable ALUA support
385 sdev_printk(KERN_INFO, sdev,
386 "%s: No target port descriptors found\n",
388 h->state = TPGS_STATE_OPTIMIZED;
389 h->tpgs = TPGS_MODE_NONE;
390 err = SCSI_DH_DEV_UNSUPP;
392 sdev_printk(KERN_INFO, sdev,
393 "%s: port group %02x rel port %02x\n",
394 ALUA_DH_NAME, h->group_id, h->rel_port);
/*
 * Maps an asymmetric access state to a single character for the log
 * messages printed with a %c conversion. The returned characters are
 * on lines not visible in this listing.
 */
400 static char print_alua_state(int state)
403 case TPGS_STATE_OPTIMIZED:
405 case TPGS_STATE_NONOPTIMIZED:
407 case TPGS_STATE_STANDBY:
409 case TPGS_STATE_UNAVAILABLE:
411 case TPGS_STATE_OFFLINE:
413 case TPGS_STATE_TRANSITIONING:
/*
 * Sense handler registered as .check_sense of the handler.
 *
 * Returns ADD_TO_MLQUEUE for the conditions that should be retried and
 * SCSI_RETURN_NOT_HANDLED when the sense data is not ALUA related.
 * The sense key case labels and some return statements are on lines
 * not visible in this listing.
 */
420 static int alua_check_sense(struct scsi_device *sdev,
421 struct scsi_sense_hdr *sense_hdr)
423 switch (sense_hdr->sense_key) {
425 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a)
427 * LUN Not Accessible - ALUA state transition
429 return ADD_TO_MLQUEUE;
430 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0b)
432 * LUN Not Accessible -- Target port in standby state
435 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0c)
437 * LUN Not Accessible -- Target port in unavailable state
440 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x12)
442 * LUN Not Ready -- Offline
447 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
449 * Power On, Reset, or Bus Device Reset, just retry.
451 return ADD_TO_MLQUEUE;
/*
 * NOTE(review): ASC/ASCQ 2Ah/06h is presumably the asymmetric access
 * state changed condition; the original comment is not visible here.
 */
452 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
456 return ADD_TO_MLQUEUE;
458 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
460 * Implicit ALUA state transition failed
462 return ADD_TO_MLQUEUE;
/* Anything else is left to the generic error handling. */
467 return SCSI_RETURN_NOT_HANDLED;
471 * alua_stpg - Evaluate SET TARGET PORT GROUPS
472 * @sdev: the device to be evaluated
473 * @state: the new target group state
475 * Send a SET TARGET PORT GROUPS command to the device.
476 * We only have to test here if we should resubmit the command;
477 * any other error is treated as a failure.
/*
 * Submits the STPG and, when it fails with SCSI_DH_IO and sense data is
 * available, decodes the sense data with alua_check_sense(). A result
 * of ADD_TO_MLQUEUE is checked together with the remaining retry
 * budget; the retry itself is on lines not visible in this listing.
 * On success the new state of the port group is logged.
 */
479 static int alua_stpg(struct scsi_device *sdev, int state,
480 struct alua_dh_data *h)
482 struct scsi_sense_hdr sense_hdr;
484 int retry = ALUA_FAILOVER_RETRIES;
487 err = submit_stpg(sdev, h);
488 if (err == SCSI_DH_IO && h->senselen > 0) {
/* The call continues on a line not visible in this listing. */
489 err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
493 err = alua_check_sense(sdev, &sense_hdr);
494 if (retry > 0 && err == ADD_TO_MLQUEUE) {
498 sdev_printk(KERN_INFO, sdev,
499 "%s: stpg sense code: %02x/%02x/%02x\n",
500 ALUA_DH_NAME, sense_hdr.sense_key,
501 sense_hdr.asc, sense_hdr.ascq);
504 if (err == SCSI_DH_OK) {
506 sdev_printk(KERN_INFO, sdev,
507 "%s: port group %02x switched to state %c\n",
508 ALUA_DH_NAME, h->group_id,
509 print_alua_state(h->state) );
515 * alua_rtpg - Evaluate REPORT TARGET PORT GROUPS
516 * @sdev: the device to be evaluated.
518 * Evaluate the Target Port Group State.
519 * Returns SCSI_DH_DEV_OFFLINED if the path is
520 * found to be unusable.
/*
 * Submits an RTPG, enlarges the buffer when the response is longer than
 * the current one, then searches the descriptors for the group matching
 * h->group_id and records its state in h->state. The state and the
 * supported-state flags are logged, and the state is finally evaluated
 * according to the TPGS mode of the device.
 */
522 static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
524 struct scsi_sense_hdr sense_hdr;
525 int len, k, off, valid_states = 0;
530 err = submit_rtpg(sdev, h);
532 if (err == SCSI_DH_IO && h->senselen > 0) {
/* The call continues on a line not visible in this listing. */
533 err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
538 err = alua_check_sense(sdev, &sense_hdr);
539 if (err == ADD_TO_MLQUEUE)
541 sdev_printk(KERN_INFO, sdev,
542 "%s: rtpg sense code %02x/%02x/%02x\n",
543 ALUA_DH_NAME, sense_hdr.sense_key,
544 sense_hdr.asc, sense_hdr.ascq);
547 if (err != SCSI_DH_OK)
/*
 * Response length: 32-bit big-endian value in bytes 0-3 plus the 4
 * header bytes.
 * NOTE(review): h->buff[0] is promoted to int before the shift, so a
 * first byte of 0x80 or above overflows a signed int; consider an
 * unsigned conversion.
 */
550 len = (h->buff[0] << 24) + (h->buff[1] << 16) +
551 (h->buff[2] << 8) + h->buff[3] + 4;
553 if (len > h->bufflen) {
554 /* Resubmit with the correct length */
555 if (realloc_buffer(h, len)) {
556 sdev_printk(KERN_WARNING, sdev,
557 "%s: kmalloc buffer failed\n",__func__);
558 /* Temporary failure, bypass */
559 return SCSI_DH_DEV_TEMP_BUSY;
/* Descriptors start after the 4-byte header; bytes 2-3 hold the group id. */
564 for (k = 4, ucp = h->buff + 4; k < len; k += off, ucp += off) {
565 if (h->group_id == (ucp[2] << 8) + ucp[3]) {
566 h->state = ucp[0] & 0x0f;
567 valid_states = ucp[1];
/* Each descriptor is 8 bytes plus 4 bytes per entry counted in ucp[7]. */
569 off = 8 + (ucp[7] * 4);
/* Upper case marks a supported state, lower case an unsupported one. */
572 sdev_printk(KERN_INFO, sdev,
573 "%s: port group %02x state %c supports %c%c%c%c%c%c\n",
574 ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
575 valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
576 valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
577 valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
578 valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
579 valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
580 valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
/* tpgs is tested as a bit mask, so implicit+explicit devices match too. */
582 if (h->tpgs & TPGS_MODE_EXPLICIT) {
584 case TPGS_STATE_TRANSITIONING:
585 /* State transition, retry */
588 case TPGS_STATE_OFFLINE:
589 /* Path is offline, fail */
590 err = SCSI_DH_DEV_OFFLINED;
596 /* Only Implicit ALUA support */
597 if (h->state == TPGS_STATE_OPTIMIZED ||
598 h->state == TPGS_STATE_NONOPTIMIZED ||
599 h->state == TPGS_STATE_STANDBY)
600 /* Usable path if active */
603 /* Path unusable for unavailable/offline */
604 err = SCSI_DH_DEV_OFFLINED;
610 * alua_initialize - Initialize ALUA state
611 * @sdev: the device to be initialized
613 * For the prep_fn to work correctly we have
614 * to initialize the ALUA state for the device.
/*
 * Runs the three probing steps in order: standard INQUIRY (TPGS mode),
 * VPD inquiry (port group id and relative port), then RTPG (current
 * state). The result of each step is checked against SCSI_DH_OK before
 * the next step runs.
 */
616 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
620 err = alua_std_inquiry(sdev, h);
621 if (err != SCSI_DH_OK)
624 err = alua_vpd_inquiry(sdev, h);
625 if (err != SCSI_DH_OK)
628 err = alua_rtpg(sdev, h);
629 if (err != SCSI_DH_OK)
637 * alua_activate - activate a path
638 * @sdev: device on the path to be activated
640 * We're currently switching the port group to be activated only and
641 * let the array figure out the rest.
642 * There may be other arrays which require us to switch all port groups
643 * based on a certain policy. But until we actually encounter them it
/*
 * Registered as .activate of the handler.
 *
 * When a port group id is known, the group state is refreshed with an
 * RTPG first. If the device supports explicit ALUA and the group is
 * not already active/optimized, an STPG is sent to switch it.
 */
646 static int alua_activate(struct scsi_device *sdev)
648 struct alua_dh_data *h = get_alua_data(sdev);
649 int err = SCSI_DH_OK;
651 if (h->group_id != -1) {
652 err = alua_rtpg(sdev, h);
653 if (err != SCSI_DH_OK)
/*
 * h->tpgs is a bit mask: a device reporting both implicit and explicit
 * support has both bits set and must take the STPG path as well, so the
 * explicit bit is tested instead of comparing for equality. This is the
 * same test alua_rtpg() uses.
 */
657 if ((h->tpgs & TPGS_MODE_EXPLICIT) && h->state != TPGS_STATE_OPTIMIZED)
658 err = alua_stpg(sdev, TPGS_STATE_OPTIMIZED, h);
665 * alua_prep_fn - request callback
667 * Fail I/O to all paths not in state
668 * active/optimized or active/non-optimized.
/*
 * Registered as .prep_fn of the handler. Starts from BLKPREP_OK; for a
 * path whose state is neither active/optimized nor
 * active/non-optimized the request is marked REQ_QUIET. The assignment
 * of the failing return value is on a line not visible in this listing.
 */
670 static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
672 struct alua_dh_data *h = get_alua_data(sdev);
673 int ret = BLKPREP_OK;
675 if (h->state != TPGS_STATE_OPTIMIZED &&
676 h->state != TPGS_STATE_NONOPTIMIZED) {
678 req->cmd_flags |= REQ_QUIET;
/*
 * Vendor and model strings of the devices this handler applies to;
 * referenced through alua_dh.devlist. Further entries are on lines not
 * visible in this listing.
 */
684 static const struct scsi_dh_devlist alua_dev_list[] = {
685 {"HP", "MSA VOLUME" },
693 {"Pillar", "Axiom" },
/* Forward declarations: attach and detach are defined below the handler. */
697 static int alua_bus_attach(struct scsi_device *sdev);
698 static void alua_bus_detach(struct scsi_device *sdev);
/* Handler description passed to scsi_register_device_handler(). */
700 static struct scsi_device_handler alua_dh = {
701 .name = ALUA_DH_NAME,
702 .module = THIS_MODULE,
703 .devlist = alua_dev_list,
704 .attach = alua_bus_attach,
705 .detach = alua_bus_detach,
706 .prep_fn = alua_prep_fn,
707 .check_sense = alua_check_sense,
708 .activate = alua_activate,
712 * alua_bus_attach - Attach device handler
713 * @sdev: device to be attached to
/*
 * Allocates the handler data, initialises it to defaults, probes the
 * device with alua_initialize(), takes a module reference and finally
 * publishes the data in sdev->scsi_dh_data under the queue lock.
 * Failures are logged; the cleanup code is on lines not visible in
 * this listing.
 */
715 static int alua_bus_attach(struct scsi_device *sdev)
717 struct scsi_dh_data *scsi_dh_data;
718 struct alua_dh_data *h;
720 int err = SCSI_DH_OK;
/*
 * One zeroed allocation holds the handler pointer followed by the
 * private data that is accessed through scsi_dh_data->buf.
 */
722 scsi_dh_data = kzalloc(sizeof(struct scsi_device_handler *)
723 + sizeof(*h) , GFP_KERNEL);
725 sdev_printk(KERN_ERR, sdev, "%s: Attach failed\n",
730 scsi_dh_data->scsi_dh = &alua_dh;
731 h = (struct alua_dh_data *) scsi_dh_data->buf;
/* Defaults until the device has been probed. */
732 h->tpgs = TPGS_MODE_UNINITIALIZED;
733 h->state = TPGS_STATE_OPTIMIZED;
737 h->bufflen = ALUA_INQUIRY_SIZE;
739 err = alua_initialize(sdev, h);
740 if (err != SCSI_DH_OK)
/* Take a module reference; alua_bus_detach() drops it again. */
743 if (!try_module_get(THIS_MODULE))
746 spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
747 sdev->scsi_dh_data = scsi_dh_data;
748 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
754 sdev_printk(KERN_ERR, sdev, "%s: not attached\n", ALUA_DH_NAME);
759 * alua_bus_detach - Detach device handler
760 * @sdev: device to be detached from
/*
 * Unhooks the handler data from the device under the queue lock, then
 * drops the module reference and logs the detach. The release of the
 * buffers is on lines not visible in this listing.
 */
762 static void alua_bus_detach(struct scsi_device *sdev)
764 struct scsi_dh_data *scsi_dh_data;
765 struct alua_dh_data *h;
/* Clear the device pointer first so the data is no longer reachable. */
768 spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
769 scsi_dh_data = sdev->scsi_dh_data;
770 sdev->scsi_dh_data = NULL;
771 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
773 h = (struct alua_dh_data *) scsi_dh_data->buf;
/* Only a buffer distinct from the embedded inq array is handled here. */
774 if (h->buff && h->inq != h->buff)
777 module_put(THIS_MODULE);
778 sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", ALUA_DH_NAME);
/*
 * Module entry point: registers the ALUA device handler and logs an
 * error when registration fails.
 */
781 static int __init alua_init(void)
785 r = scsi_register_device_handler(&alua_dh);
/* Terminate the message with a newline so it forms a complete log line. */
787 printk(KERN_ERR "%s: Failed to register scsi device handler\n",
/* Module exit point: unregisters the ALUA device handler. */
792 static void __exit alua_exit(void)
794 scsi_unregister_device_handler(&alua_dh);
/* Hook the entry and exit points into module load and unload. */
797 module_init(alua_init);
798 module_exit(alua_exit);
/* Module metadata; the version string comes from ALUA_DH_VER. */
800 MODULE_DESCRIPTION("DM Multipath ALUA support");
801 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
802 MODULE_LICENSE("GPL");
803 MODULE_VERSION(ALUA_DH_VER);