2 * Generic SCSI-3 ALUA SCSI Device Handler
4 * Copyright (C) 2007, 2008 Hannes Reinecke, SUSE Linux Products GmbH.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 #include <scsi/scsi.h>
23 #include <scsi/scsi_eh.h>
24 #include <scsi/scsi_dh.h>
/* Name the handler registers under; also the prefix of most of its log lines. */
26 #define ALUA_DH_NAME "alua"
/* Version string reported through MODULE_VERSION(). */
27 #define ALUA_DH_VER "1.2"
/*
 * Target port group state codes. alua_rtpg() stores the low four bits of the
 * first byte of the matching descriptor in h->state and the rest of the file
 * compares h->state against these values.
 */
29 #define TPGS_STATE_OPTIMIZED 0x0
30 #define TPGS_STATE_NONOPTIMIZED 0x1
31 #define TPGS_STATE_STANDBY 0x2
32 #define TPGS_STATE_UNAVAILABLE 0x3
33 #define TPGS_STATE_OFFLINE 0xe
34 #define TPGS_STATE_TRANSITIONING 0xf
/*
 * Bit flags tested against the valid_states byte that alua_rtpg() reads from
 * the second byte of the matching descriptor.
 */
36 #define TPGS_SUPPORT_NONE 0x00
37 #define TPGS_SUPPORT_OPTIMIZED 0x01
38 #define TPGS_SUPPORT_NONOPTIMIZED 0x02
39 #define TPGS_SUPPORT_STANDBY 0x04
40 #define TPGS_SUPPORT_UNAVAILABLE 0x08
41 #define TPGS_SUPPORT_OFFLINE 0x40
42 #define TPGS_SUPPORT_TRANSITION 0x80
/*
 * Values of h->tpgs. Apart from the UNINITIALIZED marker set at attach time
 * they match the two-bit field alua_std_inquiry() extracts from byte 5 of the
 * standard INQUIRY data; IMPLICIT and EXPLICIT may both be set.
 */
44 #define TPGS_MODE_UNINITIALIZED -1
45 #define TPGS_MODE_NONE 0x0
46 #define TPGS_MODE_IMPLICIT 0x1
47 #define TPGS_MODE_EXPLICIT 0x2
/* Size of the embedded INQUIRY buffer and initial value of h->bufflen. */
49 #define ALUA_INQUIRY_SIZE 36
/* Timeout that get_alua_req() assigns to every request it builds. */
50 #define ALUA_FAILOVER_TIMEOUT (60 * HZ)
/* Retry count set on each request and initial retry budget in alua_stpg(). */
51 #define ALUA_FAILOVER_RETRIES 5
/*
 * Two members of struct alua_dh_data (used below as h->inq and h->sense).
 * The remainder of the structure definition is not part of this listing.
 */
/* Standard INQUIRY data; h->buff is compared against it before any kfree. */
58 unsigned char inq[ALUA_INQUIRY_SIZE];
/* Sense buffer attached to every request issued by the submit helpers. */
61 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
/*
 * Port group switching policy selectors. No other line visible in this
 * listing references them.
 */
65 #define ALUA_POLICY_SWITCH_CURRENT 0
66 #define ALUA_POLICY_SWITCH_ALL 1
/*
 * get_alua_data - return the ALUA private data of a device
 * @sdev: device whose handler data is wanted
 *
 * The private data lives in the buf member of the scsi_dh_data attached to
 * @sdev. A device without handler data trips the BUG_ON() below.
 */
68 static inline struct alua_dh_data *get_alua_data(struct scsi_device *sdev)
70 struct scsi_dh_data *scsi_dh_data = sdev->scsi_dh_data;
/* Only valid for devices that currently have handler data attached. */
71 BUG_ON(scsi_dh_data == NULL);
72 return ((struct alua_dh_data *) scsi_dh_data->buf);
/*
 * realloc_buffer - replace the work buffer h->buff with a new allocation
 * @h: handler data owning the buffer
 * @len: requested size in bytes
 *
 * Callers (alua_vpd_inquiry, alua_rtpg) treat a nonzero return value as an
 * allocation failure.
 */
75 static int realloc_buffer(struct alua_dh_data *h, unsigned len)
/* The embedded h->inq array may serve as h->buff and must never be freed. */
77 if (h->buff && h->buff != h->inq)
80 h->buff = kmalloc(len, GFP_NOIO);
/*
 * NOTE(review): presumably part of the kmalloc failure path that falls back
 * to the embedded buffer; the surrounding lines are missing from this
 * listing, confirm against the full source.
 */
83 h->bufflen = ALUA_INQUIRY_SIZE;
/*
 * get_alua_req - allocate and prepare a request for an ALUA command
 * @sdev: device the request is allocated for
 * @buffer: kernel buffer used for the data transfer
 * @buflen: length of @buffer; zero skips the buffer mapping
 * @rw: data direction passed to blk_get_request (callers use READ or WRITE)
 *
 * The request is marked as a BLOCK_PC request with failfast and nomerge
 * flags and receives the ALUA retry count and timeout. Both failure cases
 * are logged.
 * NOTE(review): the value returned on failure is not visible in this
 * listing (presumably NULL).
 */
90 static struct request *get_alua_req(struct scsi_device *sdev,
91 void *buffer, unsigned buflen, int rw)
94 struct request_queue *q = sdev->request_queue;
96 rq = blk_get_request(q, rw, GFP_NOIO);
99 sdev_printk(KERN_INFO, sdev,
100 "%s: blk_get_request failed\n", __func__);
/* Mapping is attempted only when there is data to transfer. */
104 if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
106 sdev_printk(KERN_INFO, sdev,
107 "%s: blk_rq_map_kern failed\n", __func__);
111 rq->cmd_type = REQ_TYPE_BLOCK_PC;
112 rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
113 rq->retries = ALUA_FAILOVER_RETRIES;
114 rq->timeout = ALUA_FAILOVER_TIMEOUT;
120 * submit_std_inquiry - Issue a standard INQUIRY command
121 * @sdev: sdev the command should be sent to
/*
 * Reads ALUA_INQUIRY_SIZE bytes of standard INQUIRY data into h->inq and
 * executes the request synchronously. h->sense is cleared before execution;
 * h->senselen is reset to zero and updated on the logged failure path.
 * The result starts out as SCSI_DH_RES_TEMP_UNAVAIL; callers compare the
 * return value against SCSI_DH_OK.
 */
123 static int submit_std_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
126 int err = SCSI_DH_RES_TEMP_UNAVAIL;
128 rq = get_alua_req(sdev, h->inq, ALUA_INQUIRY_SIZE, READ);
132 /* Prepare the command. */
133 rq->cmd[0] = INQUIRY;
/* Allocation length: exactly the size of the embedded inq buffer. */
136 rq->cmd[4] = ALUA_INQUIRY_SIZE;
137 rq->cmd_len = COMMAND_SIZE(INQUIRY);
/* Start from an empty sense buffer so stale sense data is never reported. */
139 rq->sense = h->sense;
140 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
141 rq->sense_len = h->senselen = 0;
143 err = blk_execute_rq(rq->q, NULL, rq, 1);
145 sdev_printk(KERN_INFO, sdev,
146 "%s: std inquiry failed with %x\n",
147 ALUA_DH_NAME, rq->errors);
/* Remember how much sense data came back for the caller. */
148 h->senselen = rq->sense_len;
157 * submit_vpd_inquiry - Issue an INQUIRY VPD page 0x83 command
158 * @sdev: sdev the command should be sent to
/*
 * Issues an INQUIRY whose data lands in the work buffer h->buff (h->bufflen
 * bytes) and executes it synchronously. Sense handling mirrors
 * submit_std_inquiry: h->sense is cleared first and h->senselen is updated
 * on the logged failure path.
 * NOTE(review): the lines selecting the VPD page are missing from this
 * listing.
 */
160 static int submit_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
163 int err = SCSI_DH_RES_TEMP_UNAVAIL;
165 rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
169 /* Prepare the command. */
170 rq->cmd[0] = INQUIRY;
/*
 * NOTE(review): only one CDB byte carries the allocation length here.
 * alua_vpd_inquiry may grow h->bufflen up to 0xffff + 4 via realloc_buffer,
 * and this assignment would then truncate to the low 8 bits. Check whether
 * cmd[3] should carry the high byte.
 */
173 rq->cmd[4] = h->bufflen;
174 rq->cmd_len = COMMAND_SIZE(INQUIRY);
176 rq->sense = h->sense;
177 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
178 rq->sense_len = h->senselen = 0;
180 err = blk_execute_rq(rq->q, NULL, rq, 1);
182 sdev_printk(KERN_INFO, sdev,
183 "%s: evpd inquiry failed with %x\n",
184 ALUA_DH_NAME, rq->errors);
185 h->senselen = rq->sense_len;
194 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
195 * @sdev: sdev the command should be sent to
/*
 * Builds a MAINTENANCE IN command with the MI_REPORT_TARGET_PGS service
 * action, reading into h->buff, and executes it synchronously. h->sense is
 * cleared first and h->senselen is updated on the logged failure path.
 * NOTE(review): the return type is unsigned while err is an int and the
 * caller alua_rtpg stores the result in its own err variable.
 */
197 static unsigned submit_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
200 int err = SCSI_DH_RES_TEMP_UNAVAIL;
202 rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
206 /* Prepare the command. */
207 rq->cmd[0] = MAINTENANCE_IN;
208 rq->cmd[1] = MI_REPORT_TARGET_PGS;
/* Allocation length h->bufflen, most significant byte first, in bytes 6-9. */
209 rq->cmd[6] = (h->bufflen >> 24) & 0xff;
210 rq->cmd[7] = (h->bufflen >> 16) & 0xff;
211 rq->cmd[8] = (h->bufflen >> 8) & 0xff;
212 rq->cmd[9] = h->bufflen & 0xff;
213 rq->cmd_len = COMMAND_SIZE(MAINTENANCE_IN);
215 rq->sense = h->sense;
216 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
217 rq->sense_len = h->senselen = 0;
219 err = blk_execute_rq(rq->q, NULL, rq, 1);
221 sdev_printk(KERN_INFO, sdev,
222 "%s: rtpg failed with %x\n",
223 ALUA_DH_NAME, rq->errors);
224 h->senselen = rq->sense_len;
233 * submit_stpg - Issue a SET TARGET GROUP STATES command
234 * @sdev: sdev the command should be sent to
236 * Currently we're only setting the current target port group state
237 * to 'active/optimized' and letting the array firmware figure out
238 * the states of the remaining groups.
/*
 * Fills the first stpg_len bytes of h->buff with a single descriptor that
 * requests TPGS_STATE_OPTIMIZED for h->group_id, then sends it with a
 * MAINTENANCE OUT command using the MO_SET_TARGET_PGS service action.
 * h->sense is cleared first and h->senselen is updated on the logged
 * failure path.
 * NOTE(review): the definition of stpg_len is missing from this listing.
 */
240 static unsigned submit_stpg(struct scsi_device *sdev, struct alua_dh_data *h)
243 int err = SCSI_DH_RES_TEMP_UNAVAIL;
246 /* Prepare the data buffer */
247 memset(h->buff, 0, stpg_len);
248 h->buff[4] = TPGS_STATE_OPTIMIZED & 0x0f;
/*
 * NOTE(review): both group id bytes are masked with 0x0f, so only the low
 * nibble of each byte is sent. alua_vpd_inquiry and alua_rtpg handle
 * group_id as a full 16-bit value ((x << 8) + y), so any id with bits set
 * outside 0x0f0f is sent incorrectly. The mask should most likely be 0xff.
 */
249 h->buff[6] = (h->group_id >> 8) & 0x0f;
250 h->buff[7] = h->group_id & 0x0f;
252 rq = get_alua_req(sdev, h->buff, stpg_len, WRITE);
256 /* Prepare the command. */
257 rq->cmd[0] = MAINTENANCE_OUT;
258 rq->cmd[1] = MO_SET_TARGET_PGS;
/* Parameter list length stpg_len, most significant byte first. */
259 rq->cmd[6] = (stpg_len >> 24) & 0xff;
260 rq->cmd[7] = (stpg_len >> 16) & 0xff;
261 rq->cmd[8] = (stpg_len >> 8) & 0xff;
262 rq->cmd[9] = stpg_len & 0xff;
263 rq->cmd_len = COMMAND_SIZE(MAINTENANCE_OUT);
265 rq->sense = h->sense;
266 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
267 rq->sense_len = h->senselen = 0;
269 err = blk_execute_rq(rq->q, NULL, rq, 1);
271 sdev_printk(KERN_INFO, sdev,
272 "%s: stpg failed with %x\n",
273 ALUA_DH_NAME, rq->errors);
274 h->senselen = rq->sense_len;
283 * alua_std_inquiry - Evaluate standard INQUIRY command
284 * @sdev: device to be checked
286 * Just extract the TPGS setting to find out if ALUA is supported.
/*
 * Runs submit_std_inquiry and decodes the TPGS mode from the returned data
 * into h->tpgs. Each supported combination is logged. Any other value
 * resets h->tpgs to TPGS_MODE_NONE and yields SCSI_DH_DEV_UNSUPP.
 */
289 static int alua_std_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
293 err = submit_std_inquiry(sdev, h);
295 if (err != SCSI_DH_OK)
298 /* Check TPGS setting */
/* Two-bit field in bits 4-5 of INQUIRY byte 5. */
299 h->tpgs = (h->inq[5] >> 4) & 0x3;
301 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
302 sdev_printk(KERN_INFO, sdev,
303 "%s: supports implicit and explicit TPGS\n",
306 case TPGS_MODE_EXPLICIT:
307 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
310 case TPGS_MODE_IMPLICIT:
311 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
/* No TPGS bits set: the device cannot be driven by this handler. */
315 h->tpgs = TPGS_MODE_NONE;
316 sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
318 err = SCSI_DH_DEV_UNSUPP;
326 * alua_vpd_inquiry - Evaluate INQUIRY vpd page 0x83
327 * @sdev: device to be checked
329 * Extract the relative target port and the target port group
330 * descriptor from the list of identifiers.
/*
 * Runs submit_vpd_inquiry and scans the returned descriptors for the
 * relative target port (stored in h->rel_port) and the target port group
 * (stored in h->group_id). If the page is longer than the work buffer, the
 * buffer is regrown with realloc_buffer; a failed allocation returns
 * SCSI_DH_DEV_TEMP_BUSY. When no group id was found, h->tpgs is reset to
 * TPGS_MODE_NONE and the result is SCSI_DH_DEV_UNSUPP.
 */
332 static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
339 err = submit_vpd_inquiry(sdev, h);
341 if (err != SCSI_DH_OK)
344 /* Check if vpd page exceeds initial buffer */
/* Page length from bytes 2-3 plus the 4 byte page header. */
345 len = (h->buff[2] << 8) + h->buff[3] + 4;
346 if (len > h->bufflen) {
347 /* Resubmit with the correct length */
348 if (realloc_buffer(h, len)) {
349 sdev_printk(KERN_WARNING, sdev,
350 "%s: kmalloc buffer failed\n",
352 /* Temporary failure, bypass */
353 return SCSI_DH_DEV_TEMP_BUSY;
359 * Now look for the correct descriptor.
/*
 * NOTE(review): d[6] and d[7] are read whenever d is below buff + len. A
 * descriptor starting within the last 7 bytes would be read past len. The
 * step that advances d is not visible in this listing.
 */
362 while (d < h->buff + len) {
/* Dispatch on the low nibble of the second descriptor byte. */
363 switch (d[1] & 0xf) {
365 /* Relative target port */
366 h->rel_port = (d[6] << 8) + d[7];
369 /* Target port group */
370 h->group_id = (d[6] << 8) + d[7];
/* -1 means no target port group descriptor was seen. */
378 if (h->group_id == -1) {
380 * Internal error; TPGS supported but required
381 * VPD identification descriptors not present.
382 * Disable ALUA support
384 sdev_printk(KERN_INFO, sdev,
385 "%s: No target port descriptors found\n",
387 h->state = TPGS_STATE_OPTIMIZED;
388 h->tpgs = TPGS_MODE_NONE;
389 err = SCSI_DH_DEV_UNSUPP;
391 sdev_printk(KERN_INFO, sdev,
392 "%s: port group %02x rel port %02x\n",
393 ALUA_DH_NAME, h->group_id, h->rel_port);
/*
 * print_alua_state - map a TPGS_STATE_* code to a single character
 * @state: target port group state code
 *
 * The result is printed with a %c conversion in the log messages of
 * alua_stpg and alua_rtpg.
 * NOTE(review): the characters returned per state are not visible in this
 * listing.
 */
399 static char print_alua_state(int state)
402 case TPGS_STATE_OPTIMIZED:
404 case TPGS_STATE_NONOPTIMIZED:
406 case TPGS_STATE_STANDBY:
408 case TPGS_STATE_UNAVAILABLE:
410 case TPGS_STATE_OFFLINE:
412 case TPGS_STATE_TRANSITIONING:
/*
 * alua_check_sense - classify ALUA related sense data
 * @sdev: device the sense data belongs to
 * @sense_hdr: normalized sense header to inspect
 *
 * Registered as the check_sense callback of alua_dh and also called from
 * alua_stpg and alua_rtpg. Conditions worth retrying yield ADD_TO_MLQUEUE;
 * sense data not recognised here yields SCSI_RETURN_NOT_HANDLED.
 * NOTE(review): the case labels and the return values for the standby,
 * unavailable and offline conditions are missing from this listing.
 */
419 static int alua_check_sense(struct scsi_device *sdev,
420 struct scsi_sense_hdr *sense_hdr)
422 switch (sense_hdr->sense_key) {
424 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a)
426 * LUN Not Accessible - ALUA state transition
428 return ADD_TO_MLQUEUE;
429 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0b)
431 * LUN Not Accessible -- Target port in standby state
434 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0c)
436 * LUN Not Accessible -- Target port in unavailable state
439 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x12)
441 * LUN Not Ready -- Offline
446 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
448 * Power On, Reset, or Bus Device Reset, just retry.
450 return ADD_TO_MLQUEUE;
/* Presumably 2Ah/06h is asymmetric access state changed; confirm in SPC. */
451 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
455 return ADD_TO_MLQUEUE;
457 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
459 * Implicit ALUA state transition failed
461 return ADD_TO_MLQUEUE;
466 return SCSI_RETURN_NOT_HANDLED;
470 * alua_stpg - Evaluate SET TARGET GROUP STATES
471 * @sdev: the device to be evaluated
472 * @state: the new target group state
474 * Send a SET TARGET GROUP STATES command to the device.
475 * We only have to test here if we should resubmit the command;
476 * any other error is assumed as a failure.
/*
 * Sends the command through submit_stpg. When that reports SCSI_DH_IO with
 * sense data, the sense is normalized and classified by alua_check_sense;
 * an ADD_TO_MLQUEUE verdict is acted on only while the retry budget
 * (initially ALUA_FAILOVER_RETRIES) is positive. Other sense codes are
 * logged, and a successful switch is logged as well.
 * NOTE(review): no visible line uses the state parameter, and submit_stpg
 * always requests TPGS_STATE_OPTIMIZED. Confirm whether state is stored in
 * h->state on a line missing from this listing.
 */
478 static int alua_stpg(struct scsi_device *sdev, int state,
479 struct alua_dh_data *h)
481 struct scsi_sense_hdr sense_hdr;
483 int retry = ALUA_FAILOVER_RETRIES;
486 err = submit_stpg(sdev, h);
/* Sense data is only meaningful after an I/O error that returned some. */
487 if (err == SCSI_DH_IO && h->senselen > 0) {
488 err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
492 err = alua_check_sense(sdev, &sense_hdr);
493 if (retry > 0 && err == ADD_TO_MLQUEUE) {
497 sdev_printk(KERN_INFO, sdev,
498 "%s: stpg sense code: %02x/%02x/%02x\n",
499 ALUA_DH_NAME, sense_hdr.sense_key,
500 sense_hdr.asc, sense_hdr.ascq);
503 if (err == SCSI_DH_OK) {
505 sdev_printk(KERN_INFO, sdev,
506 "%s: port group %02x switched to state %c\n",
507 ALUA_DH_NAME, h->group_id,
508 print_alua_state(h->state) );
514 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
515 * @sdev: the device to be evaluated.
517 * Evaluate the Target Port Group State.
518 * Returns SCSI_DH_DEV_OFFLINED if the path is
519 * found to be unusable.
/*
 * Runs submit_rtpg, classifies any sense data with alua_check_sense, and
 * walks the returned descriptors for the one matching h->group_id. That
 * descriptor provides h->state and the supported-states byte, both of which
 * are logged. The final result depends on h->tpgs: with the explicit bit
 * set an offline state fails the path, and without it every state other
 * than optimized, non-optimized or standby yields SCSI_DH_DEV_OFFLINED.
 * A buffer that is too small is regrown with realloc_buffer; a failed
 * allocation returns SCSI_DH_DEV_TEMP_BUSY.
 */
521 static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
523 struct scsi_sense_hdr sense_hdr;
524 int len, k, off, valid_states = 0;
529 err = submit_rtpg(sdev, h);
531 if (err == SCSI_DH_IO && h->senselen > 0) {
532 err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
537 err = alua_check_sense(sdev, &sense_hdr);
538 if (err == ADD_TO_MLQUEUE)
540 sdev_printk(KERN_INFO, sdev,
541 "%s: rtpg sense code %02x/%02x/%02x\n",
542 ALUA_DH_NAME, sense_hdr.sense_key,
543 sense_hdr.asc, sense_hdr.ascq);
546 if (err != SCSI_DH_OK)
/*
 * Returned data length from the 4 byte big-endian header plus the header
 * itself.
 * NOTE(review): buff[0] is promoted to int before the shift, so a value of
 * 0x80 or above shifts into the sign bit. len is also taken unchecked from
 * device data.
 */
549 len = (h->buff[0] << 24) + (h->buff[1] << 16) +
550 (h->buff[2] << 8) + h->buff[3] + 4;
552 if (len > h->bufflen) {
553 /* Resubmit with the correct length */
554 if (realloc_buffer(h, len)) {
555 sdev_printk(KERN_WARNING, sdev,
556 "%s: kmalloc buffer failed\n",__func__);
557 /* Temporary failure, bypass */
558 return SCSI_DH_DEV_TEMP_BUSY;
/*
 * Descriptors start after the 4 byte header. Each is 8 bytes plus 4 bytes
 * per entry counted in its byte 7.
 * NOTE(review): ucp[7] is read for any k below len, so a truncated final
 * descriptor would be read beyond len.
 */
563 for (k = 4, ucp = h->buff + 4; k < len; k += off, ucp += off) {
564 if (h->group_id == (ucp[2] << 8) + ucp[3]) {
565 h->state = ucp[0] & 0x0f;
566 valid_states = ucp[1];
568 off = 8 + (ucp[7] * 4);
571 sdev_printk(KERN_INFO, sdev,
572 "%s: port group %02x state %c supports %c%c%c%c%c%c\n",
573 ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
574 valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
575 valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
576 valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
577 valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
578 valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
579 valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
/* Bit test: also true when implicit and explicit are both supported. */
581 if (h->tpgs & TPGS_MODE_EXPLICIT) {
583 case TPGS_STATE_TRANSITIONING:
584 /* State transition, retry */
587 case TPGS_STATE_OFFLINE:
588 /* Path is offline, fail */
589 err = SCSI_DH_DEV_OFFLINED;
595 /* Only Implicit ALUA support */
596 if (h->state == TPGS_STATE_OPTIMIZED ||
597 h->state == TPGS_STATE_NONOPTIMIZED ||
598 h->state == TPGS_STATE_STANDBY)
599 /* Useable path if active */
602 /* Path unuseable for unavailable/offline */
603 err = SCSI_DH_DEV_OFFLINED;
609 * alua_initialize - Initialize ALUA state
610 * @sdev: the device to be initialized
612 * For the prep_fn to work correctly we have
613 * to initialize the ALUA state for the device.
/*
 * Runs the three discovery steps in order: standard INQUIRY for the TPGS
 * mode, VPD inquiry for group id and relative port, then RTPG for the
 * current state. Each result is checked against SCSI_DH_OK before the next
 * step. Called from alua_bus_attach.
 */
615 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
619 err = alua_std_inquiry(sdev, h);
620 if (err != SCSI_DH_OK)
623 err = alua_vpd_inquiry(sdev, h);
624 if (err != SCSI_DH_OK)
627 err = alua_rtpg(sdev, h);
628 if (err != SCSI_DH_OK)
636 * alua_activate - activate a path
637 * @sdev: device on the path to be activated
639 * We're currently switching the port group to be activated only and
640 * let the array figure out the rest.
641 * There may be other arrays which require us to switch all port groups
642 * based on a certain policy. But until we actually encounter them it should be okay.
/*
 * Registered as the activate callback of alua_dh. Refreshes the port group
 * state with alua_rtpg when a group id is known, then requests a switch to
 * TPGS_STATE_OPTIMIZED through alua_stpg if the group is not already there.
 */
645 static int alua_activate(struct scsi_device *sdev)
647 struct alua_dh_data *h = get_alua_data(sdev);
648 int err = SCSI_DH_OK;
/* -1 marks a device for which no target port group was identified. */
650 if (h->group_id != -1) {
651 err = alua_rtpg(sdev, h);
652 if (err != SCSI_DH_OK)
/*
 * NOTE(review): this is an equality test, whereas alua_rtpg tests
 * h->tpgs & TPGS_MODE_EXPLICIT. A device reporting both implicit and
 * explicit support (h->tpgs == 3) never gets an STPG here. A bit test
 * looks intended.
 */
656 if (h->tpgs == TPGS_MODE_EXPLICIT && h->state != TPGS_STATE_OPTIMIZED)
657 err = alua_stpg(sdev, TPGS_STATE_OPTIMIZED, h);
664 * alua_prep_fn - request callback
666 * Fail I/O to all paths not in state
667 * active/optimized or active/non-optimized.
/*
 * Registered as the prep_fn callback of alua_dh. Requests on a path whose
 * state is neither optimized nor non-optimized are flagged REQ_QUIET.
 * NOTE(review): the result starts as BLKPREP_OK; the value assigned for
 * rejected requests is not visible in this listing.
 */
669 static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
671 struct alua_dh_data *h = get_alua_data(sdev);
672 int ret = BLKPREP_OK;
674 if (h->state != TPGS_STATE_OPTIMIZED &&
675 h->state != TPGS_STATE_NONOPTIMIZED) {
677 req->cmd_flags |= REQ_QUIET;
/*
 * Table of string pairs (presumably vendor and model) referenced through
 * the devlist member of alua_dh. Only two entries are visible in this
 * listing.
 */
683 static const struct scsi_dh_devlist alua_dev_list[] = {
684 {"HP", "MSA VOLUME" },
692 {"Pillar", "Axiom" },
/* Forward declarations: alua_dh refers to both functions before they are defined. */
696 static int alua_bus_attach(struct scsi_device *sdev);
697 static void alua_bus_detach(struct scsi_device *sdev);
/*
 * Handler description passed to scsi_register_device_handler in alua_init;
 * it ties the callbacks defined in this file to the handler name.
 */
699 static struct scsi_device_handler alua_dh = {
700 .name = ALUA_DH_NAME,
701 .module = THIS_MODULE,
702 .devlist = alua_dev_list,
703 .attach = alua_bus_attach,
704 .detach = alua_bus_detach,
705 .prep_fn = alua_prep_fn,
706 .check_sense = alua_check_sense,
707 .activate = alua_activate,
711 * alua_bus_attach - Attach device handler
712 * @sdev: device to be attached to
/*
 * Registered as the attach callback of alua_dh. Allocates the handler data
 * with the ALUA private area behind it, sets initial values, runs
 * alua_initialize, takes a module reference, and publishes the data in
 * sdev->scsi_dh_data under the queue lock. Failures are logged.
 * NOTE(review): the cleanup on the failure paths is not visible in this
 * listing.
 */
714 static int alua_bus_attach(struct scsi_device *sdev)
716 struct scsi_dh_data *scsi_dh_data;
717 struct alua_dh_data *h;
719 int err = SCSI_DH_OK;
/*
 * One allocation for the handler pointer plus the private data.
 * NOTE(review): this assumes scsi_dh_data is exactly one pointer followed
 * by buf; confirm against its definition.
 */
721 scsi_dh_data = kzalloc(sizeof(struct scsi_device_handler *)
722 + sizeof(*h) , GFP_KERNEL);
724 sdev_printk(KERN_ERR, sdev, "%s: Attach failed\n",
729 scsi_dh_data->scsi_dh = &alua_dh;
730 h = (struct alua_dh_data *) scsi_dh_data->buf;
/* Initial values used until the discovery commands have run. */
731 h->tpgs = TPGS_MODE_UNINITIALIZED;
732 h->state = TPGS_STATE_OPTIMIZED;
736 h->bufflen = ALUA_INQUIRY_SIZE;
738 err = alua_initialize(sdev, h);
739 if (err != SCSI_DH_OK)
/* Matched by module_put in alua_bus_detach. */
742 if (!try_module_get(THIS_MODULE))
/* Publish the fully initialised data under the queue lock. */
745 spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
746 sdev->scsi_dh_data = scsi_dh_data;
747 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
753 sdev_printk(KERN_ERR, sdev, "%s: not attached\n", ALUA_DH_NAME);
758 * alua_bus_detach - Detach device handler
759 * @sdev: device to be detached from
/*
 * Registered as the detach callback of alua_dh. Unhooks the handler data
 * from the device under the queue lock, checks whether the work buffer is
 * a separate allocation, drops the module reference taken at attach time,
 * and logs the detach.
 */
761 static void alua_bus_detach(struct scsi_device *sdev)
763 struct scsi_dh_data *scsi_dh_data;
764 struct alua_dh_data *h;
/* Clear the device pointer under the lock before touching the data. */
767 spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
768 scsi_dh_data = sdev->scsi_dh_data;
769 sdev->scsi_dh_data = NULL;
770 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
772 h = (struct alua_dh_data *) scsi_dh_data->buf;
/* The embedded inq array is part of the handler data, not a separate buffer. */
773 if (h->buff && h->inq != h->buff)
776 module_put(THIS_MODULE);
777 sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", ALUA_DH_NAME);
/*
 * Module entry point: registers alua_dh with the SCSI device handler core
 * and logs an error if registration fails.
 */
780 static int __init alua_init(void)
784 r = scsi_register_device_handler(&alua_dh);
/*
 * NOTE(review): the format string has no trailing newline, unlike every
 * other log message in this file.
 */
786 printk(KERN_ERR "%s: Failed to register scsi device handler",
/* Module exit point: removes the registration made in alua_init. */
791 static void __exit alua_exit(void)
793 scsi_unregister_device_handler(&alua_dh);
/* Hook the entry and exit functions into module load and unload. */
796 module_init(alua_init);
797 module_exit(alua_exit);
/* Module metadata; the version string comes from ALUA_DH_VER. */
799 MODULE_DESCRIPTION("DM Multipath ALUA support");
800 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
801 MODULE_LICENSE("GPL");
802 MODULE_VERSION(ALUA_DH_VER);