void scsi_eh_wakeup(struct Scsi_Host *shost)
{
if (shost->host_busy == shost->host_failed) {
- up(shost->eh_wait);
+ wake_up_process(shost->ehandler);
SCSI_LOG_ERROR_RECOVERY(5,
printk("Waking error handler thread\n"));
}
{
struct Scsi_Host *shost = scmd->device->host;
unsigned long flags;
+ int ret = 0;
- if (shost->eh_wait == NULL)
+ if (!shost->ehandler)
return 0;
spin_lock_irqsave(shost->host_lock, flags);
+ if (scsi_host_set_state(shost, SHOST_RECOVERY))
+ if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
+ goto out_unlock;
+ ret = 1;
scmd->eh_eflags |= eh_flag;
list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
- scsi_host_set_state(shost, SHOST_RECOVERY);
shost->host_failed++;
scsi_eh_wakeup(shost);
+ out_unlock:
spin_unlock_irqrestore(shost->host_lock, flags);
- return 1;
+ return ret;
}
/**
}
if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
- panic("Error handler thread not present at %p %p %s %d",
- scmd, scmd->device->host, __FILE__, __LINE__);
+ scmd->result |= DID_TIME_OUT << 16;
+ __scsi_done(scmd);
}
}
{
int online;
- wait_event(sdev->host->host_wait, (sdev->host->shost_state !=
- SHOST_RECOVERY));
+ wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
online = scsi_device_online(sdev);
if (cmd_cancel || cmd_failed) {
SCSI_LOG_ERROR_RECOVERY(3,
- printk("%s: %d:%d:%d:%d cmds failed: %d,"
- " cancel: %d\n",
- __FUNCTION__, shost->host_no,
- sdev->channel, sdev->id, sdev->lun,
- cmd_failed, cmd_cancel));
+ sdev_printk(KERN_INFO, sdev,
+ "%s: cmds failed: %d, cancel: %d\n",
+ __FUNCTION__, cmd_failed,
+ cmd_cancel));
cmd_cancel = 0;
cmd_failed = 0;
++devices_failed;
return FAILED;
}
-/**
- * scsi_eh_times_out - timeout function for error handling.
- * @scmd: Cmd that is timing out.
- *
- * Notes:
- * During error handling, the kernel thread will be sleeping waiting
- * for some action to complete on the device. our only job is to
- * record that it timed out, and to wake up the thread.
- **/
-static void scsi_eh_times_out(struct scsi_cmnd *scmd)
-{
- scmd->eh_eflags |= SCSI_EH_REC_TIMEOUT;
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
- scmd));
-
- up(scmd->device->host->eh_action);
-}
-
/**
* scsi_eh_done - Completion function for error handling.
* @scmd: Cmd that is done.
**/
static void scsi_eh_done(struct scsi_cmnd *scmd)
{
- /*
- * if the timeout handler is already running, then just set the
- * flag which says we finished late, and return. we have no
- * way of stopping the timeout handler from running, so we must
- * always defer to it.
- */
- if (del_timer(&scmd->eh_timeout)) {
- scmd->request->rq_status = RQ_SCSI_DONE;
-
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
- __FUNCTION__, scmd, scmd->result));
-
- up(scmd->device->host->eh_action);
- }
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("%s scmd: %p result: %x\n",
+ __FUNCTION__, scmd, scmd->result));
+ complete(scmd->device->host->eh_action);
}
/**
* @scmd: SCSI Cmd to send.
* @timeout: Timeout for cmd.
*
- * Notes:
- * The initialization of the structures is quite a bit different in
- * this case, and furthermore, there is a different completion handler
- * vs scsi_dispatch_cmd.
* Return value:
* SUCCESS or FAILED or NEEDS_RETRY
**/
{
struct scsi_device *sdev = scmd->device;
struct Scsi_Host *shost = sdev->host;
- DECLARE_MUTEX_LOCKED(sem);
+ DECLARE_COMPLETION(done);
+ unsigned long timeleft;
unsigned long flags;
- int rtn = SUCCESS;
+ int rtn;
- /*
- * we will use a queued command if possible, otherwise we will
- * emulate the queuing and calling of completion function ourselves.
- */
if (sdev->scsi_level <= SCSI_2)
scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
(sdev->lun << 5 & 0xe0);
- scsi_add_timer(scmd, timeout, scsi_eh_times_out);
-
- /*
- * set up the semaphore so we wait for the command to complete.
- */
- shost->eh_action = &sem;
+ shost->eh_action = &done;
scmd->request->rq_status = RQ_SCSI_BUSY;
spin_lock_irqsave(shost->host_lock, flags);
shost->hostt->queuecommand(scmd, scsi_eh_done);
spin_unlock_irqrestore(shost->host_lock, flags);
- down(&sem);
- scsi_log_completion(scmd, SUCCESS);
+ timeleft = wait_for_completion_timeout(&done, timeout);
+ scmd->request->rq_status = RQ_SCSI_DONE;
shost->eh_action = NULL;
- /*
- * see if timeout. if so, tell the host to forget about it.
- * in other words, we don't want a callback any more.
- */
- if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) {
- scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT;
-
- /*
- * as far as the low level driver is
- * concerned, this command is still active, so
- * we must give the low level driver a chance
- * to abort it. (db)
- *
- * FIXME(eric) - we are not tracking whether we could
- * abort a timed out command or not. not sure how
- * we should treat them differently anyways.
- */
- if (shost->hostt->eh_abort_handler)
- shost->hostt->eh_abort_handler(scmd);
-
- scmd->request->rq_status = RQ_SCSI_DONE;
- rtn = FAILED;
- }
+ scsi_log_completion(scmd, SUCCESS);
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
- __FUNCTION__, scmd, rtn));
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("%s: scmd: %p, timeleft: %ld\n",
+ __FUNCTION__, scmd, timeleft));
/*
- * now examine the actual status codes to see whether the command
- * actually did complete normally.
+ * If there is time left scsi_eh_done got called, and we will
+ * examine the actual status codes to see whether the command
+ * actually did complete normally, else tell the host to forget
+ * about this command.
*/
- if (rtn == SUCCESS) {
+ if (timeleft) {
rtn = scsi_eh_completed_normally(scmd);
SCSI_LOG_ERROR_RECOVERY(3,
printk("%s: scsi_eh_completed_normally %x\n",
__FUNCTION__, rtn));
+
switch (rtn) {
case SUCCESS:
case NEEDS_RETRY:
rtn = FAILED;
break;
}
+ } else {
+ /*
+ * FIXME(eric) - we are not tracking whether we could
+ * abort a timed out command or not. not sure how
+ * we should treat them differently anyways.
+ */
+ if (shost->hostt->eh_abort_handler)
+ shost->hostt->eh_abort_handler(scmd);
+ rtn = FAILED;
}
return rtn;
SCSI_SENSE_VALID(scmd))
continue;
- SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
- " for id: %d\n",
- current->comm,
- scmd->device->id));
+ SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
+ "%s: requesting sense\n",
+ current->comm));
rtn = scsi_request_sense(scmd);
if (rtn != SUCCESS)
continue;
if (!scmd->device->host->hostt->skip_settle_delay)
ssleep(BUS_RESET_SETTLE_TIME);
spin_lock_irqsave(scmd->device->host->host_lock, flags);
- scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
+ scsi_report_bus_reset(scmd->device->host,
+ scmd_channel(scmd));
spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
}
if (!scmd->device->host->hostt->skip_settle_delay)
ssleep(HOST_RESET_SETTLE_TIME);
spin_lock_irqsave(scmd->device->host->host_lock, flags);
- scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
+ scsi_report_bus_reset(scmd->device->host,
+ scmd_channel(scmd));
spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
}
for (channel = 0; channel <= shost->max_channel; channel++) {
chan_scmd = NULL;
list_for_each_entry(scmd, work_q, eh_entry) {
- if (channel == scmd->device->channel) {
+ if (channel == scmd_channel(scmd)) {
chan_scmd = scmd;
break;
/*
rtn = scsi_try_bus_reset(chan_scmd);
if (rtn == SUCCESS) {
list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
- if (channel == scmd->device->channel)
+ if (channel == scmd_channel(scmd))
if (!scsi_device_online(scmd->device) ||
!scsi_eh_tur(scmd))
scsi_eh_finish_cmd(scmd,
struct scsi_cmnd *scmd, *next;
list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
- printk(KERN_INFO "scsi: Device offlined - not"
- " ready after error recovery: host"
- " %d channel %d id %d lun %d\n",
- scmd->device->host->host_no,
- scmd->device->channel,
- scmd->device->id,
- scmd->device->lun);
+ sdev_printk(KERN_INFO, scmd->device,
+ "scsi: Device offlined - not"
+ " ready after error recovery\n");
scsi_device_set_state(scmd->device, SDEV_OFFLINE);
if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
/*
return SUCCESS;
case RESERVATION_CONFLICT:
- printk(KERN_INFO "scsi: reservation conflict: host"
- " %d channel %d id %d lun %d\n",
- scmd->device->host->host_no, scmd->device->channel,
- scmd->device->id, scmd->device->lun);
+ sdev_printk(KERN_INFO, scmd->device,
+ "reservation conflict\n");
return SUCCESS; /* causes immediate i/o error */
default:
return FAILED;
static void scsi_restart_operations(struct Scsi_Host *shost)
{
struct scsi_device *sdev;
+ unsigned long flags;
/*
* If the door was locked, we need to insert a door lock request
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
__FUNCTION__));
- scsi_host_set_state(shost, SHOST_RUNNING);
+ spin_lock_irqsave(shost->host_lock, flags);
+ if (scsi_host_set_state(shost, SHOST_RUNNING))
+ if (scsi_host_set_state(shost, SHOST_CANCEL))
+ BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
+ spin_unlock_irqrestore(shost->host_lock, flags);
wake_up(&shost->host_wait);
}
/**
- * scsi_error_handler - Handle errors/timeouts of SCSI cmds.
+ * scsi_error_handler - SCSI error handler thread
* @data: Host for which we are running.
*
* Notes:
- * This is always run in the context of a kernel thread. The idea is
- * that we start this thing up when the kernel starts up (one per host
- * that we detect), and it immediately goes to sleep and waits for some
- * event (i.e. failure). When this takes place, we have the job of
- * trying to unjam the bus and restarting things.
+ * This is the main error handling loop. This is run as a kernel thread
+ * for every SCSI host and handles all error handling activity.
**/
int scsi_error_handler(void *data)
{
- struct Scsi_Host *shost = (struct Scsi_Host *) data;
- int rtn;
- DECLARE_MUTEX_LOCKED(sem);
+ struct Scsi_Host *shost = data;
current->flags |= PF_NOFREEZE;
- shost->eh_wait = &sem;
/*
- * Wake up the thread that created us.
+ * We use TASK_INTERRUPTIBLE so that the thread is not
+ * counted against the load average as a running process.
+ * We never actually get interrupted because kthread_run
+ * disables singal delivery for the created thread.
*/
- SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"
- " scsi_eh_%d\n",shost->host_no));
-
- while (1) {
- /*
- * If we get a signal, it means we are supposed to go
- * away and die. This typically happens if the user is
- * trying to unload a module.
- */
- SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
- " scsi_eh_%d"
- " sleeping\n",shost->host_no));
-
- /*
- * Note - we always use down_interruptible with the semaphore
- * even if the module was loaded as part of the kernel. The
- * reason is that down() will cause this thread to be counted
- * in the load average as a running process, and down
- * interruptible doesn't. Given that we need to allow this
- * thread to die if the driver was loaded as a module, using
- * semaphores isn't unreasonable.
- */
- down_interruptible(&sem);
- if (kthread_should_stop())
- break;
-
- SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
- " scsi_eh_%d waking"
- " up\n",shost->host_no));
+ set_current_state(TASK_INTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ if (shost->host_failed == 0 ||
+ shost->host_failed != shost->host_busy) {
+ SCSI_LOG_ERROR_RECOVERY(1,
+ printk("Error handler scsi_eh_%d sleeping\n",
+ shost->host_no));
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ continue;
+ }
- shost->eh_active = 1;
+ __set_current_state(TASK_RUNNING);
+ SCSI_LOG_ERROR_RECOVERY(1,
+ printk("Error handler scsi_eh_%d waking up\n",
+ shost->host_no));
/*
* We have a host that is failing for some reason. Figure out
* If we fail, we end up taking the thing offline.
*/
if (shost->hostt->eh_strategy_handler)
- rtn = shost->hostt->eh_strategy_handler(shost);
+ shost->hostt->eh_strategy_handler(shost);
else
scsi_unjam_host(shost);
- shost->eh_active = 0;
-
/*
* Note - if the above fails completely, the action is to take
* individual devices offline and flush the queue of any
* which are still online.
*/
scsi_restart_operations(shost);
-
+ set_current_state(TASK_INTERRUPTIBLE);
}
+ __set_current_state(TASK_RUNNING);
- SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
- " exiting\n",shost->host_no));
-
- /*
- * Make sure that nobody tries to wake us up again.
- */
- shost->eh_wait = NULL;
+ SCSI_LOG_ERROR_RECOVERY(1,
+ printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
+ shost->ehandler = NULL;
return 0;
}
struct scsi_device *sdev;
__shost_for_each_device(sdev, shost) {
- if (channel == sdev->channel) {
+ if (channel == sdev_channel(sdev)) {
sdev->was_reset = 1;
sdev->expecting_cc_ua = 1;
}
struct scsi_device *sdev;
__shost_for_each_device(sdev, shost) {
- if (channel == sdev->channel &&
- target == sdev->id) {
+ if (channel == sdev_channel(sdev) &&
+ target == sdev_id(sdev)) {
sdev->was_reset = 1;
sdev->expecting_cc_ua = 1;
}