[SCSI] ipr: Cleanup error structures

[linux-2.6] / drivers / scsi / scsi_error.c
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index 895c945..18c5d25 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -50,7 +50,7 @@
  void scsi_eh_wakeup(struct Scsi_Host *shost)
  {
         if (shost->host_busy == shost->host_failed) {
-               up(shost->eh_wait);
+               wake_up_process(shost->ehandler);
                 SCSI_LOG_ERROR_RECOVERY(5,
                                 printk("Waking error handler thread\n"));
         }
@@ -68,19 +68,24 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
  {
         struct Scsi_Host *shost = scmd->device->host;
         unsigned long flags;
+       int ret = 0;
  
-       if (shost->eh_wait == NULL)
+       if (!shost->ehandler)
                 return 0;
  
         spin_lock_irqsave(shost->host_lock, flags);
+       if (scsi_host_set_state(shost, SHOST_RECOVERY))
+               if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
+                       goto out_unlock;
  
+       ret = 1;
         scmd->eh_eflags |= eh_flag;
         list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
-       scsi_host_set_state(shost, SHOST_RECOVERY);
         shost->host_failed++;
         scsi_eh_wakeup(shost);
+ out_unlock:
         spin_unlock_irqrestore(shost->host_lock, flags);
-       return 1;
+       return ret;
  }
  
  /**
@@ -176,8 +181,8 @@ void scsi_times_out(struct scsi_cmnd *scmd)
                 }
  
         if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
-               panic("Error handler thread not present at %p %p %s %d",
-                     scmd, scmd->device->host, __FILE__, __LINE__);
+               scmd->result |= DID_TIME_OUT << 16;
+               __scsi_done(scmd);
         }
  }
  
@@ -196,8 +201,7 @@ int scsi_block_when_processing_errors(struct scsi_device *sdev)
  {
         int online;
  
-       wait_event(sdev->host->host_wait, (sdev->host->shost_state !=
-                                          SHOST_RECOVERY));
+       wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
  
         online = scsi_device_online(sdev);
  
@@ -237,11 +241,10 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
  
                 if (cmd_cancel || cmd_failed) {
                         SCSI_LOG_ERROR_RECOVERY(3,
-                               printk("%s: %d:%d:%d:%d cmds failed: %d,"
-                                      " cancel: %d\n",
-                                      __FUNCTION__, shost->host_no,
-                                      sdev->channel, sdev->id, sdev->lun,
-                                      cmd_failed, cmd_cancel));
+                               sdev_printk(KERN_INFO, sdev,
+                                           "%s: cmds failed: %d, cancel: %d\n",
+                                           __FUNCTION__, cmd_failed,
+                                           cmd_cancel));
                         cmd_cancel = 0;
                         cmd_failed = 0;
                         ++devices_failed;
@@ -413,44 +416,16 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
         return FAILED;
  }
  
-/**
- * scsi_eh_times_out - timeout function for error handling.
- * @scmd:      Cmd that is timing out.
- *
- * Notes:
- *    During error handling, the kernel thread will be sleeping waiting
- *    for some action to complete on the device.  our only job is to
- *    record that it timed out, and to wake up the thread.
- **/
-static void scsi_eh_times_out(struct scsi_cmnd *scmd)
-{
-       scmd->eh_eflags |= SCSI_EH_REC_TIMEOUT;
-       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
-                                         scmd));
-
-       up(scmd->device->host->eh_action);
-}
-
  /**
   * scsi_eh_done - Completion function for error handling.
   * @scmd:      Cmd that is done.
   **/
  static void scsi_eh_done(struct scsi_cmnd *scmd)
  {
-       /*
-        * if the timeout handler is already running, then just set the
-        * flag which says we finished late, and return.  we have no
-        * way of stopping the timeout handler from running, so we must
-        * always defer to it.
-        */
-       if (del_timer(&scmd->eh_timeout)) {
-               scmd->request->rq_status = RQ_SCSI_DONE;
-
-               SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
-                                          __FUNCTION__, scmd, scmd->result));
-
-               up(scmd->device->host->eh_action);
-       }
+       SCSI_LOG_ERROR_RECOVERY(3,
+               printk("%s scmd: %p result: %x\n",
+                       __FUNCTION__, scmd, scmd->result));
+       complete(scmd->device->host->eh_action);
  }
  
  /**
@@ -458,10 +433,6 @@ static void scsi_eh_done(struct scsi_cmnd *scmd)
   * @scmd:      SCSI Cmd to send.
   * @timeout:   Timeout for cmd.
   *
- * Notes:
- *    The initialization of the structures is quite a bit different in
- *    this case, and furthermore, there is a different completion handler
- *    vs scsi_dispatch_cmd.
   * Return value:
   *    SUCCESS or FAILED or NEEDS_RETRY
   **/
@@ -469,24 +440,16 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
  {
         struct scsi_device *sdev = scmd->device;
         struct Scsi_Host *shost = sdev->host;
-       DECLARE_MUTEX_LOCKED(sem);
+       DECLARE_COMPLETION(done);
+       unsigned long timeleft;
         unsigned long flags;
-       int rtn = SUCCESS;
+       int rtn;
  
-       /*
-        * we will use a queued command if possible, otherwise we will
-        * emulate the queuing and calling of completion function ourselves.
-        */
         if (sdev->scsi_level <= SCSI_2)
                 scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
                         (sdev->lun << 5 & 0xe0);
  
-       scsi_add_timer(scmd, timeout, scsi_eh_times_out);
-
-       /*
-        * set up the semaphore so we wait for the command to complete.
-        */
-       shost->eh_action = &sem;
+       shost->eh_action = &done;
         scmd->request->rq_status = RQ_SCSI_BUSY;
  
         spin_lock_irqsave(shost->host_lock, flags);
@@ -494,47 +457,29 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
         shost->hostt->queuecommand(scmd, scsi_eh_done);
         spin_unlock_irqrestore(shost->host_lock, flags);
  
-       down(&sem);
-       scsi_log_completion(scmd, SUCCESS);
+       timeleft = wait_for_completion_timeout(&done, timeout);
  
+       scmd->request->rq_status = RQ_SCSI_DONE;
         shost->eh_action = NULL;
  
-       /*
-        * see if timeout.  if so, tell the host to forget about it.
-        * in other words, we don't want a callback any more.
-        */
-       if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) {
-               scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT;
-
-               /*
-                * as far as the low level driver is
-                * concerned, this command is still active, so
-                * we must give the low level driver a chance
-                * to abort it. (db) 
-                *
-                * FIXME(eric) - we are not tracking whether we could
-                * abort a timed out command or not.  not sure how
-                * we should treat them differently anyways.
-                */
-               if (shost->hostt->eh_abort_handler)
-                       shost->hostt->eh_abort_handler(scmd);
-                       
-               scmd->request->rq_status = RQ_SCSI_DONE;
-               rtn = FAILED;
-       }
+       scsi_log_completion(scmd, SUCCESS);
  
-       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
-                                         __FUNCTION__, scmd, rtn));
+       SCSI_LOG_ERROR_RECOVERY(3,
+               printk("%s: scmd: %p, timeleft: %ld\n",
+                       __FUNCTION__, scmd, timeleft));
  
         /*
-        * now examine the actual status codes to see whether the command
-        * actually did complete normally.
+        * If there is time left scsi_eh_done got called, and we will
+        * examine the actual status codes to see whether the command
+        * actually did complete normally, else tell the host to forget
+        * about this command.
          */
-       if (rtn == SUCCESS) {
+       if (timeleft) {
                 rtn = scsi_eh_completed_normally(scmd);
                 SCSI_LOG_ERROR_RECOVERY(3,
                         printk("%s: scsi_eh_completed_normally %x\n",
                                __FUNCTION__, rtn));
+
                 switch (rtn) {
                 case SUCCESS:
                 case NEEDS_RETRY:
@@ -544,6 +489,15 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
                         rtn = FAILED;
                         break;
                 }
+       } else {
+               /*
+                * FIXME(eric) - we are not tracking whether we could
+                * abort a timed out command or not.  not sure how
+                * we should treat them differently anyways.
+                */
+               if (shost->hostt->eh_abort_handler)
+                       shost->hostt->eh_abort_handler(scmd);
+               rtn = FAILED;
         }
  
         return rtn;
@@ -670,10 +624,9 @@ static int scsi_eh_get_sense(struct list_head *work_q,
                     SCSI_SENSE_VALID(scmd))
                         continue;
  
-               SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
-                                                 " for id: %d\n",
-                                                 current->comm,
-                                                 scmd->device->id));
+               SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
+                                                 "%s: requesting sense\n",
+                                                 current->comm));
                 rtn = scsi_request_sense(scmd);
                 if (rtn != SUCCESS)
                         continue;
@@ -1031,7 +984,8 @@ static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
                 if (!scmd->device->host->hostt->skip_settle_delay)
                         ssleep(BUS_RESET_SETTLE_TIME);
                 spin_lock_irqsave(scmd->device->host->host_lock, flags);
-               scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
+               scsi_report_bus_reset(scmd->device->host,
+                                     scmd_channel(scmd));
                 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
         }
  
@@ -1059,7 +1013,8 @@ static int scsi_try_host_reset(struct scsi_cmnd *scmd)
                 if (!scmd->device->host->hostt->skip_settle_delay)
                         ssleep(HOST_RESET_SETTLE_TIME);
                 spin_lock_irqsave(scmd->device->host->host_lock, flags);
-               scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
+               scsi_report_bus_reset(scmd->device->host,
+                                     scmd_channel(scmd));
                 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
         }
  
@@ -1089,7 +1044,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
         for (channel = 0; channel <= shost->max_channel; channel++) {
                 chan_scmd = NULL;
                 list_for_each_entry(scmd, work_q, eh_entry) {
-                       if (channel == scmd->device->channel) {
+                       if (channel == scmd_channel(scmd)) {
                                 chan_scmd = scmd;
                                 break;
                                 /*
@@ -1107,7 +1062,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
                 rtn = scsi_try_bus_reset(chan_scmd);
                 if (rtn == SUCCESS) {
                         list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
-                               if (channel == scmd->device->channel)
+                               if (channel == scmd_channel(scmd))
                                         if (!scsi_device_online(scmd->device) ||
                                             !scsi_eh_tur(scmd))
                                                 scsi_eh_finish_cmd(scmd,
@@ -1170,13 +1125,9 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q,
         struct scsi_cmnd *scmd, *next;
  
         list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
-               printk(KERN_INFO "scsi: Device offlined - not"
-                               " ready after error recovery: host"
-                               " %d channel %d id %d lun %d\n",
-                               scmd->device->host->host_no,
-                               scmd->device->channel,
-                               scmd->device->id,
-                               scmd->device->lun);
+               sdev_printk(KERN_INFO, scmd->device,
+                           "scsi: Device offlined - not"
+                           " ready after error recovery\n");
                 scsi_device_set_state(scmd->device, SDEV_OFFLINE);
                 if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
                         /*
@@ -1338,10 +1289,8 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
                 return SUCCESS;
  
         case RESERVATION_CONFLICT:
-               printk(KERN_INFO "scsi: reservation conflict: host"
-                                " %d channel %d id %d lun %d\n",
-                      scmd->device->host->host_no, scmd->device->channel,
-                      scmd->device->id, scmd->device->lun);
+               sdev_printk(KERN_INFO, scmd->device,
+                           "reservation conflict\n");
                 return SUCCESS; /* causes immediate i/o error */
         default:
                 return FAILED;
@@ -1441,6 +1390,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
  static void scsi_restart_operations(struct Scsi_Host *shost)
  {
         struct scsi_device *sdev;
+       unsigned long flags;
  
         /*
          * If the door was locked, we need to insert a door lock request
@@ -1460,7 +1410,11 @@ static void scsi_restart_operations(struct Scsi_Host *shost)
         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
                                           __FUNCTION__));
  
-       scsi_host_set_state(shost, SHOST_RUNNING);
+       spin_lock_irqsave(shost->host_lock, flags);
+       if (scsi_host_set_state(shost, SHOST_RUNNING))
+               if (scsi_host_set_state(shost, SHOST_CANCEL))
+                       BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
+       spin_unlock_irqrestore(shost->host_lock, flags);
  
         wake_up(&shost->host_wait);
  
@@ -1568,59 +1522,41 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
  }
  
  /**
- * scsi_error_handler - Handle errors/timeouts of SCSI cmds.
+ * scsi_error_handler - SCSI error handler thread
   * @data:      Host for which we are running.
   *
   * Notes:
- *    This is always run in the context of a kernel thread.  The idea is
- *    that we start this thing up when the kernel starts up (one per host
- *    that we detect), and it immediately goes to sleep and waits for some
- *    event (i.e. failure).  When this takes place, we have the job of
- *    trying to unjam the bus and restarting things.
+ *    This is the main error handling loop.  This is run as a kernel thread
+ *    for every SCSI host and handles all error handling activity.
   **/
  int scsi_error_handler(void *data)
  {
-       struct Scsi_Host *shost = (struct Scsi_Host *) data;
-       int rtn;
-       DECLARE_MUTEX_LOCKED(sem);
+       struct Scsi_Host *shost = data;
  
         current->flags |= PF_NOFREEZE;
-       shost->eh_wait = &sem;
  
         /*
-        * Wake up the thread that created us.
+        * We use TASK_INTERRUPTIBLE so that the thread is not
+        * counted against the load average as a running process.
+        * We never actually get interrupted because kthread_run
+        * disables signal delivery for the created thread.
          */
-       SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"
-                                         " scsi_eh_%d\n",shost->host_no));
-
-       while (1) {
-               /*
-                * If we get a signal, it means we are supposed to go
-                * away and die.  This typically happens if the user is
-                * trying to unload a module.
-                */
-               SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
-                                                 " scsi_eh_%d"
-                                                 " sleeping\n",shost->host_no));
-
-               /*
-                * Note - we always use down_interruptible with the semaphore
-                * even if the module was loaded as part of the kernel.  The
-                * reason is that down() will cause this thread to be counted
-                * in the load average as a running process, and down
-                * interruptible doesn't.  Given that we need to allow this
-                * thread to die if the driver was loaded as a module, using
-                * semaphores isn't unreasonable.
-                */
-               down_interruptible(&sem);
-               if (kthread_should_stop())
-                       break;
-
-               SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
-                                                 " scsi_eh_%d waking"
-                                                 " up\n",shost->host_no));
+       set_current_state(TASK_INTERRUPTIBLE);
+       while (!kthread_should_stop()) {
+               if (shost->host_failed == 0 ||
+                   shost->host_failed != shost->host_busy) {
+                       SCSI_LOG_ERROR_RECOVERY(1,
+                               printk("Error handler scsi_eh_%d sleeping\n",
+                                       shost->host_no));
+                       schedule();
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       continue;
+               }
  
-               shost->eh_active = 1;
+               __set_current_state(TASK_RUNNING);
+               SCSI_LOG_ERROR_RECOVERY(1,
+                       printk("Error handler scsi_eh_%d waking up\n",
+                               shost->host_no));
  
                 /*
                  * We have a host that is failing for some reason.  Figure out
@@ -1628,12 +1564,10 @@ int scsi_error_handler(void *data)
                  * If we fail, we end up taking the thing offline.
                  */
                 if (shost->hostt->eh_strategy_handler) 
-                       rtn = shost->hostt->eh_strategy_handler(shost);
+                       shost->hostt->eh_strategy_handler(shost);
                 else
                         scsi_unjam_host(shost);
  
-               shost->eh_active = 0;
-
                 /*
                  * Note - if the above fails completely, the action is to take
                  * individual devices offline and flush the queue of any
@@ -1642,16 +1576,13 @@ int scsi_error_handler(void *data)
                  * which are still online.
                  */
                 scsi_restart_operations(shost);
-
+               set_current_state(TASK_INTERRUPTIBLE);
         }
+       __set_current_state(TASK_RUNNING);
  
-       SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
-                                         " exiting\n",shost->host_no));
-
-       /*
-        * Make sure that nobody tries to wake us up again.
-        */
-       shost->eh_wait = NULL;
+       SCSI_LOG_ERROR_RECOVERY(1,
+               printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
+       shost->ehandler = NULL;
         return 0;
  }
  
@@ -1681,7 +1612,7 @@ void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
         struct scsi_device *sdev;
  
         __shost_for_each_device(sdev, shost) {
-               if (channel == sdev->channel) {
+               if (channel == sdev_channel(sdev)) {
                         sdev->was_reset = 1;
                         sdev->expecting_cc_ua = 1;
                 }
@@ -1716,8 +1647,8 @@ void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
         struct scsi_device *sdev;
  
         __shost_for_each_device(sdev, shost) {
-               if (channel == sdev->channel &&
-                   target == sdev->id) {
+               if (channel == sdev_channel(sdev) &&
+                   target == sdev_id(sdev)) {
                         sdev->was_reset = 1;
                         sdev->expecting_cc_ua = 1;
                 }