Index: cam_periph.c =================================================================== --- cam_periph.c (revision 220512) +++ cam_periph.c (working copy) @@ -1047,7 +1047,6 @@ } #define saved_ccb_ptr ppriv_ptr0 -#define recovery_depth ppriv_field1 static void camperiphsensedone(struct cam_periph *periph, union ccb *done_ccb) { @@ -1055,21 +1054,13 @@ cam_status status; int frozen = 0; u_int sense_key; - int depth = done_ccb->ccb_h.recovery_depth; status = done_ccb->ccb_h.status; - if (status & CAM_DEV_QFRZN) { - frozen = 1; - /* - * Clear freeze flag now for case of retry, - * freeze will be dropped later. - */ - done_ccb->ccb_h.status &= ~CAM_DEV_QFRZN; - } + frozen = (status & CAM_DEV_QFRZN) != 0; status &= CAM_STATUS_MASK; switch (status) { case CAM_REQ_CMP: - { + /* * If we manually retrieved sense into a CCB and got * something other than "NO SENSE" send the updated CCB @@ -1079,19 +1070,15 @@ sense_key = saved_ccb->csio.sense_data.flags; sense_key &= SSD_KEY; if (sense_key != SSD_KEY_NO_SENSE) { - saved_ccb->ccb_h.status |= - CAM_AUTOSNS_VALID; + saved_ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } else { - saved_ccb->ccb_h.status &= - ~CAM_STATUS_MASK; - saved_ccb->ccb_h.status |= - CAM_AUTOSENSE_FAIL; + saved_ccb->ccb_h.status &= ~CAM_STATUS_MASK; + saved_ccb->ccb_h.status |= CAM_AUTOSENSE_FAIL; } saved_ccb->csio.sense_resid = done_ccb->csio.resid; bcopy(saved_ccb, done_ccb, sizeof(union ccb)); xpt_free_ccb(saved_ccb); break; - } default: bcopy(saved_ccb, done_ccb, sizeof(union ccb)); xpt_free_ccb(saved_ccb); @@ -1101,29 +1088,26 @@ } periph->flags &= ~CAM_PERIPH_SENSE_INPROG; /* - * If it is the end of recovery, drop freeze, taken due to - * CAM_DEV_QFREEZE flag, set on recovery request. + * At this point we should hold one freeze obtained because of the + * CAM_DEV_QFREEZE flag on sense fetching start, and may have another + * one if REQUEST SENSE has failed. We can't return two freezes to + * the peripheral callback, so report one and drop extra. */ - if (depth == 0) { + if (frozen != 0) { cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*openings*/0, /*timeout*/0, /*getcount_only*/0); } - /* - * Copy frozen flag from recovery request if it is set there - * for some reason. - */ - if (frozen != 0) - done_ccb->ccb_h.status |= CAM_DEV_QFRZN; + done_ccb->ccb_h.status |= CAM_DEV_QFRZN; (*done_ccb->ccb_h.cbfcnp)(periph, done_ccb); } static void camperiphdone(struct cam_periph *periph, union ccb *done_ccb) { - union ccb *saved_ccb, *save_ccb; + union ccb *saved_ccb; cam_status status; int frozen = 0; struct scsi_start_stop_unit *scsi_cmd; @@ -1162,99 +1146,71 @@ goto final; } case CAM_SCSI_STATUS_ERROR: + { + struct ccb_getdev cgd; + struct scsi_sense_data *sense; + int error_code, sense_key, asc, ascq; + scsi_sense_action err_action; + + if ((status & CAM_AUTOSNS_VALID) == 0) + goto final; + scsi_cmd = (struct scsi_start_stop_unit *) - &done_ccb->csio.cdb_io.cdb_bytes; - if (status & CAM_AUTOSNS_VALID) { - struct ccb_getdev cgd; - struct scsi_sense_data *sense; - int error_code, sense_key, asc, ascq; - scsi_sense_action err_action; + &done_ccb->csio.cdb_io.cdb_bytes; + sense = &done_ccb->csio.sense_data; + scsi_extract_sense(sense, &error_code, + &sense_key, &asc, &ascq); + /* + * Grab the inquiry data for this device. + */ + xpt_setup_ccb(&cgd.ccb_h, done_ccb->ccb_h.path, + CAM_PRIORITY_NORMAL); + cgd.ccb_h.func_code = XPT_GDEV_TYPE; + xpt_action((union ccb *)&cgd); + err_action = scsi_error_action(&done_ccb->csio, + &cgd.inq_data, 0); + /* + * If the error is "invalid field in CDB", and the load/eject + * flag is set, turn the flag off and try again. This is + * just in case the drive in question barfs on the load eject + * flag. The CAM code should set the load/eject flag by + * default for removable media. + */ + /* XXX KDM + * Should we check to see what the specific scsi status is?? + * Or does it not matter since we already know that there was + * an error, and we know what the specific error code was, + * and we know what the opcode is.. + */ + if ((scsi_cmd->opcode == START_STOP_UNIT) && + ((scsi_cmd->how & SSS_LOEJ) != 0) && + (asc == 0x24) && (ascq == 0x00) && + (done_ccb->ccb_h.retry_count > 0)) { + scsi_cmd->how &= ~SSS_LOEJ; + xpt_action(done_ccb); + } else if ((done_ccb->ccb_h.retry_count > 1) + && ((err_action & SS_MASK) != SS_FAIL)) { - sense = &done_ccb->csio.sense_data; - scsi_extract_sense(sense, &error_code, - &sense_key, &asc, &ascq); /* - * Grab the inquiry data for this device. + * In this case, the error recovery command failed, + * but we've got some retries left on it. Give + * it another try unless this is an unretryable error. */ - xpt_setup_ccb(&cgd.ccb_h, done_ccb->ccb_h.path, - CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); - err_action = scsi_error_action(&done_ccb->csio, - &cgd.inq_data, 0); - /* - * If the error is "invalid field in CDB", - * and the load/eject flag is set, turn the - * flag off and try again. This is just in - * case the drive in question barfs on the - * load eject flag. The CAM code should set - * the load/eject flag by default for - * removable media. - */ - /* XXX KDM - * Should we check to see what the specific - * scsi status is?? Or does it not matter - * since we already know that there was an - * error, and we know what the specific - * error code was, and we know what the - * opcode is.. - */ - if ((scsi_cmd->opcode == START_STOP_UNIT) && - ((scsi_cmd->how & SSS_LOEJ) != 0) && - (asc == 0x24) && (ascq == 0x00) && - (done_ccb->ccb_h.retry_count > 0)) { - - scsi_cmd->how &= ~SSS_LOEJ; - xpt_action(done_ccb); - } else if ((done_ccb->ccb_h.retry_count > 1) - && ((err_action & SS_MASK) != SS_FAIL)) { - - /* - * In this case, the error recovery - * command failed, but we've got - * some retries left on it. Give - * it another try unless this is an - * unretryable error. - */ - /* set the timeout to .5 sec */ - relsim_flags = - RELSIM_RELEASE_AFTER_TIMEOUT; - timeout = 500; - xpt_action(done_ccb); - break; - } else { - /* - * Perform the final retry with the original - * CCB so that final error processing is - * performed by the owner of the CCB. - */ - goto final; - } + /* set the timeout to .5 sec */ + relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT; + timeout = 500; + xpt_action(done_ccb); + break; } else { - save_ccb = xpt_alloc_ccb_nowait(); - if (save_ccb == NULL) - goto final; - bcopy(done_ccb, save_ccb, sizeof(*save_ccb)); - periph->flags |= CAM_PERIPH_SENSE_INPROG; /* - * Send a Request Sense to the device. We - * assume that we are in a contingent allegiance - * condition so we do not tag this request. + * Perform the final retry with the original + * CCB so that final error processing is + * performed by the owner of the CCB. */ - scsi_request_sense(&done_ccb->csio, /*retries*/1, - camperiphsensedone, - &save_ccb->csio.sense_data, - save_ccb->csio.sense_len, - CAM_TAG_ACTION_NONE, - /*sense_len*/SSD_FULL_SIZE, - /*timeout*/5000); - done_ccb->ccb_h.pinfo.priority--; - done_ccb->ccb_h.flags |= CAM_DEV_QFREEZE; - done_ccb->ccb_h.saved_ccb_ptr = save_ccb; - done_ccb->ccb_h.recovery_depth++; - xpt_action(done_ccb); + goto final; } break; + } default: final: bcopy(saved_ccb, done_ccb, sizeof(*done_ccb)); @@ -1376,64 +1332,6 @@ action_string); break; case SCSI_STATUS_QUEUE_FULL: - { - /* no decrement */ - struct ccb_getdevstats cgds; - - /* - * First off, find out what the current - * transaction counts are. - */ - xpt_setup_ccb(&cgds.ccb_h, - ccb->ccb_h.path, - CAM_PRIORITY_NORMAL); - cgds.ccb_h.func_code = XPT_GDEV_STATS; - xpt_action((union ccb *)&cgds); - - /* - * If we were the only transaction active, treat - * the QUEUE FULL as if it were a BUSY condition. - */ - if (cgds.dev_active != 0) { - int total_openings; - - /* - * Reduce the number of openings to - * be 1 less than the amount it took - * to get a queue full bounded by the - * minimum allowed tag count for this - * device. - */ - total_openings = cgds.dev_active + cgds.dev_openings; - *openings = cgds.dev_active; - if (*openings < cgds.mintags) - *openings = cgds.mintags; - if (*openings < total_openings) - *relsim_flags = RELSIM_ADJUST_OPENINGS; - else { - /* - * Some devices report queue full for - * temporary resource shortages. For - * this reason, we allow a minimum - * tag count to be entered via a - * quirk entry to prevent the queue - * count on these devices from falling - * to a pessimisticly low value. We - * still wait for the next successful - * completion, however, before queueing - * more transactions to the device. - */ - *relsim_flags = RELSIM_RELEASE_AFTER_CMDCMPLT; - } - *timeout = 0; - error = ERESTART; - if (bootverbose) { - xpt_print(ccb->ccb_h.path, "Queue full\n"); - } - break; - } - /* FALLTHROUGH */ - } case SCSI_STATUS_BUSY: /* * Restart the queue after either another @@ -1507,8 +1405,6 @@ err_action = scsi_error_action(&ccb->csio, &cgd.inq_data, sense_flags); - else if ((ccb->ccb_h.flags & CAM_DIS_AUTOSENSE) == 0) - err_action = SS_REQSENSE; else err_action = SS_RETRY|SSQ_DECREMENT_COUNT|EIO; @@ -1632,9 +1528,68 @@ *timeout = 500; break; } - case SS_REQSENSE: - { - *action_string = "Requesting SCSI sense data"; + default: + panic("Unhandled error action %x", err_action); + } + + if ((err_action & SS_MASK) >= SS_START) { + /* + * Drop the priority, so that the recovery + * CCB is the first to execute. Freeze the queue + * after this command is sent so that we can + * restore the old csio and have it queued in + * the proper order before we release normal + * transactions to the device. + */ + ccb->ccb_h.pinfo.priority--; + ccb->ccb_h.flags |= CAM_DEV_QFREEZE; + ccb->ccb_h.saved_ccb_ptr = orig_ccb; + error = ERESTART; + } + +sense_error_done: + if ((err_action & SSQ_PRINT_SENSE) != 0 + && (ccb->ccb_h.status & CAM_AUTOSNS_VALID) != 0) + cam_error_print(orig_ccb, CAM_ESF_ALL, CAM_EPF_ALL); + } + return (error); +} + +/* + * Generic XPT error handler. Called before peripheral error handler to + * handle XPT-specific cases, such as sense fetching and full queue. + */ +int +cam_xpt_error(union ccb *ccb) +{ + struct cam_periph *periph; + union ccb *orig_ccb; + cam_status status; + int error, frozen, openings; + u_int32_t relsim_flags, timeout; + + error = 0; + relsim_flags = 0; + openings = 0; + timeout = 0; + periph = xpt_path_periph(ccb->ccb_h.path); + status = ccb->ccb_h.status; + frozen = (status & CAM_DEV_QFRZN) != 0; + status &= CAM_STATUS_MASK; + switch (status) { + case CAM_SCSI_STATUS_ERROR: + switch (ccb->csio.scsi_status) { + case SCSI_STATUS_CMD_TERMINATED: + case SCSI_STATUS_CHECK_COND: + if ((ccb->ccb_h.status & CAM_AUTOSNS_VALID) != 0 || + (ccb->ccb_h.flags & CAM_DIS_AUTOSENSE) != 0) + break; + if (periph->flags & CAM_PERIPH_SENSE_INPROG) + break; + orig_ccb = xpt_alloc_ccb_nowait(); + if (orig_ccb == NULL) + break; + bcopy(ccb, orig_ccb, sizeof(*orig_ccb)); periph->flags |= CAM_PERIPH_SENSE_INPROG; /* * Send a Request Sense to the device. We @@ -1648,13 +1603,6 @@ CAM_TAG_ACTION_NONE, /*sense_len*/SSD_FULL_SIZE, /*timeout*/5000); - break; - } - default: - panic("Unhandled error action %x", err_action); - } - - if ((err_action & SS_MASK) >= SS_START) { /* * Drop the priority, so that the recovery * CCB is the first to execute. Freeze the queue @@ -1666,15 +1614,123 @@ ccb->ccb_h.pinfo.priority--; ccb->ccb_h.flags |= CAM_DEV_QFREEZE; ccb->ccb_h.saved_ccb_ptr = orig_ccb; - ccb->ccb_h.recovery_depth = 0; error = ERESTART; + break; + case SCSI_STATUS_QUEUE_FULL: + { + struct ccb_getdevstats cgds; + int total_openings; + + /* + * First off, find out what the current + * transaction counts are. + */ + xpt_setup_ccb(&cgds.ccb_h, + ccb->ccb_h.path, + CAM_PRIORITY_NORMAL); + cgds.ccb_h.func_code = XPT_GDEV_STATS; + xpt_action((union ccb *)&cgds); + + /* + * If we were the only transaction active, treat + * the QUEUE FULL as if it were a BUSY condition. + */ + if (cgds.dev_active == 0) + break; + + /* + * Reduce the number of openings to + * be 1 less than the amount it took + * to get a queue full bounded by the + * minimum allowed tag count for this + * device. + */ + total_openings = cgds.dev_active + cgds.dev_openings; + openings = cgds.dev_active; + if (openings < cgds.mintags) + openings = cgds.mintags; + if (openings < total_openings) + relsim_flags = RELSIM_ADJUST_OPENINGS; + else { + /* + * Some devices report queue full for + * temporary resource shortages. For + * this reason, we allow a minimum + * tag count to be entered via a + * quirk entry to prevent the queue + * count on these devices from falling + * to a pessimisticly low value. We + * still wait for the next successful + * completion, however, before queueing + * more transactions to the device. + */ + relsim_flags = RELSIM_RELEASE_AFTER_CMDCMPLT; + } + error = ERESTART; + if (bootverbose) { + xpt_print(ccb->ccb_h.path, + "Queue full, limiting to %d openings.\n", + openings); + } + break; } + default: + break; + } + break; + case CAM_SEL_TIMEOUT: + { + struct cam_path *newpath; -sense_error_done: - if ((err_action & SSQ_PRINT_SENSE) != 0 - && (ccb->ccb_h.status & CAM_AUTOSNS_VALID) != 0) - cam_error_print(orig_ccb, CAM_ESF_ALL, CAM_EPF_ALL); + if ((periph->flags & CAM_PERIPH_INVALID) == 0 && + ccb->ccb_h.retry_count > 0) { + + ccb->ccb_h.retry_count--; + error = ERESTART; + if (bootverbose) { + xpt_print(ccb->ccb_h.path, + "Selection timeout\n"); + } + + /* + * Wait a bit to give the device + * time to recover before we try again. + */ + relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT; + timeout = periph_selto_delay; + break; + } + /* Should we do more if we can't create the path?? */ + if (xpt_create_path(&newpath, xpt_path_periph(ccb->ccb_h.path), + xpt_path_path_id(ccb->ccb_h.path), + xpt_path_target_id(ccb->ccb_h.path), + CAM_LUN_WILDCARD) != CAM_REQ_CMP) + break; + + /* + * Let peripheral drivers know that this device has gone + * away. + */ + xpt_async(AC_LOST_DEVICE, newpath, NULL); + xpt_free_path(newpath); + break; } + default: + break; + } + + /* Run recovery if requested. */ + if (error == ERESTART) { + if (frozen) + ccb->ccb_h.status &= ~CAM_DEV_QFRZN; +xpt_print(ccb->ccb_h.path, "Doing XPT recovery\n"); + xpt_action(ccb); + if (frozen) + cam_release_devq(ccb->ccb_h.path, + relsim_flags, openings, timeout, + /*getcount_only*/0); + } + return (error); } @@ -1773,45 +1829,12 @@ error = EIO; break; case CAM_SEL_TIMEOUT: - { - struct cam_path *newpath; - - if ((camflags & CAM_RETRY_SELTO) != 0) { - if (ccb->ccb_h.retry_count > 0) { - - ccb->ccb_h.retry_count--; - error = ERESTART; - if (bootverbose && printed == 0) { - xpt_print(ccb->ccb_h.path, - "Selection timeout\n"); - printed++; - } - - /* - * Wait a bit to give the device - * time to recover before we try again. - */ - relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT; - timeout = periph_selto_delay; - break; - } + error = ENXIO; + if (bootverbose && printed == 0) { + xpt_print(ccb->ccb_h.path, "Selection timeout\n"); + printed++; } - error = ENXIO; - /* Should we do more if we can't create the path?? */ - if (xpt_create_path(&newpath, xpt_path_periph(ccb->ccb_h.path), - xpt_path_path_id(ccb->ccb_h.path), - xpt_path_target_id(ccb->ccb_h.path), - CAM_LUN_WILDCARD) != CAM_REQ_CMP) - break; - - /* - * Let peripheral drivers know that this device has gone - * away. - */ - xpt_async(AC_LOST_DEVICE, newpath, NULL); - xpt_free_path(newpath); break; - } case CAM_REQ_INVALID: case CAM_PATH_INVALID: case CAM_DEV_NOT_THERE: Index: cam_periph.h =================================================================== --- cam_periph.h (revision 220512) +++ cam_periph.h (working copy) @@ -181,6 +181,7 @@ u_int duration_ms); int cam_periph_error(union ccb *ccb, cam_flags camflags, u_int32_t sense_flags, union ccb *save_ccb); +int cam_xpt_error(union ccb *ccb); static __inline void cam_periph_lock(struct cam_periph *periph) Index: scsi/scsi_all.h =================================================================== --- scsi/scsi_all.h (revision 220512) +++ scsi/scsi_all.h (working copy) @@ -73,9 +73,6 @@ SS_TUR = 0x040000, /* Send a Test Unit Ready command to the * device, then retry the original command. */ - SS_REQSENSE = 0x050000, /* Send a RequestSense command to the - * device, then retry the original command. - */ SS_MASK = 0xff0000 } scsi_sense_action; Index: cam_xpt.c =================================================================== --- cam_xpt.c (revision 220512) +++ cam_xpt.c (working copy) @@ -4827,16 +4827,21 @@ runq = FALSE; } - if ((ccb_h->flags & CAM_DEV_QFRZDIS) - && (ccb_h->status & CAM_DEV_QFRZN)) { - xpt_release_devq(ccb_h->path, /*count*/1, - /*run_queue*/TRUE); - ccb_h->status &= ~CAM_DEV_QFRZN; - } else if (runq) { + if (runq) xpt_run_dev_sendq(ccb_h->path->bus); + + if (cam_xpt_error((union ccb *)ccb_h) != ERESTART) { + + if ((ccb_h->flags & CAM_DEV_QFRZDIS) && + (ccb_h->status & CAM_DEV_QFRZN)) { + xpt_release_devq(ccb_h->path, /*count*/1, + /*run_queue*/TRUE); + ccb_h->status &= ~CAM_DEV_QFRZN; + } + + /* Call the peripheral driver's callback */ + (*ccb_h->cbfcnp)(ccb_h->path->periph, + (union ccb *)ccb_h); } - - /* Call the peripheral driver's callback */ - (*ccb_h->cbfcnp)(ccb_h->path->periph, (union ccb *)ccb_h); } }