# HG changeset patch # Parent 013ee68c7aa8437d13b564b1e092a32e10c870b0 Implement "NCQ TRIM" by using Send First Party DMA commands with a Dataset Management subcommand and setting the TRIM bit. For drives that support this method of TRIM, better latency is expected because TRIMs no longer act as a gating event forcing the queue to drain before they are executed and holding off new commands while the TRIM finishes. We still impose a one-at-a-time limit of the TRIMs, though that could be relaxed (though you start running into fairness issues if you have a lot of TRIMs competing with other I/Os), so that optimization is left for the future. * * * Add a flag for supporting the NCQ DSM TRIM extention to the ata_cmd structure. It may be gross, but at least it is now safe. diff -r 013ee68c7aa8 sys/cam/ata/ata_all.h --- a/sys/cam/ata/ata_all.h +++ b/sys/cam/ata/ata_all.h @@ -46,6 +46,7 @@ struct ata_cmd { #define CAM_ATAIO_CONTROL 0x04 /* Control, not a command */ #define CAM_ATAIO_NEEDRESULT 0x08 /* Request requires result. */ #define CAM_ATAIO_DMA 0x10 /* DMA command */ +#define CAM_ATAIO_AUX_HACK 0x20 /* Kludge to make FPDMA DSM TRIM work */ u_int8_t command; u_int8_t features; diff -r 013ee68c7aa8 sys/cam/ata/ata_da.c --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -87,7 +87,8 @@ typedef enum { ADA_FLAG_CAN_CFA = 0x0400, ADA_FLAG_CAN_POWERMGT = 0x0800, ADA_FLAG_CAN_DMA48 = 0x1000, - ADA_FLAG_DIRTY = 0x2000 + ADA_FLAG_DIRTY = 0x2000, + ADA_FLAG_CAN_NCQ_TRIM = 0x4000 /* CAN_TRIM also set */ } ada_flags; typedef enum { @@ -1018,10 +1019,22 @@ adaasync(void *callback_arg, u_int32_t c else softc->flags &= ~ADA_FLAG_CAN_NCQ; if ((cgd.ident_data.support_dsm & ATA_SUPPORT_DSM_TRIM) && - (cgd.inq_flags & SID_DMA)) + (cgd.inq_flags & SID_DMA)) { softc->flags |= ADA_FLAG_CAN_TRIM; - else - softc->flags &= ~ADA_FLAG_CAN_TRIM; + /* + * If we can do RCVSND_FPDMA_QUEUED commands, we may be able to do + * NCQ trims, if we support trims at all. We also need support from + * the sim do do things properly. Perhaps we should look at log 13 + * dword 0 bit 0 and dword 1 bit 0 are set too... + */ + if (/* (cpi.hba_misc & PIM_NCQ_KLUDGE) != 0 && */ /* Don't know how to do this here */ + (cgd.ident_data.satacapabilities2 & ATA_SUPPORT_RCVSND_FPDMA_QUEUED) != 0 && + (softc->flags & ADA_FLAG_CAN_TRIM) != 0) + softc->flags |= ADA_FLAG_CAN_NCQ_TRIM; + else + softc->flags &= ~ADA_FLAG_CAN_NCQ_TRIM; + } else + softc->flags &= ~(ADA_FLAG_CAN_TRIM | ADA_FLAG_CAN_NCQ_TRIM); cam_periph_async(periph, code, path, arg); break; @@ -1293,6 +1306,16 @@ adaregister(struct cam_periph *periph, v softc->disk->d_delmaxsize = maxio; if ((cpi.hba_misc & PIM_UNMAPPED) != 0) softc->disk->d_flags |= DISKFLAG_UNMAPPED_BIO; + /* + * If we can do RCVSND_FPDMA_QUEUED commands, we may be able to do + * NCQ trims, if we support trims at all. We also need support from + * the sim do do things properly. Perhaps we should look at log 13 + * dword 0 bit 0 and dword 1 bit 0 are set too... + */ + if ((cpi.hba_misc & PIM_NCQ_KLUDGE) != 0 && + (cgd->ident_data.satacapabilities2 & ATA_SUPPORT_RCVSND_FPDMA_QUEUED) != 0 && + (softc->flags & ADA_FLAG_CAN_TRIM) != 0) + softc->flags |= ADA_FLAG_CAN_NCQ_TRIM; strlcpy(softc->disk->d_descr, cgd->ident_data.model, MIN(sizeof(softc->disk->d_descr), sizeof(cgd->ident_data.model))); strlcpy(softc->disk->d_ident, cgd->ident_data.serial, @@ -1410,10 +1433,9 @@ adaregister(struct cam_periph *periph, v return(CAM_REQ_CMP); } -static void -ada_dsmtrim(struct ada_softc *softc, struct bio *bp, struct ccb_ataio *ataio) +static int +ada_dsmtrim_req_create(struct ada_softc *softc, struct bio *bp, struct trim_request *req) { - struct trim_request *req = &softc->trim_req; uint64_t lastlba = (uint64_t)-1; int c, lastcount = 0, off, ranges = 0; @@ -1466,6 +1488,18 @@ ada_dsmtrim(struct ada_softc *softc, str (softc->trim_max_ranges - ranges) * ATA_DSM_RANGE_MAX) break; } while (1); + + return (ranges); +} + +static void +ada_dsmtrim(struct ada_softc *softc, struct bio *bp, struct ccb_ataio *ataio) +{ + struct trim_request *req = &softc->trim_req; + int ranges; + + printf("Doing DSM Trim\n"); + ranges = ada_dsmtrim_req_create(softc, bp, req); cam_fill_ataio(ataio, ada_retry_count, adadone, @@ -1480,6 +1514,54 @@ ada_dsmtrim(struct ada_softc *softc, str 1) / ATA_DSM_BLK_RANGES); } +#ifdef SATAFIS +static void +ada_ncq_dsmtrim(struct ada_softc *softc, struct bio *bp, struct ccb_satafisio *satafisio) +#else +static void +ada_ncq_dsmtrim(struct ada_softc *softc, struct bio *bp, struct ccb_ataio *ataio) +#endif +{ + struct trim_request *req = &softc->trim_req; + int ranges; + + printf("Doing NCQ DSM Trim\n"); + ranges = ada_dsmtrim_req_create(softc, bp, req); +#ifdef SATAFIS + cam_fill_satafisio(satafisio, + ada_retry_count, + adadone, + CAM_DIR_OUT, + 0, + req->data, + ((ranges + ATA_DSM_BLK_RANGES - 1) / + ATA_DSM_BLK_RANGES) * ATA_DSM_BLK_SIZE, + ada_default_timeout * 1000); + ata_h2d_fis_cmd(satafisio, + ATA_SEND_FPDMA_QUEUED, + ATA_SFPDMA_DSM, + 0, /* Reserved -> 0 */ + (ranges + ATA_DSM_BLK_RANGES - 1) / ATA_DSM_BLK_RANGES, + ATA_SFPDMA_DSM_TRIM); +#else + cam_fill_ataio(ataio, + ada_retry_count, + adadone, + CAM_DIR_OUT, + 0, + req->data, + ((ranges + ATA_DSM_BLK_RANGES - 1) / + ATA_DSM_BLK_RANGES) * ATA_DSM_BLK_SIZE, + ada_default_timeout * 1000); + ata_ncq_cmd(ataio, + ATA_SEND_FPDMA_QUEUED, + 0, + (ranges + ATA_DSM_BLK_RANGES - 1) / ATA_DSM_BLK_RANGES); + ataio->cmd.sector_count_exp = ATA_SFPDMA_DSM; + ataio->cmd.flags |= CAM_ATAIO_AUX_HACK; +#endif +} + static void ada_cfaerase(struct ada_softc *softc, struct bio *bp, struct ccb_ataio *ataio) { @@ -1511,6 +1593,9 @@ adastart(struct cam_periph *periph, unio { struct ada_softc *softc = (struct ada_softc *)periph->softc; struct ccb_ataio *ataio = &start_ccb->ataio; +#ifdef SATAFIS + struct ccb_satafisio *satafisio = &start_ccb->satafisio; +#endif CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("adastart\n")); @@ -1523,7 +1608,13 @@ adastart(struct cam_periph *periph, unio /* Run TRIM if not running yet. */ if (!softc->trim_running && (bp = bioq_first(&softc->trim_queue)) != 0) { - if (softc->flags & ADA_FLAG_CAN_TRIM) { + if (softc->flags & ADA_FLAG_CAN_NCQ_TRIM) { +#ifdef SATAFIS + ada_ncq_dsmtrim(softc, bp, satafisio); +#else + ada_ncq_dsmtrim(softc, bp, ataio); +#endif + } else if (softc->flags & ADA_FLAG_CAN_TRIM) { ada_dsmtrim(softc, bp, ataio); } else if ((softc->flags & ADA_FLAG_CAN_CFA) && !(softc->flags & ADA_FLAG_CAN_48BIT)) { @@ -1749,6 +1840,7 @@ adadone(struct cam_periph *periph, union int error; cam_periph_lock(periph); + bp = (struct bio *)done_ccb->ccb_h.ccb_bp; if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { error = adaerror(done_ccb, 0, 0); if (error == ERESTART) { @@ -1762,12 +1854,25 @@ adadone(struct cam_periph *periph, union /*reduction*/0, /*timeout*/0, /*getcount_only*/0); + /* + * If we get an error on an NCQ DSM TRIM, fall back + * to a non-NCQ DSM TRIM forever. Please note that if + * CAN_NCQ_TRIM is set, CAN_TRIM is necessarily set too. + */ + if (state == ADA_CCB_TRIM && + error != 0 && + (softc->flags & ADA_FLAG_CAN_NCQ_TRIM) != 0) { + softc->flags &= ~ADA_FLAG_CAN_NCQ_TRIM; + ada_dsmtrim(softc, bp, ataio); + adaschedule(periph); /* unsure if I need to schedule here */ + cam_periph_unlock(periph); + return; + } } else { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) panic("REQ_CMP with QFRZN"); error = 0; } - bp = (struct bio *)done_ccb->ccb_h.ccb_bp; bp->bio_error = error; if (error != 0) { bp->bio_resid = bp->bio_bcount; diff -r 013ee68c7aa8 sys/cam/cam_ccb.h --- a/sys/cam/cam_ccb.h +++ b/sys/cam/cam_ccb.h @@ -573,6 +573,7 @@ typedef enum { } pi_tmflag; typedef enum { + PIM_NCQ_KLUDGE = 0x200, /* Supports the sata ncq trim kludge */ PIM_EXTLUNS = 0x100,/* 64bit extended LUNs supported */ PIM_SCANHILO = 0x80, /* Bus scans from high ID to low ID */ PIM_NOREMOVE = 0x40, /* Removeable devices not included in scan */ diff -r 013ee68c7aa8 sys/dev/ahci/ahci.c --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -2361,6 +2361,9 @@ ahci_setup_fis(struct ahci_channel *ch, fis[13] = ccb->ataio.cmd.sector_count_exp; } fis[15] = ATA_A_4BIT; + /* Gross and vile hack -- makes ncq trim work w/o changing ataio size */ + if (ccb->ataio.cmd.flags & CAM_ATAIO_AUX_HACK) + fis[16] = 1; } else { fis[15] = ccb->ataio.cmd.control; } @@ -2618,7 +2621,7 @@ ahciaction(struct cam_sim *sim, union cc if (ch->caps & AHCI_CAP_SPM) cpi->hba_inquiry |= PI_SATAPM; cpi->target_sprt = 0; - cpi->hba_misc = PIM_SEQSCAN | PIM_UNMAPPED; + cpi->hba_misc = PIM_SEQSCAN | PIM_UNMAPPED | PIM_NCQ_KLUDGE; cpi->hba_eng_cnt = 0; if (ch->caps & AHCI_CAP_SPM) cpi->max_target = 15;