/*
 * Review notes for this patch (ata-all.h and ata-lowlevel.c).
 *
 * NOTE(review): the line structure of this diff looks collapsed (headers,
 * hunk markers, context and added/removed lines run together on two lines),
 * so it is unlikely to apply as-is; regenerate it from the working copy
 * before use.
 *
 * ata-all.h
 *   Adds ATA_INW_STRM and ATA_OUTW_STRM, which expand to
 *   bus_read_stream_2() and bus_write_stream_2(), plus the indexed forms
 *   ATA_IDX_INW_STRM and ATA_IDX_OUTW_STRM, which pass ch->r_io[idx].res
 *   and ch->r_io[idx].offset to them.
 *
 * ata-lowlevel.c, ata_pio_read() and ata_pio_write()
 *   - buf[2] gains __aligned(sizeof(int16_t)) so that it can be accessed
 *     as a single uint16_t.
 *   - The condition selecting the 16-bit path (ATA_USE_16BIT set, size not
 *     a multiple of sizeof(int32_t), or addr not int32_t-aligned) is
 *     wrapped in __predict_false().
 *   - Unless __NO_STRICT_ALIGNMENT is defined, an addr that is not
 *     int16_t-aligned is transferred one 16-bit word at a time through
 *     buf, copying bytes to or from addr individually, instead of handing
 *     addr to the INSW/OUTSW stream call.
 *   - The odd trailing byte (size & 1) now goes through one
 *     ATA_IDX_INW_STRM / ATA_IDX_OUTW_STRM access via buf instead of a
 *     count-1 INSW/OUTSW stream call on buf.
 *
 * NOTE(review): in the ata_pio_write() odd-byte tail only buf[0] is
 * assigned before the 16-bit write; buf[1] keeps whatever it held before
 * (left over from the word loop, or never initialized). Consider setting
 * it to 0 so the pad byte is deterministic.
 *
 * NOTE(review): the uint16_t cast applied to the address of buf type-puns
 * a uint8_t array; confirm the build disables strict-aliasing assumptions,
 * or use a union or memcpy() instead.
 *
 * NOTE(review): ata_pio_write() shows size being initialized with
 * min(request->transfersize, length) and then assigned the same expression
 * again before use; the second assignment looks redundant.
 */
Index: ata-all.h =================================================================== --- ata-all.h (revision 245046) +++ ata-all.h (working copy) @@ -698,6 +698,8 @@ MALLOC_DECLARE(M_ATA); #define ATA_INW(res, offset) \ bus_read_2((res), (offset)) +#define ATA_INW_STRM(res, offset) \ + bus_read_stream_2((res), (offset)) #define ATA_INL(res, offset) \ bus_read_4((res), (offset)) #define ATA_INSW(res, offset, addr, count) \ @@ -712,6 +714,8 @@ MALLOC_DECLARE(M_ATA); bus_write_1((res), (offset), (value)) #define ATA_OUTW(res, offset, value) \ bus_write_2((res), (offset), (value)) +#define ATA_OUTW_STRM(res, offset, value) \ + bus_write_stream_2((res), (offset), (value)) #define ATA_OUTL(res, offset, value) \ bus_write_4((res), (offset), (value)) #define ATA_OUTSW(res, offset, addr, count) \ @@ -729,6 +733,9 @@ MALLOC_DECLARE(M_ATA); #define ATA_IDX_INW(ch, idx) \ ATA_INW(ch->r_io[idx].res, ch->r_io[idx].offset) +#define ATA_IDX_INW_STRM(ch, idx) \ + ATA_INW_STRM(ch->r_io[idx].res, ch->r_io[idx].offset) + #define ATA_IDX_INL(ch, idx) \ ATA_INL(ch->r_io[idx].res, ch->r_io[idx].offset) @@ -750,6 +757,9 @@ MALLOC_DECLARE(M_ATA); #define ATA_IDX_OUTW(ch, idx, value) \ ATA_OUTW(ch->r_io[idx].res, ch->r_io[idx].offset, value) +#define ATA_IDX_OUTW_STRM(ch, idx, value) \ + ATA_OUTW_STRM(ch->r_io[idx].res, ch->r_io[idx].offset, value) + #define ATA_IDX_OUTL(ch, idx, value) \ ATA_OUTL(ch->r_io[idx].res, ch->r_io[idx].offset, value) Index: ata-lowlevel.c =================================================================== --- ata-lowlevel.c (revision 245047) +++ ata-lowlevel.c (working copy) @@ -846,14 +846,28 @@ ata_pio_read(struct ata_request *request, int leng uint8_t *addr; int size = min(request->transfersize, length); int resid; - uint8_t buf[2]; + uint8_t buf[2] __aligned(sizeof(int16_t)); +#ifndef __NO_STRICT_ALIGNMENT + int i; +#endif addr = (uint8_t *)request->data + request->donecount; - if (ch->flags & ATA_USE_16BIT || (size % sizeof(int32_t)) || - ((uintptr_t)addr % 
sizeof(int32_t))) { - ATA_IDX_INSW_STRM(ch, ATA_DATA, (void*)addr, size / sizeof(int16_t)); + if (__predict_false(ch->flags & ATA_USE_16BIT || + (size % sizeof(int32_t)) || ((uintptr_t)addr % sizeof(int32_t)))) { +#ifndef __NO_STRICT_ALIGNMENT + if (__predict_false((uintptr_t)addr % sizeof(int16_t))) { + for (i = 0, resid = size & ~1; resid > 0; resid -= + sizeof(int16_t)) { + *(uint16_t *)&buf = ATA_IDX_INW_STRM(ch, ATA_DATA); + addr[i++] = buf[0]; + addr[i++] = buf[1]; + } + } else +#endif + ATA_IDX_INSW_STRM(ch, ATA_DATA, (void*)addr, size / + sizeof(int16_t)); if (size & 1) { - ATA_IDX_INSW_STRM(ch, ATA_DATA, (void*)buf, 1); + *(uint16_t *)&buf = ATA_IDX_INW_STRM(ch, ATA_DATA); (addr + (size & ~1))[0] = buf[0]; } } else @@ -875,15 +889,30 @@ ata_pio_write(struct ata_request *request, int len uint8_t *addr; int size = min(request->transfersize, length); int resid; - uint8_t buf[2]; + uint8_t buf[2] __aligned(sizeof(int16_t)); +#ifndef __NO_STRICT_ALIGNMENT + int i; +#endif + size = min(request->transfersize, length); addr = (uint8_t *)request->data + request->donecount; - if (ch->flags & ATA_USE_16BIT || (size % sizeof(int32_t)) || - ((uintptr_t)addr % sizeof(int32_t))) { - ATA_IDX_OUTSW_STRM(ch, ATA_DATA, (void*)addr, size / sizeof(int16_t)); + if (__predict_false(ch->flags & ATA_USE_16BIT || + (size % sizeof(int32_t)) || ((uintptr_t)addr % sizeof(int32_t)))) { +#ifndef __NO_STRICT_ALIGNMENT + if (__predict_false((uintptr_t)addr % sizeof(int16_t))) { + for (i = 0, resid = size & ~1; resid > 0; resid -= + sizeof(int16_t)) { + buf[0] = addr[i++]; + buf[1] = addr[i++]; + ATA_IDX_OUTW_STRM(ch, ATA_DATA, *(uint16_t *)&buf); + } + } else +#endif + ATA_IDX_OUTSW_STRM(ch, ATA_DATA, (void*)addr, size / + sizeof(int16_t)); if (size & 1) { buf[0] = (addr + (size & ~1))[0]; - ATA_IDX_OUTSW_STRM(ch, ATA_DATA, (void*)buf, 1); + ATA_IDX_OUTW_STRM(ch, ATA_DATA, *(uint16_t *)&buf); } } else ATA_IDX_OUTSL_STRM(ch, ATA_DATA, (void*)addr, size / sizeof(int32_t));