/*
 * FreeBSD ZFS
 * The Zettabyte File System
 */
00001 /* 00002 * CDDL HEADER START 00003 * 00004 * The contents of this file are subject to the terms of the 00005 * Common Development and Distribution License (the "License"). 00006 * You may not use this file except in compliance with the License. 00007 * 00008 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 00009 * or http://www.opensolaris.org/os/licensing. 00010 * See the License for the specific language governing permissions 00011 * and limitations under the License. 00012 * 00013 * When distributing Covered Code, include this CDDL HEADER in each 00014 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 00015 * If applicable, add the following below this CDDL HEADER, with the 00016 * fields enclosed by brackets "[]" replaced with your own identifying 00017 * information: Portions Copyright [yyyy] [name of copyright owner] 00018 * 00019 * CDDL HEADER END 00020 */ 00021 /* 00022 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 00023 * Use is subject to license terms. 00024 */ 00025 00026 /* 00027 * Copyright (c) 2012 by Delphix. All rights reserved. 00028 */ 00029 00030 #include <sys/spa.h> 00031 #include <sys/spa_impl.h> 00032 #include <sys/vdev.h> 00033 #include <sys/vdev_impl.h> 00034 #include <sys/zio.h> 00035 #include <sys/zio_checksum.h> 00036 00037 #include <sys/fm/fs/zfs.h> 00038 #include <sys/fm/protocol.h> 00039 #include <sys/fm/util.h> 00040 #include <sys/sysevent.h> 00041 00104 #ifdef _KERNEL 00105 static void 00106 zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, 00107 const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, 00108 uint64_t stateoroffset, uint64_t size) 00109 { 00110 nvlist_t *ereport, *detector; 00111 00112 uint64_t ena; 00113 char class[64]; 00114 00115 /* 00116 * If we are doing a spa_tryimport() or in recovery mode, 00117 * ignore errors. 
00118 */ 00119 if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT || 00120 spa_load_state(spa) == SPA_LOAD_RECOVER) 00121 return; 00122 00123 /* 00124 * If we are in the middle of opening a pool, and the previous attempt 00125 * failed, don't bother logging any new ereports - we're just going to 00126 * get the same diagnosis anyway. 00127 */ 00128 if (spa_load_state(spa) != SPA_LOAD_NONE && 00129 spa->spa_last_open_failed) 00130 return; 00131 00132 if (zio != NULL) { 00133 /* 00134 * If this is not a read or write zio, ignore the error. This 00135 * can occur if the DKIOCFLUSHWRITECACHE ioctl fails. 00136 */ 00137 if (zio->io_type != ZIO_TYPE_READ && 00138 zio->io_type != ZIO_TYPE_WRITE) 00139 return; 00140 00141 /* 00142 * Ignore any errors from speculative I/Os, as failure is an 00143 * expected result. 00144 */ 00145 if (zio->io_flags & ZIO_FLAG_SPECULATIVE) 00146 return; 00147 00148 /* 00149 * If this I/O is not a retry I/O, don't post an ereport. 00150 * Otherwise, we risk making bad diagnoses based on B_FAILFAST 00151 * I/Os. 00152 */ 00153 if (zio->io_error == EIO && 00154 !(zio->io_flags & ZIO_FLAG_IO_RETRY)) 00155 return; 00156 00157 if (vd != NULL) { 00158 /* 00159 * If the vdev has already been marked as failing due 00160 * to a failed probe, then ignore any subsequent I/O 00161 * errors, as the DE will automatically fault the vdev 00162 * on the first such failure. This also catches cases 00163 * where vdev_remove_wanted is set and the device has 00164 * not yet been asynchronously placed into the REMOVED 00165 * state. 00166 */ 00167 if (zio->io_vd == vd && !vdev_accessible(vd, zio)) 00168 return; 00169 00170 /* 00171 * Ignore checksum errors for reads from DTL regions of 00172 * leaf vdevs. 
00173 */ 00174 if (zio->io_type == ZIO_TYPE_READ && 00175 zio->io_error == ECKSUM && 00176 vd->vdev_ops->vdev_op_leaf && 00177 vdev_dtl_contains(vd, DTL_MISSING, zio->io_txg, 1)) 00178 return; 00179 } 00180 } 00181 00182 /* 00183 * For probe failure, we want to avoid posting ereports if we've 00184 * already removed the device in the meantime. 00185 */ 00186 if (vd != NULL && 00187 strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) == 0 && 00188 (vd->vdev_remove_wanted || vd->vdev_state == VDEV_STATE_REMOVED)) 00189 return; 00190 00191 if ((ereport = fm_nvlist_create(NULL)) == NULL) 00192 return; 00193 00194 if ((detector = fm_nvlist_create(NULL)) == NULL) { 00195 fm_nvlist_destroy(ereport, FM_NVA_FREE); 00196 return; 00197 } 00198 00199 /* 00200 * Serialize ereport generation 00201 */ 00202 mutex_enter(&spa->spa_errlist_lock); 00203 00204 /* 00205 * Determine the ENA to use for this event. If we are in a loading 00206 * state, use a SPA-wide ENA. Otherwise, if we are in an I/O state, use 00207 * a root zio-wide ENA. Otherwise, simply use a unique ENA. 00208 */ 00209 if (spa_load_state(spa) != SPA_LOAD_NONE) { 00210 if (spa->spa_ena == 0) 00211 spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1); 00212 ena = spa->spa_ena; 00213 } else if (zio != NULL && zio->io_logical != NULL) { 00214 if (zio->io_logical->io_ena == 0) 00215 zio->io_logical->io_ena = 00216 fm_ena_generate(0, FM_ENA_FMT1); 00217 ena = zio->io_logical->io_ena; 00218 } else { 00219 ena = fm_ena_generate(0, FM_ENA_FMT1); 00220 } 00221 00222 /* 00223 * Construct the full class, detector, and other standard FMA fields. 00224 */ 00225 (void) snprintf(class, sizeof (class), "%s.%s", 00226 ZFS_ERROR_CLASS, subclass); 00227 00228 fm_fmri_zfs_set(detector, FM_ZFS_SCHEME_VERSION, spa_guid(spa), 00229 vd != NULL ? 
vd->vdev_guid : 0); 00230 00231 fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL); 00232 00233 /* 00234 * Construct the per-ereport payload, depending on which parameters are 00235 * passed in. 00236 */ 00237 00238 /* 00239 * Generic payload members common to all ereports. 00240 */ 00241 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL, 00242 DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 00243 DATA_TYPE_UINT64, spa_guid(spa), 00244 FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32, 00245 spa_load_state(spa), NULL); 00246 00247 if (spa != NULL) { 00248 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE, 00249 DATA_TYPE_STRING, 00250 spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ? 00251 FM_EREPORT_FAILMODE_WAIT : 00252 spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ? 00253 FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC, 00254 NULL); 00255 } 00256 00257 if (vd != NULL) { 00258 vdev_t *pvd = vd->vdev_parent; 00259 00260 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, 00261 DATA_TYPE_UINT64, vd->vdev_guid, 00262 FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, 00263 DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL); 00264 if (vd->vdev_path != NULL) 00265 fm_payload_set(ereport, 00266 FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, 00267 DATA_TYPE_STRING, vd->vdev_path, NULL); 00268 if (vd->vdev_devid != NULL) 00269 fm_payload_set(ereport, 00270 FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, 00271 DATA_TYPE_STRING, vd->vdev_devid, NULL); 00272 if (vd->vdev_fru != NULL) 00273 fm_payload_set(ereport, 00274 FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU, 00275 DATA_TYPE_STRING, vd->vdev_fru, NULL); 00276 00277 if (pvd != NULL) { 00278 fm_payload_set(ereport, 00279 FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, 00280 DATA_TYPE_UINT64, pvd->vdev_guid, 00281 FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE, 00282 DATA_TYPE_STRING, pvd->vdev_ops->vdev_op_type, 00283 NULL); 00284 if (pvd->vdev_path) 00285 fm_payload_set(ereport, 00286 FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH, 
00287 DATA_TYPE_STRING, pvd->vdev_path, NULL); 00288 if (pvd->vdev_devid) 00289 fm_payload_set(ereport, 00290 FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID, 00291 DATA_TYPE_STRING, pvd->vdev_devid, NULL); 00292 } 00293 } 00294 00295 if (zio != NULL) { 00296 /* 00297 * Payload common to all I/Os. 00298 */ 00299 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR, 00300 DATA_TYPE_INT32, zio->io_error, NULL); 00301 00302 /* 00303 * If the 'size' parameter is non-zero, it indicates this is a 00304 * RAID-Z or other I/O where the physical offset and length are 00305 * provided for us, instead of within the zio_t. 00306 */ 00307 if (vd != NULL) { 00308 if (size) 00309 fm_payload_set(ereport, 00310 FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, 00311 DATA_TYPE_UINT64, stateoroffset, 00312 FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, 00313 DATA_TYPE_UINT64, size, NULL); 00314 else 00315 fm_payload_set(ereport, 00316 FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, 00317 DATA_TYPE_UINT64, zio->io_offset, 00318 FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, 00319 DATA_TYPE_UINT64, zio->io_size, NULL); 00320 } 00321 00322 /* 00323 * Payload for I/Os with corresponding logical information. 00324 */ 00325 if (zio->io_logical != NULL) 00326 fm_payload_set(ereport, 00327 FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, 00328 DATA_TYPE_UINT64, 00329 zio->io_logical->io_bookmark.zb_objset, 00330 FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, 00331 DATA_TYPE_UINT64, 00332 zio->io_logical->io_bookmark.zb_object, 00333 FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, 00334 DATA_TYPE_INT64, 00335 zio->io_logical->io_bookmark.zb_level, 00336 FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, 00337 DATA_TYPE_UINT64, 00338 zio->io_logical->io_bookmark.zb_blkid, NULL); 00339 } else if (vd != NULL) { 00340 /* 00341 * If we have a vdev but no zio, this is a device fault, and the 00342 * 'stateoroffset' parameter indicates the previous state of the 00343 * vdev. 
00344 */ 00345 fm_payload_set(ereport, 00346 FM_EREPORT_PAYLOAD_ZFS_PREV_STATE, 00347 DATA_TYPE_UINT64, stateoroffset, NULL); 00348 } 00349 00350 mutex_exit(&spa->spa_errlist_lock); 00351 00352 *ereport_out = ereport; 00353 *detector_out = detector; 00354 } 00355 00356 /* if it's <= 128 bytes, save the corruption directly */ 00357 #define ZFM_MAX_INLINE (128 / sizeof (uint64_t)) 00358 00359 #define MAX_RANGES 16 00360 00361 typedef struct zfs_ecksum_info { 00362 /* histograms of set and cleared bits by bit number in a 64-bit word */ 00363 uint16_t zei_histogram_set[sizeof (uint64_t) * NBBY]; 00364 uint16_t zei_histogram_cleared[sizeof (uint64_t) * NBBY]; 00365 00366 /* inline arrays of bits set and cleared. */ 00367 uint64_t zei_bits_set[ZFM_MAX_INLINE]; 00368 uint64_t zei_bits_cleared[ZFM_MAX_INLINE]; 00369 00370 /* 00371 * for each range, the number of bits set and cleared. The Hamming 00372 * distance between the good and bad buffers is the sum of them all. 00373 */ 00374 uint32_t zei_range_sets[MAX_RANGES]; 00375 uint32_t zei_range_clears[MAX_RANGES]; 00376 00377 struct zei_ranges { 00378 uint32_t zr_start; 00379 uint32_t zr_end; 00380 } zei_ranges[MAX_RANGES]; 00381 00382 size_t zei_range_count; 00383 uint32_t zei_mingap; 00384 uint32_t zei_allowed_mingap; 00385 00386 } zfs_ecksum_info_t; 00387 00388 static void 00389 update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count) 00390 { 00391 size_t i; 00392 size_t bits = 0; 00393 uint64_t value = BE_64(value_arg); 00394 00395 /* We store the bits in big-endian (largest-first) order */ 00396 for (i = 0; i < 64; i++) { 00397 if (value & (1ull << i)) { 00398 hist[63 - i]++; 00399 ++bits; 00400 } 00401 } 00402 /* update the count of bits changed */ 00403 *count += bits; 00404 } 00405 00416 static void 00417 shrink_ranges(zfs_ecksum_info_t *eip) 00418 { 00419 uint32_t mingap = UINT32_MAX; 00420 uint32_t new_allowed_gap = eip->zei_mingap + 1; 00421 00422 size_t idx, output; 00423 size_t max = 
eip->zei_range_count; 00424 00425 struct zei_ranges *r = eip->zei_ranges; 00426 00427 ASSERT3U(eip->zei_range_count, >, 0); 00428 ASSERT3U(eip->zei_range_count, <=, MAX_RANGES); 00429 00430 output = idx = 0; 00431 while (idx < max - 1) { 00432 uint32_t start = r[idx].zr_start; 00433 uint32_t end = r[idx].zr_end; 00434 00435 while (idx < max - 1) { 00436 idx++; 00437 00438 uint32_t nstart = r[idx].zr_start; 00439 uint32_t nend = r[idx].zr_end; 00440 00441 uint32_t gap = nstart - end; 00442 if (gap < new_allowed_gap) { 00443 end = nend; 00444 continue; 00445 } 00446 if (gap < mingap) 00447 mingap = gap; 00448 break; 00449 } 00450 r[output].zr_start = start; 00451 r[output].zr_end = end; 00452 output++; 00453 } 00454 ASSERT3U(output, <, eip->zei_range_count); 00455 eip->zei_range_count = output; 00456 eip->zei_mingap = mingap; 00457 eip->zei_allowed_mingap = new_allowed_gap; 00458 } 00459 00460 static void 00461 add_range(zfs_ecksum_info_t *eip, int start, int end) 00462 { 00463 struct zei_ranges *r = eip->zei_ranges; 00464 size_t count = eip->zei_range_count; 00465 00466 if (count >= MAX_RANGES) { 00467 shrink_ranges(eip); 00468 count = eip->zei_range_count; 00469 } 00470 if (count == 0) { 00471 eip->zei_mingap = UINT32_MAX; 00472 eip->zei_allowed_mingap = 1; 00473 } else { 00474 int gap = start - r[count - 1].zr_end; 00475 00476 if (gap < eip->zei_allowed_mingap) { 00477 r[count - 1].zr_end = end; 00478 return; 00479 } 00480 if (gap < eip->zei_mingap) 00481 eip->zei_mingap = gap; 00482 } 00483 r[count].zr_start = start; 00484 r[count].zr_end = end; 00485 eip->zei_range_count++; 00486 } 00487 00488 static size_t 00489 range_total_size(zfs_ecksum_info_t *eip) 00490 { 00491 struct zei_ranges *r = eip->zei_ranges; 00492 size_t count = eip->zei_range_count; 00493 size_t result = 0; 00494 size_t idx; 00495 00496 for (idx = 0; idx < count; idx++) 00497 result += (r[idx].zr_end - r[idx].zr_start); 00498 00499 return (result); 00500 } 00501 00502 static zfs_ecksum_info_t * 
00503 annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, 00504 const uint8_t *goodbuf, const uint8_t *badbuf, size_t size, 00505 boolean_t drop_if_identical) 00506 { 00507 const uint64_t *good = (const uint64_t *)goodbuf; 00508 const uint64_t *bad = (const uint64_t *)badbuf; 00509 00510 uint64_t allset = 0; 00511 uint64_t allcleared = 0; 00512 00513 size_t nui64s = size / sizeof (uint64_t); 00514 00515 size_t inline_size; 00516 int no_inline = 0; 00517 size_t idx; 00518 size_t range; 00519 00520 size_t offset = 0; 00521 ssize_t start = -1; 00522 00523 zfs_ecksum_info_t *eip = kmem_zalloc(sizeof (*eip), KM_SLEEP); 00524 00525 /* don't do any annotation for injected checksum errors */ 00526 if (info != NULL && info->zbc_injected) 00527 return (eip); 00528 00529 if (info != NULL && info->zbc_has_cksum) { 00530 fm_payload_set(ereport, 00531 FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED, 00532 DATA_TYPE_UINT64_ARRAY, 00533 sizeof (info->zbc_expected) / sizeof (uint64_t), 00534 (uint64_t *)&info->zbc_expected, 00535 FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL, 00536 DATA_TYPE_UINT64_ARRAY, 00537 sizeof (info->zbc_actual) / sizeof (uint64_t), 00538 (uint64_t *)&info->zbc_actual, 00539 FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO, 00540 DATA_TYPE_STRING, 00541 info->zbc_checksum_name, 00542 NULL); 00543 00544 if (info->zbc_byteswapped) { 00545 fm_payload_set(ereport, 00546 FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP, 00547 DATA_TYPE_BOOLEAN, 1, 00548 NULL); 00549 } 00550 } 00551 00552 if (badbuf == NULL || goodbuf == NULL) 00553 return (eip); 00554 00555 ASSERT3U(nui64s, <=, UINT16_MAX); 00556 ASSERT3U(size, ==, nui64s * sizeof (uint64_t)); 00557 ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); 00558 ASSERT3U(size, <=, UINT32_MAX); 00559 00560 /* build up the range list by comparing the two buffers. 
*/ 00561 for (idx = 0; idx < nui64s; idx++) { 00562 if (good[idx] == bad[idx]) { 00563 if (start == -1) 00564 continue; 00565 00566 add_range(eip, start, idx); 00567 start = -1; 00568 } else { 00569 if (start != -1) 00570 continue; 00571 00572 start = idx; 00573 } 00574 } 00575 if (start != -1) 00576 add_range(eip, start, idx); 00577 00578 /* See if it will fit in our inline buffers */ 00579 inline_size = range_total_size(eip); 00580 if (inline_size > ZFM_MAX_INLINE) 00581 no_inline = 1; 00582 00583 /* 00584 * If there is no change and we want to drop if the buffers are 00585 * identical, do so. 00586 */ 00587 if (inline_size == 0 && drop_if_identical) { 00588 kmem_free(eip, sizeof (*eip)); 00589 return (NULL); 00590 } 00591 00592 /* 00593 * Now walk through the ranges, filling in the details of the 00594 * differences. Also convert our uint64_t-array offsets to byte 00595 * offsets. 00596 */ 00597 for (range = 0; range < eip->zei_range_count; range++) { 00598 size_t start = eip->zei_ranges[range].zr_start; 00599 size_t end = eip->zei_ranges[range].zr_end; 00600 00601 for (idx = start; idx < end; idx++) { 00602 uint64_t set, cleared; 00603 00604 // bits set in bad, but not in good 00605 set = ((~good[idx]) & bad[idx]); 00606 // bits set in good, but not in bad 00607 cleared = (good[idx] & (~bad[idx])); 00608 00609 allset |= set; 00610 allcleared |= cleared; 00611 00612 if (!no_inline) { 00613 ASSERT3U(offset, <, inline_size); 00614 eip->zei_bits_set[offset] = set; 00615 eip->zei_bits_cleared[offset] = cleared; 00616 offset++; 00617 } 00618 00619 update_histogram(set, eip->zei_histogram_set, 00620 &eip->zei_range_sets[range]); 00621 update_histogram(cleared, eip->zei_histogram_cleared, 00622 &eip->zei_range_clears[range]); 00623 } 00624 00625 /* convert to byte offsets */ 00626 eip->zei_ranges[range].zr_start *= sizeof (uint64_t); 00627 eip->zei_ranges[range].zr_end *= sizeof (uint64_t); 00628 } 00629 eip->zei_allowed_mingap *= sizeof (uint64_t); 00630 inline_size 
*= sizeof (uint64_t); 00631 00632 /* fill in ereport */ 00633 fm_payload_set(ereport, 00634 FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES, 00635 DATA_TYPE_UINT32_ARRAY, 2 * eip->zei_range_count, 00636 (uint32_t *)eip->zei_ranges, 00637 FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP, 00638 DATA_TYPE_UINT32, eip->zei_allowed_mingap, 00639 FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS, 00640 DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_sets, 00641 FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS, 00642 DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_clears, 00643 NULL); 00644 00645 if (!no_inline) { 00646 fm_payload_set(ereport, 00647 FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS, 00648 DATA_TYPE_UINT8_ARRAY, 00649 inline_size, (uint8_t *)eip->zei_bits_set, 00650 FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS, 00651 DATA_TYPE_UINT8_ARRAY, 00652 inline_size, (uint8_t *)eip->zei_bits_cleared, 00653 NULL); 00654 } else { 00655 fm_payload_set(ereport, 00656 FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM, 00657 DATA_TYPE_UINT16_ARRAY, 00658 NBBY * sizeof (uint64_t), eip->zei_histogram_set, 00659 FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM, 00660 DATA_TYPE_UINT16_ARRAY, 00661 NBBY * sizeof (uint64_t), eip->zei_histogram_cleared, 00662 NULL); 00663 } 00664 return (eip); 00665 } 00666 #endif 00667 00668 void 00669 zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, 00670 uint64_t stateoroffset, uint64_t size) 00671 { 00672 #ifdef _KERNEL 00673 nvlist_t *ereport = NULL; 00674 nvlist_t *detector = NULL; 00675 00676 zfs_ereport_start(&ereport, &detector, 00677 subclass, spa, vd, zio, stateoroffset, size); 00678 00679 if (ereport == NULL) 00680 return; 00681 00682 fm_ereport_post(ereport, EVCH_SLEEP); 00683 00684 fm_nvlist_destroy(ereport, FM_NVA_FREE); 00685 fm_nvlist_destroy(detector, FM_NVA_FREE); 00686 #endif 00687 } 00688 00689 void 00690 zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, 00691 struct zio *zio, uint64_t offset, uint64_t length, void *arg, 00692 
zio_bad_cksum_t *info) 00693 { 00694 zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_SLEEP); 00695 00696 if (zio->io_vsd != NULL) 00697 zio->io_vsd_ops->vsd_cksum_report(zio, report, arg); 00698 else 00699 zio_vsd_default_cksum_report(zio, report, arg); 00700 00701 /* copy the checksum failure information if it was provided */ 00702 if (info != NULL) { 00703 report->zcr_ckinfo = kmem_zalloc(sizeof (*info), KM_SLEEP); 00704 bcopy(info, report->zcr_ckinfo, sizeof (*info)); 00705 } 00706 00707 report->zcr_align = 1ULL << vd->vdev_top->vdev_ashift; 00708 report->zcr_length = length; 00709 00710 #ifdef _KERNEL 00711 zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector, 00712 FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); 00713 00714 if (report->zcr_ereport == NULL) { 00715 report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo); 00716 if (report->zcr_ckinfo != NULL) { 00717 kmem_free(report->zcr_ckinfo, 00718 sizeof (*report->zcr_ckinfo)); 00719 } 00720 kmem_free(report, sizeof (*report)); 00721 return; 00722 } 00723 #endif 00724 00725 mutex_enter(&spa->spa_errlist_lock); 00726 report->zcr_next = zio->io_logical->io_cksum_report; 00727 zio->io_logical->io_cksum_report = report; 00728 mutex_exit(&spa->spa_errlist_lock); 00729 } 00730 00731 void 00732 zfs_ereport_finish_checksum(zio_cksum_report_t *report, 00733 const void *good_data, const void *bad_data, boolean_t drop_if_identical) 00734 { 00735 #ifdef _KERNEL 00736 zfs_ecksum_info_t *info = NULL; 00737 info = annotate_ecksum(report->zcr_ereport, report->zcr_ckinfo, 00738 good_data, bad_data, report->zcr_length, drop_if_identical); 00739 00740 if (info != NULL) 00741 fm_ereport_post(report->zcr_ereport, EVCH_SLEEP); 00742 00743 fm_nvlist_destroy(report->zcr_ereport, FM_NVA_FREE); 00744 fm_nvlist_destroy(report->zcr_detector, FM_NVA_FREE); 00745 report->zcr_ereport = report->zcr_detector = NULL; 00746 00747 if (info != NULL) 00748 kmem_free(info, sizeof (*info)); 00749 #endif 00750 } 
00751 00752 void 00753 zfs_ereport_free_checksum(zio_cksum_report_t *rpt) 00754 { 00755 #ifdef _KERNEL 00756 if (rpt->zcr_ereport != NULL) { 00757 fm_nvlist_destroy(rpt->zcr_ereport, 00758 FM_NVA_FREE); 00759 fm_nvlist_destroy(rpt->zcr_detector, 00760 FM_NVA_FREE); 00761 } 00762 #endif 00763 rpt->zcr_free(rpt->zcr_cbdata, rpt->zcr_cbinfo); 00764 00765 if (rpt->zcr_ckinfo != NULL) 00766 kmem_free(rpt->zcr_ckinfo, sizeof (*rpt->zcr_ckinfo)); 00767 00768 kmem_free(rpt, sizeof (*rpt)); 00769 } 00770 00771 void 00772 zfs_ereport_send_interim_checksum(zio_cksum_report_t *report) 00773 { 00774 #ifdef _KERNEL 00775 fm_ereport_post(report->zcr_ereport, EVCH_SLEEP); 00776 #endif 00777 } 00778 00779 void 00780 zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, 00781 struct zio *zio, uint64_t offset, uint64_t length, 00782 const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc) 00783 { 00784 #ifdef _KERNEL 00785 nvlist_t *ereport = NULL; 00786 nvlist_t *detector = NULL; 00787 zfs_ecksum_info_t *info; 00788 00789 zfs_ereport_start(&ereport, &detector, 00790 FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); 00791 00792 if (ereport == NULL) 00793 return; 00794 00795 info = annotate_ecksum(ereport, zbc, good_data, bad_data, length, 00796 B_FALSE); 00797 00798 if (info != NULL) 00799 fm_ereport_post(ereport, EVCH_SLEEP); 00800 00801 fm_nvlist_destroy(ereport, FM_NVA_FREE); 00802 fm_nvlist_destroy(detector, FM_NVA_FREE); 00803 00804 if (info != NULL) 00805 kmem_free(info, sizeof (*info)); 00806 #endif 00807 } 00808 00809 static void 00810 zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) 00811 { 00812 #ifdef _KERNEL 00813 nvlist_t *resource; 00814 char class[64]; 00815 00816 if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT) 00817 return; 00818 00819 if ((resource = fm_nvlist_create(NULL)) == NULL) 00820 return; 00821 00822 (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, 00823 ZFS_ERROR_CLASS, name); 00824 VERIFY(nvlist_add_uint8(resource, 
FM_VERSION, FM_RSRC_VERSION) == 0); 00825 VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); 00826 VERIFY(nvlist_add_uint64(resource, 00827 FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); 00828 if (vd) 00829 VERIFY(nvlist_add_uint64(resource, 00830 FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); 00831 00832 fm_ereport_post(resource, EVCH_SLEEP); 00833 00834 fm_nvlist_destroy(resource, FM_NVA_FREE); 00835 #endif 00836 } 00837 00844 void 00845 zfs_post_remove(spa_t *spa, vdev_t *vd) 00846 { 00847 zfs_post_common(spa, vd, FM_RESOURCE_REMOVED); 00848 } 00849 00855 void 00856 zfs_post_autoreplace(spa_t *spa, vdev_t *vd) 00857 { 00858 zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE); 00859 } 00860 00867 void 00868 zfs_post_state_change(spa_t *spa, vdev_t *vd) 00869 { 00870 zfs_post_common(spa, vd, FM_RESOURCE_STATECHANGE); 00871 }