FreeBSD ZFS
The Zettabyte File System

zfs_fm.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 /*
00022  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
00023  * Use is subject to license terms.
00024  */
00025 
00026 /*
00027  * Copyright (c) 2012 by Delphix. All rights reserved.
00028  */
00029 
00030 #include <sys/spa.h>
00031 #include <sys/spa_impl.h>
00032 #include <sys/vdev.h>
00033 #include <sys/vdev_impl.h>
00034 #include <sys/zio.h>
00035 #include <sys/zio_checksum.h>
00036 
00037 #include <sys/fm/fs/zfs.h>
00038 #include <sys/fm/protocol.h>
00039 #include <sys/fm/util.h>
00040 #include <sys/sysevent.h>
00041 
00104 #ifdef _KERNEL
00105 static void
00106 zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
00107     const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
00108     uint64_t stateoroffset, uint64_t size)
00109 {
00110         nvlist_t *ereport, *detector;
00111 
00112         uint64_t ena;
00113         char class[64];
00114 
00115         /*
00116          * If we are doing a spa_tryimport() or in recovery mode,
00117          * ignore errors.
00118          */
00119         if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT ||
00120             spa_load_state(spa) == SPA_LOAD_RECOVER)
00121                 return;
00122 
00123         /*
00124          * If we are in the middle of opening a pool, and the previous attempt
00125          * failed, don't bother logging any new ereports - we're just going to
00126          * get the same diagnosis anyway.
00127          */
00128         if (spa_load_state(spa) != SPA_LOAD_NONE &&
00129             spa->spa_last_open_failed)
00130                 return;
00131 
00132         if (zio != NULL) {
00133                 /*
00134                  * If this is not a read or write zio, ignore the error.  This
00135                  * can occur if the DKIOCFLUSHWRITECACHE ioctl fails.
00136                  */
00137                 if (zio->io_type != ZIO_TYPE_READ &&
00138                     zio->io_type != ZIO_TYPE_WRITE)
00139                         return;
00140 
00141                 /*
00142                  * Ignore any errors from speculative I/Os, as failure is an
00143                  * expected result.
00144                  */
00145                 if (zio->io_flags & ZIO_FLAG_SPECULATIVE)
00146                         return;
00147 
00148                 /*
00149                  * If this I/O is not a retry I/O, don't post an ereport.
00150                  * Otherwise, we risk making bad diagnoses based on B_FAILFAST
00151                  * I/Os.
00152                  */
00153                 if (zio->io_error == EIO &&
00154                     !(zio->io_flags & ZIO_FLAG_IO_RETRY))
00155                         return;
00156 
00157                 if (vd != NULL) {
00158                         /*
00159                          * If the vdev has already been marked as failing due
00160                          * to a failed probe, then ignore any subsequent I/O
00161                          * errors, as the DE will automatically fault the vdev
00162                          * on the first such failure.  This also catches cases
00163                          * where vdev_remove_wanted is set and the device has
00164                          * not yet been asynchronously placed into the REMOVED
00165                          * state.
00166                          */
00167                         if (zio->io_vd == vd && !vdev_accessible(vd, zio))
00168                                 return;
00169 
00170                         /*
00171                          * Ignore checksum errors for reads from DTL regions of
00172                          * leaf vdevs.
00173                          */
00174                         if (zio->io_type == ZIO_TYPE_READ &&
00175                             zio->io_error == ECKSUM &&
00176                             vd->vdev_ops->vdev_op_leaf &&
00177                             vdev_dtl_contains(vd, DTL_MISSING, zio->io_txg, 1))
00178                                 return;
00179                 }
00180         }
00181 
00182         /*
00183          * For probe failure, we want to avoid posting ereports if we've
00184          * already removed the device in the meantime.
00185          */
00186         if (vd != NULL &&
00187             strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) == 0 &&
00188             (vd->vdev_remove_wanted || vd->vdev_state == VDEV_STATE_REMOVED))
00189                 return;
00190 
00191         if ((ereport = fm_nvlist_create(NULL)) == NULL)
00192                 return;
00193 
00194         if ((detector = fm_nvlist_create(NULL)) == NULL) {
00195                 fm_nvlist_destroy(ereport, FM_NVA_FREE);
00196                 return;
00197         }
00198 
00199         /*
00200          * Serialize ereport generation
00201          */
00202         mutex_enter(&spa->spa_errlist_lock);
00203 
00204         /*
00205          * Determine the ENA to use for this event.  If we are in a loading
00206          * state, use a SPA-wide ENA.  Otherwise, if we are in an I/O state, use
00207          * a root zio-wide ENA.  Otherwise, simply use a unique ENA.
00208          */
00209         if (spa_load_state(spa) != SPA_LOAD_NONE) {
00210                 if (spa->spa_ena == 0)
00211                         spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1);
00212                 ena = spa->spa_ena;
00213         } else if (zio != NULL && zio->io_logical != NULL) {
00214                 if (zio->io_logical->io_ena == 0)
00215                         zio->io_logical->io_ena =
00216                             fm_ena_generate(0, FM_ENA_FMT1);
00217                 ena = zio->io_logical->io_ena;
00218         } else {
00219                 ena = fm_ena_generate(0, FM_ENA_FMT1);
00220         }
00221 
00222         /*
00223          * Construct the full class, detector, and other standard FMA fields.
00224          */
00225         (void) snprintf(class, sizeof (class), "%s.%s",
00226             ZFS_ERROR_CLASS, subclass);
00227 
00228         fm_fmri_zfs_set(detector, FM_ZFS_SCHEME_VERSION, spa_guid(spa),
00229             vd != NULL ? vd->vdev_guid : 0);
00230 
00231         fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL);
00232 
00233         /*
00234          * Construct the per-ereport payload, depending on which parameters are
00235          * passed in.
00236          */
00237 
00238         /*
00239          * Generic payload members common to all ereports.
00240          */
00241         fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL,
00242             DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
00243             DATA_TYPE_UINT64, spa_guid(spa),
00244             FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32,
00245             spa_load_state(spa), NULL);
00246 
00247         if (spa != NULL) {
00248                 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
00249                     DATA_TYPE_STRING,
00250                     spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ?
00251                     FM_EREPORT_FAILMODE_WAIT :
00252                     spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ?
00253                     FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC,
00254                     NULL);
00255         }
00256 
00257         if (vd != NULL) {
00258                 vdev_t *pvd = vd->vdev_parent;
00259 
00260                 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
00261                     DATA_TYPE_UINT64, vd->vdev_guid,
00262                     FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
00263                     DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL);
00264                 if (vd->vdev_path != NULL)
00265                         fm_payload_set(ereport,
00266                             FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH,
00267                             DATA_TYPE_STRING, vd->vdev_path, NULL);
00268                 if (vd->vdev_devid != NULL)
00269                         fm_payload_set(ereport,
00270                             FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID,
00271                             DATA_TYPE_STRING, vd->vdev_devid, NULL);
00272                 if (vd->vdev_fru != NULL)
00273                         fm_payload_set(ereport,
00274                             FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU,
00275                             DATA_TYPE_STRING, vd->vdev_fru, NULL);
00276 
00277                 if (pvd != NULL) {
00278                         fm_payload_set(ereport,
00279                             FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID,
00280                             DATA_TYPE_UINT64, pvd->vdev_guid,
00281                             FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE,
00282                             DATA_TYPE_STRING, pvd->vdev_ops->vdev_op_type,
00283                             NULL);
00284                         if (pvd->vdev_path)
00285                                 fm_payload_set(ereport,
00286                                     FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH,
00287                                     DATA_TYPE_STRING, pvd->vdev_path, NULL);
00288                         if (pvd->vdev_devid)
00289                                 fm_payload_set(ereport,
00290                                     FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID,
00291                                     DATA_TYPE_STRING, pvd->vdev_devid, NULL);
00292                 }
00293         }
00294 
00295         if (zio != NULL) {
00296                 /*
00297                  * Payload common to all I/Os.
00298                  */
00299                 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
00300                     DATA_TYPE_INT32, zio->io_error, NULL);
00301 
00302                 /*
00303                  * If the 'size' parameter is non-zero, it indicates this is a
00304                  * RAID-Z or other I/O where the physical offset and length are
00305                  * provided for us, instead of within the zio_t.
00306                  */
00307                 if (vd != NULL) {
00308                         if (size)
00309                                 fm_payload_set(ereport,
00310                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
00311                                     DATA_TYPE_UINT64, stateoroffset,
00312                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
00313                                     DATA_TYPE_UINT64, size, NULL);
00314                         else
00315                                 fm_payload_set(ereport,
00316                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
00317                                     DATA_TYPE_UINT64, zio->io_offset,
00318                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
00319                                     DATA_TYPE_UINT64, zio->io_size, NULL);
00320                 }
00321 
00322                 /*
00323                  * Payload for I/Os with corresponding logical information.
00324                  */
00325                 if (zio->io_logical != NULL)
00326                         fm_payload_set(ereport,
00327                             FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
00328                             DATA_TYPE_UINT64,
00329                             zio->io_logical->io_bookmark.zb_objset,
00330                             FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
00331                             DATA_TYPE_UINT64,
00332                             zio->io_logical->io_bookmark.zb_object,
00333                             FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
00334                             DATA_TYPE_INT64,
00335                             zio->io_logical->io_bookmark.zb_level,
00336                             FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
00337                             DATA_TYPE_UINT64,
00338                             zio->io_logical->io_bookmark.zb_blkid, NULL);
00339         } else if (vd != NULL) {
00340                 /*
00341                  * If we have a vdev but no zio, this is a device fault, and the
00342                  * 'stateoroffset' parameter indicates the previous state of the
00343                  * vdev.
00344                  */
00345                 fm_payload_set(ereport,
00346                     FM_EREPORT_PAYLOAD_ZFS_PREV_STATE,
00347                     DATA_TYPE_UINT64, stateoroffset, NULL);
00348         }
00349 
00350         mutex_exit(&spa->spa_errlist_lock);
00351 
00352         *ereport_out = ereport;
00353         *detector_out = detector;
00354 }
00355 
00356 /* if it's <= 128 bytes, save the corruption directly */
00357 #define ZFM_MAX_INLINE          (128 / sizeof (uint64_t))
00358 
00359 #define MAX_RANGES              16
00360 
00361 typedef struct zfs_ecksum_info {
00362         /* histograms of set and cleared bits by bit number in a 64-bit word */
00363         uint16_t zei_histogram_set[sizeof (uint64_t) * NBBY];
00364         uint16_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
00365 
00366         /* inline arrays of bits set and cleared. */
00367         uint64_t zei_bits_set[ZFM_MAX_INLINE];
00368         uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
00369 
00370         /*
00371          * for each range, the number of bits set and cleared.  The Hamming
00372          * distance between the good and bad buffers is the sum of them all.
00373          */
00374         uint32_t zei_range_sets[MAX_RANGES];
00375         uint32_t zei_range_clears[MAX_RANGES];
00376 
00377         struct zei_ranges {
00378                 uint32_t        zr_start;
00379                 uint32_t        zr_end;
00380         } zei_ranges[MAX_RANGES];
00381 
00382         size_t  zei_range_count;
00383         uint32_t zei_mingap;
00384         uint32_t zei_allowed_mingap;
00385 
00386 } zfs_ecksum_info_t;
00387 
/*
 * Account for the bits of one 64-bit difference mask.
 *
 * 'value_arg' is passed through BE_64() and then, for each bit that is set,
 * the matching slot of the 64-entry histogram 'hist' is incremented; slot 0
 * corresponds to the most significant bit of the converted word.  The number
 * of set bits is added to '*count'.
 */
static void
update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count)
{
	uint64_t word = BE_64(value_arg);
	uint32_t nset = 0;
	size_t slot;

	/* We store the bits in big-endian (largest-first) order */
	for (slot = 0; slot < 64; slot++) {
		if ((word >> (63 - slot)) & 1ull) {
			hist[slot]++;
			nset++;
		}
	}

	/* update the count of bits changed */
	*count += nset;
}
00405 
00416 static void
00417 shrink_ranges(zfs_ecksum_info_t *eip)
00418 {
00419         uint32_t mingap = UINT32_MAX;
00420         uint32_t new_allowed_gap = eip->zei_mingap + 1;
00421 
00422         size_t idx, output;
00423         size_t max = eip->zei_range_count;
00424 
00425         struct zei_ranges *r = eip->zei_ranges;
00426 
00427         ASSERT3U(eip->zei_range_count, >, 0);
00428         ASSERT3U(eip->zei_range_count, <=, MAX_RANGES);
00429 
00430         output = idx = 0;
00431         while (idx < max - 1) {
00432                 uint32_t start = r[idx].zr_start;
00433                 uint32_t end = r[idx].zr_end;
00434 
00435                 while (idx < max - 1) {
00436                         idx++;
00437 
00438                         uint32_t nstart = r[idx].zr_start;
00439                         uint32_t nend = r[idx].zr_end;
00440 
00441                         uint32_t gap = nstart - end;
00442                         if (gap < new_allowed_gap) {
00443                                 end = nend;
00444                                 continue;
00445                         }
00446                         if (gap < mingap)
00447                                 mingap = gap;
00448                         break;
00449                 }
00450                 r[output].zr_start = start;
00451                 r[output].zr_end = end;
00452                 output++;
00453         }
00454         ASSERT3U(output, <, eip->zei_range_count);
00455         eip->zei_range_count = output;
00456         eip->zei_mingap = mingap;
00457         eip->zei_allowed_mingap = new_allowed_gap;
00458 }
00459 
00460 static void
00461 add_range(zfs_ecksum_info_t *eip, int start, int end)
00462 {
00463         struct zei_ranges *r = eip->zei_ranges;
00464         size_t count = eip->zei_range_count;
00465 
00466         if (count >= MAX_RANGES) {
00467                 shrink_ranges(eip);
00468                 count = eip->zei_range_count;
00469         }
00470         if (count == 0) {
00471                 eip->zei_mingap = UINT32_MAX;
00472                 eip->zei_allowed_mingap = 1;
00473         } else {
00474                 int gap = start - r[count - 1].zr_end;
00475 
00476                 if (gap < eip->zei_allowed_mingap) {
00477                         r[count - 1].zr_end = end;
00478                         return;
00479                 }
00480                 if (gap < eip->zei_mingap)
00481                         eip->zei_mingap = gap;
00482         }
00483         r[count].zr_start = start;
00484         r[count].zr_end = end;
00485         eip->zei_range_count++;
00486 }
00487 
00488 static size_t
00489 range_total_size(zfs_ecksum_info_t *eip)
00490 {
00491         struct zei_ranges *r = eip->zei_ranges;
00492         size_t count = eip->zei_range_count;
00493         size_t result = 0;
00494         size_t idx;
00495 
00496         for (idx = 0; idx < count; idx++)
00497                 result += (r[idx].zr_end - r[idx].zr_start);
00498 
00499         return (result);
00500 }
00501 
00502 static zfs_ecksum_info_t *
00503 annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
00504     const uint8_t *goodbuf, const uint8_t *badbuf, size_t size,
00505     boolean_t drop_if_identical)
00506 {
00507         const uint64_t *good = (const uint64_t *)goodbuf;
00508         const uint64_t *bad = (const uint64_t *)badbuf;
00509 
00510         uint64_t allset = 0;
00511         uint64_t allcleared = 0;
00512 
00513         size_t nui64s = size / sizeof (uint64_t);
00514 
00515         size_t inline_size;
00516         int no_inline = 0;
00517         size_t idx;
00518         size_t range;
00519 
00520         size_t offset = 0;
00521         ssize_t start = -1;
00522 
00523         zfs_ecksum_info_t *eip = kmem_zalloc(sizeof (*eip), KM_SLEEP);
00524 
00525         /* don't do any annotation for injected checksum errors */
00526         if (info != NULL && info->zbc_injected)
00527                 return (eip);
00528 
00529         if (info != NULL && info->zbc_has_cksum) {
00530                 fm_payload_set(ereport,
00531                     FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED,
00532                     DATA_TYPE_UINT64_ARRAY,
00533                     sizeof (info->zbc_expected) / sizeof (uint64_t),
00534                     (uint64_t *)&info->zbc_expected,
00535                     FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
00536                     DATA_TYPE_UINT64_ARRAY,
00537                     sizeof (info->zbc_actual) / sizeof (uint64_t),
00538                     (uint64_t *)&info->zbc_actual,
00539                     FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
00540                     DATA_TYPE_STRING,
00541                     info->zbc_checksum_name,
00542                     NULL);
00543 
00544                 if (info->zbc_byteswapped) {
00545                         fm_payload_set(ereport,
00546                             FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP,
00547                             DATA_TYPE_BOOLEAN, 1,
00548                             NULL);
00549                 }
00550         }
00551 
00552         if (badbuf == NULL || goodbuf == NULL)
00553                 return (eip);
00554 
00555         ASSERT3U(nui64s, <=, UINT16_MAX);
00556         ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
00557         ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
00558         ASSERT3U(size, <=, UINT32_MAX);
00559 
00560         /* build up the range list by comparing the two buffers. */
00561         for (idx = 0; idx < nui64s; idx++) {
00562                 if (good[idx] == bad[idx]) {
00563                         if (start == -1)
00564                                 continue;
00565 
00566                         add_range(eip, start, idx);
00567                         start = -1;
00568                 } else {
00569                         if (start != -1)
00570                                 continue;
00571 
00572                         start = idx;
00573                 }
00574         }
00575         if (start != -1)
00576                 add_range(eip, start, idx);
00577 
00578         /* See if it will fit in our inline buffers */
00579         inline_size = range_total_size(eip);
00580         if (inline_size > ZFM_MAX_INLINE)
00581                 no_inline = 1;
00582 
00583         /*
00584          * If there is no change and we want to drop if the buffers are
00585          * identical, do so.
00586          */
00587         if (inline_size == 0 && drop_if_identical) {
00588                 kmem_free(eip, sizeof (*eip));
00589                 return (NULL);
00590         }
00591 
00592         /*
00593          * Now walk through the ranges, filling in the details of the
00594          * differences.  Also convert our uint64_t-array offsets to byte
00595          * offsets.
00596          */
00597         for (range = 0; range < eip->zei_range_count; range++) {
00598                 size_t start = eip->zei_ranges[range].zr_start;
00599                 size_t end = eip->zei_ranges[range].zr_end;
00600 
00601                 for (idx = start; idx < end; idx++) {
00602                         uint64_t set, cleared;
00603 
00604                         // bits set in bad, but not in good
00605                         set = ((~good[idx]) & bad[idx]);
00606                         // bits set in good, but not in bad
00607                         cleared = (good[idx] & (~bad[idx]));
00608 
00609                         allset |= set;
00610                         allcleared |= cleared;
00611 
00612                         if (!no_inline) {
00613                                 ASSERT3U(offset, <, inline_size);
00614                                 eip->zei_bits_set[offset] = set;
00615                                 eip->zei_bits_cleared[offset] = cleared;
00616                                 offset++;
00617                         }
00618 
00619                         update_histogram(set, eip->zei_histogram_set,
00620                             &eip->zei_range_sets[range]);
00621                         update_histogram(cleared, eip->zei_histogram_cleared,
00622                             &eip->zei_range_clears[range]);
00623                 }
00624 
00625                 /* convert to byte offsets */
00626                 eip->zei_ranges[range].zr_start *= sizeof (uint64_t);
00627                 eip->zei_ranges[range].zr_end   *= sizeof (uint64_t);
00628         }
00629         eip->zei_allowed_mingap *= sizeof (uint64_t);
00630         inline_size             *= sizeof (uint64_t);
00631 
00632         /* fill in ereport */
00633         fm_payload_set(ereport,
00634             FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES,
00635             DATA_TYPE_UINT32_ARRAY, 2 * eip->zei_range_count,
00636             (uint32_t *)eip->zei_ranges,
00637             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP,
00638             DATA_TYPE_UINT32, eip->zei_allowed_mingap,
00639             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS,
00640             DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_sets,
00641             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS,
00642             DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_clears,
00643             NULL);
00644 
00645         if (!no_inline) {
00646                 fm_payload_set(ereport,
00647                     FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS,
00648                     DATA_TYPE_UINT8_ARRAY,
00649                     inline_size, (uint8_t *)eip->zei_bits_set,
00650                     FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS,
00651                     DATA_TYPE_UINT8_ARRAY,
00652                     inline_size, (uint8_t *)eip->zei_bits_cleared,
00653                     NULL);
00654         } else {
00655                 fm_payload_set(ereport,
00656                     FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
00657                     DATA_TYPE_UINT16_ARRAY,
00658                     NBBY * sizeof (uint64_t), eip->zei_histogram_set,
00659                     FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
00660                     DATA_TYPE_UINT16_ARRAY,
00661                     NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
00662                     NULL);
00663         }
00664         return (eip);
00665 }
00666 #endif
00667 
00668 void
00669 zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
00670     uint64_t stateoroffset, uint64_t size)
00671 {
00672 #ifdef _KERNEL
00673         nvlist_t *ereport = NULL;
00674         nvlist_t *detector = NULL;
00675 
00676         zfs_ereport_start(&ereport, &detector,
00677             subclass, spa, vd, zio, stateoroffset, size);
00678 
00679         if (ereport == NULL)
00680                 return;
00681 
00682         fm_ereport_post(ereport, EVCH_SLEEP);
00683 
00684         fm_nvlist_destroy(ereport, FM_NVA_FREE);
00685         fm_nvlist_destroy(detector, FM_NVA_FREE);
00686 #endif
00687 }
00688 
00689 void
00690 zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
00691     struct zio *zio, uint64_t offset, uint64_t length, void *arg,
00692     zio_bad_cksum_t *info)
00693 {
00694         zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_SLEEP);
00695 
00696         if (zio->io_vsd != NULL)
00697                 zio->io_vsd_ops->vsd_cksum_report(zio, report, arg);
00698         else
00699                 zio_vsd_default_cksum_report(zio, report, arg);
00700 
00701         /* copy the checksum failure information if it was provided */
00702         if (info != NULL) {
00703                 report->zcr_ckinfo = kmem_zalloc(sizeof (*info), KM_SLEEP);
00704                 bcopy(info, report->zcr_ckinfo, sizeof (*info));
00705         }
00706 
00707         report->zcr_align = 1ULL << vd->vdev_top->vdev_ashift;
00708         report->zcr_length = length;
00709 
00710 #ifdef _KERNEL
00711         zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
00712             FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length);
00713 
00714         if (report->zcr_ereport == NULL) {
00715                 report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo);
00716                 if (report->zcr_ckinfo != NULL) {
00717                         kmem_free(report->zcr_ckinfo,
00718                             sizeof (*report->zcr_ckinfo));
00719                 }
00720                 kmem_free(report, sizeof (*report));
00721                 return;
00722         }
00723 #endif
00724 
00725         mutex_enter(&spa->spa_errlist_lock);
00726         report->zcr_next = zio->io_logical->io_cksum_report;
00727         zio->io_logical->io_cksum_report = report;
00728         mutex_exit(&spa->spa_errlist_lock);
00729 }
00730 
00731 void
00732 zfs_ereport_finish_checksum(zio_cksum_report_t *report,
00733     const void *good_data, const void *bad_data, boolean_t drop_if_identical)
00734 {
00735 #ifdef _KERNEL
00736         zfs_ecksum_info_t *info = NULL;
00737         info = annotate_ecksum(report->zcr_ereport, report->zcr_ckinfo,
00738             good_data, bad_data, report->zcr_length, drop_if_identical);
00739 
00740         if (info != NULL)
00741                 fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
00742 
00743         fm_nvlist_destroy(report->zcr_ereport, FM_NVA_FREE);
00744         fm_nvlist_destroy(report->zcr_detector, FM_NVA_FREE);
00745         report->zcr_ereport = report->zcr_detector = NULL;
00746 
00747         if (info != NULL)
00748                 kmem_free(info, sizeof (*info));
00749 #endif
00750 }
00751 
00752 void
00753 zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
00754 {
00755 #ifdef _KERNEL
00756         if (rpt->zcr_ereport != NULL) {
00757                 fm_nvlist_destroy(rpt->zcr_ereport,
00758                     FM_NVA_FREE);
00759                 fm_nvlist_destroy(rpt->zcr_detector,
00760                     FM_NVA_FREE);
00761         }
00762 #endif
00763         rpt->zcr_free(rpt->zcr_cbdata, rpt->zcr_cbinfo);
00764 
00765         if (rpt->zcr_ckinfo != NULL)
00766                 kmem_free(rpt->zcr_ckinfo, sizeof (*rpt->zcr_ckinfo));
00767 
00768         kmem_free(rpt, sizeof (*rpt));
00769 }
00770 
00771 void
00772 zfs_ereport_send_interim_checksum(zio_cksum_report_t *report)
00773 {
00774 #ifdef _KERNEL
00775         fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
00776 #endif
00777 }
00778 
00779 void
00780 zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
00781     struct zio *zio, uint64_t offset, uint64_t length,
00782     const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc)
00783 {
00784 #ifdef _KERNEL
00785         nvlist_t *ereport = NULL;
00786         nvlist_t *detector = NULL;
00787         zfs_ecksum_info_t *info;
00788 
00789         zfs_ereport_start(&ereport, &detector,
00790             FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length);
00791 
00792         if (ereport == NULL)
00793                 return;
00794 
00795         info = annotate_ecksum(ereport, zbc, good_data, bad_data, length,
00796             B_FALSE);
00797 
00798         if (info != NULL)
00799                 fm_ereport_post(ereport, EVCH_SLEEP);
00800 
00801         fm_nvlist_destroy(ereport, FM_NVA_FREE);
00802         fm_nvlist_destroy(detector, FM_NVA_FREE);
00803 
00804         if (info != NULL)
00805                 kmem_free(info, sizeof (*info));
00806 #endif
00807 }
00808 
00809 static void
00810 zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
00811 {
00812 #ifdef _KERNEL
00813         nvlist_t *resource;
00814         char class[64];
00815 
00816         if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT)
00817                 return;
00818 
00819         if ((resource = fm_nvlist_create(NULL)) == NULL)
00820                 return;
00821 
00822         (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE,
00823             ZFS_ERROR_CLASS, name);
00824         VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0);
00825         VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0);
00826         VERIFY(nvlist_add_uint64(resource,
00827             FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0);
00828         if (vd)
00829                 VERIFY(nvlist_add_uint64(resource,
00830                     FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0);
00831 
00832         fm_ereport_post(resource, EVCH_SLEEP);
00833 
00834         fm_nvlist_destroy(resource, FM_NVA_FREE);
00835 #endif
00836 }
00837 
00844 void
00845 zfs_post_remove(spa_t *spa, vdev_t *vd)
00846 {
00847         zfs_post_common(spa, vd, FM_RESOURCE_REMOVED);
00848 }
00849 
00855 void
00856 zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
00857 {
00858         zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE);
00859 }
00860 
00867 void
00868 zfs_post_state_change(spa_t *spa, vdev_t *vd)
00869 {
00870         zfs_post_common(spa, vd, FM_RESOURCE_STATECHANGE);
00871 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines