Index: cddl/sbin/zfsd/case_file.cc =================================================================== --- cddl/sbin/zfsd/case_file.cc (.../head) (revision 0) +++ cddl/sbin/zfsd/case_file.cc (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,685 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file case_file.cc + * + * We keep case files for any leaf vdev that is not in the optimal state. 
+ * However, we only serialize to disk those events that need to be preserved + * across reboots. For now, this is just a log of soft errors which we + * accumulate in order to mark a device as degraded. + */ +#include +#include +#include +#include +#include + +#include "case_file.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +/*============================ Namespace Control =============================*/ +using std::auto_ptr; +using std::hex; +using std::stringstream; +using std::setfill; +using std::setw; + +/*--------------------------------- CaseFile ---------------------------------*/ +//- CaseFile Static Data ------------------------------------------------------- +CaseFileList CaseFile::s_activeCases; +const string CaseFile::s_caseFilePath = "/etc/zfs/cases"; +const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; + +//- CaseFile Static Public Methods --------------------------------------------- +CaseFile * +CaseFile::Find(uint64_t poolGUID, uint64_t vdevGUID) +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + + if ((*curCase)->PoolGUID() != poolGUID + || (*curCase)->VdevGUID() != vdevGUID) + continue; + + /* + * We only carry one active case per-vdev. 
+ */ + return (*curCase); + } + return (NULL); +} + +CaseFile * +CaseFile::Find(const string &physPath) +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + + if ((*curCase)->PhysicalPath() != physPath) + continue; + + return (*curCase); + } + return (NULL); +} + +CaseFile & +CaseFile::Create(Vdev &vdev) +{ + CaseFile *activeCase; + + activeCase = Find(vdev.PoolGUID(), vdev.GUID()); + if (activeCase == NULL) + activeCase = new CaseFile(vdev); + + return (*activeCase); +} + +void +CaseFile::DeSerialize() +{ + struct dirent **caseFiles; + + int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, + DeSerializeSelector, /*compar*/NULL)); + + if (numCaseFiles == 0 || numCaseFiles == -1) + return; + + for (int i = 0; i < numCaseFiles; i++) { + + DeSerializeFile(caseFiles[i]->d_name); + free(caseFiles[i]); + } + free(caseFiles); +} + +void +CaseFile::LogAll() +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) + (*curCase)->Log(); +} + +void +CaseFile::PurgeAll() +{ + /* CaseFiles remove themselves from this list on destruction. 
*/ + while (s_activeCases.size() != 0) + delete s_activeCases.front(); +} + +//- CaseFile Public Methods ---------------------------------------------------- +bool +CaseFile::RefreshVdevState() +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + if (zpl.empty()) { + syslog(LOG_INFO, + "CaseFile::RefreshVdevState: Unknown pool for " + "Vdev(%ju,%ju).\n", + m_poolGUID, m_vdevGUID); + return (false); + } + + zpool_handle_t *casePool(zpl.front()); + nvlist_t *vdevConfig = VdevIterator(casePool).Find(VdevGUID()); + if (vdevConfig == NULL) { + syslog(LOG_INFO, + "CaseFile::RefreshVdevState: Unknown Vdev(%s,%s).\n", + PoolGUIDString().c_str(), PoolGUIDString().c_str()); + return (false); + } + Vdev caseVdev(casePool, vdevConfig); + + m_vdevState = caseVdev.State(); + m_vdevPhysPath = caseVdev.PhysicalPath(); + return (true); +} + +bool +CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + + if (zpl.empty() || !RefreshVdevState()) { + /* + * The pool or vdev for this case file is no longer + * part of the configuration. This can happen + * if we process a device arrival notification + * before seeing the ZFS configuration change + * event. + */ + syslog(LOG_INFO, + "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " + "Closing\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + + /* + * Since this event was not used to close this + * case, do not report it as consumed. + */ + return (/*consumed*/false); + } + zpool_handle_t *pool(zpl.front()); + + if (VdevState() > VDEV_STATE_CANT_OPEN) { + /* + * For now, newly discovered devices only help for + * devices that are missing. In the future, we might + * use a newly inserted spare to replace a degraded + * or faulted device. 
+ */ + return (false); + } + + if (vdev != NULL + && vdev->PoolGUID() == m_poolGUID + && vdev->GUID() == m_vdevGUID) { + + zpool_vdev_online(pool, vdev->GUIDString().c_str(), + ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, + &m_vdevState); + syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", + zpool_get_name(pool), vdev->GUIDString().c_str(), + devPath.c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + + /* + * Check the vdev state post the online action to see + * if we can retire this case. + */ + CloseIfSolved(); + + return (/*consumed*/true); + } + + /* + * If the auto-replace policy is enabled, and we have physical + * path information, try a physical path replacement. + */ + if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): AutoReplace not set. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (false); + } + + if (PhysicalPath().empty()) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): No vdev physical path information. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (false); + } + + if (physPath != PhysicalPath()) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): Physical path mismatch. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (false); + } + + /* Write a label on the newly inserted disk. 
*/ + if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { + syslog(LOG_ERR, + "Replace vdev(%s/%s) by physical path (label): %s: %s\n", + zpool_get_name(pool), VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + return (/*consumed*/false); + } + + /* + * Build a root vdev/leaf vdev configuration suitable for + * zpool_vdev_attach. Only enough data for the kernel to find + * the device (i.e. type and disk device node path) are needed. + */ + nvlist_t *nvroot(NULL); + nvlist_t *newvd(NULL); + if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 + || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { + syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path: " + "Unable to allocate configuration data.\n", + zpool_get_name(pool), VdevGUIDString().c_str()); + if (nvroot != NULL) + nvlist_free(nvroot); + return (/*consumed*/false); + } + + if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 + || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, devPath.c_str()) != 0 + || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 + || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &newvd, 1) != 0) { + syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path: " + "Unable to initialize configuration data.\n", + zpool_get_name(pool), VdevGUIDString().c_str()); + nvlist_free(newvd); + nvlist_free(nvroot); + return (1); + } + + /* Data was copied when added to the root vdev. 
*/ + nvlist_free(newvd); + + if (zpool_vdev_attach(pool, VdevGUIDString().c_str(), + devPath.c_str(), nvroot, + /*replace*/B_TRUE) != 0) { + syslog(LOG_ERR, + "Replace vdev(%s/%s) by physical path(attach): %s: %s\n", + zpool_get_name(pool), VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + } else { + syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", + zpool_get_name(pool), VdevGUIDString().c_str(), + devPath.c_str()); + } + nvlist_free(nvroot); + + return (true); +} + +bool +CaseFile::ReEvaluate(const ZfsEvent &event) +{ + bool consumed(false); + + if (!RefreshVdevState()) { + /* + * The pool or vdev for this case file is no longer + * part of the configuration. This can happen + * if we process a device arrival notification + * before seeing the ZFS configuration change + * event. + */ + syslog(LOG_INFO, + "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " + "Closing\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + + /* + * Since this event was not used to close this + * case, do not report it as consumed. + */ + return (/*consumed*/false); + } + + if (event.Value("type") == "misc.fs.zfs.vdev_remove") { + /* + * The Vdev we represent has been removed from the + * configuration. This case is no longer of value. + */ + Close(); + + return (/*consumed*/true); + } + + if (event.Value("class") == "resource.fs.zfs.removed") { + + /* + * Discard any tentative I/O error events for + * this case. They were most likely caused by the + * hot-unplug of this device. + */ + PurgeTentativeEvents(); + + /* + * Rescan the drives in the system to see if a recent + * drive arrival can be used to solve this case. 
+ */ + ZfsDaemon::RequestSystemRescan(); + + consumed = true; + } else if (event.Value("class") == "ereport.fs.zfs.io" + || event.Value("class") == "ereport.fs.zfs.checksum") { + + m_tentativeEvents.push_front(event.DeepCopy()); + if (!m_tentativeTimer.IsPending()) + m_tentativeTimer.Reset(s_removeGracePeriod, + OnGracePeriodEnded, this); + consumed = true; + } + + bool closed(CloseIfSolved()); + + return (consumed || closed); +} + +bool +CaseFile::CloseIfSolved() +{ + if (m_events.empty() + && m_tentativeEvents.empty()) { + + /* + * We currently do not track or take actions on + * devices in the degraded or faulted state. + * Once we have support for spare pools, we'll + * retain these cases so that any spares added in + * the future can be applied to them. + */ + if (VdevState() > VDEV_STATE_CANT_OPEN + && VdevState() <= VDEV_STATE_HEALTHY) { + Close(); + return (true); + } + + /* + * Re-serialize the case in order to remove any + * previous event data. + */ + Serialize(); + } + + return (false); +} + +void +CaseFile::Log() +{ + syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), + VdevGUIDString().c_str(), PhysicalPath().c_str()); + syslog(LOG_INFO, "\tVdev State = %s\n", + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + if (m_tentativeEvents.size() != 0) { + syslog(LOG_INFO, "\t=== Tentative Events ===\n"); + for (DevCtlEventList::iterator event(m_tentativeEvents.begin()); + event != m_tentativeEvents.end(); event++) + (*event)->Log(LOG_INFO); + } + if (m_events.size() != 0) { + syslog(LOG_INFO, "\t=== Events ===\n"); + for (DevCtlEventList::iterator event(m_events.begin()); + event != m_events.end(); event++) + (*event)->Log(LOG_INFO); + } +} + +//- CaseFile Static Protected Methods ------------------------------------------ +void +CaseFile::OnGracePeriodEnded(void *arg) +{ + CaseFile &casefile(*static_cast(arg)); + + casefile.OnGracePeriodEnded(); +} + +int +CaseFile::DeSerializeSelector(const struct dirent *dirEntry) +{ + uintmax_t 
poolGUID; + uintmax_t vdevGUID; + + if (dirEntry->d_type == DT_REG + && sscanf(dirEntry->d_name, "pool_%ju_vdev_%ju.case", + &poolGUID, &vdevGUID) == 2) + return (1); + return (0); +} + +void +CaseFile::DeSerializeFile(const char *fileName) +{ + string fullName(s_caseFilePath + '/' + fileName); + string evString; + CaseFile *existingCaseFile(NULL); + CaseFile *caseFile(NULL); + int fd(-1); + + try { + uintmax_t poolGUID; + uintmax_t vdevGUID; + nvlist_t *vdevConf; + + sscanf(fileName, "pool_%ju_vdev_%ju.case", + &poolGUID, &vdevGUID); + existingCaseFile = Find(poolGUID, vdevGUID); + if (existingCaseFile != NULL) { + /* + * If the vdev is already degraded or faulted, + * there's no point in keeping the state around + * that we use to put a drive into the degraded + * state. However, if the vdev is simply missing, + * preseve the case data in the hopes that it will + * return. + */ + caseFile = existingCaseFile; + vdev_state curState(caseFile->VdevState()); + if (curState > VDEV_STATE_CANT_OPEN + && curState < VDEV_STATE_HEALTHY) { + unlink(fileName); + return; + } + } else { + ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); + if (zpl.empty() + || (vdevConf = VdevIterator(zpl.front()) + .Find(vdevGUID)) == NULL) { + /* + * Either the pool no longer exists + * of this vdev is no longer a member of + * the pool. + */ + unlink(fullName.c_str()); + return; + } + + /* + * Any vdev we find that does not have a case file + * must be in the healthy state and thus worthy of + * continued SERD data tracking. 
+ */ + caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); + } + + fd = open(fullName.c_str(), O_RDONLY); + if (fd == -1) { + throw ZfsdException("CaseFile::DeSerialize: Unable to " + "read %s.\n", fileName); + return; + } + + /* Re-load EventData */ + EventBuffer eventBuffer(fd); + while (eventBuffer.ExtractEvent(evString)) { + DevCtlEvent *event(DevCtlEvent::CreateEvent(evString)); + if (event != NULL) + caseFile->m_events.push_back(event); + } + close(fd); + } catch (const ParseException &exp) { + + exp.Log(evString); + if (caseFile != existingCaseFile) + delete caseFile; + close(fd); + + /* + * Since we can't parse the file, unlink it so we don't + * trip over it again. + */ + unlink(fileName); + } catch (const ZfsdException &zfsException) { + + zfsException.Log(); + if (caseFile != existingCaseFile) + delete caseFile; + } +} + +//- CaseFile Protected Methods ------------------------------------------------- +CaseFile::CaseFile(const Vdev &vdev) + : m_poolGUID(vdev.PoolGUID()), + m_vdevGUID(vdev.GUID()), + m_vdevState(vdev.State()), + m_vdevPhysPath(vdev.PhysicalPath()) +{ + stringstream guidString; + + guidString << m_vdevGUID; + m_vdevGUIDString = guidString.str(); + guidString.str(""); + guidString << m_poolGUID; + m_poolGUIDString = guidString.str(); + + s_activeCases.push_back(this); + + syslog(LOG_INFO, "Creating new CaseFile:\n"); + Log(); +} + +CaseFile::~CaseFile() +{ + PurgeEvents(); + PurgeTentativeEvents(); + m_tentativeTimer.Stop(); + s_activeCases.remove(this); +} + +void +CaseFile::PurgeEvents() +{ + for (DevCtlEventList::iterator event(m_events.begin()); + event != m_events.end(); event++) + delete *event; + + m_events.clear(); +} + +void +CaseFile::PurgeTentativeEvents() +{ + for (DevCtlEventList::iterator event(m_tentativeEvents.begin()); + event != m_tentativeEvents.end(); event++) + delete *event; + + m_tentativeEvents.clear(); +} + +void +CaseFile::Serialize() +{ + stringstream saveFile; + + saveFile << setfill('0') + << s_caseFilePath << 
"/" + << "pool_" << PoolGUIDString() + << "_vdev_" << VdevGUIDString() + << ".case"; + + if (m_events.empty()) { + unlink(saveFile.str().c_str()); + return; + } + + int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); + if (fd == -1) { + syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", + saveFile.str().c_str()); + return; + } + for (DevCtlEventList::const_iterator curEvent = m_events.begin(); + curEvent != m_events.end(); curEvent++) { + const string &eventString((*curEvent)->GetEventString()); + + write(fd, eventString.c_str(), eventString.length()); + } +} + +void +CaseFile::Close() +{ + /* + * This case is no longer relevant. Clean up our + * serialization file, and delete the case. + */ + syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", + PoolGUIDString().c_str(), VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + + /* + * Serialization of a Case with no event data, clears the + * Serialization data for that event. + */ + PurgeEvents(); + Serialize(); + + delete this; +} + +void +CaseFile::OnGracePeriodEnded() +{ + m_events.splice(m_events.begin(), m_tentativeEvents); + + if (m_events.size() > ZFS_DEGRADE_IO_COUNT) { + + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + if (zpl.empty() + || (VdevIterator(zpl.front()).Find(m_vdevGUID)) == NULL) { + /* + * Either the pool no longer exists + * or this vdev is no longer a member of + * the pool. + */ + Close(); + return; + } + + /* Degrade the vdev and close the case. */ + if (zpool_vdev_degrade(zpl.front(), m_vdevGUID, + VDEV_AUX_ERR_EXCEEDED) == 0) { + Close(); + return; + } + } + Serialize(); +} Index: cddl/sbin/zfsd/case_file.h =================================================================== --- cddl/sbin/zfsd/case_file.h (.../head) (revision 0) +++ cddl/sbin/zfsd/case_file.h (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,344 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file case_file.h + * + * CaseFile objects aggregate vdev faults that may require ZFSD action + * in order to maintain the health of a ZFS pool. 
+ */ +#ifndef _CASE_FILE_H_ +#define _CASE_FILE_H_ + +#include +#include + +#include + +#include "callout.h" +#include "dev_ctl_event.h" + +/*=========================== Forward Declarations ===========================*/ +class CaseFile; +class Vdev; + +/*============================= Class Definitions ============================*/ +/*------------------------------- CaseFileList -------------------------------*/ +/** + * CaseFileList is a specialization of the standard list STL container. + */ +typedef std::list< CaseFile *> CaseFileList; + +/*--------------------------------- CaseFile ---------------------------------*/ +/** + * A CaseFile object is instantiated anytime a vdev for an active pool + * experiences an I/O error, is faulted by ZFS, or is determined to be + * missing/removed. + * + * A vdev may have at most one CaseFile. + * + * CaseFiles are retired when a vdev leaves an active pool configuration + * or an action is taken to resolve the issues recorded in the CaseFile. + * + * Logging a case against a vdev does not imply that an immediate action + * to resolve a fault is required or even desired. For example, a CaseFile + * must accumulate a number of I/O errors in order to flag a device as + * degraded. + * + * Vdev I/O errors are not recorded in ZFS label inforamation. For this + * reasons, CaseFile%%s with accumulated I/O error events are serialized + * to the file system so that they survive across boots. Currently all + * other fault types can be reconstructed from ZFS label information, so + * CaseFile%%s for missing, faulted, or degradded members are just recreated + * at ZFSD startup instead of being deserialized from the file system. + */ +class CaseFile +{ +public: + /** + * \brief Find a CaseFile object by a vdev's pool/vdev GUID tuple. + * + * \param poolGUID Pool GUID for the vdev of the CaseFile to find. + * \param vdevGUID Vdev GUID for the vdev of the CaseFile to find. + * + * \return If found, a pointer to a valid CaseFile object. 
+ * Otherwise NULL. + */ + static CaseFile *Find(uint64_t poolGUID, uint64_t vdevGUID); + + /** + * \brief Find a CaseFile object by a vdev's current/last known + * physical path. + * + * \param physPath Physical path of the vdev of the CaseFile to find. + * + * \return If found, a pointer to a valid CaseFile object. + * Otherwise NULL. + */ + static CaseFile *Find(const string &physPath); + + /** + * \brief Create or return an existing active CaseFile for the + * specified vdev. + * + * \param vdev The vdev object for which to find/create a CaseFile. + * + * \return A referenc eto a valid CaseFile object. + */ + static CaseFile &Create(Vdev &vdev); + + /** + * \brief Deserialize all serialized CaseFile objects found in + * the file system. + */ + static void DeSerialize(); + + /** + * \brief Emit syslog data on all active CaseFile%%s in the system. + */ + static void LogAll(); + + /** + * \brief Destroy the in-core cache of CaseFile data. + * + * This routine does not disturb the on disk, serialized, CaseFile + * data. + */ + static void PurgeAll(); + + uint64_t PoolGUID() const; + uint64_t VdevGUID() const; + vdev_state VdevState() const; + const string &PoolGUIDString() const; + const string &VdevGUIDString() const; + const string &PhysicalPath() const; + + /** + * \brief Attempt to resolve this CaseFile using the disk + * resource at the given device/physical path/vdev object + * tuple. + * + * \param devPath The devfs path for the disk resource. + * \param physPath The physical path information reported by + * the disk resource. + * \param vdev If the disk contains ZFS label information, + * a pointer to the disk label's vdev object + * data. Otherwise NULL. + * + * \return True if this event was consumed by this CaseFile. + */ + bool ReEvaluate(const string &devPath, const string &physPath, + Vdev *vdev); + + /** + * \brief Update this CaseFile in light of the provided ZfsEvent. + * + * \param event The ZfsEvent to evaluate. 
+ * + * \return True if this event was consumed by this CaseFile. + */ + bool ReEvaluate(const ZfsEvent &event); + + /** + * \breif Close a case if it is no longer relevant. + * + * This method deals with cases tracking soft errors. Soft errors + * will be discarded should a remove event occur within a short period + * of the soft errors being reported. We also discard the events + * if the vdev is marked degraded or failed. + * + * \return True if the case is closed. False otherwise. + */ + bool CloseIfSolved(); + + /** + * \brief Emit data about this CaseFile via syslog(3). + */ + void Log(); + +protected: + enum { + /** + * The number of soft errors on a vdev required + * to transition a vdev from healthy to degrated + * status. + */ + ZFS_DEGRADE_IO_COUNT = 50 + }; + + static CalloutFunc_t OnGracePeriodEnded; + + /** + * \brief scandir(3) filter function used to find files containing + * serialized CaseFile data. + * + * \param dirEntry Directory entry for the file to filter. + * + * \return Non-zero for a file to include in the selection, otherwise 0. + */ + static int DeSerializeSelector(const struct dirent *dirEntry); + + /** + * \brief Given the name of a file containing a serialized CaseFile + * object, create/update an in-core CaseFile object + * representing the serialized data. + * + * \param fileName The name of a file containing a serialized + * CaseFile object. + */ + static void DeSerializeFile(const char *fileName); + + /** Constructor. */ + CaseFile(const Vdev &vdev); + + /** Destructor. */ + ~CaseFile(); + + /** + * \brief Reload state for the vdev associated with this CaseFile. + * + * \return True if the refresh was successful. False if the system + * has no record of the pool or vdev for this CaseFile. + */ + bool RefreshVdevState(); + + /** + * \brief Free all events in the m_events list. + */ + void PurgeEvents(); + + /** + * \brief Free all events in the m_tentativeEvents list. 
+ */ + void PurgeTentativeEvents(); + + /** + * \brief Commit to file system storage. + */ + void Serialize(); + + /** + * \brief Unconditionally close a CaseFile. + */ + void Close(); + + /** + * \brief Callout callback invoked when the remove timer grace + * period expires. + * + * If no remove events are received prior to the grace period + * firing, then any tentative events are promoted and counted + * against the health of the vdev. + */ + void OnGracePeriodEnded(); + + /** + * \brief All CaseFiles being tracked by ZFSD. + */ + static CaseFileList s_activeCases; + + /** + * \brief The file system path to serialized CaseFile data. + */ + static const string s_caseFilePath; + + /** + * \brief The time ZFSD waits before promoting a tentative event + * into a permanent event. + */ + static const timeval s_removeGracePeriod; + + /** + * \brief A list of soft error events counted against the health of + * a vdev. + */ + DevCtlEventList m_events; + + /** + * \brief A list of soft error events waiting for a grace period + * expiration before being counted against the health of + * a vdev. 
+ */ + DevCtlEventList m_tentativeEvents; + + uint64_t m_poolGUID; + uint64_t m_vdevGUID; + vdev_state m_vdevState; + string m_poolGUIDString; + string m_vdevGUIDString; + string m_vdevPhysPath; + + /** + * \brief Callout activated when a grace period + */ + Callout m_tentativeTimer; +}; + +inline uint64_t +CaseFile::PoolGUID() const +{ + return (m_poolGUID); +} + +inline uint64_t +CaseFile::VdevGUID() const +{ + return (m_vdevGUID); +} + +inline vdev_state +CaseFile::VdevState() const +{ + return (m_vdevState); +} + +inline const string & +CaseFile::PoolGUIDString() const +{ + return (m_poolGUIDString); +} + +inline const string & +CaseFile::VdevGUIDString() const +{ + return (m_vdevGUIDString); +} + +inline const string & +CaseFile::PhysicalPath() const +{ + return (m_vdevPhysPath); +} + +#endif /* _CASE_FILE_H_ */ Index: cddl/sbin/zfsd/vdev_iterator.cc =================================================================== --- cddl/sbin/zfsd/vdev_iterator.cc (.../head) (revision 0) +++ cddl/sbin/zfsd/vdev_iterator.cc (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,141 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. 
+ * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file vdev_iterator.cc + * + * Implementation of the VdevIterator class. + */ +#include +#include + +#include +#include + +#include "vdev.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" + +/*=========================== Class Implementations ==========================*/ +/*------------------------------- VdevIterator -------------------------------*/ +VdevIterator::VdevIterator(zpool_handle_t *pool) + : m_poolConfig(zpool_get_config(pool, NULL)) +{ + Reset(); +} + +VdevIterator::VdevIterator(nvlist_t *poolConfig) + : m_poolConfig(poolConfig) +{ + Reset(); +} + +void +VdevIterator::Reset() +{ + nvlist_t *rootVdev; + int result; + + result = nvlist_lookup_nvlist(m_poolConfig, + ZPOOL_CONFIG_VDEV_TREE, + &rootVdev); + if (result != 0) + throw ZfsdException(m_poolConfig, "Unable to extract " + "ZPOOL_CONFIG_VDEV_TREE from pool."); + m_vdevQueue.assign(1, rootVdev); +} + +nvlist_t * +VdevIterator::Next() +{ + nvlist_t *vdevConfig; + + if (m_vdevQueue.empty()) + return (NULL); + + while (1) { + nvlist_t **vdevChildren; + int result; + u_int numChildren; + + vdevConfig = m_vdevQueue.front(); + 
m_vdevQueue.pop_front(); + + /* Expand non-leaf vdevs. */ + result = nvlist_lookup_nvlist_array(vdevConfig, + ZPOOL_CONFIG_CHILDREN, + &vdevChildren, &numChildren); + if (result != 0) { + /* leaf vdev */ + break; + } + + /* + * Insert children at the head of the queue to effect a + * depth first traversal of the tree. + */ + m_vdevQueue.insert(m_vdevQueue.begin(), vdevChildren, + vdevChildren + numChildren); + }; + + return (vdevConfig); +} + +void +VdevIterator::Each(VdevCallback_t *callBack, void *callBackArg) +{ + nvlist_t *vdevConfig; + + Reset(); + while ((vdevConfig = Next()) != NULL) { + Vdev vdev(m_poolConfig, vdevConfig); + + if (callBack(vdev, callBackArg)) + break; + } +} + +nvlist_t * +VdevIterator::Find(uint64_t vdevGUID) +{ + nvlist_t *vdevConfig; + + Reset(); + while ((vdevConfig = Next()) != NULL) { + Vdev vdev(m_poolConfig, vdevConfig); + + if (vdev.GUID() == vdevGUID) + return vdevConfig; + } + return (NULL); +} Index: cddl/sbin/zfsd/vdev_iterator.h =================================================================== --- cddl/sbin/zfsd/vdev_iterator.h (.../head) (revision 0) +++ cddl/sbin/zfsd/vdev_iterator.h (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. 
+ * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file vdev_iterator.h + * + * VdevIterator class definition. + */ +#ifndef _VDEV_ITERATOR_H_ +#define _VDEV_ITERATOR_H_ + +#include + +#include +#include + +/*=========================== Forward Declarations ===========================*/ +class Vdev; + +/*============================= Class Definitions ============================*/ +/*------------------------------- VdevIterator -------------------------------*/ +typedef bool VdevCallback_t(Vdev &vdev, void *cbArg); + +/** + * \brief VdevIterator provides mechanisms for traversing and searching + * the leaf vdevs contained in a ZFS pool configuration. + */ +class VdevIterator +{ +public: + /** + * \brief Instantiate a VdevIterator for the given ZFS pool. + * + * \param pool The ZFS pool to traverse/search. + */ + VdevIterator(zpool_handle_t *pool); + + /** + * \brief Instantiate a VdevIterator for the given ZFS pool. + * + * \param poolConfig The configuration data for the ZFS pool + * to traverse/search. + */ + VdevIterator(nvlist_t *poolConfig); + + /** + * \brief Reset this iterator's cursor so that Next() will + * report the first member of the pool. 
+ */ + void Reset(); + + /** + * \brief Report the vdev at this iterator's cursor and increment + * the cursor to the next pool member. + */ + nvlist_t *Next(); + + /** + * \brief Traverse the entire pool configuration starting its + * first member, returning a vdev object with the given + * vdev GUID if found. + * + * \param vdevGUID The vdev GUID of the vdev object to find. + * + * \return A Vdev object for the matching vdev if found. Otherwise + * NULL. + * + * Upon return, the VdevIterator's cursor points to the vdev just + * past the returned vdev or end() if no matching vdev is found. + */ + nvlist_t *Find(uint64_t vdevGUID); + + /** + * \brief Perform the specified operation on each leaf member of + * a pool's vdev membership. + * + * \param cb Callback function to execute for each member. + * \param cbArg Argument to pass to cb. + */ + void Each(VdevCallback_t *cb, void *cbArg); + +private: + nvlist_t *m_poolConfig; + std::list m_vdevQueue; +}; + +#endif /* _VDEV_ITERATOR_H_ */ Index: cddl/sbin/zfsd/dev_ctl_event.cc =================================================================== --- cddl/sbin/zfsd/dev_ctl_event.cc (.../head) (revision 0) +++ cddl/sbin/zfsd/dev_ctl_event.cc (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,707 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. 
+ * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file dev_ctl_event.cc + * + * Implementation of the class hierarchy used to express events + * received via the devctl API. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "case_file.h" +#include "dev_ctl_event.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +/*============================ Namespace Control =============================*/ +using std::cerr; +using std::cout; +using std::endl; +using std::stringstream; + +/*=========================== Class Implementations ==========================*/ +/*------------------------------ ParseException ------------------------------*/ +//- ParseException Public Methods ---------------------------------------------- +string +ParseException::ToString(const string &parsedBuffer) const +{ + stringstream result; + + result << "Parsing "; + + switch(Type()) { + case INVALID_FORMAT: + result << "invalid format "; + break; + case DISCARDED_EVENT_TYPE: + result << "discarded event "; + break; + case UNKNOWN_EVENT_TYPE: + result << "unknown event "; + break; + default: + break; + } + result << 
"exception on buffer: \'"; + if (GetOffset() == 0) { + result << parsedBuffer << '\'' << endl; + } else { + string markedBuffer(parsedBuffer); + + markedBuffer.insert(GetOffset(), ""); + result << markedBuffer << '\'' << endl; + } + + return (result.str()); +} + +void +ParseException::Log(const string &parsedBuffer) const +{ + int priority(LOG_ERR); + + if (Type() == DISCARDED_EVENT_TYPE) + priority = LOG_INFO; + + syslog(priority, "%s", ToString(parsedBuffer).c_str()); +} + +/*-------------------------------- DevCtlEvent -------------------------------*/ +//- DevCtlEvent Static Protected Data ------------------------------------------ +const string DevCtlEvent::s_theEmptyString; + +DevCtlEvent::EventTypeRecord DevCtlEvent::s_typeTable[] = +{ + { DevCtlEvent::NOTIFY, "Notify" }, + { DevCtlEvent::NOMATCH, "No Driver Match" }, + { DevCtlEvent::ATTACH, "Attach" }, + { DevCtlEvent::DETACH, "Detach" } +}; + +DevCtlEvent::EventFactoryRecord DevCtlEvent::s_factoryTable[] = +{ + { DevCtlEvent::NOTIFY, "DEVFS", &DevfsEvent::DevfsEventFactory }, + { DevCtlEvent::NOTIFY, "ZFS", &ZfsEvent::ZfsEventFactory} +}; + +DevCtlEvent::EventFactoryRegistry DevCtlEvent::s_factoryRegistry; + +//- DevCtlEvent Static Public Methods ------------------------------------------ +void +DevCtlEvent::Init() +{ + EventFactoryRecord *rec(s_factoryTable); + EventFactoryRecord *lastRec(s_factoryTable + + NUM_ELEMENTS(s_factoryTable) - 1); + + for (; rec <= lastRec; rec++) { + EventFactoryKey key(rec->m_type, rec->m_subsystem); + + s_factoryRegistry[key] = rec->m_method; + } +} + +DevCtlEvent * +DevCtlEvent::CreateEvent(const string &eventString) +{ + NVPairMap &nvpairs(*new NVPairMap); + Type type(static_cast(eventString[0])); + + try { + ParseEventString(type, eventString, nvpairs); + } catch (const ParseException &exp) { + if (exp.GetType() == ParseException::INVALID_FORMAT) + exp.Log(eventString); + return (NULL); + } + + /* + * Allow entries in our table for events with no system specified. 
+ * These entries should specify the string "none". + */ + NVPairMap::iterator system_item(nvpairs.find("system")); + if (system_item == nvpairs.end()) + nvpairs["system"] = "none"; + + EventFactoryKey key(type, nvpairs["system"]); + EventFactoryRegistry::iterator foundMethod(s_factoryRegistry.find(key)); + if (foundMethod == s_factoryRegistry.end()) + return (NULL); + return ((foundMethod->second)(type, nvpairs, eventString)); +} + +const char * +DevCtlEvent::TypeToString(DevCtlEvent::Type type) +{ + EventTypeRecord *rec(s_typeTable); + EventTypeRecord *lastRec(s_typeTable + NUM_ELEMENTS(s_typeTable) - 1); + + for (; rec <= lastRec; rec++) { + if (rec->m_type == type) + return (rec->m_typeName); + } + return ("Unknown"); +} + +//- DevCtlEvent Public Methods ------------------------------------------------- +const string & +DevCtlEvent::Value(const string &varName) const +{ + NVPairMap::const_iterator item(m_nvPairs.find(varName)); + if (item == m_nvPairs.end()) + return (s_theEmptyString); + + return (item->second); +} + +bool +DevCtlEvent::Contains(const string &varName) const +{ + return (m_nvPairs.find(varName) != m_nvPairs.end()); +} + +string +DevCtlEvent::ToString() const +{ + stringstream result; + + NVPairMap::const_iterator devName(m_nvPairs.find("device-name")); + if (devName != m_nvPairs.end()) + result << devName->second << ": "; + + NVPairMap::const_iterator systemName(m_nvPairs.find("system")); + if (systemName != m_nvPairs.end() + && systemName->second != "none") + result << systemName->second << ": "; + + result << TypeToString(GetType()) << ' '; + + for (NVPairMap::const_iterator curVar = m_nvPairs.begin(); + curVar != m_nvPairs.end(); curVar++) { + if (curVar == devName || curVar == systemName) + continue; + + result << ' ' + << curVar->first << "=" << curVar->second; + } + result << endl; + + return (result.str()); +} + +void +DevCtlEvent::Print() const +{ + cout << ToString() << std::flush; +} + +void +DevCtlEvent::Log(int priority) const +{ + 
syslog(priority, "%s", ToString().c_str()); +} + +//- DevCtlEvent Virtual Public Methods ----------------------------------------- +DevCtlEvent::~DevCtlEvent() +{ + delete &m_nvPairs; +} + +void +DevCtlEvent::Process() const +{ +} + +//- DevCtlEvent Protected Methods ---------------------------------------------- +DevCtlEvent::DevCtlEvent(Type type, NVPairMap &map, const string &eventString) + : m_type(type), + m_nvPairs(map), + m_eventString(eventString) +{ +} + +DevCtlEvent::DevCtlEvent(const DevCtlEvent &src) + : m_type(src.m_type), + m_nvPairs(*new NVPairMap(src.m_nvPairs)), + m_eventString(src.m_eventString) +{ +} + +void +DevCtlEvent::ParseEventString(DevCtlEvent::Type type, + const string &eventString, + NVPairMap& nvpairs) +{ + size_t start; + size_t end; + + switch (type) { + case ATTACH: + case DETACH: + + /* + * \ + * at \ + * on + * + * Handle all data that doesn't conform to the + * "name=value" format, and let the generic parser + * below handle the rest. + * + * Type is a single char. Skip it. + */ + start = 1; + end = eventString.find_first_of(" \t\n", start); + if (end == string::npos) + throw ParseException(ParseException::INVALID_FORMAT, + start); + + nvpairs["device-name"] = eventString.substr(start, end - start); + + start = eventString.find(" on ", end); + if (end == string::npos) + throw ParseException(ParseException::INVALID_FORMAT, + start); + start += 4; + end = eventString.find_first_of(" \t\n", start); + nvpairs["parent"] = eventString.substr(start, end); + if (end == string::npos) + break; + + /* + * The parent field should terminate the event with the + * exception of trailing whitespace. 
+ */ + end = eventString.find_first_not_of(" \t\n", end); + if (end != string::npos) + throw ParseException(ParseException::INVALID_FORMAT, + end); + break; + case NOTIFY: + break; + case NOMATCH: + throw ParseException(ParseException::DISCARDED_EVENT_TYPE); + default: + throw ParseException(ParseException::UNKNOWN_EVENT_TYPE); + } + + /* Process common "key=value" format. */ + for (start = 1; start < eventString.length(); start = end + 1) { + + /* Find the '=' in the middle of the key/value pair. */ + end = eventString.find('=', start); + if (end == string::npos) + break; + + /* + * Find the start of the key by backing up until + * we hit whitespace or '!' (event type "notice"). + * Due to the devctl format, all key/value pair must + * start with one of these two characters. + */ + start = eventString.find_last_of("! \t\n", end); + if (start == string::npos) + throw ParseException(ParseException::INVALID_FORMAT, + end); + start++; + string key(eventString.substr(start, end - start)); + + /* + * Walk forward from the '=' until either we exhaust + * the buffer or we hit whitespace. 
+ */ + start = end + 1; + if (start >= eventString.length()) + throw ParseException(ParseException::INVALID_FORMAT, + end); + end = eventString.find_first_of(" \t\n", start); + if (end == string::npos) + end = eventString.length() - 1; + string value(eventString.substr(start, end - start)); + + nvpairs[key] = value; + } +} + +/*-------------------------------- DevfsEvent --------------------------------*/ +//- DevfsEvent Static Public Methods ------------------------------------------- +DevCtlEvent * +DevfsEvent::DevfsEventFactory(DevCtlEvent::Type type, NVPairMap &nvPairs, + const string &eventString) +{ + return (new DevfsEvent(type, nvPairs, eventString)); +} + +//- DevfsEvent Static Protected Methods ---------------------------------------- +bool +DevfsEvent::IsDiskDev(const string &devName) +{ + static const char *diskDevNames[] = + { + "da", + "ada" + }; + + const char **diskName(diskDevNames); + const char **lastDiskName(diskDevNames + + NUM_ELEMENTS(diskDevNames) - 1); + size_t find_start = devName.rfind('/'); + if (find_start == string::npos) { + find_start = 0; + } else { + /* Just after the last '/'. */ + find_start++; + } + + for (; diskName <= lastDiskName; diskName++) { + + size_t loc(devName.find(*diskName, find_start)); + if (loc == find_start) { + size_t prefixLen(strlen(*diskName)); + + if (devName.length() - find_start >= prefixLen + && isdigit(devName[find_start + prefixLen])) + return (true); + } + } + + return (false); +} + +bool +DevfsEvent::IsWholeDev(const string &devName) +{ + string::const_iterator i(devName.begin()); + + size_t start = devName.rfind('/'); + if (start == string::npos) { + start = 0; + } else { + /* Just after the last '/'. */ + start++; + } + i += start; + + /* alpha prefix followed only by digits. 
*/ + for (; i < devName.end() && !isdigit(*i); i++) + ; + + if (i == devName.end()) + return (false); + + for (; i < devName.end() && isdigit(*i); i++) + ; + + return (i == devName.end()); +} + +nvlist_t * +DevfsEvent::ReadLabel(int devFd, bool &inUse, bool °raded) +{ + pool_state_t poolState; + char *poolName; + boolean_t b_inuse; + + inUse = false; + degraded = false; + poolName = NULL; + if (zpool_in_use(g_zfsHandle, devFd, &poolState, + &poolName, &b_inuse) == 0) { + nvlist_t *devLabel; + + inUse = b_inuse == B_TRUE; + if (poolName != NULL) + free(poolName); + + if (zpool_read_label(devFd, &devLabel) != 0 + || devLabel == NULL) + return (NULL); + + try { + Vdev vdev(devLabel); + degraded = vdev.State() != VDEV_STATE_HEALTHY; + return (devLabel); + } catch (ZfsdException &exp) { + string devName = fdevname(devFd); + string devPath = _PATH_DEV + devName; + string context("DevfsEvent::ReadLabel: " + devPath + ": "); + + exp.GetString().insert(0, context); + exp.Log(); + } + } + return (NULL); +} + +bool +DevfsEvent::OnlineByLabel(const string &devPath, const string& physPath, + nvlist_t *devConfig) +{ + try { + /* + * A device with ZFS label information has been + * inserted. If it matches a device for which we + * have a case, see if we can solve that case. 
+ */ + syslog(LOG_INFO, "Interrogating VDEV label for %s\n", + devPath.c_str()); + Vdev vdev(devConfig); + CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(), + vdev.GUID())); + if (caseFile != NULL) + return (caseFile->ReEvaluate(devPath, physPath, &vdev)); + + } catch (ZfsdException &exp) { + string context("DevfsEvent::OnlineByLabel: " + devPath + ": "); + + exp.GetString().insert(0, context); + exp.Log(); + } + return (false); +} + +//- DevfsEvent Virtual Public Methods ------------------------------------------ +DevCtlEvent * +DevfsEvent::DeepCopy() const +{ + return (new DevfsEvent(*this)); +} + +void +DevfsEvent::Process() const +{ + /* + * We are only concerned with newly discovered + * devices that can be ZFS vdevs. + */ + string devName(Value("cdev")); + if (Value("type") != "CREATE" + || Value("subsystem") != "CDEV" + || !IsDiskDev(devName)) + return; + + /* Log the event since it is of interest. */ + Log(LOG_INFO); + + string devPath(_PATH_DEV + devName); + int devFd(open(devPath.c_str(), O_RDONLY)); + if (devFd == -1) + return; + + /* Normalize the device name in case the DEVFS event is for a link. */ + devName = fdevname(devFd); + devPath = _PATH_DEV + devName; + + bool inUse; + bool degraded; + nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded)); + + char physPath[MAXPATHLEN]; + physPath[0] = '\0'; + bool havePhysPath(ioctl(devFd, DIOCGPHYSPATH, physPath) == 0); + + close(devFd); + + if (inUse && devLabel != NULL) { + OnlineByLabel(devPath, physPath, devLabel); + } else if (degraded) { + syslog(LOG_INFO, "%s is marked degraded. 
Ignoring " + "as a replace by physical path candidate.\n", + devName.c_str()); + } else if (havePhysPath && IsWholeDev(devName)) { + syslog(LOG_INFO, "Searching for CaseFile by Physical Path\n"); + CaseFile *caseFile(CaseFile::Find(physPath)); + if (caseFile != NULL) { + syslog(LOG_INFO, + "Found CaseFile(%s:%s:%s) - ReEvaluating\n", + caseFile->PoolGUIDString().c_str(), + caseFile->VdevGUIDString().c_str(), + zpool_state_to_name(caseFile->VdevState(), + VDEV_AUX_NONE)); + caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL); + } + } + if (devLabel != NULL) + nvlist_free(devLabel); +} + +//- DevfsEvent Protected Methods ----------------------------------------------- +DevfsEvent::DevfsEvent(DevCtlEvent::Type type, NVPairMap &nvpairs, + const string &eventString) + : DevCtlEvent(type, nvpairs, eventString) +{ +} + +DevfsEvent::DevfsEvent(const DevfsEvent &src) + : DevCtlEvent(src) +{ +} + +/*--------------------------------- ZfsEvent ---------------------------------*/ +//- ZfsEvent Static Public Methods --------------------------------------------- +DevCtlEvent * +ZfsEvent::ZfsEventFactory(DevCtlEvent::Type type, NVPairMap &nvpairs, + const string &eventString) +{ + return (new ZfsEvent(type, nvpairs, eventString)); +} + +//- ZfsEvent Virtual Public Methods -------------------------------------------- +DevCtlEvent * +ZfsEvent::DeepCopy() const +{ + return (new ZfsEvent(*this)); +} + +void +ZfsEvent::Process() const +{ + string logstr(""); + + if (!Contains("class") && !Contains("type")) { + syslog(LOG_ERR, + "ZfsEvent::Process: Missing class or type data."); + return; + } + + /* On config syncs, replay any queued events first. */ + if (Value("type").find("ESC_ZFS_config_sync") == 0) + ZfsDaemon::ReplayUnconsumedEvents(); + + Log(LOG_INFO); + + if (Value("subsystem").find("misc.fs.zfs.") == 0) { + /* Configuration changes, resilver events, etc. 
*/ + ProcessPoolEvent(); + return; + } + + if (!Contains("pool_guid") || !Contains("vdev_guid")) { + /* Only currently interested in Vdev related events. */ + return; + } + + CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); + if (caseFile != NULL) { + syslog(LOG_INFO, "Evaluating existing case file\n"); + caseFile->ReEvaluate(*this); + return; + } + + /* Skip events that can't be handled. */ + uint64_t poolGUID(PoolGUID()); + /* If there are no replicas for a pool, then it's not manageable. */ + if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) { + syslog(LOG_INFO, "No replicas available for pool %ju" + ", ignoring\n", (uintmax_t)poolGUID); + return; + } + + /* + * Create a case file for this vdev, and have it + * evaluate the event. + */ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); + if (zpl.empty()) { + bool queued = ZfsDaemon::SaveEvent(*this); + int priority = queued ? LOG_INFO : LOG_ERR; + syslog(priority, + "ZfsEvent::Process: Event for unknown pool %ju %s", + (uintmax_t)poolGUID, queued ? "queued" : "dropped"); + return; + } + + nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID()); + if (vdevConfig == NULL) { + bool queued = ZfsDaemon::SaveEvent(*this); + int priority = queued ? LOG_INFO : LOG_ERR; + syslog(priority, + "ZfsEvent::Process: Event for unknown vdev %ju %s", + (uintmax_t)poolGUID, queued ? "queued" : "dropped"); + return; + } + + Vdev vdev(zpl.front(), vdevConfig); + caseFile = &CaseFile::Create(vdev); + caseFile->ReEvaluate(*this); +} + +//- ZfsEvent Protected Methods ------------------------------------------------- +ZfsEvent::ZfsEvent(DevCtlEvent::Type type, NVPairMap &nvpairs, + const string &eventString) + : DevCtlEvent(type, nvpairs, eventString) +{ + /* + * These are zero on conversion failure as will happen if + * Value returns the empty string. 
+ */ + m_poolGUID = (uint64_t)strtoumax(Value("pool_guid").c_str(), NULL, 0); + m_vdevGUID = (uint64_t)strtoumax(Value("vdev_guid").c_str(), NULL, 0); +} + +ZfsEvent::ZfsEvent(const ZfsEvent &src) + : DevCtlEvent(src), + m_poolGUID(src.m_poolGUID), + m_vdevGUID(src.m_vdevGUID) +{ +} + +void +ZfsEvent::ProcessPoolEvent() const +{ + bool degradedDevice(false); + + CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); + if (caseFile != NULL) { + if (caseFile->VdevState() != VDEV_STATE_UNKNOWN + && caseFile->VdevState() < VDEV_STATE_HEALTHY) + degradedDevice = true; + + caseFile->ReEvaluate(*this); + } + + /* XXX Needs to be changed. */ + if (Value("type") == "ESC_ZFS_vdev_remove" + && degradedDevice == false) { + /* See if any other cases can make use of this device. */ + ZfsDaemon::RequestSystemRescan(); + } +} Index: cddl/sbin/zfsd/dev_ctl_event.h =================================================================== --- cddl/sbin/zfsd/dev_ctl_event.h (.../head) (revision 0) +++ cddl/sbin/zfsd/dev_ctl_event.h (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,506 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. 
+ * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file dev_ctl_event.h + * + * \brief Class hierarchy used to express events received via + * the devctl API. + */ + +#ifndef _DEV_CTL_EVENT_H_ +#define _DEV_CTL_EVENT_H_ + +#include +#include +#include + +#include +#include + +/*============================ Namespace Control =============================*/ +using std::map; +using std::pair; +using std::string; + +/*============================= Class Definitions ============================*/ +/*------------------------------ ParseException ------------------------------*/ +/** + * Exception thrown when an event string is not converted to an actionable + * DevCtlEvent object. + */ +class ParseException +{ +public: + enum Type + { + /** Improperly formatted event string encounterd. */ + INVALID_FORMAT, + + /** No handlers for this event type. */ + DISCARDED_EVENT_TYPE, + + /** Unhandled event type. */ + UNKNOWN_EVENT_TYPE + }; + + /** + * Constructor + * + * \param type The type of this exception. + * \param offset The location in the parse buffer where this + * exception occurred. 
+ */ + ParseException(Type type, size_t offset = 0); + + /** + * Accessor + * + * \return The classification for this exception. + */ + Type GetType() const; + + /** + * Accessor + * + * \return The offset into the event string where this exception + * occurred. + */ + size_t GetOffset() const; + + /** + * Convert an exception into a human readable string. + * + * \param parsedBuffer The event buffer that caused the failure. + */ + string ToString(const string &parsedBuffer) const; + + /** + * Log exception data to syslog. + * + * \param parsedBuffer The event buffer that caused the failure. + */ + void Log(const string &parsedBuffer) const; + +private: + /** The type of this exception. */ + Type m_type; + + /** + * The offset into the event string buffer from where this + * exception was triggered. + */ + size_t m_offset; +}; + +//- ParseException Inline Public Methods --------------------------------------- +inline +ParseException::ParseException(Type type, size_t offset) + : m_type(type), + m_offset(offset) +{ +} + +//- ParseException Inline Const Public Methods --------------------------------- +inline ParseException::Type +ParseException::GetType() const +{ + return (m_type); +} + +inline size_t +ParseException::GetOffset() const +{ + return (m_offset); +} + +/*-------------------------------- NVPairMap ---------------------------------*/ +/** + * NVPairMap is a specialization of the standard map STL container. + */ +typedef map NVPairMap; + +/*-------------------------------- DevCtlEvent -------------------------------*/ +/** + * \brief Container for the name => value pairs that comprise the content of + * a device control event. + * + * All name => value data for events can be accessed via the Contains() + * and Value() methods. name => value pairs for data not explicitly + * recieved as a a name => value pair are synthesized during parsing. For + * example, ATTACH and DETACH events have "device-name" and "parent" + * name => value pairs added. 
+ */ +class DevCtlEvent +{ +public: + /** Event type */ + enum Type { + /** Generic event notification. */ + NOTIFY = '!', + + /** A driver was not found for this device. */ + NOMATCH = '?', + + /** A bus device instance has been added. */ + ATTACH = '+', + + /** A bus device instance has been removed. */ + DETACH = '-' + }; + + /** Prepare the DevCtlEvent class for processing of events. */ + static void Init(); + + /** + * Factory method for creating events. + * + * \param buffer String representing a single event received + * from devd. + * + * \note Init() must be invoked prior to the first call to + * CreateEvent(). + */ + static DevCtlEvent *CreateEvent(const string &buffer); + + /** + * Provide a user friendly string representation of an + * event type. + * + * \param type The type of event to map to a string. + * + * \return A user friendly string representing the input type. + */ + static const char *TypeToString(Type type); + + /** + * Determine the availability of a name => value pair by name. + * + * \param name The key name to search for in this event instance. + * + * \return true if the specified key is available in this + * event, otherwise false. + */ + bool Contains(const string &name) const; + + /** + * \param key The name of the key for which to retrieve its + * associated value. + * + * \return A const reference to the string representing the + * value associated with key. + * + * \note For key's with no registered value, the empty string + * is returned. + */ + const string &Value(const string &key) const; + + /** + * Get the type of this event instance. + * + * \return The type of this event instance. + */ + Type GetType() const; + + /** + * Get the orginal DevCtl event string for this event. + * + * \return The DevCtl event string. + */ + const string &GetEventString() const; + + /** + * Convert the event instance into a string suitable for + * printing to the console or emitting to syslog. + * + * \return A string of formatted event data. 
+ */ + string ToString() const; + + /** + * Pretty-print this event instance to cout. + */ + void Print() const; + + /** + * Pretty-print this event instance to syslog. + * + * \param priority The logging priority/facility. + * See syslog(3). + */ + void Log(int priority) const; + + /** + * Create and return a fully independent clone + * of this event. + */ + virtual DevCtlEvent *DeepCopy() const = 0; + + /** Destructor */ + virtual ~DevCtlEvent(); + + /** + * Interpret and perform any actions necessary to + * consume the event. + */ + virtual void Process() const; + +protected: + /** Table entries used to map a type to a user friendly string. */ + struct EventTypeRecord + { + Type m_type; + const char *m_typeName; + }; + + /** + * Event creation handlers are matched by event type and a + * string representing the system emitting the event. + */ + typedef pair EventFactoryKey; + + /** + * Event factory methods construct a DevCtlEvent given + * the type of event and an NVPairMap populated from + * the event string received from devd. + */ + typedef DevCtlEvent* (EventFactory)(Type, NVPairMap &, const string &); + + /** Map type for Factory method lookups. */ + typedef map EventFactoryRegistry; + + /** Table record of factory methods to add to our registry. */ + struct EventFactoryRecord + { + Type m_type; + const char *m_subsystem; + EventFactory *m_method; + }; + + /** + * Constructor + * + * \param type The type of event to create. + */ + DevCtlEvent(Type type, NVPairMap &map, const string &eventString); + + /** Deep copy constructor. */ + DevCtlEvent(const DevCtlEvent &src); + + /** Always empty string returned when NVPairMap lookups fail. */ + static const string s_theEmptyString; + + /** Unsorted table of event types. */ + static EventTypeRecord s_typeTable[]; + + /** Unsorted table of factory methods to add to our registry. */ + static EventFactoryRecord s_factoryTable[]; + + /** Registry of event factory methods providing O(log(n)) lookup. 
*/ + static EventFactoryRegistry s_factoryRegistry; + + /** The type of this event. */ + const Type m_type; + + /** + * Event attribute storage. + * + * \note Although stored by reference (since m_nvPairs can + * never be NULL), the NVPairMap referenced by this field + * is dynamically allocated and owned by this event object. + * m_nvPairs must be deleted at event desctruction. + */ + NVPairMap &m_nvPairs; + + /** + * The unaltered event string, as received from devd, used to + * create this event object. + */ + string m_eventString; + +private: + /** + * Ingest event data from the supplied string. + * + * \param eventString The string of devd event data to parse. + */ + static void ParseEventString(Type type, const string &eventString, + NVPairMap &nvpairs); +}; + +inline DevCtlEvent::Type +DevCtlEvent::GetType() const +{ + return (m_type); +} + +inline const string & +DevCtlEvent::GetEventString() const +{ + return (m_eventString); +} + +/*------------------------------ DevCtlEventList -----------------------------*/ +/** + * DevCtlEventList is a specialization of the standard list STL container. + */ +typedef std::list DevCtlEventList; + +/*-------------------------------- DevfsEvent --------------------------------*/ +class DevfsEvent : public DevCtlEvent +{ +public: + /** Specialized DevCtlEvent object factory for Devfs events. */ + static EventFactory DevfsEventFactory; + + virtual DevCtlEvent *DeepCopy() const; + + /** + * Interpret and perform any actions necessary to + * consume the event. + */ + virtual void Process() const; + +protected: + /** + * Determine if the given device name references a potential + * ZFS leaf vdev. + * + * \param devName The device name to test. + */ + static bool IsDiskDev(const string &devName); + + /** + * Given the device name of a disk, determine if the device + * represents the whole device, not just a partition. + * + * \param devName Device name of disk device to test. 
+ * + * \return True if the device name represents the whole device. + * Otherwise false. + */ + static bool IsWholeDev(const string &devName); + + /** + * \brief Read and return label information for a device. + * + * \param devFd The device from which to read ZFS label information. + * \param inUse The device is part of an active or potentially + * active configuration. + * \param degraded The device label indicates the vdev is not healthy. + * + * \return If label information is available, an nvlist describing + * the vdev configuration found on the device specified by + * devFd. Otherwise NULL. + */ + static nvlist_t *ReadLabel(int devFd, bool &inUse, bool &degraded); + + /** + * Attempt to match the ZFS labeled device at devPath with an active + * CaseFile for a missing vdev. If a CaseFile is found, attempt + * to re-integrate the device with its pool. + * + * \param devPath The devfs path to the potential leaf vdev. + * \param physPath The physical path string reported by the device + * at devPath. + * \param devConfig The ZFS label information found on the device + * at devPath. + * + * \return true if the event that caused the online action can + * be considered consumed. + */ + static bool OnlineByLabel(const string &devPath, + const string& physPath, + nvlist_t *devConfig); + + /** DeepCopy Constructor. */ + DevfsEvent(const DevfsEvent &src); + + /** Constructor */ + DevfsEvent(Type, NVPairMap &, const string &); +}; + +/*--------------------------------- ZfsEvent ---------------------------------*/ +class ZfsEvent : public DevCtlEvent +{ +public: + /** Specialized DevCtlEvent object factory for ZFS events. */ + static EventFactory ZfsEventFactory; + + virtual DevCtlEvent *DeepCopy() const; + + /** + * Interpret and perform any actions necessary to + * consume the event. 
+ */ + virtual void Process() const; + + const string &PoolName() const; + uint64_t PoolGUID() const; + uint64_t VdevGUID() const; + +protected: + /** Constructor */ + ZfsEvent(Type, NVPairMap &, const string &); + + /** Deep copy constructor. */ + ZfsEvent(const ZfsEvent &src); + + void ProcessPoolEvent() const; + + uint64_t m_poolGUID; + uint64_t m_vdevGUID; +}; + +//- ZfsEvent Inline Public Methods -------------------------------------------- +inline const string& +ZfsEvent::PoolName() const +{ + /* The pool name is reported as the subsystem of ZFS events. */ + return (Value("subsystem")); +} + +inline uint64_t +ZfsEvent::PoolGUID() const +{ + return (m_poolGUID); +} + +inline uint64_t +ZfsEvent::VdevGUID() const +{ + return (m_vdevGUID); +} + +#endif /*_DEV_CTL_EVENT_H_ */ Index: cddl/sbin/zfsd/zfsd_exception.cc =================================================================== --- cddl/sbin/zfsd/zfsd_exception.cc (.../head) (revision 0) +++ cddl/sbin/zfsd/zfsd_exception.cc (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,157 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. 
+ * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zfsd_exception + * + * Implementation of the ZfsdException class. + */ +#include +#include + +#include +#include + +#include "vdev.h" +#include "zfsd_exception.h" + +/*============================ Namespace Control =============================*/ +using std::endl; +using std::string; +using std::stringstream; + +/*=========================== Class Implementations ==========================*/ +/*------------------------------- ZfsdException ------------------------------*/ +void +ZfsdException::FormatLog(const char *fmt, va_list ap) +{ + char buf[256]; + + vsnprintf(buf, sizeof(buf), fmt, ap); + m_log = buf; +} + +ZfsdException::ZfsdException(const char *fmt, ...) + : m_poolConfig(NULL), + m_vdevConfig(NULL) +{ + va_list ap; + + va_start(ap, fmt); + FormatLog(fmt, ap); + va_end(ap); +} + +ZfsdException::ZfsdException(zpool_handle_t *pool, const char *fmt, ...) + : m_poolConfig(zpool_get_config(pool, NULL)), + m_vdevConfig(NULL) +{ + va_list ap; + + va_start(ap, fmt); + FormatLog(fmt, ap); + va_end(ap); +} + +ZfsdException::ZfsdException(nvlist_t *poolConfig, const char *fmt, ...) 
+ : m_poolConfig(poolConfig), + m_vdevConfig(NULL) +{ + va_list ap; + + va_start(ap, fmt); + FormatLog(fmt, ap); + va_end(ap); +} + +inline +ZfsdException::ZfsdException(zpool_handle_t *pool, nvlist_t *vdevConfig, + const char *fmt, ...) + : m_poolConfig(zpool_get_config(pool, NULL)), + m_vdevConfig(vdevConfig) +{ + va_list ap; + + va_start(ap, fmt); + FormatLog(fmt, ap); + va_end(ap); +} + +inline +ZfsdException::ZfsdException(nvlist_t *poolConfig, nvlist_t *vdevConfig, + const char *fmt, ...) + : m_poolConfig(poolConfig), + m_vdevConfig(vdevConfig) +{ + va_list ap; + + va_start(ap, fmt); + FormatLog(fmt, ap); + va_end(ap); +} + +void +ZfsdException::Log() const +{ + stringstream output; + + if (m_poolConfig != NULL) { + + output << "Pool "; + + char *poolName; + if (nvlist_lookup_string(m_poolConfig, ZPOOL_CONFIG_POOL_NAME, + &poolName) == 0) + output << poolName; + else + output << "Unkown"; + output << ": "; + } + + if (m_vdevConfig != NULL) { + + if (m_poolConfig != NULL) { + Vdev vdev(m_poolConfig, m_vdevConfig); + + output << "Vdev " << vdev.GUID() << ": "; + } else { + Vdev vdev(m_vdevConfig); + + output << "Pool " << vdev.PoolGUID() << ": "; + output << "Vdev " << vdev.GUID() << ": "; + } + } + + output << m_log << endl; + syslog(LOG_ERR, "%s", output.str().c_str()); +} + Index: cddl/sbin/zfsd/zfsd_exception.h =================================================================== --- cddl/sbin/zfsd/zfsd_exception.h (.../head) (revision 0) +++ cddl/sbin/zfsd/zfsd_exception.h (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,158 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zfsd_exception.h + * + * Definition of the ZfsdException class hierarchy. All exceptions + * explicitly thrown by Zfsd are defined here. + */ +#ifndef _ZFSD_EXCEPTION_H_ +#define _ZFSD_EXCEPTION_H_ + +#include + +/*=========================== Forward Declarations ===========================*/ +struct zpool_handle; +typedef struct zpool_handle zpool_handle_t; + +struct nvlist; +typedef struct nvlist nvlist_t; + +/*============================= Class Definitions ============================*/ +/*------------------------------- ZfsdException ------------------------------*/ +/** + * \brief Class allowing unified reporting/logging of exceptional events. + */ +class ZfsdException +{ +public: + /** + * \brief ZfsdException constructor allowing arbitrary string + * data to be reported. 
+ * + * \param fmt Printf-like string format specifier. + */ + ZfsdException(const char *fmt, ...); + + /** + * \brief ZfsdException constructor allowing arbitrary string + * data to be reported and associated with the configuration + * data for a ZFS pool. + * + * \param pool Pool handle describing the pool to which this + * exception is associated. + * \param fmt Printf-like string format specifier. + * + * Instantiation with this method is used to report global + * pool errors. + */ + ZfsdException(zpool_handle_t *pool, const char *, ...); + + /** + * \brief ZfsdException constructor allowing arbitrary string + * data to be reported and associated with the configuration + * data for a ZFS pool. + * + * \param poolConfig Pool configuration describing the pool to + * which this exception is associated. + * \param fmt Printf-like string format specifier. + * + * Instantiation with this method is used to report global + * pool errors. + */ + ZfsdException(nvlist_t *poolConfig, const char *, ...); + + /** + * \brief ZfsdException constructor allowing arbitrary string + * data to be reported and associated with the configuration + * data for a single vdev and its parent pool. + * + * \param pool Pool handle describing the pool to which this + * exception is associated. + * \param vdevConfig A name/value list describing the vdev + * to which this exception is associated. + * \param fmt Printf-like string format specifier. + * + * Instantiation with this method is used to report errors + * associated with a vdev when both the vdev's config and + * its pool membership are available. + */ + ZfsdException(zpool_handle_t *pool, nvlist_t *vdevConfig, + const char *fmt, ...); + + /** + * \brief ZfsdException constructor allowing arbitrary string + * data to be reported and associated with the configuration + * data for a single vdev and its parent pool. + * + * \param poolConfig Pool configuration describing the pool to + * which this exception is associated. 
+ * \param vdevConfig A name/value list describing the vdev + * to which this exception is associated. + * \param fmt Printf-like string format specifier. + * + * Instantiation with this method is used to report errors + * associated with a vdev when both the vdev's config and + * its pool membership are available. + */ + ZfsdException(nvlist_t *poolConfig, nvlist_t *vdevConfig, + const char *fmt, ...); + + /** + * \brief Augment/Modify a ZfsdException's string data. + */ + std::string& GetString(); + + /** + * \brief Emit exception data to syslog(3). + */ + void Log() const; +private: + /** + * \brief Convert exception string data and arguments provided + * in ZfsdException constructors into a linear string. + */ + void FormatLog(const char *fmt, va_list ap); + + nvlist_t *m_poolConfig; + nvlist_t *m_vdevConfig; + std::string m_log; +}; + +inline std::string & +ZfsdException::GetString() +{ + return (m_log); +} + +#endif /* _ZFSD_EXCEPTION_H_ */ Index: cddl/sbin/zfsd/Makefile =================================================================== --- cddl/sbin/zfsd/Makefile (.../head) (revision 0) +++ cddl/sbin/zfsd/Makefile (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,42 @@ +# $FreeBSD$ + +PROG_CXX= zfsd +SRCS= callout.cc \ + case_file.cc \ + dev_ctl_event.cc \ + vdev.cc \ + vdev_iterator.cc \ + zfsd.cc \ + zfsd_exception.cc \ + zpool_list.cc + +NO_MAN= YES + +WARNS?= 0 + +INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common +INCFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include +INCFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem +INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris +INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head +INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common +INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libumem/common +INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common +INCFLAGS+= 
-I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair +INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs +INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common +INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs +INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys + +CFLAGS= -g -DNEED_SOLARIS_BOOLEAN ${INCFLAGS} + +DPADD= ${LIBZFS} ${LIBUTIL} ${LIBGEOM} ${LIBBSDXML} ${LIBSBUF} \ + ${LIBNVPAIR} ${LIBUUTIL} +LDADD= -lzfs -lutil -lgeom -lbsdxml -lsbuf -lnvpair -luutil + +cscope: + find ${.CURDIR} -type f -a \( -name "*.[ch]" -o -name "*.cc" \) \ + > ${.CURDIR}/cscope.files + cd ${.CURDIR} && cscope -buq ${INCFLAGS} + +.include Index: cddl/sbin/zfsd/zpool_list.cc =================================================================== --- cddl/sbin/zfsd/zpool_list.cc (.../head) (revision 0) +++ cddl/sbin/zfsd/zpool_list.cc (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,98 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zpool_list.cc + * + * Implementation of the ZpoolList class. + */ +#include "zpool_list.h" +#include "zfsd.h" + +/*=========================== Class Implementations ==========================*/ +/*--------------------------------- ZpoolList --------------------------------*/ +bool +ZpoolList::ZpoolAll(zpool_handle_t *pool, nvlist_t *poolConfig, void *cbArg) +{ + return (true); +} + +bool +ZpoolList::ZpoolByGUID(zpool_handle_t *pool, nvlist_t *poolConfig, + void *cbArg) +{ + uint64_t *desiredPoolGUID(static_cast<uint64_t *>(cbArg)); + uint64_t poolGUID; + + /* We are only interested in the pool that matches our pool GUID. */ + return (nvlist_lookup_uint64(poolConfig, ZPOOL_CONFIG_POOL_GUID, + &poolGUID) == 0 + && poolGUID == *desiredPoolGUID); +} + +bool +ZpoolList::ZpoolByName(zpool_handle_t *pool, nvlist_t *poolConfig, void *cbArg) +{ + const string &desiredPoolName(*static_cast<const string *>(cbArg)); + + /* We are only interested in the pool that matches our pool name. 
*/ + return (desiredPoolName == zpool_get_name(pool)); +} + +int +ZpoolList::LoadIterator(zpool_handle_t *pool, void *data) +{ + ZpoolList *zpl(reinterpret_cast<ZpoolList *>(data)); + nvlist_t *poolConfig(zpool_get_config(pool, NULL)); + + if (zpl->m_filter(pool, poolConfig, zpl->m_filterArg)) { + zpl->push_back(pool); + } else { + zpool_close(pool); + } + return (0); +} + +ZpoolList::ZpoolList(PoolFilter_t *filter, void * filterArg) + : m_filter(filter), + m_filterArg(filterArg) +{ + zpool_iter(g_zfsHandle, LoadIterator, this); +} + +ZpoolList::~ZpoolList() +{ + for (iterator it(begin()); it != end(); it++) + zpool_close(*it); + + clear(); +} Index: cddl/sbin/zfsd/zpool_list.h =================================================================== --- cddl/sbin/zfsd/zpool_list.h (.../head) (revision 0) +++ cddl/sbin/zfsd/zpool_list.h (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,126 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zpool_list.h + * + * ZpoolList class definition. ZpoolList is a standard container + * allowing filtering and iteration of imported ZFS pool information. + */ +#ifndef _ZPOOL_LIST_H_ +#define _ZPOOL_LIST_H_ + +#include <list> +#include <string> + +#include <libzfs.h> +#include <libnvpair.h> + +/*============================ Namespace Control =============================*/ +using std::string; + +/*=========================== Forward Declarations ===========================*/ +class Vdev; + +/*============================= Class Definitions ============================*/ +/*--------------------------------- ZpoolList --------------------------------*/ +class ZpoolList; +typedef bool PoolFilter_t(zpool_handle_t *pool, nvlist_t *poolConfig, + void *filterArg); + +/** + * \brief Container of imported ZFS pool data. + * + * ZpoolList is a convenience class that converts libzfs's ZFS + * pool methods into a standard list container. + */ +class ZpoolList : public std::list<zpool_handle_t *> +{ +public: + /** + * \brief Utility ZpoolList construction filter that causes all + * pools known to the system to be included in the + * instantiated ZpoolList. + */ + static PoolFilter_t ZpoolAll; + + /** + * \brief Utility ZpoolList construction filter that causes only + * a pool known to the system and having the specified GUID + * to be included in the instantiated ZpoolList. 
+ */ + static PoolFilter_t ZpoolByGUID; + + /** + * \brief Utility ZpoolList construction filter that causes only + * pools known to the system and having the specified name + * to be included in the intantiated ZpoolList. + */ + static PoolFilter_t ZpoolByName; + + /** + * \brief ZpoolList contructor + * + * \param filter The filter function to use when constructing + * the ZpoolList. This may be one of the static + * utility filters defined for ZpoolList or a + * user defined function. + * \param filterArg A single argument to pass into the filter function + * when it is invoked on each candidate pool. + */ + ZpoolList(PoolFilter_t *filter = ZpoolAll, void *filterArg = NULL); + ~ZpoolList(); + +private: + /** + * \brief Helper routine used to populate the internal + * data store of ZFS pool objects using libzfs's + * zpool_iter() function. + * + * \param pool The ZFS pool object to filter. + * \param data User argument passed through zpool_iter(). + */ + static int LoadIterator(zpool_handle_t *pool, void *data); + + /** + * \brief The filter with which this ZpoolList was constructed. + */ + PoolFilter_t *m_filter; + + /** + * \brief The filter argument with which this ZpoolList was + * constructed. + */ + void *m_filterArg; +}; + +#endif /* _ZPOOL_ITERATOR_H_ */ Index: cddl/sbin/zfsd/callout.cc =================================================================== --- cddl/sbin/zfsd/callout.cc (.../head) (revision 0) +++ cddl/sbin/zfsd/callout.cc (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,162 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +#include <sys/time.h> +#include <signal.h> + +#include "callout.h" +#include "zfsd.h" +#include "zfsd_exception.h" + +std::list<Callout *> Callout::s_activeCallouts; +bool Callout::s_alarmFired(false); + +void +Callout::Init() +{ + signal(SIGALRM, Callout::AlarmSignalHandler); +} + +inline bool +Callout::Stop() +{ + if (!IsPending()) + return (false); + + for (std::list<Callout *>::iterator it(s_activeCallouts.begin()); + it != s_activeCallouts.end(); it++) { + if (*it != this) + continue; + + it = s_activeCallouts.erase(it); + if (it != s_activeCallouts.end()) { + + /* + * Maintain correct interval for the + * callouts that follow the just removed + * entry. 
+ */ + timeradd(&(*it)->m_interval, &m_interval, + &(*it)->m_interval); + } + break; + } + m_pending = false; + return (true); +} + +bool +Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg) +{ + bool cancelled(false); + + if (!timerisset(&interval)) + throw ZfsdException("Callout::Reset: interval of 0"); + + cancelled = Stop(); + + m_interval = interval; + m_func = func; + m_arg = arg; + m_pending = true; + + std::list::iterator it(s_activeCallouts.begin()); + for (; it != s_activeCallouts.end(); it++) { + + if (timercmp(&(*it)->m_interval, &m_interval, <=)) { + /* + * Decrease our interval by those that come + * before us. + */ + timersub(&m_interval, &(*it)->m_interval, &m_interval); + } else { + /* + * Account for the time between the newly + * inserted event and those that follow. + */ + timersub(&(*it)->m_interval, &m_interval, + &(*it)->m_interval); + break; + } + } + s_activeCallouts.insert(it, this); + + + if (s_activeCallouts.front() == this) { + itimerval timerval = { {0, 0}, m_interval }; + + setitimer(ITIMER_REAL, &timerval, NULL); + } + + return (cancelled); +} + +void +Callout::AlarmSignalHandler(int) +{ + s_alarmFired = true; + ZfsDaemon::WakeEventLoop(); +} + +void +Callout::ExpireCallouts() +{ + if (!s_alarmFired) + return; + + s_alarmFired = false; + if (s_activeCallouts.empty()) { + /* Callout removal/SIGALRM race was lost. */ + return; + } + + /* + * Expire the first callout (the one we used to set the + * interval timer) as well as any callouts following that + * expire at the same time (have a zero interval from + * the callout before it). 
+ */ + do { + Callout *cur(s_activeCallouts.front()); + s_activeCallouts.pop_front(); + cur->m_pending = false; + cur->m_func(cur->m_arg); + } while (!s_activeCallouts.empty() + && timerisset(&s_activeCallouts.front()->m_interval) == 0); + + if (!s_activeCallouts.empty()) { + Callout *next(s_activeCallouts.front()); + itimerval timerval = { { 0, 0 }, next->m_interval }; + + setitimer(ITIMER_REAL, &timerval, NULL); + } +} Index: cddl/sbin/zfsd/vdev.cc =================================================================== --- cddl/sbin/zfsd/vdev.cc (.../head) (revision 0) +++ cddl/sbin/zfsd/vdev.cc (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,154 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file vdev.cc + * + * Implementation of the Vdev class. + */ +#include + +#include + +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" + +__FBSDID("$FreeBSD$"); +/*============================ Namespace Control =============================*/ +using std::stringstream; + +/*=========================== Class Implementations ==========================*/ +/*----------------------------------- Vdev -----------------------------------*/ +Vdev::Vdev(zpool_handle_t *pool, nvlist_t *config) + : m_poolConfig(zpool_get_config(pool, NULL)), + m_config(config) +{ + if (nvlist_lookup_uint64(m_poolConfig, ZPOOL_CONFIG_POOL_GUID, + &m_poolGUID) != 0) + throw ZfsdException("Unable to extract pool GUID " + "from pool handle."); + + if (nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_GUID, &m_vdevGUID) != 0) + throw ZfsdException("Unable to extract vdev GUID " + "from vdev config data."); +} + +Vdev::Vdev(nvlist_t *poolConfig, nvlist_t *config) + : m_poolConfig(poolConfig), + m_config(config) +{ + if (nvlist_lookup_uint64(m_poolConfig, ZPOOL_CONFIG_POOL_GUID, + &m_poolGUID) != 0) + throw ZfsdException("Unable to extract pool GUID " + "from pool handle."); + + if (nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_GUID, &m_vdevGUID) != 0) + throw ZfsdException("Unable to extract vdev GUID " + "from vdev config data."); +} + +Vdev::Vdev(nvlist_t *labelConfig) + : 
m_poolConfig(labelConfig) +{ + if (nvlist_lookup_uint64(labelConfig, ZPOOL_CONFIG_POOL_GUID, + &m_poolGUID) != 0) + throw ZfsdException("Unable to extract pool GUID " + "from vdev label data."); + + if (nvlist_lookup_uint64(labelConfig, ZPOOL_CONFIG_GUID, + &m_vdevGUID) != 0) + throw ZfsdException("Unable to extract vdev GUID " + "from vdev label data."); + m_config = VdevIterator(labelConfig).Find(m_vdevGUID); + if (m_config == NULL) + throw ZfsdException("Unable to find vdev config " + "within vdev label data."); +} + +vdev_state +Vdev::State() const +{ + vdev_stat_t *vs; + uint_t vsc; + + if (nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0) + return (static_cast<vdev_state>(vs->vs_state)); + + /* + * Stats are not available. This vdev was created from a label. + * Synthesize a state based on available data. + */ + uint64_t faulted(0); + uint64_t degraded(0); + (void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_FAULTED, &faulted); + (void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_DEGRADED, &degraded); + if (faulted) + return (VDEV_STATE_FAULTED); + if (degraded) + return (VDEV_STATE_DEGRADED); + return (VDEV_STATE_HEALTHY); +} + +string +Vdev::GUIDString() const +{ + stringstream vdevGUIDString; + + vdevGUIDString << GUID(); + return (vdevGUIDString.str()); +} + +string +Vdev::Path() const +{ + char *path(NULL); + + if (nvlist_lookup_string(m_config, ZPOOL_CONFIG_PATH, &path) == 0) + return (path); + + return (""); +} + +string +Vdev::PhysicalPath() const +{ + char *path(NULL); + + if (nvlist_lookup_string(m_config, ZPOOL_CONFIG_PHYS_PATH, &path) == 0) + return (path); + + return (""); +} Index: cddl/sbin/zfsd/callout.h =================================================================== --- cddl/sbin/zfsd/callout.h (.../head) (revision 0) +++ cddl/sbin/zfsd/callout.h (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,170 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file callout.h + * + * \brief Interface for timer based callback services. + */ + +#ifndef _CALLOUT_H_ +#define _CALLOUT_H_ + +#include + +#include + +/** + * \brief Type of the function callback from a Callout. + */ +typedef void CalloutFunc_t(void *); + +/** + * \brief Interface to a schedulable one-shot timer with the granlarity + * of the system clock (see setitimer(2)). + * + * Determination of callback expiration is triggered by the SIGALRM + * signal. 
Callout callbacks are always delivered from Zfsd's event + * processing loop. + * + * Periodic actions can be triggered via the Callout mechanisms by + * resetting the Callout from within its callback. + */ +class Callout +{ +public: + + /** + * Initialize the Callout subsystem. + */ + static void Init(); + + /** + * Function called (via SIGALRM) when our interval + * timer expires. + */ + static void AlarmSignalHandler(int); + + /** + * Execute callbacks for all callouts that have the same + * expiration time as the first callout in the list. + */ + static void ExpireCallouts(); + + /** Constructor. */ + Callout(); + + /** + * returns true if callout has not been stopped, + * or deactivated since the last time the callout was + * reset. + */ + bool IsActive() const; + + /** + * Returns true if callout is still waiting to expire. + */ + bool IsPending() const; + + /** + * Disestablish a callout. + */ + bool Stop(); + + /** + * \brief Establish or change a timeout. + * + * \param interval Timeval indicating the time which must elapse + * before this callout fires. + * \param func Pointer to the callback funtion + * \param arg Argument pointer to pass to callback function + * + * \return Cancelation status. + * true: The previous callback was pending and therfore + * was cancelled. + * false: The callout was not pending at the time of this + * reset request. + * In all cases, a new callout is established. + */ + bool Reset(const timeval &interval, CalloutFunc_t *func, void *arg); + +private: + /** + * All active callouts sorted by expiration time. The callout + * with the nearest expiration time is at the head of the list. + */ + static std::list s_activeCallouts; + + /** + * The interval timer has expired. This variable is set from + * signal handler context and tested from Zfsd::EventLoop() + * context via ExpireCallouts(). + */ + static bool s_alarmFired; + + /** + * Time, realtive to others in the active list, until + * this callout is fired. 
+ */ + timeval m_interval; + + /** Callback function argument. */ + void *m_arg; + + /** + * The callback function associated with this timer + * entry. + */ + CalloutFunc_t *m_func; + + /** State of this callout. */ + bool m_pending; +}; + +//- Callout public const methods ---------------------------------------------- +inline bool +Callout::IsPending() const +{ + return (m_pending); +} + +//- Callout public methods ---------------------------------------------------- +inline +Callout::Callout() + : m_arg(0), + m_func(NULL), + m_pending(false) +{ + timerclear(&m_interval); +} + +#endif /* CALLOUT_H_ */ Index: cddl/sbin/zfsd/vdev.h =================================================================== --- cddl/sbin/zfsd/vdev.h (.../head) (revision 0) +++ cddl/sbin/zfsd/vdev.h (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,136 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file vdev.h + * + * Definition of the Vdev class. + */ +#ifndef _VDEV_H_ +#define _VDEV_H_ + +#include + +#include +#include + +/** + * \brief Wrapper class for a vdev's name/value configuration list + * simplifying access to commonly used vdev attributes. + */ +class Vdev +{ +public: + /** + * \brief Instantiate a vdev object for a vdev that is a member + * of an imported pool. + * + * \param pool The pool object containing the vdev with + * configuration data provided in vdevConfig. + * \param vdevConfig Vdev configuration data. + * + * This method should be used whenever dealing with vdev's + * enumerated via the ZpoolList class. The in-core configuration + * data for a vdev does not contain all of the items found in + * the on-disk label. This requires the vdev class to augment + * the data in vdevConfig with data found in the pool object. + */ + Vdev(zpool_handle_t *pool, nvlist_t *vdevConfig); + + /** + * \brief Instantiate a vdev object for a vdev that is a member + * of a pool configuration. + * + * \param poolConfig The pool configuration containing the vdev + * configuration data provided in vdevConfig. + * \param vdevConfig Vdev configuration data. + * + * This method should be used whenever dealing with vdev's + * enumerated via the ZpoolList class. 
The in-core configuration + * data for a vdev does not contain all of the items found in + * the on-disk label. This requires the vdev class to augment + * the data in vdevConfig with data found in the pool object. + */ + Vdev(nvlist_t *poolConfig, nvlist_t *vdevConfig); + + /** + * \brief Instantiate a vdev object from a ZFS label stored on + * the device. + * + * \param vdevConfig The name/value list retrieved by reading + * the label information on a leaf vdev. + */ + Vdev(nvlist_t *vdevConfig); + + uint64_t GUID() const; + uint64_t PoolGUID() const; + vdev_state State() const; + std::string Path() const; + std::string PhysicalPath() const; + std::string GUIDString() const; + nvlist_t *PoolConfig() const; + nvlist_t *Config() const; + +private: + uint64_t m_poolGUID; + uint64_t m_vdevGUID; + nvlist_t *m_poolConfig; + nvlist_t *m_config; +}; + +inline uint64_t +Vdev::PoolGUID() const +{ + return (m_poolGUID); +} + +inline uint64_t +Vdev::GUID() const +{ + return (m_vdevGUID); +} + +inline nvlist_t * +Vdev::PoolConfig() const +{ + return (m_poolConfig); +} + +inline nvlist_t * +Vdev::Config() const +{ + return (m_config); +} + +#endif /* _VDEV_H_ */ Index: cddl/sbin/zfsd/zfsd.cc =================================================================== --- cddl/sbin/zfsd/zfsd.cc (.../head) (revision 0) +++ cddl/sbin/zfsd/zfsd.cc (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,659 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zfsd.cc + * + * The ZFS daemon consumes kernel devctl(4) event data via devd(8)'s + * unix domain socket in order to react to system changes that impact + * the function of ZFS storage pools. The goal of this daemon is to + * provide similar functionality to the Solaris ZFS Diagnostic Engine + * (zfs-diagnosis), the Solaris ZFS fault handler (zfs-retire), and + * the Solaris ZFS vdev insertion agent (zfs-mod sysevent handler). 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "callout.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); + +/*============================ Namespace Control =============================*/ +using std::string; +using std::stringstream; +using std::cerr; +using std::cout; +using std::endl; + +/*================================ Global Data ===============================*/ +const char g_devdSock[] = "/var/run/devd.pipe"; +int g_debug = 0; +libzfs_handle_t *g_zfsHandle; + +/*-------------------------------- EventBuffer -------------------------------*/ +//- EventBuffer Static Data ---------------------------------------------------- +/** + * NOTIFY, NOMATCH, ATTACH, DETACH. See DevCtlEvent::Type. + */ +const char EventBuffer::s_eventStartTokens[] = "!?+-"; + +/** + * Events are terminated by a newline. + */ +const char EventBuffer::s_eventEndTokens[] = "\n"; + +/** + * Key=Value pairs are terminated by whitespace. + */ +const char EventBuffer::s_keyPairSepTokens[] = " \t\n"; + +//- EventBuffer Public Methods ------------------------------------------------- +EventBuffer::EventBuffer(int fd) + : m_fd(fd), + m_validLen(0), + m_parsedLen(0), + m_nextEventOffset(0), + m_synchronized(true) +{ +} + +bool +EventBuffer::ExtractEvent(string &eventString) +{ + + while (UnParsed() > 0 || Fill()) { + + /* + * If the valid data in the buffer isn't enough to hold + * a full event, try reading more. + */ + if (NextEventMaxLen() < MIN_EVENT_SIZE) { + m_parsedLen += UnParsed(); + continue; + } + + char *nextEvent(m_buf + m_nextEventOffset); + bool truncated(true); + size_t eventLen(strcspn(nextEvent, s_eventEndTokens)); + + if (!m_synchronized) { + /* Discard data until an end token is read. 
*/ + if (nextEvent[eventLen] != '\0') + m_synchronized = true; + m_nextEventOffset += eventLen; + m_parsedLen = m_nextEventOffset; + continue; + } else if (nextEvent[eventLen] == '\0') { + + m_parsedLen += eventLen; + if (m_parsedLen < MAX_EVENT_SIZE) { + /* + * Ran out of buffer before hitting + * a full event. Fill() and try again. + */ + continue; + } + syslog(LOG_WARNING, + "Event exceeds event size limit of %d bytes.", + MAX_EVENT_SIZE); + } else { + /* + * Include the normal terminator in the extracted + * event data. + */ + eventLen += 1; + truncated = false; + } + + m_nextEventOffset += eventLen; + m_parsedLen = m_nextEventOffset; + eventString.assign(nextEvent, eventLen); + + if (truncated) { + size_t fieldEnd; + + /* Break cleanly at the end of a key<=>value pair. */ + fieldEnd = eventString.find_last_of(s_keyPairSepTokens); + if (fieldEnd != string::npos) + eventString.erase(fieldEnd); + eventString += '\n'; + + m_synchronized = false; + syslog(LOG_WARNING, + "Truncated %d characters from event.", + (int)(eventLen - fieldEnd)); + } + return (true); + } + return (false); +} + +//- EventBuffer Private Methods ------------------------------------------------ +bool +EventBuffer::Fill() +{ + ssize_t result; + + /* Compact the buffer. */ + if (m_nextEventOffset != 0) { + memmove(m_buf, m_buf + m_nextEventOffset, + m_validLen - m_nextEventOffset); + m_validLen -= m_nextEventOffset; + m_parsedLen -= m_nextEventOffset; + m_nextEventOffset = 0; + } + + /* Fill any empty space. */ + result = read(m_fd, m_buf + m_validLen, MAX_READ_SIZE - m_validLen); + if (result == -1) { + if (errno == EINTR || errno == EAGAIN) { + return (false); + } else { + err(1, "Read from devd socket failed"); + } + } + m_validLen += result; + /* Guarantee our buffer is always NUL terminated. 
*/ + m_buf[m_validLen] = '\0'; + + return (result > 0); +} + +/*--------------------------------- ZfsDaemon --------------------------------*/ +//- ZfsDaemon Static Private Data ---------------------------------------------- +bool ZfsDaemon::s_logCaseFiles; +bool ZfsDaemon::s_terminateEventLoop; +char ZfsDaemon::s_pidFilePath[] = "/var/run/zfsd.pid"; +pidfh *ZfsDaemon::s_pidFH; +int ZfsDaemon::s_devdSockFD = -1; +int ZfsDaemon::s_signalPipeFD[2]; +bool ZfsDaemon::s_systemRescanRequested(false); +bool ZfsDaemon::s_consumingEvents(false); +DevCtlEventList ZfsDaemon::s_unconsumedEvents; + +//- ZfsDaemon Static Public Methods -------------------------------------------- +void +ZfsDaemon::WakeEventLoop() +{ + write(s_signalPipeFD[1], "+", 1); +} + +void +ZfsDaemon::RequestSystemRescan() +{ + s_systemRescanRequested = true; + ZfsDaemon::WakeEventLoop(); +} + +void +ZfsDaemon::Run() +{ + Init(); + + while (s_terminateEventLoop == false) { + + try { + DisconnectFromDevd(); + + if (ConnectToDevd() == false) { + sleep(30); + continue; + } + + DetectMissedEvents(); + + EventLoop(); + + } catch (const ZfsdException &exp) { + exp.Log(); + } + } + + DisconnectFromDevd(); + + Fini(); +} + +//- ZfsDaemon Static Private Methods ------------------------------------------- +void +ZfsDaemon::Init() +{ + if (pipe(s_signalPipeFD) != 0) + errx(1, "Unable to allocate signal pipe. Exiting"); + + if (fcntl(s_signalPipeFD[0], F_SETFL, O_NONBLOCK) == -1) + errx(1, "Unable to set pipe as non-blocking. Exiting"); + + if (fcntl(s_signalPipeFD[1], F_SETFL, O_NONBLOCK) == -1) + errx(1, "Unable to set pipe as non-blocking. Exiting"); + + signal(SIGHUP, ZfsDaemon::RescanSignalHandler); + signal(SIGINFO, ZfsDaemon::InfoSignalHandler); + signal(SIGINT, ZfsDaemon::QuitSignalHandler); + signal(SIGTERM, ZfsDaemon::QuitSignalHandler); + signal(SIGUSR1, ZfsDaemon::RescanSignalHandler); + + g_zfsHandle = libzfs_init(); + if (g_zfsHandle == NULL) + errx(1, "Unable to initialize ZFS library. 
Exiting"); + + Callout::Init(); + DevCtlEvent::Init(); + InitializeSyslog(); + OpenPIDFile(); + + if (g_debug == 0) + daemon(0, 0); + + UpdatePIDFile(); +} + +void +ZfsDaemon::Fini() +{ + ClosePIDFile(); +} + +void +ZfsDaemon::InfoSignalHandler(int) +{ + s_logCaseFiles = true; + ZfsDaemon::WakeEventLoop(); +} + +void +ZfsDaemon::RescanSignalHandler(int) +{ + RequestSystemRescan(); +} + +void +ZfsDaemon::QuitSignalHandler(int) +{ + s_terminateEventLoop = true; + ZfsDaemon::WakeEventLoop(); +} + +void +ZfsDaemon::OpenPIDFile() +{ + pid_t otherPID; + + s_pidFH = pidfile_open(s_pidFilePath, 0600, &otherPID); + if (s_pidFH == NULL) { + if (errno == EEXIST) + errx(1, "already running as PID %d. Exiting", otherPID); + warn("cannot open PID file"); + } +} + +void +ZfsDaemon::UpdatePIDFile() +{ + if (s_pidFH != NULL) + pidfile_write(s_pidFH); +} + +void +ZfsDaemon::ClosePIDFile() +{ + if (s_pidFH != NULL) + pidfile_close(s_pidFH); +} + +void +ZfsDaemon::InitializeSyslog() +{ + openlog("zfsd", LOG_NDELAY, LOG_DAEMON); +} + +bool +ZfsDaemon::ConnectToDevd() +{ + struct sockaddr_un devdAddr; + int sLen; + int result; + + syslog(LOG_INFO, "Connecting to devd"); + + memset(&devdAddr, 0, sizeof(devdAddr)); + devdAddr.sun_family= AF_UNIX; + strlcpy(devdAddr.sun_path, g_devdSock, sizeof(devdAddr.sun_path)); + sLen = SUN_LEN(&devdAddr); + + s_devdSockFD = socket(AF_UNIX, SOCK_STREAM, 0); + if (s_devdSockFD == -1) + err(1, "Unable to create socket"); + result = connect(s_devdSockFD, + reinterpret_cast(&devdAddr), + sLen); + if (result == -1) { + syslog(LOG_INFO, "Unable to connect to devd"); + return (false); + } + + /* Don't block on reads. 
*/ + if (fcntl(s_devdSockFD, F_SETFL, O_NONBLOCK) == -1) + err(1, "Unable to enable nonblocking behavior on devd socket"); + + syslog(LOG_INFO, "Connection to devd successful"); + return (true); +} + +void +ZfsDaemon::DisconnectFromDevd() +{ + close(s_devdSockFD); +} + +void +ZfsDaemon::ReplayUnconsumedEvents() +{ + DevCtlEventList::iterator event(s_unconsumedEvents.begin()); + bool replayed_any = (event != s_unconsumedEvents.end()); + + s_consumingEvents = true; + if (replayed_any) + syslog(LOG_INFO, "Started replaying unconsumed events"); + while (event != s_unconsumedEvents.end()) { + (*event)->Process(); + delete *event; + s_unconsumedEvents.erase(event++); + } + if (replayed_any) + syslog(LOG_INFO, "Finished replaying unconsumed events"); + s_consumingEvents = false; +} + +bool +ZfsDaemon::SaveEvent(const DevCtlEvent &event) +{ + if (s_consumingEvents) + return false; + s_unconsumedEvents.push_back(event.DeepCopy()); + return true; +} + +/* Capture and process buffered events. */ +void +ZfsDaemon::ProcessEvents(EventBuffer &eventBuffer) +{ + string evString; + while (eventBuffer.ExtractEvent(evString)) { + DevCtlEvent *event(DevCtlEvent::CreateEvent(evString)); + if (event != NULL) { + event->Process(); + delete event; + } + } +} + +void +ZfsDaemon::FlushEvents() +{ + char discardBuf[256]; + + while (read(s_devdSockFD, discardBuf, sizeof(discardBuf)) > 0) + ; +} + +bool +ZfsDaemon::EventsPending() +{ + struct pollfd fds[1]; + int result; + + fds->fd = s_devdSockFD; + fds->events = POLLIN; + fds->revents = 0; + result = poll(fds, NUM_ELEMENTS(fds), /*timeout*/0); + + return ((fds->revents & POLLIN) != 0); +} + +void +ZfsDaemon::PurgeCaseFiles() +{ + CaseFile::PurgeAll(); +} + +bool +ZfsDaemon::VdevAddCaseFile(Vdev &vdev, void *cbArg) +{ + + if (vdev.State() != VDEV_STATE_HEALTHY) + CaseFile::Create(vdev); + + return (/*break early*/false); +} + +void +ZfsDaemon::BuildCaseFiles() +{ + /* Add CaseFiles for vdevs with issues. 
*/ + ZpoolList zpl; + + for (ZpoolList::iterator pool = zpl.begin(); pool != zpl.end(); pool++) + VdevIterator(*pool).Each(VdevAddCaseFile, NULL); + + /* De-serialize any saved cases. */ + CaseFile::DeSerialize(); +} + +void +ZfsDaemon::RescanSystem() +{ + struct gmesh mesh; + struct gclass *mp; + struct ggeom *gp; + struct gprovider *pp; + int result; + + /* + * The devctl system doesn't replay events for new consumers + * of the interface. Emit manufactured DEVFS arrival events + * for any devices that already before we started or during + * periods where we've lost our connection to devd. + */ + result = geom_gettree(&mesh); + if (result != 0) { + syslog(LOG_ERR, "ZfsDaemon::RescanSystem: " + "geom_gettree faild with error %d\n", result); + return; + } + + const string evStart("!system=DEVFS subsystem=CDEV type=CREATE " + "sub_type=synthesized cdev="); + LIST_FOREACH(mp, &mesh.lg_class, lg_class) { + LIST_FOREACH(gp, &mp->lg_geom, lg_geom) { + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + DevCtlEvent *event; + + string evString(evStart + pp->lg_name + "\n"); + event = DevCtlEvent::CreateEvent(evString); + if (event != NULL) + event->Process(); + } + } + } + geom_deletetree(&mesh); +} + +void +ZfsDaemon::DetectMissedEvents() +{ + do { + PurgeCaseFiles(); + + /* + * Discard any events waiting for us. We don't know + * if they still apply to the current state of the + * system. + */ + FlushEvents(); + + BuildCaseFiles(); + + /* + * If the system state has changed durring our + * interrogation, start over. + */ + } while (EventsPending()); + + RescanSystem(); +} + +void +ZfsDaemon::EventLoop() +{ + EventBuffer eventBuffer(s_devdSockFD); + + while (s_terminateEventLoop == false) { + struct pollfd fds[2]; + int result; + + if (s_logCaseFiles == true) { + s_logCaseFiles = false; + CaseFile::LogAll(); + } + + Callout::ExpireCallouts(); + + /* Wait for data. 
*/ + fds[0].fd = s_devdSockFD; + fds[0].events = POLLIN; + fds[0].revents = 0; + fds[1].fd = s_signalPipeFD[0]; + fds[1].events = POLLIN; + fds[1].revents = 0; + result = poll(fds, NUM_ELEMENTS(fds), /*timeout*/INFTIM); + if (result == -1) { + if (errno == EINTR) + continue; + else + err(1, "Polling for devd events failed"); + } else if (result == 0) { + errx(1, "Unexpected result of 0 from poll. Exiting"); + } + + if ((fds[0].revents & POLLIN) != 0) + ProcessEvents(eventBuffer); + + if ((fds[1].revents & POLLIN) != 0) { + static char discardBuf[128]; + + /* + * This pipe exists just to close the signal + * race. Its contents are of no interest to + * us, but we must ensure that future signals + * have space in the pipe to write. + */ + while (read(s_signalPipeFD[0], discardBuf, + sizeof(discardBuf)) > 0) + ; + } + + if (s_systemRescanRequested == true) { + s_systemRescanRequested = false; + RescanSystem(); + } + + if ((fds->revents & POLLERR) != 0) { + /* Try reconnecting. */ + syslog(LOG_INFO, "Error on socket. Disconnecting."); + break; + } + + if ((fds->revents & POLLHUP) != 0) { + /* Try reconnecting. */ + syslog(LOG_INFO, "Hup on socket. Disconnecting."); + break; + } + } +} + +/*=============================== Program Main ===============================*/ +static void +usage() +{ + fprintf(stderr, "usage: %s [-d]\n", getprogname()); + exit(1); +} + +/** + * Program entry point. + */ +int +main(int argc, char **argv) +{ + int ch; + + while ((ch = getopt(argc, argv, "d")) != -1) { + switch (ch) { + case 'd': + g_debug++; + break; + default: + usage(); + } + } + + ZfsDaemon::Run(); + + return (0); +} Index: cddl/sbin/zfsd/zfsd.h =================================================================== --- cddl/sbin/zfsd/zfsd.h (.../head) (revision 0) +++ cddl/sbin/zfsd/zfsd.h (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,388 @@ +/*- + * Copyright (c) 2011 Spectra Logic Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file zfsd.h + * + * Class definitions and supporting data strutures for the ZFS fault + * management daemon. 
+ */ +#ifndef _ZFSD_H_ +#define _ZFSD_H_ + +#include +#include +#include +#include + +#include +#include + +#include "case_file.h" +#include "dev_ctl_event.h" +#include "vdev_iterator.h" + +/*============================ Namespace Control =============================*/ +using std::auto_ptr; +using std::map; +using std::pair; +using std::string; + +/*================================ Global Data ===============================*/ +extern const char g_devdSock[]; +extern int g_debug; +extern libzfs_handle_t *g_zfsHandle; + +/*=========================== Forward Declarations ===========================*/ +struct EventFactoryRecord; +struct pidfh; + +typedef int LeafIterFunc(zpool_handle_t *, nvlist_t *, void *); + +/*================================== Macros ==================================*/ +#define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) + +/*============================= Class Definitions ============================*/ +/*-------------------------------- EventBuffer -------------------------------*/ +/** + * \brief Class buffering event data from Devd and splitting it + * into individual event strings. + * + * Users of this class initialize it with the file descriptor associated + * with the unix domain socket connection with devd. The lifetime of + * an EventBuffer instance should match that of the file descriptor passed + * to it. This is required as data from partially received events is + * retained in the EventBuffer in order to allow reconstruction of these + * events across multiple reads of the Devd file descriptor. + * + * Once the program determines that the Devd file descriptor is ready + * for reading, the EventBuffer::ExtractEvent() should be called in a + * loop until the method returns false. + */ +class EventBuffer +{ +public: + /** + * Constructor + * + * \param fd The file descriptor on which to buffer/parse event data. + */ + EventBuffer(int fd); + + /** + * Pull a single event string out of the event buffer. 
+ * + * \param eventString The extracted event data (if available). + * + * \return true if event data is available and eventString has + * been populated. Otherwise false. + */ + bool ExtractEvent(string &eventString); + +private: + enum { + /** + * Size of an empty event (start and end token with + * no data. The EventBuffer parsing needs at least + * this much data in the buffer for safe event extraction. + */ + MIN_EVENT_SIZE = 2, + + /* + * The maximum event size supported by ZFSD. + * Events larger than this size (minus 1) are + * truncated at the end of the last fully received + * key/value pair. + */ + MAX_EVENT_SIZE = 8192, + + /** + * The maximum amount of buffer data to read at + * a single time from the Devd file descriptor. + */ + MAX_READ_SIZE = MAX_EVENT_SIZE, + + /** + * The size of EventBuffer's buffer of Devd event data. + * This is one larger than the maximum supported event + * size, which alows us to always include a terminating + * NUL without overwriting any received data. + */ + EVENT_BUFSIZE = MAX_EVENT_SIZE + /*NUL*/1 + }; + + /** The amount of data in m_buf we have yet to look at. */ + size_t UnParsed() const; + + /** The amount of data in m_buf available for the next event. */ + size_t NextEventMaxLen() const; + + /** Fill the event buffer with event data from Devd. */ + bool Fill(); + + /** Characters we treat as beginning an event string. */ + static const char s_eventStartTokens[]; + + /** Characters we treat as ending an event string. */ + static const char s_eventEndTokens[]; + + /** Characters found between successive "key=value" strings. */ + static const char s_keyPairSepTokens[]; + + /** Temporary space for event data during our parsing. */ + char m_buf[EVENT_BUFSIZE]; + + /** Copy of the file descriptor linked to devd's domain socket. */ + int m_fd; + + /** Valid bytes in m_buf. */ + size_t m_validLen; + + /** The amount of data in m_buf we have looked at. 
*/ + size_t m_parsedLen; + + /** Offset to the start token of the next event. */ + size_t m_nextEventOffset; + + /** The EventBuffer is aligned and tracking event records. */ + bool m_synchronized; +}; + +//- EventBuffer Inline Private Methods ----------------------------------------- +inline size_t +EventBuffer::UnParsed() const +{ + return (m_validLen - m_parsedLen); +} + +inline size_t +EventBuffer::NextEventMaxLen() const +{ + return (m_validLen - m_nextEventOffset); +} + +/*--------------------------------- ZfsDaemon --------------------------------*/ +/** + * Static singleton orchestrating the operations of the ZFS daemon program. + */ +class ZfsDaemon +{ +public: + /** + * Used by signal handlers to ensure, in a race free way, that + * the event loop will perform at least one more full loop + * before sleeping again. + */ + static void WakeEventLoop(); + + /** + * Schedules a rescan of devices in the system for potential + * candidates to replace a missing vdev. The scan is performed + * during the next run of the event loop. + */ + static void RequestSystemRescan(); + + /** + * Queue an event for replay after the next ZFS configuration + * sync event is received. This facility is used when an event + * is received for a pool or vdev that is not visible in the + * current ZFS configuration, but may "arrive" once the kernel + * commits the configuration change that emitted the event. + */ + static bool SaveEvent(const DevCtlEvent &event); + + /** + * Reprocess any events saved via the SaveEvent() facility. + */ + static void ReplayUnconsumedEvents(); + + /** Daemonize and perform all functions of the ZFS daemon. */ + static void Run(); + +private: + /** Initialize the daemon. */ + static void Init(); + + /** Perform any necessary cleanup at daemon shutdown. */ + static void Fini(); + + /** Process incoming devctl events from devd. */ + static void ProcessEvents(EventBuffer &eventBuffer); + + /** Discard all data pending in s_devdSockFD. 
*/ + static void FlushEvents(); + + static VdevCallback_t VdevAddCaseFile; + + /** + * Test for data pending in s_devdSockFD + * + * \return True if data is pending. Otherwise false. + */ + static bool EventsPending(); + + /** Purge our cache of outstanding ZFS issues in the system. */ + static void PurgeCaseFiles(); + + /** Build a cache of outstanding ZFS issues in the system. */ + static void BuildCaseFiles(); + + /** + * Iterate over all known issues and attempt to solve them + * given resources currently available in the system. + */ + static void RescanSystem(); + + /** + * Interrogate the system looking for previously unknown + * faults that occurred either before ZFSD was started, + * or during a period of lost communication with Devd. + */ + static void DetectMissedEvents(); + + /** + * Wait for and process event source activity. + */ + static void EventLoop(); + + /** + * Signal handler for which our response is to + * log the current state of the daemon. + * + * \param sigNum The signal caught. + */ + static void InfoSignalHandler(int sigNum); + + /** + * Signal handler for which our response is to + * request a case rescan. + * + * \param sigNum The signal caught. + */ + static void RescanSignalHandler(int sigNum); + + /** + * Signal handler for which our response is to + * gracefully terminate. + * + * \param sigNum The signal caught. + */ + static void QuitSignalHandler(int sigNum); + + /** + * Open and lock our PID file. + */ + static void OpenPIDFile(); + + /** + * Update our PID file with our PID. + */ + static void UpdatePIDFile(); + + /** + * Close and release the lock on our PID file. + */ + static void ClosePIDFile(); + + /** + * Perform syslog configuraiton. + */ + static void InitializeSyslog(); + + /** + * Open a connection to devd's unix domain socket. + * + * \return True if the connection attempt is successsful. Otherwise + * false. + */ + static bool ConnectToDevd(); + + /** + * Close a connection (if any) to devd's unix domain socket. 
+ */ + static void DisconnectFromDevd(); + + /** + * Set to true when our program is signaled to + * gracefully exit. + */ + static bool s_logCaseFiles; + + /** + * Set to true when our program is signaled to + * gracefully exit. + */ + static bool s_terminateEventLoop; + + /** + * The canonical path and file name of zfsd's PID file. + */ + static char s_pidFilePath[]; + + /** + * Control structure for PIDFILE(3) API. + */ + static pidfh *s_pidFH; + + /** + * File descriptor representing the unix domain socket + * connection with devd. + */ + static int s_devdSockFD; + + /** + * Pipe file descriptors used to close races with our + * signal handlers. + */ + static int s_signalPipeFD[2]; + + /** Queued events for replay. */ + static DevCtlEventList s_unconsumedEvents; + + /** + * Flag controlling a rescan from ZFSD's event loop of all + * GEOM providers in the system to find candidates for solving + * cases. + */ + static bool s_systemRescanRequested; + + /** + * Flag controlling whether events can be queued. This boolean + * is set during event replay to ensure that events for pools or + * devices no longer in the system are not retained forever. + */ + static bool s_consumingEvents; +}; + +#endif /* _ZFSD_H_ */ Index: cddl/sbin/Makefile =================================================================== --- cddl/sbin/Makefile (.../head) (revision 243619) +++ cddl/sbin/Makefile (.../projects/zfsd/head) (revision 243630) @@ -2,11 +2,14 @@ .include -SUBDIR= ${_zfs} ${_zpool} +SUBDIR= ${_zfs} ${_zpool} ${_zfsd} .if ${MK_ZFS} != "no" _zfs= zfs _zpool= zpool +. if ${MK_CXX} != "no" +_zfsd= zfsd +. endif .endif .include Index: etc/mtree/BSD.root.dist =================================================================== --- etc/mtree/BSD.root.dist (.../head) (revision 243619) +++ etc/mtree/BSD.root.dist (.../projects/zfsd/head) (revision 243630) @@ -67,6 +67,8 @@ ssl .. zfs + cases + .. .. .. 
lib Index: etc/rc.d/zfsd =================================================================== --- etc/rc.d/zfsd (.../head) (revision 0) +++ etc/rc.d/zfsd (.../projects/zfsd/head) (revision 243630) @@ -0,0 +1,17 @@ +#!/bin/sh +# +# $FreeBSD$ +# + +# PROVIDE: zfsd +# REQUIRE: devd zfs +# KEYWORD: nojail shutdown + +. /etc/rc.subr + +name="zfsd" +rcvar=`set_rcvar` +command="/sbin/${name}" + +load_rc_config $name +run_rc_command "$1" Index: etc/rc.d/Makefile =================================================================== --- etc/rc.d/Makefile (.../head) (revision 243619) +++ etc/rc.d/Makefile (.../projects/zfsd/head) (revision 243630) @@ -158,6 +158,7 @@ FILES= DAEMON \ ypupdated \ ypxfrd \ zfs \ + zfsd \ zvol .if ${MK_IPX} != "no" Index: etc/defaults/rc.conf =================================================================== --- etc/defaults/rc.conf (.../head) (revision 243619) +++ etc/defaults/rc.conf (.../projects/zfsd/head) (revision 243630) @@ -61,6 +61,10 @@ rc_conf_files="/etc/rc.conf /etc/rc.conf.local" # ZFS support zfs_enable="NO" # Set to YES to automatically mount ZFS file systems +# ZFSD support +zfsd_enable="NO" # Set to YES to automatically start the ZFS fault + # management daemon. + gptboot_enable="YES" # GPT boot success/failure reporting. 
# Experimental - test before enabling Index: sys/cddl/contrib/opensolaris/uts/common/sys/sysevent/dev.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/sys/sysevent/dev.h (.../head) (revision 243619) +++ sys/cddl/contrib/opensolaris/uts/common/sys/sysevent/dev.h (.../projects/zfsd/head) (revision 243630) @@ -236,6 +236,7 @@ extern "C" { #define EV_VERSION "version" #define DEV_PHYS_PATH "phys_path" +#define DEV_PATH "dev_path" #define DEV_NAME "dev_name" #define DEV_DRIVER_NAME "driver_name" #define DEV_INSTANCE "instance" Index: sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h (.../head) (revision 243619) +++ sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h (.../projects/zfsd/head) (revision 243630) @@ -46,6 +46,7 @@ extern "C" { #define FM_EREPORT_ZFS_IO_FAILURE "io_failure" #define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" #define FM_EREPORT_ZFS_LOG_REPLAY "log_replay" +#define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write" #define FM_EREPORT_PAYLOAD_ZFS_POOL "pool" #define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode" Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c (.../head) (revision 243619) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c (.../projects/zfsd/head) (revision 243630) @@ -26,6 +26,7 @@ */ #include +#include #include #include #include @@ -139,7 +140,7 @@ out: kobj_close_file(file); } -static void +static int spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) { size_t buflen; @@ -147,13 +148,14 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t vnode_t *vp; int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; char *temp; + int err; /* * If the nvlist is empty 
(NULL), then remove the old cachefile. */ if (nvl == NULL) { - (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); - return; + err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); + return (err); } /* @@ -174,11 +176,12 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t */ (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); - if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) { - if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, NULL) == 0 && - VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) { - (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE); + err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0); + if (err == 0) { + if ((err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, NULL)) == 0 && + (err = VOP_FSYNC(vp, FSYNC, kcred, NULL)) == 0) { + err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE); } (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); } @@ -187,6 +190,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t kmem_free(buf, buflen); kmem_free(temp, MAXPATHLEN); + return (err); } /* @@ -198,6 +202,8 @@ spa_config_sync(spa_t *target, boolean_t removing, { spa_config_dirent_t *dp, *tdp; nvlist_t *nvl; + boolean_t ccw_failure; + int error; ASSERT(MUTEX_HELD(&spa_namespace_lock)); @@ -209,6 +215,7 @@ spa_config_sync(spa_t *target, boolean_t removing, * cachefile is changed, the new one is pushed onto this list, allowing * us to update previous cachefiles that no longer contain this pool. 
*/ + ccw_failure = B_FALSE; for (dp = list_head(&target->spa_config_list); dp != NULL; dp = list_next(&target->spa_config_list, dp)) { spa_t *spa = NULL; @@ -241,10 +248,35 @@ spa_config_sync(spa_t *target, boolean_t removing, mutex_exit(&spa->spa_props_lock); } - spa_config_write(dp, nvl); + error = spa_config_write(dp, nvl); + if (error != 0) { + + printf("ZFS ERROR: Update of cache file %s failed: " + "Errno %d\n", dp->scd_path, error); + ccw_failure = B_TRUE; + } + nvlist_free(nvl); } + if (ccw_failure) { + /* + * Keep trying so that configuration data is + * written if/when any temporary filesystem + * resource issues are resolved. + */ + target->spa_ccw_fail_time = ddi_get_lbolt64(); + spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE); + zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, + target, NULL, NULL, 0, 0); + } else { + /* + * Do not rate limit future attempts to update + * the config cache. + */ + target->spa_ccw_fail_time = 0; + } + /* * Remove any config entries older than the current one. */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h (.../head) (revision 243619) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h (.../projects/zfsd/head) (revision 243630) @@ -230,6 +230,7 @@ struct spa { uint64_t spa_feat_for_write_obj; /* required to write to pool */ uint64_t spa_feat_for_read_obj; /* required to read from pool */ uint64_t spa_feat_desc_obj; /* Feature descriptions */ + int64_t spa_ccw_fail_time; /* Conf cache write fail time */ /* * spa_refcnt & spa_config_lock must be the last elements * because refcount_t changes size based on compilation options. 
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c (.../head) (revision 243619) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c (.../projects/zfsd/head) (revision 243630) @@ -69,6 +69,10 @@ vdev_geom_orphan(struct g_consumer *cp) g_topology_assert(); vd = cp->private; + if (vd == NULL) { + /* Vdev close in progress. Ignore the event. */ + return; + } /* * Orphan callbacks occur from the GEOM event thread. @@ -84,13 +88,65 @@ vdev_geom_orphan(struct g_consumer *cp) * async removal support to invoke a close on this * vdev once it is safe to do so. */ - zfs_post_remove(vd->vdev_spa, vd); vd->vdev_remove_wanted = B_TRUE; spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); } +static void +vdev_geom_attrchanged(struct g_consumer *cp, const char *attr) +{ + vdev_t *vd; + spa_t *spa; + char *physpath; + int error, physpath_len; + + g_topology_assert(); + + if (strcmp(attr, "GEOM::physpath") != 0) + return; + + if (g_access(cp, 1, 0, 0) != 0) + return; + + /* + * Record/Update physical path information for this device. 
+ */ + vd = cp->private; + spa = vd->vdev_spa; + physpath_len = MAXPATHLEN; + physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); + error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); + g_access(cp, -1, 0, 0); + if (error == 0) { + char *old_physpath; + + old_physpath = vd->vdev_physpath; + vd->vdev_physpath = spa_strdup(physpath); + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); + + if (old_physpath != NULL) { + int held_lock; + + held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER); + if (held_lock == 0) { + g_topology_unlock(); + spa_config_enter(spa, SCL_STATE, FTAG, + RW_WRITER); + } + + spa_strfree(old_physpath); + + if (held_lock == 0) { + spa_config_exit(spa, SCL_STATE, FTAG); + g_topology_lock(); + } + } + } + g_free(physpath); +} + static struct g_consumer * -vdev_geom_attach(struct g_provider *pp) +vdev_geom_attach(struct g_provider *pp, vdev_t *vd) { struct g_geom *gp; struct g_consumer *cp; @@ -109,6 +165,7 @@ static struct g_consumer * if (gp == NULL) { gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev"); gp->orphan = vdev_geom_orphan; + gp->attrchanged = vdev_geom_attrchanged; cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_wither_geom(gp, ENXIO); @@ -145,20 +202,32 @@ static struct g_consumer * ZFS_LOG(1, "Used existing consumer for %s.", pp->name); } } + + cp->private = vd; + + /* Fetch initial physical path information for this device. */ + vdev_geom_attrchanged(cp, "GEOM::physpath"); + return (cp); } static void -vdev_geom_detach(void *arg, int flag __unused) +vdev_geom_detach(void *arg) { struct g_geom *gp; struct g_consumer *cp; + vdev_t *vd; g_topology_assert(); cp = arg; gp = cp->geom; ZFS_LOG(1, "Closing access to %s.", cp->provider->name); + vd = cp->private; + if (vd != NULL) { + vd->vdev_tsd = NULL; + cp->private = NULL; + } g_access(cp, -1, 0, -1); /* Destroy consumer on last close. 
*/ if (cp->acr == 0 && cp->ace == 0) { @@ -176,12 +245,16 @@ static void } static uint64_t -nvlist_get_guid(nvlist_t *list) +nvlist_get_guid(nvlist_t *list, uint64_t *pguid) { uint64_t value; value = 0; nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value); + if (pguid) { + *pguid = 0; + nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); + } return (value); } @@ -442,7 +515,7 @@ vdev_geom_read_pool_label(const char *name, } static uint64_t -vdev_geom_read_guid(struct g_consumer *cp) +vdev_geom_read_guid(struct g_consumer *cp, uint64_t *pguid) { nvlist_t *config; uint64_t guid; @@ -451,20 +524,22 @@ static uint64_t guid = 0; if (vdev_geom_read_config(cp, &config) == 0) { - guid = nvlist_get_guid(config); + guid = nvlist_get_guid(config, pguid); nvlist_free(config); - } + } else if (pguid) + *pguid = 0; return (guid); } static struct g_consumer * -vdev_geom_attach_by_guid(uint64_t guid) +vdev_geom_attach_by_guids(vdev_t *vd) { struct g_class *mp; struct g_geom *gp, *zgp; struct g_provider *pp; struct g_consumer *cp, *zcp; uint64_t pguid; + uint64_t vguid; g_topology_assert(); @@ -484,12 +559,13 @@ static struct g_consumer * if (vdev_geom_attach_taster(zcp, pp) != 0) continue; g_topology_unlock(); - pguid = vdev_geom_read_guid(zcp); + vguid = vdev_geom_read_guid(zcp, &pguid); g_topology_lock(); vdev_geom_detach_taster(zcp); - if (pguid != guid) + if (pguid != spa_guid(vd->vdev_spa) || + vguid != vd->vdev_guid) continue; - cp = vdev_geom_attach(pp); + cp = vdev_geom_attach(pp, vd); if (cp == NULL) { printf("ZFS WARNING: Unable to attach to %s.\n", pp->name); @@ -510,7 +586,7 @@ end: } static struct g_consumer * -vdev_geom_open_by_guid(vdev_t *vd) +vdev_geom_open_by_guids(vdev_t *vd) { struct g_consumer *cp; char *buf; @@ -518,8 +594,9 @@ static struct g_consumer * g_topology_assert(); - ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); - cp = vdev_geom_attach_by_guid(vd->vdev_guid); + ZFS_LOG(1, "Searching by guids [%ju:%ju].", + 
(uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); + cp = vdev_geom_attach_by_guids(vd); if (cp != NULL) { len = strlen(cp->provider->name) + strlen("/dev/") + 1; buf = kmem_alloc(len, KM_SLEEP); @@ -528,10 +605,12 @@ static struct g_consumer * spa_strfree(vd->vdev_path); vd->vdev_path = buf; - ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.", + ZFS_LOG(1, "Attach by guids [%ju:%ju] succeeded, provider %s.", + (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid, vd->vdev_path); } else { - ZFS_LOG(1, "Search by guid [%ju] failed.", + ZFS_LOG(1, "Search by guids [%ju:%ju] failed.", + (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid); } @@ -543,7 +622,8 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid) { struct g_provider *pp; struct g_consumer *cp; - uint64_t guid; + uint64_t pguid; + uint64_t vguid; g_topology_assert(); @@ -551,20 +631,23 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid) pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); if (pp != NULL) { ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); - cp = vdev_geom_attach(pp); + cp = vdev_geom_attach(pp, vd); if (cp != NULL && check_guid && ISP2(pp->sectorsize) && pp->sectorsize <= VDEV_PAD_SIZE) { g_topology_unlock(); - guid = vdev_geom_read_guid(cp); + vguid = vdev_geom_read_guid(cp, &pguid); g_topology_lock(); - if (guid != vd->vdev_guid) { - vdev_geom_detach(cp, 0); + if (pguid != spa_guid(vd->vdev_spa) || + vguid != vd->vdev_guid) { + vdev_geom_detach(cp); cp = NULL; ZFS_LOG(1, "guid mismatch for provider %s: " - "%ju != %ju.", vd->vdev_path, - (uintmax_t)vd->vdev_guid, (uintmax_t)guid); + "%ju:%ju != %ju:%ju.", vd->vdev_path, + (uintmax_t)spa_guid(vd->vdev_spa), + (uintmax_t)vd->vdev_guid, + (uintmax_t)pguid, (uintmax_t)vguid); } else { - ZFS_LOG(1, "guid match for provider %s.", + ZFS_LOG(1, "guids match for provider %s.", vd->vdev_path); } } @@ -597,23 +680,37 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64 error = 0; /* - * If 
we're creating or splitting a pool, just find the GEOM provider - * by its name and ignore GUID mismatches. + * Try using the recorded path for this device, but only + * accept it if its label data contains the expected GUIDs. */ - if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || - vd->vdev_spa->spa_splitting_newspa == B_TRUE) + cp = vdev_geom_open_by_path(vd, 1); + if (cp == NULL) { + /* + * The device at vd->vdev_path doesn't have the + * expected GUIDs. The disks might have merely + * moved around so try all other GEOM providers + * to find one with the right GUIDs. + */ + cp = vdev_geom_open_by_guids(vd); + } + + if (cp == NULL && + ((vd->vdev_prevstate == VDEV_STATE_UNKNOWN && + vd->vdev_spa->spa_load_state == SPA_LOAD_NONE) || + vd->vdev_spa->spa_splitting_newspa == B_TRUE)) { + /* + * We are dealing with a vdev that hasn't been previously + * opened (since boot), and we are not loading an + * existing pool configuration (e.g. this operation is + * an add of a vdev to a new or existing pool) or we are + * in the process of splitting a pool. Find the GEOM + * provider by its name, ignoring GUID mismatches. + * + * XXPOLICY: It would be safer to only allow a device + * that is unlabeled or labeled but missing + * GUID information to be opened in this fashion. + */ + cp = vdev_geom_open_by_path(vd, 0); - else { - cp = vdev_geom_open_by_path(vd, 1); - if (cp == NULL) { - /* - * The device at vd->vdev_path doesn't have the - * expected guid. The disks might have merely - * moved around so try all other GEOM providers - * to find one with the right guid. 
- */ - cp = vdev_geom_open_by_guid(vd); - } } if (cp == NULL) { @@ -623,7 +720,7 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64 !ISP2(cp->provider->sectorsize)) { ZFS_LOG(1, "Provider %s has unsupported sectorsize.", vd->vdev_path); - vdev_geom_detach(cp, 0); + vdev_geom_detach(cp); error = EINVAL; cp = NULL; } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { @@ -640,20 +737,19 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64 if (error != 0) { printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n", vd->vdev_path, error); - vdev_geom_detach(cp, 0); + vdev_geom_detach(cp); cp = NULL; } } + g_topology_unlock(); PICKUP_GIANT(); if (cp == NULL) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } - - cp->private = vd; + pp = cp->provider; vd->vdev_tsd = cp; - pp = cp->provider; /* * Determine the actual size of the device. @@ -671,12 +767,6 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64 */ vd->vdev_nowritecache = B_FALSE; - if (vd->vdev_physpath != NULL) - spa_strfree(vd->vdev_physpath); - bufsize = sizeof("/dev/") + strlen(pp->name); - vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP); - snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name); - return (0); } @@ -688,9 +778,9 @@ vdev_geom_close(vdev_t *vd) cp = vd->vdev_tsd; if (cp == NULL) return; - vd->vdev_tsd = NULL; - vd->vdev_delayed_close = B_FALSE; - g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); + g_topology_lock(); + vdev_geom_detach(cp); + g_topology_unlock(); } static void Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (.../head) (revision 243619) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (.../projects/zfsd/head) (revision 243630) @@ -76,10 +76,20 @@ /* Check hostid on import? 
*/ static int check_hostid = 1; +/* + * The interval at which failed configuration cache file writes + * should be retried. + */ +static int zfs_ccw_retry_interval = 300; + SYSCTL_DECL(_vfs_zfs); TUNABLE_INT("vfs.zfs.check_hostid", &check_hostid); SYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RW, &check_hostid, 0, "Check hostid on import?"); +TUNABLE_INT("vfs.zfs.ccw_retry_interval", &zfs_ccw_retry_interval); +SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RW, + &zfs_ccw_retry_interval, 0, + "Configuration cache file write, retry after failure, interval (seconds)"); typedef enum zti_modes { zti_mode_fixed, /* value is # of threads (min 1) */ @@ -5694,6 +5704,8 @@ spa_async_remove(spa_t *spa, vdev_t *vd) vd->vdev_stat.vs_checksum_errors = 0; vdev_state_dirty(vd->vdev_top); + /* Tell userspace that the vdev is gone. */ + zfs_post_remove(spa, vd); } for (int c = 0; c < vd->vdev_children; c++) @@ -5717,7 +5729,6 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd) { sysevent_id_t eid; nvlist_t *attr; - char *physpath; if (!spa->spa_autoexpand) return; @@ -5727,20 +5738,16 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd) spa_async_autoexpand(spa, cvd); } - if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) + if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_path == NULL) return; - physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); - VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); + VERIFY(nvlist_add_string(attr, DEV_PATH, vd->vdev_path) == 0); (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, ESC_ZFS_VDEV_AUTOEXPAND, attr, &eid, DDI_SLEEP); nvlist_free(attr); - kmem_free(physpath, MAXPATHLEN); } static void @@ -5849,13 +5856,34 @@ spa_async_resume(spa_t *spa) mutex_exit(&spa->spa_async_lock); } +static int +spa_async_tasks_pending(spa_t *spa) +{ + u_int non_config_tasks; + u_int config_task; + 
boolean_t config_task_suspended; + + non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE; + config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE; + if (spa->spa_ccw_fail_time == 0) { + config_task_suspended = B_FALSE; + } else { + config_task_suspended = + (ddi_get_lbolt64() - spa->spa_ccw_fail_time) + < (zfs_ccw_retry_interval * hz); + } + + return (non_config_tasks || (config_task && !config_task_suspended)); +} + static void spa_async_dispatch(spa_t *spa) { mutex_enter(&spa->spa_async_lock); - if (spa->spa_async_tasks && !spa->spa_async_suspended && + if (spa_async_tasks_pending(spa) && + !spa->spa_async_suspended && spa->spa_async_thread == NULL && - rootdir != NULL && !vn_is_readonly(rootdir)) + rootdir != NULL) spa->spa_async_thread = thread_create(NULL, 0, spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); Index: sys/fs/devfs/devfs_vnops.c =================================================================== --- sys/fs/devfs/devfs_vnops.c (.../head) (revision 243619) +++ sys/fs/devfs/devfs_vnops.c (.../projects/zfsd/head) (revision 243630) @@ -1265,8 +1265,53 @@ static int devfs_readlink(struct vop_readlink_args *ap) { struct devfs_dirent *de; + struct cdev_priv *cdp; de = ap->a_vp->v_data; + cdp = de->de_cdp; + + if (cdp != NULL && (cdp->cdp_c.si_flags & SI_ALIAS) != 0) { + struct devfs_mount *dmp; + struct prison *pr; + char *mp; + int mp_len; + int pr_path_len; + int err; + + /* + * For device aliases, construct an absolute symlink (to + * shorten its length and avoid the ugliness of a relative + * link) by prepending the fully qualified path to the root + * of this devfs. For a non-jailed process, the devfs root + * is our mount point. For a jailed process, we must remove + * any jail prefix in our mount point so that our response + * matches the user process's world view. 
+ */ + dmp = VFSTODEVFS(ap->a_vp->v_mount); + mp = dmp->dm_mount->mnt_stat.f_mntonname; + mp_len = strlen(mp); + + pr = ap->a_cred->cr_prison; + pr_path_len = strlen(pr->pr_path); + + if (strncmp(pr->pr_path, mp, pr_path_len) == 0 + && mp[pr_path_len] == '/') { + mp += pr_path_len; + mp_len -= pr_path_len; + } + + err = uiomove(mp, mp_len, ap->a_uio); + if (err != 0) + return (err); + + /* + * Devfs cannot be the root file system, so its + * mount point must always be terminated by a '/'. + */ + err = uiomove("/", 1, ap->a_uio); + if (err != 0) + return (err); + } return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); }