Index: cddl/contrib/opensolaris/cmd/zfs/zfs_main.c =================================================================== --- cddl/contrib/opensolaris/cmd/zfs/zfs_main.c (revision 227654) +++ cddl/contrib/opensolaris/cmd/zfs/zfs_main.c (working copy) @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. */ @@ -145,7 +146,7 @@ typedef enum { HELP_HOLD, HELP_HOLDS, HELP_RELEASE, - HELP_DIFF + HELP_DIFF, } zfs_help_t; typedef struct zfs_command { @@ -220,8 +221,9 @@ get_usage(zfs_help_t idx) "\tcreate [-ps] [-b blocksize] [-o property=value] ... " "-V \n")); case HELP_DESTROY: - return (gettext("\tdestroy [-rRf] \n" - "\tdestroy [-rRd] \n")); + return (gettext("\tdestroy [-fnpRrv] \n" + "\tdestroy [-dnpRrv] " + "@[%][,...]\n")); case HELP_GET: return (gettext("\tget [-rHp] [-d max] " "[-o \"all\" | field[,...]] [-s source[,...]]\n" @@ -260,7 +262,8 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: - return (gettext("\tsend [-RDp] [-[iI] snapshot] \n")); + return (gettext("\tsend [-DnPpRrv] [-[iI] snapshot] " + "\n")); case HELP_SET: return (gettext("\tset " " ...\n")); @@ -439,6 +442,8 @@ usage(boolean_t requested) (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "groupquota@..."); (void) fprintf(fp, "YES NO | none\n"); + (void) fprintf(fp, "\t%-15s ", "written@"); + (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, gettext("\nSizes are specified in bytes " "with standard units such as K, M, G, etc.\n")); @@ -884,15 +889,23 @@ badusage: */ typedef struct destroy_cbdata { boolean_t cb_first; - int cb_force; - int cb_recurse; - int cb_error; - int cb_needforce; - int cb_doclones; - boolean_t cb_closezhp; + boolean_t cb_force; + boolean_t cb_recurse; + boolean_t cb_error; + boolean_t cb_doclones; zfs_handle_t *cb_target; - char *cb_snapname; boolean_t cb_defer_destroy; + boolean_t cb_verbose; + boolean_t cb_parsable; + boolean_t cb_dryrun; + nvlist_t *cb_nvl; + + /* first snap in contiguous run */ + zfs_handle_t *cb_firstsnap; + /* previous snap in contiguous run */ + zfs_handle_t *cb_prevsnap; + int64_t cb_snapused; + char *cb_snapspec; } destroy_cbdata_t; /* @@ -922,7 +935,7 @@ destroy_check_dependent(zfs_handle_t *zhp, void *d (void) fprintf(stderr, gettext("use '-r' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; - cbp->cb_error = 1; + cbp->cb_error = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); @@ -943,7 +956,8 @@ destroy_check_dependent(zfs_handle_t *zhp, void *d (void) fprintf(stderr, gettext("use '-R' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; - cbp->cb_error = 1; + cbp->cb_error = B_TRUE; + cbp->cb_dryrun = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); @@ -957,8 +971,21 @@ out: static int destroy_callback(zfs_handle_t *zhp, void *data) { - destroy_cbdata_t *cbp = data; + destroy_cbdata_t *cb = data; + const char *name = zfs_get_name(zhp); + if (cb->cb_verbose) { + if (cb->cb_parsable) { + (void) printf("destroy\t%s\n", name); + } else if (cb->cb_dryrun) { + (void) printf(gettext("would destroy %s\n"), + name); + } else { + (void) printf(gettext("will destroy %s\n"), + name); + } + } + /* * Ignore pools (which we've already flagged as an error before getting * here). @@ -969,13 +996,12 @@ destroy_callback(zfs_handle_t *zhp, void *data) return (0); } - /* - * Bail out on the first error. - */ - if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 || - zfs_destroy(zhp, cbp->cb_defer_destroy) != 0) { - zfs_close(zhp); - return (-1); + if (!cb->cb_dryrun) { + if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 || + zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { + zfs_close(zhp); + return (-1); + } } zfs_close(zhp); @@ -983,66 +1009,179 @@ destroy_callback(zfs_handle_t *zhp, void *data) } static int -destroy_snap_clones(zfs_handle_t *zhp, void *arg) +destroy_print_cb(zfs_handle_t *zhp, void *arg) { - destroy_cbdata_t *cbp = arg; - char thissnap[MAXPATHLEN]; - zfs_handle_t *szhp; - boolean_t closezhp = cbp->cb_closezhp; - int rv; + destroy_cbdata_t *cb = arg; + const char *name = zfs_get_name(zhp); + int err = 0; - (void) snprintf(thissnap, sizeof (thissnap), - "%s@%s", zfs_get_name(zhp), cbp->cb_snapname); + if (nvlist_exists(cb->cb_nvl, name)) { + if (cb->cb_firstsnap == NULL) + cb->cb_firstsnap = zfs_handle_dup(zhp); + if (cb->cb_prevsnap != NULL) + zfs_close(cb->cb_prevsnap); + /* this snap continues the current range */ + cb->cb_prevsnap = zfs_handle_dup(zhp); + if (cb->cb_verbose) { + if (cb->cb_parsable) { + (void) printf("destroy\t%s\n", name); + } else if (cb->cb_dryrun) { + (void) printf(gettext("would destroy %s\n"), + name); + } else { + (void) printf(gettext("will destroy %s\n"), + name); + } + } + } else if (cb->cb_firstsnap != NULL) { + /* end of this range */ + uint64_t used = 0; + err = zfs_get_snapused_int(cb->cb_firstsnap, + cb->cb_prevsnap, &used); + cb->cb_snapused += used; + zfs_close(cb->cb_firstsnap); + cb->cb_firstsnap = NULL; + zfs_close(cb->cb_prevsnap); + cb->cb_prevsnap = NULL; + } + zfs_close(zhp); + return (err); +} - libzfs_print_on_error(g_zfs, B_FALSE); - szhp = zfs_open(g_zfs, thissnap, ZFS_TYPE_SNAPSHOT); - libzfs_print_on_error(g_zfs, B_TRUE); - if (szhp) { - /* - * Destroy any clones of this snapshot - */ - if (zfs_iter_dependents(szhp, B_FALSE, destroy_callback, - cbp) != 0) { - zfs_close(szhp); - if (closezhp) - zfs_close(zhp); - return (-1); +static int +destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) +{ + int err; + assert(cb->cb_firstsnap == NULL); + assert(cb->cb_prevsnap == NULL); + err = zfs_iter_snapshots_sorted(fs_zhp, destroy_print_cb, cb); + if (cb->cb_firstsnap != NULL) { + uint64_t used = 0; + if (err == 0) { + err = zfs_get_snapused_int(cb->cb_firstsnap, + cb->cb_prevsnap, &used); } - zfs_close(szhp); + cb->cb_snapused += used; + zfs_close(cb->cb_firstsnap); + cb->cb_firstsnap = NULL; + zfs_close(cb->cb_prevsnap); + cb->cb_prevsnap = NULL; } + return (err); +} - cbp->cb_closezhp = B_TRUE; - rv = zfs_iter_filesystems(zhp, destroy_snap_clones, arg); - if (closezhp) - zfs_close(zhp); - return (rv); +static int +snapshot_to_nvl_cb(zfs_handle_t *zhp, void *arg) +{ + destroy_cbdata_t *cb = arg; + int err = 0; + + /* Check for clones. */ + if (!cb->cb_doclones) { + cb->cb_target = zhp; + cb->cb_first = B_TRUE; + err = zfs_iter_dependents(zhp, B_TRUE, + destroy_check_dependent, cb); + } + + if (err == 0) { + if (nvlist_add_boolean(cb->cb_nvl, zfs_get_name(zhp))) + nomem(); + } + zfs_close(zhp); + return (err); } static int +gather_snapshots(zfs_handle_t *zhp, void *arg) +{ + destroy_cbdata_t *cb = arg; + int err = 0; + + err = zfs_iter_snapspec(zhp, cb->cb_snapspec, snapshot_to_nvl_cb, cb); + if (err == ENOENT) + err = 0; + if (err != 0) + goto out; + + if (cb->cb_verbose) { + err = destroy_print_snapshots(zhp, cb); + if (err != 0) + goto out; + } + + if (cb->cb_recurse) + err = zfs_iter_filesystems(zhp, gather_snapshots, cb); + +out: + zfs_close(zhp); + return (err); +} + +static int +destroy_clones(destroy_cbdata_t *cb) +{ + nvpair_t *pair; + for (pair = nvlist_next_nvpair(cb->cb_nvl, NULL); + pair != NULL; + pair = nvlist_next_nvpair(cb->cb_nvl, pair)) { + zfs_handle_t *zhp = zfs_open(g_zfs, nvpair_name(pair), + ZFS_TYPE_SNAPSHOT); + if (zhp != NULL) { + boolean_t defer = cb->cb_defer_destroy; + int err; + + /* + * We can't defer destroy non-snapshots, so set it to + * false while destroying the clones. + */ + cb->cb_defer_destroy = B_FALSE; + err = zfs_iter_dependents(zhp, B_FALSE, + destroy_callback, cb); + cb->cb_defer_destroy = defer; + zfs_close(zhp); + if (err != 0) + return (err); + } + } + return (0); +} + +static int zfs_do_destroy(int argc, char **argv) { destroy_cbdata_t cb = { 0 }; int c; zfs_handle_t *zhp; - char *cp; + char *at; zfs_type_t type = ZFS_TYPE_DATASET; /* check options */ - while ((c = getopt(argc, argv, "dfrR")) != -1) { + while ((c = getopt(argc, argv, "vpndfrR")) != -1) { switch (c) { + case 'v': + cb.cb_verbose = B_TRUE; + break; + case 'p': + cb.cb_verbose = B_TRUE; + cb.cb_parsable = B_TRUE; + break; + case 'n': + cb.cb_dryrun = B_TRUE; + break; case 'd': cb.cb_defer_destroy = B_TRUE; type = ZFS_TYPE_SNAPSHOT; break; case 'f': - cb.cb_force = 1; + cb.cb_force = B_TRUE; break; case 'r': - cb.cb_recurse = 1; + cb.cb_recurse = B_TRUE; break; case 'R': - cb.cb_recurse = 1; - cb.cb_doclones = 1; + cb.cb_recurse = B_TRUE; + cb.cb_doclones = B_TRUE; break; case '?': default: @@ -1057,7 +1196,7 @@ zfs_do_destroy(int argc, char **argv) /* check number of arguments */ if (argc == 0) { - (void) fprintf(stderr, gettext("missing path argument\n")); + (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { @@ -1065,91 +1204,117 @@ zfs_do_destroy(int argc, char **argv) usage(B_FALSE); } - /* - * If we are doing recursive destroy of a snapshot, then the - * named snapshot may not exist. Go straight to libzfs. - */ - if (cb.cb_recurse && (cp = strchr(argv[0], '@'))) { - int ret; + at = strchr(argv[0], '@'); + if (at != NULL) { + int err; - *cp = '\0'; - if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) + /* Build the list of snaps to destroy in cb_nvl. */ + if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + *at = '\0'; + zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) return (1); - *cp = '@'; - cp++; - if (cb.cb_doclones) { - boolean_t defer = cb.cb_defer_destroy; + cb.cb_snapspec = at + 1; + if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || + cb.cb_error) { + zfs_close(zhp); + nvlist_free(cb.cb_nvl); + return (1); + } - /* - * Temporarily ignore the defer_destroy setting since - * it's not supported for clones. - */ - cb.cb_defer_destroy = B_FALSE; - cb.cb_snapname = cp; - if (destroy_snap_clones(zhp, &cb) != 0) { - zfs_close(zhp); - return (1); + if (nvlist_empty(cb.cb_nvl)) { + (void) fprintf(stderr, gettext("could not find any " + "snapshots to destroy; check snapshot names.\n")); + zfs_close(zhp); + nvlist_free(cb.cb_nvl); + return (1); + } + + if (cb.cb_verbose) { + char buf[16]; + zfs_nicenum(cb.cb_snapused, buf, sizeof (buf)); + if (cb.cb_parsable) { + (void) printf("reclaim\t%llu\n", + cb.cb_snapused); + } else if (cb.cb_dryrun) { + (void) printf(gettext("would reclaim %s\n"), + buf); + } else { + (void) printf(gettext("will reclaim %s\n"), + buf); } - cb.cb_defer_destroy = defer; } - ret = zfs_destroy_snaps(zhp, cp, cb.cb_defer_destroy); - zfs_close(zhp); - if (ret) { - (void) fprintf(stderr, - gettext("no snapshots destroyed\n")); + if (!cb.cb_dryrun) { + if (cb.cb_doclones) + err = destroy_clones(&cb); + if (err == 0) { + err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl, + cb.cb_defer_destroy); + } } - return (ret != 0); - } - /* Open the given dataset */ - if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) - return (1); + zfs_close(zhp); + nvlist_free(cb.cb_nvl); + if (err != 0) + return (1); + } else { + /* Open the given dataset */ + if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) + return (1); - cb.cb_target = zhp; + cb.cb_target = zhp; - /* - * Perform an explicit check for pools before going any further. - */ - if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && - zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { - (void) fprintf(stderr, gettext("cannot destroy '%s': " - "operation does not apply to pools\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use 'zfs destroy -r " - "%s' to destroy all datasets in the pool\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use 'zpool destroy %s' " - "to destroy the pool itself\n"), zfs_get_name(zhp)); - zfs_close(zhp); - return (1); - } + /* + * Perform an explicit check for pools before going any further. + */ + if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && + zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { + (void) fprintf(stderr, gettext("cannot destroy '%s': " + "operation does not apply to pools\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use 'zfs destroy -r " + "%s' to destroy all datasets in the pool\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use 'zpool destroy %s' " + "to destroy the pool itself\n"), zfs_get_name(zhp)); + zfs_close(zhp); + return (1); + } - /* - * Check for any dependents and/or clones. - */ - cb.cb_first = B_TRUE; - if (!cb.cb_doclones && !cb.cb_defer_destroy && - zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, - &cb) != 0) { - zfs_close(zhp); - return (1); - } + /* + * Check for any dependents and/or clones. + */ + cb.cb_first = B_TRUE; + if (!cb.cb_doclones && + zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, + &cb) != 0) { + zfs_close(zhp); + return (1); + } - if (cb.cb_error || (!cb.cb_defer_destroy && - (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0))) { - zfs_close(zhp); - return (1); - } + if (cb.cb_error) { + zfs_close(zhp); + return (1); + } - /* - * Do the real thing. The callback will close the handle regardless of - * whether it succeeds or not. - */ + if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, + &cb) != 0) { + zfs_close(zhp); + return (1); + } - if (destroy_callback(zhp, &cb) != 0) - return (1); + /* + * Do the real thing. The callback will close the + * handle regardless of whether it succeeds or not. + */ + if (destroy_callback(zhp, &cb) != 0) + return (1); + } return (0); } @@ -1251,6 +1416,17 @@ get_callback(zfs_handle_t *zhp, void *data) zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); + } else if (zfs_prop_written(pl->pl_user_prop)) { + sourcetype = ZPROP_SRC_LOCAL; + + if (zfs_prop_get_written(zhp, pl->pl_user_prop, + buf, sizeof (buf), cbp->cb_literal) != 0) { + sourcetype = ZPROP_SRC_NONE; + (void) strlcpy(buf, "-", sizeof (buf)); + } + + zprop_print_one_property(zfs_get_name(zhp), cbp, + pl->pl_user_prop, buf, sourcetype, source, NULL); } else { if (nvlist_lookup_nvlist(user_props, pl->pl_user_prop, &propval) != 0) { @@ -1795,8 +1971,8 @@ zfs_do_upgrade(int argc, char **argv) "---------------\n"); (void) printf(gettext(" 1 Initial ZFS filesystem version\n")); (void) printf(gettext(" 2 Enhanced directory entries\n")); - (void) printf(gettext(" 3 Case insensitive and File system " - "unique identifier (FUID)\n")); + (void) printf(gettext(" 3 Case insensitive and filesystem " + "user identifier (FUID)\n")); (void) printf(gettext(" 4 userquota, groupquota " "properties\n")); (void) printf(gettext(" 5 System attributes\n")); @@ -2676,6 +2852,13 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, else propstr = property; right_justify = B_TRUE; + } else if (zfs_prop_written(pl->pl_user_prop)) { + if (zfs_prop_get_written(zhp, pl->pl_user_prop, + property, sizeof (property), B_FALSE) != 0) + propstr = "-"; + else + propstr = property; + right_justify = B_TRUE; } else { if (nvlist_lookup_nvlist(userprops, pl->pl_user_prop, &propval) != 0) @@ -3302,9 +3485,6 @@ usage: } /* - * zfs send [-vDp] -R [-i|-I <@snap>] - * zfs send [-vDp] [-i|-I <@snap>] - * * Send a backup stream to stdout. */ static int @@ -3316,11 +3496,11 @@ zfs_do_send(int argc, char **argv) zfs_handle_t *zhp; sendflags_t flags = { 0 }; int c, err; - nvlist_t *dbgnv; + nvlist_t *dbgnv = NULL; boolean_t extraverbose = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":i:I:RDpv")) != -1) { + while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) { switch (c) { case 'i': if (fromname) @@ -3339,6 +3519,10 @@ zfs_do_send(int argc, char **argv) case 'p': flags.props = B_TRUE; break; + case 'P': + flags.parsable = B_TRUE; + flags.verbose = B_TRUE; + break; case 'v': if (flags.verbose) extraverbose = B_TRUE; @@ -3347,6 +3531,9 @@ zfs_do_send(int argc, char **argv) case 'D': flags.dedup = B_TRUE; break; + case 'n': + flags.dryrun = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -3372,7 +3559,7 @@ zfs_do_send(int argc, char **argv) usage(B_FALSE); } - if (isatty(STDOUT_FILENO)) { + if (!flags.dryrun && isatty(STDOUT_FILENO)) { (void) fprintf(stderr, gettext("Error: Stream can not be written to a terminal.\n" "You must redirect standard output.\n")); @@ -3426,10 +3613,10 @@ zfs_do_send(int argc, char **argv) if (flags.replicate && fromname == NULL) flags.doall = B_TRUE; - err = zfs_send(zhp, fromname, toname, flags, STDOUT_FILENO, NULL, 0, + err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, NULL, 0, extraverbose ? &dbgnv : NULL); - if (extraverbose) { + if (extraverbose && dbgnv != NULL) { /* * dump_nvlist prints to stdout, but that's been * redirected to a file. Make it print to stderr @@ -3510,7 +3697,7 @@ zfs_do_receive(int argc, char **argv) return (1); } - err = zfs_receive(g_zfs, argv[0], flags, STDIN_FILENO, NULL); + err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL); return (err != 0); } Index: cddl/contrib/opensolaris/cmd/zfs/zfs.8 =================================================================== --- cddl/contrib/opensolaris/cmd/zfs/zfs.8 (revision 227654) +++ cddl/contrib/opensolaris/cmd/zfs/zfs.8 (working copy) @@ -3,14 +3,11 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with .\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with -.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] .\" Copyright 2011 Nexenta Systems, Inc. All rights reserved. .\" Copyright 2011 by Delphix. All rights reserved. .\" Portions Copyright 2011 Pawel Jakub Dawidek .\" Portions Copyright 2011 Martin Matuska -.TH ZFS 8 "September 24, 2009" FreeBSD +.TH ZFS 8 "July 28, 2011" FreeBSD .SH NAME zfs \- configures ZFS file systems .SH SYNOPSIS @@ -31,12 +28,12 @@ zfs \- configures ZFS file systems .LP .nf -\fBzfs\fR \fBdestroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR +\fBzfs\fR \fBdestroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf -\fBzfs\fR \fBdestroy\fR [\fB-rRd\fR] \fIsnapshot\fR +\fBzfs\fR \fBdestroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,...] .fi .LP @@ -116,13 +113,13 @@ zfs \- configures ZFS file systems .LP .nf \fBzfs\fR \fBuserspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR] ... - [\fB-t\fR \fItype\fR [,...]] \fIfilesystem\fR|\fIsnapshot\fR + [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBgroupspace\fR [\fB-niHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-sS\fR \fIfield\fR] ... - [\fB-t\fR \fItype\fR [,...]] \fIfilesystem\fR|\fIsnapshot\fR + [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR .fi .LP @@ -152,7 +149,7 @@ zfs \- configures ZFS file systems .LP .nf -\fBzfs\fR \fBsend\fR [\fB-vR\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR +\fBzfs\fR \fBsend\fR [\fB-DnPpRrv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .fi .LP @@ -479,6 +476,19 @@ The time this dataset was created. .sp .ne 2 .na +\fB\fBclones\fR\fR +.ad +.sp .6 +.RS 4n +For snapshots, this property is a comma-separated list of filesystems or +volumes which are clones of this snapshot. The clones' \fBorigin\fR property +is this snapshot. If the \fBclones\fR property is not empty, then this +snapshot can not be destroyed (even with the \fB-r\fR or \fB-f\fR options). +.RE + +.sp +.ne 2 +.na \fB\fBdefer_destroy\fR\fR .ad .sp .6 @@ -507,8 +517,7 @@ property can be either \fByes\fR or \fBno\fR. .sp .6 .RS 4n For cloned file systems or volumes, the snapshot from which the clone was -created. The origin cannot be destroyed (even with the \fB-r\fR or \fB-f\fR -options) so long as a clone exists. +created. See also the \fBclones\fR property. .RE .sp @@ -728,6 +737,36 @@ This property can also be referred to by its short .RE .sp +.ne 2 +.na +\fB\fBwritten\fR\fR +.ad +.sp .6 +.RS 4n +The amount of \fBreferenced\fR space written to this dataset since the +previous snapshot. +.RE + +.sp +.ne 2 +.na +\fB\fBwritten@\fR\fIsnapshot\fR\fR +.ad +.sp .6 +.RS 4n +The amount of \fBreferenced\fR space written to this dataset since the +specified snapshot. This is the space that is referenced by this dataset +but was not referenced by the specified snapshot. +.sp +The \fIsnapshot\fR may be specified as a short snapshot name (just the part +after the \fB@\fR), in which case it will be interpreted as a snapshot in +the same filesystem as this dataset. +The \fIsnapshot\fR be a full snapshot name (\fIfilesystem\fR@\fIsnapshot\fR), +which for clones may be a snapshot in the origin's filesystem (or the origin +of the origin's filesystem, etc). +.RE + +.sp .LP The following native properties can be used to change the behavior of a \fBZFS\fR dataset. @@ -836,7 +875,7 @@ Changing this property affects only newly-written .ne 2 .na \fB\fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBgzip\fR | -\fBgzip-\fR\fIN\fR\fR +\fBgzip-\fR\fIN\fR | \fBzle\fR\fR .ad .sp .6 .RS 4n @@ -847,7 +886,8 @@ algorithm. The \fBgzip\fR compression algorithm us the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR -(which is also the default for \fBgzip\fR(1)). +(which is also the default for \fBgzip\fR(1)). The \fBzle\fR compression +algorithm compresses runs of zeros. .sp This property can also be referred to by its shortened column name \fBcompress\fR. Changing this property affects only newly-written data. @@ -1207,6 +1247,26 @@ the file system as discussed in the "Snapshots" se .sp .ne 2 .na +\fB\fBsync\fR=\fBdefault\fR | \fBalways\fR | \fBdisabled\fR\fR +.ad +.sp .6 +.RS 4n +Controls the behavior of synchronous requests (e.g. fsync, O_DSYNC). +\fBdefault\fR is the POSIX specified behavior of ensuring all synchronous +requests are written to stable storage and all devices are flushed to ensure +data is not cached by device controllers (this is the default). \fBalways\fR +causes every file system transaction to be written and flushed before its +system call returns. This has a large performance penalty. \fBdisabled\fR +disables synchronous requests. File system transactions are only committed to +stable storage periodically. This option will give the highest performance. +However, it is very dangerous as ZFS would be ignoring the synchronous +transaction demands of applications such as databases or NFS. Administrators +should only use this option when the risks are understood. +.RE + +.sp +.ne 2 +.na \fB\fBversion\fR=\fB1\fR | \fB2\fR | \fBcurrent\fR\fR .ad .sp .6 @@ -1530,7 +1590,7 @@ behavior is undefined. .sp .ne 2 .na -\fB\fBzfs destroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR\fR +\fBzfs destroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR .ad .sp .6 .RS 4n @@ -1570,6 +1630,38 @@ Force an unmount of any file systems using the \fB option has no effect on non-file systems or unmounted file systems. .RE +.sp +.ne 2 +.na +\fB\fB-n\fR\fR +.ad +.sp .6 +.RS 4n +Do a dry-run ("No-op") deletion. No data will be deleted. This is +useful in conjunction with the \fB-v\fR or \fB-p\fR flags to determine what +data would be deleted. +.RE + +.sp +.ne 2 +.na +\fB\fB-p\fR\fR +.ad +.sp .6 +.RS 4n +Print machine-parsable verbose information about the deleted data. +.RE + +.sp +.ne 2 +.na +\fB\fB-v\fR\fR +.ad +.sp .6 +.RS 4n +Print verbose information about the deleted data. +.RE +.sp Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. @@ -1578,19 +1670,31 @@ behavior for mounted file systems in use. .sp .ne 2 .na -\fB\fBzfs destroy\fR [\fB-rRd\fR] \fIsnapshot\fR\fR +\fBzfs destroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,...] .ad .sp .6 .RS 4n -The given snapshot is destroyed immediately if and only if the \fBzfs +The given snapshots are destroyed immediately if and only if the \fBzfs destroy\fR command without the \fB-d\fR option would have destroyed it. Such immediate destruction would occur, for example, if the snapshot had no clones and the user-initiated reference count were zero. .sp -If the snapshot does not qualify for immediate destruction, it is marked for +If a snapshot does not qualify for immediate destruction, it is marked for deferred deletion. In this state, it exists as a usable, visible snapshot until both of the preconditions listed above are met, at which point it is destroyed. .sp +An inclusive range of snapshots may be specified by separating the +first and last snapshots with a percent sign. +The first and/or last snapshots may be left blank, in which case the +filesystem's oldest or newest snapshot will be implied. +.sp +Multiple snapshots +(or ranges of snapshots) of the same filesystem or volume may be specified +in a comma-separated list of snapshots. +Only the snapshot's short name (the +part after the \fB@\fR) should be specified when using a range or +comma-separated list to identify multiple snapshots. +.sp .ne 2 .na \fB\fB-d\fR\fR @@ -1621,11 +1725,49 @@ descendent file systems. Recursively destroy all dependents. .RE +.sp +.ne 2 +.na +\fB\fB-n\fR\fR +.ad +.sp .6 +.RS 4n +Do a dry-run ("No-op") deletion. No data will be deleted. This is +useful in conjunction with the \fB-v\fR or \fB-p\fR flags to determine what +data would be deleted. .RE .sp .ne 2 .na +\fB\fB-p\fR\fR +.ad +.sp .6 +.RS 4n +Print machine-parsable verbose information about the deleted data. +.RE + +.sp +.ne 2 +.na +\fB\fB-v\fR\fR +.ad +.sp .6 +.RS 4n +Print verbose information about the deleted data. +.RE + +.sp +Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR +options, as they can destroy large portions of a pool and cause unexpected +behavior for mounted file systems in use. +.RE + +.RE + +.sp +.ne 2 +.na \fB\fBzfs snapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR .ad @@ -2501,8 +2643,7 @@ Unshare the specified filesystem. The command can .sp .ne 2 .na -\fB\fBzfs send\fR [\fB-vR\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] -\fIsnapshot\fR\fR +\fBzfs send\fR [\fB-DnPpRrv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .ad .sp .6 .RS 4n @@ -2563,6 +2704,66 @@ snapshots and file systems that do not exist on th .sp .ne 2 .na +\fB\fB-D\fR\fR +.ad +.sp .6 +.RS 4n +Generate a deduplicated stream. Blocks which would have been sent multiple +times in the send stream will only be sent once. The receiving system must +also support this feature to recieve a deduplicated stream. This flag can +be used regardless of the dataset's \fBdedup\fR property, but performance +will be much better if the filesystem uses a dedup-capable checksum (eg. +\fBsha256\fR). +.RE + +.sp +.ne 2 +.na +\fB\fB-r\fR\fR +.ad +.sp .6 +.RS 4n +Recursively send all descendant snapshots. This is similar to the \fB-R\fR +flag, but information about deleted and renamed datasets is not included, and +property information is only included if the \fB-p\fR flag is specified. +.RE + +.sp +.ne 2 +.na +\fB\fB-p\fR\fR +.ad +.sp .6 +.RS 4n +Include the dataset's properties in the stream. This flag is implicit when +\fB-R\fR is specified. The receiving system must also support this feature. +.RE + +.sp +.ne 2 +.na +\fB\fB-n\fR\fR +.ad +.sp .6 +.RS 4n +Do a dry-run ("No-op") send. Do not generate any actual send data. This is +useful in conjunction with the \fB-v\fR or \fB-P\fR flags to determine what +data will be sent. +.RE + +.sp +.ne 2 +.na +\fB\fB-P\fR\fR +.ad +.sp .6 +.RS 4n +Print machine-parsable verbose information about the stream package generated. +.RE + +.sp +.ne 2 +.na \fB\fB-v\fR\fR .ad .sp .6 Index: cddl/contrib/opensolaris/cmd/zpool/zpool_main.c =================================================================== --- cddl/contrib/opensolaris/cmd/zpool/zpool_main.c (revision 227654) +++ cddl/contrib/opensolaris/cmd/zpool/zpool_main.c (working copy) @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -68,6 +69,8 @@ static int zpool_do_online(int, char **); static int zpool_do_offline(int, char **); static int zpool_do_clear(int, char **); +static int zpool_do_reguid(int, char **); + static int zpool_do_attach(int, char **); static int zpool_do_detach(int, char **); static int zpool_do_replace(int, char **); @@ -126,7 +129,8 @@ typedef enum { HELP_UPGRADE, HELP_GET, HELP_SET, - HELP_SPLIT + HELP_SPLIT, + HELP_REGUID } zpool_help_t; @@ -172,6 +176,7 @@ static zpool_command_t command_table[] = { { "import", zpool_do_import, HELP_IMPORT }, { "export", zpool_do_export, HELP_EXPORT }, { "upgrade", zpool_do_upgrade, HELP_UPGRADE }, + { "reguid", zpool_do_reguid, HELP_REGUID }, { NULL }, { "history", zpool_do_history, HELP_HISTORY }, { "get", zpool_do_get, HELP_GET }, @@ -252,6 +257,8 @@ get_usage(zpool_help_t idx) { return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n" "\t [-o property=value] " "[ ...]\n")); + case HELP_REGUID: + return (gettext("\treguid \n")); } abort(); @@ -1455,6 +1462,7 @@ show_import(nvlist_t *config) const char *health; uint_t vsc; int namewidth; + char *comment; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); @@ -1471,9 +1479,9 @@ show_import(nvlist_t *config) reason = zpool_import_status(config, &msgid); - (void) printf(gettext(" pool: %s\n"), name); - (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); - (void) printf(gettext(" state: %s"), health); + (void) printf(gettext(" pool: %s\n"), name); + (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); + (void) printf(gettext(" state: %s"), health); if (pool_state == POOL_STATE_DESTROYED) (void) printf(gettext(" (DESTROYED)")); (void) printf("\n"); @@ -1482,58 +1490,59 @@ show_import(nvlist_t *config) case ZPOOL_STATUS_MISSING_DEV_R: case ZPOOL_STATUS_MISSING_DEV_NR: case ZPOOL_STATUS_BAD_GUID_SUM: - (void) printf(gettext("status: One or more devices are missing " - "from the system.\n")); + (void) printf(gettext(" status: One or more devices are " + "missing from the system.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_R: case ZPOOL_STATUS_CORRUPT_LABEL_NR: - (void) printf(gettext("status: One or more devices contains " + (void) printf(gettext(" status: One or more devices contains " "corrupted data.\n")); break; case ZPOOL_STATUS_CORRUPT_DATA: - (void) printf(gettext("status: The pool data is corrupted.\n")); + (void) printf( + gettext(" status: The pool data is corrupted.\n")); break; case ZPOOL_STATUS_OFFLINE_DEV: - (void) printf(gettext("status: One or more devices " + (void) printf(gettext(" status: One or more devices " "are offlined.\n")); break; case ZPOOL_STATUS_CORRUPT_POOL: - (void) printf(gettext("status: The pool metadata is " + (void) printf(gettext(" status: The pool metadata is " "corrupted.\n")); break; case ZPOOL_STATUS_VERSION_OLDER: - (void) printf(gettext("status: The pool is formatted using an " + (void) printf(gettext(" status: The pool is formatted using an " "older on-disk version.\n")); break; case ZPOOL_STATUS_VERSION_NEWER: - (void) printf(gettext("status: The pool is formatted using an " + (void) printf(gettext(" status: The pool is formatted using an " "incompatible version.\n")); break; case ZPOOL_STATUS_HOSTID_MISMATCH: - (void) printf(gettext("status: The pool was last accessed by " + (void) printf(gettext(" status: The pool was last accessed by " "another system.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_R: case ZPOOL_STATUS_FAULTED_DEV_NR: - (void) printf(gettext("status: One or more devices are " + (void) printf(gettext(" status: One or more devices are " "faulted.\n")); break; case ZPOOL_STATUS_BAD_LOG: - (void) printf(gettext("status: An intent log record cannot be " + (void) printf(gettext(" status: An intent log record cannot be " "read.\n")); break; case ZPOOL_STATUS_RESILVERING: - (void) printf(gettext("status: One or more devices were being " + (void) printf(gettext(" status: One or more devices were being " "resilvered.\n")); break; @@ -1549,26 +1558,26 @@ show_import(nvlist_t *config) */ if (vs->vs_state == VDEV_STATE_HEALTHY) { if (reason == ZPOOL_STATUS_VERSION_OLDER) - (void) printf(gettext("action: The pool can be " + (void) printf(gettext(" action: The pool can be " "imported using its name or numeric identifier, " "though\n\tsome features will not be available " "without an explicit 'zpool upgrade'.\n")); else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) - (void) printf(gettext("action: The pool can be " + (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " "identifier and\n\tthe '-f' flag.\n")); else - (void) printf(gettext("action: The pool can be " + (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " "identifier.\n")); } else if (vs->vs_state == VDEV_STATE_DEGRADED) { - (void) printf(gettext("action: The pool can be imported " + (void) printf(gettext(" action: The pool can be imported " "despite missing or damaged devices. The\n\tfault " "tolerance of the pool may be compromised if imported.\n")); } else { switch (reason) { case ZPOOL_STATUS_VERSION_NEWER: - (void) printf(gettext("action: The pool cannot be " + (void) printf(gettext(" action: The pool cannot be " "imported. Access the pool on a system running " "newer\n\tsoftware, or recreate the pool from " "backup.\n")); @@ -1576,16 +1585,20 @@ show_import(nvlist_t *config) case ZPOOL_STATUS_MISSING_DEV_R: case ZPOOL_STATUS_MISSING_DEV_NR: case ZPOOL_STATUS_BAD_GUID_SUM: - (void) printf(gettext("action: The pool cannot be " + (void) printf(gettext(" action: The pool cannot be " "imported. Attach the missing\n\tdevices and try " "again.\n")); break; default: - (void) printf(gettext("action: The pool cannot be " + (void) printf(gettext(" action: The pool cannot be " "imported due to damaged devices or data.\n")); } } + /* Print the comment attached to the pool. */ + if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) + (void) printf(gettext("comment: %s\n"), comment); + /* * If the state is "closed" or "can't open", and the aux state * is "corrupt data": @@ -1606,7 +1619,7 @@ show_import(nvlist_t *config) (void) printf(gettext(" see: http://www.sun.com/msg/%s\n"), msgid); - (void) printf(gettext("config:\n\n")); + (void) printf(gettext(" config:\n\n")); namewidth = max_width(NULL, nvroot, 0, 0); if (namewidth < 10) @@ -3321,6 +3334,52 @@ zpool_do_clear(int argc, char **argv) return (ret); } +/* + * zpool reguid + */ +int +zpool_do_reguid(int argc, char **argv) +{ + int c; + char *poolname; + zpool_handle_t *zhp; + int ret = 0; + + /* check options */ + while ((c = getopt(argc, argv, "")) != -1) { + switch (c) { + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + poolname = argv[0]; + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + ret = zpool_reguid(zhp); + + zpool_close(zhp); + return (ret); +} + + typedef struct scrub_cbdata { int cb_type; int cb_argc; Index: cddl/contrib/opensolaris/cmd/zpool/zpool.8 =================================================================== --- cddl/contrib/opensolaris/cmd/zpool/zpool.8 (revision 227654) +++ cddl/contrib/opensolaris/cmd/zpool/zpool.8 (working copy) @@ -1,11 +1,12 @@ '\" te .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2011 Nexenta Systems, Inc. All rights reserved. .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] .\" Portions Copyright 2011 Justin T. Gibbs .\" Portions Copyright 2011 Martin Matuska -.TH ZPOOL 8 "September 21, 2009" FreeBSD +.TH ZPOOL 8 "November 11, 2011" FreeBSD .SH NAME zpool \- configures ZFS storage pools .SH SYNOPSIS @@ -104,6 +105,11 @@ zpool \- configures ZFS storage pools .LP .nf +\fBzpool reguid\fR \fIpool\fR +.fi + +.LP +.nf \fBzpool remove\fR \fIpool\fR \fIdevice\fR ... .fi @@ -545,6 +551,17 @@ shortened column name, "cap". .sp .ne 2 .na +\fB\fBcomment\fR\fR +.ad +.RS 20n +A text string consisting of printable ASCII characters that will be stored +such that it is available even if the pool becomes faulted. An administrator +can provide additional information about a pool using this property. +.RE + +.sp +.ne 2 +.na \fB\fBhealth\fR\fR .ad .RS 20n @@ -1467,6 +1484,17 @@ become available to the pool. .sp .ne 2 .na +\fB\fBzpool reguid\fR \fIpool\fR +.ad +.sp .6 +.RS 4n +Generates a new unique identifier for the pool. You must ensure that all devices in this pool are online and +healthy before performing this action. +.RE + +.sp +.ne 2 +.na \fB\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...\fR .ad .sp .6 Index: cddl/contrib/opensolaris/cmd/ztest/ztest.c =================================================================== --- cddl/contrib/opensolaris/cmd/ztest/ztest.c (revision 227654) +++ cddl/contrib/opensolaris/cmd/ztest/ztest.c (working copy) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* @@ -259,6 +260,7 @@ ztest_func_t ztest_vdev_LUN_growth; ztest_func_t ztest_vdev_add_remove; ztest_func_t ztest_vdev_aux_add_remove; ztest_func_t ztest_split_pool; +ztest_func_t ztest_reguid; uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ @@ -289,6 +291,7 @@ ztest_info_t ztest_info[] = { { ztest_fault_inject, 1, &zopt_sometimes }, { ztest_ddt_repair, 1, &zopt_sometimes }, { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, + { ztest_reguid, 1, &zopt_sometimes }, { ztest_spa_rename, 1, &zopt_rarely }, { ztest_scrub, 1, &zopt_rarely }, { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, @@ -325,6 +328,7 @@ typedef struct ztest_shared { uint64_t zs_vdev_aux; uint64_t zs_alloc; uint64_t zs_space; + uint64_t zs_guid; mutex_t zs_vdev_lock; rwlock_t zs_name_lock; ztest_info_t zs_info[ZTEST_FUNCS]; @@ -4646,7 +4650,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) object = od[0].od_object; blocksize = od[0].od_blocksize; - pattern = spa_guid(spa) ^ dmu_objset_fsid_guid(os); + pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os); ASSERT(object != 0); @@ -4717,6 +4721,31 @@ ztest_scrub(ztest_ds_t *zd, uint64_t id) } /* + * Change the guid for the pool. + */ +/* ARGSUSED */ +void +ztest_reguid(ztest_ds_t *zd, uint64_t id) +{ + ztest_shared_t *zs = ztest_shared; + spa_t *spa = zs->zs_spa; + uint64_t orig, load; + + orig = spa_guid(spa); + load = spa_load_guid(spa); + if (spa_change_guid(spa) != 0) + return; + + if (zopt_verbose >= 3) { + (void) printf("Changed guid old %llu -> %llu\n", + (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); + } + + VERIFY3U(orig, !=, spa_guid(spa)); + VERIFY3U(load, ==, spa_load_guid(spa)); +} + +/* * Rename the pool to a different name and then rename it back. */ /* ARGSUSED */ @@ -5145,6 +5174,7 @@ ztest_run(ztest_shared_t *zs) { thread_t *tid; spa_t *spa; + objset_t *os; thread_t resume_tid; int error; @@ -5176,6 +5206,10 @@ ztest_run(ztest_shared_t *zs) spa->spa_debug = B_TRUE; zs->zs_spa = spa; + VERIFY3U(0, ==, dmu_objset_hold(zs->zs_pool, FTAG, &os)); + zs->zs_guid = dmu_objset_fsid_guid(os); + dmu_objset_rele(os, FTAG); + spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; /* Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h (revision 227654) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h (working copy) @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _LIBFS_IMPL_H @@ -116,7 +117,7 @@ struct zpool_handle { diskaddr_t zpool_start_block; }; -typedef enum { +typedef enum { PROTO_NFS = 0, PROTO_SMB = 1, PROTO_END = 2 @@ -148,6 +149,7 @@ int zpool_standard_error_fmt(libzfs_handle_t *, in int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***, size_t *); +zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *); int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t, Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c (revision 227654) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c (working copy) @@ -21,6 +21,8 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -261,6 +263,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t p case ZPOOL_PROP_ALTROOT: case ZPOOL_PROP_CACHEFILE: + case ZPOOL_PROP_COMMENT: if (zhp->zpool_props != NULL || zpool_get_all_props(zhp) == 0) { (void) strlcpy(buf, @@ -412,7 +415,7 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const c zpool_prop_t prop; char *strval; uint64_t intval; - char *slash; + char *slash, *check; struct stat64 statbuf; zpool_handle_t *zhp; nvlist_t *nvroot; @@ -573,6 +576,26 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const c *slash = '/'; break; + case ZPOOL_PROP_COMMENT: + for (check = strval; *check != '\0'; check++) { + if (!isprint(*check)) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "comment may only have printable " + "characters")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + } + if (strlen(strval) > ZPROP_MAX_COMMENT) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "comment must not exceed %d characters"), + ZPROP_MAX_COMMENT); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; case ZPOOL_PROP_READONLY: if (!flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -3011,6 +3034,26 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t gui } /* + * Change the GUID for a pool. + */ +int +zpool_reguid(zpool_handle_t *zhp) +{ + char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; + zfs_cmd_t zc = { 0 }; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, msg)); +} + +/* * Convert from a devid string to a path. */ static char * Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c (revision 227654) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c (working copy) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -53,7 +54,7 @@ extern void zfs_setprop_error(libzfs_handle_t *, z /* We need to use something for ENODATA. */ #define ENODATA EIDRM -static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t, +static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); static const zio_cksum_t zero_cksum = { 0 }; @@ -774,88 +775,6 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fs } /* - * Routines for dealing with the sorted snapshot functionality - */ -typedef struct zfs_node { - zfs_handle_t *zn_handle; - avl_node_t zn_avlnode; -} zfs_node_t; - -static int -zfs_sort_snaps(zfs_handle_t *zhp, void *data) -{ - avl_tree_t *avl = data; - zfs_node_t *node; - zfs_node_t search; - - search.zn_handle = zhp; - node = avl_find(avl, &search, NULL); - if (node) { - /* - * If this snapshot was renamed while we were creating the - * AVL tree, it's possible that we already inserted it under - * its old name. Remove the old handle before adding the new - * one. - */ - zfs_close(node->zn_handle); - avl_remove(avl, node); - free(node); - } - - node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); - node->zn_handle = zhp; - avl_add(avl, node); - - return (0); -} - -static int -zfs_snapshot_compare(const void *larg, const void *rarg) -{ - zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; - zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; - uint64_t lcreate, rcreate; - - /* - * Sort them according to creation time. We use the hidden - * CREATETXG property to get an absolute ordering of snapshots. - */ - lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); - rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); - - if (lcreate < rcreate) - return (-1); - else if (lcreate > rcreate) - return (+1); - else - return (0); -} - -int -zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) -{ - int ret = 0; - zfs_node_t *node; - avl_tree_t avl; - void *cookie = NULL; - - avl_create(&avl, zfs_snapshot_compare, - sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); - - ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl); - - for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) - ret |= callback(node->zn_handle, data); - - while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) - free(node); - - avl_destroy(&avl); - - return (ret); -} - -/* * Routines specific to "zfs send" */ typedef struct send_dump_data { @@ -865,7 +784,7 @@ typedef struct send_dump_data { char prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose; + boolean_t verbose, dryrun, parsable; int outfd; boolean_t err; nvlist_t *fss; @@ -875,8 +794,68 @@ typedef struct send_dump_data { nvlist_t *debugnv; char holdtag[ZFS_MAXNAMELEN]; int cleanup_fd; + uint64_t size; } send_dump_data_t; +static int +estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, + boolean_t fromorigin, uint64_t *sizep) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + assert(fromsnap_obj == 0 || !fromorigin); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + zc.zc_obj = fromorigin; + zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); + zc.zc_fromobj = fromsnap_obj; + zc.zc_guid = 1; /* estimate flag */ + + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot estimate space for '%s'"), zhp->zfs_name); + + switch (errno) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not an earlier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + + case ENOENT: + if (zfs_dataset_exists(hdl, zc.zc_name, + ZFS_TYPE_SNAPSHOT)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source (@%s) does not exist"), + zc.zc_value); + } + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + + case EDQUOT: + case EFBIG: + case EIO: + case ENOLINK: + case ENOSPC: + case ENXIO: + case EPIPE: + case ERANGE: + case EFAULT: + case EROFS: + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); + + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + + *sizep = zc.zc_objset_type; + + return (0); +} + /* * Dumps a backup of the given snapshot (incremental from fromsnap if it's not * NULL) to the file descriptor specified by outfd. @@ -904,7 +883,7 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap "fromsnap", fromsnap)); } - if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) { + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); @@ -917,7 +896,6 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap nvlist_free(thisdbg); switch (errno) { - case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not an earlier snapshot from the same fs")); @@ -937,6 +915,9 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap case EIO: case ENOLINK: case ENOSPC: +#ifdef sun + case ENOSTR: +#endif case ENXIO: case EPIPE: case ERANGE: @@ -966,6 +947,9 @@ hold_for_send(zfs_handle_t *zhp, send_dump_data_t assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + if (sdd->dryrun) + return (0); + /* * zfs_send() only opens a cleanup_fd for sends that need it, * e.g. replication and doall. @@ -999,7 +983,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) send_dump_data_t *sdd = arg; char *thissnap; int err; - boolean_t isfromsnap, istosnap; + boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; thissnap = strchr(zhp->zfs_name, '@') + 1; @@ -1076,15 +1060,47 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) return (err); } - /* send it */ + fromorigin = sdd->prevsnap[0] == '\0' && + (sdd->fromorigin || sdd->replicate); + if (sdd->verbose) { - (void) fprintf(stderr, "sending from @%s to %s\n", - sdd->prevsnap, zhp->zfs_name); + uint64_t size; + err = estimate_ioctl(zhp, sdd->prevsnap_obj, + fromorigin, &size); + + if (sdd->parsable) { + if (sdd->prevsnap[0] != '\0') { + (void) fprintf(stderr, "incremental\t%s\t%s", + sdd->prevsnap, zhp->zfs_name); + } else { + (void) fprintf(stderr, "full\t%s", + zhp->zfs_name); + } + } else { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "send from @%s to %s"), + sdd->prevsnap, zhp->zfs_name); + } + if (err == 0) { + if (sdd->parsable) { + (void) fprintf(stderr, "\t%llu\n", + (longlong_t)size); + } else { + char buf[16]; + zfs_nicenum(size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + " estimated size is %s\n"), buf); + } + sdd->size += size; + } else { + (void) fprintf(stderr, "\n"); + } } - err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, - sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate), - sdd->outfd, sdd->debugnv); + if (!sdd->dryrun) { + err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, + fromorigin, sdd->outfd, sdd->debugnv); + } (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); @@ -1103,8 +1119,8 @@ dump_filesystem(zfs_handle_t *zhp, void *arg) (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->tosnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { - (void) fprintf(stderr, "WARNING: " - "could not send %s@%s: does not exist\n", + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "WARNING: could not send %s@%s: does not exist\n"), zhp->zfs_name, sdd->tosnap); sdd->err = B_TRUE; return (0); @@ -1133,23 +1149,24 @@ dump_filesystem(zfs_handle_t *zhp, void *arg) rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); if (!sdd->seenfrom) { - (void) fprintf(stderr, + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s:\n" - "incremental source (%s@%s) does not exist\n", + "incremental source (%s@%s) does not exist\n"), zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); sdd->err = B_TRUE; } else if (!sdd->seento) { if (sdd->fromsnap) { - (void) fprintf(stderr, + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s:\n" "incremental source (%s@%s) " - "is not earlier than it\n", + "is not earlier than it\n"), zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); } else { - (void) fprintf(stderr, "WARNING: " - "could not send %s@%s: does not exist\n", + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "WARNING: " + "could not send %s@%s: does not exist\n"), zhp->zfs_name, sdd->tosnap); } sdd->err = B_TRUE; @@ -1195,11 +1212,12 @@ again: needagain = progress = B_FALSE; for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { - nvlist_t *fslist; + nvlist_t *fslist, *parent_nv; char *fsname; zfs_handle_t *zhp; int err; uint64_t origin_guid = 0; + uint64_t parent_guid = 0; VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); if (nvlist_lookup_boolean(fslist, "sent") == 0) @@ -1207,13 +1225,23 @@ again: VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); + (void) nvlist_lookup_uint64(fslist, "parentfromsnap", + &parent_guid); + if (parent_guid != 0) { + parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL); + if (!nvlist_exists(parent_nv, "sent")) { + /* parent has not been sent; skip this one */ + needagain = B_TRUE; + continue; + } + } + if (origin_guid != 0) { nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL); if (origin_nv != NULL && - nvlist_lookup_boolean(origin_nv, - "sent") == ENOENT) { + !nvlist_exists(origin_nv, "sent")) { /* * origin has not been sent yet; * skip this clone. @@ -1237,6 +1265,16 @@ again: assert(progress); goto again; } + + /* clean out the sent flags in case we reuse this fss */ + for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; + fspair = nvlist_next_nvpair(sdd->fss, fspair)) { + nvlist_t *fslist; + + VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); + (void) nvlist_remove_all(fslist, "sent"); + } + return (0); } @@ -1258,12 +1296,12 @@ again: */ int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, - sendflags_t flags, int outfd, snapfilter_cb_t filter_func, + sendflags_t *flags, int outfd, snapfilter_cb_t filter_func, void *cb_arg, nvlist_t **debugnvp) { char errbuf[1024]; send_dump_data_t sdd = { 0 }; - int err; + int err = 0; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; static uint64_t holdseq; @@ -1291,12 +1329,12 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, } } - if (zfs_spa_version(zhp, &spa_version) == 0 && + if (!flags->dryrun && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS && - (flags.doall || flags.replicate)) + (flags->doall || flags->replicate)) holdsnaps = B_TRUE; - if (flags.dedup) { + if (flags->dedup && !flags->dryrun) { featureflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); if (err = pipe(pipefd)) { @@ -1316,13 +1354,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, } } - if (flags.replicate || flags.doall || flags.props) { + if (flags->replicate || flags->doall || flags->props) { dmu_replay_record_t drr = { 0 }; char *packbuf = NULL; size_t buflen = 0; zio_cksum_t zc = { 0 }; - if (flags.replicate || flags.props) { + if (flags->replicate || flags->props) { nvlist_t *hdrnv; VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); @@ -1331,13 +1369,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, "fromsnap", fromsnap)); } VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); - if (!flags.replicate) { + if (!flags->replicate) { VERIFY(0 == nvlist_add_boolean(hdrnv, "not_recursive")); } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, - fromsnap, tosnap, flags.replicate, &fss, &fsavl); + fromsnap, tosnap, flags->replicate, &fss, &fsavl); if (err) goto err_out; VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); @@ -1354,33 +1392,34 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, } } - /* write first begin record */ - drr.drr_type = DRR_BEGIN; - drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; - DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo, - DMU_COMPOUNDSTREAM); - DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo, - featureflags); - (void) snprintf(drr.drr_u.drr_begin.drr_toname, - sizeof (drr.drr_u.drr_begin.drr_toname), - "%s@%s", zhp->zfs_name, tosnap); - drr.drr_payloadlen = buflen; - err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); + if (!flags->dryrun) { + /* write first begin record */ + drr.drr_type = DRR_BEGIN; + drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; + DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin. + drr_versioninfo, DMU_COMPOUNDSTREAM); + DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin. + drr_versioninfo, featureflags); + (void) snprintf(drr.drr_u.drr_begin.drr_toname, + sizeof (drr.drr_u.drr_begin.drr_toname), + "%s@%s", zhp->zfs_name, tosnap); + drr.drr_payloadlen = buflen; + err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); - /* write header nvlist */ - if (err != -1 && packbuf != NULL) { - err = cksum_and_write(packbuf, buflen, &zc, outfd); - } - free(packbuf); - if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); - err = errno; - goto stderr_out; - } + /* write header nvlist */ + if (err != -1 && packbuf != NULL) { + err = cksum_and_write(packbuf, buflen, &zc, + outfd); + } + free(packbuf); + if (err == -1) { + fsavl_destroy(fsavl); + nvlist_free(fss); + err = errno; + goto stderr_out; + } - /* write end record */ - if (err != -1) { + /* write end record */ bzero(&drr, sizeof (drr)); drr.drr_type = DRR_END; drr.drr_u.drr_end.drr_checksum = zc; @@ -1391,22 +1430,26 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, err = errno; goto stderr_out; } + + err = 0; } } /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; - if (flags.dedup) + if (flags->dedup) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; - sdd.replicate = flags.replicate; - sdd.doall = flags.doall; - sdd.fromorigin = flags.fromorigin; + sdd.replicate = flags->replicate; + sdd.doall = flags->doall; + sdd.fromorigin = flags->fromorigin; sdd.fss = fss; sdd.fsavl = fsavl; - sdd.verbose = flags.verbose; + sdd.verbose = flags->verbose; + sdd.parsable = flags->parsable; + sdd.dryrun = flags->dryrun; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) @@ -1423,11 +1466,31 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, } else { sdd.cleanup_fd = -1; } + if (flags->verbose) { + /* + * Do a verbose no-op dry run to get all the verbose output + * before generating any data. Then do a non-verbose real + * run to generate the streams. + */ + sdd.dryrun = B_TRUE; + err = dump_filesystems(zhp, &sdd); + sdd.dryrun = flags->dryrun; + sdd.verbose = B_FALSE; + if (flags->parsable) { + (void) fprintf(stderr, "size\t%llu\n", + (longlong_t)sdd.size); + } else { + char buf[16]; + zfs_nicenum(sdd.size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "total estimated size is %s\n"), buf); + } + } err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); - if (flags.dedup) { + if (flags->dedup) { (void) close(pipefd[0]); (void) pthread_join(tid, NULL); } @@ -1437,7 +1500,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, sdd.cleanup_fd = -1; } - if (flags.replicate || flags.doall || flags.props) { + if (!flags->dryrun && (flags->replicate || flags->doall || + flags->props)) { /* * write final end record. NB: want to do this even if * there was some error, because it might not be totally @@ -1458,7 +1522,7 @@ stderr_out: err_out: if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); - if (flags.dedup) { + if (flags->dedup) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); @@ -1529,7 +1593,7 @@ recv_read_nvlist(libzfs_handle_t *hdl, int fd, int static int recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, - int baselen, char *newname, recvflags_t flags) + int baselen, char *newname, recvflags_t *flags) { static int seq; zfs_cmd_t zc = { 0 }; @@ -1541,7 +1605,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, - flags.force ? MS_FORCE : 0); + flags->force ? MS_FORCE : 0); zfs_close(zhp); if (clp == NULL) return (-1); @@ -1557,7 +1621,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value)); - if (flags.verbose) { + if (flags->verbose) { (void) printf("attempting rename %s to %s\n", zc.zc_name, zc.zc_value); } @@ -1576,19 +1640,19 @@ recv_rename(libzfs_handle_t *hdl, const char *name "recv-%u-%u", getpid(), seq); (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); - if (flags.verbose) { + if (flags->verbose) { (void) printf("failed - trying rename %s to %s\n", zc.zc_name, zc.zc_value); } err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); if (err == 0) changelist_rename(clp, name, newname); - if (err && flags.verbose) { + if (err && flags->verbose) { (void) printf("failed (%u) - " "will try again on next pass\n", errno); } err = EAGAIN; - } else if (flags.verbose) { + } else if (flags->verbose) { if (err == 0) (void) printf("success\n"); else @@ -1603,7 +1667,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name static int recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, - char *newname, recvflags_t flags) + char *newname, recvflags_t *flags) { zfs_cmd_t zc = { 0 }; int err = 0; @@ -1616,7 +1680,7 @@ recv_destroy(libzfs_handle_t *hdl, const char *nam if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, - flags.force ? MS_FORCE : 0); + flags->force ? MS_FORCE : 0); if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS) @@ -1632,11 +1696,11 @@ recv_destroy(libzfs_handle_t *hdl, const char *nam zc.zc_defer_destroy = defer; (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); - if (flags.verbose) + if (flags->verbose) (void) printf("attempting destroy %s\n", zc.zc_name); err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); if (err == 0) { - if (flags.verbose) + if (flags->verbose) (void) printf("success\n"); changelist_remove(clp, zc.zc_name); } @@ -1659,6 +1723,7 @@ recv_destroy(libzfs_handle_t *hdl, const char *nam typedef struct guid_to_name_data { uint64_t guid; char *name; + char *skip; } guid_to_name_data_t; static int @@ -1667,21 +1732,35 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg) guid_to_name_data_t *gtnd = arg; int err; + if (gtnd->skip != NULL && + strcmp(zhp->zfs_name, gtnd->skip) == 0) { + return (0); + } + if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { (void) strcpy(gtnd->name, zhp->zfs_name); zfs_close(zhp); return (EEXIST); } + err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); zfs_close(zhp); return (err); } +/* + * Attempt to find the local dataset associated with this guid. In the case of + * multiple matches, we attempt to find the "best" match by searching + * progressively larger portions of the hierarchy. This allows one to send a + * tree of datasets individually and guarantee that we will find the source + * guid within that hierarchy, even if there are multiple matches elsewhere. + */ static int guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, char *name) { /* exhaustive search all local snapshots */ + char pname[ZFS_MAXNAMELEN]; guid_to_name_data_t gtnd; int err = 0; zfs_handle_t *zhp; @@ -1689,35 +1768,42 @@ guid_to_name(libzfs_handle_t *hdl, const char *par gtnd.guid = guid; gtnd.name = name; + gtnd.skip = NULL; - if (strchr(parent, '@') == NULL) { - zhp = make_dataset_handle(hdl, parent); - if (zhp != NULL) { - err = zfs_iter_children(zhp, guid_to_name_cb, >nd); - zfs_close(zhp); - if (err == EEXIST) - return (0); - } - } + (void) strlcpy(pname, parent, sizeof (pname)); - cp = strchr(parent, '/'); - if (cp) + /* + * Search progressively larger portions of the hierarchy. This will + * select the "most local" version of the origin snapshot in the case + * that there are multiple matching snapshots in the system. + */ + while ((cp = strrchr(pname, '/')) != NULL) { + + /* Chop off the last component and open the parent */ *cp = '\0'; - zhp = make_dataset_handle(hdl, parent); - if (cp) - *cp = '/'; + zhp = make_dataset_handle(hdl, pname); - if (zhp) { + if (zhp == NULL) + continue; + err = zfs_iter_children(zhp, guid_to_name_cb, >nd); zfs_close(zhp); + if (err == EEXIST) + return (0); + + /* + * Remember the dataset that we already searched, so we + * skip it next time through. + */ + gtnd.skip = pname; } - return (err == EEXIST ? 0 : ENOENT); - + return (ENOENT); } /* - * Return true if dataset guid1 is created before guid2. + * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if + * guid1 is after guid2. */ static int created_before(libzfs_handle_t *hdl, avl_tree_t *avl, @@ -1727,7 +1813,8 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *a char *fsname, *snapname; char buf[ZFS_MAXNAMELEN]; int rv; - zfs_node_t zn1, zn2; + zfs_handle_t *guid1hdl, *guid2hdl; + uint64_t create1, create2; if (guid2 == 0) return (0); @@ -1737,30 +1824,38 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *a nvfs = fsavl_find(avl, guid1, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); - zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); - if (zn1.zn_handle == NULL) + guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); + if (guid1hdl == NULL) return (-1); nvfs = fsavl_find(avl, guid2, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); - zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); - if (zn2.zn_handle == NULL) { - zfs_close(zn2.zn_handle); + guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); + if (guid2hdl == NULL) { + zfs_close(guid1hdl); return (-1); } - rv = (zfs_snapshot_compare(&zn1, &zn2) == -1); + create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG); + create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG); - zfs_close(zn1.zn_handle); - zfs_close(zn2.zn_handle); + if (create1 < create2) + rv = -1; + else if (create1 > create2) + rv = +1; + else + rv = 0; + zfs_close(guid1hdl); + zfs_close(guid2hdl); + return (rv); } static int recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, - recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, + recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, nvlist_t *renamed) { nvlist_t *local_nv; @@ -1777,7 +1872,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); - if (flags.dryrun) + if (flags->dryrun) return (0); again: @@ -1837,7 +1932,7 @@ again: nvlist_t *origin_nvfs; char *origin_fsname; - if (flags.verbose) + if (flags->verbose) (void) printf("promoting %s\n", fsname); origin_nvfs = fsavl_find(local_avl, originguid, @@ -1885,7 +1980,7 @@ again: if (found == NULL) { char name[ZFS_MAXNAMELEN]; - if (!flags.force) + if (!flags->force) continue; (void) snprintf(name, sizeof (name), "%s@%s", @@ -1943,7 +2038,7 @@ again: /* check for delete */ if (stream_nvfs == NULL) { - if (!flags.force) + if (!flags->force) continue; error = recv_destroy(hdl, fsname, strlen(tofs)+1, @@ -1956,7 +2051,7 @@ again: } if (fromguid == 0) { - if (flags.verbose) { + if (flags->verbose) { (void) printf("local fs %s does not have " "fromsnap (%s in stream); must have " "been deleted locally; ignoring\n", @@ -1981,7 +2076,7 @@ again: if ((stream_parent_fromsnap_guid != 0 && parent_fromsnap_guid != 0 && stream_parent_fromsnap_guid != parent_fromsnap_guid) || - ((flags.isprefix || strcmp(tofs, fsname) != 0) && + ((flags->isprefix || strcmp(tofs, fsname) != 0) && (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { nvlist_t *parent; char tryname[ZFS_MAXNAMELEN]; @@ -2004,7 +2099,7 @@ again: "%s%s", pname, strrchr(stream_fsname, '/')); } else { tryname[0] = '\0'; - if (flags.verbose) { + if (flags->verbose) { (void) printf("local fs %s new parent " "not found\n", fsname); } @@ -2032,7 +2127,7 @@ again: if (needagain && progress) { /* do another pass to fix up temporary names */ - if (flags.verbose) + if (flags->verbose) (void) printf("another pass:\n"); goto again; } @@ -2042,7 +2137,7 @@ again: static int zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, - recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc, + recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { nvlist_t *stream_nv = NULL; @@ -2071,7 +2166,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, */ if (drr->drr_payloadlen != 0) { error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, - &stream_nv, flags.byteswap, zc); + &stream_nv, flags->byteswap, zc); if (error) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; @@ -2092,9 +2187,9 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, * Read in the end record and verify checksum. */ if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre), - flags.byteswap, NULL))) + flags->byteswap, NULL))) goto out; - if (flags.byteswap) { + if (flags->byteswap) { drre.drr_type = BSWAP_32(drre.drr_type); drre.drr_u.drr_end.drr_checksum.zc_word[0] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]); @@ -2135,11 +2230,11 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, nvpair_t *pair = NULL; (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN); - if (flags.isprefix) { + if (flags->isprefix) { struct drr_begin *drrb = &drr->drr_u.drr_begin; int i; - if (flags.istail) { + if (flags->istail) { cp = strrchr(drrb->drr_toname, '/'); if (cp == NULL) { (void) strlcat(tofs, "/", @@ -2157,7 +2252,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, *strchr(tofs, '@') = '\0'; } - if (recursive && !flags.dryrun && !flags.nomount) { + if (recursive && !flags->dryrun && !flags->nomount) { VERIFY(0 == nvlist_alloc(&renamed, NV_UNIQUE_NAME, 0)); } @@ -2331,7 +2426,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t */ static int zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, - recvflags_t flags, dmu_replay_record_t *drr, + recvflags_t *flags, dmu_replay_record_t *drr, dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) @@ -2373,7 +2468,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co if (err) VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); - if (flags.canmountoff) { + if (flags->canmountoff) { VERIFY(0 == nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); } @@ -2400,7 +2495,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co * If they specified a snapshot, chop the entire name stored in * the stream. */ - if (flags.istail) { + if (flags->istail) { /* * A filesystem was specified with -e. We want to tack on only * the tail of the sent snapshot path. @@ -2426,7 +2521,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co } else { chopprefix = drrb->drr_toname + (chopprefix - sendfs); } - } else if (flags.isprefix) { + } else if (flags->isprefix) { /* * A filesystem was specified with -d. We want to tack on * everything but the first element of the sent snapshot path @@ -2480,7 +2575,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co * Determine the name of the origin snapshot, store in zc_string. */ if (drrb->drr_flags & DRR_FLAG_CLONE) { - if (guid_to_name(hdl, tosnap, + if (guid_to_name(hdl, zc.zc_value, drrb->drr_fromguid, zc.zc_string) != 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -2488,7 +2583,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co zc.zc_value); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } - if (flags.verbose) + if (flags->verbose) (void) printf("found clone origin %s\n", zc.zc_string); } @@ -2511,7 +2606,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char suffix[ZFS_MAXNAMELEN]; (void) strcpy(suffix, strrchr(zc.zc_value, '/')); - if (guid_to_name(hdl, tosnap, parent_snapguid, + if (guid_to_name(hdl, zc.zc_name, parent_snapguid, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, suffix); @@ -2533,12 +2628,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co * topmost path in the stream, then if the fs does not exist we * should look no further. */ - if ((flags.isprefix || (*(chopprefix = drrb->drr_toname + + if ((flags->isprefix || (*(chopprefix = drrb->drr_toname + strlen(sendfs)) != '\0' && *chopprefix != '@')) && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char snap[ZFS_MAXNAMELEN]; (void) strcpy(snap, strchr(zc.zc_value, '@')); - if (guid_to_name(hdl, tosnap, drrb->drr_fromguid, + if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, snap); @@ -2560,7 +2655,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co * snapshots). */ if (stream_wantsnewfs) { - if (!flags.force) { + if (!flags->force) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' exists\n" @@ -2596,7 +2691,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } - if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && + if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && stream_wantsnewfs) { /* We can't do online recv in this case */ clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0); @@ -2635,7 +2730,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co */ *cp = '\0'; - if (flags.isprefix && !flags.istail && !flags.dryrun && + if (flags->isprefix && !flags->istail && !flags->dryrun && create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); @@ -2646,18 +2741,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co zc.zc_begin_record = drr_noswap->drr_u.drr_begin; zc.zc_cookie = infd; - zc.zc_guid = flags.force; - if (flags.verbose) { + zc.zc_guid = flags->force; + if (flags->verbose) { (void) printf("%s %s stream of %s into %s\n", - flags.dryrun ? "would receive" : "receiving", + flags->dryrun ? "would receive" : "receiving", drrb->drr_fromguid ? "incremental" : "full", drrb->drr_toname, zc.zc_value); (void) fflush(stdout); } - if (flags.dryrun) { + if (flags->dryrun) { zcmd_free_nvlists(&zc); - return (recv_skip(hdl, infd, flags.byteswap)); + return (recv_skip(hdl, infd, flags->byteswap)); } zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; @@ -2738,12 +2833,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co nvlist_free(local_nv); if (fs != NULL) { - if (flags.verbose) { + if (flags->verbose) { (void) printf("snap %s already exists; " "ignoring\n", zc.zc_value); } err = ioctl_err = recv_skip(hdl, infd, - flags.byteswap); + flags->byteswap); } } *cp = '@'; @@ -2795,7 +2890,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co case EDQUOT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s space quota exceeded"), zc.zc_name); - (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); + (void) zfs_error(hdl, EZFS_NOSPC, errbuf); break; default: (void) zfs_standard_error(hdl, ioctl_errno, errbuf); @@ -2853,7 +2948,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co *action_handlep = zc.zc_action_handle; - if (flags.verbose) { + if (flags->verbose) { char buf1[64]; char buf2[64]; uint64_t bytes = zc.zc_cookie; @@ -2871,7 +2966,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, co } static int -zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, +zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { @@ -2886,7 +2981,7 @@ static int (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); - if (flags.isprefix && + if (flags->isprefix && !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs " "(%s) does not exist"), tosnap); @@ -2906,7 +3001,7 @@ static int /* the kernel needs the non-byteswapped begin record */ drr_noswap = drr; - flags.byteswap = B_FALSE; + flags->byteswap = B_FALSE; if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { /* * We computed the checksum in the wrong byteorder in @@ -2914,7 +3009,7 @@ static int */ bzero(&zcksum, sizeof (zio_cksum_t)); fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum); - flags.byteswap = B_TRUE; + flags->byteswap = B_TRUE; drr.drr_type = BSWAP_32(drr.drr_type); drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); @@ -2982,7 +3077,7 @@ static int * (-1 will override -2). */ int -zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, +zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, int infd, avl_tree_t *stream_avl) { char *top_zfs = NULL; @@ -2998,7 +3093,7 @@ int VERIFY(0 == close(cleanup_fd)); - if (err == 0 && !flags.nomount && top_zfs) { + if (err == 0 && !flags->nomount && top_zfs) { zfs_handle_t *zhp; prop_changelist_t *clp; Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h (revision 227654) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h (working copy) @@ -21,8 +21,8 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 Pawel Jakub Dawidek . + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. * All rights reserved. */ @@ -233,6 +233,7 @@ typedef struct splitflags { */ extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); +extern int zpool_reguid(zpool_handle_t *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); @@ -384,6 +385,7 @@ extern void zpool_explain_recover(libzfs_handle_t * underlying datasets, only the references to them. */ extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); +extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); extern void zfs_close(zfs_handle_t *); extern zfs_type_t zfs_get_type(const zfs_handle_t *); extern const char *zfs_get_name(const zfs_handle_t *); @@ -417,13 +419,21 @@ extern int zfs_prop_get_userquota_int(zfs_handle_t uint64_t *propvalue); extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); +extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue); +extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, + char *propbuf, int proplen, boolean_t literal); +extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, + uint64_t *usedp); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); extern int zfs_prop_is_string(zfs_prop_t prop); extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); +extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); + typedef struct zprop_list { int pl_prop; char *pl_user_prop; @@ -497,6 +507,7 @@ extern int zfs_iter_dependents(zfs_handle_t *, boo extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *); +extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); typedef struct get_all_cb { zfs_handle_t **cb_handles; @@ -517,6 +528,7 @@ extern int zfs_create(libzfs_handle_t *, const cha extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); +extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); @@ -533,29 +545,34 @@ extern int zfs_rename(zfs_handle_t *, const char * typedef struct sendflags { /* print informational messages (ie, -v was specified) */ - int verbose : 1; + boolean_t verbose; /* recursive send (ie, -R) */ - int replicate : 1; + boolean_t replicate; /* for incrementals, do all intermediate snapshots */ - int doall : 1; /* (ie, -I) */ + boolean_t doall; /* if dataset is a clone, do incremental from its origin */ - int fromorigin : 1; + boolean_t fromorigin; /* do deduplication */ - int dedup : 1; + boolean_t dedup; /* send properties (ie, -p) */ - int props : 1; + boolean_t props; + + /* do not send (no-op, ie. -n) */ + boolean_t dryrun; + + /* parsable verbose output (ie. -P) */ + boolean_t parsable; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); -extern int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, - sendflags_t flags, int outfd, snapfilter_cb_t filter_func, - void *cb_arg, nvlist_t **debugnvp); +extern int zfs_send(zfs_handle_t *, const char *, const char *, + sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, @@ -575,34 +592,34 @@ extern int zfs_set_fsacl(zfs_handle_t *, boolean_t typedef struct recvflags { /* print informational messages (ie, -v was specified) */ - int verbose : 1; + boolean_t verbose; /* the destination is a prefix, not the exact fs (ie, -d) */ - int isprefix : 1; + boolean_t isprefix; /* * Only the tail of the sent snapshot path is appended to the * destination to determine the received snapshot name (ie, -e). */ - int istail : 1; + boolean_t istail; /* do not actually do the recv, just check if it would work (ie, -n) */ - int dryrun : 1; + boolean_t dryrun; /* rollback/destroy filesystems as necessary (eg, -F) */ - int force : 1; + boolean_t force; /* set "canmount=off" on all modified filesystems */ - int canmountoff : 1; + boolean_t canmountoff; /* byteswap flag is used internally; callers need not specify */ - int byteswap : 1; + boolean_t byteswap; /* do not mount file systems as they are extracted (private) */ - int nomount : 1; + boolean_t nomount; } recvflags_t; -extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t, +extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c (revision 227654) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c (working copy) @@ -496,7 +496,7 @@ make_dataset_handle(libzfs_handle_t *hdl, const ch return (zhp); } -static zfs_handle_t * +zfs_handle_t * make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); @@ -513,6 +513,53 @@ make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_c return (zhp); } +zfs_handle_t * +zfs_handle_dup(zfs_handle_t *zhp_orig) +{ + zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); + + if (zhp == NULL) + return (NULL); + + zhp->zfs_hdl = zhp_orig->zfs_hdl; + zhp->zpool_hdl = zhp_orig->zpool_hdl; + (void) strlcpy(zhp->zfs_name, zhp_orig->zfs_name, + sizeof (zhp->zfs_name)); + zhp->zfs_type = zhp_orig->zfs_type; + zhp->zfs_head_type = zhp_orig->zfs_head_type; + zhp->zfs_dmustats = zhp_orig->zfs_dmustats; + if (zhp_orig->zfs_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_props, &zhp->zfs_props, 0) != 0) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + if (zhp_orig->zfs_user_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_user_props, + &zhp->zfs_user_props, 0) != 0) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + if (zhp_orig->zfs_recvd_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_recvd_props, + &zhp->zfs_recvd_props, 0)) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + zhp->zfs_mntcheck = zhp_orig->zfs_mntcheck; + if (zhp_orig->zfs_mntopts != NULL) { + zhp->zfs_mntopts = zfs_strdup(zhp_orig->zfs_hdl, + zhp_orig->zfs_mntopts); + } + zhp->zfs_props_table = zhp_orig->zfs_props_table; + return (zhp); +} + /* * Opens the given snapshot, filesystem, or volume. The 'types' * argument is a mask of acceptable types. The function will print an @@ -876,6 +923,12 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_ goto error; } continue; + } else if (prop == ZPROP_INVAL && zfs_prop_written(propname)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' is readonly"), + propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); + goto error; } if (prop == ZPROP_INVAL) { @@ -1869,8 +1922,6 @@ zfs_prop_get_recvd(zfs_handle_t *zhp, const char * err = zfs_prop_get(zhp, prop, propbuf, proplen, NULL, NULL, 0, literal); zfs_unset_recvd_props_mode(zhp, &cookie); - } else if (zfs_prop_userquota(propname)) { - return (-1); } else { nvlist_t *propval; char *recvdval; @@ -1885,6 +1936,120 @@ zfs_prop_get_recvd(zfs_handle_t *zhp, const char * return (err == 0 ? 0 : -1); } +static int +get_clones_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) +{ + nvlist_t *value; + nvpair_t *pair; + + value = zfs_get_clones_nvl(zhp); + if (value == NULL) + return (-1); + + propbuf[0] = '\0'; + for (pair = nvlist_next_nvpair(value, NULL); pair != NULL; + pair = nvlist_next_nvpair(value, pair)) { + if (propbuf[0] != '\0') + (void) strlcat(propbuf, ",", proplen); + (void) strlcat(propbuf, nvpair_name(pair), proplen); + } + + return (0); +} + +struct get_clones_arg { + uint64_t numclones; + nvlist_t *value; + const char *origin; + char buf[ZFS_MAXNAMELEN]; +}; + +int +get_clones_cb(zfs_handle_t *zhp, void *arg) +{ + struct get_clones_arg *gca = arg; + + if (gca->numclones == 0) { + zfs_close(zhp); + return (0); + } + + if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, gca->buf, sizeof (gca->buf), + NULL, NULL, 0, B_TRUE) != 0) + goto out; + if (strcmp(gca->buf, gca->origin) == 0) { + if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) { + zfs_close(zhp); + return (no_memory(zhp->zfs_hdl)); + } + gca->numclones--; + } + +out: + (void) zfs_iter_children(zhp, get_clones_cb, gca); + zfs_close(zhp); + return (0); +} + +nvlist_t * +zfs_get_clones_nvl(zfs_handle_t *zhp) +{ + nvlist_t *nv, *value; + + if (nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), &nv) != 0) { + struct get_clones_arg gca; + + /* + * if this is a snapshot, then the kernel wasn't able + * to get the clones. Do it by slowly iterating. + */ + if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) + return (NULL); + if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0) + return (NULL); + if (nvlist_alloc(&value, NV_UNIQUE_NAME, 0) != 0) { + nvlist_free(nv); + return (NULL); + } + + gca.numclones = zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES); + gca.value = value; + gca.origin = zhp->zfs_name; + + if (gca.numclones != 0) { + zfs_handle_t *root; + char pool[ZFS_MAXNAMELEN]; + char *cp = pool; + + /* get the pool name */ + (void) strlcpy(pool, zhp->zfs_name, sizeof (pool)); + (void) strsep(&cp, "/@"); + root = zfs_open(zhp->zfs_hdl, pool, + ZFS_TYPE_FILESYSTEM); + + (void) get_clones_cb(root, &gca); + } + + if (gca.numclones != 0 || + nvlist_add_nvlist(nv, ZPROP_VALUE, value) != 0 || + nvlist_add_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), nv) != 0) { + nvlist_free(nv); + nvlist_free(value); + return (NULL); + } + nvlist_free(nv); + nvlist_free(value); + verify(0 == nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), &nv)); + } + + verify(nvlist_lookup_nvlist(nv, ZPROP_VALUE, &value) == 0); + + return (value); +} + /* * Retrieve a property from the given object. If 'literal' is specified, then * numbers are left as exact values. Otherwise, numbers are converted to a @@ -2013,6 +2178,11 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, c return (-1); break; + case ZFS_PROP_CLONES: + if (get_clones_string(zhp, propbuf, proplen) != 0) + return (-1); + break; + case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: @@ -2385,7 +2555,7 @@ zfs_prop_get_userquota_common(zfs_handle_t *zhp, c int err; zfs_cmd_t zc = { 0 }; - (void) strncpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); err = userquota_propname_decode(propname, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), @@ -2437,144 +2607,95 @@ zfs_prop_get_userquota(zfs_handle_t *zhp, const ch return (0); } -/* - * Returns the name of the given zfs handle. - */ -const char * -zfs_get_name(const zfs_handle_t *zhp) +int +zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue) { - return (zhp->zfs_name); -} + int err; + zfs_cmd_t zc = { 0 }; + const char *snapname; -/* - * Returns the type of the given zfs handle. - */ -zfs_type_t -zfs_get_type(const zfs_handle_t *zhp) -{ - return (zhp->zfs_type); -} + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); -static int -zfs_do_list_ioctl(zfs_handle_t *zhp, unsigned long arg, zfs_cmd_t *zc) -{ - int rc; - uint64_t orig_cookie; + snapname = strchr(propname, '@') + 1; + if (strchr(snapname, '@')) { + (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + } else { + /* snapname is the short name, append it to zhp's fsname */ + char *cp; - orig_cookie = zc->zc_cookie; -top: - (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); - rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc); + (void) strlcpy(zc.zc_value, zhp->zfs_name, + sizeof (zc.zc_value)); + cp = strchr(zc.zc_value, '@'); + if (cp != NULL) + *cp = '\0'; + (void) strlcat(zc.zc_value, "@", sizeof (zc.zc_value)); + (void) strlcat(zc.zc_value, snapname, sizeof (zc.zc_value)); + } - if (rc == -1) { - switch (errno) { - case ENOMEM: - /* expand nvlist memory and try again */ - if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, zc) != 0) { - zcmd_free_nvlists(zc); - return (-1); - } - zc->zc_cookie = orig_cookie; - goto top; - /* - * An errno value of ESRCH indicates normal completion. - * If ENOENT is returned, then the underlying dataset - * has been removed since we obtained the handle. - */ - case ESRCH: - case ENOENT: - rc = 1; - break; - default: - rc = zfs_standard_error(zhp->zfs_hdl, errno, - dgettext(TEXT_DOMAIN, - "cannot iterate filesystems")); - break; - } - } - return (rc); + err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc); + if (err) + return (err); + + *propvalue = zc.zc_cookie; + return (0); } -/* - * Iterate over all child filesystems - */ int -zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) +zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, + char *propbuf, int proplen, boolean_t literal) { - zfs_cmd_t zc = { 0 }; - zfs_handle_t *nzhp; - int ret; + int err; + uint64_t propvalue; - if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) - return (0); + err = zfs_prop_get_written_int(zhp, propname, &propvalue); - if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) - return (-1); + if (err) + return (err); - while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT, - &zc)) == 0) { - /* - * Silently ignore errors, as the only plausible explanation is - * that the pool has since been removed. - */ - if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, - &zc)) == NULL) { - continue; - } - - if ((ret = func(nzhp, data)) != 0) { - zcmd_free_nvlists(&zc); - return (ret); - } + if (literal) { + (void) snprintf(propbuf, proplen, "%llu", propvalue); + } else { + zfs_nicenum(propvalue, propbuf, proplen); } - zcmd_free_nvlists(&zc); - return ((ret < 0) ? ret : 0); + return (0); } -/* - * Iterate over all snapshots - */ int -zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data) +zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, + uint64_t *usedp) { + int err; zfs_cmd_t zc = { 0 }; - zfs_handle_t *nzhp; - int ret; - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) - return (0); + (void) strlcpy(zc.zc_name, lastsnap->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_value, firstsnap->zfs_name, sizeof (zc.zc_value)); - if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) - return (-1); - while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT, - &zc)) == 0) { + err = ioctl(lastsnap->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_SNAPS, &zc); + if (err) + return (err); - if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, - &zc)) == NULL) { - continue; - } + *usedp = zc.zc_cookie; - if ((ret = func(nzhp, data)) != 0) { - zcmd_free_nvlists(&zc); - return (ret); - } - } - zcmd_free_nvlists(&zc); - return ((ret < 0) ? ret : 0); + return (0); } /* - * Iterate over all children, snapshots and filesystems + * Returns the name of the given zfs handle. */ -int -zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) +const char * +zfs_get_name(const zfs_handle_t *zhp) { - int ret; + return (zhp->zfs_name); +} - if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0) - return (ret); - - return (zfs_iter_snapshots(zhp, func, data)); +/* + * Returns the type of the given zfs handle. + */ +zfs_type_t +zfs_get_type(const zfs_handle_t *zhp) +{ + return (zhp->zfs_type); } /* @@ -2600,19 +2721,20 @@ is_descendant(const char *ds1, const char *ds2) /* * Given a complete name, return just the portion that refers to the parent. - * Can return NULL if this is a pool. + * Will return -1 if there is no parent (path is just the name of the + * pool). */ static int parent_name(const char *path, char *buf, size_t buflen) { - char *loc; + char *slashp; - if ((loc = strrchr(path, '/')) == NULL) + (void) strlcpy(buf, path, buflen); + + if ((slashp = strrchr(buf, '/')) == NULL) return (-1); + *slashp = '\0'; - (void) strncpy(buf, path, MIN(buflen, loc - path)); - buf[loc - path] = '\0'; - return (0); } @@ -3010,9 +3132,8 @@ zfs_destroy(zfs_handle_t *zhp, boolean_t defer) } struct destroydata { - char *snapname; - boolean_t gotone; - boolean_t closezhp; + nvlist_t *nvl; + const char *snapname; }; static int @@ -3021,24 +3142,19 @@ zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) struct destroydata *dd = arg; zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; - boolean_t closezhp = dd->closezhp; int rv = 0; - (void) strlcpy(name, zhp->zfs_name, sizeof (name)); - (void) strlcat(name, "@", sizeof (name)); - (void) strlcat(name, dd->snapname, sizeof (name)); + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, dd->snapname); szhp = make_dataset_handle(zhp->zfs_hdl, name); if (szhp) { - dd->gotone = B_TRUE; + verify(nvlist_add_boolean(dd->nvl, name) == 0); zfs_close(szhp); } - dd->closezhp = B_TRUE; - if (!dd->gotone) - rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg); - if (closezhp) - zfs_close(zhp); + rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); + zfs_close(zhp); return (rv); } @@ -3048,29 +3164,45 @@ zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) int zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) { - zfs_cmd_t zc = { 0 }; int ret; struct destroydata dd = { 0 }; dd.snapname = snapname; - (void) zfs_check_snap_cb(zhp, &dd); + verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); + (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); - if (!dd.gotone) { - return (zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, + if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) { + ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), - zhp->zfs_name, snapname)); + zhp->zfs_name, snapname); + } else { + ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer); } + nvlist_free(dd.nvl); + return (ret); +} +/* + * Destroys all the snapshots named in the nvlist. They must be underneath + * the zhp (either snapshots of it, or snapshots of its descendants). + */ +int +zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer) +{ + int ret; + zfs_cmd_t zc = { 0 }; + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, snaps) != 0) + return (-1); zc.zc_defer_destroy = defer; - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS, &zc); + ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS_NVL, &zc); if (ret != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot destroy '%s@%s'"), zc.zc_name, snapname); + "cannot destroy snapshots in %s"), zc.zc_name); switch (errno) { case EEXIST: @@ -3106,7 +3238,7 @@ zfs_clone(zfs_handle_t *zhp, const char *target, n (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), target); - /* validate the target name */ + /* validate the target/clone name */ if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); @@ -3443,42 +3575,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap } /* - * Iterate over all dependents for a given dataset. This includes both - * hierarchical dependents (children) and data dependents (snapshots and - * clones). The bulk of the processing occurs in get_dependents() in - * libzfs_graph.c. - */ -int -zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion, - zfs_iter_f func, void *data) -{ - char **dependents; - size_t count; - int i; - zfs_handle_t *child; - int ret = 0; - - if (get_dependents(zhp->zfs_hdl, allowrecursion, zhp->zfs_name, - &dependents, &count) != 0) - return (-1); - - for (i = 0; i < count; i++) { - if ((child = make_dataset_handle(zhp->zfs_hdl, - dependents[i])) == NULL) - continue; - - if ((ret = func(child, data)) != 0) - break; - } - - for (i = 0; i < count; i++) - free(dependents[i]); - free(dependents); - - return (ret); -} - -/* * Renames the given dataset. */ int @@ -3947,7 +4043,7 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_pro int error; zfs_useracct_t buf[100]; - (void) strncpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_objset_type = type; zc.zc_nvlist_dst = (uintptr_t)buf; Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c (revision 227654) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c (working copy) @@ -1,653 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Iterate over all children of the current object. This includes the normal - * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to - * walk all datasets in the pool, and construct a directed graph of the form: - * - * home - * | - * +----+----+ - * | | - * v v ws - * bar baz | - * | | - * v v - * @yesterday ----> foo - * - * In order to construct this graph, we have to walk every dataset in the pool, - * because the clone parent is stored as a property of the child, not the - * parent. The parent only keeps track of the number of clones. - * - * In the normal case (without clones) this would be rather expensive. To avoid - * unnecessary computation, we first try a walk of the subtree hierarchy - * starting from the initial node. At each dataset, we construct a node in the - * graph and an edge leading from its parent. If we don't see any snapshots - * with a non-zero clone count, then we are finished. - * - * If we do find a cloned snapshot, then we finish the walk of the current - * subtree, but indicate that we need to do a complete walk. We then perform a - * global walk of all datasets, avoiding the subtree we already processed. - * - * At the end of this, we'll end up with a directed graph of all relevant (and - * possible some irrelevant) datasets in the system. We need to both find our - * limiting subgraph and determine a safe ordering in which to destroy the - * datasets. We do a topological ordering of our graph starting at our target - * dataset, and then walk the results in reverse. - * - * It's possible for the graph to have cycles if, for example, the user renames - * a clone to be the parent of its origin snapshot. The user can request to - * generate an error in this case, or ignore the cycle and continue. - * - * When removing datasets, we want to destroy the snapshots in chronological - * order (because this is the most efficient method). In order to accomplish - * this, we store the creation transaction group with each vertex and keep each - * vertex's edges sorted according to this value. The topological sort will - * automatically walk the snapshots in the correct order. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "libzfs_impl.h" -#include "zfs_namecheck.h" - -#define MIN_EDGECOUNT 4 - -/* - * Vertex structure. Indexed by dataset name, this structure maintains a list - * of edges to other vertices. - */ -struct zfs_edge; -typedef struct zfs_vertex { - char zv_dataset[ZFS_MAXNAMELEN]; - struct zfs_vertex *zv_next; - int zv_visited; - uint64_t zv_txg; - struct zfs_edge **zv_edges; - int zv_edgecount; - int zv_edgealloc; -} zfs_vertex_t; - -enum { - VISIT_SEEN = 1, - VISIT_SORT_PRE, - VISIT_SORT_POST -}; - -/* - * Edge structure. Simply maintains a pointer to the destination vertex. There - * is no need to store the source vertex, since we only use edges in the context - * of the source vertex. - */ -typedef struct zfs_edge { - zfs_vertex_t *ze_dest; - struct zfs_edge *ze_next; -} zfs_edge_t; - -#define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */ - -/* - * Graph structure. Vertices are maintained in a hash indexed by dataset name. - */ -typedef struct zfs_graph { - zfs_vertex_t **zg_hash; - size_t zg_size; - size_t zg_nvertex; - const char *zg_root; - int zg_clone_count; -} zfs_graph_t; - -/* - * Allocate a new edge pointing to the target vertex. - */ -static zfs_edge_t * -zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest) -{ - zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t)); - - if (zep == NULL) - return (NULL); - - zep->ze_dest = dest; - - return (zep); -} - -/* - * Destroy an edge. - */ -static void -zfs_edge_destroy(zfs_edge_t *zep) -{ - free(zep); -} - -/* - * Allocate a new vertex with the given name. - */ -static zfs_vertex_t * -zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset) -{ - zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t)); - - if (zvp == NULL) - return (NULL); - - assert(strlen(dataset) < ZFS_MAXNAMELEN); - - (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset)); - - if ((zvp->zv_edges = zfs_alloc(hdl, - MIN_EDGECOUNT * sizeof (void *))) == NULL) { - free(zvp); - return (NULL); - } - - zvp->zv_edgealloc = MIN_EDGECOUNT; - - return (zvp); -} - -/* - * Destroy a vertex. Frees up any associated edges. - */ -static void -zfs_vertex_destroy(zfs_vertex_t *zvp) -{ - int i; - - for (i = 0; i < zvp->zv_edgecount; i++) - zfs_edge_destroy(zvp->zv_edges[i]); - - free(zvp->zv_edges); - free(zvp); -} - -/* - * Given a vertex, add an edge to the destination vertex. - */ -static int -zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp, - zfs_vertex_t *dest) -{ - zfs_edge_t *zep = zfs_edge_create(hdl, dest); - - if (zep == NULL) - return (-1); - - if (zvp->zv_edgecount == zvp->zv_edgealloc) { - void *ptr; - - if ((ptr = zfs_realloc(hdl, zvp->zv_edges, - zvp->zv_edgealloc * sizeof (void *), - zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL) - return (-1); - - zvp->zv_edges = ptr; - zvp->zv_edgealloc *= 2; - } - - zvp->zv_edges[zvp->zv_edgecount++] = zep; - - return (0); -} - -static int -zfs_edge_compare(const void *a, const void *b) -{ - const zfs_edge_t *ea = *((zfs_edge_t **)a); - const zfs_edge_t *eb = *((zfs_edge_t **)b); - - if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg) - return (-1); - if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg) - return (1); - return (0); -} - -/* - * Sort the given vertex edges according to the creation txg of each vertex. - */ -static void -zfs_vertex_sort_edges(zfs_vertex_t *zvp) -{ - if (zvp->zv_edgecount == 0) - return; - - qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *), - zfs_edge_compare); -} - -/* - * Construct a new graph object. We allow the size to be specified as a - * parameter so in the future we can size the hash according to the number of - * datasets in the pool. - */ -static zfs_graph_t * -zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size) -{ - zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t)); - - if (zgp == NULL) - return (NULL); - - zgp->zg_size = size; - if ((zgp->zg_hash = zfs_alloc(hdl, - size * sizeof (zfs_vertex_t *))) == NULL) { - free(zgp); - return (NULL); - } - - zgp->zg_root = dataset; - zgp->zg_clone_count = 0; - - return (zgp); -} - -/* - * Destroy a graph object. We have to iterate over all the hash chains, - * destroying each vertex in the process. - */ -static void -zfs_graph_destroy(zfs_graph_t *zgp) -{ - int i; - zfs_vertex_t *current, *next; - - for (i = 0; i < zgp->zg_size; i++) { - current = zgp->zg_hash[i]; - while (current != NULL) { - next = current->zv_next; - zfs_vertex_destroy(current); - current = next; - } - } - - free(zgp->zg_hash); - free(zgp); -} - -/* - * Graph hash function. Classic bernstein k=33 hash function, taken from - * usr/src/cmd/sgs/tools/common/strhash.c - */ -static size_t -zfs_graph_hash(zfs_graph_t *zgp, const char *str) -{ - size_t hash = 5381; - int c; - - while ((c = *str++) != 0) - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - - return (hash % zgp->zg_size); -} - -/* - * Given a dataset name, finds the associated vertex, creating it if necessary. - */ -static zfs_vertex_t * -zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset, - uint64_t txg) -{ - size_t idx = zfs_graph_hash(zgp, dataset); - zfs_vertex_t *zvp; - - for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) { - if (strcmp(zvp->zv_dataset, dataset) == 0) { - if (zvp->zv_txg == 0) - zvp->zv_txg = txg; - return (zvp); - } - } - - if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL) - return (NULL); - - zvp->zv_next = zgp->zg_hash[idx]; - zvp->zv_txg = txg; - zgp->zg_hash[idx] = zvp; - zgp->zg_nvertex++; - - return (zvp); -} - -/* - * Given two dataset names, create an edge between them. For the source vertex, - * mark 'zv_visited' to indicate that we have seen this vertex, and not simply - * created it as a destination of another edge. If 'dest' is NULL, then this - * is an individual vertex (i.e. the starting vertex), so don't add an edge. - */ -static int -zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source, - const char *dest, uint64_t txg) -{ - zfs_vertex_t *svp, *dvp; - - if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL) - return (-1); - svp->zv_visited = VISIT_SEEN; - if (dest != NULL) { - dvp = zfs_graph_lookup(hdl, zgp, dest, txg); - if (dvp == NULL) - return (-1); - if (zfs_vertex_add_edge(hdl, svp, dvp) != 0) - return (-1); - } - - return (0); -} - -/* - * Iterate over all children of the given dataset, adding any vertices - * as necessary. Returns -1 if there was an error, or 0 otherwise. - * This is a simple recursive algorithm - the ZFS namespace typically - * is very flat. We manually invoke the necessary ioctl() calls to - * avoid the overhead and additional semantics of zfs_open(). - */ -static int -iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) -{ - zfs_cmd_t zc = { 0 }; - zfs_vertex_t *zvp; - - /* - * Look up the source vertex, and avoid it if we've seen it before. - */ - zvp = zfs_graph_lookup(hdl, zgp, dataset, 0); - if (zvp == NULL) - return (-1); - if (zvp->zv_visited == VISIT_SEEN) - return (0); - - /* - * Iterate over all children - */ - for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { - /* - * Get statistics for this dataset, to determine the type of the - * dataset and clone statistics. If this fails, the dataset has - * since been removed, and we're pretty much screwed anyway. - */ - zc.zc_objset_stats.dds_origin[0] = '\0'; - if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) - continue; - - if (zc.zc_objset_stats.dds_origin[0] != '\0') { - if (zfs_graph_add(hdl, zgp, - zc.zc_objset_stats.dds_origin, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg) != 0) - return (-1); - /* - * Count origins only if they are contained in the graph - */ - if (isa_child_of(zc.zc_objset_stats.dds_origin, - zgp->zg_root)) - zgp->zg_clone_count--; - } - - /* - * Add an edge between the parent and the child. - */ - if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg) != 0) - return (-1); - - /* - * Recursively visit child - */ - if (iterate_children(hdl, zgp, zc.zc_name)) - return (-1); - } - - /* - * Now iterate over all snapshots. - */ - bzero(&zc, sizeof (zc)); - - for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { - - /* - * Get statistics for this dataset, to determine the type of the - * dataset and clone statistics. If this fails, the dataset has - * since been removed, and we're pretty much screwed anyway. - */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) - continue; - - /* - * Add an edge between the parent and the child. - */ - if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg) != 0) - return (-1); - - zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones; - } - - zvp->zv_visited = VISIT_SEEN; - - return (0); -} - -/* - * Returns false if there are no snapshots with dependent clones in this - * subtree or if all of those clones are also in this subtree. Returns - * true if there is an error or there are external dependents. - */ -static boolean_t -external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) -{ - zfs_cmd_t zc = { 0 }; - - /* - * Check whether this dataset is a clone or has clones since - * iterate_children() only checks the children. - */ - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) - return (B_TRUE); - - if (zc.zc_objset_stats.dds_origin[0] != '\0') { - if (zfs_graph_add(hdl, zgp, - zc.zc_objset_stats.dds_origin, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg) != 0) - return (B_TRUE); - if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset)) - zgp->zg_clone_count--; - } - - if ((zc.zc_objset_stats.dds_num_clones) || - iterate_children(hdl, zgp, dataset)) - return (B_TRUE); - - return (zgp->zg_clone_count != 0); -} - -/* - * Construct a complete graph of all necessary vertices. First, iterate over - * only our object's children. If no cloned snapshots are found, or all of - * the cloned snapshots are in this subtree then return a graph of the subtree. - * Otherwise, start at the root of the pool and iterate over all datasets. - */ -static zfs_graph_t * -construct_graph(libzfs_handle_t *hdl, const char *dataset) -{ - zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE); - int ret = 0; - - if (zgp == NULL) - return (zgp); - - if ((strchr(dataset, '/') == NULL) || - (external_dependents(hdl, zgp, dataset))) { - /* - * Determine pool name and try again. - */ - int len = strcspn(dataset, "/@") + 1; - char *pool = zfs_alloc(hdl, len); - - if (pool == NULL) { - zfs_graph_destroy(zgp); - return (NULL); - } - (void) strlcpy(pool, dataset, len); - - if (iterate_children(hdl, zgp, pool) == -1 || - zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) { - free(pool); - zfs_graph_destroy(zgp); - return (NULL); - } - free(pool); - } - - if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) { - zfs_graph_destroy(zgp); - return (NULL); - } - - return (zgp); -} - -/* - * Given a graph, do a recursive topological sort into the given array. This is - * really just a depth first search, so that the deepest nodes appear first. - * hijack the 'zv_visited' marker to avoid visiting the same vertex twice. - */ -static int -topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result, - size_t *idx, zfs_vertex_t *zgv) -{ - int i; - - if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) { - /* - * If we've already seen this vertex as part of our depth-first - * search, then we have a cyclic dependency, and we must return - * an error. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "recursive dependency at '%s'"), - zgv->zv_dataset); - return (zfs_error(hdl, EZFS_RECURSIVE, - dgettext(TEXT_DOMAIN, - "cannot determine dependent datasets"))); - } else if (zgv->zv_visited >= VISIT_SORT_PRE) { - /* - * If we've already processed this as part of the topological - * sort, then don't bother doing so again. - */ - return (0); - } - - zgv->zv_visited = VISIT_SORT_PRE; - - /* avoid doing a search if we don't have to */ - zfs_vertex_sort_edges(zgv); - for (i = 0; i < zgv->zv_edgecount; i++) { - if (topo_sort(hdl, allowrecursion, result, idx, - zgv->zv_edges[i]->ze_dest) != 0) - return (-1); - } - - /* we may have visited this in the course of the above */ - if (zgv->zv_visited == VISIT_SORT_POST) - return (0); - - if ((result[*idx] = zfs_alloc(hdl, - strlen(zgv->zv_dataset) + 1)) == NULL) - return (-1); - - (void) strcpy(result[*idx], zgv->zv_dataset); - *idx += 1; - zgv->zv_visited = VISIT_SORT_POST; - return (0); -} - -/* - * The only public interface for this file. Do the dirty work of constructing a - * child list for the given object. Construct the graph, do the toplogical - * sort, and then return the array of strings to the caller. - * - * The 'allowrecursion' parameter controls behavior when cycles are found. If - * it is set, the the cycle is ignored and the results returned as if the cycle - * did not exist. If it is not set, then the routine will generate an error if - * a cycle is found. - */ -int -get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion, - const char *dataset, char ***result, size_t *count) -{ - zfs_graph_t *zgp; - zfs_vertex_t *zvp; - - if ((zgp = construct_graph(hdl, dataset)) == NULL) - return (-1); - - if ((*result = zfs_alloc(hdl, - zgp->zg_nvertex * sizeof (char *))) == NULL) { - zfs_graph_destroy(zgp); - return (-1); - } - - if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) { - free(*result); - zfs_graph_destroy(zgp); - return (-1); - } - - *count = 0; - if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) { - free(*result); - zfs_graph_destroy(zgp); - return (-1); - } - - /* - * Get rid of the last entry, which is our starting vertex and not - * strictly a dependent. - */ - assert(*count > 0); - free((*result)[*count - 1]); - (*count)--; - - zfs_graph_destroy(zgp); - - return (0); -} Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c (revision 0) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c (working copy) @@ -0,0 +1,462 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "libzfs_impl.h" + +int +zfs_iter_clones(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + nvlist_t *nvl = zfs_get_clones_nvl(zhp); + nvpair_t *pair; + + if (nvl == NULL) + return (0); + + for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) { + zfs_handle_t *clone = zfs_open(zhp->zfs_hdl, nvpair_name(pair), + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (clone != NULL) { + int err = func(clone, data); + if (err != 0) + return (err); + } + } + return (0); +} + +static int +zfs_do_list_ioctl(zfs_handle_t *zhp, unsigned long arg, zfs_cmd_t *zc) +{ + int rc; + uint64_t orig_cookie; + + orig_cookie = zc->zc_cookie; +top: + (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); + rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc); + + if (rc == -1) { + switch (errno) { + case ENOMEM: + /* expand nvlist memory and try again */ + if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, zc) != 0) { + zcmd_free_nvlists(zc); + return (-1); + } + zc->zc_cookie = orig_cookie; + goto top; + /* + * An errno value of ESRCH indicates normal completion. + * If ENOENT is returned, then the underlying dataset + * has been removed since we obtained the handle. + */ + case ESRCH: + case ENOENT: + rc = 1; + break; + default: + rc = zfs_standard_error(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, + "cannot iterate filesystems")); + break; + } + } + return (rc); +} + +/* + * Iterate over all child filesystems + */ +int +zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + zfs_cmd_t zc = { 0 }; + zfs_handle_t *nzhp; + int ret; + + if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) + return (0); + + if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) + return (-1); + + while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT, + &zc)) == 0) { + /* + * Silently ignore errors, as the only plausible explanation is + * that the pool has since been removed. + */ + if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, + &zc)) == NULL) { + continue; + } + + if ((ret = func(nzhp, data)) != 0) { + zcmd_free_nvlists(&zc); + return (ret); + } + } + zcmd_free_nvlists(&zc); + return ((ret < 0) ? ret : 0); +} + +/* + * Iterate over all snapshots + */ +int +zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + zfs_cmd_t zc = { 0 }; + zfs_handle_t *nzhp; + int ret; + + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) + return (0); + + if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) + return (-1); + while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT, + &zc)) == 0) { + + if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, + &zc)) == NULL) { + continue; + } + + if ((ret = func(nzhp, data)) != 0) { + zcmd_free_nvlists(&zc); + return (ret); + } + } + zcmd_free_nvlists(&zc); + return ((ret < 0) ? ret : 0); +} + +/* + * Routines for dealing with the sorted snapshot functionality + */ +typedef struct zfs_node { + zfs_handle_t *zn_handle; + avl_node_t zn_avlnode; +} zfs_node_t; + +static int +zfs_sort_snaps(zfs_handle_t *zhp, void *data) +{ + avl_tree_t *avl = data; + zfs_node_t *node; + zfs_node_t search; + + search.zn_handle = zhp; + node = avl_find(avl, &search, NULL); + if (node) { + /* + * If this snapshot was renamed while we were creating the + * AVL tree, it's possible that we already inserted it under + * its old name. Remove the old handle before adding the new + * one. + */ + zfs_close(node->zn_handle); + avl_remove(avl, node); + free(node); + } + + node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); + node->zn_handle = zhp; + avl_add(avl, node); + + return (0); +} + +static int +zfs_snapshot_compare(const void *larg, const void *rarg) +{ + zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; + zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; + uint64_t lcreate, rcreate; + + /* + * Sort them according to creation time. We use the hidden + * CREATETXG property to get an absolute ordering of snapshots. + */ + lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); + rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); + + if (lcreate < rcreate) + return (-1); + else if (lcreate > rcreate) + return (+1); + else + return (0); +} + +int +zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) +{ + int ret = 0; + zfs_node_t *node; + avl_tree_t avl; + void *cookie = NULL; + + avl_create(&avl, zfs_snapshot_compare, + sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); + + ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl); + + for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) + ret |= callback(node->zn_handle, data); + + while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) + free(node); + + avl_destroy(&avl); + + return (ret); +} + +typedef struct { + char *ssa_first; + char *ssa_last; + boolean_t ssa_seenfirst; + boolean_t ssa_seenlast; + zfs_iter_f ssa_func; + void *ssa_arg; +} snapspec_arg_t; + +static int +snapspec_cb(zfs_handle_t *zhp, void *arg) { + snapspec_arg_t *ssa = arg; + char *shortsnapname; + int err = 0; + + if (ssa->ssa_seenlast) + return (0); + shortsnapname = zfs_strdup(zhp->zfs_hdl, + strchr(zfs_get_name(zhp), '@') + 1); + + if (!ssa->ssa_seenfirst && strcmp(shortsnapname, ssa->ssa_first) == 0) + ssa->ssa_seenfirst = B_TRUE; + + if (ssa->ssa_seenfirst) { + err = ssa->ssa_func(zhp, ssa->ssa_arg); + } else { + zfs_close(zhp); + } + + if (strcmp(shortsnapname, ssa->ssa_last) == 0) + ssa->ssa_seenlast = B_TRUE; + free(shortsnapname); + + return (err); +} + +/* + * spec is a string like "A,B%C,D" + * + * , where can be: + * (single snapshot) + * % (range of snapshots, inclusive) + * % (range of snapshots, starting with earliest) + * % (range of snapshots, ending with last) + * % (all snapshots) + * [,...] (comma separated list of the above) + * + * If a snapshot can not be opened, continue trying to open the others, but + * return ENOENT at the end. + */ +int +zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, + zfs_iter_f func, void *arg) +{ + char buf[ZFS_MAXNAMELEN]; + char *comma_separated, *cp; + int err = 0; + int ret = 0; + + (void) strlcpy(buf, spec_orig, sizeof (buf)); + cp = buf; + + while ((comma_separated = strsep(&cp, ",")) != NULL) { + char *pct = strchr(comma_separated, '%'); + if (pct != NULL) { + snapspec_arg_t ssa = { 0 }; + ssa.ssa_func = func; + ssa.ssa_arg = arg; + + if (pct == comma_separated) + ssa.ssa_seenfirst = B_TRUE; + else + ssa.ssa_first = comma_separated; + *pct = '\0'; + ssa.ssa_last = pct + 1; + + /* + * If there is a lastname specified, make sure it + * exists. + */ + if (ssa.ssa_last[0] != '\0') { + char snapname[ZFS_MAXNAMELEN]; + (void) snprintf(snapname, sizeof (snapname), + "%s@%s", zfs_get_name(fs_zhp), + ssa.ssa_last); + if (!zfs_dataset_exists(fs_zhp->zfs_hdl, + snapname, ZFS_TYPE_SNAPSHOT)) { + ret = ENOENT; + continue; + } + } + + err = zfs_iter_snapshots_sorted(fs_zhp, + snapspec_cb, &ssa); + if (ret == 0) + ret = err; + if (ret == 0 && (!ssa.ssa_seenfirst || + (ssa.ssa_last[0] != '\0' && !ssa.ssa_seenlast))) { + ret = ENOENT; + } + } else { + char snapname[ZFS_MAXNAMELEN]; + zfs_handle_t *snap_zhp; + (void) snprintf(snapname, sizeof (snapname), "%s@%s", + zfs_get_name(fs_zhp), comma_separated); + snap_zhp = make_dataset_handle(fs_zhp->zfs_hdl, + snapname); + if (snap_zhp == NULL) { + ret = ENOENT; + continue; + } + err = func(snap_zhp, arg); + if (ret == 0) + ret = err; + } + } + + return (ret); +} + +/* + * Iterate over all children, snapshots and filesystems + */ +int +zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + int ret; + + if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0) + return (ret); + + return (zfs_iter_snapshots(zhp, func, data)); +} + + +typedef struct iter_stack_frame { + struct iter_stack_frame *next; + zfs_handle_t *zhp; +} iter_stack_frame_t; + +typedef struct iter_dependents_arg { + boolean_t first; + boolean_t allowrecursion; + iter_stack_frame_t *stack; + zfs_iter_f func; + void *data; +} iter_dependents_arg_t; + +static int +iter_dependents_cb(zfs_handle_t *zhp, void *arg) +{ + iter_dependents_arg_t *ida = arg; + int err; + boolean_t first = ida->first; + ida->first = B_FALSE; + + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { + err = zfs_iter_clones(zhp, iter_dependents_cb, ida); + } else { + iter_stack_frame_t isf; + iter_stack_frame_t *f; + + /* + * check if there is a cycle by seeing if this fs is already + * on the stack. + */ + for (f = ida->stack; f != NULL; f = f->next) { + if (f->zhp->zfs_dmustats.dds_guid == + zhp->zfs_dmustats.dds_guid) { + if (ida->allowrecursion) { + zfs_close(zhp); + return (0); + } else { + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, + "recursive dependency at '%s'"), + zfs_get_name(zhp)); + err = zfs_error(zhp->zfs_hdl, + EZFS_RECURSIVE, + dgettext(TEXT_DOMAIN, + "cannot determine dependent " + "datasets")); + zfs_close(zhp); + return (err); + } + } + } + + isf.zhp = zhp; + isf.next = ida->stack; + ida->stack = &isf; + err = zfs_iter_filesystems(zhp, iter_dependents_cb, ida); + if (err == 0) + err = zfs_iter_snapshots(zhp, iter_dependents_cb, ida); + ida->stack = isf.next; + } + if (!first && err == 0) + err = ida->func(zhp, ida->data); + return (err); +} + +int +zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion, + zfs_iter_f func, void *data) +{ + iter_dependents_arg_t ida; + ida.allowrecursion = allowrecursion; + ida.stack = NULL; + ida.func = func; + ida.data = data; + ida.first = B_TRUE; + return (iter_dependents_cb(zfs_handle_dup(zhp), &ida)); +} Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c (revision 227654) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c (working copy) @@ -20,6 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* @@ -435,7 +437,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, uint_t i, nspares, nl2cache; boolean_t config_seen; uint64_t best_txg; - char *name, *hostname; + char *name, *hostname, *comment; uint64_t version, guid; uint_t children = 0; nvlist_t **child = NULL; @@ -524,6 +526,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, * version * pool guid * name + * comment (if available) * pool state * hostid (if available) * hostname (if available) @@ -545,11 +548,24 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, if (nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, name) != 0) goto nomem; + + /* + * COMMENT is optional, don't bail if it's not + * there, instead, set it to NULL. + */ + if (nvlist_lookup_string(tmp, + ZPOOL_CONFIG_COMMENT, &comment) != 0) + comment = NULL; + else if (nvlist_add_string(config, + ZPOOL_CONFIG_COMMENT, comment) != 0) + goto nomem; + verify(nvlist_lookup_uint64(tmp, ZPOOL_CONFIG_POOL_STATE, &state) == 0); if (nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state) != 0) goto nomem; + hostid = 0; if (nvlist_lookup_uint64(tmp, ZPOOL_CONFIG_HOSTID, &hostid) == 0) { Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c (revision 227654) +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c (working copy) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* @@ -351,6 +352,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int e switch (error) { case ENXIO: case ENODEV: + case EPIPE: zfs_verror(hdl, EZFS_IO, fmt, ap); break; @@ -1324,7 +1326,8 @@ addlist(libzfs_handle_t *hdl, char *propname, zpro * dataset property, */ if (prop == ZPROP_INVAL && (type == ZFS_TYPE_POOL || - (!zfs_prop_user(propname) && !zfs_prop_userquota(propname)))) { + (!zfs_prop_user(propname) && !zfs_prop_userquota(propname) && + !zfs_prop_written(propname)))) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); return (zfs_error(hdl, EZFS_BADPROP, Index: cddl/lib/libzfs/Makefile =================================================================== --- cddl/lib/libzfs/Makefile (revision 227654) +++ cddl/lib/libzfs/Makefile (working copy) @@ -20,8 +20,8 @@ SRCS+= libzfs_changelist.c \ libzfs_config.c \ libzfs_dataset.c \ libzfs_diff.c \ - libzfs_graph.c \ libzfs_import.c \ + libzfs_iter.c \ libzfs_mount.c \ libzfs_pool.c \ libzfs_sendrecv.c \ Index: sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c =================================================================== --- sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c (revision 227654) +++ sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c (working copy) @@ -267,7 +267,7 @@ zfs_prop_init(void) /* default index properties */ zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, - "1 | 2 | 3 | 4 | current", "VERSION", version_table); + "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table); zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", "CANMOUNT", canmount_table); @@ -297,6 +297,8 @@ zfs_prop_init(void) /* string properties */ zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "ORIGIN"); + zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY, + ZFS_TYPE_SNAPSHOT, "[,...]", "CLONES"); zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, " | legacy | none", "MOUNTPOINT"); @@ -342,6 +344,8 @@ zfs_prop_init(void) ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDREFRESERV"); zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "", "USERREFS"); + zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY, + ZFS_TYPE_DATASET, "", "WRITTEN"); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, @@ -468,6 +472,18 @@ zfs_prop_userquota(const char *name) } /* + * Returns true if this is a valid written@ property. + * Note that after the @, any character is valid (eg, another @, for + * written@pool/fs@origin). + */ +boolean_t +zfs_prop_written(const char *name) +{ + static const char *prefix = "written@"; + return (strncmp(name, prefix, strlen(prefix)) == 0); +} + +/* * Tables of index types, plus functions to convert between the user view * (strings) and internal representation (uint64_t). */ Index: sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c =================================================================== --- sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c (revision 227654) +++ sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c (working copy) @@ -20,6 +20,8 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -69,6 +71,8 @@ zpool_prop_init(void) ZFS_TYPE_POOL, "", "BOOTFS"); zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, PROP_DEFAULT, ZFS_TYPE_POOL, " | none", "CACHEFILE"); + zprop_register_string(ZPOOL_PROP_COMMENT, "comment", NULL, + PROP_DEFAULT, ZFS_TYPE_POOL, "", "COMMENT"); /* readonly number properties */ zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY, Index: sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.h =================================================================== --- sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.h (revision 227654) +++ sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.h (working copy) @@ -121,7 +121,9 @@ uint64_t zprop_random_value(int, uint64_t, zfs_typ const char *zprop_values(int, zfs_type_t); size_t zprop_width(int, boolean_t *, zfs_type_t); boolean_t zprop_valid_for_type(int, zfs_type_t); +boolean_t zfs_prop_written(const char *name); + #ifdef __cplusplus } #endif Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (working copy) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -316,7 +317,10 @@ static int deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_deadlist_t *dl = arg; + dsl_pool_t *dp = dmu_objset_pool(dl->dl_os); + rw_enter(&dp->dp_config_rwlock, RW_READER); dsl_deadlist_insert(dl, bp, tx); + rw_exit(&dp->dp_config_rwlock); return (0); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c (working copy) @@ -21,6 +21,8 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -297,6 +299,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t if (spa->spa_root_vdev == NULL) { ASSERT(ops == &vdev_root_ops); spa->spa_root_vdev = vd; + spa->spa_load_guid = spa_generate_guid(NULL); } if (guid == 0 && ops != &vdev_hole_ops) { Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c (working copy) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* @@ -525,10 +526,12 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, } /* - * Check if user has requested permission. + * Check if user has requested permission. If descendent is set, must have + * descendent perms. */ int -dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) +dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, + cred_t *cr) { dsl_dir_t *dd; dsl_pool_t *dp; @@ -549,7 +552,7 @@ int SPA_VERSION_DELEGATED_PERMS) return (EPERM); - if (dsl_dataset_is_snapshot(ds)) { + if (dsl_dataset_is_snapshot(ds) || descendent) { /* * Snapshots are treated as descendents only, * local permissions do not apply. @@ -642,7 +645,7 @@ dsl_deleg_access(const char *dsname, const char *p if (error) return (error); - error = dsl_deleg_access_impl(ds, perm, cr); + error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); dsl_dataset_rele(ds, FTAG); return (error); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c (working copy) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -1414,7 +1415,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, con } /* - * We lock the zap with adding == FALSE. Because, if we pass + * We lock the zap with adding == FALSE. Because, if we pass * the actual value of add, it could trigger a mzap_upgrade(). * At present we are just evaluating the possibility of this operation * and hence we donot want to trigger an upgrade. Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h (working copy) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_DELEG_H @@ -64,7 +65,8 @@ extern "C" { int dsl_deleg_get(const char *ddname, nvlist_t **nvp); int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); -int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr); +int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent, + const char *perm, cred_t *cr); void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h (working copy) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SPA_IMPL_H @@ -111,6 +112,7 @@ struct spa { * Fields protected by spa_namespace_lock. */ char spa_name[MAXNAMELEN]; /* pool name */ + char *spa_comment; /* comment */ avl_node_t spa_avl; /* node in spa_namespace_avl */ nvlist_t *spa_config; /* last synced config */ nvlist_t *spa_config_syncing; /* currently syncing config */ @@ -136,7 +138,8 @@ struct spa { objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */ txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */ vdev_t *spa_root_vdev; /* top-level vdev container */ - uint64_t spa_load_guid; /* initial guid for spa_load */ + uint64_t spa_config_guid; /* config pool guid */ + uint64_t spa_load_guid; /* spa_load initialized guid */ list_t spa_config_dirty_list; /* vdevs with dirty config */ list_t spa_state_dirty_list; /* vdevs with dirty state */ spa_aux_vdev_t spa_spares; /* hot spares */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h (working copy) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SPA_H @@ -578,6 +579,7 @@ extern void spa_altroot(spa_t *, char *, size_t); extern int spa_sync_pass(spa_t *spa); extern char *spa_name(spa_t *spa); extern uint64_t spa_guid(spa_t *spa); +extern uint64_t spa_load_guid(spa_t *spa); extern uint64_t spa_last_synced_txg(spa_t *spa); extern uint64_t spa_first_txg(spa_t *spa); extern uint64_t spa_syncing_txg(spa_t *spa); @@ -611,6 +613,7 @@ extern uint64_t spa_get_random(uint64_t range); extern uint64_t spa_generate_guid(spa_t *spa); extern void sprintf_blkptr(char *buf, const blkptr_t *bp); extern void spa_freeze(spa_t *spa); +extern int spa_change_guid(spa_t *spa); extern void spa_upgrade(spa_t *spa, uint64_t version); extern void spa_evict_all(void); extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid, Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h (working copy) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. @@ -194,7 +195,9 @@ int dmu_objset_create(const char *name, dmu_objset int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, uint64_t flags); int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); +int dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname, + struct nvlist *snaps); +int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *); int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); int dmu_objset_rename(const char *name, const char *newname, @@ -705,6 +708,8 @@ void dmu_traverse_objset(objset_t *os, uint64_t tx int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, struct file *fp, offset_t *off); +int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, + boolean_t fromorigin, uint64_t *sizep); typedef struct dmu_recv_cookie { /* Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h (working copy) @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_DATASET_H @@ -257,6 +258,10 @@ void dsl_dataset_space(dsl_dataset_t *ds, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds); +int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); +int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (working copy) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* @@ -212,6 +213,11 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); + if (spa->spa_comment != NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, + 0, ZPROP_SRC_LOCAL); + } + if (spa->spa_root != NULL) spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 0, ZPROP_SRC_LOCAL); @@ -351,7 +357,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) char *propname, *strval; uint64_t intval; objset_t *os; - char *slash; + char *slash, *check; propname = nvpair_name(elem); @@ -471,6 +477,26 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) error = EINVAL; break; + case ZPOOL_PROP_COMMENT: + if ((error = nvpair_value_string(elem, &strval)) != 0) + break; + for (check = strval; *check != '\0'; check++) { + /* + * The kernel doesn't have an easy isprint() + * check. For this kernel check, we merely + * check ASCII apart from DEL. Fix this if + * there is an easy-to-use kernel isprint(). + */ + if (*check >= 0x7f) { + error = EINVAL; + break; + } + check++; + } + if (strlen(strval) > ZPROP_MAX_COMMENT) + error = E2BIG; + break; + case ZPOOL_PROP_DEDUPDITTO: if (spa_version(spa) < SPA_VERSION_DEDUP) error = ENOTSUP; @@ -572,6 +598,43 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, } /* + * Change the GUID for the pool. This is done so that we can later + * re-import a pool built from a clone of our own vdevs. We will modify + * the root vdev's guid, our own pool guid, and then mark all of our + * vdevs dirty. Note that we must make sure that all our vdevs are + * online when we do this, or else any vdevs that weren't present + * would be orphaned from our pool. We are also going to issue a + * sysevent to update any watchers. + */ +int +spa_change_guid(spa_t *spa) +{ + uint64_t oldguid, newguid; + uint64_t txg; + + if (!(spa_mode_global & FWRITE)) + return (EROFS); + + txg = spa_vdev_enter(spa); + + if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY) + return (spa_vdev_exit(spa, NULL, txg, ENXIO)); + + oldguid = spa_guid(spa); + newguid = spa_generate_guid(NULL); + ASSERT3U(oldguid, !=, newguid); + + spa->spa_root_vdev->vdev_guid = newguid; + spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid); + + vdev_config_dirty(spa->spa_root_vdev); + + spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID); + + return (spa_vdev_exit(spa, NULL, txg, 0)); +} + +/* * ========================================================================== * SPA state manipulation (open/create/destroy/import/export) * ========================================================================== @@ -1025,6 +1088,11 @@ spa_unload(spa_t *spa) spa->spa_async_suspended = 0; + if (spa->spa_comment != NULL) { + spa_strfree(spa->spa_comment); + spa->spa_comment = NULL; + } + spa_config_exit(spa, SCL_ALL, FTAG); } @@ -1742,6 +1810,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_i { nvlist_t *config = spa->spa_config; char *ereport = FM_EREPORT_ZFS_POOL; + char *comment; int error; uint64_t pool_guid; nvlist_t *nvl; @@ -1749,6 +1818,10 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_i if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) return (EINVAL); + ASSERT(spa->spa_comment == NULL); + if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) + spa->spa_comment = spa_strdup(comment); + /* * Versioning wasn't explicitly added to the label until later, so if * it's not present treat it as the initial version. @@ -1764,7 +1837,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_i spa_guid_exists(pool_guid, 0)) { error = EEXIST; } else { - spa->spa_load_guid = pool_guid; + spa->spa_config_guid = pool_guid; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) { @@ -5380,6 +5453,20 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *t * properties. */ break; + case ZPOOL_PROP_COMMENT: + VERIFY(nvpair_value_string(elem, &strval) == 0); + if (spa->spa_comment != NULL) + spa_strfree(spa->spa_comment); + spa->spa_comment = spa_strdup(strval); + /* + * We need to dirty the configuration on all the vdevs + * so that their labels get updated. It's unnecessary + * to do this for pool creation since the vdev's + * configuratoin has already been dirtied. + */ + if (tx->tx_txg != TXG_INITIAL) + vdev_config_dirty(spa->spa_root_vdev); + break; default: /* * Set pool property values in the poolprops mos object. Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c (working copy) @@ -20,9 +20,11 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -47,6 +49,9 @@ #include #include +/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ +int zfs_send_corrupt_data = B_FALSE; + static char *dmu_recv_tag = "dmu_recv_tag"; /* @@ -384,8 +389,20 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL, &aflags, zb) != 0) - return (EIO); + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) { + if (zfs_send_corrupt_data) { + /* Send a block filled with 0x"zfs badd bloc" */ + abuf = arc_buf_alloc(spa, blksz, &abuf, + ARC_BUFC_DATA); + uint64_t *ptr; + for (ptr = abuf->b_data; + (char *)ptr < (char *)abuf->b_data + blksz; + ptr++) + *ptr = 0x2f5baddb10c; + } else { + return (EIO); + } + } err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, blksz, bp, abuf->b_data); @@ -515,6 +532,86 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsna return (0); } +int +dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + uint64_t *sizep) +{ + dsl_dataset_t *ds = tosnap->os_dsl_dataset; + dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + int err; + uint64_t size; + + /* tosnap must be a snapshot */ + if (ds->ds_phys->ds_next_snap_obj == 0) + return (EINVAL); + + /* fromsnap must be an earlier snapshot from the same fs as tosnap */ + if (fromds && (ds->ds_dir != fromds->ds_dir || + fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + return (EXDEV); + + if (fromorigin) { + if (fromsnap) + return (EINVAL); + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + } else { + fromorigin = B_FALSE; + } + } + + /* Get uncompressed size estimate of changed data. */ + if (fromds == NULL) { + size = ds->ds_phys->ds_uncompressed_bytes; + } else { + uint64_t used, comp; + err = dsl_dataset_space_written(fromds, ds, + &used, &comp, &size); + if (fromorigin) + dsl_dataset_rele(fromds, FTAG); + if (err) + return (err); + } + + /* + * Assume that space (both on-disk and in-stream) is dominated by + * data. We will adjust for indirect blocks and the copies property, + * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). + */ + + /* + * Subtract out approximate space used by indirect blocks. + * Assume most space is used by data blocks (non-indirect, non-dnode). + * Assume all blocks are recordsize. Assume ditto blocks and + * internal fragmentation counter out compression. + * + * Therefore, space used by indirect blocks is sizeof(blkptr_t) per + * block, which we observe in practice. + */ + uint64_t recordsize; + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_prop_get_ds(ds, "recordsize", + sizeof (recordsize), 1, &recordsize, NULL); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + size -= size / recordsize * sizeof (blkptr_t); + + /* Add in the space for the record associated with each block. */ + size += size / recordsize * sizeof (dmu_replay_record_t); + + *sizep = size; + + return (0); +} + struct recvbeginsyncarg { const char *tofs; const char *tosnap; @@ -1540,7 +1637,7 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc) { struct recvendsyncarg resa; dsl_dataset_t *ds = drc->drc_logical_ds; - int err; + int err, myerr; /* * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() @@ -1578,7 +1675,8 @@ out: if (err == 0 && drc->drc_guid_to_ds_map != NULL) (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); dsl_dataset_disown(ds, dmu_recv_tag); - (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); + myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); + ASSERT3U(myerr, ==, 0); return (err); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c (working copy) @@ -21,6 +21,8 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -343,6 +345,10 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64 txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0); + VERIFY(spa->spa_comment == NULL || nvlist_add_string(config, + ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0); + + #ifdef _KERNEL hostid = zone_get_hostid(NULL); #else /* _KERNEL */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (working copy) @@ -20,6 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* @@ -1365,7 +1367,7 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc ASSERT(BUF_EMPTY(hdr)); hdr->b_size = size; hdr->b_type = type; - hdr->b_spa = spa_guid(spa); + hdr->b_spa = spa_load_guid(spa); hdr->b_state = arc_anon; hdr->b_arc_access = 0; buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); @@ -2146,7 +2148,7 @@ arc_flush(spa_t *spa) uint64_t guid = 0; if (spa) - guid = spa_guid(spa); + guid = spa_load_guid(spa); while (arc_mru->arcs_lsize[ARC_BUFC_DATA]) { (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA); @@ -2936,7 +2938,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkp arc_buf_t *buf; kmutex_t *hash_lock; zio_t *rzio; - uint64_t guid = spa_guid(spa); + uint64_t guid = spa_load_guid(spa); top: hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp), @@ -4593,7 +4595,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, boolean_t have_lock, full; l2arc_write_callback_t *cb; zio_t *pio, *wzio; - uint64_t guid = spa_guid(spa); + uint64_t guid = spa_load_guid(spa); int try; ASSERT(dev->l2ad_vdev != NULL); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c (working copy) @@ -20,11 +20,13 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include #include #include +#include uint64_t bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) @@ -440,7 +442,10 @@ space_range_cb(void *arg, const blkptr_t *bp, dmu_ struct space_range_arg *sra = arg; if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { - sra->used += bp_get_dsize_sync(sra->spa, bp); + if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) + sra->used += bp_get_dsize_sync(sra->spa, bp); + else + sra->used += bp_get_dsize(sra->spa, bp); sra->comp += BP_GET_PSIZE(bp); sra->uncomp += BP_GET_UCSIZE(bp); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c (working copy) @@ -23,6 +23,8 @@ * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. * Portions Copyright 2011 Martin Matuska + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -348,17 +350,37 @@ zfs_dozonecheck_ds(const char *dataset, dsl_datase return (zfs_dozonecheck_impl(dataset, zoned, cr)); } +/* + * If name ends in a '@', then require recursive permissions. + */ int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; + boolean_t descendent = B_FALSE; + dsl_dataset_t *ds; + char *at; - error = zfs_dozonecheck(name, cr); + at = strchr(name, '@'); + if (at != NULL && at[1] == '\0') { + *at = '\0'; + descendent = B_TRUE; + } + + error = dsl_dataset_hold(name, FTAG, &ds); + if (at != NULL) + *at = '@'; + if (error != 0) + return (error); + + error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access(name, perm, cr); + error = dsl_deleg_access_impl(ds, descendent, perm, cr); } + + dsl_dataset_rele(ds, FTAG); return (error); } @@ -372,7 +394,7 @@ zfs_secpolicy_write_perms_ds(const char *name, dsl if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, perm, cr); + error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); } return (error); } @@ -685,24 +707,14 @@ zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) /* * Destroying snapshots with delegated permissions requires * descendent mount and destroy permissions. - * Reassemble the full filesystem@snap name so dsl_deleg_access() - * can do the correct permission check. - * - * Since this routine is used when doing a recursive destroy of snapshots - * and destroying snapshots requires descendent permissions, a successfull - * check of the top level snapshot applies to snapshots of all descendent - * datasets as well. - * - * The top level snapshot may not exist when doing a recursive destroy. - * In this case fallback to permissions of the parent dataset. */ static int -zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) { int error; char *dsname; - dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); + dsname = kmem_asprintf("%s@", zc->zc_name); error = zfs_secpolicy_destroy_perms(dsname, cr); @@ -1469,6 +1481,20 @@ zfs_ioc_pool_get_history(zfs_cmd_t *zc) } static int +zfs_ioc_pool_reguid(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error == 0) { + error = spa_change_guid(spa); + spa_close(spa, FTAG); + } + return (error); +} + +static int zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) { int error; @@ -1765,9 +1791,12 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t * inconsistent. So this is a bit of a workaround... * XXX reading with out owning */ - if (!zc->zc_objset_stats.dds_inconsistent) { - if (dmu_objset_type(os) == DMU_OST_ZVOL) - VERIFY(zvol_get_stats(os, nv) == 0); + if (!zc->zc_objset_stats.dds_inconsistent && + dmu_objset_type(os) == DMU_OST_ZVOL) { + error = zvol_get_stats(os, nv); + if (error == EIO) + return (error); + VERIFY3S(error, ==, 0); } error = put_nvlist(zc, nv); nvlist_free(nv); @@ -1978,8 +2007,7 @@ top: NULL, &zc->zc_cookie); if (error == ENOENT) error = ESRCH; - } while (error == 0 && dataset_name_hidden(zc->zc_name) && - !(zc->zc_iflags & FKIOCTL)); + } while (error == 0 && dataset_name_hidden(zc->zc_name)); dmu_objset_rele(os, FTAG); /* @@ -2257,6 +2285,8 @@ retry: if (nvpair_type(propval) != DATA_TYPE_UINT64_ARRAY) err = EINVAL; + } else { + err = EINVAL; } } else if (err == 0) { if (nvpair_type(propval) == DATA_TYPE_STRING) { @@ -3124,25 +3154,62 @@ zfs_unmount_snap(const char *name, void *arg) /* * inputs: - * zc_name name of filesystem - * zc_value short name of snapshot + * zc_name name of filesystem, snaps must be under it + * zc_nvlist_src[_size] full names of snapshots to destroy * zc_defer_destroy mark for deferred destroy * - * outputs: none + * outputs: + * zc_name on failure, name of failed snapshot */ static int -zfs_ioc_destroy_snaps(zfs_cmd_t *zc) +zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) { - int err; + int err, len; + nvlist_t *nvl; + nvpair_t *pair; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) - return (EINVAL); - err = dmu_objset_find(zc->zc_name, - zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN); - if (err) + if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &nvl)) != 0) { +#ifndef __FreeBSD__ return (err); - return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value, - zc->zc_defer_destroy)); +#else + /* + * We are probably called by older binaries, + * allocate and populate nvlist with recursive snapshots + */ + if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + return (EINVAL); + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + err = dmu_get_recursive_snaps_nvl(zc->zc_name, + zc->zc_value, nvl); + if (err) { + nvlist_free(nvl); + return (err); + } +#endif /* __FreeBSD__ */ + } + + len = strlen(zc->zc_name); + for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) { + const char *name = nvpair_name(pair); + /* + * The snap name must be underneath the zc_name. This ensures + * that our permission checks were legitimate. + */ + if (strncmp(zc->zc_name, name, len) != 0 || + (name[len] != '@' && name[len] != '/')) { + nvlist_free(nvl); + return (EINVAL); + } + + (void) zfs_unmount_snap(name, NULL); + } + + err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, + zc->zc_name); + nvlist_free(nvl); + return (err); } /* @@ -3789,6 +3856,8 @@ out: * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) * zc_sendobj objsetid of snapshot to send * zc_fromobj objsetid of incremental fromsnap (may be zero) + * zc_guid if set, estimate size of stream only. zc_cookie is ignored. + * output size in zc_objset_type. * * outputs: none */ @@ -3797,13 +3866,13 @@ zfs_ioc_send(zfs_cmd_t *zc) { objset_t *fromsnap = NULL; objset_t *tosnap; - file_t *fp; int error; offset_t off; dsl_dataset_t *ds; dsl_dataset_t *dsfrom = NULL; spa_t *spa; dsl_pool_t *dp; + boolean_t estimate = (zc->zc_guid != 0); error = spa_open(zc->zc_name, &spa, FTAG); if (error) @@ -3844,20 +3913,25 @@ zfs_ioc_send(zfs_cmd_t *zc) spa_close(spa, FTAG); } - fp = getf(zc->zc_cookie); - if (fp == NULL) { - dsl_dataset_rele(ds, FTAG); - if (dsfrom) - dsl_dataset_rele(dsfrom, FTAG); - return (EBADF); - } + if (estimate) { + error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, + &zc->zc_objset_type); + } else { + file_t *fp = getf(zc->zc_cookie); + if (fp == NULL) { + dsl_dataset_rele(ds, FTAG); + if (dsfrom) + dsl_dataset_rele(dsfrom, FTAG); + return (EBADF); + } - off = fp->f_offset; - error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp, &off); + off = fp->f_offset; + error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp, &off); - if (off >= 0 && off <= MAXOFFSET_T) - fp->f_offset = off; - releasef(zc->zc_cookie); + if (off >= 0 && off <= MAXOFFSET_T) + fp->f_offset = off; + releasef(zc->zc_cookie); + } if (dsfrom) dsl_dataset_rele(dsfrom, FTAG); dsl_dataset_rele(ds, FTAG); @@ -4666,6 +4740,70 @@ zfs_ioc_get_holds(zfs_cmd_t *zc) } /* + * inputs: + * zc_name name of new filesystem or snapshot + * zc_value full name of old snapshot + * + * outputs: + * zc_cookie space in bytes + * zc_objset_type compressed space in bytes + * zc_perm_action uncompressed space in bytes + */ +static int +zfs_ioc_space_written(zfs_cmd_t *zc) +{ + int error; + dsl_dataset_t *new, *old; + + error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + if (error != 0) + return (error); + error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + if (error != 0) { + dsl_dataset_rele(new, FTAG); + return (error); + } + + error = dsl_dataset_space_written(old, new, &zc->zc_cookie, + &zc->zc_objset_type, &zc->zc_perm_action); + dsl_dataset_rele(old, FTAG); + dsl_dataset_rele(new, FTAG); + return (error); +} + +/* + * inputs: + * zc_name full name of last snapshot + * zc_value full name of first snapshot + * + * outputs: + * zc_cookie space in bytes + * zc_objset_type compressed space in bytes + * zc_perm_action uncompressed space in bytes + */ +static int +zfs_ioc_space_snaps(zfs_cmd_t *zc) +{ + int error; + dsl_dataset_t *new, *old; + + error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + if (error != 0) + return (error); + error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + if (error != 0) { + dsl_dataset_rele(new, FTAG); + return (error); + } + + error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, + &zc->zc_objset_type, &zc->zc_perm_action); + dsl_dataset_rele(old, FTAG); + dsl_dataset_rele(new, FTAG); + return (error); +} + +/* * pool create, destroy, and export don't log the history as part of * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export * do the logging of those commands. @@ -4739,7 +4877,7 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { B_TRUE }, { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, B_TRUE }, { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, B_TRUE }, - { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, B_FALSE }, + { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, B_FALSE }, { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, B_FALSE }, { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, @@ -4751,7 +4889,7 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, B_FALSE }, { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, B_TRUE }, - { zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME, + { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, DATASET_NAME, B_TRUE, B_TRUE }, { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, B_TRUE }, @@ -4795,7 +4933,13 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, B_TRUE }, { zfs_ioc_jail, zfs_secpolicy_config, DATASET_NAME, B_TRUE, B_FALSE }, - { zfs_ioc_unjail, zfs_secpolicy_config, DATASET_NAME, B_TRUE, B_FALSE } + { zfs_ioc_unjail, zfs_secpolicy_config, DATASET_NAME, B_TRUE, B_FALSE }, + { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, + B_TRUE }, + { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + B_TRUE }, + { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + B_TRUE } }; int Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c (working copy) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #include @@ -1308,16 +1309,27 @@ spa_guid(spa_t *spa) /* * If we fail to parse the config during spa_load(), we can go through * the error path (which posts an ereport) and end up here with no root - * vdev. We stash the original pool guid in 'spa_load_guid' to handle + * vdev. We stash the original pool guid in 'spa_config_guid' to handle * this case. */ if (spa->spa_root_vdev != NULL) return (spa->spa_root_vdev->vdev_guid); else - return (spa->spa_load_guid); + return (spa->spa_config_guid); } uint64_t +spa_load_guid(spa_t *spa) +{ + /* + * This is a GUID that exists solely as a reference for the + * purposes of the arc. It is generated at load time, and + * is never written to persistent storage. + */ + return (spa->spa_load_guid); +} + +uint64_t spa_last_synced_txg(spa_t *spa) { return (spa->spa_ubsync.ub_txg); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c (working copy) @@ -23,6 +23,7 @@ * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. + * Portions Copyright 2011 Martin Matuska */ #include @@ -908,15 +909,15 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char return (dsobj); } +#ifdef __FreeBSD__ +/* FreeBSD ioctl compat begin */ struct destroyarg { - dsl_sync_task_group_t *dstg; - char *snapname; - char *failed; - boolean_t defer; + nvlist_t *nvl; + const char *snapname; }; static int -dsl_snapshot_destroy_one(const char *name, void *arg) +dsl_check_snap_cb(const char *name, void *arg) { struct destroyarg *da = arg; dsl_dataset_t *ds; @@ -924,53 +925,79 @@ static int char *dsname; dsname = kmem_asprintf("%s@%s", name, da->snapname); - err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds); - strfree(dsname); - if (err == 0) { - struct dsl_ds_destroyarg *dsda; + VERIFY(nvlist_add_boolean(da->nvl, dsname) == 0); - dsl_dataset_make_exclusive(ds, da->dstg); - dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); - dsda->ds = ds; - dsda->defer = da->defer; - dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, - dsl_dataset_destroy_sync, dsda, da->dstg, 0); - } else if (err == ENOENT) { - err = 0; - } else { - (void) strcpy(da->failed, name); - } + return (0); +} + +int +dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname, + nvlist_t *snaps) +{ + struct destroyarg *da; + + da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP); + da.nvl = snaps; + da.snapname = snapname; + err = dmu_objset_find(fsname, dsl_check_snap_cb, &da, + DS_FIND_CHILDREN); + kmem_free(da, sizeof (struct destroyarg)); + return (err); } +/* FreeBSD ioctl compat end */ +#endif /* __FreeBSD__ */ /* - * Destroy 'snapname' in all descendants of 'fsname'. + * The snapshots must all be in the same pool. */ -#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy int -dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer) +dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) { int err; - struct destroyarg da; dsl_sync_task_t *dst; spa_t *spa; + nvpair_t *pair; + dsl_sync_task_group_t *dstg; - err = spa_open(fsname, &spa, FTAG); + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); + + err = spa_open(nvpair_name(pair), &spa, FTAG); if (err) return (err); - da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - da.snapname = snapname; - da.failed = fsname; - da.defer = defer; + dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - err = dmu_objset_find(fsname, - dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + dsl_dataset_t *ds; + int err; + err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds); + if (err == 0) { + struct dsl_ds_destroyarg *dsda; + + dsl_dataset_make_exclusive(ds, dstg); + dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), + KM_SLEEP); + dsda->ds = ds; + dsda->defer = defer; + dsl_sync_task_create(dstg, dsl_dataset_destroy_check, + dsl_dataset_destroy_sync, dsda, dstg, 0); + } else if (err == ENOENT) { + err = 0; + } else { + (void) strcpy(failed, nvpair_name(pair)); + break; + } + } + if (err == 0) - err = dsl_sync_task_group_wait(da.dstg); + err = dsl_sync_task_group_wait(dstg); - for (dst = list_head(&da.dstg->dstg_tasks); dst; - dst = list_next(&da.dstg->dstg_tasks, dst)) { + for (dst = list_head(&dstg->dstg_tasks); dst; + dst = list_next(&dstg->dstg_tasks, dst)) { struct dsl_ds_destroyarg *dsda = dst->dst_arg1; dsl_dataset_t *ds = dsda->ds; @@ -978,17 +1005,17 @@ int * Return the file system name that triggered the error */ if (dst->dst_err) { - dsl_dataset_name(ds, fsname); - *strchr(fsname, '@') = '\0'; + dsl_dataset_name(ds, failed); } ASSERT3P(dsda->rm_origin, ==, NULL); - dsl_dataset_disown(ds, da.dstg); + dsl_dataset_disown(ds, dstg); kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); } - dsl_sync_task_group_destroy(da.dstg); + dsl_sync_task_group_destroy(dstg); spa_close(spa, FTAG); return (err); + } static boolean_t @@ -2150,6 +2177,55 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dm dmu_objset_sync(ds->ds_objset, zio, tx); } +static void +get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) +{ + uint64_t count = 0; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t za; + nvlist_t *propval; + nvlist_t *val; + + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + /* + * There may me missing entries in ds_next_clones_obj + * due to a bug in a previous version of the code. + * Only trust it if it has the right number of entries. + */ + if (ds->ds_phys->ds_next_clones_obj != 0) { + ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, + &count)); + } + if (count != ds->ds_phys->ds_num_children - 1) { + goto fail; + } + for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_dataset_t *clone; + char buf[ZFS_MAXNAMELEN]; + if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + za.za_first_integer, FTAG, &clone) != 0) { + goto fail; + } + dsl_dir_name(clone->ds_dir, buf); + VERIFY(nvlist_add_boolean(val, buf) == 0); + dsl_dataset_rele(clone, FTAG); + } + zap_cursor_fini(&zc); + VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); + VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), + propval) == 0); +fail: + nvlist_free(val); + nvlist_free(propval); + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); +} + void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { @@ -2180,6 +2256,26 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, DS_IS_DEFER_DESTROY(ds) ? 1 : 0); + if (ds->ds_phys->ds_prev_snap_obj != 0) { + uint64_t written, comp, uncomp; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dsl_dataset_t *prev; + + rw_enter(&dp->dp_config_rwlock, RW_READER); + int err = dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); + rw_exit(&dp->dp_config_rwlock); + if (err == 0) { + err = dsl_dataset_space_written(prev, ds, &written, + &comp, &uncomp); + dsl_dataset_rele(prev, FTAG); + if (err == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, + written); + } + } + } + ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : (ds->ds_phys->ds_uncompressed_bytes * 100 / ds->ds_phys->ds_compressed_bytes); @@ -2193,6 +2289,8 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, ds->ds_phys->ds_unique_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); + + get_clones_stat(ds, nv); } } @@ -4025,7 +4123,7 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t } /* - * Note, this fuction is used as the callback for dmu_objset_find(). We + * Note, this function is used as the callback for dmu_objset_find(). We * always return 0 so that we will continue to find and process * inconsistent datasets, even if we encounter an error trying to * process one of them. @@ -4044,3 +4142,151 @@ dsl_destroy_inconsistent(const char *dsname, void } return (0); } + +/* + * Return (in *usedp) the amount of space written in new that is not + * present in oldsnap. New may be a snapshot or the head. Old must be + * a snapshot before new, in new's filesystem (or its origin). If not then + * fail and return EINVAL. + * + * The written space is calculated by considering two components: First, we + * ignore any freed space, and calculate the written as new's used space + * minus old's used space. Next, we add in the amount of space that was freed + * between the two snapshots, thus reducing new's used space relative to old's. + * Specifically, this is the space that was born before old->ds_creation_txg, + * and freed before new (ie. on new's deadlist or a previous deadlist). + * + * space freed [---------------------] + * snapshots ---O-------O--------O-------O------ + * oldsnap new + */ +int +dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + int err = 0; + uint64_t snapobj; + dsl_pool_t *dp = new->ds_dir->dd_pool; + + *usedp = 0; + *usedp += new->ds_phys->ds_used_bytes; + *usedp -= oldsnap->ds_phys->ds_used_bytes; + + *compp = 0; + *compp += new->ds_phys->ds_compressed_bytes; + *compp -= oldsnap->ds_phys->ds_compressed_bytes; + + *uncompp = 0; + *uncompp += new->ds_phys->ds_uncompressed_bytes; + *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; + + rw_enter(&dp->dp_config_rwlock, RW_READER); + snapobj = new->ds_object; + while (snapobj != oldsnap->ds_object) { + dsl_dataset_t *snap; + uint64_t used, comp, uncomp; + + err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); + if (err != 0) + break; + + if (snap->ds_phys->ds_prev_snap_txg == + oldsnap->ds_phys->ds_creation_txg) { + /* + * The blocks in the deadlist can not be born after + * ds_prev_snap_txg, so get the whole deadlist space, + * which is more efficient (especially for old-format + * deadlists). Unfortunately the deadlist code + * doesn't have enough information to make this + * optimization itself. + */ + dsl_deadlist_space(&snap->ds_deadlist, + &used, &comp, &uncomp); + } else { + dsl_deadlist_space_range(&snap->ds_deadlist, + 0, oldsnap->ds_phys->ds_creation_txg, + &used, &comp, &uncomp); + } + *usedp += used; + *compp += comp; + *uncompp += uncomp; + + /* + * If we get to the beginning of the chain of snapshots + * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap + * was not a snapshot of/before new. + */ + snapobj = snap->ds_phys->ds_prev_snap_obj; + dsl_dataset_rele(snap, FTAG); + if (snapobj == 0) { + err = EINVAL; + break; + } + + } + rw_exit(&dp->dp_config_rwlock); + return (err); +} + +/* + * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, + * lastsnap, and all snapshots in between are deleted. + * + * blocks that would be freed [---------------------------] + * snapshots ---O-------O--------O-------O--------O + * firstsnap lastsnap + * + * This is the set of blocks that were born after the snap before firstsnap, + * (birth > firstsnap->prev_snap_txg) and died before the snap after the + * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). + * We calculate this by iterating over the relevant deadlists (from the snap + * after lastsnap, backward to the snap after firstsnap), summing up the + * space on the deadlist that was born after the snap before firstsnap. + */ +int +dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, + dsl_dataset_t *lastsnap, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + int err = 0; + uint64_t snapobj; + dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; + + ASSERT(dsl_dataset_is_snapshot(firstsnap)); + ASSERT(dsl_dataset_is_snapshot(lastsnap)); + + /* + * Check that the snapshots are in the same dsl_dir, and firstsnap + * is before lastsnap. + */ + if (firstsnap->ds_dir != lastsnap->ds_dir || + firstsnap->ds_phys->ds_creation_txg > + lastsnap->ds_phys->ds_creation_txg) + return (EINVAL); + + *usedp = *compp = *uncompp = 0; + + rw_enter(&dp->dp_config_rwlock, RW_READER); + snapobj = lastsnap->ds_phys->ds_next_snap_obj; + while (snapobj != firstsnap->ds_object) { + dsl_dataset_t *ds; + uint64_t used, comp, uncomp; + + err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); + if (err != 0) + break; + + dsl_deadlist_space_range(&ds->ds_deadlist, + firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &used, &comp, &uncomp); + *usedp += used; + *compp += comp; + *uncompp += uncomp; + + snapobj = ds->ds_phys->ds_prev_snap_obj; + ASSERT3U(snapobj, !=, 0); + dsl_dataset_rele(ds, FTAG); + } + rw_exit(&dp->dp_config_rwlock); + return (err); +} Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c (working copy) @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -101,11 +102,11 @@ spa_history_create_obj(spa_t *spa, dmu_tx_t *tx) /* * Figure out maximum size of history log. We set it at - * 1% of pool size, with a max of 32MB and min of 128KB. + * 0.1% of pool size, with a max of 1G and min of 128KB. */ shpp->sh_phys_max_off = - metaslab_class_get_dspace(spa_normal_class(spa)) / 100; - shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 32<<20); + metaslab_class_get_dspace(spa_normal_class(spa)) / 1000; + shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30); shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10); dmu_buf_rele(dbp, FTAG); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c (working copy) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -29,6 +30,26 @@ #include #include +/* + * Deadlist concurrency: + * + * Deadlists can only be modified from the syncing thread. + * + * Except for dsl_deadlist_insert(), it can only be modified with the + * dp_config_rwlock held with RW_WRITER. + * + * The accessors (dsl_deadlist_space() and dsl_deadlist_space_range()) can + * be called concurrently, from open context, with the dl_config_rwlock held + * with RW_READER. + * + * Therefore, we only need to provide locking between dsl_deadlist_insert() and + * the accessors, protecting: + * dl_phys->dl_used,comp,uncomp + * and protecting the dl_tree from being loaded. + * The locking is provided by dl_lock. Note that locking on the bpobj_t + * provides its own locking, and dl_oldfmt is immutable. + */ + static int dsl_deadlist_compare(const void *arg1, const void *arg2) { @@ -309,14 +330,14 @@ dsl_deadlist_space(dsl_deadlist_t *dl, * return space used in the range (mintxg, maxtxg]. * Includes maxtxg, does not include mintxg. * mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is - * UINT64_MAX). + * larger than any bp in the deadlist (eg. UINT64_MAX)). */ void dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { + dsl_deadlist_entry_t *dle; dsl_deadlist_entry_t dle_tofind; - dsl_deadlist_entry_t *dle; avl_index_t where; if (dl->dl_oldfmt) { @@ -325,9 +346,10 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint6 return; } - dsl_deadlist_load_tree(dl); *usedp = *compp = *uncompp = 0; + mutex_enter(&dl->dl_lock); + dsl_deadlist_load_tree(dl); dle_tofind.dle_mintxg = mintxg; dle = avl_find(&dl->dl_tree, &dle_tofind, &where); /* @@ -336,6 +358,7 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint6 */ ASSERT(dle != NULL || avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL); + for (; dle && dle->dle_mintxg < maxtxg; dle = AVL_NEXT(&dl->dl_tree, dle)) { uint64_t used, comp, uncomp; @@ -347,6 +370,7 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint6 *compp += comp; *uncompp += uncomp; } + mutex_exit(&dl->dl_lock); } static void Index: sys/cddl/contrib/opensolaris/uts/common/sys/sysevent/eventdefs.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/sys/sysevent/eventdefs.h (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/sys/sysevent/eventdefs.h (working copy) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SYSEVENT_EVENTDEFS_H @@ -256,6 +257,7 @@ extern "C" { #define ESC_ZFS_SCRUB_FINISH "ESC_ZFS_scrub_finish" #define ESC_ZFS_VDEV_SPARE "ESC_ZFS_vdev_spare" #define ESC_ZFS_BOOTFS_VDEV_ATTACH "ESC_ZFS_bootfs_vdev_attach" +#define ESC_ZFS_POOL_REGUID "ESC_ZFS_pool_reguid" #define ESC_ZFS_VDEV_AUTOEXPAND "ESC_ZFS_vdev_autoexpand" /* Index: sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h (revision 227654) +++ sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h (working copy) @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -126,6 +127,8 @@ typedef enum { ZFS_PROP_MLSLABEL, ZFS_PROP_SYNC, ZFS_PROP_REFRATIO, + ZFS_PROP_WRITTEN, + ZFS_PROP_CLONES, ZFS_NUM_PROPS } zfs_prop_t; @@ -165,9 +168,13 @@ typedef enum { ZPOOL_PROP_FREE, ZPOOL_PROP_ALLOCATED, ZPOOL_PROP_READONLY, + ZPOOL_PROP_COMMENT, ZPOOL_NUM_PROPS } zpool_prop_t; +/* Small enough to not hog a whole line of printout in zpool(1M). */ +#define ZPROP_MAX_COMMENT 32 + #define ZPROP_CONT -2 #define ZPROP_INVAL -1 @@ -492,6 +499,7 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_SPLIT_LIST "guid_list" #define ZPOOL_CONFIG_REMOVING "removing" #define ZPOOL_CONFIG_RESILVERING "resilvering" +#define ZPOOL_CONFIG_COMMENT "comment" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ @@ -758,7 +766,7 @@ typedef unsigned long zfs_ioc_t; #define ZFS_IOC_ERROR_LOG _IOWR('Z', 32, struct zfs_cmd) #define ZFS_IOC_CLEAR _IOWR('Z', 33, struct zfs_cmd) #define ZFS_IOC_PROMOTE _IOWR('Z', 34, struct zfs_cmd) -#define ZFS_IOC_DESTROY_SNAPS _IOWR('Z', 35, struct zfs_cmd) +#define ZFS_IOC_DESTROY_SNAPS_NVL _IOWR('Z', 35, struct zfs_cmd) #define ZFS_IOC_SNAPSHOT _IOWR('Z', 36, struct zfs_cmd) #define ZFS_IOC_DSOBJ_TO_DSNAME _IOWR('Z', 37, struct zfs_cmd) #define ZFS_IOC_OBJ_TO_PATH _IOWR('Z', 38, struct zfs_cmd) @@ -783,6 +791,9 @@ typedef unsigned long zfs_ioc_t; #define ZFS_IOC_OBJ_TO_STATS _IOWR('Z', 57, struct zfs_cmd) #define ZFS_IOC_JAIL _IOWR('Z', 58, struct zfs_cmd) #define ZFS_IOC_UNJAIL _IOWR('Z', 59, struct zfs_cmd) +#define ZFS_IOC_POOL_REGUID _IOWR('Z', 60, struct zfs_cmd) +#define ZFS_IOC_SPACE_WRITTEN _IOWR('Z', 61, struct zfs_cmd) +#define ZFS_IOC_SPACE_SNAPS _IOWR('Z', 62, struct zfs_cmd) /* * Internal SPA load state. Used by FMA diagnosis engine. @@ -844,6 +855,7 @@ typedef enum { * ESC_ZFS_RESILVER_START * ESC_ZFS_RESILVER_END * ESC_ZFS_POOL_DESTROY + * ESC_ZFS_POOL_REGUID * * ZFS_EV_POOL_NAME DATA_TYPE_STRING * ZFS_EV_POOL_GUID DATA_TYPE_UINT64