FreeBSD ZFS
The Zettabyte File System
|
00001 /* 00002 * CDDL HEADER START 00003 * 00004 * The contents of this file are subject to the terms of the 00005 * Common Development and Distribution License (the "License"). 00006 * You may not use this file except in compliance with the License. 00007 * 00008 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 00009 * or http://www.opensolaris.org/os/licensing. 00010 * See the License for the specific language governing permissions 00011 * and limitations under the License. 00012 * 00013 * When distributing Covered Code, include this CDDL HEADER in each 00014 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 00015 * If applicable, add the following below this CDDL HEADER, with the 00016 * fields enclosed by brackets "[]" replaced with your own identifying 00017 * information: Portions Copyright [yyyy] [name of copyright owner] 00018 * 00019 * CDDL HEADER END 00020 */ 00021 00022 /* 00023 * Copyright (c) 2012 by Delphix. All rights reserved. 00024 */ 00025 00026 #include <sys/arc.h> 00027 #include <sys/bptree.h> 00028 #include <sys/dmu.h> 00029 #include <sys/dmu_objset.h> 00030 #include <sys/dmu_tx.h> 00031 #include <sys/dmu_traverse.h> 00032 #include <sys/dsl_dataset.h> 00033 #include <sys/dsl_dir.h> 00034 #include <sys/dsl_pool.h> 00035 #include <sys/dnode.h> 00036 #include <sys/refcount.h> 00037 #include <sys/spa.h> 00038 00053 struct bptree_args { 00054 bptree_phys_t *ba_phys; 00055 boolean_t ba_free; 00057 bptree_itor_t *ba_func; 00058 void *ba_arg; 00059 dmu_tx_t *ba_tx; 00060 } bptree_args_t; 00061 00062 uint64_t 00063 bptree_alloc(objset_t *os, dmu_tx_t *tx) 00064 { 00065 uint64_t obj; 00066 dmu_buf_t *db; 00067 bptree_phys_t *bt; 00068 00069 obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA, 00070 SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA, 00071 sizeof (bptree_phys_t), tx); 00072 00073 /* 00074 * Bonus buffer contents are already initialized to 0, but for 00075 * readability we make it explicit. 00076 */ 00077 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); 00078 dmu_buf_will_dirty(db, tx); 00079 bt = db->db_data; 00080 bt->bt_begin = 0; 00081 bt->bt_end = 0; 00082 bt->bt_bytes = 0; 00083 bt->bt_comp = 0; 00084 bt->bt_uncomp = 0; 00085 dmu_buf_rele(db, FTAG); 00086 00087 return (obj); 00088 } 00089 00090 int 00091 bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) 00092 { 00093 dmu_buf_t *db; 00094 bptree_phys_t *bt; 00095 00096 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); 00097 bt = db->db_data; 00098 ASSERT3U(bt->bt_begin, ==, bt->bt_end); 00099 ASSERT0(bt->bt_bytes); 00100 ASSERT0(bt->bt_comp); 00101 ASSERT0(bt->bt_uncomp); 00102 dmu_buf_rele(db, FTAG); 00103 00104 return (dmu_object_free(os, obj, tx)); 00105 } 00106 00107 void 00108 bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg, 00109 uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx) 00110 { 00111 dmu_buf_t *db; 00112 bptree_phys_t *bt; 00113 bptree_entry_phys_t bte; 00114 00115 /* 00116 * bptree objects are in the pool mos, therefore they can only be 00117 * modified in syncing context. Furthermore, this is only modified 00118 * by the sync thread, so no locking is necessary. 00119 */ 00120 ASSERT(dmu_tx_is_syncing(tx)); 00121 00122 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); 00123 bt = db->db_data; 00124 00125 bte.be_birth_txg = birth_txg; 00126 bte.be_bp = *bp; 00127 bzero(&bte.be_zb, sizeof (bte.be_zb)); 00128 dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx); 00129 00130 dmu_buf_will_dirty(db, tx); 00131 bt->bt_end++; 00132 bt->bt_bytes += bytes; 00133 bt->bt_comp += comp; 00134 bt->bt_uncomp += uncomp; 00135 dmu_buf_rele(db, FTAG); 00136 } 00137 00138 /* ARGSUSED */ 00139 static int 00140 bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, 00141 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 00142 { 00143 int err; 00144 struct bptree_args *ba = arg; 00145 00146 if (bp == NULL) 00147 return (0); 00148 00149 err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx); 00150 if (err == 0 && ba->ba_free) { 00151 ba->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp); 00152 ba->ba_phys->bt_comp -= BP_GET_PSIZE(bp); 00153 ba->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp); 00154 } 00155 return (err); 00156 } 00157 00158 int 00159 bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, 00160 void *arg, dmu_tx_t *tx) 00161 { 00162 int err; 00163 uint64_t i; 00164 dmu_buf_t *db; 00165 struct bptree_args ba; 00166 00167 ASSERT(!free || dmu_tx_is_syncing(tx)); 00168 00169 err = dmu_bonus_hold(os, obj, FTAG, &db); 00170 if (err != 0) 00171 return (err); 00172 00173 if (free) 00174 dmu_buf_will_dirty(db, tx); 00175 00176 ba.ba_phys = db->db_data; 00177 ba.ba_free = free; 00178 ba.ba_func = func; 00179 ba.ba_arg = arg; 00180 ba.ba_tx = tx; 00181 00182 err = 0; 00183 for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { 00184 bptree_entry_phys_t bte; 00185 00186 ASSERT(!free || i == ba.ba_phys->bt_begin); 00187 00188 err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte), 00189 &bte, DMU_READ_NO_PREFETCH); 00190 if (err != 0) 00191 break; 00192 00193 err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp, 00194 bte.be_birth_txg, &bte.be_zb, 00195 TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST, 00196 bptree_visit_cb, &ba); 00197 if (free) { 00198 ASSERT(err == 0 || err == ERESTART); 00199 if (err != 0) { 00200 /* save bookmark for future resume */ 00201 ASSERT3U(bte.be_zb.zb_objset, ==, 00202 ZB_DESTROYED_OBJSET); 00203 ASSERT0(bte.be_zb.zb_level); 00204 dmu_write(os, obj, i * sizeof (bte), 00205 sizeof (bte), &bte, tx); 00206 break; 00207 } else { 00208 ba.ba_phys->bt_begin++; 00209 (void) dmu_free_range(os, obj, 00210 i * sizeof (bte), sizeof (bte), tx); 00211 } 00212 } 00213 } 00214 00215 ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end); 00216 00217 /* if all blocks are free there should be no used space */ 00218 if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) { 00219 ASSERT0(ba.ba_phys->bt_bytes); 00220 ASSERT0(ba.ba_phys->bt_comp); 00221 ASSERT0(ba.ba_phys->bt_uncomp); 00222 } 00223 00224 dmu_buf_rele(db, FTAG); 00225 00226 return (err); 00227 }