FreeBSD ZFS
The Zettabyte File System

bptree.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 
00022 /*
00023  * Copyright (c) 2012 by Delphix. All rights reserved.
00024  */
00025 
00026 #include <sys/arc.h>
00027 #include <sys/bptree.h>
00028 #include <sys/dmu.h>
00029 #include <sys/dmu_objset.h>
00030 #include <sys/dmu_tx.h>
00031 #include <sys/dmu_traverse.h>
00032 #include <sys/dsl_dataset.h>
00033 #include <sys/dsl_dir.h>
00034 #include <sys/dsl_pool.h>
00035 #include <sys/dnode.h>
00036 #include <sys/refcount.h>
00037 #include <sys/spa.h>
00038 
00053 struct bptree_args {
00054         bptree_phys_t *ba_phys; 
00055         boolean_t ba_free;      
00057         bptree_itor_t *ba_func; 
00058         void *ba_arg;           
00059         dmu_tx_t *ba_tx;        
00060 } bptree_args_t;
00061 
00062 uint64_t
00063 bptree_alloc(objset_t *os, dmu_tx_t *tx)
00064 {
00065         uint64_t obj;
00066         dmu_buf_t *db;
00067         bptree_phys_t *bt;
00068 
00069         obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA,
00070             SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA,
00071             sizeof (bptree_phys_t), tx);
00072 
00073         /*
00074          * Bonus buffer contents are already initialized to 0, but for
00075          * readability we make it explicit.
00076          */
00077         VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
00078         dmu_buf_will_dirty(db, tx);
00079         bt = db->db_data;
00080         bt->bt_begin = 0;
00081         bt->bt_end = 0;
00082         bt->bt_bytes = 0;
00083         bt->bt_comp = 0;
00084         bt->bt_uncomp = 0;
00085         dmu_buf_rele(db, FTAG);
00086 
00087         return (obj);
00088 }
00089 
00090 int
00091 bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
00092 {
00093         dmu_buf_t *db;
00094         bptree_phys_t *bt;
00095 
00096         VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
00097         bt = db->db_data;
00098         ASSERT3U(bt->bt_begin, ==, bt->bt_end);
00099         ASSERT0(bt->bt_bytes);
00100         ASSERT0(bt->bt_comp);
00101         ASSERT0(bt->bt_uncomp);
00102         dmu_buf_rele(db, FTAG);
00103 
00104         return (dmu_object_free(os, obj, tx));
00105 }
00106 
00107 void
00108 bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
00109     uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx)
00110 {
00111         dmu_buf_t *db;
00112         bptree_phys_t *bt;
00113         bptree_entry_phys_t bte;
00114 
00115         /*
00116          * bptree objects are in the pool mos, therefore they can only be
00117          * modified in syncing context. Furthermore, this is only modified
00118          * by the sync thread, so no locking is necessary.
00119          */
00120         ASSERT(dmu_tx_is_syncing(tx));
00121 
00122         VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
00123         bt = db->db_data;
00124 
00125         bte.be_birth_txg = birth_txg;
00126         bte.be_bp = *bp;
00127         bzero(&bte.be_zb, sizeof (bte.be_zb));
00128         dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx);
00129 
00130         dmu_buf_will_dirty(db, tx);
00131         bt->bt_end++;
00132         bt->bt_bytes += bytes;
00133         bt->bt_comp += comp;
00134         bt->bt_uncomp += uncomp;
00135         dmu_buf_rele(db, FTAG);
00136 }
00137 
00138 /* ARGSUSED */
00139 static int
00140 bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
00141     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
00142 {
00143         int err;
00144         struct bptree_args *ba = arg;
00145 
00146         if (bp == NULL)
00147                 return (0);
00148 
00149         err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
00150         if (err == 0 && ba->ba_free) {
00151                 ba->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp);
00152                 ba->ba_phys->bt_comp -= BP_GET_PSIZE(bp);
00153                 ba->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp);
00154         }
00155         return (err);
00156 }
00157 
00158 int
00159 bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
00160     void *arg, dmu_tx_t *tx)
00161 {
00162         int err;
00163         uint64_t i;
00164         dmu_buf_t *db;
00165         struct bptree_args ba;
00166 
00167         ASSERT(!free || dmu_tx_is_syncing(tx));
00168 
00169         err = dmu_bonus_hold(os, obj, FTAG, &db);
00170         if (err != 0)
00171                 return (err);
00172 
00173         if (free)
00174                 dmu_buf_will_dirty(db, tx);
00175 
00176         ba.ba_phys = db->db_data;
00177         ba.ba_free = free;
00178         ba.ba_func = func;
00179         ba.ba_arg = arg;
00180         ba.ba_tx = tx;
00181 
00182         err = 0;
00183         for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
00184                 bptree_entry_phys_t bte;
00185 
00186                 ASSERT(!free || i == ba.ba_phys->bt_begin);
00187 
00188                 err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
00189                     &bte, DMU_READ_NO_PREFETCH);
00190                 if (err != 0)
00191                         break;
00192 
00193                 err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
00194                     bte.be_birth_txg, &bte.be_zb,
00195                     TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST,
00196                     bptree_visit_cb, &ba);
00197                 if (free) {
00198                         ASSERT(err == 0 || err == ERESTART);
00199                         if (err != 0) {
00200                                 /* save bookmark for future resume */
00201                                 ASSERT3U(bte.be_zb.zb_objset, ==,
00202                                     ZB_DESTROYED_OBJSET);
00203                                 ASSERT0(bte.be_zb.zb_level);
00204                                 dmu_write(os, obj, i * sizeof (bte),
00205                                     sizeof (bte), &bte, tx);
00206                                 break;
00207                         } else {
00208                                 ba.ba_phys->bt_begin++;
00209                                 (void) dmu_free_range(os, obj,
00210                                     i * sizeof (bte), sizeof (bte), tx);
00211                         }
00212                 }
00213         }
00214 
00215         ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end);
00216 
00217         /* if all blocks are free there should be no used space */
00218         if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) {
00219                 ASSERT0(ba.ba_phys->bt_bytes);
00220                 ASSERT0(ba.ba_phys->bt_comp);
00221                 ASSERT0(ba.ba_phys->bt_uncomp);
00222         }
00223 
00224         dmu_buf_rele(db, FTAG);
00225 
00226         return (err);
00227 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines