FreeBSD ZFS
The Zettabyte File System
dmu_tx.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>	/* for dsl_dataset_block_freeable() */
#include <sys/dsl_dir.h>	/* for dsl_dir_tempreserve_*() */
#include <sys/dsl_pool.h>
#include <sys/zap_impl.h>	/* for fzap_default_block_shift */
#include <sys/spa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/zfs_context.h>
#include <sys/varargs.h>

typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
    uint64_t arg1, uint64_t arg2);


dmu_tx_t *
dmu_tx_create_dd(dsl_dir_t *dd)
{
	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
	tx->tx_dir = dd;
	if (dd)
		tx->tx_pool = dd->dd_pool;
	list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
	    offsetof(dmu_tx_hold_t, txh_node));
	list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t),
	    offsetof(dmu_tx_callback_t, dcb_node));
#ifdef ZFS_DEBUG
	refcount_create(&tx->tx_space_written);
	refcount_create(&tx->tx_space_freed);
#endif
	return (tx);
}

dmu_tx_t *
dmu_tx_create(objset_t *os)
{
	dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir);
	tx->tx_objset = os;
	tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset);
	return (tx);
}

dmu_tx_t *
dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg)
{
	dmu_tx_t *tx = dmu_tx_create_dd(NULL);

	ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
	tx->tx_pool = dp;
	tx->tx_txg = txg;
	tx->tx_anyobj = TRUE;

	return (tx);
}

int
dmu_tx_is_syncing(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}

int
dmu_tx_private_ok(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}

static dmu_tx_hold_t *
dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
    enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn = NULL;
	int err;

	if (object != DMU_NEW_OBJECT) {
		err = dnode_hold(os, object, tx, &dn);
		if (err) {
			tx->tx_err = err;
			return (NULL);
		}

		if (err == 0 && tx->tx_txg != 0) {
			mutex_enter(&dn->dn_mtx);
			/*
			 * dn->dn_assigned_txg == tx->tx_txg doesn't pose a
			 * problem, but there's no way for it to happen (for
			 * now, at least).
			 */
			ASSERT(dn->dn_assigned_txg == 0);
			dn->dn_assigned_txg = tx->tx_txg;
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
	}

	txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
	txh->txh_tx = tx;
	txh->txh_dnode = dn;
#ifdef ZFS_DEBUG
	txh->txh_type = type;
	txh->txh_arg1 = arg1;
	txh->txh_arg2 = arg2;
#endif
	list_insert_tail(&tx->tx_holds, txh);

	return (txh);
}

void
dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
{
	/*
	 * If we're syncing, they can manipulate any object anyhow, and
	 * the hold on the dnode_t can cause problems.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		(void) dmu_tx_hold_object_impl(tx, os,
		    object, THT_NEWOBJECT, 0, 0);
	}
}

static int
dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
{
	int err;
	dmu_buf_impl_t *db;

	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	db = dbuf_hold_level(dn, level, blkid, FTAG);
	rw_exit(&dn->dn_struct_rwlock);
	if (db == NULL)
		return (EIO);
	err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH);
	dbuf_rele(db, FTAG);
	return (err);
}

static void
dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db,
    int level, uint64_t blkid, boolean_t freeable, uint64_t *history)
{
	objset_t *os = dn->dn_objset;
	dsl_dataset_t *ds = os->os_dsl_dataset;
	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	dmu_buf_impl_t *parent = NULL;
	blkptr_t *bp = NULL;
	uint64_t space;

	if (level >= dn->dn_nlevels || history[level] == blkid)
		return;

	history[level] = blkid;

	space = (level == 0) ?
	    dn->dn_datablksz : (1ULL << dn->dn_indblkshift);

	if (db == NULL || db == dn->dn_dbuf) {
		ASSERT(level != 0);
		db = NULL;
	} else {
		ASSERT(DB_DNODE(db) == dn);
		ASSERT(db->db_level == level);
		ASSERT(db->db.db_size == space);
		ASSERT(db->db_blkid == blkid);
		bp = db->db_blkptr;
		parent = db->db_parent;
	}

	freeable = (bp && (freeable ||
	    dsl_dataset_block_freeable(ds, bp, bp->blk_birth)));

	if (freeable)
		txh->txh_space_tooverwrite += space;
	else
		txh->txh_space_towrite += space;
	if (bp)
		txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp);

	dmu_tx_count_twig(txh, dn, parent, level + 1,
	    blkid >> epbs, freeable, history);
}

/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	dnode_t *dn = txh->txh_dnode;
	uint64_t start, end, i;
	int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
	int err = 0;

	if (len == 0)
		return;

	min_bs = SPA_MINBLOCKSHIFT;
	max_bs = SPA_MAXBLOCKSHIFT;
	min_ibs = DN_MIN_INDBLKSHIFT;
	max_ibs = DN_MAX_INDBLKSHIFT;

	if (dn) {
		uint64_t history[DN_MAX_LEVELS];
		int nlvls = dn->dn_nlevels;
		int delta;

		/*
		 * For i/o error checking, read the first and last level-0
		 * blocks (if they are not aligned), and all the level-1 blocks.
		 */
		if (dn->dn_maxblkid == 0) {
			delta = dn->dn_datablksz;
			start = (off < dn->dn_datablksz) ? 0 : 1;
			end = (off+len <= dn->dn_datablksz) ? 0 : 1;
			if (start == 0 && (off > 0 || len < dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
				if (err)
					goto out;
				delta -= off;
			}
		} else {
			zio_t *zio = zio_root(dn->dn_objset->os_spa,
			    NULL, NULL, ZIO_FLAG_CANFAIL);

			/* first level-0 block */
			start = off >> dn->dn_datablkshift;
			if (P2PHASE(off, dn->dn_datablksz) ||
			    len < dn->dn_datablksz) {
				err = dmu_tx_check_ioerr(zio, dn, 0, start);
				if (err)
					goto out;
			}

			/* last level-0 block */
			end = (off+len-1) >> dn->dn_datablkshift;
			if (end != start && end <= dn->dn_maxblkid &&
			    P2PHASE(off+len, dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(zio, dn, 0, end);
				if (err)
					goto out;
			}

			/* level-1 blocks */
			if (nlvls > 1) {
				int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
				for (i = (start>>shft)+1; i < end>>shft; i++) {
					err = dmu_tx_check_ioerr(zio, dn, 1, i);
					if (err)
						goto out;
				}
			}

			err = zio_wait(zio);
			if (err)
				goto out;
			delta = P2NPHASE(off, dn->dn_datablksz);
		}

		if (dn->dn_maxblkid > 0) {
			/*
			 * The blocksize can't change,
			 * so we can make a more precise estimate.
			 */
			ASSERT(dn->dn_datablkshift != 0);
			min_bs = max_bs = dn->dn_datablkshift;
			min_ibs = max_ibs = dn->dn_indblkshift;
		} else if (dn->dn_indblkshift > max_ibs) {
			/*
			 * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
			 * the code will still work correctly on older pools.
			 */
			min_ibs = max_ibs = dn->dn_indblkshift;
		}

		/*
		 * If this write is not off the end of the file
		 * we need to account for overwrites/unref.
		 */
		if (start <= dn->dn_maxblkid) {
			for (int l = 0; l < DN_MAX_LEVELS; l++)
				history[l] = -1ULL;
		}
		while (start <= dn->dn_maxblkid) {
			dmu_buf_impl_t *db;

			rw_enter(&dn->dn_struct_rwlock, RW_READER);
			err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db);
			rw_exit(&dn->dn_struct_rwlock);

			if (err) {
				txh->txh_tx->tx_err = err;
				return;
			}

			dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE,
			    history);
			dbuf_rele(db, FTAG);
			if (++start > end) {
				/*
				 * Account for new indirects appearing
				 * before this IO gets assigned into a txg.
				 */
				bits = 64 - min_bs;
				epbs = min_ibs - SPA_BLKPTRSHIFT;
				for (bits -= epbs * (nlvls - 1);
				    bits >= 0; bits -= epbs)
					txh->txh_fudge += 1ULL << max_ibs;
				goto out;
			}
			off += delta;
			if (len >= delta)
				len -= delta;
			delta = dn->dn_datablksz;
		}
	}

	/*
	 * 'end' is the last thing we will access, not one past.
	 * This way we won't overflow when accessing the last byte.
	 */
	start = P2ALIGN(off, 1ULL << max_bs);
	end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
	txh->txh_space_towrite += end - start + 1;

	start >>= min_bs;
	end >>= min_bs;

	epbs = min_ibs - SPA_BLKPTRSHIFT;

	/*
	 * The object contains at most 2^(64 - min_bs) blocks,
	 * and each indirect level maps 2^epbs.
	 */
	for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
		start >>= epbs;
		end >>= epbs;
		ASSERT3U(end, >=, start);
		txh->txh_space_towrite += (end - start + 1) << max_ibs;
		if (start != 0) {
			/*
			 * We also need a new blkid=0 indirect block
			 * to reference any existing file data.
			 */
			txh->txh_space_towrite += 1ULL << max_ibs;
		}
	}

out:
	if (txh->txh_space_towrite + txh->txh_space_tooverwrite >
	    2 * DMU_MAX_ACCESS)
		err = EFBIG;

	if (err)
		txh->txh_tx->tx_err = err;
}

static void
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
	dnode_t *dn = txh->txh_dnode;
	dnode_t *mdn = DMU_META_DNODE(txh->txh_tx->tx_objset);
	uint64_t space = mdn->dn_datablksz +
	    ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);

	if (dn && dn->dn_dbuf->db_blkptr &&
	    dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
	    dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) {
		txh->txh_space_tooverwrite += space;
		txh->txh_space_tounref += space;
	} else {
		txh->txh_space_towrite += space;
		if (dn && dn->dn_dbuf->db_blkptr)
			txh->txh_space_tounref += space;
	}
}

void
dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);
	ASSERT(len < DMU_MAX_ACCESS);
	ASSERT(len == 0 || UINT64_MAX - off >= len - 1);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_WRITE, off, len);
	if (txh == NULL)
		return;

	dmu_tx_count_write(txh, off, len);
	dmu_tx_count_dnode(txh);
}

static void
dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	uint64_t blkid, nblks, lastblk;
	uint64_t space = 0, unref = 0, skipped = 0;
	dnode_t *dn = txh->txh_dnode;
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
	int epbs;
	uint64_t l0span = 0, nl1blks = 0;

	if (dn->dn_nlevels == 0)
		return;

	/*
	 * The struct_rwlock protects us against dn_nlevels
	 * changing, in case (against all odds) we manage to dirty &
	 * sync out the changes after we check for being dirty.
	 * Also, dbuf_hold_impl() wants us to have the struct_rwlock.
	 */
	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	if (dn->dn_maxblkid == 0) {
		if (off == 0 && len >= dn->dn_datablksz) {
			blkid = 0;
			nblks = 1;
		} else {
			rw_exit(&dn->dn_struct_rwlock);
			return;
		}
	} else {
		blkid = off >> dn->dn_datablkshift;
		nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift;

		if (blkid >= dn->dn_maxblkid) {
			rw_exit(&dn->dn_struct_rwlock);
			return;
		}
		if (blkid + nblks > dn->dn_maxblkid)
			nblks = dn->dn_maxblkid - blkid;

	}
	l0span = nblks;	/* save for later use to calc level > 1 overhead */
	if (dn->dn_nlevels == 1) {
		int i;
		for (i = 0; i < nblks; i++) {
			blkptr_t *bp = dn->dn_phys->dn_blkptr;
			ASSERT3U(blkid + i, <, dn->dn_nblkptr);
			bp += blkid + i;
			if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
				dprintf_bp(bp, "can free old%s", "");
				space += bp_get_dsize(spa, bp);
			}
			unref += BP_GET_ASIZE(bp);
		}
		nl1blks = 1;
		nblks = 0;
	}

	lastblk = blkid + nblks - 1;
	while (nblks) {
		dmu_buf_impl_t *dbuf;
		uint64_t ibyte, new_blkid;
		int epb = 1 << epbs;
		int err, i, blkoff, tochk;
		blkptr_t *bp;

		ibyte = blkid << dn->dn_datablkshift;
		err = dnode_next_offset(dn,
		    DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0);
		new_blkid = ibyte >> dn->dn_datablkshift;
		if (err == ESRCH) {
			skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
			break;
		}
		if (err) {
			txh->txh_tx->tx_err = err;
			break;
		}
		if (new_blkid > lastblk) {
			skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
			break;
		}

		if (new_blkid > blkid) {
			ASSERT((new_blkid >> epbs) > (blkid >> epbs));
			skipped += (new_blkid >> epbs) - (blkid >> epbs) - 1;
			nblks -= new_blkid - blkid;
			blkid = new_blkid;
		}
		blkoff = P2PHASE(blkid, epb);
		tochk = MIN(epb - blkoff, nblks);

		err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf);
		if (err) {
			txh->txh_tx->tx_err = err;
			break;
		}

		txh->txh_memory_tohold += dbuf->db.db_size;

		/*
		 * We don't check memory_tohold against DMU_MAX_ACCESS because
		 * memory_tohold is an over-estimation (especially the >L1
		 * indirect blocks), so it could fail.  Callers should have
		 * already verified that they will not be holding too much
		 * memory.
		 */

		err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
		if (err != 0) {
			txh->txh_tx->tx_err = err;
			dbuf_rele(dbuf, FTAG);
			break;
		}

		bp = dbuf->db.db_data;
		bp += blkoff;

		for (i = 0; i < tochk; i++) {
			if (dsl_dataset_block_freeable(ds, &bp[i],
			    bp[i].blk_birth)) {
				dprintf_bp(&bp[i], "can free old%s", "");
				space += bp_get_dsize(spa, &bp[i]);
			}
			unref += BP_GET_ASIZE(bp);
		}
		dbuf_rele(dbuf, FTAG);

		++nl1blks;
		blkid += tochk;
		nblks -= tochk;
	}
	rw_exit(&dn->dn_struct_rwlock);

	/*
	 * Add in memory requirements of higher-level indirects.
	 * This assumes a worst-possible scenario for dn_nlevels and a
	 * worst-possible distribution of l1-blocks over the region to free.
	 */
	{
		uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs);
		int level = 2;
		/*
		 * Here we don't use DN_MAX_LEVEL, but calculate it with the
		 * given datablkshift and indblkshift. This makes the
		 * difference between 19 and 8 on large files.
		 */
		int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) /
		    (dn->dn_indblkshift - SPA_BLKPTRSHIFT);

		while (level++ < maxlevel) {
			txh->txh_memory_tohold += MAX(MIN(blkcnt, nl1blks), 1)
			    << dn->dn_indblkshift;
			blkcnt = 1 + (blkcnt >> epbs);
		}
	}

	/* account for new level 1 indirect blocks that might show up */
	if (skipped > 0) {
		txh->txh_fudge += skipped << dn->dn_indblkshift;
		skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs);
		txh->txh_memory_tohold += skipped << dn->dn_indblkshift;
	}
	txh->txh_space_tofree += space;
	txh->txh_space_tounref += unref;
}

void
dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn;
	uint64_t start, end, i;
	int err, shift;
	zio_t *zio;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_FREE, off, len);
	if (txh == NULL)
		return;
	dn = txh->txh_dnode;

	/* first block */
	if (off != 0)
		dmu_tx_count_write(txh, off, 1);
	/* last block */
	if (len != DMU_OBJECT_END)
		dmu_tx_count_write(txh, off+len, 1);

	dmu_tx_count_dnode(txh);

	if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
		return;
	if (len == DMU_OBJECT_END)
		len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off;

	/*
	 * For i/o error checking, read the first and last level-0
	 * blocks, and all the level-1 blocks.  The above count_write's
	 * have already taken care of the level-0 blocks.
	 */
	if (dn->dn_nlevels > 1) {
		shift = dn->dn_datablkshift + dn->dn_indblkshift -
		    SPA_BLKPTRSHIFT;
		start = off >> shift;
		end = dn->dn_datablkshift ? ((off+len) >> shift) : 0;

		zio = zio_root(tx->tx_pool->dp_spa,
		    NULL, NULL, ZIO_FLAG_CANFAIL);
		for (i = start; i <= end; i++) {
			uint64_t ibyte = i << shift;
			err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0);
			i = ibyte >> shift;
			if (err == ESRCH)
				break;
			if (err) {
				tx->tx_err = err;
				return;
			}

			err = dmu_tx_check_ioerr(zio, dn, 1, i);
			if (err) {
				tx->tx_err = err;
				return;
			}
		}
		err = zio_wait(zio);
		if (err) {
			tx->tx_err = err;
			return;
		}
	}

	dmu_tx_count_free(txh, off, len);
}

void
dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn;
	uint64_t nblocks;
	int epbs, err;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_ZAP, add, (uintptr_t)name);
	if (txh == NULL)
		return;
	dn = txh->txh_dnode;

	dmu_tx_count_dnode(txh);

	if (dn == NULL) {
		/*
		 * We will be able to fit a new object's entries into one leaf
		 * block.  So there will be at most 2 blocks total,
		 * including the header block.
		 */
		dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift);
		return;
	}

	ASSERT3P(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);

	if (dn->dn_maxblkid == 0 && !add) {
		blkptr_t *bp;

		/*
		 * If there is only one block (i.e. this is a micro-zap)
		 * and we are not adding anything, the accounting is simple.
		 */
		err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
		if (err) {
			tx->tx_err = err;
			return;
		}

		/*
		 * Use max block size here, since we don't know how much
		 * the size will change between now and the dbuf dirty call.
		 */
		bp = &dn->dn_phys->dn_blkptr[0];
		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
		    bp, bp->blk_birth))
			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
		else
			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
		if (!BP_IS_HOLE(bp))
			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
		return;
	}

	if (dn->dn_maxblkid > 0 && name) {
		/*
		 * access the name in this fat-zap so that we'll check
		 * for i/o errors to the leaf blocks, etc.
		 */
		err = zap_lookup(dn->dn_objset, dn->dn_object, name,
		    8, 0, NULL);
		if (err == EIO) {
			tx->tx_err = err;
			return;
		}
	}

	err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
	    &txh->txh_space_towrite, &txh->txh_space_tooverwrite);

	/*
	 * If the modified blocks are scattered to the four winds,
	 * we'll have to modify an indirect twig for each.
	 */
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
		if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj)
			txh->txh_space_towrite += 3 << dn->dn_indblkshift;
		else
			txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift;
}

void
dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_BONUS, 0, 0);
	if (txh)
		dmu_tx_count_dnode(txh);
}

void
dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
	dmu_tx_hold_t *txh;
	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    DMU_NEW_OBJECT, THT_SPACE, space, 0);

	txh->txh_space_towrite += space;
}

int
dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
{
	dmu_tx_hold_t *txh;
	int holds = 0;

	/*
	 * By asserting that the tx is assigned, we're counting the
	 * number of dn_tx_holds, which is the same as the number of
	 * dn_holds.  Otherwise, we'd be counting dn_holds, but
	 * dn_tx_holds could be 0.
	 */
	ASSERT(tx->tx_txg != 0);

	/* if (tx->tx_anyobj == TRUE) */
	/*	return (0); */

	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		if (txh->txh_dnode && txh->txh_dnode->dn_object == object)
			holds++;
	}

	return (holds);
}

#ifdef ZFS_DEBUG
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
	dmu_tx_hold_t *txh;
	int match_object = FALSE, match_offset = FALSE;
	dnode_t *dn;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	ASSERT(tx->tx_txg != 0);
	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
	ASSERT3U(dn->dn_object, ==, db->db.db_object);

	if (tx->tx_anyobj) {
		DB_DNODE_EXIT(db);
		return;
	}

	/* XXX No checking on the meta dnode for now */
	if (db->db.db_object == DMU_META_DNODE_OBJECT) {
		DB_DNODE_EXIT(db);
		return;
	}

	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
		if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
			match_object = TRUE;
		if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
			int datablkshift = dn->dn_datablkshift ?
			    dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
			int shift = datablkshift + epbs * db->db_level;
			uint64_t beginblk = shift >= 64 ? 0 :
			    (txh->txh_arg1 >> shift);
			uint64_t endblk = shift >= 64 ? 0 :
			    ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
			uint64_t blkid = db->db_blkid;

			/* XXX txh_arg2 better not be zero... */

			dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
			    txh->txh_type, beginblk, endblk);

			switch (txh->txh_type) {
			case THT_WRITE:
				if (blkid >= beginblk && blkid <= endblk)
					match_offset = TRUE;
				/*
				 * We will let this hold work for the bonus
				 * or spill buffer so that we don't need to
				 * hold it when creating a new object.
				 */
				if (blkid == DMU_BONUS_BLKID ||
				    blkid == DMU_SPILL_BLKID)
					match_offset = TRUE;
				/*
				 * They might have to increase nlevels,
				 * thus dirtying the new TLIBs.  Or they
				 * might have to change the block size,
				 * thus dirtying the new lvl=0 blk=0.
				 */
				if (blkid == 0)
					match_offset = TRUE;
				break;
			case THT_FREE:
				/*
				 * We will dirty all the level 1 blocks in
				 * the free range and perhaps the first and
				 * last level 0 block.
				 */
				if (blkid >= beginblk && (blkid <= endblk ||
				    txh->txh_arg2 == DMU_OBJECT_END))
					match_offset = TRUE;
				break;
			case THT_SPILL:
				if (blkid == DMU_SPILL_BLKID)
					match_offset = TRUE;
				break;
			case THT_BONUS:
				if (blkid == DMU_BONUS_BLKID)
					match_offset = TRUE;
				break;
			case THT_ZAP:
				match_offset = TRUE;
				break;
			case THT_NEWOBJECT:
				match_object = TRUE;
				break;
			default:
				ASSERT(!"bad txh_type");
			}
		}
		if (match_object && match_offset) {
			DB_DNODE_EXIT(db);
			return;
		}
	}
	DB_DNODE_EXIT(db);
	panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n",
	    (u_longlong_t)db->db.db_object, db->db_level,
	    (u_longlong_t)db->db_blkid);
}
#endif

static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	dmu_tx_hold_t *txh;
	spa_t *spa = tx->tx_pool->dp_spa;
	uint64_t memory, asize, fsize, usize;
	uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge;

	ASSERT0(tx->tx_txg);

	if (tx->tx_err)
		return (tx->tx_err);

	if (spa_suspended(spa)) {
		/*
		 * If the user has indicated a blocking failure mode
		 * then return ERESTART which will block in dmu_tx_wait().
		 * Otherwise, return EIO so that an error can get
		 * propagated back to the VOP calls.
		 *
		 * Note that we always honor the txg_how flag regardless
		 * of the failuremode setting.
		 */
		if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
		    txg_how != TXG_WAIT)
			return (EIO);

		return (ERESTART);
	}

	tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
	tx->tx_needassign_txh = NULL;

	/*
	 * NB: No error returns are allowed after txg_hold_open, but
	 * before processing the dnode holds, due to the
	 * dmu_tx_unassign() logic.
	 */

	towrite = tofree = tooverwrite = tounref = tohold = fudge = 0;
	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;
		if (dn != NULL) {
			mutex_enter(&dn->dn_mtx);
			if (dn->dn_assigned_txg == tx->tx_txg - 1) {
				mutex_exit(&dn->dn_mtx);
				tx->tx_needassign_txh = txh;
				return (ERESTART);
			}
			if (dn->dn_assigned_txg == 0)
				dn->dn_assigned_txg = tx->tx_txg;
			ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
		towrite += txh->txh_space_towrite;
		tofree += txh->txh_space_tofree;
		tooverwrite += txh->txh_space_tooverwrite;
		tounref += txh->txh_space_tounref;
		tohold += txh->txh_memory_tohold;
		fudge += txh->txh_fudge;
	}

	/*
	 * NB: This check must be after we've held the dnodes, so that
	 * the dmu_tx_unassign() logic will work properly
	 */
	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
		return (ERESTART);

	/*
	 * If a snapshot has been taken since we made our estimates,
	 * assume that we won't be able to free or overwrite anything.
	 */
	if (tx->tx_objset &&
	    dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) >
	    tx->tx_lastsnap_txg) {
		towrite += tooverwrite;
		tooverwrite = tofree = 0;
	}

	/* needed allocation: worst-case estimate of write space */
	asize = spa_get_asize(tx->tx_pool->dp_spa, towrite + tooverwrite);
	/* freed space estimate: worst-case overwrite + free estimate */
	fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
	/* convert unrefd space to worst-case estimate */
	usize = spa_get_asize(tx->tx_pool->dp_spa, tounref);
	/* calculate memory footprint estimate */
	memory = towrite + tooverwrite + tohold;

#ifdef ZFS_DEBUG
	/*
	 * Add in 'tohold' to account for our dirty holds on this memory
	 * XXX - the "fudge" factor is to account for skipped blocks that
	 * we missed because dnode_next_offset() misses in-core-only blocks.
	 */
	tx->tx_space_towrite = asize +
	    spa_get_asize(tx->tx_pool->dp_spa, tohold + fudge);
	tx->tx_space_tofree = tofree;
	tx->tx_space_tooverwrite = tooverwrite;
	tx->tx_space_tounref = tounref;
#endif

	if (tx->tx_dir && asize != 0) {
		int err = dsl_dir_tempreserve_space(tx->tx_dir, memory,
		    asize, fsize, usize, &tx->tx_tempreserve_cookie, tx);
		if (err)
			return (err);
	}

	return (0);
}

static void
dmu_tx_unassign(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	if (tx->tx_txg == 0)
		return;

	txg_rele_to_quiesce(&tx->tx_txgh);

	/*
	 * Walk the transaction's hold list, removing a reference, and
	 * notifying waiters if the refcount drops to 0.
	 */
	for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;

		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
	}

	txg_rele_to_sync(&tx->tx_txgh);

	tx->tx_lasttried_txg = tx->tx_txg;
	tx->tx_txg = 0;
}

int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	int err;

	ASSERT(tx->tx_txg == 0);
	ASSERT(txg_how != 0);
	ASSERT(!dsl_pool_sync_context(tx->tx_pool));

	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
		dmu_tx_unassign(tx);

		if (err != ERESTART || txg_how != TXG_WAIT)
			return (err);

		dmu_tx_wait(tx);
	}

	txg_rele_to_quiesce(&tx->tx_txgh);

	return (0);
}

void
dmu_tx_wait(dmu_tx_t *tx)
{
	spa_t *spa = tx->tx_pool->dp_spa;

	ASSERT(tx->tx_txg == 0);

	/*
	 * It's possible that the pool has become active after this thread
	 * has tried to obtain a tx.  If that's the case then its
	 * tx_lasttried_txg would not have been assigned.
	 */
	if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
		txg_wait_synced(tx->tx_pool, spa_last_synced_txg(spa) + 1);
	} else if (tx->tx_needassign_txh) {
		dnode_t *dn = tx->tx_needassign_txh->txh_dnode;

		mutex_enter(&dn->dn_mtx);
		while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
			cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
		mutex_exit(&dn->dn_mtx);
		tx->tx_needassign_txh = NULL;
	} else {
		txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
	}
}

void
dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
{
#ifdef ZFS_DEBUG
	if (tx->tx_dir == NULL || delta == 0)
		return;

	if (delta > 0) {
		ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=,
		    tx->tx_space_towrite);
		(void) refcount_add_many(&tx->tx_space_written, delta, NULL);
	} else {
		(void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
	}
#endif
}

void
dmu_tx_commit(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg != 0);

	/*
	 * Go through the transaction's hold list and decrement the hold
	 * refcount, notifying waiters if no holds remain.
	 */
	while (txh = list_head(&tx->tx_holds)) {
		dnode_t *dn = txh->txh_dnode;

		list_remove(&tx->tx_holds, txh);
		kmem_free(txh, sizeof (dmu_tx_hold_t));
		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
		dnode_rele(dn, tx);
	}

	if (tx->tx_tempreserve_cookie)
		dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);

	if (!list_is_empty(&tx->tx_callbacks))
		txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks);

	if (tx->tx_anyobj == FALSE)
		txg_rele_to_sync(&tx->tx_txgh);

	list_destroy(&tx->tx_callbacks);
	list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
	dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
	    tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
	    tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}

void
dmu_tx_abort(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);

	while (txh = list_head(&tx->tx_holds)) {
		dnode_t *dn = txh->txh_dnode;

		list_remove(&tx->tx_holds, txh);
		kmem_free(txh, sizeof (dmu_tx_hold_t));
		if (dn != NULL)
			dnode_rele(dn, tx);
	}

	/*
	 * Call any registered callbacks with an error code.
	 */
	if (!list_is_empty(&tx->tx_callbacks))
		dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);

	list_destroy(&tx->tx_callbacks);
	list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}

uint64_t
dmu_tx_get_txg(dmu_tx_t *tx)
{
	ASSERT(tx->tx_txg != 0);
	return (tx->tx_txg);
}

void
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
{
	dmu_tx_callback_t *dcb;

	dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP);

	dcb->dcb_func = func;
	dcb->dcb_data = data;

	list_insert_tail(&tx->tx_callbacks, dcb);
}

void
dmu_tx_do_callbacks(list_t *cb_list, int error)
{
	dmu_tx_callback_t *dcb;

	while (dcb = list_head(cb_list)) {
		list_remove(cb_list, dcb);
		dcb->dcb_func(dcb->dcb_data, error);
		kmem_free(dcb, sizeof (dmu_tx_callback_t));
	}
}

static void
dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
{
	int i;

	if (!sa->sa_need_attr_registration)
		return;

	for (i = 0; i != sa->sa_num_attrs; i++) {
		if (!sa->sa_attr_table[i].sa_registered) {
			if (sa->sa_reg_attr_obj)
				dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj,
				    B_TRUE, sa->sa_attr_table[i].sa_name);
			else
				dmu_tx_hold_zap(tx, DMU_NEW_OBJECT,
				    B_TRUE, sa->sa_attr_table[i].sa_name);
		}
	}
}


void
dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
{
	dnode_t *dn;
	dmu_tx_hold_t *txh;

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
	    THT_SPILL, 0, 0);

	dn = txh->txh_dnode;

	if (dn == NULL)
		return;

	/* If blkptr doesn't exist then add space to towrite */
	if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
		txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
	} else {
		blkptr_t *bp;

		bp = &dn->dn_phys->dn_spill;
		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
		    bp, bp->blk_birth))
			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
		else
			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
		if (!BP_IS_HOLE(bp))
			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
	}
}

void
dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
{
	sa_os_t *sa = tx->tx_objset->os_sa;

	dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);

	if (tx->tx_objset->os_sa->sa_master_obj == 0)
		return;

	if (tx->tx_objset->os_sa->sa_layout_attr_obj)
		dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
	else {
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
	}

	dmu_tx_sa_registration_hold(sa, tx);

	if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill)
		return;

	(void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
	    THT_SPILL, 0, 0);
}

void
dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl,
    boolean_t may_grow)
{
	uint64_t object;
	sa_os_t *sa = tx->tx_objset->os_sa;

	ASSERT(hdl != NULL);

	object = sa_handle_object(hdl);

	dmu_tx_hold_bonus(tx, object);

	if (tx->tx_objset->os_sa->sa_master_obj == 0)
		return;

	if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 ||
	    tx->tx_objset->os_sa->sa_layout_attr_obj == 0) {
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
	}

	dmu_tx_sa_registration_hold(sa, tx);

	if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj)
		dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);

	if (sa->sa_force_spill || may_grow || hdl->sa_spill) {
		ASSERT(tx->tx_txg == 0);
		dmu_tx_hold_spill(tx, object);
	} else {
		dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus;
		dnode_t *dn;

		DB_DNODE_ENTER(db);
		dn = DB_DNODE(db);
		if (dn->dn_have_spill) {
			ASSERT(tx->tx_txg == 0);
			dmu_tx_hold_spill(tx, object);
		}
		DB_DNODE_EXIT(db);
	}
}
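
The listing above is the consumer-facing side of the DMU transaction machinery. For orientation, the sketch below (not part of dmu_tx.c) shows the canonical way DMU consumers such as the ZPL drive these functions: create a transaction, declare holds for everything it will dirty, assign it to an open txg, perform the change, and commit. The function name example_dmu_write and its parameters are placeholders for this illustration; the retry loop assumes TXG_NOWAIT semantics as implemented by dmu_tx_assign() above.

/*
 * Sketch of the standard dmu_tx usage pattern.  "os", "object", "off",
 * "len", and "buf" are assumed to be supplied by the caller.
 */
static int
example_dmu_write(objset_t *os, uint64_t object, uint64_t off, uint64_t len,
    const void *buf)
{
	dmu_tx_t *tx;
	int err;

top:
	tx = dmu_tx_create(os);				/* dmu_tx_create() above */
	dmu_tx_hold_write(tx, object, off, len);	/* declare the write */

	err = dmu_tx_assign(tx, TXG_NOWAIT);		/* enter an open txg */
	if (err != 0) {
		if (err == ERESTART) {
			/* txg was full or pool suspended: wait, then retry */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);	/* hard error, e.g. ENOSPC or EIO */
		return (err);
	}

	dmu_write(os, object, off, len, buf, tx);	/* make the declared change */
	dmu_tx_commit(tx);		/* drop holds; the txg may now sync */
	return (0);
}

With TXG_WAIT, dmu_tx_assign() performs the wait-and-retry loop internally (see the while loop in dmu_tx_assign() above), so a caller using that flag never sees ERESTART and the retry branch collapses to a plain error return.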