FreeBSD ZFS
The Zettabyte File System
|
00001 /* 00002 * CDDL HEADER START 00003 * 00004 * The contents of this file are subject to the terms of the 00005 * Common Development and Distribution License (the "License"). 00006 * You may not use this file except in compliance with the License. 00007 * 00008 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 00009 * or http://www.opensolaris.org/os/licensing. 00010 * See the License for the specific language governing permissions 00011 * and limitations under the License. 00012 * 00013 * When distributing Covered Code, include this CDDL HEADER in each 00014 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 00015 * If applicable, add the following below this CDDL HEADER, with the 00016 * fields enclosed by brackets "[]" replaced with your own identifying 00017 * information: Portions Copyright [yyyy] [name of copyright owner] 00018 * 00019 * CDDL HEADER END 00020 */ 00021 /* 00022 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 00023 * Copyright (c) 2012 by Delphix. All rights reserved. 00024 */ 00025 00026 #include <sys/zio.h> 00027 #include <sys/spa.h> 00028 #include <sys/dmu.h> 00029 #include <sys/zfs_context.h> 00030 #include <sys/zap.h> 00031 #include <sys/refcount.h> 00032 #include <sys/zap_impl.h> 00033 #include <sys/zap_leaf.h> 00034 #include <sys/avl.h> 00035 #include <sys/arc.h> 00036 00037 #ifdef _KERNEL 00038 #include <sys/sunddi.h> 00039 #endif 00040 00041 static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); 00042 00043 uint64_t 00044 zap_getflags(zap_t *zap) 00045 { 00046 if (zap->zap_ismicro) 00047 return (0); 00048 return (zap->zap_u.zap_fat.zap_phys->zap_flags); 00049 } 00050 00051 int 00052 zap_hashbits(zap_t *zap) 00053 { 00054 if (zap_getflags(zap) & ZAP_FLAG_HASH64) 00055 return (48); 00056 else 00057 return (28); 00058 } 00059 00060 uint32_t 00061 zap_maxcd(zap_t *zap) 00062 { 00063 if (zap_getflags(zap) & ZAP_FLAG_HASH64) 00064 return ((1<<16)-1); 00065 else 00066 return (-1U); 00067 } 00068 00069 static uint64_t 00070 zap_hash(zap_name_t *zn) 00071 { 00072 zap_t *zap = zn->zn_zap; 00073 uint64_t h = 0; 00074 00075 if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { 00076 ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); 00077 h = *(uint64_t *)zn->zn_key_orig; 00078 } else { 00079 h = zap->zap_salt; 00080 ASSERT(h != 0); 00081 ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 00082 00083 if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 00084 int i; 00085 const uint64_t *wp = zn->zn_key_norm; 00086 00087 ASSERT(zn->zn_key_intlen == 8); 00088 for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { 00089 int j; 00090 uint64_t word = *wp; 00091 00092 for (j = 0; j < zn->zn_key_intlen; j++) { 00093 h = (h >> 8) ^ 00094 zfs_crc64_table[(h ^ word) & 0xFF]; 00095 word >>= NBBY; 00096 } 00097 } 00098 } else { 00099 int i, len; 00100 const uint8_t *cp = zn->zn_key_norm; 00101 00102 /* 00103 * We previously stored the terminating null on 00104 * disk, but didn't hash it, so we need to 00105 * continue to not hash it. (The 00106 * zn_key_*_numints includes the terminating 00107 * null for non-binary keys.) 00108 */ 00109 len = zn->zn_key_norm_numints - 1; 00110 00111 ASSERT(zn->zn_key_intlen == 1); 00112 for (i = 0; i < len; cp++, i++) { 00113 h = (h >> 8) ^ 00114 zfs_crc64_table[(h ^ *cp) & 0xFF]; 00115 } 00116 } 00117 } 00118 /* 00119 * Don't use all 64 bits, since we need some in the cookie for 00120 * the collision differentiator. We MUST use the high bits, 00121 * since those are the ones that we first pay attention to when 00122 * chosing the bucket. 00123 */ 00124 h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); 00125 00126 return (h); 00127 } 00128 00129 static int 00130 zap_normalize(zap_t *zap, const char *name, char *namenorm) 00131 { 00132 size_t inlen, outlen; 00133 int err; 00134 00135 ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); 00136 00137 inlen = strlen(name) + 1; 00138 outlen = ZAP_MAXNAMELEN; 00139 00140 err = 0; 00141 (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, 00142 zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | 00143 U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); 00144 00145 return (err); 00146 } 00147 00148 boolean_t 00149 zap_match(zap_name_t *zn, const char *matchname) 00150 { 00151 ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); 00152 00153 if (zn->zn_matchtype == MT_FIRST) { 00154 char norm[ZAP_MAXNAMELEN]; 00155 00156 if (zap_normalize(zn->zn_zap, matchname, norm) != 0) 00157 return (B_FALSE); 00158 00159 return (strcmp(zn->zn_key_norm, norm) == 0); 00160 } else { 00161 /* MT_BEST or MT_EXACT */ 00162 return (strcmp(zn->zn_key_orig, matchname) == 0); 00163 } 00164 } 00165 00166 void 00167 zap_name_free(zap_name_t *zn) 00168 { 00169 kmem_free(zn, sizeof (zap_name_t)); 00170 } 00171 00172 zap_name_t * 00173 zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) 00174 { 00175 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 00176 00177 zn->zn_zap = zap; 00178 zn->zn_key_intlen = sizeof (*key); 00179 zn->zn_key_orig = key; 00180 zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; 00181 zn->zn_matchtype = mt; 00182 if (zap->zap_normflags) { 00183 if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { 00184 zap_name_free(zn); 00185 return (NULL); 00186 } 00187 zn->zn_key_norm = zn->zn_normbuf; 00188 zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; 00189 } else { 00190 if (mt != MT_EXACT) { 00191 zap_name_free(zn); 00192 return (NULL); 00193 } 00194 zn->zn_key_norm = zn->zn_key_orig; 00195 zn->zn_key_norm_numints = zn->zn_key_orig_numints; 00196 } 00197 00198 zn->zn_hash = zap_hash(zn); 00199 return (zn); 00200 } 00201 00202 zap_name_t * 00203 zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) 00204 { 00205 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 00206 00207 ASSERT(zap->zap_normflags == 0); 00208 zn->zn_zap = zap; 00209 zn->zn_key_intlen = sizeof (*key); 00210 zn->zn_key_orig = zn->zn_key_norm = key; 00211 zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; 00212 zn->zn_matchtype = MT_EXACT; 00213 00214 zn->zn_hash = zap_hash(zn); 00215 return (zn); 00216 } 00217 00218 static void 00219 mzap_byteswap(mzap_phys_t *buf, size_t size) 00220 { 00221 int i, max; 00222 buf->mz_block_type = BSWAP_64(buf->mz_block_type); 00223 buf->mz_salt = BSWAP_64(buf->mz_salt); 00224 buf->mz_normflags = BSWAP_64(buf->mz_normflags); 00225 max = (size / MZAP_ENT_LEN) - 1; 00226 for (i = 0; i < max; i++) { 00227 buf->mz_chunk[i].mze_value = 00228 BSWAP_64(buf->mz_chunk[i].mze_value); 00229 buf->mz_chunk[i].mze_cd = 00230 BSWAP_32(buf->mz_chunk[i].mze_cd); 00231 } 00232 } 00233 00234 void 00235 zap_byteswap(void *buf, size_t size) 00236 { 00237 uint64_t block_type; 00238 00239 block_type = *(uint64_t *)buf; 00240 00241 if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { 00242 /* ASSERT(magic == ZAP_LEAF_MAGIC); */ 00243 mzap_byteswap(buf, size); 00244 } else { 00245 fzap_byteswap(buf, size); 00246 } 00247 } 00248 00249 static int 00250 mze_compare(const void *arg1, const void *arg2) 00251 { 00252 const mzap_ent_t *mze1 = arg1; 00253 const mzap_ent_t *mze2 = arg2; 00254 00255 if (mze1->mze_hash > mze2->mze_hash) 00256 return (+1); 00257 if (mze1->mze_hash < mze2->mze_hash) 00258 return (-1); 00259 if (mze1->mze_cd > mze2->mze_cd) 00260 return (+1); 00261 if (mze1->mze_cd < mze2->mze_cd) 00262 return (-1); 00263 return (0); 00264 } 00265 00266 static int 00267 mze_insert(zap_t *zap, int chunkid, uint64_t hash) 00268 { 00269 mzap_ent_t *mze; 00270 avl_index_t idx; 00271 00272 ASSERT(zap->zap_ismicro); 00273 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 00274 00275 mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); 00276 mze->mze_chunkid = chunkid; 00277 mze->mze_hash = hash; 00278 mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; 00279 ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); 00280 if (avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) { 00281 kmem_free(mze, sizeof (mzap_ent_t)); 00282 return (EEXIST); 00283 } 00284 avl_insert(&zap->zap_m.zap_avl, mze, idx); 00285 return (0); 00286 } 00287 00288 static mzap_ent_t * 00289 mze_find(zap_name_t *zn) 00290 { 00291 mzap_ent_t mze_tofind; 00292 mzap_ent_t *mze; 00293 avl_index_t idx; 00294 avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; 00295 00296 ASSERT(zn->zn_zap->zap_ismicro); 00297 ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); 00298 00299 mze_tofind.mze_hash = zn->zn_hash; 00300 mze_tofind.mze_cd = 0; 00301 00302 again: 00303 mze = avl_find(avl, &mze_tofind, &idx); 00304 if (mze == NULL) 00305 mze = avl_nearest(avl, idx, AVL_AFTER); 00306 for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { 00307 ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); 00308 if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) 00309 return (mze); 00310 } 00311 if (zn->zn_matchtype == MT_BEST) { 00312 zn->zn_matchtype = MT_FIRST; 00313 goto again; 00314 } 00315 return (NULL); 00316 } 00317 00318 static uint32_t 00319 mze_find_unused_cd(zap_t *zap, uint64_t hash) 00320 { 00321 mzap_ent_t mze_tofind; 00322 mzap_ent_t *mze; 00323 avl_index_t idx; 00324 avl_tree_t *avl = &zap->zap_m.zap_avl; 00325 uint32_t cd; 00326 00327 ASSERT(zap->zap_ismicro); 00328 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 00329 00330 mze_tofind.mze_hash = hash; 00331 mze_tofind.mze_cd = 0; 00332 00333 cd = 0; 00334 for (mze = avl_find(avl, &mze_tofind, &idx); 00335 mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 00336 if (mze->mze_cd != cd) 00337 break; 00338 cd++; 00339 } 00340 00341 return (cd); 00342 } 00343 00344 static void 00345 mze_remove(zap_t *zap, mzap_ent_t *mze) 00346 { 00347 ASSERT(zap->zap_ismicro); 00348 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 00349 00350 avl_remove(&zap->zap_m.zap_avl, mze); 00351 kmem_free(mze, sizeof (mzap_ent_t)); 00352 } 00353 00354 static void 00355 mze_destroy(zap_t *zap) 00356 { 00357 mzap_ent_t *mze; 00358 void *avlcookie = NULL; 00359 00360 while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)) 00361 kmem_free(mze, sizeof (mzap_ent_t)); 00362 avl_destroy(&zap->zap_m.zap_avl); 00363 } 00364 00365 static zap_t * 00366 mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) 00367 { 00368 zap_t *winner; 00369 zap_t *zap; 00370 int i; 00371 00372 ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); 00373 00374 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); 00375 rw_init(&zap->zap_rwlock, 0, 0, 0); 00376 rw_enter(&zap->zap_rwlock, RW_WRITER); 00377 zap->zap_objset = os; 00378 zap->zap_object = obj; 00379 zap->zap_dbuf = db; 00380 00381 if (*(uint64_t *)db->db_data != ZBT_MICRO) { 00382 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 00383 zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; 00384 } else { 00385 zap->zap_ismicro = TRUE; 00386 } 00387 00388 /* 00389 * Make sure that zap_ismicro is set before we let others see 00390 * it, because zap_lockdir() checks zap_ismicro without the lock 00391 * held. 00392 */ 00393 winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); 00394 00395 if (winner != NULL) { 00396 rw_exit(&zap->zap_rwlock); 00397 rw_destroy(&zap->zap_rwlock); 00398 if (!zap->zap_ismicro) 00399 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 00400 kmem_free(zap, sizeof (zap_t)); 00401 return (winner); 00402 } 00403 00404 if (zap->zap_ismicro) { 00405 zap->zap_salt = zap->zap_m.zap_phys->mz_salt; 00406 zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; 00407 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; 00408 avl_create(&zap->zap_m.zap_avl, mze_compare, 00409 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); 00410 00411 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 00412 mzap_ent_phys_t *mze = 00413 &zap->zap_m.zap_phys->mz_chunk[i]; 00414 if (mze->mze_name[0]) { 00415 zap_name_t *zn; 00416 00417 zn = zap_name_alloc(zap, mze->mze_name, 00418 MT_EXACT); 00419 if (mze_insert(zap, i, zn->zn_hash) == 0) 00420 zap->zap_m.zap_num_entries++; 00421 else { 00422 printf("ZFS WARNING: Duplicated ZAP " 00423 "entry detected (%s).\n", 00424 mze->mze_name); 00425 } 00426 zap_name_free(zn); 00427 } 00428 } 00429 } else { 00430 zap->zap_salt = zap->zap_f.zap_phys->zap_salt; 00431 zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags; 00432 00433 ASSERT3U(sizeof (struct zap_leaf_header), ==, 00434 2*ZAP_LEAF_CHUNKSIZE); 00435 00436 /* 00437 * The embedded pointer table should not overlap the 00438 * other members. 00439 */ 00440 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, 00441 &zap->zap_f.zap_phys->zap_salt); 00442 00443 /* 00444 * The embedded pointer table should end at the end of 00445 * the block 00446 */ 00447 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 00448 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - 00449 (uintptr_t)zap->zap_f.zap_phys, ==, 00450 zap->zap_dbuf->db_size); 00451 } 00452 rw_exit(&zap->zap_rwlock); 00453 return (zap); 00454 } 00455 00456 int 00457 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 00458 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) 00459 { 00460 zap_t *zap; 00461 dmu_buf_t *db; 00462 krw_t lt; 00463 int err; 00464 00465 *zapp = NULL; 00466 00467 err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); 00468 if (err) 00469 return (err); 00470 00471 #ifdef ZFS_DEBUG 00472 { 00473 dmu_object_info_t doi; 00474 dmu_object_info_from_db(db, &doi); 00475 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 00476 } 00477 #endif 00478 00479 zap = dmu_buf_get_user(db); 00480 if (zap == NULL) 00481 zap = mzap_open(os, obj, db); 00482 00483 /* 00484 * We're checking zap_ismicro without the lock held, in order to 00485 * tell what type of lock we want. Once we have some sort of 00486 * lock, see if it really is the right type. In practice this 00487 * can only be different if it was upgraded from micro to fat, 00488 * and micro wanted WRITER but fat only needs READER. 00489 */ 00490 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; 00491 rw_enter(&zap->zap_rwlock, lt); 00492 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { 00493 /* it was upgraded, now we only need reader */ 00494 ASSERT(lt == RW_WRITER); 00495 ASSERT(RW_READER == 00496 (!zap->zap_ismicro && fatreader) ? RW_READER : lti); 00497 rw_downgrade(&zap->zap_rwlock); 00498 lt = RW_READER; 00499 } 00500 00501 zap->zap_objset = os; 00502 00503 if (lt == RW_WRITER) 00504 dmu_buf_will_dirty(db, tx); 00505 00506 ASSERT3P(zap->zap_dbuf, ==, db); 00507 00508 ASSERT(!zap->zap_ismicro || 00509 zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); 00510 if (zap->zap_ismicro && tx && adding && 00511 zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { 00512 uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; 00513 if (newsz > MZAP_MAX_BLKSZ) { 00514 dprintf("upgrading obj %llu: num_entries=%u\n", 00515 obj, zap->zap_m.zap_num_entries); 00516 *zapp = zap; 00517 return (mzap_upgrade(zapp, tx, 0)); 00518 } 00519 err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); 00520 ASSERT0(err); 00521 zap->zap_m.zap_num_chunks = 00522 db->db_size / MZAP_ENT_LEN - 1; 00523 } 00524 00525 *zapp = zap; 00526 return (0); 00527 } 00528 00529 void 00530 zap_unlockdir(zap_t *zap) 00531 { 00532 rw_exit(&zap->zap_rwlock); 00533 dmu_buf_rele(zap->zap_dbuf, NULL); 00534 } 00535 00536 static int 00537 mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) 00538 { 00539 mzap_phys_t *mzp; 00540 int i, sz, nchunks; 00541 int err = 0; 00542 zap_t *zap = *zapp; 00543 00544 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 00545 00546 sz = zap->zap_dbuf->db_size; 00547 mzp = kmem_alloc(sz, KM_SLEEP); 00548 bcopy(zap->zap_dbuf->db_data, mzp, sz); 00549 nchunks = zap->zap_m.zap_num_chunks; 00550 00551 if (!flags) { 00552 err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 00553 1ULL << fzap_default_block_shift, 0, tx); 00554 if (err) { 00555 kmem_free(mzp, sz); 00556 return (err); 00557 } 00558 } 00559 00560 dprintf("upgrading obj=%llu with %u chunks\n", 00561 zap->zap_object, nchunks); 00562 /* XXX destroy the avl later, so we can use the stored hash value */ 00563 mze_destroy(zap); 00564 00565 fzap_upgrade(zap, tx, flags); 00566 00567 for (i = 0; i < nchunks; i++) { 00568 mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; 00569 zap_name_t *zn; 00570 if (mze->mze_name[0] == 0) 00571 continue; 00572 dprintf("adding %s=%llu\n", 00573 mze->mze_name, mze->mze_value); 00574 zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); 00575 err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); 00576 zap = zn->zn_zap; /* fzap_add_cd() may change zap */ 00577 zap_name_free(zn); 00578 if (err) 00579 break; 00580 } 00581 kmem_free(mzp, sz); 00582 *zapp = zap; 00583 return (err); 00584 } 00585 00586 static void 00587 mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, 00588 dmu_tx_t *tx) 00589 { 00590 dmu_buf_t *db; 00591 mzap_phys_t *zp; 00592 00593 VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); 00594 00595 #ifdef ZFS_DEBUG 00596 { 00597 dmu_object_info_t doi; 00598 dmu_object_info_from_db(db, &doi); 00599 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 00600 } 00601 #endif 00602 00603 dmu_buf_will_dirty(db, tx); 00604 zp = db->db_data; 00605 zp->mz_block_type = ZBT_MICRO; 00606 zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; 00607 zp->mz_normflags = normflags; 00608 dmu_buf_rele(db, FTAG); 00609 00610 if (flags != 0) { 00611 zap_t *zap; 00612 /* Only fat zap supports flags; upgrade immediately. */ 00613 VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, 00614 B_FALSE, B_FALSE, &zap)); 00615 VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); 00616 zap_unlockdir(zap); 00617 } 00618 } 00619 00620 int 00621 zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, 00622 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 00623 { 00624 return (zap_create_claim_norm(os, obj, 00625 0, ot, bonustype, bonuslen, tx)); 00626 } 00627 00628 int 00629 zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, 00630 dmu_object_type_t ot, 00631 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 00632 { 00633 int err; 00634 00635 err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); 00636 if (err != 0) 00637 return (err); 00638 mzap_create_impl(os, obj, normflags, 0, tx); 00639 return (0); 00640 } 00641 00642 uint64_t 00643 zap_create(objset_t *os, dmu_object_type_t ot, 00644 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 00645 { 00646 return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); 00647 } 00648 00649 uint64_t 00650 zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, 00651 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 00652 { 00653 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 00654 00655 mzap_create_impl(os, obj, normflags, 0, tx); 00656 return (obj); 00657 } 00658 00659 uint64_t 00660 zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, 00661 dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 00662 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 00663 { 00664 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 00665 00666 ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && 00667 leaf_blockshift <= SPA_MAXBLOCKSHIFT && 00668 indirect_blockshift >= SPA_MINBLOCKSHIFT && 00669 indirect_blockshift <= SPA_MAXBLOCKSHIFT); 00670 00671 VERIFY(dmu_object_set_blocksize(os, obj, 00672 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); 00673 00674 mzap_create_impl(os, obj, normflags, flags, tx); 00675 return (obj); 00676 } 00677 00678 int 00679 zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) 00680 { 00681 /* 00682 * dmu_object_free will free the object number and free the 00683 * data. Freeing the data will cause our pageout function to be 00684 * called, which will destroy our data (zap_leaf_t's and zap_t). 00685 */ 00686 00687 return (dmu_object_free(os, zapobj, tx)); 00688 } 00689 00690 _NOTE(ARGSUSED(0)) 00691 void 00692 zap_evict(dmu_buf_t *db, void *vzap) 00693 { 00694 zap_t *zap = vzap; 00695 00696 rw_destroy(&zap->zap_rwlock); 00697 00698 if (zap->zap_ismicro) 00699 mze_destroy(zap); 00700 else 00701 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 00702 00703 kmem_free(zap, sizeof (zap_t)); 00704 } 00705 00706 int 00707 zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) 00708 { 00709 zap_t *zap; 00710 int err; 00711 00712 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 00713 if (err) 00714 return (err); 00715 if (!zap->zap_ismicro) { 00716 err = fzap_count(zap, count); 00717 } else { 00718 *count = zap->zap_m.zap_num_entries; 00719 } 00720 zap_unlockdir(zap); 00721 return (err); 00722 } 00723 00728 static boolean_t 00729 mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) 00730 { 00731 mzap_ent_t *other; 00732 int direction = AVL_BEFORE; 00733 boolean_t allocdzn = B_FALSE; 00734 00735 if (zap->zap_normflags == 0) 00736 return (B_FALSE); 00737 00738 again: 00739 for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); 00740 other && other->mze_hash == mze->mze_hash; 00741 other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { 00742 00743 if (zn == NULL) { 00744 zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, 00745 MT_FIRST); 00746 allocdzn = B_TRUE; 00747 } 00748 if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { 00749 if (allocdzn) 00750 zap_name_free(zn); 00751 return (B_TRUE); 00752 } 00753 } 00754 00755 if (direction == AVL_BEFORE) { 00756 direction = AVL_AFTER; 00757 goto again; 00758 } 00759 00760 if (allocdzn) 00761 zap_name_free(zn); 00762 return (B_FALSE); 00763 } 00764 00769 int 00770 zap_lookup(objset_t *os, uint64_t zapobj, const char *name, 00771 uint64_t integer_size, uint64_t num_integers, void *buf) 00772 { 00773 return (zap_lookup_norm(os, zapobj, name, integer_size, 00774 num_integers, buf, MT_EXACT, NULL, 0, NULL)); 00775 } 00776 00777 int 00778 zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, 00779 uint64_t integer_size, uint64_t num_integers, void *buf, 00780 matchtype_t mt, char *realname, int rn_len, 00781 boolean_t *ncp) 00782 { 00783 zap_t *zap; 00784 int err; 00785 mzap_ent_t *mze; 00786 zap_name_t *zn; 00787 00788 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 00789 if (err) 00790 return (err); 00791 zn = zap_name_alloc(zap, name, mt); 00792 if (zn == NULL) { 00793 zap_unlockdir(zap); 00794 return (ENOTSUP); 00795 } 00796 00797 if (!zap->zap_ismicro) { 00798 err = fzap_lookup(zn, integer_size, num_integers, buf, 00799 realname, rn_len, ncp); 00800 } else { 00801 mze = mze_find(zn); 00802 if (mze == NULL) { 00803 err = ENOENT; 00804 } else { 00805 if (num_integers < 1) { 00806 err = EOVERFLOW; 00807 } else if (integer_size != 8) { 00808 err = EINVAL; 00809 } else { 00810 *(uint64_t *)buf = 00811 MZE_PHYS(zap, mze)->mze_value; 00812 (void) strlcpy(realname, 00813 MZE_PHYS(zap, mze)->mze_name, rn_len); 00814 if (ncp) { 00815 *ncp = mzap_normalization_conflict(zap, 00816 zn, mze); 00817 } 00818 } 00819 } 00820 } 00821 zap_name_free(zn); 00822 zap_unlockdir(zap); 00823 return (err); 00824 } 00825 00826 int 00827 zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 00828 int key_numints) 00829 { 00830 zap_t *zap; 00831 int err; 00832 zap_name_t *zn; 00833 00834 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 00835 if (err) 00836 return (err); 00837 zn = zap_name_alloc_uint64(zap, key, key_numints); 00838 if (zn == NULL) { 00839 zap_unlockdir(zap); 00840 return (ENOTSUP); 00841 } 00842 00843 fzap_prefetch(zn); 00844 zap_name_free(zn); 00845 zap_unlockdir(zap); 00846 return (err); 00847 } 00848 00849 int 00850 zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 00851 int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) 00852 { 00853 zap_t *zap; 00854 int err; 00855 zap_name_t *zn; 00856 00857 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 00858 if (err) 00859 return (err); 00860 zn = zap_name_alloc_uint64(zap, key, key_numints); 00861 if (zn == NULL) { 00862 zap_unlockdir(zap); 00863 return (ENOTSUP); 00864 } 00865 00866 err = fzap_lookup(zn, integer_size, num_integers, buf, 00867 NULL, 0, NULL); 00868 zap_name_free(zn); 00869 zap_unlockdir(zap); 00870 return (err); 00871 } 00872 00873 int 00874 zap_contains(objset_t *os, uint64_t zapobj, const char *name) 00875 { 00876 int err = (zap_lookup_norm(os, zapobj, name, 0, 00877 0, NULL, MT_EXACT, NULL, 0, NULL)); 00878 if (err == EOVERFLOW || err == EINVAL) 00879 err = 0; /* found, but skipped reading the value */ 00880 return (err); 00881 } 00882 00883 int 00884 zap_length(objset_t *os, uint64_t zapobj, const char *name, 00885 uint64_t *integer_size, uint64_t *num_integers) 00886 { 00887 zap_t *zap; 00888 int err; 00889 mzap_ent_t *mze; 00890 zap_name_t *zn; 00891 00892 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 00893 if (err) 00894 return (err); 00895 zn = zap_name_alloc(zap, name, MT_EXACT); 00896 if (zn == NULL) { 00897 zap_unlockdir(zap); 00898 return (ENOTSUP); 00899 } 00900 if (!zap->zap_ismicro) { 00901 err = fzap_length(zn, integer_size, num_integers); 00902 } else { 00903 mze = mze_find(zn); 00904 if (mze == NULL) { 00905 err = ENOENT; 00906 } else { 00907 if (integer_size) 00908 *integer_size = 8; 00909 if (num_integers) 00910 *num_integers = 1; 00911 } 00912 } 00913 zap_name_free(zn); 00914 zap_unlockdir(zap); 00915 return (err); 00916 } 00917 00918 int 00919 zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 00920 int key_numints, uint64_t *integer_size, uint64_t *num_integers) 00921 { 00922 zap_t *zap; 00923 int err; 00924 zap_name_t *zn; 00925 00926 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 00927 if (err) 00928 return (err); 00929 zn = zap_name_alloc_uint64(zap, key, key_numints); 00930 if (zn == NULL) { 00931 zap_unlockdir(zap); 00932 return (ENOTSUP); 00933 } 00934 err = fzap_length(zn, integer_size, num_integers); 00935 zap_name_free(zn); 00936 zap_unlockdir(zap); 00937 return (err); 00938 } 00939 00940 static void 00941 mzap_addent(zap_name_t *zn, uint64_t value) 00942 { 00943 int i; 00944 zap_t *zap = zn->zn_zap; 00945 int start = zap->zap_m.zap_alloc_next; 00946 uint32_t cd; 00947 00948 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 00949 00950 #ifdef ZFS_DEBUG 00951 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 00952 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 00953 ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); 00954 } 00955 #endif 00956 00957 cd = mze_find_unused_cd(zap, zn->zn_hash); 00958 /* given the limited size of the microzap, this can't happen */ 00959 ASSERT(cd < zap_maxcd(zap)); 00960 00961 again: 00962 for (i = start; i < zap->zap_m.zap_num_chunks; i++) { 00963 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 00964 if (mze->mze_name[0] == 0) { 00965 mze->mze_value = value; 00966 mze->mze_cd = cd; 00967 (void) strcpy(mze->mze_name, zn->zn_key_orig); 00968 zap->zap_m.zap_num_entries++; 00969 zap->zap_m.zap_alloc_next = i+1; 00970 if (zap->zap_m.zap_alloc_next == 00971 zap->zap_m.zap_num_chunks) 00972 zap->zap_m.zap_alloc_next = 0; 00973 VERIFY(0 == mze_insert(zap, i, zn->zn_hash)); 00974 return; 00975 } 00976 } 00977 if (start != 0) { 00978 start = 0; 00979 goto again; 00980 } 00981 ASSERT(!"out of entries!"); 00982 } 00983 00984 int 00985 zap_add(objset_t *os, uint64_t zapobj, const char *key, 00986 int integer_size, uint64_t num_integers, 00987 const void *val, dmu_tx_t *tx) 00988 { 00989 zap_t *zap; 00990 int err; 00991 mzap_ent_t *mze; 00992 const uint64_t *intval = val; 00993 zap_name_t *zn; 00994 00995 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 00996 if (err) 00997 return (err); 00998 zn = zap_name_alloc(zap, key, MT_EXACT); 00999 if (zn == NULL) { 01000 zap_unlockdir(zap); 01001 return (ENOTSUP); 01002 } 01003 if (!zap->zap_ismicro) { 01004 err = fzap_add(zn, integer_size, num_integers, val, tx); 01005 zap = zn->zn_zap; /* fzap_add() may change zap */ 01006 } else if (integer_size != 8 || num_integers != 1 || 01007 strlen(key) >= MZAP_NAME_LEN) { 01008 err = mzap_upgrade(&zn->zn_zap, tx, 0); 01009 if (err == 0) 01010 err = fzap_add(zn, integer_size, num_integers, val, tx); 01011 zap = zn->zn_zap; /* fzap_add() may change zap */ 01012 } else { 01013 mze = mze_find(zn); 01014 if (mze != NULL) { 01015 err = EEXIST; 01016 } else { 01017 mzap_addent(zn, *intval); 01018 } 01019 } 01020 ASSERT(zap == zn->zn_zap); 01021 zap_name_free(zn); 01022 if (zap != NULL) /* may be NULL if fzap_add() failed */ 01023 zap_unlockdir(zap); 01024 return (err); 01025 } 01026 01027 int 01028 zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 01029 int key_numints, int integer_size, uint64_t num_integers, 01030 const void *val, dmu_tx_t *tx) 01031 { 01032 zap_t *zap; 01033 int err; 01034 zap_name_t *zn; 01035 01036 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 01037 if (err) 01038 return (err); 01039 zn = zap_name_alloc_uint64(zap, key, key_numints); 01040 if (zn == NULL) { 01041 zap_unlockdir(zap); 01042 return (ENOTSUP); 01043 } 01044 err = fzap_add(zn, integer_size, num_integers, val, tx); 01045 zap = zn->zn_zap; /* fzap_add() may change zap */ 01046 zap_name_free(zn); 01047 if (zap != NULL) /* may be NULL if fzap_add() failed */ 01048 zap_unlockdir(zap); 01049 return (err); 01050 } 01051 01052 int 01053 zap_update(objset_t *os, uint64_t zapobj, const char *name, 01054 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 01055 { 01056 zap_t *zap; 01057 mzap_ent_t *mze; 01058 uint64_t oldval; 01059 const uint64_t *intval = val; 01060 zap_name_t *zn; 01061 int err; 01062 01063 #ifdef ZFS_DEBUG 01064 /* 01065 * If there is an old value, it shouldn't change across the 01066 * lockdir (eg, due to bprewrite's xlation). 01067 */ 01068 if (integer_size == 8 && num_integers == 1) 01069 (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); 01070 #endif 01071 01072 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 01073 if (err) 01074 return (err); 01075 zn = zap_name_alloc(zap, name, MT_EXACT); 01076 if (zn == NULL) { 01077 zap_unlockdir(zap); 01078 return (ENOTSUP); 01079 } 01080 if (!zap->zap_ismicro) { 01081 err = fzap_update(zn, integer_size, num_integers, val, tx); 01082 zap = zn->zn_zap; /* fzap_update() may change zap */ 01083 } else if (integer_size != 8 || num_integers != 1 || 01084 strlen(name) >= MZAP_NAME_LEN) { 01085 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 01086 zapobj, integer_size, num_integers, name); 01087 err = mzap_upgrade(&zn->zn_zap, tx, 0); 01088 if (err == 0) 01089 err = fzap_update(zn, integer_size, num_integers, 01090 val, tx); 01091 zap = zn->zn_zap; /* fzap_update() may change zap */ 01092 } else { 01093 mze = mze_find(zn); 01094 if (mze != NULL) { 01095 ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); 01096 MZE_PHYS(zap, mze)->mze_value = *intval; 01097 } else { 01098 mzap_addent(zn, *intval); 01099 } 01100 } 01101 ASSERT(zap == zn->zn_zap); 01102 zap_name_free(zn); 01103 if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 01104 zap_unlockdir(zap); 01105 return (err); 01106 } 01107 01108 int 01109 zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 01110 int key_numints, 01111 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 01112 { 01113 zap_t *zap; 01114 zap_name_t *zn; 01115 int err; 01116 01117 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 01118 if (err) 01119 return (err); 01120 zn = zap_name_alloc_uint64(zap, key, key_numints); 01121 if (zn == NULL) { 01122 zap_unlockdir(zap); 01123 return (ENOTSUP); 01124 } 01125 err = fzap_update(zn, integer_size, num_integers, val, tx); 01126 zap = zn->zn_zap; /* fzap_update() may change zap */ 01127 zap_name_free(zn); 01128 if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 01129 zap_unlockdir(zap); 01130 return (err); 01131 } 01132 01133 int 01134 zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) 01135 { 01136 return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); 01137 } 01138 01139 int 01140 zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, 01141 matchtype_t mt, dmu_tx_t *tx) 01142 { 01143 zap_t *zap; 01144 int err; 01145 mzap_ent_t *mze; 01146 zap_name_t *zn; 01147 01148 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 01149 if (err) 01150 return (err); 01151 zn = zap_name_alloc(zap, name, mt); 01152 if (zn == NULL) { 01153 zap_unlockdir(zap); 01154 return (ENOTSUP); 01155 } 01156 if (!zap->zap_ismicro) { 01157 err = fzap_remove(zn, tx); 01158 } else { 01159 mze = mze_find(zn); 01160 if (mze == NULL) { 01161 err = ENOENT; 01162 } else { 01163 zap->zap_m.zap_num_entries--; 01164 bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], 01165 sizeof (mzap_ent_phys_t)); 01166 mze_remove(zap, mze); 01167 } 01168 } 01169 zap_name_free(zn); 01170 zap_unlockdir(zap); 01171 return (err); 01172 } 01173 01174 int 01175 zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 01176 int key_numints, dmu_tx_t *tx) 01177 { 01178 zap_t *zap; 01179 int err; 01180 zap_name_t *zn; 01181 01182 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 01183 if (err) 01184 return (err); 01185 zn = zap_name_alloc_uint64(zap, key, key_numints); 01186 if (zn == NULL) { 01187 zap_unlockdir(zap); 01188 return (ENOTSUP); 01189 } 01190 err = fzap_remove(zn, tx); 01191 zap_name_free(zn); 01192 zap_unlockdir(zap); 01193 return (err); 01194 } 01195 01196 /* 01197 * Routines for iterating over the attributes. 01198 */ 01199 01200 void 01201 zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 01202 uint64_t serialized) 01203 { 01204 zc->zc_objset = os; 01205 zc->zc_zap = NULL; 01206 zc->zc_leaf = NULL; 01207 zc->zc_zapobj = zapobj; 01208 zc->zc_serialized = serialized; 01209 zc->zc_hash = 0; 01210 zc->zc_cd = 0; 01211 } 01212 01213 void 01214 zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 01215 { 01216 zap_cursor_init_serialized(zc, os, zapobj, 0); 01217 } 01218 01219 void 01220 zap_cursor_fini(zap_cursor_t *zc) 01221 { 01222 if (zc->zc_zap) { 01223 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 01224 zap_unlockdir(zc->zc_zap); 01225 zc->zc_zap = NULL; 01226 } 01227 if (zc->zc_leaf) { 01228 rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 01229 zap_put_leaf(zc->zc_leaf); 01230 zc->zc_leaf = NULL; 01231 } 01232 zc->zc_objset = NULL; 01233 } 01234 01235 uint64_t 01236 zap_cursor_serialize(zap_cursor_t *zc) 01237 { 01238 if (zc->zc_hash == -1ULL) 01239 return (-1ULL); 01240 if (zc->zc_zap == NULL) 01241 return (zc->zc_serialized); 01242 ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); 01243 ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); 01244 01245 /* 01246 * We want to keep the high 32 bits of the cursor zero if we can, so 01247 * that 32-bit programs can access this. So usually use a small 01248 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits 01249 * of the cursor. 01250 * 01251 * [ collision differentiator | zap_hashbits()-bit hash value ] 01252 */ 01253 return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | 01254 ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); 01255 } 01256 01257 int 01258 zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) 01259 { 01260 int err; 01261 avl_index_t idx; 01262 mzap_ent_t mze_tofind; 01263 mzap_ent_t *mze; 01264 01265 if (zc->zc_hash == -1ULL) 01266 return (ENOENT); 01267 01268 if (zc->zc_zap == NULL) { 01269 int hb; 01270 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 01271 RW_READER, TRUE, FALSE, &zc->zc_zap); 01272 if (err) 01273 return (err); 01274 01275 /* 01276 * To support zap_cursor_init_serialized, advance, retrieve, 01277 * we must add to the existing zc_cd, which may already 01278 * be 1 due to the zap_cursor_advance. 01279 */ 01280 ASSERT(zc->zc_hash == 0); 01281 hb = zap_hashbits(zc->zc_zap); 01282 zc->zc_hash = zc->zc_serialized << (64 - hb); 01283 zc->zc_cd += zc->zc_serialized >> hb; 01284 if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ 01285 zc->zc_cd = 0; 01286 } else { 01287 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 01288 } 01289 if (!zc->zc_zap->zap_ismicro) { 01290 err = fzap_cursor_retrieve(zc->zc_zap, zc, za); 01291 } else { 01292 err = ENOENT; 01293 01294 mze_tofind.mze_hash = zc->zc_hash; 01295 mze_tofind.mze_cd = zc->zc_cd; 01296 01297 mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); 01298 if (mze == NULL) { 01299 mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, 01300 idx, AVL_AFTER); 01301 } 01302 if (mze) { 01303 mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); 01304 ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); 01305 za->za_normalization_conflict = 01306 mzap_normalization_conflict(zc->zc_zap, NULL, mze); 01307 za->za_integer_length = 8; 01308 za->za_num_integers = 1; 01309 za->za_first_integer = mzep->mze_value; 01310 (void) strcpy(za->za_name, mzep->mze_name); 01311 zc->zc_hash = mze->mze_hash; 01312 zc->zc_cd = mze->mze_cd; 01313 err = 0; 01314 } else { 01315 zc->zc_hash = -1ULL; 01316 } 01317 } 01318 rw_exit(&zc->zc_zap->zap_rwlock); 01319 return (err); 01320 } 01321 01322 void 01323 zap_cursor_advance(zap_cursor_t *zc) 01324 { 01325 if (zc->zc_hash == -1ULL) 01326 return; 01327 zc->zc_cd++; 01328 } 01329 01330 int 01331 zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) 01332 { 01333 int err = 0; 01334 mzap_ent_t *mze; 01335 zap_name_t *zn; 01336 01337 if (zc->zc_zap == NULL) { 01338 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 01339 RW_READER, TRUE, FALSE, &zc->zc_zap); 01340 if (err) 01341 return (err); 01342 } else { 01343 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 01344 } 01345 01346 zn = zap_name_alloc(zc->zc_zap, name, mt); 01347 if (zn == NULL) { 01348 rw_exit(&zc->zc_zap->zap_rwlock); 01349 return (ENOTSUP); 01350 } 01351 01352 if (!zc->zc_zap->zap_ismicro) { 01353 err = fzap_cursor_move_to_key(zc, zn); 01354 } else { 01355 mze = mze_find(zn); 01356 if (mze == NULL) { 01357 err = ENOENT; 01358 goto out; 01359 } 01360 zc->zc_hash = mze->mze_hash; 01361 zc->zc_cd = mze->mze_cd; 01362 } 01363 01364 out: 01365 zap_name_free(zn); 01366 rw_exit(&zc->zc_zap->zap_rwlock); 01367 return (err); 01368 } 01369 01370 int 01371 zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) 01372 { 01373 int err; 01374 zap_t *zap; 01375 01376 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 01377 if (err) 01378 return (err); 01379 01380 bzero(zs, sizeof (zap_stats_t)); 01381 01382 if (zap->zap_ismicro) { 01383 zs->zs_blocksize = zap->zap_dbuf->db_size; 01384 zs->zs_num_entries = zap->zap_m.zap_num_entries; 01385 zs->zs_num_blocks = 1; 01386 } else { 01387 fzap_get_stats(zap, zs); 01388 } 01389 zap_unlockdir(zap); 01390 return (0); 01391 } 01392 01393 int 01394 zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, 01395 uint64_t *towrite, uint64_t *tooverwrite) 01396 { 01397 zap_t *zap; 01398 int err = 0; 01399 01400 01401 /* 01402 * Since, we don't have a name, we cannot figure out which blocks will 01403 * be affected in this operation. So, account for the worst case : 01404 * - 3 blocks overwritten: target leaf, ptrtbl block, header block 01405 * - 4 new blocks written if adding: 01406 * - 2 blocks for possibly split leaves, 01407 * - 2 grown ptrtbl blocks 01408 * 01409 * This also accomodates the case where an add operation to a fairly 01410 * large microzap results in a promotion to fatzap. 01411 */ 01412 if (name == NULL) { 01413 *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; 01414 return (err); 01415 } 01416 01417 /* 01418 * We lock the zap with adding == FALSE. Because, if we pass 01419 * the actual value of add, it could trigger a mzap_upgrade(). 01420 * At present we are just evaluating the possibility of this operation 01421 * and hence we donot want to trigger an upgrade. 01422 */ 01423 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 01424 if (err) 01425 return (err); 01426 01427 if (!zap->zap_ismicro) { 01428 zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); 01429 if (zn) { 01430 err = fzap_count_write(zn, add, towrite, 01431 tooverwrite); 01432 zap_name_free(zn); 01433 } else { 01434 /* 01435 * We treat this case as similar to (name == NULL) 01436 */ 01437 *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; 01438 } 01439 } else { 01440 /* 01441 * We are here if (name != NULL) and this is a micro-zap. 01442 * We account for the header block depending on whether it 01443 * is freeable. 01444 * 01445 * Incase of an add-operation it is hard to find out 01446 * if this add will promote this microzap to fatzap. 01447 * Hence, we consider the worst case and account for the 01448 * blocks assuming this microzap would be promoted to a 01449 * fatzap. 01450 * 01451 * 1 block overwritten : header block 01452 * 4 new blocks written : 2 new split leaf, 2 grown 01453 * ptrtbl blocks 01454 */ 01455 if (dmu_buf_freeable(zap->zap_dbuf)) 01456 *tooverwrite += SPA_MAXBLOCKSIZE; 01457 else 01458 *towrite += SPA_MAXBLOCKSIZE; 01459 01460 if (add) { 01461 *towrite += 4 * SPA_MAXBLOCKSIZE; 01462 } 01463 } 01464 01465 zap_unlockdir(zap); 01466 return (err); 01467 }