FreeBSD ZFS
The Zettabyte File System

zap_micro.c

Go to the documentation of this file.
00001 /*
00002  * CDDL HEADER START
00003  *
00004  * The contents of this file are subject to the terms of the
00005  * Common Development and Distribution License (the "License").
00006  * You may not use this file except in compliance with the License.
00007  *
00008  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
00009  * or http://www.opensolaris.org/os/licensing.
00010  * See the License for the specific language governing permissions
00011  * and limitations under the License.
00012  *
00013  * When distributing Covered Code, include this CDDL HEADER in each
00014  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
00015  * If applicable, add the following below this CDDL HEADER, with the
00016  * fields enclosed by brackets "[]" replaced with your own identifying
00017  * information: Portions Copyright [yyyy] [name of copyright owner]
00018  *
00019  * CDDL HEADER END
00020  */
00021 /*
00022  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
00023  * Copyright (c) 2012 by Delphix. All rights reserved.
00024  */
00025 
00026 #include <sys/zio.h>
00027 #include <sys/spa.h>
00028 #include <sys/dmu.h>
00029 #include <sys/zfs_context.h>
00030 #include <sys/zap.h>
00031 #include <sys/refcount.h>
00032 #include <sys/zap_impl.h>
00033 #include <sys/zap_leaf.h>
00034 #include <sys/avl.h>
00035 #include <sys/arc.h>
00036 
00037 #ifdef _KERNEL
00038 #include <sys/sunddi.h>
00039 #endif
00040 
00041 static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
00042 
00043 uint64_t
00044 zap_getflags(zap_t *zap)
00045 {
00046         if (zap->zap_ismicro)
00047                 return (0);
00048         return (zap->zap_u.zap_fat.zap_phys->zap_flags);
00049 }
00050 
00051 int
00052 zap_hashbits(zap_t *zap)
00053 {
00054         if (zap_getflags(zap) & ZAP_FLAG_HASH64)
00055                 return (48);
00056         else
00057                 return (28);
00058 }
00059 
00060 uint32_t
00061 zap_maxcd(zap_t *zap)
00062 {
00063         if (zap_getflags(zap) & ZAP_FLAG_HASH64)
00064                 return ((1<<16)-1);
00065         else
00066                 return (-1U);
00067 }
00068 
00069 static uint64_t
00070 zap_hash(zap_name_t *zn)
00071 {
00072         zap_t *zap = zn->zn_zap;
00073         uint64_t h = 0;
00074 
00075         if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
00076                 ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
00077                 h = *(uint64_t *)zn->zn_key_orig;
00078         } else {
00079                 h = zap->zap_salt;
00080                 ASSERT(h != 0);
00081                 ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
00082 
00083                 if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
00084                         int i;
00085                         const uint64_t *wp = zn->zn_key_norm;
00086 
00087                         ASSERT(zn->zn_key_intlen == 8);
00088                         for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) {
00089                                 int j;
00090                                 uint64_t word = *wp;
00091 
00092                                 for (j = 0; j < zn->zn_key_intlen; j++) {
00093                                         h = (h >> 8) ^
00094                                             zfs_crc64_table[(h ^ word) & 0xFF];
00095                                         word >>= NBBY;
00096                                 }
00097                         }
00098                 } else {
00099                         int i, len;
00100                         const uint8_t *cp = zn->zn_key_norm;
00101 
00102                         /*
00103                          * We previously stored the terminating null on
00104                          * disk, but didn't hash it, so we need to
00105                          * continue to not hash it.  (The
00106                          * zn_key_*_numints includes the terminating
00107                          * null for non-binary keys.)
00108                          */
00109                         len = zn->zn_key_norm_numints - 1;
00110 
00111                         ASSERT(zn->zn_key_intlen == 1);
00112                         for (i = 0; i < len; cp++, i++) {
00113                                 h = (h >> 8) ^
00114                                     zfs_crc64_table[(h ^ *cp) & 0xFF];
00115                         }
00116                 }
00117         }
00118         /*
00119          * Don't use all 64 bits, since we need some in the cookie for
00120          * the collision differentiator.  We MUST use the high bits,
00121          * since those are the ones that we first pay attention to when
00122          * chosing the bucket.
00123          */
00124         h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
00125 
00126         return (h);
00127 }
00128 
00129 static int
00130 zap_normalize(zap_t *zap, const char *name, char *namenorm)
00131 {
00132         size_t inlen, outlen;
00133         int err;
00134 
00135         ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
00136 
00137         inlen = strlen(name) + 1;
00138         outlen = ZAP_MAXNAMELEN;
00139 
00140         err = 0;
00141         (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
00142             zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL |
00143             U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err);
00144 
00145         return (err);
00146 }
00147 
00148 boolean_t
00149 zap_match(zap_name_t *zn, const char *matchname)
00150 {
00151         ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
00152 
00153         if (zn->zn_matchtype == MT_FIRST) {
00154                 char norm[ZAP_MAXNAMELEN];
00155 
00156                 if (zap_normalize(zn->zn_zap, matchname, norm) != 0)
00157                         return (B_FALSE);
00158 
00159                 return (strcmp(zn->zn_key_norm, norm) == 0);
00160         } else {
00161                 /* MT_BEST or MT_EXACT */
00162                 return (strcmp(zn->zn_key_orig, matchname) == 0);
00163         }
00164 }
00165 
00166 void
00167 zap_name_free(zap_name_t *zn)
00168 {
00169         kmem_free(zn, sizeof (zap_name_t));
00170 }
00171 
00172 zap_name_t *
00173 zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
00174 {
00175         zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
00176 
00177         zn->zn_zap = zap;
00178         zn->zn_key_intlen = sizeof (*key);
00179         zn->zn_key_orig = key;
00180         zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
00181         zn->zn_matchtype = mt;
00182         if (zap->zap_normflags) {
00183                 if (zap_normalize(zap, key, zn->zn_normbuf) != 0) {
00184                         zap_name_free(zn);
00185                         return (NULL);
00186                 }
00187                 zn->zn_key_norm = zn->zn_normbuf;
00188                 zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
00189         } else {
00190                 if (mt != MT_EXACT) {
00191                         zap_name_free(zn);
00192                         return (NULL);
00193                 }
00194                 zn->zn_key_norm = zn->zn_key_orig;
00195                 zn->zn_key_norm_numints = zn->zn_key_orig_numints;
00196         }
00197 
00198         zn->zn_hash = zap_hash(zn);
00199         return (zn);
00200 }
00201 
00202 zap_name_t *
00203 zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
00204 {
00205         zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
00206 
00207         ASSERT(zap->zap_normflags == 0);
00208         zn->zn_zap = zap;
00209         zn->zn_key_intlen = sizeof (*key);
00210         zn->zn_key_orig = zn->zn_key_norm = key;
00211         zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
00212         zn->zn_matchtype = MT_EXACT;
00213 
00214         zn->zn_hash = zap_hash(zn);
00215         return (zn);
00216 }
00217 
00218 static void
00219 mzap_byteswap(mzap_phys_t *buf, size_t size)
00220 {
00221         int i, max;
00222         buf->mz_block_type = BSWAP_64(buf->mz_block_type);
00223         buf->mz_salt = BSWAP_64(buf->mz_salt);
00224         buf->mz_normflags = BSWAP_64(buf->mz_normflags);
00225         max = (size / MZAP_ENT_LEN) - 1;
00226         for (i = 0; i < max; i++) {
00227                 buf->mz_chunk[i].mze_value =
00228                     BSWAP_64(buf->mz_chunk[i].mze_value);
00229                 buf->mz_chunk[i].mze_cd =
00230                     BSWAP_32(buf->mz_chunk[i].mze_cd);
00231         }
00232 }
00233 
00234 void
00235 zap_byteswap(void *buf, size_t size)
00236 {
00237         uint64_t block_type;
00238 
00239         block_type = *(uint64_t *)buf;
00240 
00241         if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
00242                 /* ASSERT(magic == ZAP_LEAF_MAGIC); */
00243                 mzap_byteswap(buf, size);
00244         } else {
00245                 fzap_byteswap(buf, size);
00246         }
00247 }
00248 
00249 static int
00250 mze_compare(const void *arg1, const void *arg2)
00251 {
00252         const mzap_ent_t *mze1 = arg1;
00253         const mzap_ent_t *mze2 = arg2;
00254 
00255         if (mze1->mze_hash > mze2->mze_hash)
00256                 return (+1);
00257         if (mze1->mze_hash < mze2->mze_hash)
00258                 return (-1);
00259         if (mze1->mze_cd > mze2->mze_cd)
00260                 return (+1);
00261         if (mze1->mze_cd < mze2->mze_cd)
00262                 return (-1);
00263         return (0);
00264 }
00265 
00266 static int
00267 mze_insert(zap_t *zap, int chunkid, uint64_t hash)
00268 {
00269         mzap_ent_t *mze;
00270         avl_index_t idx;
00271 
00272         ASSERT(zap->zap_ismicro);
00273         ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
00274 
00275         mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
00276         mze->mze_chunkid = chunkid;
00277         mze->mze_hash = hash;
00278         mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
00279         ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
00280         if (avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) {
00281                 kmem_free(mze, sizeof (mzap_ent_t));
00282                 return (EEXIST);
00283         }
00284         avl_insert(&zap->zap_m.zap_avl, mze, idx);
00285         return (0);
00286 }
00287 
00288 static mzap_ent_t *
00289 mze_find(zap_name_t *zn)
00290 {
00291         mzap_ent_t mze_tofind;
00292         mzap_ent_t *mze;
00293         avl_index_t idx;
00294         avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
00295 
00296         ASSERT(zn->zn_zap->zap_ismicro);
00297         ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
00298 
00299         mze_tofind.mze_hash = zn->zn_hash;
00300         mze_tofind.mze_cd = 0;
00301 
00302 again:
00303         mze = avl_find(avl, &mze_tofind, &idx);
00304         if (mze == NULL)
00305                 mze = avl_nearest(avl, idx, AVL_AFTER);
00306         for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
00307                 ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
00308                 if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
00309                         return (mze);
00310         }
00311         if (zn->zn_matchtype == MT_BEST) {
00312                 zn->zn_matchtype = MT_FIRST;
00313                 goto again;
00314         }
00315         return (NULL);
00316 }
00317 
00318 static uint32_t
00319 mze_find_unused_cd(zap_t *zap, uint64_t hash)
00320 {
00321         mzap_ent_t mze_tofind;
00322         mzap_ent_t *mze;
00323         avl_index_t idx;
00324         avl_tree_t *avl = &zap->zap_m.zap_avl;
00325         uint32_t cd;
00326 
00327         ASSERT(zap->zap_ismicro);
00328         ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
00329 
00330         mze_tofind.mze_hash = hash;
00331         mze_tofind.mze_cd = 0;
00332 
00333         cd = 0;
00334         for (mze = avl_find(avl, &mze_tofind, &idx);
00335             mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
00336                 if (mze->mze_cd != cd)
00337                         break;
00338                 cd++;
00339         }
00340 
00341         return (cd);
00342 }
00343 
00344 static void
00345 mze_remove(zap_t *zap, mzap_ent_t *mze)
00346 {
00347         ASSERT(zap->zap_ismicro);
00348         ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
00349 
00350         avl_remove(&zap->zap_m.zap_avl, mze);
00351         kmem_free(mze, sizeof (mzap_ent_t));
00352 }
00353 
00354 static void
00355 mze_destroy(zap_t *zap)
00356 {
00357         mzap_ent_t *mze;
00358         void *avlcookie = NULL;
00359 
00360         while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
00361                 kmem_free(mze, sizeof (mzap_ent_t));
00362         avl_destroy(&zap->zap_m.zap_avl);
00363 }
00364 
00365 static zap_t *
00366 mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
00367 {
00368         zap_t *winner;
00369         zap_t *zap;
00370         int i;
00371 
00372         ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
00373 
00374         zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
00375         rw_init(&zap->zap_rwlock, 0, 0, 0);
00376         rw_enter(&zap->zap_rwlock, RW_WRITER);
00377         zap->zap_objset = os;
00378         zap->zap_object = obj;
00379         zap->zap_dbuf = db;
00380 
00381         if (*(uint64_t *)db->db_data != ZBT_MICRO) {
00382                 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
00383                 zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
00384         } else {
00385                 zap->zap_ismicro = TRUE;
00386         }
00387 
00388         /*
00389          * Make sure that zap_ismicro is set before we let others see
00390          * it, because zap_lockdir() checks zap_ismicro without the lock
00391          * held.
00392          */
00393         winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict);
00394 
00395         if (winner != NULL) {
00396                 rw_exit(&zap->zap_rwlock);
00397                 rw_destroy(&zap->zap_rwlock);
00398                 if (!zap->zap_ismicro)
00399                         mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
00400                 kmem_free(zap, sizeof (zap_t));
00401                 return (winner);
00402         }
00403 
00404         if (zap->zap_ismicro) {
00405                 zap->zap_salt = zap->zap_m.zap_phys->mz_salt;
00406                 zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags;
00407                 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
00408                 avl_create(&zap->zap_m.zap_avl, mze_compare,
00409                     sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
00410 
00411                 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
00412                         mzap_ent_phys_t *mze =
00413                             &zap->zap_m.zap_phys->mz_chunk[i];
00414                         if (mze->mze_name[0]) {
00415                                 zap_name_t *zn;
00416 
00417                                 zn = zap_name_alloc(zap, mze->mze_name,
00418                                     MT_EXACT);
00419                                 if (mze_insert(zap, i, zn->zn_hash) == 0)
00420                                         zap->zap_m.zap_num_entries++;
00421                                 else {
00422                                         printf("ZFS WARNING: Duplicated ZAP "
00423                                             "entry detected (%s).\n",
00424                                             mze->mze_name);
00425                                 }
00426                                 zap_name_free(zn);
00427                         }
00428                 }
00429         } else {
00430                 zap->zap_salt = zap->zap_f.zap_phys->zap_salt;
00431                 zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags;
00432 
00433                 ASSERT3U(sizeof (struct zap_leaf_header), ==,
00434                     2*ZAP_LEAF_CHUNKSIZE);
00435 
00436                 /*
00437                  * The embedded pointer table should not overlap the
00438                  * other members.
00439                  */
00440                 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
00441                     &zap->zap_f.zap_phys->zap_salt);
00442 
00443                 /*
00444                  * The embedded pointer table should end at the end of
00445                  * the block
00446                  */
00447                 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
00448                     1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
00449                     (uintptr_t)zap->zap_f.zap_phys, ==,
00450                     zap->zap_dbuf->db_size);
00451         }
00452         rw_exit(&zap->zap_rwlock);
00453         return (zap);
00454 }
00455 
00456 int
00457 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
00458     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
00459 {
00460         zap_t *zap;
00461         dmu_buf_t *db;
00462         krw_t lt;
00463         int err;
00464 
00465         *zapp = NULL;
00466 
00467         err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
00468         if (err)
00469                 return (err);
00470 
00471 #ifdef ZFS_DEBUG
00472         {
00473                 dmu_object_info_t doi;
00474                 dmu_object_info_from_db(db, &doi);
00475                 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
00476         }
00477 #endif
00478 
00479         zap = dmu_buf_get_user(db);
00480         if (zap == NULL)
00481                 zap = mzap_open(os, obj, db);
00482 
00483         /*
00484          * We're checking zap_ismicro without the lock held, in order to
00485          * tell what type of lock we want.  Once we have some sort of
00486          * lock, see if it really is the right type.  In practice this
00487          * can only be different if it was upgraded from micro to fat,
00488          * and micro wanted WRITER but fat only needs READER.
00489          */
00490         lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
00491         rw_enter(&zap->zap_rwlock, lt);
00492         if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
00493                 /* it was upgraded, now we only need reader */
00494                 ASSERT(lt == RW_WRITER);
00495                 ASSERT(RW_READER ==
00496                     (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
00497                 rw_downgrade(&zap->zap_rwlock);
00498                 lt = RW_READER;
00499         }
00500 
00501         zap->zap_objset = os;
00502 
00503         if (lt == RW_WRITER)
00504                 dmu_buf_will_dirty(db, tx);
00505 
00506         ASSERT3P(zap->zap_dbuf, ==, db);
00507 
00508         ASSERT(!zap->zap_ismicro ||
00509             zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
00510         if (zap->zap_ismicro && tx && adding &&
00511             zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
00512                 uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
00513                 if (newsz > MZAP_MAX_BLKSZ) {
00514                         dprintf("upgrading obj %llu: num_entries=%u\n",
00515                             obj, zap->zap_m.zap_num_entries);
00516                         *zapp = zap;
00517                         return (mzap_upgrade(zapp, tx, 0));
00518                 }
00519                 err = dmu_object_set_blocksize(os, obj, newsz, 0, tx);
00520                 ASSERT0(err);
00521                 zap->zap_m.zap_num_chunks =
00522                     db->db_size / MZAP_ENT_LEN - 1;
00523         }
00524 
00525         *zapp = zap;
00526         return (0);
00527 }
00528 
00529 void
00530 zap_unlockdir(zap_t *zap)
00531 {
00532         rw_exit(&zap->zap_rwlock);
00533         dmu_buf_rele(zap->zap_dbuf, NULL);
00534 }
00535 
00536 static int
00537 mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
00538 {
00539         mzap_phys_t *mzp;
00540         int i, sz, nchunks;
00541         int err = 0;
00542         zap_t *zap = *zapp;
00543 
00544         ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
00545 
00546         sz = zap->zap_dbuf->db_size;
00547         mzp = kmem_alloc(sz, KM_SLEEP);
00548         bcopy(zap->zap_dbuf->db_data, mzp, sz);
00549         nchunks = zap->zap_m.zap_num_chunks;
00550 
00551         if (!flags) {
00552                 err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
00553                     1ULL << fzap_default_block_shift, 0, tx);
00554                 if (err) {
00555                         kmem_free(mzp, sz);
00556                         return (err);
00557                 }
00558         }
00559 
00560         dprintf("upgrading obj=%llu with %u chunks\n",
00561             zap->zap_object, nchunks);
00562         /* XXX destroy the avl later, so we can use the stored hash value */
00563         mze_destroy(zap);
00564 
00565         fzap_upgrade(zap, tx, flags);
00566 
00567         for (i = 0; i < nchunks; i++) {
00568                 mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
00569                 zap_name_t *zn;
00570                 if (mze->mze_name[0] == 0)
00571                         continue;
00572                 dprintf("adding %s=%llu\n",
00573                     mze->mze_name, mze->mze_value);
00574                 zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
00575                 err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx);
00576                 zap = zn->zn_zap;       /* fzap_add_cd() may change zap */
00577                 zap_name_free(zn);
00578                 if (err)
00579                         break;
00580         }
00581         kmem_free(mzp, sz);
00582         *zapp = zap;
00583         return (err);
00584 }
00585 
00586 static void
00587 mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
00588     dmu_tx_t *tx)
00589 {
00590         dmu_buf_t *db;
00591         mzap_phys_t *zp;
00592 
00593         VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
00594 
00595 #ifdef ZFS_DEBUG
00596         {
00597                 dmu_object_info_t doi;
00598                 dmu_object_info_from_db(db, &doi);
00599                 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
00600         }
00601 #endif
00602 
00603         dmu_buf_will_dirty(db, tx);
00604         zp = db->db_data;
00605         zp->mz_block_type = ZBT_MICRO;
00606         zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
00607         zp->mz_normflags = normflags;
00608         dmu_buf_rele(db, FTAG);
00609 
00610         if (flags != 0) {
00611                 zap_t *zap;
00612                 /* Only fat zap supports flags; upgrade immediately. */
00613                 VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER,
00614                     B_FALSE, B_FALSE, &zap));
00615                 VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags));
00616                 zap_unlockdir(zap);
00617         }
00618 }
00619 
00620 int
00621 zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
00622     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
00623 {
00624         return (zap_create_claim_norm(os, obj,
00625             0, ot, bonustype, bonuslen, tx));
00626 }
00627 
00628 int
00629 zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
00630     dmu_object_type_t ot,
00631     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
00632 {
00633         int err;
00634 
00635         err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
00636         if (err != 0)
00637                 return (err);
00638         mzap_create_impl(os, obj, normflags, 0, tx);
00639         return (0);
00640 }
00641 
00642 uint64_t
00643 zap_create(objset_t *os, dmu_object_type_t ot,
00644     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
00645 {
00646         return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
00647 }
00648 
00649 uint64_t
00650 zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
00651     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
00652 {
00653         uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
00654 
00655         mzap_create_impl(os, obj, normflags, 0, tx);
00656         return (obj);
00657 }
00658 
00659 uint64_t
00660 zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
00661     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
00662     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
00663 {
00664         uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
00665 
00666         ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
00667             leaf_blockshift <= SPA_MAXBLOCKSHIFT &&
00668             indirect_blockshift >= SPA_MINBLOCKSHIFT &&
00669             indirect_blockshift <= SPA_MAXBLOCKSHIFT);
00670 
00671         VERIFY(dmu_object_set_blocksize(os, obj,
00672             1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
00673 
00674         mzap_create_impl(os, obj, normflags, flags, tx);
00675         return (obj);
00676 }
00677 
00678 int
00679 zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
00680 {
00681         /*
00682          * dmu_object_free will free the object number and free the
00683          * data.  Freeing the data will cause our pageout function to be
00684          * called, which will destroy our data (zap_leaf_t's and zap_t).
00685          */
00686 
00687         return (dmu_object_free(os, zapobj, tx));
00688 }
00689 
00690 _NOTE(ARGSUSED(0))
00691 void
00692 zap_evict(dmu_buf_t *db, void *vzap)
00693 {
00694         zap_t *zap = vzap;
00695 
00696         rw_destroy(&zap->zap_rwlock);
00697 
00698         if (zap->zap_ismicro)
00699                 mze_destroy(zap);
00700         else
00701                 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
00702 
00703         kmem_free(zap, sizeof (zap_t));
00704 }
00705 
00706 int
00707 zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
00708 {
00709         zap_t *zap;
00710         int err;
00711 
00712         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
00713         if (err)
00714                 return (err);
00715         if (!zap->zap_ismicro) {
00716                 err = fzap_count(zap, count);
00717         } else {
00718                 *count = zap->zap_m.zap_num_entries;
00719         }
00720         zap_unlockdir(zap);
00721         return (err);
00722 }
00723 
00728 static boolean_t
00729 mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
00730 {
00731         mzap_ent_t *other;
00732         int direction = AVL_BEFORE;
00733         boolean_t allocdzn = B_FALSE;
00734 
00735         if (zap->zap_normflags == 0)
00736                 return (B_FALSE);
00737 
00738 again:
00739         for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
00740             other && other->mze_hash == mze->mze_hash;
00741             other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
00742 
00743                 if (zn == NULL) {
00744                         zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
00745                             MT_FIRST);
00746                         allocdzn = B_TRUE;
00747                 }
00748                 if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
00749                         if (allocdzn)
00750                                 zap_name_free(zn);
00751                         return (B_TRUE);
00752                 }
00753         }
00754 
00755         if (direction == AVL_BEFORE) {
00756                 direction = AVL_AFTER;
00757                 goto again;
00758         }
00759 
00760         if (allocdzn)
00761                 zap_name_free(zn);
00762         return (B_FALSE);
00763 }
00764 
00769 int
00770 zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
00771     uint64_t integer_size, uint64_t num_integers, void *buf)
00772 {
00773         return (zap_lookup_norm(os, zapobj, name, integer_size,
00774             num_integers, buf, MT_EXACT, NULL, 0, NULL));
00775 }
00776 
00777 int
00778 zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
00779     uint64_t integer_size, uint64_t num_integers, void *buf,
00780     matchtype_t mt, char *realname, int rn_len,
00781     boolean_t *ncp)
00782 {
00783         zap_t *zap;
00784         int err;
00785         mzap_ent_t *mze;
00786         zap_name_t *zn;
00787 
00788         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
00789         if (err)
00790                 return (err);
00791         zn = zap_name_alloc(zap, name, mt);
00792         if (zn == NULL) {
00793                 zap_unlockdir(zap);
00794                 return (ENOTSUP);
00795         }
00796 
00797         if (!zap->zap_ismicro) {
00798                 err = fzap_lookup(zn, integer_size, num_integers, buf,
00799                     realname, rn_len, ncp);
00800         } else {
00801                 mze = mze_find(zn);
00802                 if (mze == NULL) {
00803                         err = ENOENT;
00804                 } else {
00805                         if (num_integers < 1) {
00806                                 err = EOVERFLOW;
00807                         } else if (integer_size != 8) {
00808                                 err = EINVAL;
00809                         } else {
00810                                 *(uint64_t *)buf =
00811                                     MZE_PHYS(zap, mze)->mze_value;
00812                                 (void) strlcpy(realname,
00813                                     MZE_PHYS(zap, mze)->mze_name, rn_len);
00814                                 if (ncp) {
00815                                         *ncp = mzap_normalization_conflict(zap,
00816                                             zn, mze);
00817                                 }
00818                         }
00819                 }
00820         }
00821         zap_name_free(zn);
00822         zap_unlockdir(zap);
00823         return (err);
00824 }
00825 
00826 int
00827 zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
00828     int key_numints)
00829 {
00830         zap_t *zap;
00831         int err;
00832         zap_name_t *zn;
00833 
00834         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
00835         if (err)
00836                 return (err);
00837         zn = zap_name_alloc_uint64(zap, key, key_numints);
00838         if (zn == NULL) {
00839                 zap_unlockdir(zap);
00840                 return (ENOTSUP);
00841         }
00842 
00843         fzap_prefetch(zn);
00844         zap_name_free(zn);
00845         zap_unlockdir(zap);
00846         return (err);
00847 }
00848 
00849 int
00850 zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
00851     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
00852 {
00853         zap_t *zap;
00854         int err;
00855         zap_name_t *zn;
00856 
00857         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
00858         if (err)
00859                 return (err);
00860         zn = zap_name_alloc_uint64(zap, key, key_numints);
00861         if (zn == NULL) {
00862                 zap_unlockdir(zap);
00863                 return (ENOTSUP);
00864         }
00865 
00866         err = fzap_lookup(zn, integer_size, num_integers, buf,
00867             NULL, 0, NULL);
00868         zap_name_free(zn);
00869         zap_unlockdir(zap);
00870         return (err);
00871 }
00872 
00873 int
00874 zap_contains(objset_t *os, uint64_t zapobj, const char *name)
00875 {
00876         int err = (zap_lookup_norm(os, zapobj, name, 0,
00877             0, NULL, MT_EXACT, NULL, 0, NULL));
00878         if (err == EOVERFLOW || err == EINVAL)
00879                 err = 0; /* found, but skipped reading the value */
00880         return (err);
00881 }
00882 
00883 int
00884 zap_length(objset_t *os, uint64_t zapobj, const char *name,
00885     uint64_t *integer_size, uint64_t *num_integers)
00886 {
00887         zap_t *zap;
00888         int err;
00889         mzap_ent_t *mze;
00890         zap_name_t *zn;
00891 
00892         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
00893         if (err)
00894                 return (err);
00895         zn = zap_name_alloc(zap, name, MT_EXACT);
00896         if (zn == NULL) {
00897                 zap_unlockdir(zap);
00898                 return (ENOTSUP);
00899         }
00900         if (!zap->zap_ismicro) {
00901                 err = fzap_length(zn, integer_size, num_integers);
00902         } else {
00903                 mze = mze_find(zn);
00904                 if (mze == NULL) {
00905                         err = ENOENT;
00906                 } else {
00907                         if (integer_size)
00908                                 *integer_size = 8;
00909                         if (num_integers)
00910                                 *num_integers = 1;
00911                 }
00912         }
00913         zap_name_free(zn);
00914         zap_unlockdir(zap);
00915         return (err);
00916 }
00917 
00918 int
00919 zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
00920     int key_numints, uint64_t *integer_size, uint64_t *num_integers)
00921 {
00922         zap_t *zap;
00923         int err;
00924         zap_name_t *zn;
00925 
00926         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
00927         if (err)
00928                 return (err);
00929         zn = zap_name_alloc_uint64(zap, key, key_numints);
00930         if (zn == NULL) {
00931                 zap_unlockdir(zap);
00932                 return (ENOTSUP);
00933         }
00934         err = fzap_length(zn, integer_size, num_integers);
00935         zap_name_free(zn);
00936         zap_unlockdir(zap);
00937         return (err);
00938 }
00939 
00940 static void
00941 mzap_addent(zap_name_t *zn, uint64_t value)
00942 {
00943         int i;
00944         zap_t *zap = zn->zn_zap;
00945         int start = zap->zap_m.zap_alloc_next;
00946         uint32_t cd;
00947 
00948         ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
00949 
00950 #ifdef ZFS_DEBUG
00951         for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
00952                 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
00953                 ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
00954         }
00955 #endif
00956 
00957         cd = mze_find_unused_cd(zap, zn->zn_hash);
00958         /* given the limited size of the microzap, this can't happen */
00959         ASSERT(cd < zap_maxcd(zap));
00960 
00961 again:
00962         for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
00963                 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
00964                 if (mze->mze_name[0] == 0) {
00965                         mze->mze_value = value;
00966                         mze->mze_cd = cd;
00967                         (void) strcpy(mze->mze_name, zn->zn_key_orig);
00968                         zap->zap_m.zap_num_entries++;
00969                         zap->zap_m.zap_alloc_next = i+1;
00970                         if (zap->zap_m.zap_alloc_next ==
00971                             zap->zap_m.zap_num_chunks)
00972                                 zap->zap_m.zap_alloc_next = 0;
00973                         VERIFY(0 == mze_insert(zap, i, zn->zn_hash));
00974                         return;
00975                 }
00976         }
00977         if (start != 0) {
00978                 start = 0;
00979                 goto again;
00980         }
00981         ASSERT(!"out of entries!");
00982 }
00983 
00984 int
00985 zap_add(objset_t *os, uint64_t zapobj, const char *key,
00986     int integer_size, uint64_t num_integers,
00987     const void *val, dmu_tx_t *tx)
00988 {
00989         zap_t *zap;
00990         int err;
00991         mzap_ent_t *mze;
00992         const uint64_t *intval = val;
00993         zap_name_t *zn;
00994 
00995         err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
00996         if (err)
00997                 return (err);
00998         zn = zap_name_alloc(zap, key, MT_EXACT);
00999         if (zn == NULL) {
01000                 zap_unlockdir(zap);
01001                 return (ENOTSUP);
01002         }
01003         if (!zap->zap_ismicro) {
01004                 err = fzap_add(zn, integer_size, num_integers, val, tx);
01005                 zap = zn->zn_zap;       /* fzap_add() may change zap */
01006         } else if (integer_size != 8 || num_integers != 1 ||
01007             strlen(key) >= MZAP_NAME_LEN) {
01008                 err = mzap_upgrade(&zn->zn_zap, tx, 0);
01009                 if (err == 0)
01010                         err = fzap_add(zn, integer_size, num_integers, val, tx);
01011                 zap = zn->zn_zap;       /* fzap_add() may change zap */
01012         } else {
01013                 mze = mze_find(zn);
01014                 if (mze != NULL) {
01015                         err = EEXIST;
01016                 } else {
01017                         mzap_addent(zn, *intval);
01018                 }
01019         }
01020         ASSERT(zap == zn->zn_zap);
01021         zap_name_free(zn);
01022         if (zap != NULL)        /* may be NULL if fzap_add() failed */
01023                 zap_unlockdir(zap);
01024         return (err);
01025 }
01026 
01027 int
01028 zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
01029     int key_numints, int integer_size, uint64_t num_integers,
01030     const void *val, dmu_tx_t *tx)
01031 {
01032         zap_t *zap;
01033         int err;
01034         zap_name_t *zn;
01035 
01036         err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
01037         if (err)
01038                 return (err);
01039         zn = zap_name_alloc_uint64(zap, key, key_numints);
01040         if (zn == NULL) {
01041                 zap_unlockdir(zap);
01042                 return (ENOTSUP);
01043         }
01044         err = fzap_add(zn, integer_size, num_integers, val, tx);
01045         zap = zn->zn_zap;       /* fzap_add() may change zap */
01046         zap_name_free(zn);
01047         if (zap != NULL)        /* may be NULL if fzap_add() failed */
01048                 zap_unlockdir(zap);
01049         return (err);
01050 }
01051 
01052 int
01053 zap_update(objset_t *os, uint64_t zapobj, const char *name,
01054     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
01055 {
01056         zap_t *zap;
01057         mzap_ent_t *mze;
01058         uint64_t oldval;
01059         const uint64_t *intval = val;
01060         zap_name_t *zn;
01061         int err;
01062 
01063 #ifdef ZFS_DEBUG
01064         /*
01065          * If there is an old value, it shouldn't change across the
01066          * lockdir (eg, due to bprewrite's xlation).
01067          */
01068         if (integer_size == 8 && num_integers == 1)
01069                 (void) zap_lookup(os, zapobj, name, 8, 1, &oldval);
01070 #endif
01071 
01072         err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
01073         if (err)
01074                 return (err);
01075         zn = zap_name_alloc(zap, name, MT_EXACT);
01076         if (zn == NULL) {
01077                 zap_unlockdir(zap);
01078                 return (ENOTSUP);
01079         }
01080         if (!zap->zap_ismicro) {
01081                 err = fzap_update(zn, integer_size, num_integers, val, tx);
01082                 zap = zn->zn_zap;       /* fzap_update() may change zap */
01083         } else if (integer_size != 8 || num_integers != 1 ||
01084             strlen(name) >= MZAP_NAME_LEN) {
01085                 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
01086                     zapobj, integer_size, num_integers, name);
01087                 err = mzap_upgrade(&zn->zn_zap, tx, 0);
01088                 if (err == 0)
01089                         err = fzap_update(zn, integer_size, num_integers,
01090                             val, tx);
01091                 zap = zn->zn_zap;       /* fzap_update() may change zap */
01092         } else {
01093                 mze = mze_find(zn);
01094                 if (mze != NULL) {
01095                         ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval);
01096                         MZE_PHYS(zap, mze)->mze_value = *intval;
01097                 } else {
01098                         mzap_addent(zn, *intval);
01099                 }
01100         }
01101         ASSERT(zap == zn->zn_zap);
01102         zap_name_free(zn);
01103         if (zap != NULL)        /* may be NULL if fzap_upgrade() failed */
01104                 zap_unlockdir(zap);
01105         return (err);
01106 }
01107 
01108 int
01109 zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
01110     int key_numints,
01111     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
01112 {
01113         zap_t *zap;
01114         zap_name_t *zn;
01115         int err;
01116 
01117         err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
01118         if (err)
01119                 return (err);
01120         zn = zap_name_alloc_uint64(zap, key, key_numints);
01121         if (zn == NULL) {
01122                 zap_unlockdir(zap);
01123                 return (ENOTSUP);
01124         }
01125         err = fzap_update(zn, integer_size, num_integers, val, tx);
01126         zap = zn->zn_zap;       /* fzap_update() may change zap */
01127         zap_name_free(zn);
01128         if (zap != NULL)        /* may be NULL if fzap_upgrade() failed */
01129                 zap_unlockdir(zap);
01130         return (err);
01131 }
01132 
01133 int
01134 zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
01135 {
01136         return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx));
01137 }
01138 
01139 int
01140 zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
01141     matchtype_t mt, dmu_tx_t *tx)
01142 {
01143         zap_t *zap;
01144         int err;
01145         mzap_ent_t *mze;
01146         zap_name_t *zn;
01147 
01148         err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
01149         if (err)
01150                 return (err);
01151         zn = zap_name_alloc(zap, name, mt);
01152         if (zn == NULL) {
01153                 zap_unlockdir(zap);
01154                 return (ENOTSUP);
01155         }
01156         if (!zap->zap_ismicro) {
01157                 err = fzap_remove(zn, tx);
01158         } else {
01159                 mze = mze_find(zn);
01160                 if (mze == NULL) {
01161                         err = ENOENT;
01162                 } else {
01163                         zap->zap_m.zap_num_entries--;
01164                         bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
01165                             sizeof (mzap_ent_phys_t));
01166                         mze_remove(zap, mze);
01167                 }
01168         }
01169         zap_name_free(zn);
01170         zap_unlockdir(zap);
01171         return (err);
01172 }
01173 
01174 int
01175 zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
01176     int key_numints, dmu_tx_t *tx)
01177 {
01178         zap_t *zap;
01179         int err;
01180         zap_name_t *zn;
01181 
01182         err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
01183         if (err)
01184                 return (err);
01185         zn = zap_name_alloc_uint64(zap, key, key_numints);
01186         if (zn == NULL) {
01187                 zap_unlockdir(zap);
01188                 return (ENOTSUP);
01189         }
01190         err = fzap_remove(zn, tx);
01191         zap_name_free(zn);
01192         zap_unlockdir(zap);
01193         return (err);
01194 }
01195 
01196 /*
01197  * Routines for iterating over the attributes.
01198  */
01199 
01200 void
01201 zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
01202     uint64_t serialized)
01203 {
01204         zc->zc_objset = os;
01205         zc->zc_zap = NULL;
01206         zc->zc_leaf = NULL;
01207         zc->zc_zapobj = zapobj;
01208         zc->zc_serialized = serialized;
01209         zc->zc_hash = 0;
01210         zc->zc_cd = 0;
01211 }
01212 
01213 void
01214 zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
01215 {
01216         zap_cursor_init_serialized(zc, os, zapobj, 0);
01217 }
01218 
01219 void
01220 zap_cursor_fini(zap_cursor_t *zc)
01221 {
01222         if (zc->zc_zap) {
01223                 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
01224                 zap_unlockdir(zc->zc_zap);
01225                 zc->zc_zap = NULL;
01226         }
01227         if (zc->zc_leaf) {
01228                 rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
01229                 zap_put_leaf(zc->zc_leaf);
01230                 zc->zc_leaf = NULL;
01231         }
01232         zc->zc_objset = NULL;
01233 }
01234 
01235 uint64_t
01236 zap_cursor_serialize(zap_cursor_t *zc)
01237 {
01238         if (zc->zc_hash == -1ULL)
01239                 return (-1ULL);
01240         if (zc->zc_zap == NULL)
01241                 return (zc->zc_serialized);
01242         ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
01243         ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
01244 
01245         /*
01246          * We want to keep the high 32 bits of the cursor zero if we can, so
01247          * that 32-bit programs can access this.  So usually use a small
01248          * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
01249          * of the cursor.
01250          *
01251          * [ collision differentiator | zap_hashbits()-bit hash value ]
01252          */
01253         return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
01254             ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
01255 }
01256 
01257 int
01258 zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
01259 {
01260         int err;
01261         avl_index_t idx;
01262         mzap_ent_t mze_tofind;
01263         mzap_ent_t *mze;
01264 
01265         if (zc->zc_hash == -1ULL)
01266                 return (ENOENT);
01267 
01268         if (zc->zc_zap == NULL) {
01269                 int hb;
01270                 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
01271                     RW_READER, TRUE, FALSE, &zc->zc_zap);
01272                 if (err)
01273                         return (err);
01274 
01275                 /*
01276                  * To support zap_cursor_init_serialized, advance, retrieve,
01277                  * we must add to the existing zc_cd, which may already
01278                  * be 1 due to the zap_cursor_advance.
01279                  */
01280                 ASSERT(zc->zc_hash == 0);
01281                 hb = zap_hashbits(zc->zc_zap);
01282                 zc->zc_hash = zc->zc_serialized << (64 - hb);
01283                 zc->zc_cd += zc->zc_serialized >> hb;
01284                 if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
01285                         zc->zc_cd = 0;
01286         } else {
01287                 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
01288         }
01289         if (!zc->zc_zap->zap_ismicro) {
01290                 err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
01291         } else {
01292                 err = ENOENT;
01293 
01294                 mze_tofind.mze_hash = zc->zc_hash;
01295                 mze_tofind.mze_cd = zc->zc_cd;
01296 
01297                 mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
01298                 if (mze == NULL) {
01299                         mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
01300                             idx, AVL_AFTER);
01301                 }
01302                 if (mze) {
01303                         mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
01304                         ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
01305                         za->za_normalization_conflict =
01306                             mzap_normalization_conflict(zc->zc_zap, NULL, mze);
01307                         za->za_integer_length = 8;
01308                         za->za_num_integers = 1;
01309                         za->za_first_integer = mzep->mze_value;
01310                         (void) strcpy(za->za_name, mzep->mze_name);
01311                         zc->zc_hash = mze->mze_hash;
01312                         zc->zc_cd = mze->mze_cd;
01313                         err = 0;
01314                 } else {
01315                         zc->zc_hash = -1ULL;
01316                 }
01317         }
01318         rw_exit(&zc->zc_zap->zap_rwlock);
01319         return (err);
01320 }
01321 
01322 void
01323 zap_cursor_advance(zap_cursor_t *zc)
01324 {
01325         if (zc->zc_hash == -1ULL)
01326                 return;
01327         zc->zc_cd++;
01328 }
01329 
01330 int
01331 zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt)
01332 {
01333         int err = 0;
01334         mzap_ent_t *mze;
01335         zap_name_t *zn;
01336 
01337         if (zc->zc_zap == NULL) {
01338                 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
01339                     RW_READER, TRUE, FALSE, &zc->zc_zap);
01340                 if (err)
01341                         return (err);
01342         } else {
01343                 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
01344         }
01345 
01346         zn = zap_name_alloc(zc->zc_zap, name, mt);
01347         if (zn == NULL) {
01348                 rw_exit(&zc->zc_zap->zap_rwlock);
01349                 return (ENOTSUP);
01350         }
01351 
01352         if (!zc->zc_zap->zap_ismicro) {
01353                 err = fzap_cursor_move_to_key(zc, zn);
01354         } else {
01355                 mze = mze_find(zn);
01356                 if (mze == NULL) {
01357                         err = ENOENT;
01358                         goto out;
01359                 }
01360                 zc->zc_hash = mze->mze_hash;
01361                 zc->zc_cd = mze->mze_cd;
01362         }
01363 
01364 out:
01365         zap_name_free(zn);
01366         rw_exit(&zc->zc_zap->zap_rwlock);
01367         return (err);
01368 }
01369 
01370 int
01371 zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
01372 {
01373         int err;
01374         zap_t *zap;
01375 
01376         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
01377         if (err)
01378                 return (err);
01379 
01380         bzero(zs, sizeof (zap_stats_t));
01381 
01382         if (zap->zap_ismicro) {
01383                 zs->zs_blocksize = zap->zap_dbuf->db_size;
01384                 zs->zs_num_entries = zap->zap_m.zap_num_entries;
01385                 zs->zs_num_blocks = 1;
01386         } else {
01387                 fzap_get_stats(zap, zs);
01388         }
01389         zap_unlockdir(zap);
01390         return (0);
01391 }
01392 
01393 int
01394 zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
01395     uint64_t *towrite, uint64_t *tooverwrite)
01396 {
01397         zap_t *zap;
01398         int err = 0;
01399 
01400 
01401         /*
01402          * Since, we don't have a name, we cannot figure out which blocks will
01403          * be affected in this operation. So, account for the worst case :
01404          * - 3 blocks overwritten: target leaf, ptrtbl block, header block
01405          * - 4 new blocks written if adding:
01406          *      - 2 blocks for possibly split leaves,
01407          *      - 2 grown ptrtbl blocks
01408          *
01409          * This also accomodates the case where an add operation to a fairly
01410          * large microzap results in a promotion to fatzap.
01411          */
01412         if (name == NULL) {
01413                 *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
01414                 return (err);
01415         }
01416 
01417         /*
01418          * We lock the zap with adding == FALSE. Because, if we pass
01419          * the actual value of add, it could trigger a mzap_upgrade().
01420          * At present we are just evaluating the possibility of this operation
01421          * and hence we donot want to trigger an upgrade.
01422          */
01423         err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
01424         if (err)
01425                 return (err);
01426 
01427         if (!zap->zap_ismicro) {
01428                 zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
01429                 if (zn) {
01430                         err = fzap_count_write(zn, add, towrite,
01431                             tooverwrite);
01432                         zap_name_free(zn);
01433                 } else {
01434                         /*
01435                          * We treat this case as similar to (name == NULL)
01436                          */
01437                         *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
01438                 }
01439         } else {
01440                 /*
01441                  * We are here if (name != NULL) and this is a micro-zap.
01442                  * We account for the header block depending on whether it
01443                  * is freeable.
01444                  *
01445                  * Incase of an add-operation it is hard to find out
01446                  * if this add will promote this microzap to fatzap.
01447                  * Hence, we consider the worst case and account for the
01448                  * blocks assuming this microzap would be promoted to a
01449                  * fatzap.
01450                  *
01451                  * 1 block overwritten  : header block
01452                  * 4 new blocks written : 2 new split leaf, 2 grown
01453                  *                      ptrtbl blocks
01454                  */
01455                 if (dmu_buf_freeable(zap->zap_dbuf))
01456                         *tooverwrite += SPA_MAXBLOCKSIZE;
01457                 else
01458                         *towrite += SPA_MAXBLOCKSIZE;
01459 
01460                 if (add) {
01461                         *towrite += 4 * SPA_MAXBLOCKSIZE;
01462                 }
01463         }
01464 
01465         zap_unlockdir(zap);
01466         return (err);
01467 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines