diff --git a/sys/contrib/openzfs/cmd/zdb/zdb_il.c b/sys/contrib/openzfs/cmd/zdb/zdb_il.c index 970c45c9b3bb..55df1f559f6e 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb_il.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb_il.c @@ -307,23 +307,6 @@ zil_prt_rec_acl(zilog_t *zilog, int txtype, const void *arg) (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt); } -static void -zil_prt_rec_clone_range(zilog_t *zilog, int txtype, const void *arg) -{ - (void) zilog, (void) txtype; - const lr_clone_range_t *lr = arg; - - (void) printf("%sfoid %llu, offset %llx, length %llx, blksize %llx\n", - tab_prefix, (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset, - (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blksz); - - for (unsigned int i = 0; i < lr->lr_nbps; i++) { - (void) printf("%s[%u/%llu] ", tab_prefix, i + 1, - (u_longlong_t)lr->lr_nbps); - print_log_bp(&lr->lr_bps[i], ""); - } -} - typedef void (*zil_prt_rec_func_t)(zilog_t *, int, const void *); typedef struct zil_rec_info { zil_prt_rec_func_t zri_print; @@ -357,8 +340,6 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { .zri_name = "TX_SETSAXATTR "}, {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_EXCHANGE "}, {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_WHITEOUT "}, - {.zri_print = zil_prt_rec_clone_range, - .zri_name = "TX_CLONE_RANGE "}, }; static int diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c index b6b99bfff6db..fb9f83032e8f 100644 --- a/sys/contrib/openzfs/cmd/ztest.c +++ b/sys/contrib/openzfs/cmd/ztest.c @@ -1902,7 +1902,7 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) if (zil_replaying(zd->zd_zilog, tx)) return; - if (lr->lr_length > zil_max_log_data(zd->zd_zilog, sizeof (lr_write_t))) + if (lr->lr_length > zil_max_log_data(zd->zd_zilog)) write_state = WR_INDIRECT; itx = zil_itx_create(TX_WRITE, diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h index fb26a83b1844..41005639a0b0 100644 --- a/sys/contrib/openzfs/include/sys/dbuf.h +++ b/sys/contrib/openzfs/include/sys/dbuf.h @@ -172,7 +172,6 @@ typedef struct dbuf_dirty_record { override_states_t dr_override_state; uint8_t dr_copies; boolean_t dr_nopwrite; - boolean_t dr_brtwrite; boolean_t dr_has_raw_params; /* diff --git a/sys/contrib/openzfs/include/sys/dmu.h b/sys/contrib/openzfs/include/sys/dmu.h index 1b82ff620f27..7062931077c4 100644 --- a/sys/contrib/openzfs/include/sys/dmu.h +++ b/sys/contrib/openzfs/include/sys/dmu.h @@ -1061,12 +1061,6 @@ int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd); int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off); -int dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, - uint64_t length, dmu_tx_t *tx, struct blkptr *bps, size_t *nbpsp); -void dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, - uint64_t length, dmu_tx_t *tx, const struct blkptr *bps, size_t nbps, - boolean_t replay); - /* * Initial setup and final teardown. 
*/ diff --git a/sys/contrib/openzfs/include/sys/dmu_tx.h b/sys/contrib/openzfs/include/sys/dmu_tx.h index ca8514e5d2d0..81e1ef6c1477 100644 --- a/sys/contrib/openzfs/include/sys/dmu_tx.h +++ b/sys/contrib/openzfs/include/sys/dmu_tx.h @@ -90,7 +90,6 @@ enum dmu_tx_hold_type { THT_ZAP, THT_SPACE, THT_SPILL, - THT_CLONE, THT_NUMTYPES }; diff --git a/sys/contrib/openzfs/include/sys/zil.h b/sys/contrib/openzfs/include/sys/zil.h index cff8ebcad819..7c2e085abf84 100644 --- a/sys/contrib/openzfs/include/sys/zil.h +++ b/sys/contrib/openzfs/include/sys/zil.h @@ -166,7 +166,6 @@ typedef enum zil_create { #define TX_SETSAXATTR 21 /* Set sa xattrs on file */ #define TX_RENAME_EXCHANGE 22 /* Atomic swap via renameat2 */ #define TX_RENAME_WHITEOUT 23 /* Atomic whiteout via renameat2 */ -#define TX_CLONE_RANGE 24 /* Clone a file range */ #define TX_MAX_TYPE 25 /* Max transaction type */ /* @@ -188,8 +187,7 @@ typedef enum zil_create { (txtype) == TX_ACL_V0 || \ (txtype) == TX_ACL || \ (txtype) == TX_WRITE2 || \ - (txtype) == TX_SETSAXATTR || \ - (txtype) == TX_CLONE_RANGE) + (txtype) == TX_SETSAXATTR) /* * The number of dnode slots consumed by the object is stored in the 8 @@ -389,17 +387,6 @@ typedef struct { /* lr_acl_bytes number of variable sized ace's follows */ } lr_acl_t; -typedef struct { - lr_t lr_common; /* common portion of log record */ - uint64_t lr_foid; /* file object to clone into */ - uint64_t lr_offset; /* offset to clone to */ - uint64_t lr_length; /* length of the blocks to clone */ - uint64_t lr_blksz; /* file's block size */ - uint64_t lr_nbps; /* number of block pointers */ - blkptr_t lr_bps[]; - /* block pointers of the blocks to clone follows */ -} lr_clone_range_t; - /* * ZIL structure definitions, interface function prototype and globals. 
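For context on what is being deleted here: lr_clone_range_t was a variable-length ZIL record whose size is the fixed header plus one blkptr_t per cloned block, which is exactly how the deleted zfs_log_clone_range() further down sizes its itx (sizeof (*lr) + sizeof (bps[0]) * partnbps). A minimal, compilable sketch of that sizing arithmetic; the types are stand-ins with the usual on-disk sizes (32-byte lr_t, 128-byte blkptr_t), not the real sys/zil.h definitions:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins: the real definitions live in sys/zil.h and sys/spa.h. */
    typedef struct { uint8_t pad[128]; } blkptr_t;  /* 128 bytes on disk */
    typedef struct { uint64_t pad[4]; } lr_t;       /* 32-byte record header */

    typedef struct {
            lr_t     lr_common;
            uint64_t lr_foid, lr_offset, lr_length, lr_blksz, lr_nbps;
            blkptr_t lr_bps[];                      /* flexible array */
    } lr_clone_range_t;

    int
    main(void)
    {
            size_t nbps = 4;
            /* Matches the zil_itx_create() sizing in the deleted code. */
            size_t reclen = sizeof (lr_clone_range_t) +
                nbps * sizeof (blkptr_t);

            /* 72-byte header + 4 * 128 = 584 bytes. */
            printf("header %zu + %zu bps = %zu bytes\n",
                sizeof (lr_clone_range_t), nbps, reclen);
            return (0);
    }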
*/ @@ -587,7 +574,7 @@ extern void zil_set_sync(zilog_t *zilog, uint64_t syncval); extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); extern uint64_t zil_max_copied_data(zilog_t *zilog); -extern uint64_t zil_max_log_data(zilog_t *zilog, size_t hdrsize); +extern uint64_t zil_max_log_data(zilog_t *zilog); extern void zil_sums_init(zil_sums_t *zs); extern void zil_sums_fini(zil_sums_t *zs); diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index 3463682a1065..2fd24200a5f4 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -348,7 +348,6 @@ typedef struct zio_prop { boolean_t zp_dedup; boolean_t zp_dedup_verify; boolean_t zp_nopwrite; - boolean_t zp_brtwrite; boolean_t zp_encrypt; boolean_t zp_byteorder; uint8_t zp_salt[ZIO_DATA_SALT_LEN]; @@ -557,7 +556,7 @@ extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zio_priority_t priority, zio_flag_t flags, zbookmark_phys_t *zb); extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies, - boolean_t nopwrite, boolean_t brtwrite); + boolean_t nopwrite); extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c index 30851f5273a2..9fb2873132bf 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c @@ -153,12 +153,7 @@ struct vfsops zfs_vfsops = { .vfs_quotactl = zfs_quotactl, }; -#ifdef VFCF_CROSS_COPY_FILE_RANGE -VFS_SET(zfs_vfsops, zfs, - VFCF_DELEGADMIN | VFCF_JAIL | VFCF_CROSS_COPY_FILE_RANGE); -#else -VFS_SET(zfs_vfsops, zfs, VFCF_DELEGADMIN | VFCF_JAIL); -#endif +VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); /* * We need to keep a count of active fs's. diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 8a0a1d07c590..7e882d8574aa 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -6214,102 +6214,6 @@ zfs_deallocate(struct vop_deallocate_args *ap) } #endif -#ifndef _SYS_SYSPROTO_H_ -struct vop_copy_file_range_args { - struct vnode *a_invp; - off_t *a_inoffp; - struct vnode *a_outvp; - off_t *a_outoffp; - size_t *a_lenp; - unsigned int a_flags; - struct ucred *a_incred; - struct ucred *a_outcred; - struct thread *a_fsizetd; -} -#endif -/* - * TODO: FreeBSD will only call file system-specific copy_file_range() if both - * files resides under the same mountpoint. In case of ZFS we want to be called - * even is files are in different datasets (but on the same pools, but we need - * to check that ourselves). - */ -static int -zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap) -{ - zfsvfs_t *outzfsvfs; - struct vnode *invp = ap->a_invp; - struct vnode *outvp = ap->a_outvp; - struct mount *mp; - struct uio io; - int error; - uint64_t len = *ap->a_lenp; - - /* - * TODO: If offset/length is not aligned to recordsize, use - * vn_generic_copy_file_range() on this fragment. - * It would be better to do this after we lock the vnodes, but then we - * need something else than vn_generic_copy_file_range(). 
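The removed VOP below hinges on exactly the fallback contract this TODO describes: try the cloning fast path, and on EXDEV hand the request to the generic byte copier. A userland analogue of that shape, assuming only the standard copy_file_range(2) and read/write syscalls (the 64 KB buffer size is arbitrary, and a real caller would loop on short copies):

    #define _GNU_SOURCE             /* for copy_file_range() on glibc */
    #include <errno.h>
    #include <unistd.h>

    /*
     * Try the kernel's (possibly cloning) fast path first; fall back to
     * a plain read/write loop on EXDEV, the same error the deleted VOP
     * used to trigger vn_generic_copy_file_range().
     */
    static ssize_t
    copy_with_fallback(int infd, int outfd, size_t len)
    {
            ssize_t n = copy_file_range(infd, NULL, outfd, NULL, len, 0);
            if (n >= 0 || errno != EXDEV)
                    return (n);

            char buf[65536];
            size_t done = 0;
            while (done < len) {
                    ssize_t r = read(infd, buf, sizeof (buf));
                    if (r <= 0)
                            return (r < 0 ? r : (ssize_t)done);
                    for (ssize_t off = 0; off < r; ) {
                            ssize_t w = write(outfd, buf + off, r - off);
                            if (w < 0)
                                    return (w);
                            off += w;
                    }
                    done += r;
            }
            return ((ssize_t)done);
    }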
- */ - - vn_start_write(outvp, &mp, V_WAIT); - if (__predict_true(mp == outvp->v_mount)) { - outzfsvfs = (zfsvfs_t *)mp->mnt_data; - if (!spa_feature_is_enabled(dmu_objset_spa(outzfsvfs->z_os), - SPA_FEATURE_BLOCK_CLONING)) { - goto bad_write_fallback; - } - } - if (invp == outvp) { - if (vn_lock(outvp, LK_EXCLUSIVE) != 0) { - goto bad_write_fallback; - } - } else { - vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false, - LK_EXCLUSIVE); - if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) { - goto bad_locked_fallback; - } - } - -#ifdef MAC - error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred, - outvp); - if (error != 0) - goto out_locked; -#endif - - io.uio_offset = *ap->a_outoffp; - io.uio_resid = *ap->a_lenp; - error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); - if (error != 0) - goto out_locked; - - error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp), - ap->a_outoffp, &len, ap->a_outcred); - if (error == EXDEV) - goto bad_locked_fallback; - *ap->a_lenp = (size_t)len; -out_locked: - if (invp != outvp) - VOP_UNLOCK(invp); - VOP_UNLOCK(outvp); - if (mp != NULL) - vn_finished_write(mp); - return (error); - -bad_locked_fallback: - if (invp != outvp) - VOP_UNLOCK(invp); - VOP_UNLOCK(outvp); -bad_write_fallback: - if (mp != NULL) - vn_finished_write(mp); - error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, - ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, - ap->a_incred, ap->a_outcred, ap->a_fsizetd); - return (error); -} - struct vop_vector zfs_vnodeops; struct vop_vector zfs_fifoops; struct vop_vector zfs_shareops; @@ -6373,7 +6277,6 @@ struct vop_vector zfs_vnodeops = { #if __FreeBSD_version >= 1400043 .vop_add_writecount = vop_stdadd_writecount_nomsync, #endif - .vop_copy_file_range = zfs_freebsd_copy_file_range, }; VFS_VOP_VECTOR_REGISTER(zfs_vnodeops); diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index c7f76e8d96f8..0111fd43800e 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -26,7 +26,6 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2019, Klara Inc. * Copyright (c) 2019, Allan Jude - * Copyright (c) 2021, 2022 by Pawel Jakub Dawidek */ #include @@ -50,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -1428,7 +1426,7 @@ dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags) } static void -dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *dbbp) +dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn) { blkptr_t *bps = db->db.db_data; uint32_t indbs = 1ULL << dn->dn_indblkshift; @@ -1437,12 +1435,12 @@ dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *dbbp) for (int i = 0; i < n_bps; i++) { blkptr_t *bp = &bps[i]; - ASSERT3U(BP_GET_LSIZE(dbbp), ==, indbs); - BP_SET_LSIZE(bp, BP_GET_LEVEL(dbbp) == 1 ? - dn->dn_datablksz : BP_GET_LSIZE(dbbp)); - BP_SET_TYPE(bp, BP_GET_TYPE(dbbp)); - BP_SET_LEVEL(bp, BP_GET_LEVEL(dbbp) - 1); - BP_SET_BIRTH(bp, dbbp->blk_birth, 0); + ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, indbs); + BP_SET_LSIZE(bp, BP_GET_LEVEL(db->db_blkptr) == 1 ? + dn->dn_datablksz : BP_GET_LSIZE(db->db_blkptr)); + BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr)); + BP_SET_LEVEL(bp, BP_GET_LEVEL(db->db_blkptr) - 1); + BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0); } } @@ -1452,27 +1450,30 @@ dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *dbbp) * was taken, ENOENT if no action was taken. 
*/ static int -dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp) +dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn) { ASSERT(MUTEX_HELD(&db->db_mtx)); - int is_hole = bp == NULL || BP_IS_HOLE(bp); + int is_hole = db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr); /* * For level 0 blocks only, if the above check fails: * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync() * processes the delete record and clears the bp while we are waiting * for the dn_mtx (resulting in a "no" from block_freed). */ - if (!is_hole && db->db_level == 0) - is_hole = dnode_block_freed(dn, db->db_blkid) || BP_IS_HOLE(bp); + if (!is_hole && db->db_level == 0) { + is_hole = dnode_block_freed(dn, db->db_blkid) || + BP_IS_HOLE(db->db_blkptr); + } if (is_hole) { dbuf_set_data(db, dbuf_alloc_arcbuf(db)); memset(db->db.db_data, 0, db->db.db_size); - if (bp != NULL && db->db_level > 0 && BP_IS_HOLE(bp) && - bp->blk_birth != 0) { - dbuf_handle_indirect_hole(db, dn, bp); + if (db->db_blkptr != NULL && db->db_level > 0 && + BP_IS_HOLE(db->db_blkptr) && + db->db_blkptr->blk_birth != 0) { + dbuf_handle_indirect_hole(db, dn); } db->db_state = DB_CACHED; DTRACE_SET_STATE(db, "hole read satisfied"); @@ -1549,13 +1550,12 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, zbookmark_phys_t zb; uint32_t aflags = ARC_FLAG_NOWAIT; int err, zio_flags; - blkptr_t bp, *bpp; DB_DNODE_ENTER(db); dn = DB_DNODE(db); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); ASSERT(MUTEX_HELD(&db->db_mtx)); - ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); + ASSERT(db->db_state == DB_UNCACHED); ASSERT(db->db_buf == NULL); ASSERT(db->db_parent == NULL || RW_LOCK_HELD(&db->db_parent->db_rwlock)); @@ -1565,46 +1565,16 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, goto early_unlock; } - if (db->db_state == DB_UNCACHED) { - if (db->db_blkptr == NULL) { - bpp = NULL; - } else { - bp = *db->db_blkptr; - bpp = &bp; - } - } else { - struct dirty_leaf *dl; - dbuf_dirty_record_t *dr; - - ASSERT3S(db->db_state, ==, DB_NOFILL); - - dr = list_head(&db->db_dirty_records); - if (dr == NULL) { - err = EIO; - goto early_unlock; - } else { - dl = &dr->dt.dl; - if (!dl->dr_brtwrite) { - err = EIO; - goto early_unlock; - } - bp = dl->dr_overridden_by; - bpp = &bp; - } - } - - err = dbuf_read_hole(db, dn, bpp); + err = dbuf_read_hole(db, dn); if (err == 0) goto early_unlock; - ASSERT(bpp != NULL); - /* * Any attempt to read a redacted block should result in an error. This * will never happen under normal conditions, but can be useful for * debugging purposes. */ - if (BP_IS_REDACTED(bpp)) { + if (BP_IS_REDACTED(db->db_blkptr)) { ASSERT(dsl_dataset_feature_is_active( db->db_objset->os_dsl_dataset, SPA_FEATURE_REDACTED_DATASETS)); @@ -1619,7 +1589,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, * All bps of an encrypted os should have the encryption bit set. * If this is not true it indicates tampering and we report an error. 
*/ - if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) { + if (db->db_objset->os_encrypted && !BP_USES_CRYPT(db->db_blkptr)) { spa_log_error(db->db_objset->os_spa, &zb, &db->db_blkptr->blk_birth); zfs_panic_recover("unencrypted block in encrypted " @@ -1651,14 +1621,15 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr)) zio_flags |= ZIO_FLAG_RAW; /* - * The zio layer will copy the provided blkptr later, but we have our - * own copy so that we can release the parent's rwlock. We have to - * do that so that if dbuf_read_done is called synchronously (on + * The zio layer will copy the provided blkptr later, but we need to + * do this now so that we can release the parent's rwlock. We have to + * do that now so that if dbuf_read_done is called synchronously (on * an l1 cache hit) we don't acquire the db_mtx while holding the * parent's rwlock, which would be a lock ordering violation. */ + blkptr_t bp = *db->db_blkptr; dmu_buf_unlock_parent(db, dblt, tag); - (void) arc_read(zio, db->db_objset->os_spa, bpp, + (void) arc_read(zio, db->db_objset->os_spa, &bp, dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); return (err); @@ -1760,6 +1731,9 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) */ ASSERT(!zfs_refcount_is_zero(&db->db_holds)); + if (db->db_state == DB_NOFILL) + return (SET_ERROR(EIO)); + DB_DNODE_ENTER(db); dn = DB_DNODE(db); @@ -1806,13 +1780,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) } DB_DNODE_EXIT(db); DBUF_STAT_BUMP(hash_hits); - } else if (db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL) { + } else if (db->db_state == DB_UNCACHED) { boolean_t need_wait = B_FALSE; db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); - if (zio == NULL && (db->db_state == DB_NOFILL || - (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)))) { + if (zio == NULL && + db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) { spa_t *spa = dn->dn_objset->os_spa; zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); need_wait = B_TRUE; @@ -1939,8 +1913,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * the buf thawed to save the effort of freezing & * immediately re-thawing it. */ - if (!dr->dt.dl.dr_brtwrite) - arc_release(dr->dt.dl.dr_data, db); + arc_release(dr->dt.dl.dr_data, db); } /* @@ -2023,11 +1996,6 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, db->db_blkid > dn->dn_maxblkid) dn->dn_maxblkid = db->db_blkid; dbuf_unoverride(dr); - if (dr->dt.dl.dr_brtwrite) { - ASSERT(db->db.db_data == NULL); - mutex_exit(&db->db_mtx); - continue; - } } else { /* * This dbuf is not dirty in the open context. 
@@ -2317,7 +2285,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); - if (db->db_blkid != DMU_BONUS_BLKID && db->db_state != DB_NOFILL) { + if (db->db_blkid != DMU_BONUS_BLKID) { dmu_objset_willuse_space(os, db->db.db_size, tx); } @@ -2360,9 +2328,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); } - if (db->db_blkid != DMU_BONUS_BLKID && db->db_state != DB_NOFILL) { + if (db->db_blkid != DMU_BONUS_BLKID) dr->dr_accounted = db->db.db_size; - } dr->dr_dbuf = db; dr->dr_txg = tx->tx_txg; list_insert_before(&db->db_dirty_records, dr_next, dr); @@ -2522,7 +2489,6 @@ boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) { uint64_t txg = tx->tx_txg; - boolean_t brtwrite; ASSERT(txg != 0); @@ -2547,16 +2513,6 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (B_FALSE); ASSERT(dr->dr_dbuf == db); - brtwrite = dr->dt.dl.dr_brtwrite; - if (brtwrite) { - /* - * We are freeing a block that we cloned in the same - * transaction group. - */ - brt_pending_remove(dmu_objset_spa(db->db_objset), - &dr->dt.dl.dr_overridden_by, tx); - } - dnode_t *dn = dr->dr_dnode; dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); @@ -2586,7 +2542,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) mutex_exit(&dn->dn_mtx); } - if (db->db_state != DB_NOFILL && !brtwrite) { + if (db->db_state != DB_NOFILL) { dbuf_unoverride(dr); ASSERT(db->db_buf != NULL); @@ -2601,8 +2557,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) db->db_dirtycnt -= 1; if (zfs_refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { - ASSERT(db->db_state == DB_NOFILL || brtwrite || - arc_released(db->db_buf)); + ASSERT(db->db_state == DB_NOFILL || arc_released(db->db_buf)); dbuf_destroy(db); return (B_TRUE); } @@ -4793,10 +4748,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); if (db->db_state != DB_NOFILL) { - if (dr->dt.dl.dr_data != NULL && - dr->dt.dl.dr_data != db->db_buf) { + if (dr->dt.dl.dr_data != db->db_buf) arc_buf_destroy(dr->dt.dl.dr_data, db); - } } } else { ASSERT(list_head(&dr->dt.di.dr_children) == NULL); @@ -5093,8 +5046,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) mutex_enter(&db->db_mtx); dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; zio_write_override(dr->dr_zio, &dr->dt.dl.dr_overridden_by, - dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite, - dr->dt.dl.dr_brtwrite); + dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite); mutex_exit(&db->db_mtx); } else if (db->db_state == DB_NOFILL) { ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF || diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index ce985d833f58..18c7bb8b5e57 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -515,7 +515,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, zio_t *zio = NULL; boolean_t missed = B_FALSE; - ASSERT(!read || length <= DMU_MAX_ACCESS); + ASSERT(length <= DMU_MAX_ACCESS); /* * Note: We directly notify the prefetch code of this read, so that @@ -2171,168 +2171,6 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) return (err); } -int -dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, - dmu_tx_t *tx, blkptr_t *bps, size_t *nbpsp) -{ - dmu_buf_t **dbp, *dbuf; - dmu_buf_impl_t *db; - blkptr_t *bp; - 
int error, numbufs; - - error = dmu_buf_hold_array(os, object, offset, length, FALSE, FTAG, - &numbufs, &dbp); - if (error != 0) { - if (error == ESRCH) { - error = SET_ERROR(ENXIO); - } - return (error); - } - - ASSERT3U(numbufs, <=, *nbpsp); - - for (int i = 0; i < numbufs; i++) { - dbuf = dbp[i]; - db = (dmu_buf_impl_t *)dbuf; - - mutex_enter(&db->db_mtx); - - /* - * If the block is not on the disk yet, it has no BP assigned. - * There is not much we can do... - */ - if (!list_is_empty(&db->db_dirty_records)) { - dbuf_dirty_record_t *dr; - - dr = list_head(&db->db_dirty_records); - if (dr->dt.dl.dr_brtwrite) { - /* - * This is very special case where we clone a - * block and in the same transaction group we - * read its BP (most likely to clone the clone). - */ - bp = &dr->dt.dl.dr_overridden_by; - } else { - /* - * The block was modified in the same - * transaction group. - */ - mutex_exit(&db->db_mtx); - error = SET_ERROR(EAGAIN); - goto out; - } - } else { - bp = db->db_blkptr; - } - - mutex_exit(&db->db_mtx); - - if (bp == NULL) { - /* - * The block was created in this transaction group, - * so it has no BP yet. - */ - error = SET_ERROR(EAGAIN); - goto out; - } - if (dmu_buf_is_dirty(dbuf, tx)) { - error = SET_ERROR(EAGAIN); - goto out; - } - /* - * Make sure we clone only data blocks. - */ - if (BP_IS_METADATA(bp) && !BP_IS_HOLE(bp)) { - error = SET_ERROR(EINVAL); - goto out; - } - - bps[i] = *bp; - } - - *nbpsp = numbufs; -out: - dmu_buf_rele_array(dbp, numbufs, FTAG); - - return (error); -} - -void -dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, - dmu_tx_t *tx, const blkptr_t *bps, size_t nbps, boolean_t replay) -{ - spa_t *spa; - dmu_buf_t **dbp, *dbuf; - dmu_buf_impl_t *db; - struct dirty_leaf *dl; - dbuf_dirty_record_t *dr; - const blkptr_t *bp; - int numbufs; - - spa = os->os_spa; - - VERIFY0(dmu_buf_hold_array(os, object, offset, length, FALSE, FTAG, - &numbufs, &dbp)); - ASSERT3U(nbps, ==, numbufs); - - for (int i = 0; i < numbufs; i++) { - dbuf = dbp[i]; - db = (dmu_buf_impl_t *)dbuf; - bp = &bps[i]; - - ASSERT0(db->db_level); - ASSERT(db->db_blkid != DMU_BONUS_BLKID); - ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp)); - - mutex_enter(&db->db_mtx); - - VERIFY(!dbuf_undirty(db, tx)); - ASSERT(list_head(&db->db_dirty_records) == NULL); - if (db->db_buf != NULL) { - arc_buf_destroy(db->db_buf, db); - db->db_buf = NULL; - } - - mutex_exit(&db->db_mtx); - - dmu_buf_will_not_fill(dbuf, tx); - - mutex_enter(&db->db_mtx); - - dr = list_head(&db->db_dirty_records); - VERIFY(dr != NULL); - ASSERT3U(dr->dr_txg, ==, tx->tx_txg); - dl = &dr->dt.dl; - dl->dr_overridden_by = *bp; - dl->dr_brtwrite = B_TRUE; - - dl->dr_override_state = DR_OVERRIDDEN; - if (BP_IS_HOLE(bp)) { - dl->dr_overridden_by.blk_birth = 0; - dl->dr_overridden_by.blk_phys_birth = 0; - } else { - dl->dr_overridden_by.blk_birth = dr->dr_txg; - dl->dr_overridden_by.blk_phys_birth = - BP_PHYSICAL_BIRTH(bp); - } - - mutex_exit(&db->db_mtx); - - /* - * When data in embedded into BP there is no need to create - * BRT entry as there is no data block. Just copy the BP as - * it contains the data. - * Also, when replaying ZIL we don't want to bump references - * in the BRT as it was already done during ZIL claim. 
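The birth-time handling in the deleted dmu_brt_clone() above is subtle enough to be worth restating: a cloned bp takes the cloning txg as its logical birth but keeps the source's physical birth, and a hole gets both cleared. A compilable restatement with a stand-in two-field bp; the real blkptr_t and BP_PHYSICAL_BIRTH() live in sys/spa.h, so treat the names and layout here as illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in carrying only the two fields the deleted code adjusted. */
    typedef struct {
            uint64_t blk_phys_birth;  /* txg the data was last written */
            uint64_t blk_birth;       /* txg this bp was (re)allocated */
    } mini_bp_t;

    /* Like BP_PHYSICAL_BIRTH(): physical birth if set, else logical. */
    static uint64_t
    physical_birth(const mini_bp_t *bp)
    {
            return (bp->blk_phys_birth ? bp->blk_phys_birth : bp->blk_birth);
    }

    /* Mirror of the override stamping in the deleted dmu_brt_clone(). */
    static void
    stamp_cloned_bp(mini_bp_t *dst, const mini_bp_t *src,
        uint64_t clone_txg, int is_hole)
    {
            *dst = *src;
            if (is_hole) {
                    dst->blk_birth = 0;
                    dst->blk_phys_birth = 0;
            } else {
                    dst->blk_birth = clone_txg;
                    dst->blk_phys_birth = physical_birth(src);
            }
    }

    int
    main(void)
    {
            mini_bp_t src = { .blk_phys_birth = 0, .blk_birth = 100 };
            mini_bp_t dst;

            stamp_cloned_bp(&dst, &src, 250, 0);
            /* Prints "logical 250 physical 100". */
            printf("logical %llu physical %llu\n",
                (unsigned long long)dst.blk_birth,
                (unsigned long long)dst.blk_phys_birth);
            return (0);
    }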
- */ - if (!replay && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { - brt_pending_add(spa, bp, tx); - } - } - - dmu_buf_rele_array(dbp, numbufs, FTAG); -} - void __dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) { diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c index 1c5608c4541b..815e27a6c7f7 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_tx.c +++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c @@ -349,7 +349,7 @@ dmu_tx_mark_netfree(dmu_tx_t *tx) } static void -dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) +dmu_tx_hold_free_impl(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) { dmu_tx_t *tx = txh->txh_tx; dnode_t *dn = txh->txh_dnode; @@ -357,11 +357,15 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) ASSERT(tx->tx_txg == 0); + dmu_tx_count_dnode(txh); + if (off >= (dn->dn_maxblkid + 1) * dn->dn_datablksz) return; if (len == DMU_OBJECT_END) len = (dn->dn_maxblkid + 1) * dn->dn_datablksz - off; + dmu_tx_count_dnode(txh); + /* * For i/o error checking, we read the first and last level-0 * blocks if they are not aligned, and all the level-1 blocks. @@ -441,10 +445,8 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len) txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_FREE, off, len); - if (txh != NULL) { - dmu_tx_count_dnode(txh); - dmu_tx_count_free(txh, off, len); - } + if (txh != NULL) + (void) dmu_tx_hold_free_impl(txh, off, len); } void @@ -453,35 +455,8 @@ dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len) dmu_tx_hold_t *txh; txh = dmu_tx_hold_dnode_impl(tx, dn, THT_FREE, off, len); - if (txh != NULL) { - dmu_tx_count_dnode(txh); - dmu_tx_count_free(txh, off, len); - } -} - -static void -dmu_tx_count_clone(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) -{ - - /* - * Reuse dmu_tx_count_free(), it does exactly what we need for clone. - */ - dmu_tx_count_free(txh, off, len); -} - -void -dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len) -{ - dmu_tx_hold_t *txh; - - ASSERT0(tx->tx_txg); - ASSERT(len == 0 || UINT64_MAX - off >= len - 1); - - txh = dmu_tx_hold_dnode_impl(tx, dn, THT_CLONE, off, len); - if (txh != NULL) { - dmu_tx_count_dnode(txh); - dmu_tx_count_clone(txh, off, len); - } + if (txh != NULL) + (void) dmu_tx_hold_free_impl(txh, off, len); } static void @@ -692,10 +667,6 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db) case THT_NEWOBJECT: match_object = TRUE; break; - case THT_CLONE: - if (blkid >= beginblk && blkid <= endblk) - match_offset = TRUE; - break; default: cmn_err(CE_PANIC, "bad txh_type %d", txh->txh_type); diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c index d009c58d8644..77bf9140d52d 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_log.c +++ b/sys/contrib/openzfs/module/zfs/zfs_log.c @@ -21,7 +21,6 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, 2018 by Delphix. All rights reserved. - * Copyright (c) 2022 by Pawel Jakub Dawidek */ @@ -892,56 +891,5 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, zil_itx_assign(zilog, itx, tx); } -/* - * Handles TX_CLONE_RANGE transactions. 
- */ -void -zfs_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, - uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps, - size_t nbps) -{ - itx_t *itx; - lr_clone_range_t *lr; - uint64_t partlen, max_log_data; - size_t i, partnbps; - - VERIFY(!zil_replaying(zilog, tx)); - - if (zp->z_unlinked) - return; - - max_log_data = zil_max_log_data(zilog, sizeof (lr_clone_range_t)); - - while (nbps > 0) { - partnbps = MIN(nbps, max_log_data / sizeof (bps[0])); - partlen = 0; - for (i = 0; i < partnbps; i++) { - partlen += BP_GET_LSIZE(&bps[i]); - } - partlen = MIN(partlen, len); - - itx = zil_itx_create(txtype, - sizeof (*lr) + sizeof (bps[0]) * partnbps); - lr = (lr_clone_range_t *)&itx->itx_lr; - lr->lr_foid = zp->z_id; - lr->lr_offset = off; - lr->lr_length = partlen; - lr->lr_blksz = blksz; - lr->lr_nbps = partnbps; - memcpy(lr->lr_bps, bps, sizeof (bps[0]) * partnbps); - - itx->itx_sync = (zp->z_sync_cnt != 0); - - zil_itx_assign(zilog, itx, tx); - - bps += partnbps; - ASSERT3U(nbps, >=, partnbps); - nbps -= partnbps; - off += partlen; - ASSERT3U(len, >=, partlen); - len -= partlen; - } -} - ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, S64, ZMOD_RW, "Largest data block to write to zil"); diff --git a/sys/contrib/openzfs/module/zfs/zfs_replay.c b/sys/contrib/openzfs/module/zfs/zfs_replay.c index 04dfda56b3f1..32be27a8ba6e 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_replay.c +++ b/sys/contrib/openzfs/module/zfs/zfs_replay.c @@ -22,7 +22,6 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Cyril Plisko. All rights reserved. * Copyright (c) 2013, 2017 by Delphix. All rights reserved. - * Copyright (c) 2021, 2022 by Pawel Jakub Dawidek */ #include @@ -1163,34 +1162,6 @@ zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap) return (error); } -static int -zfs_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap) -{ - zfsvfs_t *zfsvfs = arg1; - lr_clone_range_t *lr = arg2; - znode_t *zp; - int error; - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { - /* - * Clones can be logged out of order, so don't be surprised if - * the file is gone - just return success. - */ - if (error == ENOENT) - error = 0; - return (error); - } - - error = zfs_clone_range_replay(zp, lr->lr_offset, lr->lr_length, - lr->lr_blksz, lr->lr_bps, lr->lr_nbps); - - zrele(zp); - return (error); -} - /* * Callback vectors for replaying records */ @@ -1219,5 +1190,4 @@ zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = { zfs_replay_setsaxattr, /* TX_SETSAXATTR */ zfs_replay_rename_exchange, /* TX_RENAME_EXCHANGE */ zfs_replay_rename_whiteout, /* TX_RENAME_WHITEOUT */ - zfs_replay_clone_range, /* TX_CLONE_RANGE */ }; diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index 91b594e41cda..d7cdfe129e7e 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -997,467 +997,6 @@ zfs_get_done(zgd_t *zgd, int error) kmem_free(zgd, sizeof (zgd_t)); } -static int -zfs_enter_two(zfsvfs_t *zfsvfs1, zfsvfs_t *zfsvfs2, const char *tag) -{ - int error; - - /* Swap. Not sure if the order of zfs_enter()s is important. 
*/ - if (zfsvfs1 > zfsvfs2) { - zfsvfs_t *tmpzfsvfs; - - tmpzfsvfs = zfsvfs2; - zfsvfs2 = zfsvfs1; - zfsvfs1 = tmpzfsvfs; - } - - error = zfs_enter(zfsvfs1, tag); - if (error != 0) - return (error); - if (zfsvfs1 != zfsvfs2) { - error = zfs_enter(zfsvfs2, tag); - if (error != 0) { - zfs_exit(zfsvfs1, tag); - return (error); - } - } - - return (0); -} - -static void -zfs_exit_two(zfsvfs_t *zfsvfs1, zfsvfs_t *zfsvfs2, const char *tag) -{ - - zfs_exit(zfsvfs1, tag); - if (zfsvfs1 != zfsvfs2) - zfs_exit(zfsvfs2, tag); -} - -/* - * We split each clone request in chunks that can fit into a single ZIL - * log entry. Each ZIL log entry can fit 130816 bytes for a block cloning - * operation (see zil_max_log_data() and zfs_log_clone_range()). This gives - * us room for storing 1022 block pointers. - * - * On success, the function return the number of bytes copied in *lenp. - * Note, it doesn't return how much bytes are left to be copied. - */ -int -zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, - uint64_t *outoffp, uint64_t *lenp, cred_t *cr) -{ - zfsvfs_t *inzfsvfs, *outzfsvfs; - objset_t *inos, *outos; - zfs_locked_range_t *inlr, *outlr; - dmu_buf_impl_t *db; - dmu_tx_t *tx; - zilog_t *zilog; - uint64_t inoff, outoff, len, done; - uint64_t outsize, size; - int error; - int count = 0; - sa_bulk_attr_t bulk[3]; - uint64_t mtime[2], ctime[2]; - uint64_t uid, gid, projid; - blkptr_t *bps; - size_t maxblocks, nbps; - uint_t inblksz; - uint64_t clear_setid_bits_txg = 0; - - inoff = *inoffp; - outoff = *outoffp; - len = *lenp; - done = 0; - - inzfsvfs = ZTOZSB(inzp); - outzfsvfs = ZTOZSB(outzp); - inos = inzfsvfs->z_os; - outos = outzfsvfs->z_os; - - /* - * Both source and destination have to belong to the same storage pool. - */ - if (dmu_objset_spa(inos) != dmu_objset_spa(outos)) { - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (SET_ERROR(EXDEV)); - } - - /* - * We need to call zfs_enter() potentially on two different datasets, - * so we need a dedicated function for that. - */ - error = zfs_enter_two(inzfsvfs, outzfsvfs, FTAG); - if (error != 0) - return (error); - - ASSERT(!outzfsvfs->z_replay); - - error = zfs_verify_zp(inzp); - if (error == 0) - error = zfs_verify_zp(outzp); - if (error != 0) { - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (error); - } - - if (!spa_feature_is_enabled(dmu_objset_spa(outos), - SPA_FEATURE_BLOCK_CLONING)) { - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (SET_ERROR(EXDEV)); - } - - /* - * We don't copy source file's flags that's why we don't allow to clone - * files that are in quarantine. - */ - if (inzp->z_pflags & ZFS_AV_QUARANTINED) { - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (SET_ERROR(EACCES)); - } - - if (inoff >= inzp->z_size) { - *lenp = 0; - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (0); - } - if (len > inzp->z_size - inoff) { - len = inzp->z_size - inoff; - } - if (len == 0) { - *lenp = 0; - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (0); - } - - /* - * Callers might not be able to detect properly that we are read-only, - * so check it explicitly here. - */ - if (zfs_is_readonly(outzfsvfs)) { - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (SET_ERROR(EROFS)); - } - - /* - * If immutable or not appending then return EPERM. - * Intentionally allow ZFS_READONLY through here. - * See zfs_zaccess_common() - */ - if ((outzp->z_pflags & ZFS_IMMUTABLE) != 0) { - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (SET_ERROR(EPERM)); - } - - /* - * No overlapping if we are cloning within the same file. 
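The 130816-byte and 1022-pointer figures quoted in the deleted comment above follow from simple arithmetic. A sketch that reproduces them, with the structure sizes hardcoded as assumptions (128 KB default maximum ZIL block, 184-byte zil_chain_t, 72-byte lr_clone_range_t header, 128-byte blkptr_t) rather than taken from the real headers:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint64_t max_block = 131072;        /* zl_max_block_size */
            uint64_t zil_chain = 184;           /* sizeof (zil_chain_t) */
            uint64_t clone_hdr = 32 + 5 * 8;    /* lr_t + 5 uint64_t = 72 */
            uint64_t bp_size = 128;             /* sizeof (blkptr_t) */

            uint64_t max_log_data = max_block - zil_chain - clone_hdr;
            printf("max_log_data = %llu\n",     /* 130816 */
                (unsigned long long)max_log_data);
            printf("bps per record = %llu\n",   /* 1022 */
                (unsigned long long)(max_log_data / bp_size));
            return (0);
    }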
- */ - if (inzp == outzp) { - if (inoff < outoff + len && outoff < inoff + len) { - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - return (SET_ERROR(EINVAL)); - } - } - - /* - * Maintain predictable lock order. - */ - if (inzp < outzp || (inzp == outzp && inoff < outoff)) { - inlr = zfs_rangelock_enter(&inzp->z_rangelock, inoff, len, - RL_READER); - outlr = zfs_rangelock_enter(&outzp->z_rangelock, outoff, len, - RL_WRITER); - } else { - outlr = zfs_rangelock_enter(&outzp->z_rangelock, outoff, len, - RL_WRITER); - inlr = zfs_rangelock_enter(&inzp->z_rangelock, inoff, len, - RL_READER); - } - - inblksz = inzp->z_blksz; - - /* - * We cannot clone into files with different block size. - */ - if (inblksz != outzp->z_blksz && outzp->z_size > inblksz) { - error = SET_ERROR(EXDEV); - goto unlock; - } - - /* - * Offsets and len must be at block boundries. - */ - if ((inoff % inblksz) != 0 || (outoff % inblksz) != 0) { - error = SET_ERROR(EXDEV); - goto unlock; - } - /* - * Length must be multipe of blksz, except for the end of the file. - */ - if ((len % inblksz) != 0 && - (len < inzp->z_size - inoff || len < outzp->z_size - outoff)) { - error = SET_ERROR(EXDEV); - goto unlock; - } - - error = zn_rlimit_fsize(outoff + len); - if (error != 0) { - goto unlock; - } - - if (inoff >= MAXOFFSET_T || outoff >= MAXOFFSET_T) { - error = SET_ERROR(EFBIG); - goto unlock; - } - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(outzfsvfs), NULL, - &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(outzfsvfs), NULL, - &ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(outzfsvfs), NULL, - &outzp->z_size, 8); - - zilog = outzfsvfs->z_log; - maxblocks = zil_max_log_data(zilog, sizeof (lr_clone_range_t)) / - sizeof (bps[0]); - - uid = KUID_TO_SUID(ZTOUID(outzp)); - gid = KGID_TO_SGID(ZTOGID(outzp)); - projid = outzp->z_projid; - - bps = kmem_alloc(sizeof (bps[0]) * maxblocks, KM_SLEEP); - - /* - * Clone the file in reasonable size chunks. Each chunk is cloned - * in a separate transaction; this keeps the intent log records small - * and allows us to do more fine-grained space accounting. - */ - while (len > 0) { - size = MIN(inblksz * maxblocks, len); - - if (zfs_id_overblockquota(outzfsvfs, DMU_USERUSED_OBJECT, - uid) || - zfs_id_overblockquota(outzfsvfs, DMU_GROUPUSED_OBJECT, - gid) || - (projid != ZFS_DEFAULT_PROJID && - zfs_id_overblockquota(outzfsvfs, DMU_PROJECTUSED_OBJECT, - projid))) { - error = SET_ERROR(EDQUOT); - break; - } - - /* - * Start a transaction. - */ - tx = dmu_tx_create(outos); - - nbps = maxblocks; - error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, tx, bps, - &nbps); - if (error != 0) { - dmu_tx_abort(tx); - /* - * If we are tyring to clone a block that was created - * in the current transaction group. Return an error, - * so the caller can fallback to just copying the data. - */ - if (error == EAGAIN) { - error = SET_ERROR(EXDEV); - } - break; - } - /* - * Encrypted data is fine as long as it comes from the same - * dataset. - * TODO: We want to extend it in the future to allow cloning to - * datasets with the same keys, like clones or to be able to - * clone a file from a snapshot of an encrypted dataset into the - * dataset itself. 
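Both the deleted zfs_enter_two() and the rangelock code above apply the same classic deadlock-avoidance rule: when two locks of one class must be held, acquire them in a single global order (here, by address) and degrade to one acquisition when both operands are the same object. A generic pthread sketch of the rule, not ZFS code:

    #include <pthread.h>

    /* Take two mutexes in address order; take only one if they match. */
    static void
    lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            if (a == b) {
                    pthread_mutex_lock(a);
                    return;
            }
            if (a > b) {                        /* order by address */
                    pthread_mutex_t *t = a;
                    a = b;
                    b = t;
            }
            pthread_mutex_lock(a);
            pthread_mutex_lock(b);
    }

    static void
    unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            pthread_mutex_unlock(a);
            if (a != b)
                    pthread_mutex_unlock(b);
    }

The deleted code uses the rule twice: once on the two zfsvfs teardown locks, and once on the two range locks, where the equal-znode case is tie-broken by file offset instead of degrading to one lock.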
- */ - if (BP_IS_PROTECTED(&bps[0])) { - if (inzfsvfs != outzfsvfs) { - dmu_tx_abort(tx); - error = SET_ERROR(EXDEV); - break; - } - } - - dmu_tx_hold_sa(tx, outzp->z_sa_hdl, B_FALSE); - db = (dmu_buf_impl_t *)sa_get_db(outzp->z_sa_hdl); - DB_DNODE_ENTER(db); - dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), outoff, size); - DB_DNODE_EXIT(db); - zfs_sa_upgrade_txholds(tx, outzp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error != 0) { - dmu_tx_abort(tx); - break; - } - - /* - * Copy source znode's block size. This only happens on the - * first iteration since zfs_rangelock_reduce() will shrink down - * lr_len to the appropriate size. - */ - if (outlr->lr_length == UINT64_MAX) { - zfs_grow_blocksize(outzp, inblksz, tx); - /* - * Round range lock up to the block boundary, so we - * prevent appends until we are done. - */ - zfs_rangelock_reduce(outlr, outoff, - ((len - 1) / inblksz + 1) * inblksz); - } - - dmu_brt_clone(outos, outzp->z_id, outoff, size, tx, bps, nbps, - B_FALSE); - - zfs_clear_setid_bits_if_necessary(outzfsvfs, outzp, cr, - &clear_setid_bits_txg, tx); - - zfs_tstamp_update_setup(outzp, CONTENT_MODIFIED, mtime, ctime); - - /* - * Update the file size (zp_size) if it has changed; - * account for possible concurrent updates. - */ - while ((outsize = outzp->z_size) < outoff + size) { - (void) atomic_cas_64(&outzp->z_size, outsize, - outoff + size); - } - - error = sa_bulk_update(outzp->z_sa_hdl, bulk, count, tx); - - zfs_log_clone_range(zilog, tx, TX_CLONE_RANGE, outzp, outoff, - size, inblksz, bps, nbps); - - dmu_tx_commit(tx); - - if (error != 0) - break; - - inoff += size; - outoff += size; - len -= size; - done += size; - } - - kmem_free(bps, sizeof (bps[0]) * maxblocks); - zfs_znode_update_vfs(outzp); - -unlock: - zfs_rangelock_exit(outlr); - zfs_rangelock_exit(inlr); - - if (done > 0) { - /* - * If we have made at least partial progress, reset the error. - */ - error = 0; - - ZFS_ACCESSTIME_STAMP(inzfsvfs, inzp); - - if (outos->os_sync == ZFS_SYNC_ALWAYS) { - zil_commit(zilog, outzp->z_id); - } - - *inoffp += done; - *outoffp += done; - *lenp = done; - } - - zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); - - return (error); -} - -/* - * Usual pattern would be to call zfs_clone_range() from zfs_replay_clone(), - * but we cannot do that, because when replaying we don't have source znode - * available. This is why we need a dedicated replay function. - */ -int -zfs_clone_range_replay(znode_t *zp, uint64_t off, uint64_t len, uint64_t blksz, - const blkptr_t *bps, size_t nbps) -{ - zfsvfs_t *zfsvfs; - dmu_buf_impl_t *db; - dmu_tx_t *tx; - int error; - int count = 0; - sa_bulk_attr_t bulk[3]; - uint64_t mtime[2], ctime[2]; - - ASSERT3U(off, <, MAXOFFSET_T); - ASSERT3U(len, >, 0); - ASSERT3U(nbps, >, 0); - - zfsvfs = ZTOZSB(zp); - - ASSERT(spa_feature_is_enabled(dmu_objset_spa(zfsvfs->z_os), - SPA_FEATURE_BLOCK_CLONING)); - - if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) - return (error); - - ASSERT(zfsvfs->z_replay); - ASSERT(!zfs_is_readonly(zfsvfs)); - - if ((off % blksz) != 0) { - zfs_exit(zfsvfs, FTAG); - return (SET_ERROR(EINVAL)); - } - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &zp->z_size, 8); - - /* - * Start a transaction. 
- */ - tx = dmu_tx_create(zfsvfs->z_os); - - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); - DB_DNODE_ENTER(db); - dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), off, len); - DB_DNODE_EXIT(db); - zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error != 0) { - dmu_tx_abort(tx); - zfs_exit(zfsvfs, FTAG); - return (error); - } - - if (zp->z_blksz < blksz) - zfs_grow_blocksize(zp, blksz, tx); - - dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps, B_TRUE); - - zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); - - if (zp->z_size < off + len) - zp->z_size = off + len; - - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - - /* - * zil_replaying() not only check if we are replaying ZIL, but also - * updates the ZIL header to record replay progress. - */ - VERIFY(zil_replaying(zfsvfs->z_log, tx)); - - dmu_tx_commit(tx); - - zfs_znode_update_vfs(zp); - - zfs_exit(zfsvfs, FTAG); - - return (error); -} - EXPORT_SYMBOL(zfs_access); EXPORT_SYMBOL(zfs_fsync); EXPORT_SYMBOL(zfs_holey); @@ -1465,8 +1004,6 @@ EXPORT_SYMBOL(zfs_read); EXPORT_SYMBOL(zfs_write); EXPORT_SYMBOL(zfs_getsecattr); EXPORT_SYMBOL(zfs_setsecattr); -EXPORT_SYMBOL(zfs_clone_range); -EXPORT_SYMBOL(zfs_clone_range_replay); ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW, "Bytes to read per chunk"); diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index eb26e4b32998..ca578b3110e3 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -43,7 +43,6 @@ #include #include #include -#include #include /* @@ -579,12 +578,14 @@ zil_claim_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, } static int -zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg) +zil_claim_log_record(zilog_t *zilog, const lr_t *lrc, void *tx, + uint64_t first_txg) { lr_write_t *lr = (lr_write_t *)lrc; int error; - ASSERT(lrc->lrc_txtype == TX_WRITE); + if (lrc->lrc_txtype != TX_WRITE) + return (0); /* * If the block is not readable, don't claim it. This can happen @@ -603,57 +604,6 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg) return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); } -static int -zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx) -{ - const lr_clone_range_t *lr = (const lr_clone_range_t *)lrc; - const blkptr_t *bp; - spa_t *spa; - uint_t ii; - - ASSERT(lrc->lrc_txtype == TX_CLONE_RANGE); - - if (tx == NULL) { - return (0); - } - - /* - * XXX: Do we need to byteswap lr? - */ - - spa = zilog->zl_spa; - - for (ii = 0; ii < lr->lr_nbps; ii++) { - bp = &lr->lr_bps[ii]; - - /* - * When data in embedded into BP there is no need to create - * BRT entry as there is no data block. Just copy the BP as - * it contains the data. 
- */ - if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { - brt_pending_add(spa, bp, tx); - } - } - - return (0); -} - -static int -zil_claim_log_record(zilog_t *zilog, const lr_t *lrc, void *tx, - uint64_t first_txg) -{ - - switch (lrc->lrc_txtype) { - case TX_WRITE: - return (zil_claim_write(zilog, lrc, tx, first_txg)); - case TX_CLONE_RANGE: - return (zil_claim_clone_range(zilog, lrc, tx)); - default: - return (0); - } -} - static int zil_free_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, uint64_t claim_txg) @@ -666,70 +616,23 @@ zil_free_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, } static int -zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg) +zil_free_log_record(zilog_t *zilog, const lr_t *lrc, void *tx, + uint64_t claim_txg) { lr_write_t *lr = (lr_write_t *)lrc; blkptr_t *bp = &lr->lr_blkptr; - ASSERT(lrc->lrc_txtype == TX_WRITE); - /* * If we previously claimed it, we need to free it. */ - if (bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0 && - !BP_IS_HOLE(bp)) { + if (claim_txg != 0 && lrc->lrc_txtype == TX_WRITE && + bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0 && + !BP_IS_HOLE(bp)) zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); - } return (0); } -static int -zil_free_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx) -{ - const lr_clone_range_t *lr = (const lr_clone_range_t *)lrc; - const blkptr_t *bp; - spa_t *spa; - uint_t ii; - - ASSERT(lrc->lrc_txtype == TX_CLONE_RANGE); - - if (tx == NULL) { - return (0); - } - - spa = zilog->zl_spa; - - for (ii = 0; ii < lr->lr_nbps; ii++) { - bp = &lr->lr_bps[ii]; - - if (!BP_IS_HOLE(bp)) { - zio_free(spa, dmu_tx_get_txg(tx), bp); - } - } - - return (0); -} - -static int -zil_free_log_record(zilog_t *zilog, const lr_t *lrc, void *tx, - uint64_t claim_txg) -{ - - if (claim_txg == 0) { - return (0); - } - - switch (lrc->lrc_txtype) { - case TX_WRITE: - return (zil_free_write(zilog, lrc, tx, claim_txg)); - case TX_CLONE_RANGE: - return (zil_free_clone_range(zilog, lrc, tx)); - default: - return (0); - } -} - static int zil_lwb_vdev_compare(const void *x1, const void *x2) { @@ -1899,12 +1802,13 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) } /* - * Maximum amount of data that can be put into single log block. + * Maximum amount of write data that can be put into single log block. */ uint64_t -zil_max_log_data(zilog_t *zilog, size_t hdrsize) +zil_max_log_data(zilog_t *zilog) { - return (zilog->zl_max_block_size - sizeof (zil_chain_t) - hdrsize); + return (zilog->zl_max_block_size - + sizeof (zil_chain_t) - sizeof (lr_write_t)); } /* @@ -1914,7 +1818,7 @@ zil_max_log_data(zilog_t *zilog, size_t hdrsize) static inline uint64_t zil_max_waste_space(zilog_t *zilog) { - return (zil_max_log_data(zilog, sizeof (lr_write_t)) / 8); + return (zil_max_log_data(zilog) / 8); } /* @@ -1987,7 +1891,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) * For WR_NEED_COPY optimize layout for minimal number of chunks. 
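With the clone record gone, lr_write_t is the only variable-size record left, so the hunk above folds its header size back into zil_max_log_data() itself. The resulting limits, with sizes hardcoded as assumptions (128 KB ZIL block, 184-byte zil_chain_t, 192-byte lr_write_t: a 32-byte lr_t, four uint64_t fields, and a 128-byte blkptr_t):

    #include <stdio.h>

    int
    main(void)
    {
            unsigned long max_block = 131072;   /* zl_max_block_size */
            unsigned long zil_chain = 184;      /* sizeof (zil_chain_t) */
            unsigned long lr_write = 192;       /* sizeof (lr_write_t) */

            unsigned long max_log_data = max_block - zil_chain - lr_write;
            printf("max_log_data = %lu\n", max_log_data);        /* 130696 */
            /* zil_max_waste_space() is one eighth of that. */
            printf("max_waste_space = %lu\n", max_log_data / 8); /* 16337 */
            return (0);
    }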
*/ lwb_sp = lwb->lwb_sz - lwb->lwb_nused; - max_log_data = zil_max_log_data(zilog, sizeof (lr_write_t)); + max_log_data = zil_max_log_data(zilog); if (reclen > lwb_sp || (reclen + dlen > lwb_sp && lwb_sp < zil_max_waste_space(zilog) && (dlen % max_log_data == 0 || diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 0924fb6f40bc..6103634a0737 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -1178,14 +1178,12 @@ zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data, } void -zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite, - boolean_t brtwrite) +zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite) { ASSERT(zio->io_type == ZIO_TYPE_WRITE); ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(zio->io_stage == ZIO_STAGE_OPEN); ASSERT(zio->io_txg == spa_syncing_txg(zio->io_spa)); - ASSERT(!brtwrite || !nopwrite); /* * We must reset the io_prop to match the values that existed @@ -1194,7 +1192,6 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite, */ zio->io_prop.zp_dedup = nopwrite ? B_FALSE : zio->io_prop.zp_dedup; zio->io_prop.zp_nopwrite = nopwrite; - zio->io_prop.zp_brtwrite = brtwrite; zio->io_prop.zp_copies = copies; zio->io_bp_override = bp; } @@ -1602,15 +1599,11 @@ zio_write_bp_init(zio_t *zio) zio_prop_t *zp = &zio->io_prop; ASSERT(bp->blk_birth != zio->io_txg); + ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0); *bp = *zio->io_bp_override; zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; - if (zp->zp_brtwrite) - return (zio); - - ASSERT(!BP_GET_DEDUP(zio->io_bp_override)); - if (BP_IS_EMBEDDED(bp)) return (zio); diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 06bc75c634a6..1511f763fd77 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -482,60 +482,6 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap) return (error); } -/* - * Replay a TX_CLONE_RANGE ZIL transaction that didn't get committed - * after a system failure. - * - * TODO: For now we drop block cloning transations for ZVOLs as they are - * unsupported, but we still need to inform BRT about that as we - * claimed them during pool import. - * This situation can occur when we try to import a pool from a ZFS - * version supporting block cloning for ZVOLs into a system that - * has this ZFS version, that doesn't support block cloning for ZVOLs. 
- */ -static int -zvol_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap) -{ - char name[ZFS_MAX_DATASET_NAME_LEN]; - zvol_state_t *zv = arg1; - objset_t *os = zv->zv_objset; - lr_clone_range_t *lr = arg2; - blkptr_t *bp; - dmu_tx_t *tx; - spa_t *spa; - uint_t ii; - int error; - - dmu_objset_name(os, name); - cmn_err(CE_WARN, "ZFS dropping block cloning transaction for %s.", - name); - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - tx = dmu_tx_create(os); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - return (error); - } - - spa = os->os_spa; - - for (ii = 0; ii < lr->lr_nbps; ii++) { - bp = &lr->lr_bps[ii]; - - if (!BP_IS_HOLE(bp)) { - zio_free(spa, dmu_tx_get_txg(tx), bp); - } - } - - (void) zil_replaying(zv->zv_zilog, tx); - dmu_tx_commit(tx); - - return (0); -} - static int zvol_replay_err(void *arg1, void *arg2, boolean_t byteswap) { @@ -570,7 +516,6 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* TX_SETSAXATTR */ zvol_replay_err, /* TX_RENAME_EXCHANGE */ zvol_replay_err, /* TX_RENAME_WHITEOUT */ - zvol_replay_clone_range /* TX_CLONE_RANGE */ }; /*