FreeBSD ZFS
The Zettabyte File System
|
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_prop.h>
#include <sys/dmu_zfetch.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/sa.h>
#include <sys/zfs_znode.h>
Go to the source code of this file.
Data Structures | |
struct | dmu_sync_arg_t |
Functions | |
int | dmu_buf_hold (objset_t *os, uint64_t object, uint64_t offset, void *tag, dmu_buf_t **dbp, int flags) |
Obtain the DMU buffer from the specified object which contains the specified offset. | |
int | dmu_bonus_max (void) |
int | dmu_set_bonus (dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx) |
int | dmu_set_bonustype (dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx) |
dmu_object_type_t | dmu_get_bonustype (dmu_buf_t *db_fake) |
int | dmu_rm_spill (objset_t *os, uint64_t object, dmu_tx_t *tx) |
int | dmu_bonus_hold (objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) |
The bonus data is accessed more or less like a regular buffer. | |
int | dmu_spill_hold_by_dnode (dnode_t *dn, uint32_t flags, void *tag, dmu_buf_t **dbp) |
This interface will allocate a blank spill dbuf when a spill blk doesn't already exist on the dnode. | |
int | dmu_spill_hold_existing (dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) |
int | dmu_spill_hold_by_bonus (dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) |
static int | dmu_buf_hold_array_by_dnode (dnode_t *dn, uint64_t offset, uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) |
static int | dmu_buf_hold_array (objset_t *os, uint64_t object, uint64_t offset, uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) |
int | dmu_buf_hold_array_by_bonus (dmu_buf_t *db_fake, uint64_t offset, uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) |
Holds the DMU buffers which contain all bytes in a range of an object. | |
void | dmu_buf_rele_array (dmu_buf_t **dbp_fake, int numbufs, void *tag) |
Releases the hold on an array of dmu_buf_t*'s, and frees the array. | |
void | dmu_prefetch (objset_t *os, uint64_t object, uint64_t offset, uint64_t len) |
Asynchronously try to read in the data. | |
static int | get_next_chunk (dnode_t *dn, uint64_t *start, uint64_t limit) |
Get the next "chunk" of file data to free. | |
static int | dmu_free_long_range_impl (objset_t *os, dnode_t *dn, uint64_t offset, uint64_t length, boolean_t free_dnode) |
int | dmu_free_long_range (objset_t *os, uint64_t object, uint64_t offset, uint64_t length) |
int | dmu_free_object (objset_t *os, uint64_t object) |
int | dmu_free_range (objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx) |
Free up the data blocks for a defined range of a file. | |
int | dmu_read (objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf, uint32_t flags) |
void | dmu_write (objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx) |
void | dmu_prealloc (objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx) |
int | dmu_xuio_init (xuio_t *xuio, int nblk) |
void | dmu_xuio_fini (xuio_t *xuio) |
int | dmu_xuio_add (xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n) |
Initialize iov[priv->next] and priv->bufs[priv->next] with { off, n, abuf } and increase priv->next by 1. | |
int | dmu_xuio_cnt (xuio_t *xuio) |
arc_buf_t * | dmu_xuio_arcbuf (xuio_t *xuio, int i) |
void | dmu_xuio_clear (xuio_t *xuio, int i) |
static void | xuio_stat_init (void) |
static void | xuio_stat_fini (void) |
void | xuio_stat_wbuf_copied () |
void | xuio_stat_wbuf_nocopy () |
int | dmu_read_uio (objset_t *os, uint64_t object, uio_t *uio, uint64_t size) |
static int | dmu_write_uio_dnode (dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) |
int | dmu_write_uio_dbuf (dmu_buf_t *zdb, uio_t *uio, uint64_t size, dmu_tx_t *tx) |
int | dmu_write_uio (objset_t *os, uint64_t object, uio_t *uio, uint64_t size, dmu_tx_t *tx) |
arc_buf_t * | dmu_request_arcbuf (dmu_buf_t *handle, int size) |
Allocate a loaned anonymous arc buffer. | |
void | dmu_return_arcbuf (arc_buf_t *buf) |
Free a loaned arc buffer. | |
void | dmu_assign_arcbuf (dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, dmu_tx_t *tx) |
When possible directly assign passed loaned arc buffer to a dbuf. | |
static void | dmu_sync_ready (zio_t *zio, arc_buf_t *buf, void *varg) |
static void | dmu_sync_late_arrival_ready (zio_t *zio) |
static void | dmu_sync_done (zio_t *zio, arc_buf_t *buf, void *varg) |
static void | dmu_sync_late_arrival_done (zio_t *zio) |
static int | dmu_sync_late_arrival (zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, zio_prop_t *zp, zbookmark_t *zb) |
int | dmu_sync (zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) |
Intent log support: sync the block associated with db to disk. | |
int | dmu_object_set_blocksize (objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_tx_t *tx) |
Set the data blocksize for an object. | |
void | dmu_object_set_checksum (objset_t *os, uint64_t object, uint8_t checksum, dmu_tx_t *tx) |
Set the checksum property on a dnode. | |
void | dmu_object_set_compress (objset_t *os, uint64_t object, uint8_t compress, dmu_tx_t *tx) |
Set the compress property on a dnode. | |
TUNABLE_INT ("vfs.zfs.mdcomp_disable",&zfs_mdcomp_disable) | |
SYSCTL_DECL (_vfs_zfs) | |
SYSCTL_INT (_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RW,&zfs_mdcomp_disable, 0,"Disable metadata compression") | |
void | dmu_write_policy (objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) |
Decide how to write a block. | |
int | dmu_offset_next (objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) |
Find the next hole or data block in file starting at *off. | |
void | dmu_object_info_from_dnode (dnode_t *dn, dmu_object_info_t *doi) |
Get information on a DMU object. | |
int | dmu_object_info (objset_t *os, uint64_t object, dmu_object_info_t *doi) |
Get information on a DMU object. | |
void | dmu_object_info_from_db (dmu_buf_t *db_fake, dmu_object_info_t *doi) |
Like dmu_object_info, but faster. | |
void | dmu_object_size_from_db (dmu_buf_t *db_fake, uint32_t *blksize, u_longlong_t *nblk512) |
Like dmu_object_info_from_db, but faster still. | |
void | byteswap_uint64_array (void *vbuf, size_t size) |
void | byteswap_uint32_array (void *vbuf, size_t size) |
void | byteswap_uint16_array (void *vbuf, size_t size) |
void | byteswap_uint8_array (void *vbuf, size_t size) |
void | dmu_init (void) |
Initial setup. | |
void | dmu_fini (void) |
Final teardown. | |
Variables | |
const dmu_object_type_info_t | dmu_ot [DMU_OT_NUMTYPES] |
const dmu_object_byteswap_info_t | dmu_ot_byteswap [DMU_BSWAP_NUMFUNCS] |
kstat_t * | xuio_ksp = NULL |
DMU support for xuio. | |
int | zfs_mdcomp_disable = 0 |
When possible directly assign passed loaned arc buffer to a dbuf.
If this is not possible copy the contents of passed arc buf via dmu_write().
The bonus data is accessed more or less like a regular buffer.
You must call dmu_bonus_hold() to get the buffer, which will give you a dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus data. As with any normal buffer, you must call dmu_buf_read() to read db_data, dmu_buf_will_dirty() before modifying it, and the object must be held in an assigned transaction before calling dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus buffer as well. You must release your hold with dmu_buf_rele().
int dmu_buf_hold | ( | objset_t * | os, |
uint64_t | object, | ||
uint64_t | offset, | ||
void * | tag, | ||
dmu_buf_t ** | dbp, | ||
int | flags | ||
) |
Obtain the DMU buffer from the specified object which contains the specified offset.
dmu_buf_hold() puts a "hold" on the buffer, so that it will remain in memory. You must release the hold with dmu_buf_rele(). You mustn't access the dmu_buf_t after releasing your hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill on the returned buffer before reading or writing the buffer's db_data. The comments for those routines describe what particular operations are valid after calling them.
The object number must be a valid, allocated object number.
void dmu_buf_rele_array | ( | dmu_buf_t ** | dbp_fake, |
int | numbufs, | ||
void * | tag | ||
) |
int dmu_free_long_range | ( | objset_t * | os, |
uint64_t | object, | ||
uint64_t | offset, | ||
uint64_t | length | ||
) |
dmu_object_type_t dmu_get_bonustype | ( | dmu_buf_t * | db_fake | ) |
int dmu_object_info | ( | objset_t * | os, |
uint64_t | object, | ||
dmu_object_info_t * | doi | ||
) |
void dmu_object_info_from_db | ( | dmu_buf_t * | db, |
dmu_object_info_t * | doi | ||
) |
Like dmu_object_info, but faster.
Can be used when you have a held dbuf in hand.
void dmu_object_info_from_dnode | ( | struct dnode * | dn, |
dmu_object_info_t * | doi | ||
) |
int dmu_object_set_blocksize | ( | objset_t * | os, |
uint64_t | object, | ||
uint64_t | size, | ||
int | ibs, | ||
dmu_tx_t * | tx | ||
) |
Set the data blocksize for an object.
The object cannot have any blocks allocated beyond the first. If the first block is allocated already, the new size must be greater than the current block size. If these conditions are not met, ENOTSUP will be returned.
0 | Success |
EBUSY | There are holds on the object contents |
ENOTSUP | as described above |
void dmu_object_size_from_db | ( | dmu_buf_t * | db, |
uint32_t * | blksize, | ||
u_longlong_t * | nblk512 | ||
) |
Like dmu_object_info_from_db, but faster still.
Faster still when you only care about the size. This is specifically optimized for zfs_getattr().
int dmu_offset_next | ( | objset_t * | os, |
uint64_t | object, | ||
boolean_t | hole, | ||
uint64_t * | off | ||
) |
void dmu_prefetch | ( | objset_t * | os, |
uint64_t | object, | ||
uint64_t | offset, | ||
uint64_t | len | ||
) |
int dmu_read | ( | objset_t * | os, |
uint64_t | object, | ||
uint64_t | offset, | ||
uint64_t | size, | ||
void * | buf, | ||
uint32_t | flags | ||
) |
int dmu_read_uio | ( | objset_t * | os, |
uint64_t | object, | ||
uio_t * | uio, | ||
uint64_t | size | ||
) |
void dmu_return_arcbuf | ( | arc_buf_t * | buf | ) |
int dmu_set_bonustype | ( | dmu_buf_t * | db_fake, |
dmu_object_type_t | type, | ||
dmu_tx_t * | tx | ||
) |
This interface will allocate a blank spill dbuf when a spill blk doesn't already exist on the dnode.
If you only want to find an already existing spill db, then dmu_spill_hold_existing() should be used.
int dmu_sync | ( | struct zio * | zio, |
uint64_t | txg, | ||
dmu_sync_cb_t * | done, | ||
zgd_t * | zgd | ||
) |
Intent log support: sync the block associated with db to disk.
If a parent zio is provided this function initiates a write on the provided buffer as a child of the parent zio. In the absence of a parent zio, the write is completed synchronously. At write completion, blk is filled with the bp of the written block. Note that while the data covered by this function will be on stable storage when the write completes, this new data does not become a permanent part of the file until the associated transaction commits.
EEXIST | This txg has already been synced, so there's nothing to do. The caller should not log the write. |
ENOENT | The block was dbuf_free_range()'d, so there's nothing to do. The caller should not log the write. |
EALREADY | This block is already in the process of being synced. The caller should track its progress (somehow). |
EIO | Could not do the I/O. The caller should do a txg_wait_synced(). |
0 | The I/O has been initiated. The caller should log this blkptr in the done callback. It is possible that the I/O will fail, in which case the error will be reported to the done callback and propagated to pio from zio_done(). |
static int dmu_sync_late_arrival | ( | zio_t * | pio, |
objset_t * | os, | ||
dmu_sync_cb_t * | done, | ||
zgd_t * | zgd, | ||
zio_prop_t * | zp, | ||
zbookmark_t * | zb | ||
) | [static] |
static void dmu_sync_late_arrival_done | ( | zio_t * | zio | ) | [static] |
static void dmu_sync_late_arrival_ready | ( | zio_t * | zio | ) | [static] |
int dmu_xuio_add | ( | xuio_t * | xuio, |
arc_buf_t * | abuf, | ||
offset_t | off, | ||
size_t | n | ||
) |
static int get_next_chunk | ( | dnode_t * | dn, |
uint64_t * | start, | ||
uint64_t | limit | ||
) | [static] |
Get the next "chunk" of file data to free.
We traverse the file from the end so that the file gets shorter over time (if we crash in the middle, this will leave us in a better state). We find allocated file data by simply searching the allocated level 1 indirects.
SYSCTL_DECL | ( | _vfs_zfs | ) |
SYSCTL_INT | ( | _vfs_zfs | , |
OID_AUTO | , | ||
mdcomp_disable | , | ||
CTLFLAG_RW | , | ||
& | zfs_mdcomp_disable, | ||
0 | , | ||
"Disable metadata compression" | |||
) |
TUNABLE_INT | ( | "vfs.zfs.mdcomp_disable" | , |
& | zfs_mdcomp_disable | ||
) |
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] |
const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] |
{ { byteswap_uint8_array, "uint8" }, { byteswap_uint16_array, "uint16" }, { byteswap_uint32_array, "uint32" }, { byteswap_uint64_array, "uint64" }, { zap_byteswap, "zap" }, { dnode_buf_byteswap, "dnode" }, { dmu_objset_byteswap, "objset" }, { zfs_znode_byteswap, "znode" }, { zfs_oldacl_byteswap, "oldacl" }, { zfs_acl_byteswap, "acl" } }
int zfs_mdcomp_disable = 0 |