FreeBSD ZFS
The Zettabyte File System
Data Structures | Defines | Typedefs | Functions | Variables

zil.c File Reference

ZFS Intent Log. More...

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/arc.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/dsl_dataset.h>
#include <sys/vdev_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
Include dependency graph for zil.c:

Go to the source code of this file.

Data Structures

struct  zil_replay_arg

Defines

#define LWB_EMPTY(lwb)
#define ZILTEST_TXG   (UINT64_MAX - TXG_CONCURRENT_STATES)
 ziltest is by and large an ugly hack, but very useful in checking replay without tedious work.
#define USE_SLOG(zilog)

Typedefs

typedef struct zil_replay_arg zil_replay_arg_t

Functions

 SYSCTL_DECL (_vfs_zfs)
 TUNABLE_INT ("vfs.zfs.zil_replay_disable",&zil_replay_disable)
 SYSCTL_INT (_vfs_zfs, OID_AUTO, zil_replay_disable, CTLFLAG_RW,&zil_replay_disable, 0,"Disable intent logging replay")
 TUNABLE_INT ("vfs.zfs.cache_flush_disable",&zfs_nocacheflush)
 SYSCTL_INT (_vfs_zfs, OID_AUTO, cache_flush_disable, CTLFLAG_RDTUN,&zfs_nocacheflush, 0,"Disable cache flush")
 TUNABLE_INT ("vfs.zfs.trim_disable",&zfs_notrim)
 SYSCTL_INT (_vfs_zfs, OID_AUTO, trim_disable, CTLFLAG_RDTUN,&zfs_notrim, 0,"Disable trim")
static void zil_async_to_sync (zilog_t *zilog, uint64_t foid)
 Move the async itxs for a specified object to commit into sync lists.
static int zil_bp_compare (const void *x1, const void *x2)
static void zil_bp_tree_init (zilog_t *zilog)
static void zil_bp_tree_fini (zilog_t *zilog)
int zil_bp_tree_add (zilog_t *zilog, const blkptr_t *bp)
static zil_header_t * zil_header_in_syncing_context (zilog_t *zilog)
static void zil_init_log_chain (zilog_t *zilog, blkptr_t *bp)
static int zil_read_log_block (zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, char **end)
 Read a log block and make sure it's valid.
static int zil_read_log_data (zilog_t *zilog, const lr_write_t *lr, void *wbuf)
 Read a TX_WRITE log data block.
int zil_parse (zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg)
 Parse the intent log, and call parse_func for each valid record within.
static int zil_claim_log_block (zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg)
static int zil_claim_log_record (zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg)
static int zil_free_log_block (zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t claim_txg)
static int zil_free_log_record (zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg)
static lwb_t * zil_alloc_lwb (zilog_t *zilog, blkptr_t *bp, uint64_t txg)
void zilog_dirty (zilog_t *zilog, uint64_t txg)
 Called when we create in-memory log transactions so that we know to cleanup the itxs at the end of spa_sync().
boolean_t zilog_is_dirty (zilog_t *zilog)
static lwb_t * zil_create (zilog_t *zilog)
 Create an on-disk intent log.
void zil_destroy (zilog_t *zilog, boolean_t keep_first)
 In one tx, free all log blocks and clear the log header.
void zil_destroy_sync (zilog_t *zilog, dmu_tx_t *tx)
int zil_claim (const char *osname, void *txarg)
int zil_check_log_chain (const char *osname, void *tx)
 Check the log by walking the log chain.
static int zil_vdev_compare (const void *x1, const void *x2)
void zil_add_block (zilog_t *zilog, const blkptr_t *bp)
static void zil_flush_vdevs (zilog_t *zilog)
static void zil_lwb_write_done (zio_t *zio)
 Function called when a log block write completes.
static void zil_lwb_write_init (zilog_t *zilog, lwb_t *lwb)
 Initialize the io for a log block.
static lwb_t * zil_lwb_write_start (zilog_t *zilog, lwb_t *lwb)
 Start a log block write and advance to the next log block.
static lwb_t * zil_lwb_commit (zilog_t *zilog, itx_t *itx, lwb_t *lwb)
itx_t * zil_itx_create (uint64_t txtype, size_t lrsize)
void zil_itx_destroy (itx_t *itx)
static void zil_itxg_clean (itxs_t *itxs)
 Free up the sync and async itxs.
static int zil_aitx_compare (const void *x1, const void *x2)
static void zil_remove_async (zilog_t *zilog, uint64_t oid)
 Remove all async itx with the given oid.
void zil_itx_assign (zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
void zil_clean (zilog_t *zilog, uint64_t synced_txg)
 If there are any in-memory intent log transactions which have now been synced then start up a taskq to free them.
static void zil_get_commit_list (zilog_t *zilog)
 Get the list of itxs to commit into zl_itx_commit_list.
static void zil_commit_writer (zilog_t *zilog)
void zil_commit (zilog_t *zilog, uint64_t foid)
 Commit zfs transactions to stable storage.
void zil_sync (zilog_t *zilog, dmu_tx_t *tx)
 Called in syncing context to free committed log blocks and update log header.
void zil_init (void)
void zil_fini (void)
void zil_set_sync (zilog_t *zilog, uint64_t sync)
void zil_set_logbias (zilog_t *zilog, uint64_t logbias)
zilog_t * zil_alloc (objset_t *os, zil_header_t *zh_phys)
void zil_free (zilog_t *zilog)
zilog_t * zil_open (objset_t *os, zil_get_data_t *get_data)
 Open an intent log.
void zil_close (zilog_t *zilog)
 Close an intent log.
int zil_suspend (zilog_t *zilog)
 Suspend an intent log.
void zil_resume (zilog_t *zilog)
static int zil_replay_error (zilog_t *zilog, lr_t *lr, int error)
static int zil_replay_log_record (zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
static int zil_incr_blks (zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
void zil_replay (objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE])
 If this dataset has a non-empty intent log, replay it and destroy it.
boolean_t zil_replaying (zilog_t *zilog, dmu_tx_t *tx)
int zil_vdev_offline (const char *osname, void *arg)

Variables

int zil_replay_disable = 0
 Disable intent logging replay.
boolean_t zfs_nocacheflush = B_FALSE
 Tunable parameter for debugging or performance analysis.
boolean_t zfs_notrim = B_TRUE
static kmem_cache_t * zil_lwb_cache
uint64_t zil_block_buckets []
 Define a limited set of intent log block sizes.
uint64_t zil_slog_limit = 1024 * 1024
 Use the slog as long as the logbias is 'latency' and the current commit size is less than the limit or the total list size is less than 2X the limit.

Detailed Description

ZFS Intent Log.

The ZFS intent log (ZIL) saves transaction records of system calls that change the file system in memory with enough information to be able to replay them. These are stored in memory until either the DMU transaction group (txg) commits them to the stable pool and they can be discarded, or they are flushed to the stable log (also in the pool) due to an fsync, O_DSYNC or other synchronous requirement. In the event of a panic or power failure, those log records (transactions) are replayed.

There is one ZIL per file system. Its on-disk (pool) format consists of 3 parts: the ZIL header, ZIL blocks, and ZIL records.

A log record holds a system call transaction. Log blocks can hold many log records and the blocks are chained together. Each ZIL block contains a block pointer (blkptr_t) to the next ZIL block in the chain. The ZIL header points to the first block in the chain. Note there is not a fixed place in the pool to hold blocks. They are dynamically allocated and freed as needed from the blocks available. Figure X shows the ZIL structure:

Definition in file zil.c.


Define Documentation

#define LWB_EMPTY (   lwb)
Value:
((BP_GET_LSIZE(&lwb->lwb_blk) - \
    sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused))

Definition at line 104 of file zil.c.

#define USE_SLOG (   zilog)
Value:
(((zilog)->zl_logbias == ZFS_LOGBIAS_LATENCY) && \
        (((zilog)->zl_cur_used < zil_slog_limit) || \
        ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1))))

Definition at line 920 of file zil.c.

#define ZILTEST_TXG   (UINT64_MAX - TXG_CONCURRENT_STATES)

ziltest is by and large an ugly hack, but very useful in checking replay without tedious work.

When running ziltest we want to keep all itx's and so maintain a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG. We subtract TXG_CONCURRENT_STATES to allow for common code.

Definition at line 115 of file zil.c.


Typedef Documentation


Function Documentation

SYSCTL_DECL ( _vfs_zfs  )
SYSCTL_INT ( _vfs_zfs  ,
OID_AUTO  ,
cache_flush_disable  ,
CTLFLAG_RDTUN  ,
&zfs_nocacheflush  ,
0  ,
"Disable cache flush"   
)
SYSCTL_INT ( _vfs_zfs  ,
OID_AUTO  ,
zil_replay_disable  ,
CTLFLAG_RW  ,
&zil_replay_disable  ,
0  ,
"Disable intent logging replay"   
)
SYSCTL_INT ( _vfs_zfs  ,
OID_AUTO  ,
trim_disable  ,
CTLFLAG_RDTUN  ,
&zfs_notrim  ,
0  ,
"Disable trim"   
)
TUNABLE_INT ( "vfs.zfs.cache_flush_disable"  ,
&zfs_nocacheflush 
)
TUNABLE_INT ( "vfs.zfs.zil_replay_disable"  ,
&zil_replay_disable 
)
TUNABLE_INT ( "vfs.zfs.trim_disable"  ,
&zfs_notrim 
)
void zil_add_block ( zilog_t *  zilog,
const blkptr_t *  bp 
)

Definition at line 768 of file zil.c.

static int zil_aitx_compare ( const void *  x1,
const void *  x2 
) [static]

Definition at line 1192 of file zil.c.

zilog_t* zil_alloc ( objset_t *  os,
zil_header_t *  zh_phys 
)

Definition at line 1688 of file zil.c.

static lwb_t* zil_alloc_lwb ( zilog_t *  zilog,
blkptr_t *  bp,
uint64_t  txg 
) [static]

Definition at line 451 of file zil.c.

static void zil_async_to_sync ( zilog_t *  zilog,
uint64_t  foid 
) [static]

Move the async itxs for a specified object to commit into sync lists.

Definition at line 1409 of file zil.c.

static int zil_bp_compare ( const void *  x1,
const void *  x2 
) [static]

Definition at line 118 of file zil.c.

int zil_bp_tree_add ( zilog_t *  zilog,
const blkptr_t *  bp 
)

Definition at line 157 of file zil.c.

static void zil_bp_tree_fini ( zilog_t *  zilog ) [static]

Definition at line 144 of file zil.c.

static void zil_bp_tree_init ( zilog_t *  zilog ) [static]

Definition at line 137 of file zil.c.

int zil_check_log_chain ( const char *  osname,
void *  tx 
)

Check the log by walking the log chain.

Checksum errors are ok as they indicate the end of the chain. Any other error (no device or read failure) returns an error.

Definition at line 698 of file zil.c.

int zil_claim ( const char *  osname,
void *  txarg 
)

Definition at line 640 of file zil.c.

static int zil_claim_log_block ( zilog_t *  zilog,
blkptr_t *  bp,
void *  tx,
uint64_t  first_txg 
) [static]

Definition at line 388 of file zil.c.

static int zil_claim_log_record ( zilog_t *  zilog,
lr_t *  lrc,
void *  tx,
uint64_t  first_txg 
) [static]

Definition at line 403 of file zil.c.

void zil_clean ( zilog_t *  zilog,
uint64_t  synced_txg 
)

If there are any in-memory intent log transactions which have now been synced then start up a taskq to free them.

We should only do this after we have written out the uberblocks (i.e. txg has been committed) so that we don't inadvertently clean out in-memory log records that would be required by zil_commit().

Definition at line 1342 of file zil.c.

void zil_close ( zilog_t *  zilog )

Close an intent log.

Definition at line 1789 of file zil.c.

void zil_commit ( zilog_t *  zilog,
uint64_t  foid 
)

Commit zfs transactions to stable storage.

itxs are committed in batches. In a heavily stressed zil there will be a commit writer thread who is writing out a bunch of itxs to the log for a set of committing threads (cthreads) in the same batch as the writer. Those cthreads are all waiting on the same cv for that batch.

There will also be a different and growing batch of threads that are waiting to commit (qthreads). When the committing batch completes a transition occurs such that the cthreads exit and the qthreads become cthreads. One of the new cthreads becomes the writer thread for the batch. Any new threads arriving become new qthreads.

Only 2 condition variables are needed, and no transition between the two cvs is required. They just flip-flop between qthreads and cthreads.

Using this scheme we can efficiently wake up only those threads that have been committed.

Parameters:
[in] foid: if 0, push out all transactions. Otherwise push only those for that object or those that might reference that object.

Definition at line 1557 of file zil.c.

static void zil_commit_writer ( zilog_t *  zilog ) [static]

Definition at line 1458 of file zil.c.

static lwb_t* zil_create ( zilog_t *  zilog ) [static]

Create an on-disk intent log.

Definition at line 512 of file zil.c.

void zil_destroy ( zilog_t *  zilog,
boolean_t  keep_first 
)

In one tx, free all log blocks and clear the log header.

If keep_first is set, then we're replaying a log with no content. We want to keep the first block, however, so that the first synchronous transaction doesn't require a txg_wait_synced() in zil_create(). We don't need to txg_wait_synced() here either when keep_first is set, because both zil_create() and zil_destroy() will wait for any in-progress destroys to complete.

Definition at line 585 of file zil.c.

void zil_destroy_sync ( zilog_t *  zilog,
dmu_tx_t *  tx 
)

Definition at line 632 of file zil.c.

void zil_fini ( void  )

Definition at line 1670 of file zil.c.

static void zil_flush_vdevs ( zilog_t *  zilog ) [static]

Definition at line 799 of file zil.c.

void zil_free ( zilog_t *  zilog )

Definition at line 1730 of file zil.c.

static int zil_free_log_block ( zilog_t *  zilog,
blkptr_t *  bp,
void *  tx,
uint64_t  claim_txg 
) [static]

Definition at line 427 of file zil.c.

static int zil_free_log_record ( zilog_t *  zilog,
lr_t *  lrc,
void *  tx,
uint64_t  claim_txg 
) [static]

Definition at line 435 of file zil.c.

static void zil_get_commit_list ( zilog_t *  zilog ) [static]

Get the list of itxs to commit into zl_itx_commit_list.

Definition at line 1376 of file zil.c.

static zil_header_t* zil_header_in_syncing_context ( zilog_t *  zilog ) [static]

Definition at line 175 of file zil.c.

static int zil_incr_blks ( zilog_t *  zilog,
blkptr_t *  bp,
void *  arg,
uint64_t  claim_txg 
) [static]

Definition at line 1988 of file zil.c.

void zil_init ( void  )

Definition at line 1663 of file zil.c.

static void zil_init_log_chain ( zilog_t *  zilog,
blkptr_t *  bp 
) [static]

Definition at line 181 of file zil.c.

void zil_itx_assign ( zilog_t *  zilog,
itx_t *  itx,
dmu_tx_t *  tx 
)

Definition at line 1253 of file zil.c.

itx_t* zil_itx_create ( uint64_t  txtype,
size_t  lrsize 
)

Definition at line 1132 of file zil.c.

void zil_itx_destroy ( itx_t *  itx )

Definition at line 1149 of file zil.c.

static void zil_itxg_clean ( itxs_t *  itxs ) [static]

Free up the sync and async itxs.

The itxs_t has already been detached so no locks are needed.

Definition at line 1159 of file zil.c.

static lwb_t* zil_lwb_commit ( zilog_t *  zilog,
itx_t *  itx,
lwb_t *  lwb 
) [static]

Definition at line 1040 of file zil.c.

static void zil_lwb_write_done ( zio_t *  zio ) [static]

Function called when a log block write completes.

Definition at line 840 of file zil.c.

static void zil_lwb_write_init ( zilog_t *  zilog,
lwb_t *  lwb 
) [static]

Initialize the io for a log block.

Definition at line 880 of file zil.c.

static lwb_t* zil_lwb_write_start ( zilog_t *  zilog,
lwb_t *  lwb 
) [static]

Start a log block write and advance to the next log block.

Calls are serialized.

Definition at line 929 of file zil.c.

zilog_t* zil_open ( objset_t *  os,
zil_get_data_t *  get_data 
)

Open an intent log.

Definition at line 1770 of file zil.c.

int zil_parse ( zilog_t *  zilog,
zil_parse_blk_func_t *  parse_blk_func,
zil_parse_lr_func_t *  parse_lr_func,
void *  arg,
uint64_t  txg 
)

Parse the intent log, and call parse_func for each valid record within.

Definition at line 305 of file zil.c.

static int zil_read_log_block ( zilog_t *  zilog,
const blkptr_t *  bp,
blkptr_t *  nbp,
void *  dst,
char **  end 
) [static]

Read a log block and make sure it's valid.

Definition at line 195 of file zil.c.

static int zil_read_log_data ( zilog_t *  zilog,
const lr_write_t *  lr,
void *  wbuf 
) [static]

Read a TX_WRITE log data block.

Definition at line 268 of file zil.c.

static void zil_remove_async ( zilog_t *  zilog,
uint64_t  oid 
) [static]

Remove all async itx with the given oid.

Definition at line 1209 of file zil.c.

void zil_replay ( objset_t *  os,
void *  arg,
zil_replay_func_t *  replay_func[TX_MAX_TYPE] 
)

If this dataset has a non-empty intent log, replay it and destroy it.

Definition at line 1999 of file zil.c.

static int zil_replay_error ( zilog_t *  zilog,
lr_t *  lr,
int  error 
) [static]

Definition at line 1887 of file zil.c.

static int zil_replay_log_record ( zilog_t *  zilog,
lr_t *  lr,
void *  zra,
uint64_t  claim_txg 
) [static]

Definition at line 1905 of file zil.c.

boolean_t zil_replaying ( zilog_t *  zilog,
dmu_tx_t *  tx 
)

Definition at line 2035 of file zil.c.

void zil_resume ( zilog_t *  zilog )

Definition at line 1871 of file zil.c.

void zil_set_logbias ( zilog_t *  zilog,
uint64_t  logbias 
)

Definition at line 1682 of file zil.c.

void zil_set_sync ( zilog_t *  zilog,
uint64_t  sync 
)

Definition at line 1676 of file zil.c.

int zil_suspend ( zilog_t *  zilog )

Suspend an intent log.

While in suspended mode, we still honor synchronous semantics, but we rely on txg_wait_synced() to do it. We suspend the log briefly when taking a snapshot so that the snapshot contains all the data it's supposed to, and has an empty intent log.

Definition at line 1836 of file zil.c.

void zil_sync ( zilog_t *  zilog,
dmu_tx_t *  tx 
)

Called in syncing context to free committed log blocks and update log header.

Definition at line 1595 of file zil.c.

static int zil_vdev_compare ( const void *  x1,
const void *  x2 
) [static]

Definition at line 754 of file zil.c.

int zil_vdev_offline ( const char *  osname,
void *  arg 
)

Definition at line 2052 of file zil.c.

void zilog_dirty ( zilog_t *  zilog,
uint64_t  txg 
)

Called when we create in-memory log transactions so that we know to cleanup the itxs at the end of spa_sync().

Definition at line 482 of file zil.c.

boolean_t zilog_is_dirty ( zilog_t *  zilog )

Definition at line 497 of file zil.c.


Variable Documentation

boolean_t zfs_notrim = B_TRUE

Definition at line 95 of file zil.c.

uint64_t zil_block_buckets[]
Initial value:
 {
    4096,               
    8192+4096,          
    32*1024 + 4096,     
    UINT64_MAX
}

Define a limited set of intent log block sizes.

These must be a multiple of 4KB. Note only the amount used (again aligned to 4KB) actually gets written. However, we can't always just allocate SPA_MAXBLOCKSIZE as the slog space could be exhausted.

Definition at line 907 of file zil.c.

kmem_cache_t* zil_lwb_cache [static]

Definition at line 100 of file zil.c.

uint64_t zil_slog_limit = 1024 * 1024

Use the slog as long as the logbias is 'latency' and the current commit size is less than the limit or the total list size is less than 2X the limit.

Limit checking is disabled by setting zil_slog_limit to UINT64_MAX.

Definition at line 919 of file zil.c.

 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines