FreeBSD ZFS
The Zettabyte File System
Data Structures | Defines | Typedefs | Enumerations | Functions | Variables

sys/zil.h File Reference

Intent log format. More...

#include <sys/types.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
Include dependency graph for zil.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  zil_header
 Intent log header. More...
struct  zil_chain
 Log block chaining. More...
struct  lr_t
 Format of log records. More...
struct  lr_ooo_t
 Common start of all out-of-order record types (TX_OOO() above). More...
struct  lr_attr_t
 Handle optional extended vattr attributes. More...
struct  lr_create_t
 log record for creates without optional ACL. More...
struct  lr_acl_create_t
 Log record for creates with optional ACL. This log record is also used for recording any FUID information needed for replaying the create. More...
struct  lr_remove_t
struct  lr_link_t
struct  lr_rename_t
struct  lr_write_t
struct  lr_truncate_t
struct  lr_setattr_t
struct  lr_acl_v0_t
struct  lr_acl_t
struct  itx

Defines

#define ZIL_MIN_BLKSZ   4096ULL
#define ZIL_MAX_BLKSZ   SPA_MAXBLOCKSIZE
#define ZIL_XVAT_SIZE(mapsize)
 size of xvattr log section.
#define ZIL_ACE_LENGTH(x)   (roundup(x, sizeof (uint64_t)))
 Size of ACL in log.
#define TX_CI   ((uint64_t)0x1 << 63)
 The transactions for mkdir, symlink, remove, rmdir, link, and rename may have the following bit set, indicating the original request specified case-insensitive handling of names.
#define TX_OOO(txtype)
 Transactions for write, truncate, setattr, acl_v0, and acl can be logged out of order.
zh_flags bit settings
#define ZIL_REPLAY_NEEDED   0x1
 replay needed - internal only
#define ZIL_CLAIM_LR_SEQ_VALID   0x2
 zh_claim_lr_seq field is valid
The words of a log block checksum.
#define ZIL_ZC_GUID_0   0
#define ZIL_ZC_GUID_1   1
#define ZIL_ZC_OBJSET   2
#define ZIL_ZC_SEQ   3
Intent log transaction types and record structures
#define TX_CREATE   1
 Create file.
#define TX_MKDIR   2
 Make directory.
#define TX_MKXATTR   3
 Make XATTR directory.
#define TX_SYMLINK   4
 Create symbolic link to a file.
#define TX_REMOVE   5
 Remove file.
#define TX_RMDIR   6
 Remove directory.
#define TX_LINK   7
 Create hard link to a file.
#define TX_RENAME   8
 Rename a file.
#define TX_WRITE   9
 File write.
#define TX_TRUNCATE   10
 Truncate a file.
#define TX_SETATTR   11
 Set file attributes.
#define TX_ACL_V0   12
 Set old formatted ACL.
#define TX_ACL   13
 Set ACL.
#define TX_CREATE_ACL   14
 create with ACL
#define TX_CREATE_ATTR   15
 create + attrs
#define TX_CREATE_ACL_ATTR   16
 create with ACL + attrs
#define TX_MKDIR_ACL   17
 mkdir with ACL
#define TX_MKDIR_ATTR   18
 mkdir with attr
#define TX_MKDIR_ACL_ATTR   19
 mkdir with ACL + attrs
#define TX_WRITE2   20
 dmu_sync EALREADY write
#define TX_MAX_TYPE   21
 Max transaction type.

Typedefs

typedef struct zil_header zil_header_t
 Intent log header.
typedef struct zil_chain zil_chain_t
 Log block chaining.
typedef enum zil_create zil_create_t
typedef struct itx itx_t
typedef int zil_parse_blk_func_t (zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t txg)
typedef int zil_parse_lr_func_t (zilog_t *zilog, lr_t *lr, void *arg, uint64_t txg)
typedef int zil_replay_func_t ()
typedef int zil_get_data_t (void *arg, lr_write_t *lr, char *dbuf, zio_t *zio)

Enumerations

enum  zil_create { Z_FILE, Z_DIR, Z_XATTRDIR }
enum  itx_wr_state_t { WR_INDIRECT, WR_COPIED, WR_NEED_COPY, WR_NUM_STATES }
 

Writes are handled in three different ways:

  • WR_INDIRECT:
    In this mode, if we need to commit the write later, then the block is immediately written into the file system (using dmu_sync), and a pointer to the block is put into the log record.
More...

Functions

int zil_parse (zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg)
 Parse the intent log, and call parse_func for each valid record within.
void zil_init (void)
void zil_fini (void)
zilog_t * zil_alloc (objset_t *os, zil_header_t *zh_phys)
void zil_free (zilog_t *zilog)
zilog_t * zil_open (objset_t *os, zil_get_data_t *get_data)
 Open an intent log.
void zil_close (zilog_t *zilog)
 Close an intent log.
void zil_replay (objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE])
 If this dataset has a non-empty intent log, replay it and destroy it.
boolean_t zil_replaying (zilog_t *zilog, dmu_tx_t *tx)
void zil_destroy (zilog_t *zilog, boolean_t keep_first)
 In one tx, free all log blocks and clear the log header.
void zil_destroy_sync (zilog_t *zilog, dmu_tx_t *tx)
void zil_rollback_destroy (zilog_t *zilog, dmu_tx_t *tx)
itx_t * zil_itx_create (uint64_t txtype, size_t lrsize)
void zil_itx_destroy (itx_t *itx)
void zil_itx_assign (zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
void zil_commit (zilog_t *zilog, uint64_t oid)
 Commit zfs transactions to stable storage.
int zil_vdev_offline (const char *osname, void *txarg)
int zil_claim (const char *osname, void *txarg)
int zil_check_log_chain (const char *osname, void *txarg)
 Check the log by walking the log chain.
void zil_sync (zilog_t *zilog, dmu_tx_t *tx)
 Called in syncing context to free committed log blocks and update log header.
void zil_clean (zilog_t *zilog, uint64_t synced_txg)
 If there are any in-memory intent log transactions which have now been synced then start up a taskq to free them.
int zil_suspend (zilog_t *zilog)
 Suspend an intent log.
void zil_resume (zilog_t *zilog)
void zil_add_block (zilog_t *zilog, const blkptr_t *bp)
int zil_bp_tree_add (zilog_t *zilog, const blkptr_t *bp)
void zil_set_sync (zilog_t *zilog, uint64_t syncval)
void zil_set_logbias (zilog_t *zilog, uint64_t slogval)

Variables

int zil_replay_disable
 Disable intent logging replay.

Detailed Description

Intent log format.

Each objset has its own intent log. The log header (zil_header_t) for objset N's intent log is kept in the Nth object of the SPA's intent_log objset. The log header points to a chain of log blocks, each of which contains log records (i.e., transactions) followed by a log block trailer (zil_trailer_t). The format of a log record depends on the record (or transaction) type, but all records begin with a common structure that defines the type, length, and txg.

Definition in file zil.h.


Define Documentation

#define TX_ACL   13

Set ACL.

Definition at line 152 of file zil.h.

#define TX_ACL_V0   12

Set old formatted ACL.

Definition at line 151 of file zil.h.

#define TX_CI   ((uint64_t)0x1 << 63)

The transactions for mkdir, symlink, remove, rmdir, link, and rename may have the following bit set, indicating the original request specified case-insensitive handling of names.

Definition at line 168 of file zil.h.

#define TX_CREATE   1

Create file.

Definition at line 140 of file zil.h.

#define TX_CREATE_ACL   14

create with ACL

Definition at line 153 of file zil.h.

#define TX_CREATE_ACL_ATTR   16

create with ACL + attrs

Definition at line 155 of file zil.h.

#define TX_CREATE_ATTR   15

create + attrs

Definition at line 154 of file zil.h.

#define TX_LINK   7

Create hard link to a file.

Definition at line 146 of file zil.h.

#define TX_MAX_TYPE   21

Max transaction type.

Definition at line 160 of file zil.h.

#define TX_MKDIR   2

Make directory.

Definition at line 141 of file zil.h.

#define TX_MKDIR_ACL   17

mkdir with ACL

Definition at line 156 of file zil.h.

#define TX_MKDIR_ACL_ATTR   19

mkdir with ACL + attrs

Definition at line 158 of file zil.h.

#define TX_MKDIR_ATTR   18

mkdir with attr

Definition at line 157 of file zil.h.

#define TX_MKXATTR   3

Make XATTR directory.

Definition at line 142 of file zil.h.

#define TX_OOO (   txtype)
Value:
((txtype) == TX_WRITE ||        \
        (txtype) == TX_TRUNCATE ||      \
        (txtype) == TX_SETATTR ||       \
        (txtype) == TX_ACL_V0 ||        \
        (txtype) == TX_ACL ||           \
        (txtype) == TX_WRITE2)

Transactions for write, truncate, setattr, acl_v0, and acl can be logged out of order.

For convenience in the code, all such records must have lr_foid at the same offset.

Definition at line 175 of file zil.h.

#define TX_REMOVE   5

Remove file.

Definition at line 144 of file zil.h.

#define TX_RENAME   8

Rename a file.

Definition at line 147 of file zil.h.

#define TX_RMDIR   6

Remove directory.

Definition at line 145 of file zil.h.

#define TX_SETATTR   11

Set file attributes.

Definition at line 150 of file zil.h.

#define TX_SYMLINK   4

Create symbolic link to a file.

Definition at line 143 of file zil.h.

#define TX_TRUNCATE   10

Truncate a file.

Definition at line 149 of file zil.h.

#define TX_WRITE   9

File write.

Definition at line 148 of file zil.h.

#define TX_WRITE2   20

dmu_sync EALREADY write

Definition at line 159 of file zil.h.

#define ZIL_ACE_LENGTH (   x)    (roundup(x, sizeof (uint64_t)))

Size of ACL in log.

The ACE data is padded out to properly align on 8 byte boundary.

Definition at line 134 of file zil.h.

#define ZIL_CLAIM_LR_SEQ_VALID   0x2

zh_claim_lr_seq field is valid

Definition at line 74 of file zil.h.

#define ZIL_MAX_BLKSZ   SPA_MAXBLOCKSIZE

Definition at line 98 of file zil.h.

#define ZIL_MIN_BLKSZ   4096ULL

Definition at line 97 of file zil.h.

#define ZIL_REPLAY_NEEDED   0x1

replay needed - internal only

Definition at line 73 of file zil.h.

#define ZIL_XVAT_SIZE (   mapsize)
Value:
sizeof (lr_attr_t) + (sizeof (uint32_t) * (mapsize - 1)) + \
        (sizeof (uint64_t) * 7)

size of xvattr log section.

It is composed of lr_attr_t + xvattr bitmap + 2 64-bit timestamps for create time and a single 64-bit integer for all of the attributes, and 4 64-bit integers (32 bytes) for the scanstamp.

Definition at line 125 of file zil.h.

#define ZIL_ZC_GUID_0   0

Definition at line 104 of file zil.h.

#define ZIL_ZC_GUID_1   1

Definition at line 105 of file zil.h.

#define ZIL_ZC_OBJSET   2

Definition at line 106 of file zil.h.

#define ZIL_ZC_SEQ   3

Definition at line 107 of file zil.h.


Typedef Documentation

typedef struct itx itx_t
Note:
itx_lr is followed by type-specific part of lr_xx_t and its immediate data
typedef struct zil_chain zil_chain_t

Log block chaining.

Log blocks are chained together. Originally they were chained at the end of the block. For performance reasons the chain was moved to the beginning of the block which allows writes for only the data being used. The older position is supported for backwards compatibility.

The zio_eck_t contains a zec_cksum which for the intent log is the sequence number of this log block. A seq of 0 is invalid. The zec_cksum is checked by the SPA against the sequence number passed in the blk_cksum field of the blkptr_t

typedef enum zil_create zil_create_t
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio)

Definition at line 392 of file zil.h.

typedef struct zil_header zil_header_t

Intent log header.

This on disk structure holds fields to manage the log. All fields are 64 bit to easily handle cross architectures.

typedef int zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t txg)

Definition at line 387 of file zil.h.

typedef int zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg, uint64_t txg)

Definition at line 389 of file zil.h.

typedef int zil_replay_func_t()

Definition at line 391 of file zil.h.


Enumeration Type Documentation

enum itx_wr_state_t

Writes are handled in three different ways:

  • WR_INDIRECT:
    In this mode, if we need to commit the write later, then the block is immediately written into the file system (using dmu_sync), and a pointer to the block is put into the log record.

When the txg commits the block is linked in. This saves additionally writing the data into the log record. There are a few requirements for this to occur:

  • write is greater than zfs/zvol_immediate_write_sz
  • not using slogs (as slogs are assumed to always be faster than writing into the main pool)
  • the write occupies only one block

  • WR_COPIED:
    If we know we'll immediately be committing the transaction (FSYNC or FDSYNC), then we allocate a larger log record here for the data and copy the data in.

  • WR_NEED_COPY:
    Otherwise we don't allocate a buffer, and *if* we need to flush the write later then a buffer is allocated and we retrieve the data using the dmu.
Enumerator:
WR_INDIRECT 

indirect - a large write (dmu_sync() data and put blkptr in log, rather than actual data)

WR_COPIED 

immediate - data is copied into lr_write_t

WR_NEED_COPY 

immediate - data needs to be copied if pushed

WR_NUM_STATES 

number of states

Definition at line 365 of file zil.h.

enum zil_create
Enumerator:
Z_FILE 
Z_DIR 
Z_XATTRDIR 

Definition at line 110 of file zil.h.


Function Documentation

void zil_add_block ( zilog_t *  zilog,
const blkptr_t *  bp 
)

Definition at line 768 of file zil.c.

zilog_t* zil_alloc ( objset_t *  os,
zil_header_t *  zh_phys 
)

Definition at line 1688 of file zil.c.

int zil_bp_tree_add ( zilog_t *  zilog,
const blkptr_t *  bp 
)

Definition at line 157 of file zil.c.

int zil_check_log_chain ( const char *  osname,
void *  tx 
)

Check the log by walking the log chain.

Checksum errors are ok as they indicate the end of the chain. Any other error (no device or read failure) returns an error.

Definition at line 698 of file zil.c.

int zil_claim ( const char *  osname,
void *  txarg 
)

Definition at line 640 of file zil.c.

void zil_clean ( zilog_t *  zilog,
uint64_t  synced_txg 
)

If there are any in-memory intent log transactions which have now been synced then start up a taskq to free them.

We should only do this after we have written out the uberblocks (i.e. txg has been committed) so that we don't inadvertently clean out in-memory log records that would be required by zil_commit().

Definition at line 1342 of file zil.c.

void zil_close ( zilog_t *  zilog)

Close an intent log.

Definition at line 1789 of file zil.c.

void zil_commit ( zilog_t *  zilog,
uint64_t  foid 
)

Commit zfs transactions to stable storage.

itxs are committed in batches. In a heavily stressed zil there will be a commit writer thread who is writing out a bunch of itxs to the log for a set of committing threads (cthreads) in the same batch as the writer. Those cthreads are all waiting on the same cv for that batch.

There will also be a different and growing batch of threads that are waiting to commit (qthreads). When the committing batch completes a transition occurs such that the cthreads exit and the qthreads become cthreads. One of the new cthreads becomes the writer thread for the batch. Any new threads arriving become new qthreads.

Only 2 condition variables are needed and there's no transition between the two cvs needed. They just flip-flop between qthreads and cthreads.

Using this scheme we can efficiently wake up only those threads that have been committed.

Parameters:
[in]  foid  If 0, push out all transactions. Otherwise, push only those for that object or that might reference that object.

Definition at line 1557 of file zil.c.

void zil_destroy ( zilog_t *  zilog,
boolean_t  keep_first 
)

In one tx, free all log blocks and clear the log header.

If keep_first is set, then we're replaying a log with no content. We want to keep the first block, however, so that the first synchronous transaction doesn't require a txg_wait_synced() in zil_create(). We don't need to txg_wait_synced() here either when keep_first is set, because both zil_create() and zil_destroy() will wait for any in-progress destroys to complete.

Definition at line 585 of file zil.c.

void zil_destroy_sync ( zilog_t *  zilog,
dmu_tx_t *  tx 
)

Definition at line 632 of file zil.c.

void zil_fini ( void  )

Definition at line 1670 of file zil.c.

void zil_free ( zilog_t *  zilog)

Definition at line 1730 of file zil.c.

void zil_init ( void  )

Definition at line 1663 of file zil.c.

void zil_itx_assign ( zilog_t *  zilog,
itx_t *  itx,
dmu_tx_t *  tx 
)

Definition at line 1253 of file zil.c.

itx_t* zil_itx_create ( uint64_t  txtype,
size_t  lrsize 
)

Definition at line 1132 of file zil.c.

void zil_itx_destroy ( itx_t *  itx)

Definition at line 1149 of file zil.c.

zilog_t* zil_open ( objset_t *  os,
zil_get_data_t *  get_data 
)

Open an intent log.

Definition at line 1770 of file zil.c.

int zil_parse ( zilog_t *  zilog,
zil_parse_blk_func_t *  parse_blk_func,
zil_parse_lr_func_t *  parse_lr_func,
void *  arg,
uint64_t  txg 
)

Parse the intent log, and call parse_func for each valid record within.

Definition at line 305 of file zil.c.

void zil_replay ( objset_t *  os,
void *  arg,
zil_replay_func_t *  replay_func[TX_MAX_TYPE] 
)

If this dataset has a non-empty intent log, replay it and destroy it.

Definition at line 1999 of file zil.c.

boolean_t zil_replaying ( zilog_t *  zilog,
dmu_tx_t *  tx 
)

Definition at line 2035 of file zil.c.

void zil_resume ( zilog_t *  zilog)

Definition at line 1871 of file zil.c.

void zil_rollback_destroy ( zilog_t *  zilog,
dmu_tx_t *  tx 
)

void zil_set_logbias ( zilog_t *  zilog,
uint64_t  slogval 
)

Definition at line 1682 of file zil.c.

void zil_set_sync ( zilog_t *  zilog,
uint64_t  syncval 
)

Definition at line 1676 of file zil.c.

int zil_suspend ( zilog_t *  zilog)

Suspend an intent log.

While in suspended mode, we still honor synchronous semantics, but we rely on txg_wait_synced() to do it. We suspend the log briefly when taking a snapshot so that the snapshot contains all the data it's supposed to, and has an empty intent log.

Definition at line 1836 of file zil.c.

void zil_sync ( zilog_t *  zilog,
dmu_tx_t *  tx 
)

Called in syncing context to free committed log blocks and update log header.

Definition at line 1595 of file zil.c.

int zil_vdev_offline ( const char *  osname,
void *  txarg 
)

Definition at line 2052 of file zil.c.

 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines