FreeBSD ZFS
The Zettabyte File System
Data Structures | Defines | Typedefs | Enumerations | Functions | Variables

arc.c File Reference

DVA-based Adaptive Replacement Cache. More...

#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/zfs_context.h>
#include <sys/arc.h>
#include <sys/refcount.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/dnlc.h>
#include <sys/callb.h>
#include <sys/kstat.h>
#include <zfs_fletcher.h>
#include <sys/sdt.h>
#include <vm/vm_pageout.h>
Include dependency graph for arc.c:

Go to the source code of this file.

Data Structures

struct  arcs_lock
struct  arc_state
struct  arc_stats
struct  arc_callback
struct  arc_write_callback
struct  arc_buf_hdr
struct  ht_lock
struct  buf_hash_table
struct  l2arc_dev
struct  l2arc_read_callback
struct  l2arc_write_callback
struct  l2arc_buf_hdr
struct  l2arc_data_free

Defines

#define ARC_REDUCE_DNLC_PERCENT   3
#define ARCS_LOCK_PAD   CACHE_LINE_SIZE
#define ARC_BUFC_NUMDATALISTS   16
 must be power of two for mask use to work
#define ARC_BUFC_NUMMETADATALISTS   16
#define ARC_BUFC_NUMLISTS   (ARC_BUFC_NUMMETADATALISTS + ARC_BUFC_NUMDATALISTS)
#define ARCS_LOCK(s, i)   (&((s)->arcs_locks[(i)].arcs_lock))
#define ARCSTAT(stat)   (arc_stats.stat.value.ui64)
#define ARCSTAT_INCR(stat, val)   atomic_add_64(&arc_stats.stat.value.ui64, (val));
#define ARCSTAT_BUMP(stat)   ARCSTAT_INCR(stat, 1)
#define ARCSTAT_BUMPDOWN(stat)   ARCSTAT_INCR(stat, -1)
#define ARCSTAT_MAX(stat, val)
#define ARCSTAT_MAXSTAT(stat)   ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)
#define ARCSTAT_CONDSTAT(cond1, stat1, notstat1, cond2, stat2, notstat2, stat)
 We define a macro to allow ARC hits/misses to be easily broken down by two separate conditions, giving a total of four different subtypes for each of hits and misses (so eight statistics total).
#define arc_size   ARCSTAT(arcstat_size)
 actual total arc size
#define arc_p   ARCSTAT(arcstat_p)
 target size of MRU
#define arc_c   ARCSTAT(arcstat_c)
 target size of cache
#define arc_c_min   ARCSTAT(arcstat_c_min)
 min target cache size
#define arc_c_max   ARCSTAT(arcstat_c_max)
 max target cache size
#define GHOST_STATE(state)
#define ARC_IN_HASH_TABLE   (1 << 9)
 this buffer is hashed
#define ARC_IO_IN_PROGRESS   (1 << 10)
 I/O in progress for buf.
#define ARC_IO_ERROR   (1 << 11)
 I/O failed for buf.
#define ARC_FREED_IN_READ   (1 << 12)
 buf freed while in read
#define ARC_BUF_AVAILABLE   (1 << 13)
 block not in active use
#define ARC_INDIRECT   (1 << 14)
 this is an indirect block
#define ARC_FREE_IN_PROGRESS   (1 << 15)
 hdr about to be freed
#define ARC_L2_WRITING   (1 << 16)
 L2ARC write in progress.
#define ARC_L2_EVICTED   (1 << 17)
 evicted during I/O
#define ARC_L2_WRITE_HEAD   (1 << 18)
 head of write list
#define HDR_IN_HASH_TABLE(hdr)   ((hdr)->b_flags & ARC_IN_HASH_TABLE)
#define HDR_IO_IN_PROGRESS(hdr)   ((hdr)->b_flags & ARC_IO_IN_PROGRESS)
#define HDR_IO_ERROR(hdr)   ((hdr)->b_flags & ARC_IO_ERROR)
#define HDR_PREFETCH(hdr)   ((hdr)->b_flags & ARC_PREFETCH)
#define HDR_FREED_IN_READ(hdr)   ((hdr)->b_flags & ARC_FREED_IN_READ)
#define HDR_BUF_AVAILABLE(hdr)   ((hdr)->b_flags & ARC_BUF_AVAILABLE)
#define HDR_FREE_IN_PROGRESS(hdr)   ((hdr)->b_flags & ARC_FREE_IN_PROGRESS)
#define HDR_L2CACHE(hdr)   ((hdr)->b_flags & ARC_L2CACHE)
#define HDR_L2_READING(hdr)
#define HDR_L2_WRITING(hdr)   ((hdr)->b_flags & ARC_L2_WRITING)
#define HDR_L2_EVICTED(hdr)   ((hdr)->b_flags & ARC_L2_EVICTED)
#define HDR_L2_WRITE_HEAD(hdr)   ((hdr)->b_flags & ARC_L2_WRITE_HEAD)
#define HDR_SIZE   ((int64_t)sizeof (arc_buf_hdr_t))
#define L2HDR_SIZE   ((int64_t)sizeof (l2arc_buf_hdr_t))
#define HT_LOCK_PAD   CACHE_LINE_SIZE
#define BUF_LOCKS   256
#define BUF_HASH_INDEX(spa, dva, birth)   (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
#define BUF_HASH_LOCK_NTRY(idx)   (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
#define BUF_HASH_LOCK(idx)   (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
#define HDR_LOCK(hdr)   (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
#define L2ARC_WRITE_SIZE   (8 * 1024 * 1024)
 initial write max
#define L2ARC_HEADROOM   2
 num of writes
#define L2ARC_FEED_SECS   1
 caching interval secs
#define L2ARC_FEED_MIN_MS   200
 min caching interval ms
#define l2arc_writes_sent   ARCSTAT(arcstat_l2_writes_sent)
#define l2arc_writes_done   ARCSTAT(arcstat_l2_writes_done)
#define BUF_EMPTY(buf)
#define BUF_EQUAL(spa, dva, birth, buf)
#define ARC_MINTIME   (hz>>4)

Typedefs

typedef enum arc_reclaim_strategy arc_reclaim_strategy_t
typedef struct arc_state arc_state_t
typedef struct arc_stats arc_stats_t
typedef struct l2arc_buf_hdr l2arc_buf_hdr_t
typedef struct arc_callback arc_callback_t
typedef struct arc_write_callback arc_write_callback_t
typedef struct buf_hash_table buf_hash_table_t
typedef struct l2arc_dev l2arc_dev_t
typedef struct l2arc_read_callback l2arc_read_callback_t
typedef struct l2arc_write_callback l2arc_write_callback_t
typedef struct l2arc_data_free l2arc_data_free_t

Enumerations

enum  arc_reclaim_strategy { ARC_RECLAIM_AGGR, ARC_RECLAIM_CONS }

Functions

 TUNABLE_QUAD ("vfs.zfs.arc_max",&zfs_arc_max)
 TUNABLE_QUAD ("vfs.zfs.arc_min",&zfs_arc_min)
 TUNABLE_QUAD ("vfs.zfs.arc_meta_limit",&zfs_arc_meta_limit)
 SYSCTL_DECL (_vfs_zfs)
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN,&zfs_arc_max, 0,"Maximum ARC size")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_RDTUN,&zfs_arc_min, 0,"Minimum ARC size")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, arc_meta_used, CTLFLAG_RD,&arc_meta_used, 0,"ARC metadata used")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, arc_meta_limit, CTLFLAG_RW,&arc_meta_limit, 0,"ARC metadata limit")
static void arc_get_data_buf (arc_buf_t *buf)
 The buffer, supplied as the first argument, needs a data block.
static void arc_access (arc_buf_hdr_t *buf, kmutex_t *hash_lock)
 This routine is called whenever a buffer is accessed.
static int arc_evict_needed (arc_buf_contents_t type)
 Check if the cache has reached its limits and eviction is required prior to insert.
static void arc_evict_ghost (arc_state_t *state, uint64_t spa, int64_t bytes)
 Remove buffers from list until we've removed the specified number of bytes.
static boolean_t l2arc_write_eligible (uint64_t spa_guid, arc_buf_hdr_t *ab)
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW,&l2arc_write_max, 0,"max write size")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW,&l2arc_write_boost, 0,"extra write during warmup")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW,&l2arc_headroom, 0,"number of dev writes")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW,&l2arc_feed_secs, 0,"interval seconds")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW,&l2arc_feed_min_ms, 0,"min interval milliseconds")
 SYSCTL_INT (_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW,&l2arc_noprefetch, 0,"don't cache prefetch bufs")
 SYSCTL_INT (_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW,&l2arc_feed_again, 0,"turbo warmup")
 SYSCTL_INT (_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,&l2arc_norw, 0,"no reads during writes")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,&ARC_anon.arcs_size, 0,"size of anonymous state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, anon_metadata_lsize, CTLFLAG_RD,&ARC_anon.arcs_lsize[ARC_BUFC_METADATA], 0,"size of anonymous state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, anon_data_lsize, CTLFLAG_RD,&ARC_anon.arcs_lsize[ARC_BUFC_DATA], 0,"size of anonymous state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,&ARC_mru.arcs_size, 0,"size of mru state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mru_metadata_lsize, CTLFLAG_RD,&ARC_mru.arcs_lsize[ARC_BUFC_METADATA], 0,"size of metadata in mru state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mru_data_lsize, CTLFLAG_RD,&ARC_mru.arcs_lsize[ARC_BUFC_DATA], 0,"size of data in mru state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,&ARC_mru_ghost.arcs_size, 0,"size of mru ghost state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mru_ghost_metadata_lsize, CTLFLAG_RD,&ARC_mru_ghost.arcs_lsize[ARC_BUFC_METADATA], 0,"size of metadata in mru ghost state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mru_ghost_data_lsize, CTLFLAG_RD,&ARC_mru_ghost.arcs_lsize[ARC_BUFC_DATA], 0,"size of data in mru ghost state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,&ARC_mfu.arcs_size, 0,"size of mfu state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mfu_metadata_lsize, CTLFLAG_RD,&ARC_mfu.arcs_lsize[ARC_BUFC_METADATA], 0,"size of metadata in mfu state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mfu_data_lsize, CTLFLAG_RD,&ARC_mfu.arcs_lsize[ARC_BUFC_DATA], 0,"size of data in mfu state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,&ARC_mfu_ghost.arcs_size, 0,"size of mfu ghost state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mfu_ghost_metadata_lsize, CTLFLAG_RD,&ARC_mfu_ghost.arcs_lsize[ARC_BUFC_METADATA], 0,"size of metadata in mfu ghost state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, mfu_ghost_data_lsize, CTLFLAG_RD,&ARC_mfu_ghost.arcs_lsize[ARC_BUFC_DATA], 0,"size of data in mfu ghost state")
 SYSCTL_UQUAD (_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,&ARC_l2c_only.arcs_size, 0,"size of mru state")
static void l2arc_read_done (zio_t *zio)
 A read to a cache device completed.
static void l2arc_hdr_stat_add (void)
static void l2arc_hdr_stat_remove (void)
static uint64_t buf_hash (uint64_t spa, const dva_t *dva, uint64_t birth)
static void buf_discard_identity (arc_buf_hdr_t *hdr)
static arc_buf_hdr_t * buf_hash_find (uint64_t spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp)
static arc_buf_hdr_t * buf_hash_insert (arc_buf_hdr_t *buf, kmutex_t **lockp)
 Insert an entry into the hash table.
static void buf_hash_remove (arc_buf_hdr_t *buf)
static void buf_fini (void)
static int hdr_cons (void *vbuf, void *unused, int kmflag)
 Constructor callback - called when the cache is empty and a new buf is requested.
static int buf_cons (void *vbuf, void *unused, int kmflag)
static void hdr_dest (void *vbuf, void *unused)
 Destructor callback - called when a cached buf is no longer required.
static void buf_dest (void *vbuf, void *unused)
static void hdr_recl (void *unused)
 Reclaim callback -- invoked when memory is low.
static void buf_init (void)
static void arc_cksum_verify (arc_buf_t *buf)
static int arc_cksum_equal (arc_buf_t *buf)
static void arc_cksum_compute (arc_buf_t *buf, boolean_t force)
void arc_buf_thaw (arc_buf_t *buf)
void arc_buf_freeze (arc_buf_t *buf)
static void get_buf_info (arc_buf_hdr_t *ab, arc_state_t *state, list_t **list, kmutex_t **lock)
static void add_reference (arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
static int remove_reference (arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
static void arc_change_state (arc_state_t *new_state, arc_buf_hdr_t *ab, kmutex_t *hash_lock)
 Move the supplied buffer to the indicated state.
void arc_space_consume (uint64_t space, arc_space_type_t type)
void arc_space_return (uint64_t space, arc_space_type_t type)
void * arc_data_buf_alloc (uint64_t size)
void arc_data_buf_free (void *buf, uint64_t size)
arc_buf_t * arc_buf_alloc (spa_t *spa, int size, void *tag, arc_buf_contents_t type)
arc_buf_t * arc_loan_buf (spa_t *spa, int size)
 Loan out an anonymous arc buffer.
void arc_return_buf (arc_buf_t *buf, void *tag)
 Return a loaned arc buffer to the arc.
void arc_loan_inuse_buf (arc_buf_t *buf, void *tag)
 Detach an arc_buf from a dbuf (tag)
static arc_buf_t * arc_buf_clone (arc_buf_t *from)
void arc_buf_add_ref (arc_buf_t *buf, void *tag)
static void arc_buf_data_free (arc_buf_t *buf, void(*free_func)(void *, size_t))
 Free the arc data buffer.
static void arc_buf_destroy (arc_buf_t *buf, boolean_t recycle, boolean_t all)
static void arc_hdr_destroy (arc_buf_hdr_t *hdr)
void arc_buf_free (arc_buf_t *buf, void *tag)
int arc_buf_remove_ref (arc_buf_t *buf, void *tag)
int arc_buf_size (arc_buf_t *buf)
boolean_t arc_buf_eviction_needed (arc_buf_t *buf)
 Called from the DMU to determine if the current buffer should be evicted.
static void * arc_evict (arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, arc_buf_contents_t type)
 Evict buffers from list until we've removed the specified number of bytes.
static void arc_adjust (void)
static void arc_do_user_evicts (void)
void arc_flush (spa_t *spa)
 Flush all *evictable* data from the cache for the given spa.
void arc_shrink (void)
static int arc_reclaim_needed (void)
static void arc_kmem_reap_now (arc_reclaim_strategy_t strat)
static void arc_reclaim_thread (void *dummy __unused)
static void arc_adapt (int bytes, arc_state_t *state)
 Adapt arc info given the number of bytes we are trying to add and the state that we are coming from.
void arc_bcopy_func (zio_t *zio, arc_buf_t *buf, void *arg)
 a generic arc_done_func_t which you can use
void arc_getbuf_func (zio_t *zio, arc_buf_t *buf, void *arg)
 a generic arc_done_func_t
static void arc_read_done (zio_t *zio)
int arc_read (zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf, arc_done_func_t *done, void *private, int priority, int zio_flags, uint32_t *arc_flags, const zbookmark_t *zb)
 "Read" the block at the specified DVA (in bp) via the cache.
int arc_read_nolock (zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, void *private, int priority, int zio_flags, uint32_t *arc_flags, const zbookmark_t *zb)
void arc_set_callback (arc_buf_t *buf, arc_evict_func_t *func, void *private)
int arc_buf_evict (arc_buf_t *buf)
 This is used by the DMU to let the ARC know that a buffer is being evicted, so the ARC should clean up.
void arc_release (arc_buf_t *buf, void *tag)
 Release this buffer from the cache.
int arc_release_bp (arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa, zbookmark_t *zb)
 Release this buffer.
int arc_released (arc_buf_t *buf)
int arc_has_callback (arc_buf_t *buf)
static void arc_write_ready (zio_t *zio)
static void arc_write_done (zio_t *zio)
zio_t * arc_write (zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority, int zio_flags, const zbookmark_t *zb)
static int arc_memory_throttle (uint64_t reserve, uint64_t inflight_data, uint64_t txg)
void arc_tempreserve_clear (uint64_t reserve)
int arc_tempreserve_space (uint64_t reserve, uint64_t txg)
static void arc_lowmem (void *arg __unused, int howto __unused)
void arc_init (void)
void arc_fini (void)
static uint64_t l2arc_write_size (l2arc_dev_t *dev)
static clock_t l2arc_write_interval (clock_t began, uint64_t wanted, uint64_t wrote)
static l2arc_dev_t * l2arc_dev_get_next (void)
 Cycle through L2ARC devices.
static void l2arc_do_free_on_write ()
 Free buffers that were tagged for destruction.
static void l2arc_write_done (zio_t *zio)
 A write to a cache device has completed.
static list_t * l2arc_list_locked (int list_num, kmutex_t **lock)
 This is the list priority from which the L2ARC will search for pages to cache.
static void l2arc_evict (l2arc_dev_t *dev, uint64_t distance, boolean_t all)
 Evict buffers from the device write hand to the distance specified in bytes.
static uint64_t l2arc_write_buffers (spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 Find and write ARC buffers to the L2ARC device.
static void l2arc_feed_thread (void *dummy __unused)
 Feed the L2ARC with buffers from the ARC at regular intervals.
boolean_t l2arc_vdev_present (vdev_t *vd)
void l2arc_add_vdev (spa_t *spa, vdev_t *vd)
 Add a vdev for use by the L2ARC.
void l2arc_remove_vdev (vdev_t *vd)
 Remove a vdev from the L2ARC.
void l2arc_init (void)
void l2arc_fini (void)
void l2arc_start (void)
void l2arc_stop (void)

Variables

static kmutex_t arc_reclaim_thr_lock
static kcondvar_t arc_reclaim_thr_cv
 used to signal reclaim thr
static uint8_t arc_thread_exit
int zfs_write_limit_shift
 1/8th of physical memory
uint64_t zfs_write_limit_max
 max data payload per txg
kmutex_t zfs_write_limit_lock
uint_t arc_reduce_dnlc_percent = ARC_REDUCE_DNLC_PERCENT
static int arc_grow_retry = 60
 number of seconds before growing cache again
static int arc_p_min_shift = 4
 shift of arc_c for calculating both min and max arc_p
static int arc_shrink_shift = 5
 log2(fraction of arc to reclaim)
static int arc_min_prefetch_lifespan
 minimum lifespan of a prefetch block in clock ticks (initialized in arc_init())
static int arc_dead
int zfs_prefetch_disable
static boolean_t arc_warm
 The arc has filled available memory and has now warmed up.
uint64_t zfs_arc_max
uint64_t zfs_arc_min
uint64_t zfs_arc_meta_limit = 0
int zfs_arc_grow_retry = 0
int zfs_arc_shrink_shift = 0
int zfs_arc_p_min_shift = 0
int zfs_disable_dup_eviction = 0
static arc_state_t ARC_anon
static arc_state_t ARC_mru
static arc_state_t ARC_mru_ghost
static arc_state_t ARC_mfu
static arc_state_t ARC_mfu_ghost
static arc_state_t ARC_l2c_only
static arc_stats_t arc_stats
kstat_t * arc_ksp
static arc_state_t * arc_anon
static arc_state_t * arc_mru
static arc_state_t * arc_mru_ghost
static arc_state_t * arc_mfu
static arc_state_t * arc_mfu_ghost
static arc_state_t * arc_l2c_only
static int arc_no_grow
 Don't try to grow cache size.
static uint64_t arc_tempreserve
static uint64_t arc_loaned_bytes
static uint64_t arc_meta_used
static uint64_t arc_meta_limit
static uint64_t arc_meta_max = 0
static arc_buf_t * arc_eviction_list
static kmutex_t arc_eviction_mtx
static arc_buf_hdr_t arc_eviction_hdr
static buf_hash_table_t buf_hash_table
uint64_t zfs_crc64_table [256]
uint64_t l2arc_write_max = L2ARC_WRITE_SIZE
 default max write size
uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE
 extra write during warmup
uint64_t l2arc_headroom = L2ARC_HEADROOM
 number of dev writes
uint64_t l2arc_feed_secs = L2ARC_FEED_SECS
 interval seconds
uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS
 min interval milliseconds
boolean_t l2arc_noprefetch = B_TRUE
 don't cache prefetch bufs
boolean_t l2arc_feed_again = B_TRUE
 turbo warmup
boolean_t l2arc_norw = B_TRUE
 no reads during writes
static list_t L2ARC_dev_list
 device list
static list_t * l2arc_dev_list
 device list pointer
static kmutex_t l2arc_dev_mtx
 device list mutex
static l2arc_dev_t * l2arc_dev_last
 last device used
static kmutex_t l2arc_buflist_mtx
 mutex for all buflists
static list_t L2ARC_free_on_write
 free after write buf list
static list_t * l2arc_free_on_write
 free after write list ptr
static kmutex_t l2arc_free_on_write_mtx
 mutex for list
static uint64_t l2arc_ndev
 number of devices
static kmutex_t l2arc_feed_thr_lock
static kcondvar_t l2arc_feed_thr_cv
static uint8_t l2arc_thread_exit
static kmem_cache_t * hdr_cache
static kmem_cache_t * buf_cache
static char * arc_onloan_tag = "onloan"
static int needfree = 0
kmem_cache_t * zio_buf_cache []
kmem_cache_t * zio_data_buf_cache []
static kmutex_t arc_lowmem_lock
static eventhandler_tag arc_event_lowmem = NULL

Detailed Description

DVA-based Adaptive Replacement Cache.

Megiddo and Modha's Adaptive Replacement Cache

While much of the theory of operation used here is based on the self-tuning, low overhead replacement cache presented by Megiddo and Modha at FAST 2003, there are some significant differences:

  1. The Megiddo and Modha model assumes any page is evictable. Pages in its cache cannot be "locked" into memory. This makes the eviction algorithm simple: evict the last page in the list. This also makes the performance characteristics easy to reason about. Our cache is not so simple. At any given moment, some subset of the blocks in the cache are un-evictable because we have handed out a reference to them. Blocks are only evictable when there are no external references active. This makes eviction far more problematic: we choose to evict the evictable blocks that are the "lowest" in the list.

    There are times when it is not possible to evict the requested space. In these circumstances we are unable to adjust the cache size. To prevent the cache growing unbounded at these times we implement a "cache throttle" that slows the flow of new data into the cache until we can make space available.
  2. The Megiddo and Modha model assumes a fixed cache size. Pages are evicted when the cache is full and there is a cache miss. Our model has a variable sized cache. It grows with high use, but also tries to react to memory pressure from the operating system: decreasing its size when system memory is tight.
  3. The Megiddo and Modha model assumes a fixed page size. All elements of the cache are therefore exactly the same size. So when adjusting the cache size following a cache miss, it's simply a matter of choosing a single page to evict. In our model, we have variable sized cache blocks (ranging from 512 bytes to 128K bytes). We therefore choose a set of blocks to evict to make space for a cache miss that approximates as closely as possible the space used by the new block.

See also: "ARC: A Self-Tuning, Low Overhead Replacement Cache" by N. Megiddo & D. Modha, FAST 2003

Locking Model

A new reference to a cache buffer can be obtained in two ways:

  1. via a hash table lookup using the DVA as a key
  2. via one of the ARC lists

The arc_read() interface uses method 1, while the internal arc algorithms for adjusting the cache use method 2. We therefore provide two types of locks:

  1. the hash table lock array
  2. the arc list locks

Buffers do not have their own mutexes, rather they rely on the hash table mutexes for the bulk of their protection (i.e. most fields in the arc_buf_hdr_t are protected by these mutexes).

buf_hash_find() returns the appropriate mutex (held) when it locates the requested buffer in the hash table. It returns NULL for the mutex if the buffer was not in the table.

buf_hash_remove() expects the appropriate hash mutex to be already held before it is invoked.

Each arc state also has a mutex which is used to protect the buffer list associated with the state. When attempting to obtain a hash table lock while holding an arc list lock you must use: mutex_tryenter() to avoid deadlock. Also note that the active state mutex must be held before the ghost state mutex.

Arc buffers may have an associated eviction callback function. This function will be invoked prior to removing the buffer (e.g. in arc_do_user_evicts()). Note however that the data associated with the buffer may be evicted prior to the callback. The callback must be made with *no locks held* (to prevent deadlock). Additionally, the users of callbacks must ensure that their private data is protected from simultaneous callbacks from arc_buf_evict() and arc_do_user_evicts().

Note that the majority of the performance stats are manipulated with atomic operations.

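The L2ARC uses the l2arc_buflist_mtx global mutex for the following:

  - L2ARC buflist creation
  - L2ARC buflist eviction
  - L2ARC write completion, which walks L2ARC buflists
  - ARC header destruction, as it removes from L2ARC buflists
  - ARC header release, as it removes from L2ARC buflists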

Arc Buffer States

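Buffers can be in one of 6 states:

  - ARC_anon: anonymous (discussed below)
  - ARC_mru: recently used, currently cached
  - ARC_mru_ghost: recently used, no longer in cache
  - ARC_mfu: frequently used, currently cached
  - ARC_mfu_ghost: frequently used, no longer in cache
  - ARC_l2c_only: exists in L2ARC but not in the other states (discussed below)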

Anonymous buffers are buffers that are not associated with a DVA. These are buffers that hold dirty block copies before they are written to stable storage. By definition, they are "ref'd" and are considered part of arc_mru that cannot be freed. Generally, they will acquire a DVA as they are written and migrate onto the arc_mru list.

The ARC_l2c_only state is for buffers that are in the second level ARC but no longer in any of the ARC_m* lists. The second level ARC itself may also contain buffers that are in any of the ARC_m* states - meaning that a buffer can exist in two places. The reason for the ARC_l2c_only state is to keep the buffer header in the hash table, so that reads that hit the second level ARC benefit from these fast lookups.

Level 2 ARC

The level 2 ARC (L2ARC) is a cache layer in-between main memory and disk. It uses dedicated storage devices to hold cached data, which are populated using large infrequent writes. The main role of this cache is to boost the performance of random read workloads. The intended L2ARC devices include short-stroked disks, solid state disks, and other media with substantially faster read latency than disk.

                   +-----------------------+
                   |         ARC           |
                   +-----------------------+
                      |         ^     ^
                      |         |     |
        l2arc_feed_thread()    arc_read()
                      |         |     |
                      |  l2arc read   |
                      V         |     |
                 +---------------+    |
                 |     L2ARC     |    |
                 +---------------+    |
                     |    ^           |
            l2arc_write() |           |
                     |    |           |
                     V    |           |
                   +-------+      +-------+
                   | vdev  |      | vdev  |
                   | cache |      | cache |
                   +-------+      +-------+
                   +=========+     .-----.
                   :  L2ARC  :    |-_____-|
                   : devices :    | Disks |
                   +=========+    `-_____-'
 

Read requests are satisfied from the following sources, in order:

  1. ARC
  2. vdev cache of L2ARC devices
  3. L2ARC devices
  4. vdev cache of disks
  5. disks

Some L2ARC device types exhibit extremely slow write performance. To accommodate for this there are some significant differences between the L2ARC and traditional cache design:

  1. There is no eviction path from the ARC to the L2ARC. Evictions from the ARC behave as usual, freeing buffers and placing headers on ghost lists. The ARC does not send buffers to the L2ARC during eviction as this would add inflated write latencies for all ARC memory pressure.
  2. The L2ARC attempts to cache data from the ARC before it is evicted. It does this by periodically scanning buffers from the eviction-end of the MFU and MRU ARC lists, copying them to the L2ARC devices if they are not already there. It scans until a headroom of buffers is satisfied, which itself is a buffer for ARC eviction. The thread that does this is l2arc_feed_thread(), illustrated below; example sizes are included to provide a better sense of ratio than this diagram:
      	       head -->                        tail
      	        +---------------------+----------+
      	ARC_mfu |:::::#:::::::::::::::|o#o###o###|-->.   # already on L2ARC
      	        +---------------------+----------+   |   o L2ARC eligible
      	ARC_mru |:#:::::::::::::::::::|#o#ooo####|-->|   : ARC buffer
      	        +---------------------+----------+   |
      	             15.9 Gbytes      ^ 32 Mbytes    |
      	                           headroom          |
      	                                      l2arc_feed_thread()
      	                                             |
      	                 l2arc write hand <--[oooo]--'
      	                         |           8 Mbyte
      	                         |          write max
      	                         V
      		  +==============================+
      	L2ARC dev |####|#|###|###|    |####| ... |
      	          +==============================+
      	                     32 Gbytes
     
  3. If an ARC buffer is copied to the L2ARC but then hit instead of evicted, then the L2ARC has cached a buffer much sooner than it probably needed to, potentially wasting L2ARC device bandwidth and storage. It is safe to say that this is an uncommon case, since buffers at the end of the ARC lists have moved there due to inactivity.
  4. If the ARC evicts faster than the L2ARC can maintain a headroom, then the L2ARC simply misses copying some buffers. This serves as a pressure valve to prevent heavy read workloads from both stalling the ARC with waits and clogging the L2ARC with writes. This also helps prevent the potential for the L2ARC to churn if it attempts to cache content too quickly, such as during backups of the entire pool.
  5. After system boot and before the ARC has filled main memory, there are no evictions from the ARC and so the tails of the ARC_mfu and ARC_mru lists can remain mostly static. Instead of searching from tail of these lists as pictured, the l2arc_feed_thread() will search from the list heads for eligible buffers, greatly increasing its chance of finding them.

    The L2ARC device write speed is also boosted during this time so that the L2ARC warms up faster. Since there have been no ARC evictions yet, there are no L2ARC reads, and no fear of degrading read performance through increased writes.
  6. Writes to the L2ARC devices are grouped and sent in-sequence, so that the vdev queue can aggregate them into larger and fewer writes. Each device is written to in a rotor fashion, sweeping writes through available space then repeating.
  7. The L2ARC does not store dirty content. It never needs to flush write buffers back to disk based storage.
  8. If an ARC buffer is written (and dirtied) which also exists in the L2ARC, the now stale L2ARC buffer is immediately dropped.

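The performance of the L2ARC can be tweaked by a number of tunables, which may be necessary for different workloads:

  - l2arc_write_max: max write size
  - l2arc_write_boost: extra write during warmup
  - l2arc_noprefetch: don't cache prefetch bufs
  - l2arc_headroom: number of dev writes
  - l2arc_feed_secs: interval seconds
  - l2arc_feed_min_ms: min interval milliseconds
  - l2arc_feed_again: turbo warmup
  - l2arc_norw: no reads during writes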

Tunables may be removed or added as future performance improvements are integrated, and also may become zpool properties.

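There are three key functions that control how the L2ARC warms up:

  - l2arc_write_eligible(): check if a buffer is eligible to cache
  - l2arc_write_size(): calculate how much to write
  - l2arc_write_interval(): calculate the sleep delay between writes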

These three functions determine what to write, how much, and how quickly to send writes.

Definition in file arc.c.


Define Documentation

#define ARC_BUF_AVAILABLE   (1 << 13)

block not in active use

Definition at line 586 of file arc.c.

#define ARC_BUFC_NUMDATALISTS   16

must be power of two for mask use to work

Definition at line 261 of file arc.c.

#define ARC_BUFC_NUMLISTS   (ARC_BUFC_NUMMETADATALISTS + ARC_BUFC_NUMDATALISTS)

Definition at line 263 of file arc.c.

#define ARC_BUFC_NUMMETADATALISTS   16

Definition at line 262 of file arc.c.

#define arc_c   ARCSTAT(arcstat_c)

target size of cache

Definition at line 484 of file arc.c.

#define arc_c_max   ARCSTAT(arcstat_c_max)

max target cache size

Definition at line 486 of file arc.c.

#define arc_c_min   ARCSTAT(arcstat_c_min)

min target cache size

Definition at line 485 of file arc.c.

#define ARC_FREE_IN_PROGRESS   (1 << 15)

hdr about to be freed

Definition at line 588 of file arc.c.

#define ARC_FREED_IN_READ   (1 << 12)

buf freed while in read

Definition at line 585 of file arc.c.

#define ARC_IN_HASH_TABLE   (1 << 9)

this buffer is hashed

Definition at line 582 of file arc.c.

#define ARC_INDIRECT   (1 << 14)

this is an indirect block

Definition at line 587 of file arc.c.

#define ARC_IO_ERROR   (1 << 11)

I/O failed for buf.

Definition at line 584 of file arc.c.

#define ARC_IO_IN_PROGRESS   (1 << 10)

I/O in progress for buf.

Definition at line 583 of file arc.c.

#define ARC_L2_EVICTED   (1 << 17)

evicted during I/O

Definition at line 590 of file arc.c.

#define ARC_L2_WRITE_HEAD   (1 << 18)

head of write list

Definition at line 591 of file arc.c.

#define ARC_L2_WRITING   (1 << 16)

L2ARC write in progress.

Definition at line 589 of file arc.c.

#define ARC_MINTIME   (hz>>4)

Definition at line 1054 of file arc.c.

#define arc_p   ARCSTAT(arcstat_p)

target size of MRU

Definition at line 483 of file arc.c.

#define ARC_REDUCE_DNLC_PERCENT   3

Definition at line 158 of file arc.c.

#define arc_size   ARCSTAT(arcstat_size)

actual total arc size

Definition at line 482 of file arc.c.

#define ARCS_LOCK (   s,
  i 
)    (&((s)->arcs_locks[(i)].arcs_lock))

Definition at line 272 of file arc.c.

#define ARCS_LOCK_PAD   CACHE_LINE_SIZE

Definition at line 250 of file arc.c.

#define ARCSTAT (   stat)    (arc_stats.stat.value.ui64)

Definition at line 428 of file arc.c.

#define ARCSTAT_BUMP (   stat)    ARCSTAT_INCR(stat, 1)

Definition at line 433 of file arc.c.

#define ARCSTAT_BUMPDOWN (   stat)    ARCSTAT_INCR(stat, -1)

Definition at line 434 of file arc.c.

#define ARCSTAT_CONDSTAT (   cond1,
  stat1,
  notstat1,
  cond2,
  stat2,
  notstat2,
  stat 
)
Value:
if (cond1) {                                                    \
                if (cond2) {                                            \
                        ARCSTAT_BUMP(arcstat_##stat1##_##stat2##_##stat); \
                } else {                                                \
                        ARCSTAT_BUMP(arcstat_##stat1##_##notstat2##_##stat); \
                }                                                       \
        } else {                                                        \
                if (cond2) {                                            \
                        ARCSTAT_BUMP(arcstat_##notstat1##_##stat2##_##stat); \
                } else {                                                \
                        ARCSTAT_BUMP(arcstat_##notstat1##_##notstat2##_##stat);\
                }                                                       \
        }

We define a macro to allow ARC hits/misses to be easily broken down by two separate conditions, giving a total of four different subtypes for each of hits and misses (so eight statistics total).

Definition at line 451 of file arc.c.

#define ARCSTAT_INCR (   stat,
  val 
)    atomic_add_64(&arc_stats.stat.value.ui64, (val));

Definition at line 430 of file arc.c.

#define ARCSTAT_MAX (   stat,
  val 
)
Value:
{                                       \
        uint64_t m;                                                     \
        while ((val) > (m = arc_stats.stat.value.ui64) &&               \
            (m != atomic_cas_64(&arc_stats.stat.value.ui64, m, (val)))) \
                continue;                                               \
}

Definition at line 436 of file arc.c.

#define ARCSTAT_MAXSTAT (   stat)    ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)

Definition at line 443 of file arc.c.

#define BUF_EMPTY (   buf)
Value:
((buf)->b_dva.dva_word[0] == 0 &&                       \
        (buf)->b_dva.dva_word[1] == 0 &&                        \
        (buf)->b_birth == 0)

Definition at line 812 of file arc.c.

#define BUF_EQUAL (   spa,
  dva,
  birth,
  buf 
)
Value:
((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) &&     \
        ((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) &&     \
        ((buf)->b_birth == birth) && ((buf)->b_spa == spa)

Definition at line 817 of file arc.c.

#define BUF_HASH_INDEX (   spa,
  dva,
  birth 
)    (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)

Definition at line 636 of file arc.c.

#define BUF_HASH_LOCK (   idx)    (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))

Definition at line 639 of file arc.c.

#define BUF_HASH_LOCK_NTRY (   idx)    (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])

Definition at line 638 of file arc.c.

#define BUF_LOCKS   256

Definition at line 627 of file arc.c.

#define GHOST_STATE (   state)
Value:
((state) == arc_mru_ghost || (state) == arc_mfu_ghost ||        \
        (state) == arc_l2c_only)

Definition at line 570 of file arc.c.

#define HDR_BUF_AVAILABLE (   hdr)    ((hdr)->b_flags & ARC_BUF_AVAILABLE)

Definition at line 598 of file arc.c.

#define HDR_FREE_IN_PROGRESS (   hdr)    ((hdr)->b_flags & ARC_FREE_IN_PROGRESS)

Definition at line 599 of file arc.c.

#define HDR_FREED_IN_READ (   hdr)    ((hdr)->b_flags & ARC_FREED_IN_READ)

Definition at line 597 of file arc.c.

#define HDR_IN_HASH_TABLE (   hdr)    ((hdr)->b_flags & ARC_IN_HASH_TABLE)

Definition at line 593 of file arc.c.

#define HDR_IO_ERROR (   hdr)    ((hdr)->b_flags & ARC_IO_ERROR)

Definition at line 595 of file arc.c.

#define HDR_IO_IN_PROGRESS (   hdr)    ((hdr)->b_flags & ARC_IO_IN_PROGRESS)

Definition at line 594 of file arc.c.

#define HDR_L2_EVICTED (   hdr)    ((hdr)->b_flags & ARC_L2_EVICTED)

Definition at line 604 of file arc.c.

#define HDR_L2_READING (   hdr)
Value:
((hdr)->b_flags & ARC_IO_IN_PROGRESS && \
                                    (hdr)->b_l2hdr != NULL)

Definition at line 601 of file arc.c.

#define HDR_L2_WRITE_HEAD (   hdr)    ((hdr)->b_flags & ARC_L2_WRITE_HEAD)

Definition at line 605 of file arc.c.

#define HDR_L2_WRITING (   hdr)    ((hdr)->b_flags & ARC_L2_WRITING)

Definition at line 603 of file arc.c.

#define HDR_L2CACHE (   hdr)    ((hdr)->b_flags & ARC_L2CACHE)

Definition at line 600 of file arc.c.

#define HDR_LOCK (   hdr)    (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))

Definition at line 640 of file arc.c.

#define HDR_PREFETCH (   hdr)    ((hdr)->b_flags & ARC_PREFETCH)

Definition at line 596 of file arc.c.

#define HDR_SIZE   ((int64_t)sizeof (arc_buf_hdr_t))

Definition at line 611 of file arc.c.

#define HT_LOCK_PAD   CACHE_LINE_SIZE

Definition at line 618 of file arc.c.

#define L2ARC_FEED_MIN_MS   200

min caching interval ms

Definition at line 652 of file arc.c.

#define L2ARC_FEED_SECS   1

caching interval secs

Definition at line 651 of file arc.c.

#define L2ARC_HEADROOM   2

num of writes

Definition at line 650 of file arc.c.

#define L2ARC_WRITE_SIZE   (8 * 1024 * 1024)

initial write max

Definition at line 649 of file arc.c.

#define l2arc_writes_done   ARCSTAT(arcstat_l2_writes_done)

Definition at line 655 of file arc.c.

#define l2arc_writes_sent   ARCSTAT(arcstat_l2_writes_sent)

Definition at line 654 of file arc.c.

#define L2HDR_SIZE   ((int64_t)sizeof (l2arc_buf_hdr_t))

Definition at line 612 of file arc.c.


Typedef Documentation

typedef struct arc_callback arc_callback_t

Definition at line 501 of file arc.c.

typedef struct arc_state arc_state_t
typedef struct arc_stats arc_stats_t

Definition at line 511 of file arc.c.

Definition at line 499 of file arc.c.

typedef struct l2arc_dev l2arc_dev_t

Enumeration Type Documentation

Enumerator:
ARC_RECLAIM_AGGR 

Aggressive reclaim strategy.

ARC_RECLAIM_CONS 

Conservative reclaim strategy.

Definition at line 161 of file arc.c.


Function Documentation

static void add_reference ( arc_buf_hdr_t ab,
kmutex_t *  hash_lock,
void *  tag 
) [static]

Definition at line 1221 of file arc.c.

static void arc_access ( arc_buf_hdr_t buf,
kmutex_t *  hash_lock 
) [static]

This routine is called whenever a buffer is accessed.

Note:
The hash lock is dropped in this function.

Definition at line 2767 of file arc.c.

static void arc_adapt ( int  bytes,
arc_state_t state 
) [static]

Adapt arc info given the number of bytes we are trying to add and the state that we are coming from.

This function is only called when we are adding new content to the cache.

Definition at line 2569 of file arc.c.

static void arc_adjust ( void  ) [static]

Definition at line 2207 of file arc.c.

void arc_bcopy_func ( zio_t zio,
arc_buf_t buf,
void *  arg 
)

a generic arc_done_func_t which you can use

Definition at line 2901 of file arc.c.

void arc_buf_add_ref ( arc_buf_t buf,
void *  tag 
)

Definition at line 1561 of file arc.c.

arc_buf_t* arc_buf_alloc ( spa_t spa,
int  size,
void *  tag,
arc_buf_contents_t  type 
)

Definition at line 1446 of file arc.c.

static arc_buf_t* arc_buf_clone ( arc_buf_t from) [static]

Definition at line 1528 of file arc.c.

static void arc_buf_data_free ( arc_buf_t buf,
void(*)(void *, size_t)  free_func 
) [static]

Free the arc data buffer.

If it is an l2arc write in progress, the buffer is placed on l2arc_free_on_write to be freed later.

Definition at line 1598 of file arc.c.

static void arc_buf_destroy ( arc_buf_t buf,
boolean_t  recycle,
boolean_t  all 
) [static]

Definition at line 1618 of file arc.c.

int arc_buf_evict ( arc_buf_t buf)

This is used by the DMU to let the ARC know that a buffer is being evicted, so the ARC should clean up.

If this arc buf is not yet in the evicted state, it will be put there.

Definition at line 3385 of file arc.c.

boolean_t arc_buf_eviction_needed ( arc_buf_t buf)

Called from the DMU to determine if the current buffer should be evicted.

In order to ensure proper locking, the eviction must be initiated from the DMU. Return true if the buffer is associated with user data and duplicate buffers still exist.

Definition at line 1856 of file arc.c.

void arc_buf_free ( arc_buf_t buf,
void *  tag 
)

Definition at line 1763 of file arc.c.

void arc_buf_freeze ( arc_buf_t buf)

Definition at line 1186 of file arc.c.

int arc_buf_remove_ref ( arc_buf_t buf,
void *  tag 
)

Definition at line 1810 of file arc.c.

int arc_buf_size ( arc_buf_t buf)

Definition at line 1844 of file arc.c.

void arc_buf_thaw ( arc_buf_t buf)

Definition at line 1156 of file arc.c.

static void arc_change_state ( arc_state_t new_state,
arc_buf_hdr_t ab,
kmutex_t *  hash_lock 
) [static]

Move the supplied buffer to the indicated state.

The mutex for the buffer must be held by the caller.

Definition at line 1284 of file arc.c.

static void arc_cksum_compute ( arc_buf_t buf,
boolean_t  force 
) [static]

Definition at line 1091 of file arc.c.

static int arc_cksum_equal ( arc_buf_t buf) [static]

Definition at line 1077 of file arc.c.

static void arc_cksum_verify ( arc_buf_t buf) [static]

Definition at line 1057 of file arc.c.

void* arc_data_buf_alloc ( uint64_t  size)

Definition at line 1429 of file arc.c.

void arc_data_buf_free ( void *  buf,
uint64_t  size 
)

Definition at line 1438 of file arc.c.

static void arc_do_user_evicts ( void  ) [static]

Definition at line 2271 of file arc.c.

static void* arc_evict ( arc_state_t state,
uint64_t  spa,
int64_t  bytes,
boolean_t  recycle,
arc_buf_contents_t  type 
) [static]

Evict buffers from list until we've removed the specified number of bytes.

Move the removed buffers to the appropriate evict state. If the recycle flag is set, then attempt to "recycle" a buffer:

  • look for a buffer to evict that is `bytes' long.
  • return the data block from this buffer rather than freeing it. This flag is used by callers that are trying to make space for a new buffer in a full arc cache.

This function makes a "best effort". It skips over any buffers it can't get a hash_lock on, and so may not catch all candidates. It may also return without evicting as much space as requested.

Definition at line 1905 of file arc.c.

static void arc_evict_ghost ( arc_state_t state,
uint64_t  spa,
int64_t  bytes 
) [static]

Remove buffers from list until we've removed the specified number of bytes.

Destroy the buffers that are removed.

Definition at line 2102 of file arc.c.

static int arc_evict_needed ( arc_buf_contents_t  type) [static]

Check if the cache has reached its limits and eviction is required prior to insert.

Definition at line 2636 of file arc.c.

void arc_fini ( void  )

Definition at line 4105 of file arc.c.

void arc_flush ( spa_t spa)

Flush all *evictable* data from the cache for the given spa.

Note:
This will not touch "active" (i.e. referenced) data.

Definition at line 2309 of file arc.c.

static void arc_get_data_buf ( arc_buf_t buf) [static]

The buffer, supplied as the first argument, needs a data block.

So, if we are at cache max, determine which cache should be victimized. We have the following cases:

  1. Insert for MRU, p > sizeof(arc_anon + arc_mru) -> In this situation if we're out of space, but the resident size of the MFU is under the limit, victimize the MFU cache to satisfy this insertion request.
  2. Insert for MRU, p <= sizeof(arc_anon + arc_mru) -> Here, we've used up all of the available space for the MRU, so we need to evict from our own cache instead. Evict from the set of resident MRU entries.
  3. Insert for MFU (c - p) > sizeof(arc_mfu) -> c minus p represents the MFU space in the cache, since p is the size of the cache that is dedicated to the MRU. In this situation there's still space on the MFU side, so the MRU side needs to be victimized.
  4. Insert for MFU (c - p) < sizeof(arc_mfu) -> MFU's resident set is consuming more space than it has been allotted. In this situation, we must victimize our own cache, the MFU, for this insertion.

Definition at line 2680 of file arc.c.

void arc_getbuf_func ( zio_t zio,
arc_buf_t buf,
void *  arg 
)

a generic arc_done_func_t

Definition at line 2912 of file arc.c.

int arc_has_callback ( arc_buf_t buf)

Definition at line 3623 of file arc.c.

static void arc_hdr_destroy ( arc_buf_hdr_t hdr) [static]

Definition at line 1688 of file arc.c.

void arc_init ( void  )

Definition at line 3924 of file arc.c.

static void arc_kmem_reap_now ( arc_reclaim_strategy_t  strat) [static]

Definition at line 2451 of file arc.c.

arc_buf_t* arc_loan_buf ( spa_t spa,
int  size 
)

Loan out an anonymous arc buffer.

Loaned buffers are not counted as in flight data by arc_tempreserve_space() until they are "returned". Loaned buffers must be returned to the arc before they can be used by the DMU or freed.

Definition at line 1484 of file arc.c.

void arc_loan_inuse_buf ( arc_buf_t buf,
void *  tag 
)

Detach an arc_buf from a dbuf (tag)

Definition at line 1513 of file arc.c.

static void arc_lowmem ( void *arg  __unused,
int howto  __unused 
) [static]

Definition at line 3900 of file arc.c.

static int arc_memory_throttle ( uint64_t  reserve,
uint64_t  inflight_data,
uint64_t  txg 
) [static]

Definition at line 3775 of file arc.c.

int arc_read ( zio_t pio,
spa_t spa,
const blkptr_t bp,
arc_buf_t pbuf,
arc_done_func_t done,
void *  private,
int  priority,
int  zio_flags,
uint32_t *  arc_flags,
const zbookmark_t zb 
)

"Read" the block at the specified DVA (in bp) via the cache.

If the block is found in the cache, invoke the provided callback immediately and return. Note that the `zio' parameter in the callback will be NULL in this case, since no IO was required. If the block is not in the cache pass the read request on to the spa with a substitute callback function, so that the requested block will be added to the cache.

If a read request arrives for a block that has a read in-progress, either wait for the in-progress read to complete (and return the results); or, if this is a read with a "done" func, add a record to the read to invoke the "done" func when the read completes, and return; or just return.

arc_read_done() will invoke all the requested "done" functions for readers of this block.

Normal callers should use arc_read and pass the arc buffer and offset for the bp. But if you know you don't need locking, you can use arc_read_nolock.

Definition at line 3076 of file arc.c.

static void arc_read_done ( zio_t zio) [static]

Definition at line 2925 of file arc.c.

int arc_read_nolock ( zio_t pio,
spa_t spa,
const blkptr_t bp,
arc_done_func_t done,
void *  private,
int  priority,
int  zio_flags,
uint32_t *  arc_flags,
const zbookmark_t zb 
)

Definition at line 3103 of file arc.c.

static int arc_reclaim_needed ( void  ) [static]

Definition at line 2378 of file arc.c.

static void arc_reclaim_thread ( void *dummy  __unused) [static]

Definition at line 2495 of file arc.c.

void arc_release ( arc_buf_t buf,
void *  tag 
)

Release this buffer from the cache.

This must be done after a read and prior to modifying the buffer contents. If the buffer has more than one reference, we must make a new hdr for the buffer.

Definition at line 3474 of file arc.c.

int arc_release_bp ( arc_buf_t buf,
void *  tag,
blkptr_t bp,
spa_t spa,
zbookmark_t zb 
)

Release this buffer.

If it does not match the provided BP, fill it with that block's contents.

Definition at line 3604 of file arc.c.

int arc_released ( arc_buf_t buf)

Definition at line 3612 of file arc.c.

void arc_return_buf ( arc_buf_t buf,
void *  tag 
)

Return a loaned arc buffer to the arc.

Definition at line 1498 of file arc.c.

void arc_set_callback ( arc_buf_t buf,
arc_evict_func_t func,
void *  private 
)

Definition at line 3367 of file arc.c.

void arc_shrink ( void  )

Definition at line 2347 of file arc.c.

void arc_space_consume ( uint64_t  space,
arc_space_type_t  type 
)

Definition at line 1377 of file arc.c.

void arc_space_return ( uint64_t  space,
arc_space_type_t  type 
)

Definition at line 1401 of file arc.c.

void arc_tempreserve_clear ( uint64_t  reserve)

Definition at line 3832 of file arc.c.

int arc_tempreserve_space ( uint64_t  reserve,
uint64_t  txg 
)

Definition at line 3839 of file arc.c.

zio_t* arc_write ( zio_t pio,
spa_t spa,
uint64_t  txg,
blkptr_t bp,
arc_buf_t buf,
boolean_t  l2arc,
const zio_prop_t zp,
arc_done_func_t ready,
arc_done_func_t done,
void *  private,
int  priority,
int  zio_flags,
const zbookmark_t zb 
)

Definition at line 3746 of file arc.c.

static void arc_write_done ( zio_t zio) [static]

Definition at line 3675 of file arc.c.

static void arc_write_ready ( zio_t zio) [static]

Definition at line 3647 of file arc.c.

static int buf_cons ( void *  vbuf,
void *  unused,
int  kmflag 
) [static]

Definition at line 959 of file arc.c.

static void buf_dest ( void *  vbuf,
void *  unused 
) [static]

Definition at line 990 of file arc.c.

static void buf_discard_identity ( arc_buf_hdr_t hdr) [static]

Definition at line 823 of file arc.c.

static void buf_fini ( void  ) [static]

Definition at line 926 of file arc.c.

static uint64_t buf_hash ( uint64_t  spa,
const dva_t dva,
uint64_t  birth 
) [static]

Definition at line 796 of file arc.c.

static arc_buf_hdr_t* buf_hash_find ( uint64_t  spa,
const dva_t dva,
uint64_t  birth,
kmutex_t **  lockp 
) [static]

Definition at line 832 of file arc.c.

static arc_buf_hdr_t* buf_hash_insert ( arc_buf_hdr_t buf,
kmutex_t **  lockp 
) [static]

Insert an entry into the hash table.

If there is already an element equal to elem in the hash table, then the already existing element will be returned and the new element will not be inserted. Otherwise returns NULL.

Definition at line 858 of file arc.c.

static void buf_hash_remove ( arc_buf_hdr_t buf) [static]

Definition at line 894 of file arc.c.

static void buf_init ( void  ) [static]

Definition at line 1016 of file arc.c.

static void get_buf_info ( arc_buf_hdr_t ab,
arc_state_t state,
list_t **  list,
kmutex_t **  lock 
) [static]

Definition at line 1204 of file arc.c.

static int hdr_cons ( void *  vbuf,
void *  unused,
int  kmflag 
) [static]

Constructor callback - called when the cache is empty and a new buf is requested.

Definition at line 944 of file arc.c.

static void hdr_dest ( void *  vbuf,
void *  unused 
) [static]

Destructor callback - called when a cached buf is no longer required.

Definition at line 977 of file arc.c.

static void hdr_recl ( void *  unused) [static]

Reclaim callback -- invoked when memory is low.

Definition at line 1004 of file arc.c.

void l2arc_add_vdev ( spa_t spa,
vdev_t vd 
)

Add a vdev for use by the L2ARC.

By this point the spa has already validated the vdev and opened it.

Definition at line 5078 of file arc.c.

static l2arc_dev_t* l2arc_dev_get_next ( void  ) [static]

Cycle through L2ARC devices.

This is how L2ARC load balances. If a device is returned, this also returns holding the spa config lock.

Definition at line 4373 of file arc.c.

static void l2arc_do_free_on_write ( ) [static]

Free buffers that were tagged for destruction.

Definition at line 4433 of file arc.c.

static void l2arc_evict ( l2arc_dev_t dev,
uint64_t  distance,
boolean_t  all 
) [static]

Evict buffers from the device write hand to the distance specified in bytes.

This distance may span populated buffers, or it may span nothing. This is clearing a region on the L2ARC device ready for writing. If the 'all' boolean is set, every buffer is evicted.

Definition at line 4649 of file arc.c.

static void l2arc_feed_thread ( void *dummy  __unused) [static]

Feed the L2ARC with buffers from the ARC at regular intervals.

This thread is the beating heart of the L2ARC.

Definition at line 4965 of file arc.c.

void l2arc_fini ( void  )

Definition at line 5180 of file arc.c.

static void l2arc_hdr_stat_add ( void  ) [static]

Definition at line 4355 of file arc.c.

static void l2arc_hdr_stat_remove ( void  ) [static]

Definition at line 4362 of file arc.c.

void l2arc_init ( void  )

Definition at line 5158 of file arc.c.

static list_t* l2arc_list_locked ( int  list_num,
kmutex_t **  lock 
) [static]

This is the list priority from which the L2ARC will search for pages to cache.

This is used within loops (0..3) to cycle through lists in the desired order. This order can have a significant effect on cache performance.

Currently the metadata lists are hit first, MFU then MRU, followed by the data lists. This function returns a locked list, and also returns the lock pointer.

Definition at line 4611 of file arc.c.

static void l2arc_read_done ( zio_t zio) [static]

A read to a cache device completed.

Validate buffer contents before handing over to the regular ARC routines.

Definition at line 4534 of file arc.c.

void l2arc_remove_vdev ( vdev_t vd)

Remove a vdev from the L2ARC.

Definition at line 5123 of file arc.c.

void l2arc_start ( void  )

Definition at line 5201 of file arc.c.

void l2arc_stop ( void  )

Definition at line 5211 of file arc.c.

boolean_t l2arc_vdev_present ( vdev_t vd)

Definition at line 5058 of file arc.c.

static uint64_t l2arc_write_buffers ( spa_t spa,
l2arc_dev_t dev,
uint64_t  target_sz 
) [static]

Find and write ARC buffers to the L2ARC device.

An ARC_L2_WRITING flag is set so that the L2ARC buffers are not valid for reading until they have completed writing.

Definition at line 4780 of file arc.c.

static void l2arc_write_done ( zio_t zio) [static]

A write to a cache device has completed.

Update all headers to allow reads from these buffers to begin.

Definition at line 4458 of file arc.c.

static boolean_t l2arc_write_eligible ( uint64_t  spa_guid,
arc_buf_hdr_t ab 
) [static]

Definition at line 4289 of file arc.c.

static clock_t l2arc_write_interval ( clock_t  began,
uint64_t  wanted,
uint64_t  wrote 
) [static]

Definition at line 4333 of file arc.c.

static uint64_t l2arc_write_size ( l2arc_dev_t dev) [static]

Definition at line 4319 of file arc.c.

static int remove_reference ( arc_buf_hdr_t ab,
kmutex_t *  hash_lock,
void *  tag 
) [static]

Definition at line 1253 of file arc.c.

SYSCTL_DECL ( _vfs_zfs  )
SYSCTL_INT ( _vfs_zfs  ,
OID_AUTO  ,
l2arc_noprefetch  ,
CTLFLAG_RW  ,
l2arc_noprefetch,
,
"don't cache prefetch bufs"   
)
SYSCTL_INT ( _vfs_zfs  ,
OID_AUTO  ,
l2arc_norw  ,
CTLFLAG_RW  ,
l2arc_norw,
,
"no reads during writes"   
)
SYSCTL_INT ( _vfs_zfs  ,
OID_AUTO  ,
l2arc_feed_again  ,
CTLFLAG_RW  ,
l2arc_feed_again,
,
"turbo warmup"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
l2arc_feed_min_ms  ,
CTLFLAG_RW  ,
l2arc_feed_min_ms,
,
"min interval milliseconds"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mru_metadata_lsize  ,
CTLFLAG_RD  ,
&ARC_mru.  arcs_lsize[ARC_BUFC_METADATA],
,
"size of metadata in mru state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mru_ghost_data_lsize  ,
CTLFLAG_RD  ,
&ARC_mru_ghost.  arcs_lsize[ARC_BUFC_DATA],
,
"size of data in mru ghost state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mfu_data_lsize  ,
CTLFLAG_RD  ,
&ARC_mfu.  arcs_lsize[ARC_BUFC_DATA],
,
"size of data in mfu state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mfu_ghost_data_lsize  ,
CTLFLAG_RD  ,
&ARC_mfu_ghost.  arcs_lsize[ARC_BUFC_DATA],
,
"size of data in mfu ghost state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mru_data_lsize  ,
CTLFLAG_RD  ,
&ARC_mru.  arcs_lsize[ARC_BUFC_DATA],
,
"size of data in mru state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mru_ghost_size  ,
CTLFLAG_RD  ,
&ARC_mru_ghost.  arcs_size,
,
"size of mru ghost state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mfu_ghost_metadata_lsize  ,
CTLFLAG_RD  ,
&ARC_mfu_ghost.  arcs_lsize[ARC_BUFC_METADATA],
,
"size of metadata in mfu ghost state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
l2arc_write_max  ,
CTLFLAG_RW  ,
l2arc_write_max,
,
"max write size"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mfu_size  ,
CTLFLAG_RD  ,
&ARC_mfu.  arcs_size,
,
"size of mfu state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
arc_meta_limit  ,
CTLFLAG_RW  ,
arc_meta_limit,
,
"ARC metadata limit"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mru_size  ,
CTLFLAG_RD  ,
&ARC_mru.  arcs_size,
,
"size of mru state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
anon_data_lsize  ,
CTLFLAG_RD  ,
&ARC_anon.  arcs_lsize[ARC_BUFC_DATA],
,
"size of anonymous state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
l2arc_feed_secs  ,
CTLFLAG_RW  ,
l2arc_feed_secs,
,
"interval seconds"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
arc_meta_used  ,
CTLFLAG_RD  ,
arc_meta_used,
,
"ARC metadata used"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
l2arc_write_boost  ,
CTLFLAG_RW  ,
l2arc_write_boost,
,
"extra write during warmup"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mfu_metadata_lsize  ,
CTLFLAG_RD  ,
&ARC_mfu.  arcs_lsize[ARC_BUFC_METADATA],
,
"size of metadata in mfu state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mru_ghost_metadata_lsize  ,
CTLFLAG_RD  ,
&ARC_mru_ghost.  arcs_lsize[ARC_BUFC_METADATA],
,
"size of metadata in mru ghost state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
l2arc_headroom  ,
CTLFLAG_RW  ,
l2arc_headroom,
,
"number of dev writes"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
arc_min  ,
CTLFLAG_RDTUN  ,
zfs_arc_min,
,
"Minimum ARC size"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
anon_size  ,
CTLFLAG_RD  ,
&ARC_anon.  arcs_size,
,
"size of anonymous state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
arc_max  ,
CTLFLAG_RDTUN  ,
zfs_arc_max,
,
"Maximum ARC size"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
anon_metadata_lsize  ,
CTLFLAG_RD  ,
&ARC_anon.  arcs_lsize[ARC_BUFC_METADATA],
,
"size of anonymous state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
l2c_only_size  ,
CTLFLAG_RD  ,
&ARC_l2c_only.  arcs_size,
,
"size of mru state"   
)
SYSCTL_UQUAD ( _vfs_zfs  ,
OID_AUTO  ,
mfu_ghost_size  ,
CTLFLAG_RD  ,
&ARC_mfu_ghost.  arcs_size,
,
"size of mfu ghost state"   
)
TUNABLE_QUAD ( "vfs.zfs.arc_max"  ,
zfs_arc_max 
)
TUNABLE_QUAD ( "vfs.zfs.arc_meta_limit"  ,
zfs_arc_meta_limit 
)
TUNABLE_QUAD ( "vfs.zfs.arc_min"  ,
zfs_arc_min 
)

Variable Documentation

Definition at line 275 of file arc.c.

arc_state_t* arc_anon [static]

Definition at line 467 of file arc.c.

int arc_dead [static]

Definition at line 181 of file arc.c.

eventhandler_tag arc_event_lowmem = NULL [static]

Definition at line 3897 of file arc.c.

Definition at line 559 of file arc.c.

Definition at line 557 of file arc.c.

kmutex_t arc_eviction_mtx [static]

Definition at line 558 of file arc.c.

int arc_grow_retry = 60 [static]

number of seconds before growing cache again

Definition at line 167 of file arc.c.

kstat_t* arc_ksp

Definition at line 466 of file arc.c.

Definition at line 472 of file arc.c.

Definition at line 280 of file arc.c.

uint64_t arc_loaned_bytes [static]

Definition at line 490 of file arc.c.

kmutex_t arc_lowmem_lock [static]

Definition at line 3895 of file arc.c.

uint64_t arc_meta_limit [static]

Definition at line 492 of file arc.c.

uint64_t arc_meta_max = 0 [static]

Definition at line 493 of file arc.c.

uint64_t arc_meta_used [static]

Definition at line 491 of file arc.c.

arc_state_t* arc_mfu [static]

Definition at line 470 of file arc.c.

arc_state_t ARC_mfu [static]

Definition at line 278 of file arc.c.

Definition at line 471 of file arc.c.

Definition at line 279 of file arc.c.

minimum lifespan of a prefetch block in clock ticks (initialized in arc_init())

Definition at line 179 of file arc.c.

arc_state_t ARC_mru [static]

Definition at line 276 of file arc.c.

arc_state_t* arc_mru [static]

Definition at line 468 of file arc.c.

Definition at line 469 of file arc.c.

Definition at line 277 of file arc.c.

int arc_no_grow [static]

Don't try to grow cache size.

Definition at line 488 of file arc.c.

char* arc_onloan_tag = "onloan" [static]

Definition at line 1475 of file arc.c.

int arc_p_min_shift = 4 [static]

shift of arc_c for calculating both min and max arc_p

Definition at line 170 of file arc.c.

kcondvar_t arc_reclaim_thr_cv [static]

used to signal reclaim thr

Definition at line 151 of file arc.c.

kmutex_t arc_reclaim_thr_lock [static]

Definition at line 150 of file arc.c.

uint_t arc_reduce_dnlc_percent = ARC_REDUCE_DNLC_PERCENT

Definition at line 159 of file arc.c.

int arc_shrink_shift = 5 [static]

log2(fraction of arc to reclaim)

Definition at line 173 of file arc.c.

Definition at line 355 of file arc.c.

uint64_t arc_tempreserve [static]

Definition at line 489 of file arc.c.

uint8_t arc_thread_exit [static]

Definition at line 152 of file arc.c.

boolean_t arc_warm [static]

The arc has filled available memory and has now warmed up.

Definition at line 187 of file arc.c.

kmem_cache_t* buf_cache [static]

Definition at line 923 of file arc.c.

Definition at line 634 of file arc.c.

kmem_cache_t* hdr_cache [static]

Definition at line 922 of file arc.c.

kmutex_t l2arc_buflist_mtx [static]

mutex for all buflists

Definition at line 754 of file arc.c.

last device used

Definition at line 753 of file arc.c.

list_t L2ARC_dev_list [static]

device list

Definition at line 750 of file arc.c.

list_t* l2arc_dev_list [static]

device list pointer

Definition at line 751 of file arc.c.

kmutex_t l2arc_dev_mtx [static]

device list mutex

Definition at line 752 of file arc.c.

kcondvar_t l2arc_feed_thr_cv [static]

Definition at line 788 of file arc.c.

kmutex_t l2arc_feed_thr_lock [static]

Definition at line 787 of file arc.c.

list_t* l2arc_free_on_write [static]

free after write list ptr

Definition at line 756 of file arc.c.

list_t L2ARC_free_on_write [static]

free after write buf list

Definition at line 755 of file arc.c.

kmutex_t l2arc_free_on_write_mtx [static]

mutex for list

Definition at line 757 of file arc.c.

uint64_t l2arc_ndev [static]

number of devices

Definition at line 758 of file arc.c.

uint8_t l2arc_thread_exit [static]

Definition at line 789 of file arc.c.

int needfree = 0 [static]

Definition at line 2375 of file arc.c.

Definition at line 200 of file arc.c.

Definition at line 202 of file arc.c.

Definition at line 201 of file arc.c.

uint64_t zfs_crc64_table[256]

Definition at line 643 of file arc.c.

Definition at line 203 of file arc.c.

Definition at line 59 of file dsl_pool.c.

kmem_cache_t* zio_buf_cache[]

Definition at line 100 of file zio.c.

kmem_cache_t* zio_data_buf_cache[]

Definition at line 101 of file zio.c.

 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines