Index: vfs_aio.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_aio.c,v retrieving revision 1.116 diff -u -r1.116 vfs_aio.c --- vfs_aio.c 23 Feb 2002 11:12:54 -0000 1.116 +++ vfs_aio.c 5 Mar 2002 14:19:54 -0000 @@ -57,6 +57,10 @@ #include "opt_vfs_aio.h" +/* + * Counter for allocating reference ids to new jobs. Wrapped to 1 on + * overflow. + */ static long jobrefid; #define JOBST_NULL 0x0 @@ -98,59 +102,68 @@ #define AIOD_LIFETIME_DEFAULT (30 * hz) #endif -static int max_aio_procs = MAX_AIO_PROCS; -static int num_aio_procs = 0; -static int target_aio_procs = TARGET_AIO_PROCS; -static int max_queue_count = MAX_AIO_QUEUE; -static int num_queue_count = 0; -static int num_buf_aio = 0; -static int num_aio_resv_start = 0; -static int aiod_timeout; -static int aiod_lifetime; -static int unloadable = 0; - -static int max_aio_per_proc = MAX_AIO_PER_PROC; -static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC; -static int max_buf_aio = MAX_BUF_AIO; - -SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt"); - -SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, - CTLFLAG_RW, &max_aio_per_proc, 0, ""); - -SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, - CTLFLAG_RW, &max_aio_queue_per_proc, 0, ""); +SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "Async IO management"); +static int max_aio_procs = MAX_AIO_PROCS; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, - CTLFLAG_RW, &max_aio_procs, 0, ""); + CTLFLAG_RW, &max_aio_procs, 0, + "Maximum number of kernel threads to use for handling async IO "); +static int num_aio_procs = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, - CTLFLAG_RD, &num_aio_procs, 0, ""); + CTLFLAG_RD, &num_aio_procs, 0, + "Number of presently active kernel threads for async IO"); -SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, - CTLFLAG_RD, &num_queue_count, 0, ""); +/* + * The code will adjust the actual number of AIO processes towards this + * number when it gets a chance. 
+ */ +static int target_aio_procs = TARGET_AIO_PROCS; +SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs, + 0, "Preferred number of ready kernel threads for async IO"); -SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, - CTLFLAG_RW, &max_queue_count, 0, ""); +static int max_queue_count = MAX_AIO_QUEUE; +SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0, + "Maximum number of aio requests to queue, globally"); -SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, - CTLFLAG_RW, &target_aio_procs, 0, ""); +static int num_queue_count = 0; +SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0, + "Number of queued aio requests"); -SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, - CTLFLAG_RW, &max_buf_aio, 0, ""); +static int num_buf_aio = 0; +SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0, + "Number of aio requests presently handled by the buf subsystem"); -SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, - CTLFLAG_RD, &num_buf_aio, 0, ""); +/* Number of async I/O threads in the process of being started */ +/* XXX This should be local to _aio_aqueue() */ +static int num_aio_resv_start = 0; -SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, - CTLFLAG_RW, &aiod_lifetime, 0, ""); +static int aiod_timeout; +SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout, CTLFLAG_RW, &aiod_timeout, 0, + "Timeout value for synchronous aio operations"); -SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout, - CTLFLAG_RW, &aiod_timeout, 0, ""); +static int aiod_lifetime; +SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0, + "Maximum lifetime for idle aiod"); +static int unloadable = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, unloadable, CTLFLAG_RW, &unloadable, 0, "Allow unload of aio (not recommended)"); + +static int max_aio_per_proc = MAX_AIO_PER_PROC; +SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc, + 0, "Maximum active aio requests per process (stored in the 
process)"); + +static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC; +SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW, + &max_aio_queue_per_proc, 0, + "Maximum queued aio requests per process (stored in the process)"); + +static int max_buf_aio = MAX_BUF_AIO; +SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0, + "Maximum buf aio requests per process (stored in the process)"); + struct aiocblist { TAILQ_ENTRY(aiocblist) list; /* List of jobs */ TAILQ_ENTRY(aiocblist) plist; /* List of jobs for proc */ @@ -227,7 +240,8 @@ #define KAIO_RUNDOWN 0x1 /* process is being run down */ #define KAIO_WAKEUP 0x2 /* wakeup process when there is a significant event */ -static TAILQ_HEAD(,aiothreadlist) aio_freeproc, aio_activeproc; +static TAILQ_HEAD(,aiothreadlist) aio_activeproc; /* Active daemons */ +static TAILQ_HEAD(,aiothreadlist) aio_freeproc; /* Idle daemons */ static TAILQ_HEAD(,aiocblist) aio_jobs; /* Async job list */ static TAILQ_HEAD(,aiocblist) aio_bufjobs; /* Phys I/O job list */ @@ -249,12 +263,23 @@ static void filt_aiodetach(struct knote *kn); static int filt_aio(struct knote *kn, long hint); -static vm_zone_t kaio_zone, aiop_zone, aiocb_zone, aiol_zone; -static vm_zone_t aiolio_zone; +/* + * Zones for: + * kaio Per process async io info + * aiop async io thread data + * aiocb async io jobs + * aiol list io job pointer - internal to aio_suspend XXX + * aiolio list io jobs + */ +static vm_zone_t kaio_zone, aiop_zone, aiocb_zone, aiol_zone, aiolio_zone; +/* kqueue filters for aio */ static struct filterops aio_filtops = { 0, filt_aioattach, filt_aiodetach, filt_aio }; +/* + * Main operations function for use as a kernel module. + */ static int aio_modload(struct module *module, int cmd, void *arg) { @@ -321,6 +346,9 @@ jobrefid = 1; } +/* + * Callback for unload of AIO when used as a module. + */ static int aio_unload(void) { @@ -784,8 +812,10 @@ mycp->p_fd = NULL; /* The daemon resides in its own pgrp. 
*/ - MALLOC(newpgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO); - MALLOC(newsess, struct session *, sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO); + MALLOC(newpgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, + M_WAITOK | M_ZERO); + MALLOC(newsess, struct session *, sizeof(struct session), M_SESSION, + M_WAITOK | M_ZERO); PGRPSESS_XLOCK(); enterpgrp(mycp, mycp->p_pid, newpgrp, newsess); @@ -1931,6 +1961,7 @@ return EINVAL; } +/* syscall - asynchronous read from a file (REALTIME) */ int aio_read(struct thread *td, struct aio_read_args *uap) { @@ -1938,6 +1969,7 @@ return aio_aqueue(td, uap->aiocbp, LIO_READ); } +/* syscall - asynchronous write to a file (REALTIME) */ int aio_write(struct thread *td, struct aio_write_args *uap) { @@ -1945,6 +1977,7 @@ return aio_aqueue(td, uap->aiocbp, LIO_WRITE); } +/* syscall - XXX undocumented */ int lio_listio(struct thread *td, struct lio_listio_args *uap) { @@ -2210,6 +2243,7 @@ } } +/* syscall - wait for the next completion of an aio request */ int aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap) { @@ -2285,6 +2319,7 @@ } } +/* kqueue attach function */ static int filt_aioattach(struct knote *kn) { @@ -2304,6 +2339,7 @@ return (0); } +/* kqueue detach function */ static void filt_aiodetach(struct knote *kn) { @@ -2312,6 +2348,7 @@ SLIST_REMOVE(&aiocbe->klist, kn, knote, kn_selnext); } +/* kqueue filter function */ /*ARGSUSED*/ static int filt_aio(struct knote *kn, long hint) Index: vfs_bio.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_bio.c,v retrieving revision 1.300 diff -u -r1.300 vfs_bio.c --- vfs_bio.c 27 Feb 2002 09:51:33 -0000 1.300 +++ vfs_bio.c 5 Mar 2002 14:26:48 -0000 @@ -63,8 +63,11 @@ bwrite }; +/* + * XXX buf is global because kern_shutdown.c and ffs_checkoverlap has + * carnal knowledge of buffers. This knowledge should be moved to vfs_bio.c. 
+ */ struct buf *buf; /* buffer header pool */ -struct swqueue bswlist; struct mtx buftimelock; /* Interlock on setting prio and timo */ static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, @@ -78,10 +81,82 @@ static void vfs_vmio_release(struct buf *bp); static void vfs_backgroundwritedone(struct buf *bp); static int flushbufqueues(void); +static void buf_daemon __P((void)); +int vmiodirenable = TRUE; +SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, + "Use the VM system for directory writes"); +int runningbufspace; +SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, + "Amount of presently outstanding async buffer io"); +static int bufspace; +SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, + "KVA memory used for bufs"); +static int maxbufspace; +SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0, + "Maximum allowed value of bufspace (including buf_daemon)"); +static int bufmallocspace; +SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, + "Amount of malloced memory for buffers"); +static int maxbufmallocspace; +SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, + "Maximum amount of malloced memory for buffers"); +static int lobufspace; +SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0, + "Minimum amount of buffers we want to have"); +static int hibufspace; +SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0, + "Maximum allowed value of bufspace (excluding buf_daemon)"); +static int bufreusecnt; +SYSCTL_INT(_vfs, OID_AUTO, bufreusecnt, CTLFLAG_RW, &bufreusecnt, 0, + "Number of times we have reused a buffer"); +static int buffreekvacnt; +SYSCTL_INT(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, 0, + "Number of times we have freed the KVA space from some buffer"); +static int bufdefragcnt; +SYSCTL_INT(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, 0, + "Number of 
times we have had to repeat buffer allocation to defragment"); +static int lorunningspace; +SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0, + "Minimum preferred space used for in-progress I/O"); +static int hirunningspace; +SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0, + "Maximum amount of space to use for in-progress I/O"); +static int numdirtybuffers; +SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0, + "Number of buffers that are dirty (have unwritten changes) at the moment"); +static int lodirtybuffers; +SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0, + "How many buffers we want to have free before bufdaemon can sleep"); +static int hidirtybuffers; +SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0, + "When the number of dirty buffers is considered severe"); +static int numfreebuffers; +SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, + "Number of free buffers"); +static int lofreebuffers; +SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, + "XXX Unused"); +static int hifreebuffers; +SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, + "XXX Complicatedly unused"); +static int getnewbufcalls; +SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, &getnewbufcalls, 0, + "Number of calls to getnewbuf"); +static int getnewbufrestarts; +SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0, + "Number of times getnewbuf has had to restart a buffer acquisition"); +static int dobkgrdwrite = 1; +SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, + "Do background writes (honoring the BX_BKGRDWRITE flag)?"); + +/* + * Wakeup point for bufdaemon, as well as indicator of whether it is already + * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it + * is idling. 
+ */ static int bd_request; -static void buf_daemon __P((void)); /* * bogus page -- for I/O to/from partially complete buffers * this is a temporary solution to the problem, but it is not @@ -90,69 +165,54 @@ * but the code is intricate enough already. */ vm_page_t bogus_page; -int vmiodirenable = TRUE; -int runningbufspace; + +/* + * Offset for bogus_page. + * XXX bogus_offset should be local to bufinit + */ static vm_offset_t bogus_offset; -static int bufspace, maxbufspace, - bufmallocspace, maxbufmallocspace, lobufspace, hibufspace; -static int bufreusecnt, bufdefragcnt, buffreekvacnt; +/* + * Synchronization (sleep/wakeup) variable for active buffer space requests. + * Set when wait starts, cleared prior to wakeup(). + * Used in runningbufwakeup() and waitrunningbufspace(). + */ +static int runningbufreq; + +/* + * Synchronization (sleep/wakeup) variable for buffer requests. + * Can contain the VFS_BIO_NEED flags defined below; setting/clearing is done + * by and/or. + * Used in numdirtywakeup(), bufspacewakeup(), bufcountwakeup(), bwillwrite(), + * getnewbuf(), and getblk(). 
+ */ static int needsbuffer; -static int lorunningspace, hirunningspace, runningbufreq; -static int numdirtybuffers, lodirtybuffers, hidirtybuffers; -static int numfreebuffers, lofreebuffers, hifreebuffers; -static int getnewbufcalls; -static int getnewbufrestarts; - -SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, - &numdirtybuffers, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, - &lodirtybuffers, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, - &hidirtybuffers, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, - &numfreebuffers, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, - &lofreebuffers, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, - &hifreebuffers, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, - &runningbufspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, - &lorunningspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, - &hirunningspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, - &maxbufspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, - &hibufspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, - &lobufspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, - &bufspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, - &maxbufmallocspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, - &bufmallocspace, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, - &getnewbufcalls, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, - &getnewbufrestarts, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, - &vmiodirenable, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, - &bufdefragcnt, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, - &buffreekvacnt, 0, ""); -SYSCTL_INT(_vfs, OID_AUTO, bufreusecnt, CTLFLAG_RW, - &bufreusecnt, 0, ""); +/* + * Mask for index 
into the buffer hash table, which needs to be a power of 2 in + * size. Set in kern_vfs_bio_buffer_alloc. + */ static int bufhashmask; -static LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash; -struct bqueues bufqueues[BUFFER_QUEUES] = { { 0 } }; -char *buf_wmesg = BUF_WMESG; -extern int vm_swap_size; +/* + * Hash table for all buffers, with a linked list hanging from each table + * entry. Set in kern_vfs_bio_buffer_alloc, initialized in buf_init. + */ +static LIST_HEAD(bufhashhdr, buf) *bufhashtbl; + +/* + * Somewhere to store buffers when they are not in another list, to always + * have them in a list (and thus being able to use the same set of operations + * on them.) + */ +static struct bufhashhdr invalhash; +/* Queues for free buffers with various properties */ +static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } }; +/* + * Single global constant for BUF_WMESG, to avoid getting multiple references. + * buf_wmesg is referred to from macros. + */ +char *buf_wmesg = BUF_WMESG; #define VFS_BIO_NEED_ANY 0x01 /* any freeable buffer */ #define VFS_BIO_NEED_DIRTYFLUSH 0x02 /* waiting for dirty buffer flush */ @@ -301,6 +361,7 @@ } } +/* Wake up the buffer daemon if necessary */ static __inline__ void bd_wakeup(int dirtybuflevel) @@ -400,6 +461,7 @@ return(v); } +/* Initialize the buffer subsystem. Called before use of any buffers. */ void bufinit(void) { @@ -408,7 +470,6 @@ GIANT_REQUIRED; - TAILQ_INIT(&bswlist); LIST_INIT(&invalhash); mtx_init(&buftimelock, "buftime lock", MTX_DEF); @@ -656,9 +717,6 @@ * here. */ -int dobkgrdwrite = 1; -SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, ""); - int bwrite(struct buf * bp) { @@ -811,7 +869,8 @@ * If BX_BKGRDINPROG is not set in the original buffer it must * have been released and re-instantiated - which is not legal. 
*/ - KASSERT((origbp->b_xflags & BX_BKGRDINPROG), ("backgroundwritedone: lost buffer2")); + KASSERT((origbp->b_xflags & BX_BKGRDINPROG), + ("backgroundwritedone: lost buffer2")); origbp->b_xflags &= ~BX_BKGRDINPROG; if (origbp->b_xflags & BX_BKGRDWAIT) { origbp->b_xflags &= ~BX_BKGRDWAIT; @@ -931,7 +990,8 @@ bdirty(bp) struct buf *bp; { - KASSERT(bp->b_qindex == QUEUE_NONE, ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex)); + KASSERT(bp->b_qindex == QUEUE_NONE, + ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex)); bp->b_flags &= ~(B_RELBUF); bp->b_iocmd = BIO_WRITE; @@ -959,7 +1019,8 @@ bundirty(bp) struct buf *bp; { - KASSERT(bp->b_qindex == QUEUE_NONE, ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex)); + KASSERT(bp->b_qindex == QUEUE_NONE, + ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex)); if (bp->b_flags & B_DELWRI) { bp->b_flags &= ~B_DELWRI; @@ -1038,7 +1099,8 @@ GIANT_REQUIRED; - KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); + KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), + ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); s = splbio(); @@ -1228,7 +1290,8 @@ LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; /* buffers with junk contents */ - } else if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) || (bp->b_ioflags & BIO_ERROR)) { + } else if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) || + (bp->b_ioflags & BIO_ERROR)) { bp->b_flags |= B_INVAL; bp->b_xflags &= ~BX_BKGRDWRITE; if (bp->b_xflags & BX_BKGRDINPROG) @@ -1360,6 +1423,7 @@ splx(s); } +/* Give pages used by the bp back to the VM system (where possible) */ static void vfs_vmio_release(bp) struct buf *bp; @@ -1392,7 +1456,8 @@ * no valid data. 
We also free the page if the * buffer was used for direct I/O */ - if ((bp->b_flags & B_ASYNC) == 0 && !m->valid && m->hold_count == 0) { + if ((bp->b_flags & B_ASYNC) == 0 && !m->valid && + m->hold_count == 0) { vm_page_busy(m); vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); @@ -1884,7 +1949,7 @@ /* * Only clear bd_request if we have reached our low water - * mark. The buf_daemon normally waits 5 seconds and + * mark. The buf_daemon normally waits 1 second and * then incrementally flushes any dirty buffers that have * built up, within reason. * @@ -2191,7 +2256,7 @@ /* * The buffer is locked. B_CACHE is cleared if the buffer is - * invalid. Ohterwise, for a non-VMIO buffer, B_CACHE is set + * invalid. Otherwise, for a non-VMIO buffer, B_CACHE is set * and for a VMIO buffer B_CACHE is adjusted according to the * backing VM cache. */ @@ -3251,6 +3316,7 @@ bp->b_npages = index; } +/* Return pages associated with this buf to the vm system */ void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { @@ -3286,6 +3352,7 @@ #ifdef DDB #include +/* DDB command to show buffer data */ DB_SHOW_COMMAND(buffer, db_show_buffer) { /* get args */ Index: vfs_cache.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_cache.c,v retrieving revision 1.66 diff -u -r1.66 vfs_cache.c --- vfs_cache.c 17 Feb 2002 20:40:29 -0000 1.66 +++ vfs_cache.c 5 Mar 2002 14:19:55 -0000 @@ -96,7 +96,7 @@ SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); static u_long ncnegfactor = 16; /* ratio of negative entries */ SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); -static u_long numneg; /* number of cache entries allocated */ +static u_long numneg; /* number of cache entries allocated */ SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); static u_long numcache; /* number of cache entries allocated */ SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 
0, ""); @@ -110,6 +110,8 @@ static int doingcache = 1; /* 1 => enable the cache */ SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); + +/* Export size information to userland */ SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); @@ -625,15 +627,22 @@ }; #endif +/* + * XXX All of these sysctls would probably be more productive dead. + */ static int disablecwd; -SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, + "Disable the getcwd syscall"); +/* Various statistics for the getcwd syscall */ static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); + +/* Implementation of the getcwd syscall */ int __getcwd(td, uap) struct thread *td; @@ -731,8 +740,8 @@ SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") static int disablefullpath; -SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, - &disablefullpath, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, + "Disable the vn_fullpath function"); STATNODE(numfullpathcalls); STATNODE(numfullpathfail1); @@ -741,6 +750,10 @@ STATNODE(numfullpathfail4); STATNODE(numfullpathfound); +/* + * Retrieve the full filesystem path that corresponds to a vnode from the name + * cache (if available) + */ int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { Index: vfs_cluster.c 
=================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_cluster.c,v retrieving revision 1.115 diff -u -r1.115 vfs_cluster.c --- vfs_cluster.c 5 Nov 2001 18:48:53 -0000 1.115 +++ vfs_cluster.c 5 Mar 2002 14:19:55 -0000 @@ -57,7 +57,8 @@ #if defined(CLUSTERDEBUG) #include static int rcluster= 0; -SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, + "Debug VFS clustering code"); #endif static MALLOC_DEFINE(M_SEGMENT, "cluster_save buffer", "cluster_save buffer"); @@ -69,10 +70,16 @@ daddr_t blkno, long size, int run, struct buf *fbp)); static int write_behind = 1; -SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0, ""); +SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0, + "Cluster write-behind; 0: disable, 1: enable, 2: backed off"); +/* Page expended to mark partially backed buffers */ extern vm_page_t bogus_page; +/* + * Number of physical bufs (pbufs) this subsystem is allowed. + * Manipulated by vm_pager.c + */ extern int cluster_pbuf_freecnt; /* @@ -81,7 +88,8 @@ #define MAXRA 32 /* - * This replaces bread. + * Read data to a buf, including read-ahead if we find this to be beneficial. + * cluster_read replaces bread. */ int cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) Index: vfs_conf.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_conf.c,v retrieving revision 1.64 diff -u -r1.64 vfs_conf.c --- vfs_conf.c 24 Nov 2001 01:34:12 -0000 1.64 +++ vfs_conf.c 5 Mar 2002 14:32:21 -0000 @@ -71,6 +71,10 @@ #define ROOTNAME "root_device" +/* + * The vnode of the system's root (/ in the filesystem, without chroot + * active.) + */ struct vnode *rootvnode; /* @@ -293,6 +297,9 @@ } } +/* + * Local helper function for vfs_mountroot_ask. 
+ */ static void gets(char *cp) { @@ -367,6 +374,7 @@ return (1); } +/* Show the dev_t for a disk specified by name */ #ifdef DDB DB_SHOW_COMMAND(disk, db_getdiskbyname) { Index: vfs_default.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_default.c,v retrieving revision 1.56 diff -u -r1.56 vfs_default.c --- vfs_default.c 23 Oct 2001 01:23:41 -0000 1.56 +++ vfs_default.c 5 Mar 2002 14:19:55 -0000 @@ -109,6 +109,11 @@ VNODEOP_SET(default_vnodeop_opv_desc); +/* + * Series of placeholder functions for various error returns for + * VOPs. + */ + int vop_eopnotsupp(struct vop_generic_args *ap) { @@ -147,6 +152,9 @@ return (0); } +/* + * Used to make a defined VOP fall back to the default VOP. + */ int vop_defaultop(struct vop_generic_args *ap) { @@ -154,6 +162,9 @@ return (VOCALL(default_vnodeop_p, ap->a_desc->vdesc_offset, ap)); } +/* + * Helper function to panic on some bad VOPs in some filesystems. + */ int vop_panic(struct vop_generic_args *ap) { @@ -198,6 +209,14 @@ return (EOPNOTSUPP); } +/* + * vop_stdpathconf: + * + * Standard implementation of POSIX pathconf, to get information about limits + * for a filesystem. + * Override per filesystem for the case where the filesystem has smaller + * limits. + */ int vop_stdpathconf(ap) struct vop_pathconf_args /* { @@ -256,6 +275,7 @@ #endif } +/* See above. */ int vop_stdunlock(ap) struct vop_unlock_args /* { @@ -270,6 +290,7 @@ ap->a_td)); } +/* See above. 
*/ int vop_stdislocked(ap) struct vop_islocked_args /* { @@ -281,6 +302,7 @@ return (lockstatus(&ap->a_vp->v_lock, ap->a_td)); } +/* Mark the vnode inactive */ int vop_stdinactive(ap) struct vop_inactive_args /* { @@ -512,6 +534,7 @@ return (0); } +/* Create the VM system backing object for this vnode */ int vop_stdcreatevobject(ap) struct vop_createvobject_args /* { @@ -570,6 +593,7 @@ return (error); } +/* Destroy the VM system object associated with this vnode */ int vop_stddestroyvobject(ap) struct vop_destroyvobject_args /* { @@ -627,6 +651,7 @@ return (vp->v_object ? 0 : EINVAL); } +/* XXX Needs good comment and a manpage */ int vop_stdbmap(ap) struct vop_bmap_args /* { @@ -650,6 +675,7 @@ return (0); } +/* XXX Needs good comment and more info in the manpage. */ int vop_stdgetpages(ap) struct vop_getpages_args /* { @@ -665,6 +691,7 @@ ap->a_count, ap->a_reqpage); } +/* XXX Needs good comment and more info in the manpage. */ int vop_stdputpages(ap) struct vop_putpages_args /* { @@ -685,7 +712,7 @@ /* * vfs default ops - * used to fill the vfs fucntion table to get reasonable default return values. + * used to fill the vfs function table to get reasonable default return values. 
*/ int vfs_stdmount (mp, path, data, ndp, td) @@ -698,7 +725,7 @@ return (0); } -int +int vfs_stdunmount (mp, mntflags, td) struct mount *mp; int mntflags; @@ -707,7 +734,7 @@ return (0); } -int +int vfs_stdroot (mp, vpp) struct mount *mp; struct vnode **vpp; @@ -715,7 +742,7 @@ return (EOPNOTSUPP); } -int +int vfs_stdstatfs (mp, sbp, td) struct mount *mp; struct statfs *sbp; @@ -732,7 +759,7 @@ return (EOPNOTSUPP); } -int +int vfs_stdstart (mp, flags, td) struct mount *mp; int flags; Index: vfs_init.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_init.c,v retrieving revision 1.55 diff -u -r1.55 vfs_init.c --- vfs_init.c 29 Apr 2001 02:44:49 -0000 1.55 +++ vfs_init.c 5 Mar 2002 14:19:55 -0000 @@ -56,6 +56,11 @@ * The highest defined VFS number. */ int maxvfsconf = VFS_GENERIC + 1; + +/* + * Single-linked list of configured VFSes. + * New entries are added/deleted by vfs_register()/vfs_unregister() + */ struct vfsconf *vfsconf; /* @@ -81,10 +86,19 @@ /* Table of known descs (list of vnode op handlers "vop_access_desc") */ static struct vnodeop_desc **vfs_op_descs; -static int *vfs_op_desc_refs; /* reference counts */ +/* Reference counts for vfs_op_descs */ +static int *vfs_op_desc_refs; +/* Number of descriptions */ static int num_op_descs; +/* Number of entries in each description */ static int vfs_opv_numops; +/* + * Recalculate the operations vector/description (those parts of it that can + * be recalculated, that is.) + * XXX It may be preferable to replace this function with an invariant check + * and a set of functions that should keep the table invariant. + */ static void vfs_opv_recalc(void) { @@ -143,6 +157,7 @@ } } +/* Add a set of vnode operations (a description) to the table above. */ void vfs_add_vnodeops(const void *data) { @@ -210,6 +225,7 @@ vfs_opv_recalc(); } +/* Remove a vnode type from the vnode description table above. 
*/ void vfs_rm_vnodeops(const void *data) { @@ -302,6 +318,7 @@ } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vfsinit, NULL) +/* Register a new file system type in the global table */ int vfs_register(struct vfsconf *vfc) { @@ -347,6 +364,7 @@ } +/* Remove registration of a file system type */ int vfs_unregister(struct vfsconf *vfc) { @@ -382,6 +400,10 @@ return 0; } +/* + * Standard kernel module handling code for file system modules. + * Referenced from VFS_SET(). + */ int vfs_modevent(module_t mod, int type, void *data) { Index: vfs_vnops.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_vnops.c,v retrieving revision 1.135 diff -u -r1.135 vfs_vnops.c --- vfs_vnops.c 27 Feb 2002 18:32:13 -0000 1.135 +++ vfs_vnops.c 5 Mar 2002 14:20:47 -0000 @@ -266,14 +266,14 @@ return (error); } +/* + * Sequential heuristic - detect sequential operation + */ static __inline int sequential_heuristic(struct uio *uio, struct file *fp) { - /* - * Sequential heuristic - detect sequential operation - */ if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || uio->uio_offset == fp->f_nextoff) { /* @@ -499,6 +499,9 @@ return (error); } +/* + * Stat a vnode; implementation for the stat syscall + */ int vn_stat(vp, sb, td) struct vnode *vp; @@ -891,6 +894,9 @@ wakeup(&mp->mnt_flag); } +/* + * Implement kqueues for files by translating it to vnode operation. + */ static int vn_kqfilter(struct file *fp, struct knote *kn) {