--- kern/vfs_bio.c.orig	2012-12-24 15:36:43.000000000 -0500
+++ kern/vfs_bio.c	2012-12-24 15:46:58.000000000 -0500
@@ -1983,7 +1983,7 @@ restart:
 	bp->b_resid = 0;
 	bp->b_bcount = 0;
 	bp->b_npages = 0;
-	bp->b_dirtyoff = bp->b_dirtyend = 0;
+	buf_dirtyclear(bp);
 	bp->b_bufobj = NULL;
 	bp->b_pin_count = 0;
 	bp->b_fsprivate1 = NULL;
@@ -2538,12 +2538,8 @@ vfs_setdirty_locked_object(struct buf *b
 		 * dirty range.
 		 */
 
-		if (boffset < eoffset) {
-			if (bp->b_dirtyoff > boffset)
-				bp->b_dirtyoff = boffset;
-			if (bp->b_dirtyend < eoffset)
-				bp->b_dirtyend = eoffset;
-		}
+		if (boffset < eoffset)
+			buf_dirtyupdate(bp, boffset, eoffset, 1);
 	}
 }
 
@@ -3958,6 +3954,144 @@ bunpin_wait(struct buf *bp)
 	mtx_unlock(mtxp);
 }
 
+/*
+ * Update the dirty region list with the new dirty area. If the update
+ * can't be done due to the list being full, return 0.
+ */
+int
+buf_dirtyupdate(struct buf *bp, int off, int end, int force)
+{
+	int i, j, merge_pos;
+
+	/*
+	 * Work down the list until the dirty region is merged.
+	 */
+	merge_pos = -1;
+	for (i = 0; i < bp->b_dirtycnt; ) {
+		/*
+		 * Only merge with dirty region(s) that aren't before the
+		 * new dirty region's range.
+		 */
+		if (bp->b_dirtyendlist[i] >= off) {
+			if (end < bp->b_dirtyofflist[i]) {
+				if (merge_pos == -1) {
+					/* If not yet merged, insert here. */
+					if (bp->b_dirtycnt == B_DIRTYMAX)
+						break;
+					for (j = bp->b_dirtycnt - 1; j >= i;
+					    j--) {
+						bp->b_dirtyofflist[j + 1] =
+						    bp->b_dirtyofflist[j];
+						bp->b_dirtyendlist[j + 1] =
+						    bp->b_dirtyendlist[j];
+					}
+					bp->b_dirtyofflist[i] = off;
+					bp->b_dirtyendlist[i] = end;
+					bp->b_dirtycnt++;
+					merge_pos = i;
+				}
+				break;
+			}
+			/*
+			 * The new dirty region can be merged with this one.
+			 */
+			if (merge_pos == -1) {
+				/* Merging the new one in. */
+				if (off < bp->b_dirtyofflist[i])
+					bp->b_dirtyofflist[i] = off;
+				if (end > bp->b_dirtyendlist[i])
+					bp->b_dirtyendlist[i] = end;
+				merge_pos = i;
+				i++;
+			} else {
+				/* Merging additional region in. */
+				if (bp->b_dirtyendlist[i] >
+				    bp->b_dirtyendlist[merge_pos])
+					bp->b_dirtyendlist[merge_pos] =
+					    bp->b_dirtyendlist[i];
+				for (j = i; j < bp->b_dirtycnt - 1; j++) {
+					/*
+					 * Shift the entries down, since
+					 * entry [i] no longer exists.
+					 * Do not increment i, since it is
+					 * now at the next entry, due to the
+					 * shift down.
+					 */
+					bp->b_dirtyofflist[j] =
+					    bp->b_dirtyofflist[j + 1];
+					bp->b_dirtyendlist[j] =
+					    bp->b_dirtyendlist[j + 1];
+				}
+				bp->b_dirtycnt--;
+			}
+		} else
+			i++;
+	}
+	if (merge_pos == -1) {
+		/* Needs to be added. */
+		if (bp->b_dirtycnt == B_DIRTYMAX) {
+printf("dirtymax frc=%d\n", force);
+			if (force == 0)
+				return (0);
+			/*
+			 * Merge all regions into 1 so that all the dirty
+			 * data is covered.
+			 */
+			if (off < bp->b_dirtyofflist[0])
+				bp->b_dirtyofflist[0] = off;
+			if (end > bp->b_dirtyendlist[bp->b_dirtycnt - 1])
+				bp->b_dirtyendlist[0] = end;
+			else
+				bp->b_dirtyendlist[0] =
+				    bp->b_dirtyendlist[bp->b_dirtycnt - 1];
+			bp->b_dirtycnt = 1;
+		} else {
+			/* Append to the end of the list. */
+			bp->b_dirtyofflist[bp->b_dirtycnt] = off;
+			bp->b_dirtyendlist[bp->b_dirtycnt] = end;
+			bp->b_dirtycnt++;
+		}
+	}
+	return (1);
+}
+
+/*
+ * Trim the dirty list back to bcount.
+ */
+void
+buf_dirtytrim(struct buf *bp, int bcount)
+{
+	int i;
+
+	i = bp->b_dirtycnt - 1;
+	while (i >= 0) {
+		if (bp->b_dirtyendlist[i] <= bcount)
+			break;
+		else if (bp->b_dirtyofflist[i] >= bcount)
+			bp->b_dirtycnt--;
+		else {
+			bp->b_dirtyendlist[i] = bcount;
+			break;
+		}
+		i--;
+	}
+}
+
+/*
+ * Remove this element from the list.
+ */
+void
+buf_dirtydone(struct buf *bp, int pos)
+{
+	int i;
+
+	for (i = pos + 1; i < bp->b_dirtycnt; i++) {
+		bp->b_dirtyofflist[i - 1] = bp->b_dirtyofflist[i];
+		bp->b_dirtyendlist[i - 1] = bp->b_dirtyendlist[i];
+	}
+	bp->b_dirtycnt--;
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>
--- kern/vfs_cluster.c.orig	2012-12-13 08:27:15.000000000 -0500
+++ kern/vfs_cluster.c	2012-12-24 15:46:58.000000000 -0500
@@ -535,7 +535,7 @@ cluster_callback(bp)
 			tbp->b_ioflags |= BIO_ERROR;
 			tbp->b_error = error;
 		} else {
-			tbp->b_dirtyoff = tbp->b_dirtyend = 0;
+			buf_dirtyclear(tbp);
 			tbp->b_flags &= ~B_INVAL;
 			tbp->b_ioflags &= ~BIO_ERROR;
 			/*
@@ -964,8 +964,7 @@ cluster_wbuild(vp, size, start_lbn, len)
 			    bp->b_bufsize, bp->b_kvasize);
 		bp->b_kvasize = bp->b_bufsize;
 		totalwritten += bp->b_bufsize;
-		bp->b_dirtyoff = 0;
-		bp->b_dirtyend = bp->b_bufsize;
+		buf_dirtyupdate(bp, 0, bp->b_bufsize, 1);
 		bawrite(bp);
 
 		len -= i;
--- fs/nfsclient/nfs_clbio.c.orig	2012-12-14 16:43:33.000000000 -0500
+++ fs/nfsclient/nfs_clbio.c	2012-12-24 15:58:44.000000000 -0500
@@ -1143,35 +1143,21 @@ again:
 		 * situation with dirtyoff/end, we 0 both of them.
 		 */
 
-		if (bp->b_dirtyend > bcount) {
+		if (buf_dirtyend(bp) > bcount) {
 			ncl_printf("NFS append race @%lx:%d\n",
 			    (long)bp->b_blkno * DEV_BSIZE,
-			    bp->b_dirtyend - bcount);
-			bp->b_dirtyend = bcount;
+			    buf_dirtyend(bp) - bcount);
+			buf_dirtytrim(bp, bcount);
 		}
-		if (bp->b_dirtyoff >= bp->b_dirtyend)
-			bp->b_dirtyoff = bp->b_dirtyend = 0;
-
 		/*
-		 * If the new write will leave a contiguous dirty
-		 * area, just update the b_dirtyoff and b_dirtyend,
-		 * otherwise force a write rpc of the old dirty area.
-		 *
-		 * While it is possible to merge discontiguous writes due to
-		 * our having a B_CACHE buffer ( and thus valid read data
-		 * for the hole), we don't because it could lead to
-		 * significant cache coherency problems with multiple clients,
-		 * especially if locking is implemented later on.
-		 *
-		 * As an optimization we could theoretically maintain
-		 * a linked list of discontinuous areas, but we would still
-		 * have to commit them separately so there isn't much
-		 * advantage to it except perhaps a bit of asynchronization.
+		 * Merge/add the new dirty byte region to the list for
+		 * the buffer. If this cannot be done, buf_dirtyupdate()
+		 * will return 0, so do the synchronous write(s) now and
+		 * try it again.
 		 */
-		if (bp->b_dirtyend > 0 &&
-		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
+		if (n > 0 && buf_dirtyupdate(bp, on, on + n, 0) == 0) {
 			if (bwrite(bp) == EINTR) {
 				error = EINTR;
 				break;
@@ -1213,19 +1199,10 @@ again:
 		n = local_resid - uio->uio_resid;
 
 		/*
-		 * Only update dirtyoff/dirtyend if not a degenerate
-		 * condition.
+		 * Only set valid if not a degenerate condition.
 		 */
-		if (n > 0) {
-			if (bp->b_dirtyend > 0) {
-				bp->b_dirtyoff = min(on, bp->b_dirtyoff);
-				bp->b_dirtyend = max((on + n), bp->b_dirtyend);
-			} else {
-				bp->b_dirtyoff = on;
-				bp->b_dirtyend = on + n;
-			}
+		if (n > 0)
 			vfs_bio_set_valid(bp, on, n);
-		}
 
 		/*
 		 * If IO_SYNC do bwrite().
@@ -1571,7 +1548,7 @@ ncl_doio(struct vnode *vp, struct buf *b
 	struct uio *uiop;
 	struct nfsnode *np;
 	struct nfsmount *nmp;
-	int error = 0, iomode, must_commit = 0;
+	int error = 0, i, iomode, must_commit = 0;
 	struct uio uio;
 	struct iovec io;
 	struct proc *p = td ? td->td_proc : NULL;
@@ -1674,11 +1651,12 @@ ncl_doio(struct vnode *vp, struct buf *b
 			int retv;
 			off_t off;
 
-			off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
-			retv = ncl_commit(vp, off, bp->b_dirtyend-bp->b_dirtyoff,
-				bp->b_wcred, td);
+			off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
+			    buf_dirtyoff(bp);
+			retv = ncl_commit(vp, off, buf_dirtyend(bp) -
+			    buf_dirtyoff(bp), bp->b_wcred, td);
 			if (retv == 0) {
-				bp->b_dirtyoff = bp->b_dirtyend = 0;
+				buf_dirtyclear(bp);
 				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 				bp->b_resid = 0;
 				bufdone(bp);
@@ -1693,112 +1671,130 @@ ncl_doio(struct vnode *vp, struct buf *b
 		 * Setup for actual write
 		 */
 		mtx_lock(&np->n_mtx);
-		if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
-			bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
+		if ((off_t)bp->b_blkno * DEV_BSIZE + buf_dirtyend(bp) > np->n_size)
+			buf_dirtytrim(bp, (int)(np->n_size - (off_t)bp->b_blkno *
+			    DEV_BSIZE));
 		mtx_unlock(&np->n_mtx);
 
-		if (bp->b_dirtyend > bp->b_dirtyoff) {
-			io.iov_len = uiop->uio_resid = bp->b_dirtyend
-			    - bp->b_dirtyoff;
-			uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
-			    + bp->b_dirtyoff;
-			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
-			uiop->uio_rw = UIO_WRITE;
-			NFSINCRGLOBAL(newnfsstats.write_bios);
-
-			if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
-				iomode = NFSWRITE_UNSTABLE;
-			else
-				iomode = NFSWRITE_FILESYNC;
-
-			error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit,
-			    called_from_strategy);
-
-			/*
-			 * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
-			 * to cluster the buffers needing commit. This will allow
-			 * the system to submit a single commit rpc for the whole
-			 * cluster. We can do this even if the buffer is not 100%
-			 * dirty (relative to the NFS blocksize), so we optimize the
-			 * append-to-file-case.
-			 *
-			 * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
-			 * cleared because write clustering only works for commit
-			 * rpc's, not for the data portion of the write).
-			 */
+		if (bp->b_dirtycnt > 0) {
+			for (i = 0; i < bp->b_dirtycnt; ) {
+				io.iov_len = uiop->uio_resid = bp->b_dirtyendlist[i]
+				    - bp->b_dirtyofflist[i];
+				uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
+				    + bp->b_dirtyofflist[i];
+				io.iov_base = (char *)bp->b_data +
+				    bp->b_dirtyofflist[i];
+				uiop->uio_rw = UIO_WRITE;
+				NFSINCRGLOBAL(newnfsstats.write_bios);
+
+				if ((bp->b_flags &
+				    (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) ==
+				    B_ASYNC)
+					iomode = NFSWRITE_UNSTABLE;
+				else
+					iomode = NFSWRITE_FILESYNC;
 
-			if (!error && iomode == NFSWRITE_UNSTABLE) {
-				bp->b_flags |= B_NEEDCOMMIT;
-				if (bp->b_dirtyoff == 0
-				    && bp->b_dirtyend == bp->b_bcount)
-					bp->b_flags |= B_CLUSTEROK;
-			} else {
-				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
-			}
+				error = ncl_writerpc(vp, uiop, cr, &iomode,
+				    &must_commit, called_from_strategy);
 
-			/*
-			 * For an interrupted write, the buffer is still valid
-			 * and the write hasn't been pushed to the server yet,
-			 * so we can't set BIO_ERROR and report the interruption
-			 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
-			 * is not relevant, so the rpc attempt is essentially
-			 * a noop. For the case of a V3 write rpc not being
-			 * committed to stable storage, the block is still
-			 * dirty and requires either a commit rpc or another
-			 * write rpc with iomode == NFSV3WRITE_FILESYNC before
-			 * the block is reused. This is indicated by setting
-			 * the B_DELWRI and B_NEEDCOMMIT flags.
-			 *
-			 * EIO is returned by ncl_writerpc() to indicate a recoverable
-			 * write error and is handled as above, except that
-			 * B_EINTR isn't set. One cause of this is a stale stateid
-			 * error for the RPC that indicates recovery is required,
-			 * when called with called_from_strategy != 0.
-			 *
-			 * If the buffer is marked B_PAGING, it does not reside on
-			 * the vp's paging queues so we cannot call bdirty(). The
-			 * bp in this case is not an NFS cache block so we should
-			 * be safe. XXX
-			 *
-			 * The logic below breaks up errors into recoverable and
-			 * unrecoverable. For the former, we clear B_INVAL|B_NOCACHE
-			 * and keep the buffer around for potential write retries.
-			 * For the latter (eg ESTALE), we toss the buffer away (B_INVAL)
-			 * and save the error in the nfsnode. This is less than ideal
-			 * but necessary. Keeping such buffers around could potentially
-			 * cause buffer exhaustion eventually (they can never be written
-			 * out, so will get constantly be re-dirtied). It also causes
-			 * all sorts of vfs panics. For non-recoverable write errors,
-			 * also invalidate the attrcache, so we'll be forced to go over
-			 * the wire for this object, returning an error to user on next
-			 * call (most of the time).
-			 */
-			if (error == EINTR || error == EIO || error == ETIMEDOUT
-			    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
-				int s;
-
-				s = splbio();
-				bp->b_flags &= ~(B_INVAL|B_NOCACHE);
-				if ((bp->b_flags & B_PAGING) == 0) {
-					bdirty(bp);
-					bp->b_flags &= ~B_DONE;
+				/*
+				 * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
+				 * to cluster the buffers needing commit. This will
+				 * allow the system to submit a single commit rpc for
+				 * the whole cluster. We can do this even if the buffer
+				 * is not 100% dirty (relative to the NFS blocksize), so
+				 * we optimize the append-to-file-case.
+				 *
+				 * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
+				 * cleared because write clustering only works for
+				 * commit rpc's, not for the data portion of the write).
+				 */
+
+				if (!error && iomode == NFSWRITE_UNSTABLE) {
+					bp->b_flags |= B_NEEDCOMMIT;
+					if (bp->b_dirtyofflist[i] == 0
+					    && bp->b_dirtyendlist[i] == bp->b_bcount)
+						bp->b_flags |= B_CLUSTEROK;
+				} else {
+					bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
+				}
+
+				/*
+				 * For an interrupted write, the buffer is still valid
+				 * and the write hasn't been pushed to the server yet,
+				 * so we can't set BIO_ERROR and report the interruption
+				 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
+				 * is not relevant, so the rpc attempt is essentially
+				 * a noop. For the case of a V3 write rpc not being
+				 * committed to stable storage, the block is still
+				 * dirty and requires either a commit rpc or another
+				 * write rpc with iomode == NFSV3WRITE_FILESYNC before
+				 * the block is reused. This is indicated by setting
+				 * the B_DELWRI and B_NEEDCOMMIT flags.
+				 *
+				 * EIO is returned by ncl_writerpc() to indicate a
+				 * recoverable write error and is handled as above,
+				 * except that B_EINTR isn't set. One cause of this is a
+				 * stale stateid error for the RPC that indicates
+				 * recovery is required,
+				 * when called with called_from_strategy != 0.
+				 *
+				 * If the buffer is marked B_PAGING, it does not reside
+				 * on the vp's paging queues so we cannot call bdirty().
+				 * The bp in this case is not an NFS cache block so we
+				 * should be safe. XXX
+				 *
+				 * The logic below breaks up errors into recoverable
+				 * and unrecoverable. For the former, we clear
+				 * B_INVAL|B_NOCACHE and keep the buffer around for
+				 * potential write retries.
+				 * For the latter (eg ESTALE), we toss the buffer away
+				 * (B_INVAL) and save the error in the nfsnode. This is
+				 * less than ideal but necessary. Keeping such buffers
+				 * around could potentially cause buffer exhaustion
+				 * eventually (they can never be written out, so will
+				 * constantly get re-dirtied). It also causes all
+				 * sorts of vfs panics. For non-recoverable write
+				 * errors, also invalidate the attrcache, so we'll be
+				 * forced to go over the wire for this object,
+				 * returning an error to the user on next
+				 * call (most of the time).
+				 */
+				if (error == EINTR || error == EIO || error == ETIMEDOUT
+				    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
+					int s;
+
+					s = splbio();
+					bp->b_flags &= ~(B_INVAL|B_NOCACHE);
+					if ((bp->b_flags & B_PAGING) == 0) {
+						bdirty(bp);
+						bp->b_flags &= ~B_DONE;
+					}
+					if ((error == EINTR || error == ETIMEDOUT) &&
+					    (bp->b_flags & B_ASYNC) == 0)
+						bp->b_flags |= B_EINTR;
+					splx(s);
+					i++;
+				} else {
+					if (error) {
+						bp->b_ioflags |= BIO_ERROR;
+						bp->b_flags |= B_INVAL;
+						bp->b_error = np->n_error = error;
+						buf_dirtyclear(bp);
+						mtx_lock(&np->n_mtx);
+						np->n_flag |= NWRITEERR;
+						np->n_attrstamp = 0;
+						KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
+						mtx_unlock(&np->n_mtx);
+						i++;
+					} else
+						/*
+						 * Since buf_dirtydone() shifts the entries
+						 * down in the list, "i" shouldn't be
+						 * incremented.
+						 */
+						buf_dirtydone(bp, i);
 				}
-				if ((error == EINTR || error == ETIMEDOUT) &&
-				    (bp->b_flags & B_ASYNC) == 0)
-					bp->b_flags |= B_EINTR;
-				splx(s);
-			} else {
-				if (error) {
-					bp->b_ioflags |= BIO_ERROR;
-					bp->b_flags |= B_INVAL;
-					bp->b_error = np->n_error = error;
-					mtx_lock(&np->n_mtx);
-					np->n_flag |= NWRITEERR;
-					np->n_attrstamp = 0;
-					KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
-					mtx_unlock(&np->n_mtx);
-				}
-				bp->b_dirtyoff = bp->b_dirtyend = 0;
 			}
 		} else {
 			bp->b_resid = 0;
@@ -1850,10 +1846,9 @@ ncl_meta_setsize(struct vnode *vp, struc
 		bp = nfs_getcacheblk(vp, lbn, bufsize, td);
 		if (!bp)
 			return EINTR;
-		if (bp->b_dirtyoff > bp->b_bcount)
-			bp->b_dirtyoff = bp->b_bcount;
-		if (bp->b_dirtyend > bp->b_bcount)
-			bp->b_dirtyend = bp->b_bcount;
+		if (buf_dirtyoff(bp) > bp->b_bcount ||
+		    buf_dirtyend(bp) > bp->b_bcount)
+			buf_dirtytrim(bp, bp->b_bcount);
 		bp->b_flags |= B_RELBUF; /* don't leave garbage around */
 		brelse(bp);
 	} else {
--- fs/nfsclient/nfs_clvnops.c.orig	2012-12-14 16:43:33.000000000 -0500
+++ fs/nfsclient/nfs_clvnops.c	2012-12-24 15:46:58.000000000 -0500
@@ -2763,10 +2763,10 @@ again:
 			 */
 			bvec[bvecpos++] = bp;
 			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
-				bp->b_dirtyoff;
+				buf_dirtyoff(bp);
 			if (toff < off)
 				off = toff;
-			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
+			toff += (u_quad_t)(buf_dirtyend(bp) - buf_dirtyoff(bp));
 			if (toff > endoff)
 				endoff = toff;
 		}
@@ -2788,9 +2788,9 @@ again:
 			off_t off, size;
 			bp = bvec[i];
 			off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
-				bp->b_dirtyoff;
-			size = (u_quad_t)(bp->b_dirtyend
-				- bp->b_dirtyoff);
+				buf_dirtyoff(bp);
+			size = (u_quad_t)(buf_dirtyend(bp)
+				- buf_dirtyoff(bp));
 			retv = ncl_commit(vp, off, (int)size,
 				bp->b_wcred, td);
 			if (retv) break;
@@ -2818,7 +2818,7 @@ again:
 				/*
 				 * Success, remove B_DELWRI ( bundirty() ).
 				 *
-				 * b_dirtyoff/b_dirtyend seem to be NFS
+				 * buf_dirtyoff()/buf_dirtyend() seem to be NFS
 				 * specific. We should probably move that
 				 * into bundirty(). XXX
 				 */
@@ -2827,7 +2827,7 @@ again:
 				bundirty(bp);
 				bp->b_flags &= ~B_DONE;
 				bp->b_ioflags &= ~BIO_ERROR;
-				bp->b_dirtyoff = bp->b_dirtyend = 0;
+				buf_dirtyclear(bp);
 				bufdone(bp);
 			}
 		}
--- vm/swap_pager.c.orig	2012-12-13 08:28:11.000000000 -0500
+++ vm/swap_pager.c	2012-12-24 15:46:58.000000000 -0500
@@ -1399,8 +1399,7 @@ swap_pager_putpages(vm_object_t object,
 		/*
 		 * Must set dirty range for NFS to work.
 		 */
-		bp->b_dirtyoff = 0;
-		bp->b_dirtyend = bp->b_bcount;
+		buf_dirtyupdate(bp, 0, bp->b_bcount, 1);
 
 		PCPU_INC(cnt.v_swapout);
 		PCPU_ADD(cnt.v_swappgsout, bp->b_npages);
--- sys/buf.h.orig	2012-12-13 08:26:57.000000000 -0500
+++ sys/buf.h	2012-12-24 15:46:58.000000000 -0500
@@ -72,6 +72,14 @@ struct vm_object;
 typedef unsigned char b_xflags_t;
 
 /*
+ * Define the size of the dirty byte range(s) table and macros to get
+ * the beginning/end of all the dirty region(s).
+ * b_dirtycnt - The number of valid dirty regions. When set to 0,
+ * there are no dirty regions.
+ */
+#define	B_DIRTYMAX	64
+
+/*
  * The buffer header describes an I/O operation in the kernel.
  *
  * NOTES:
@@ -120,8 +128,9 @@ struct buf {
 	int	b_kvasize;		/* size of kva for buffer */
 	daddr_t	b_lblkno;		/* Logical block number. */
 	struct	vnode *b_vp;		/* Device vnode. */
-	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
-	int	b_dirtyend;		/* Offset of end of dirty region. */
+	int	b_dirtycnt;		/* Cnt of dirty regions in arrays. */
+	int	b_dirtyofflist[B_DIRTYMAX]; /* Offset of dirty region. */
+	int	b_dirtyendlist[B_DIRTYMAX]; /* Offset of end of dirty region. */
 	struct	ucred *b_rcred;		/* Read credentials reference. */
 	struct	ucred *b_wcred;		/* Write credentials reference. */
 	void	*b_saveaddr;		/* Original b_addr for physio. */
@@ -142,6 +151,8 @@ struct buf {
 };
 
 #define	b_object	b_bufobj->bo_object
+#define	b_dirtyoff	b_dirtyofflist[0]
+#define	b_dirtyend	b_dirtyendlist[0]
 
 /*
  * These flags are kept in b_flags.
@@ -438,6 +449,28 @@ buf_countdeps(struct buf *bp, int i)
 		return (0);
 }
 
+static __inline int
+buf_dirtyoff(struct buf *bp)
+{
+	if (bp->b_dirtycnt > 0)
+		return (bp->b_dirtyofflist[0]);
+	return (0);
+}
+
+static __inline int
+buf_dirtyend(struct buf *bp)
+{
+	if (bp->b_dirtycnt > 0)
+		return (bp->b_dirtyendlist[bp->b_dirtycnt - 1]);
+	return (0);
+}
+
+static __inline void
+buf_dirtyclear(struct buf *bp)
+{
+	bp->b_dirtycnt = 0;
+}
+
 #endif /* _KERNEL */
 
 /*
@@ -530,6 +563,9 @@ void	bdone(struct buf *);
 void	bpin(struct buf *);
 void	bunpin(struct buf *);
 void	bunpin_wait(struct buf *);
+int	buf_dirtyupdate(struct buf *, int, int, int);
+void	buf_dirtytrim(struct buf *, int);
+void	buf_dirtydone(struct buf *, int);
 
 #endif /* _KERNEL */
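
Usage note (illustrative, not part of the patch): the sketch below shows how a
caller might drive the dirty-region interface that sys/buf.h exports after this
change. Only buf_dirtyupdate(), buf_dirtydone(), buf_dirtytrim() and
buf_dirtyclear() come from the patch; the function name example_dirty_write()
and the byte offsets are hypothetical. The iteration pattern mirrors the one
used in ncl_doio() above.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>

/*
 * Illustrative sketch only: "bp" is assumed to be a buffer already owned
 * (locked) by the caller, with b_bcount valid.
 */
static void
example_dirty_write(struct buf *bp)
{
	int i;

	/*
	 * Record two discontiguous dirty byte ranges.  The last argument
	 * (force) is 0, so buf_dirtyupdate() returns 0 instead of collapsing
	 * the list when all B_DIRTYMAX slots are in use.
	 */
	if (buf_dirtyupdate(bp, 0, 512, 0) == 0 ||
	    buf_dirtyupdate(bp, 4096, 8192, 0) == 0) {
		/* List full: flush the buffer (e.g. bwrite()) and retry. */
		return;
	}

	/*
	 * Walk the regions.  buf_dirtydone() shifts later entries down,
	 * so "i" is only advanced when an entry is kept.
	 */
	for (i = 0; i < bp->b_dirtycnt; ) {
		/*
		 * ... write bytes b_dirtyofflist[i] .. b_dirtyendlist[i]
		 * of bp->b_data to the server/device here ...
		 */
		buf_dirtydone(bp, i);
	}

	/* Drop any region bytes beyond b_bcount, then clear the list. */
	buf_dirtytrim(bp, bp->b_bcount);
	buf_dirtyclear(bp);
}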