Add an experimental splice(2) system call (number 469).

The change adds fo_readsf/fo_writesf file operations and a DFLAG_SPLICABLE
flag to sys/sys/file.h, implements both operations for vnodes in
sys/kern/vfs_vnops.c, adds splice() to sys/kern/uipc_syscalls.c and wires
it into the system call tables.

NOTE(review): the header names on the #include lines of the first
sys/kern/vfs_vnops.c hunk are missing from this copy of the patch and must
be restored before it can be applied.  Whitespace inside the hunks was
reconstructed; apply with "patch -l" if the context does not match.

diff -ur /usr/src/sys/amd64/conf/WARP9 sys/amd64/conf/WARP9
--- /usr/src/sys/amd64/conf/WARP9	Mon Apr 10 15:11:54 2006
+++ sys/amd64/conf/WARP9	Sat Jul 29 17:55:31 2006
@@ -72,6 +72,7 @@
 #options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS
 #options 	WITNESS			# Enable checks to detect deadlocks and cycles
 ##options 	WITNESS_SKIPSPIN	# Don't run witness on spinlocks for speed
+options 	BREAK_TO_DEBUGGER
 
 # Make an SMP-capable kernel by default
 options 	SMP			# Symmetric MultiProcessor Kernel
diff -ur /usr/src/sys/kern/init_sysent.c sys/kern/init_sysent.c
--- /usr/src/sys/kern/init_sysent.c	Thu Jul 27 15:21:18 2006
+++ sys/kern/init_sysent.c	Sat Jul 29 18:03:27 2006
@@ -2,7 +2,7 @@
  * System call switch table.
  *
  * DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/init_sysent.c,v 1.214 2006/07/13 06:32:55 davidxu Exp $
+ * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.219 2006/07/13 06:26:43 davidxu Exp
  */
 
@@ -498,4 +498,5 @@
 	{ SYF_MPSAFE | AS(thr_setscheduler_args), (sy_call_t *)thr_setscheduler, AUE_NULL },	/* 466 = thr_setscheduler */
 	{ SYF_MPSAFE | AS(thr_getscheduler_args), (sy_call_t *)thr_getscheduler, AUE_NULL },	/* 467 = thr_getscheduler */
 	{ SYF_MPSAFE | AS(thr_setschedparam_args), (sy_call_t *)thr_setschedparam, AUE_NULL },	/* 468 = thr_setschedparam */
+	{ SYF_MPSAFE | AS(splice_args), (sy_call_t *)splice, AUE_NULL },	/* 469 = splice */
 };
Only in sys/kern: init_sysent.c.bak
diff -ur /usr/src/sys/kern/syscalls.c sys/kern/syscalls.c
--- /usr/src/sys/kern/syscalls.c	Thu Jul 27 15:21:21 2006
+++ sys/kern/syscalls.c	Sat Jul 29 18:03:27 2006
@@ -2,7 +2,7 @@
  * System call names.
  *
  * DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/syscalls.c,v 1.198 2006/07/13 06:32:55 davidxu Exp $
+ * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.219 2006/07/13 06:26:43 davidxu Exp
  */
 
@@ -476,4 +476,5 @@
 	"thr_setscheduler",			/* 466 = thr_setscheduler */
 	"thr_getscheduler",			/* 467 = thr_getscheduler */
 	"thr_setschedparam",			/* 468 = thr_setschedparam */
+	"splice",			/* 469 = splice */
 };
Only in sys/kern: syscalls.c.bak
diff -ur /usr/src/sys/kern/syscalls.master sys/kern/syscalls.master
--- /usr/src/sys/kern/syscalls.master	Thu Jul 27 15:21:21 2006
+++ sys/kern/syscalls.master	Sat Jul 29 17:08:35 2006
@@ -831,5 +831,7 @@
 468	AUE_NULL	MSTD	{ int thr_setschedparam(long id, \
 				    const struct sched_param *param, \
 				    int param_size); }
+469	AUE_NULL	MSTD	{ int splice(int infd, int outfd, int len, \
+				    int flags); }
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master
diff -ur /usr/src/sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
--- /usr/src/sys/kern/uipc_syscalls.c	Thu Jul 27 15:21:23 2006
+++ sys/kern/uipc_syscalls.c	Sat Jul 29 21:08:38 2006
@@ -2274,3 +2274,66 @@
 
 	return (error);
 }
+
+#ifndef _SYS_SYSPROTO_H_
+struct splice_args {
+	int	infd;
+	int	outfd;
+	int	len;
+	int	flags;
+};
+#endif
+/*
+ * splice(2): move up to uap->len bytes from descriptor uap->infd to
+ * descriptor uap->outfd, at most one page per fo_readsf()/fo_writesf()
+ * round trip.  Both descriptors must have DFLAG_SPLICABLE set or EINVAL
+ * is returned.  Once the transfer loop has run, the number of bytes
+ * moved is stored in td->td_retval[0].
+ */
+int
+splice(struct thread *td, struct splice_args *uap)
+{
+	struct sf_buf *sf;
+	struct file *in, *out;
+	int error, flags, len, written, xfsize;
+
+	flags = uap->flags;
+	len = uap->len;
+
+	if ((error = fget(td, uap->infd, &in)) != 0)
+		return (error);
+	if ((error = fget(td, uap->outfd, &out)) != 0)
+		goto done2;
+
+	if (!(in->f_ops->fo_flags & DFLAG_SPLICABLE)) {
+		error = EINVAL;
+		goto done1;
+	}
+	if (!(out->f_ops->fo_flags & DFLAG_SPLICABLE)) {
+		error = EINVAL;
+		goto done1;
+	}
+
+	for (written = 0; len > 0; len -= PAGE_SIZE) {
+		/* An sf_buf maps a single page: never move more than that. */
+		xfsize = len > PAGE_SIZE ? PAGE_SIZE : len;
+
+		if ((error = fo_readsf(in, &sf, xfsize, flags)) != 0)
+			break;
+		/* XXX Where do we free the sf_buf? */
+		/* XXX How to deal with errors? */
+		if ((error = fo_writesf(out, sf, xfsize, flags)) != 0)
+			break;
+
+		written += xfsize;
+	}
+
+	td->td_retval[0] = written;
+
+done1:
+	fdrop(out, td);
+done2:
+	fdrop(in, td);
+
+	return (error);
+}
diff -ur /usr/src/sys/kern/vfs_vnops.c sys/kern/vfs_vnops.c
--- /usr/src/sys/kern/vfs_vnops.c	Thu Jul 27 15:21:23 2006
+++ sys/kern/vfs_vnops.c	Sat Jul 29 21:22:07 2006
@@ -61,6 +61,11 @@
 #include
 #include
 #include
+#include
+
+#include
+#include
+#include
 
 static fo_rdwr_t	vn_read;
 static fo_rdwr_t	vn_write;
@@ -69,6 +74,8 @@
 static fo_kqfilter_t	vn_kqfilter;
 static fo_stat_t	vn_statfile;
 static fo_close_t	vn_closefile;
+static fo_readsf_t	vn_readsf;
+static fo_writesf_t	vn_writesf;
 
 struct fileops vnops = {
 	.fo_read = vn_read,
@@ -78,7 +85,9 @@
 	.fo_kqfilter = vn_kqfilter,
 	.fo_stat = vn_statfile,
 	.fo_close = vn_closefile,
-	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
+	.fo_readsf = vn_readsf,
+	.fo_writesf = vn_writesf,
+	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE | DFLAG_SPLICABLE
 };
 
 int
@@ -1249,6 +1258,174 @@
 		vn_finished_write(mp);
 		VOP_UNLOCK(vp, 0, td);
 	}
+
+	return (error);
+}
+
+/*
+ * Look up, or allocate and read in, the page of fp's vnode that backs
+ * the current file offset, wire it and return it mapped by an sf_buf
+ * in *sf.  Returns 0 on success, EINVAL if the vnode has no live VM
+ * object, EINTR if sf_buf_alloc() fails, or the error from vn_rdwr().
+ * XXX The fill read is always issued at file offset 0, not at the
+ * offset of the page being filled.
+ */
+static int
+vn_readsf(struct file *fp, struct sf_buf **sf, int len, int flags)
+{
+	struct vm_object *obj;
+	struct thread *td;
+	struct vnode *vp;
+	vm_page_t pg;
+	vm_pindex_t pindex;
+	int error, vfslocked;
+
+	error = 0;
+	td = curthread; /* XXX */
+	vp = fp->f_vnode;
+	vref(vp);
+
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+	obj = vp->v_object;
+	if (obj != NULL) {
+		/*
+		 * Temporarily increase the backing VM object's reference
+		 * count so that a forced reclamation of its vnode does not
+		 * immediately destroy it.
+		 */
+		VM_OBJECT_LOCK(obj);
+		if ((obj->flags & OBJ_DEAD) == 0) {
+			vm_object_reference_locked(obj);
+			VM_OBJECT_UNLOCK(obj);
+		} else {
+			VM_OBJECT_UNLOCK(obj);
+			obj = NULL;
+		}
+	}
+	VOP_UNLOCK(vp, 0, td);
+	VFS_UNLOCK_GIANT(vfslocked);
+	if (obj == NULL) {
+		error = EINVAL;
+		goto done;
+	}
+
+	pindex = OFF_TO_IDX(fp->f_offset);
+	VM_OBJECT_LOCK(obj);
+retry_lookup:
+	pg = vm_page_lookup(obj, pindex);
+	if (pg == NULL) {
+		pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
+		    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
+		if (pg == NULL) {
+			VM_OBJECT_UNLOCK(obj);
+			VM_WAIT;
+			VM_OBJECT_LOCK(obj);
+			goto retry_lookup;
+		}
+		vm_page_lock_queues();
+	} else {
+		vm_page_lock_queues();
+		if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
+			goto retry_lookup;
+		vm_page_wire(pg);
+	}
+
+	if (pg->valid && vm_page_is_valid(pg, 0, len)) {
+		VM_OBJECT_UNLOCK(obj);
+	} else { /* XXX NONBLOCK */
+		int bsize, resid;
+
+		vm_page_io_start(pg);
+		vm_page_unlock_queues();
+		VM_OBJECT_UNLOCK(obj);
+
+		bsize = vp->v_mount->mnt_stat.f_iosize;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		vn_lock(vp, LK_SHARED | LK_RETRY, td);
+		error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 0 /* offset */,
+		    UIO_NOCOPY, IO_NODELOCKED | IO_VMIO |
+		    ((MAXBSIZE / bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED,
+		    &resid, td);
+		VOP_UNLOCK(vp, 0, td);
+		VFS_UNLOCK_GIANT(vfslocked);
+		/* Retake the object lock that was dropped for the I/O. */
+		VM_OBJECT_LOCK(obj);
+		vm_page_lock_queues();
+		vm_page_io_finish(pg);
+		if (!error)
+			VM_OBJECT_UNLOCK(obj);
+	}
+
+	if (error) {
+		vm_page_unwire(pg, 0);
+		if (pg->wire_count == 0 && pg->valid == 0 &&
+		    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
+		    pg->hold_count == 0) {
+			vm_page_free(pg);
+		}
+		vm_page_unlock_queues();
+		VM_OBJECT_UNLOCK(obj);
+		goto done;
+	}
+	vm_page_unlock_queues();
+
+	if ((*sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
+		vm_page_lock_queues();
+		vm_page_unwire(pg, 0);
+		if (pg->wire_count == 0 && pg->object == NULL)
+			vm_page_free(pg);
+		vm_page_unlock_queues();
+		error = EINTR;
+	}
+
+	/* XXX Need to update fp->f_offset */
+
+done:
+	/* Drop the temporary object reference taken above. */
+	if (obj != NULL)
+		vm_object_deallocate(obj);
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	vrele(vp);
+	VFS_UNLOCK_GIANT(vfslocked);
+
+	return (error);
+}
+
+/*
+ * Write len bytes from the kernel mapping of sf to fp's vnode, then
+ * free the sf_buf and unwire its page.  The sf_buf is consumed whether
+ * or not the write succeeds.
+ * XXX The write is always issued at file offset 0.
+ */
+static int
+vn_writesf(struct file *fp, struct sf_buf *sf, int len, int flags)
+{
+	struct thread *td;
+	struct vnode *vp;
+	vm_page_t pg;
+	int error, vfslocked;
+
+	td = curthread;
+
+	vp = fp->f_vnode;
+	vref(vp);
+
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)sf_buf_kva(sf), len, (off_t)0,
+	    UIO_SYSSPACE, IO_VMIO, td->td_ucred, NOCRED, NULL, td);
+
+	vrele(vp);
+	VFS_UNLOCK_GIANT(vfslocked);
+
+	pg = sf_buf_page(sf);
+	sf_buf_free(sf);
+	vm_page_lock_queues();
+	vm_page_unwire(pg, 0);
+	/* XXX Is it safe to unwire and free the page here? */
+	if (pg->wire_count == 0 && pg->object == NULL)
+		vm_page_free(pg);
+	vm_page_unlock_queues();
 
 	return (error);
 }
diff -ur /usr/src/sys/sys/file.h sys/sys/file.h
--- /usr/src/sys/sys/file.h	Fri May 26 13:11:13 2006
+++ sys/sys/file.h	Sat Jul 29 17:59:26 2006
@@ -48,6 +48,7 @@
 struct knote;
 struct vnode;
 struct socket;
+struct sf_buf;
 
 #endif /* _KERNEL */
 
@@ -77,6 +78,10 @@
 typedef	int fo_stat_t(struct file *fp, struct stat *sb,
 		    struct ucred *active_cred, struct thread *td);
 typedef	int fo_close_t(struct file *fp, struct thread *td);
+typedef	int fo_readsf_t(struct file *fp, struct sf_buf **sf, int len,
+		    int flags);
+typedef	int fo_writesf_t(struct file *fp, struct sf_buf *sf, int len,
+		    int flags);
 typedef	int fo_flags_t;
 
 struct fileops {
@@ -87,11 +92,14 @@
 	fo_kqfilter_t	*fo_kqfilter;
 	fo_stat_t	*fo_stat;
 	fo_close_t	*fo_close;
+	fo_readsf_t	*fo_readsf;
+	fo_writesf_t	*fo_writesf;
 	fo_flags_t	fo_flags;	/* DFLAG_* below */
 };
 
 #define DFLAG_PASSABLE	0x01	/* may be passed via unix sockets. */
 #define DFLAG_SEEKABLE	0x02	/* seekable / nonsequential */
+#define DFLAG_SPLICABLE	0x04	/* can be used in splice(2) */
 
 /*
  * Kernel descriptor table.
@@ -228,6 +236,8 @@
 static __inline fo_kqfilter_t	fo_kqfilter;
 static __inline fo_stat_t	fo_stat;
 static __inline fo_close_t	fo_close;
+static __inline fo_readsf_t	fo_readsf;
+static __inline fo_writesf_t	fo_writesf;
 
 static __inline int
 fo_read(fp, uio, active_cred, flags, td)
@@ -303,6 +313,18 @@
 {
 
 	return ((*fp->f_ops->fo_kqfilter)(fp, kn));
+}
+
+static __inline int
+fo_readsf(struct file *fp, struct sf_buf **sf, int len, int flags)
+{
+	return ((*fp->f_ops->fo_readsf)(fp, sf, len, flags));
+}
+
+static __inline int
+fo_writesf(struct file *fp, struct sf_buf *sf, int len, int flags)
+{
+	return ((*fp->f_ops->fo_writesf)(fp, sf, len, flags));
 }
 
 #endif /* _KERNEL */
diff -ur /usr/src/sys/sys/syscall.h sys/sys/syscall.h
--- /usr/src/sys/sys/syscall.h	Thu Jul 27 15:21:36 2006
+++ sys/sys/syscall.h	Sat Jul 29 18:03:27 2006
@@ -2,7 +2,7 @@
  * System call numbers.
  *
  * DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/sys/syscall.h,v 1.195 2006/07/13 06:32:55 davidxu Exp $
+ * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.219 2006/07/13 06:26:43 davidxu Exp
  */
 
@@ -392,4 +392,5 @@
 #define	SYS_thr_setscheduler	466
 #define	SYS_thr_getscheduler	467
 #define	SYS_thr_setschedparam	468
-#define	SYS_MAXSYSCALL	469
+#define	SYS_splice	469
+#define	SYS_MAXSYSCALL	470
Only in sys/sys: syscall.h.bak
diff -ur /usr/src/sys/sys/syscall.mk sys/sys/syscall.mk
--- /usr/src/sys/sys/syscall.mk	Thu Jul 27 15:21:36 2006
+++ sys/sys/syscall.mk	Sat Jul 29 18:03:27 2006
@@ -1,6 +1,6 @@
 # FreeBSD system call names.
 # DO NOT EDIT-- this file is automatically generated.
-# $FreeBSD: src/sys/sys/syscall.mk,v 1.150 2006/07/13 06:32:55 davidxu Exp $
+# $FreeBSD$
 # created from FreeBSD: src/sys/kern/syscalls.master,v 1.219 2006/07/13 06:26:43 davidxu Exp
 MIASM = \
 	syscall.o \
@@ -333,4 +333,5 @@
 	aio_fsync.o \
 	thr_setscheduler.o \
 	thr_getscheduler.o \
-	thr_setschedparam.o
+	thr_setschedparam.o \
+	splice.o
Only in sys/sys: syscall.mk.bak
diff -ur /usr/src/sys/sys/sysproto.h sys/sys/sysproto.h
--- /usr/src/sys/sys/sysproto.h	Thu Jul 27 15:21:36 2006
+++ sys/sys/sysproto.h	Sat Jul 29 18:03:27 2006
@@ -2,7 +2,7 @@
  * System call prototypes.
  *
  * DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/sys/sysproto.h,v 1.197 2006/07/13 06:32:55 davidxu Exp $
+ * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.219 2006/07/13 06:26:43 davidxu Exp
  */
 
@@ -1464,6 +1464,12 @@
 	char param_l_[PADL_(const struct sched_param *)]; const struct sched_param * param; char param_r_[PADR_(const struct sched_param *)];
 	char param_size_l_[PADL_(int)]; int param_size; char param_size_r_[PADR_(int)];
 };
+struct splice_args {
+	char infd_l_[PADL_(int)]; int infd; char infd_r_[PADR_(int)];
+	char outfd_l_[PADL_(int)]; int outfd; char outfd_r_[PADR_(int)];
+	char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
+	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
 int	nosys(struct thread *, struct nosys_args *);
 void	sys_exit(struct thread *, struct sys_exit_args *);
 int	fork(struct thread *, struct fork_args *);
@@ -1794,6 +1800,7 @@
 int	thr_setscheduler(struct thread *, struct thr_setscheduler_args *);
 int	thr_getscheduler(struct thread *, struct thr_getscheduler_args *);
 int	thr_setschedparam(struct thread *, struct thr_setschedparam_args *);
+int	splice(struct thread *, struct splice_args *);
 
 #ifdef COMPAT_43
 
Only in sys/sys: sysproto.h.bak