--- //depot/vendor/freebsd/src/sys/kern/init_sysent.c 2006/07/28 19:11:10 +++ //depot/user/ssouhlal/splice/sys/kern/init_sysent.c 2006/07/30 07:58:57 @@ -2,7 +2,7 @@ * System call switch table. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/kern/init_sysent.c,v 1.215 2006/07/28 19:08:36 jhb Exp $ + * $FreeBSD$ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.221 2006/07/28 19:05:28 jhb Exp */ @@ -498,4 +498,5 @@ { AS(thr_setscheduler_args), (sy_call_t *)thr_setscheduler, AUE_NULL }, /* 466 = thr_setscheduler */ { AS(thr_getscheduler_args), (sy_call_t *)thr_getscheduler, AUE_NULL }, /* 467 = thr_getscheduler */ { AS(thr_setschedparam_args), (sy_call_t *)thr_setschedparam, AUE_NULL }, /* 468 = thr_setschedparam */ + { AS(splice_args), (sy_call_t *)splice, AUE_NULL }, /* 469 = splice */ }; --- //depot/vendor/freebsd/src/sys/kern/sys_socket.c 2006/07/24 15:22:01 +++ //depot/user/ssouhlal/splice/sys/kern/sys_socket.c 2006/07/30 12:05:29 @@ -51,10 +51,14 @@ #include #include #include +#include #include #include +#include +#include + struct fileops socketops = { .fo_read = soo_read, .fo_write = soo_write, @@ -63,7 +67,9 @@ .fo_kqfilter = soo_kqfilter, .fo_stat = soo_stat, .fo_close = soo_close, - .fo_flags = DFLAG_PASSABLE + .fo_readsf = soo_readsf, + .fo_writesf = soo_writesf, + .fo_flags = DFLAG_PASSABLE | DFLAG_SPLICABLE }; /* ARGSUSED */ @@ -316,3 +322,124 @@ NET_UNLOCK_GIANT(); return (error); } +/* soo_readsf: fo_readsf handler for sockets. Allocates a page with vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ), maps it with sf_buf_alloc() and stores the sf_buf in *sf, then receives up to len bytes into that mapping with soreceive() under NET_LOCK_GIANT. Returns EINTR (after unwiring and freeing the page) when sf_buf_alloc() returns NULL, otherwise the soreceive() result. flags is not used. */ +int +soo_readsf(struct file *fp, struct sf_buf **sf, int len, int flags) +{ + struct socket *so; + struct iovec iov; + struct uio uio; + vm_page_t pg; + int error; + + so = fp->f_data; + + /* + * XXX It should be possible to avoid doing a copy here by using + * scatter/gather lists instead of sf_bufs and adding a protocol + * operation that generates such a list from the socket buffer. 
+ */ + + pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); /* NOTE(review): pg is used below without a NULL check; confirm vm_page_alloc() cannot fail with these flags */ + + if ((*sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { + vm_page_lock_queues(); + vm_page_unwire(pg, 0); + KASSERT(pg->wire_count == 0, ("socket sf page still wired")); + vm_page_free(pg); + vm_page_unlock_queues(); + return (EINTR); + } + + bzero(&iov, sizeof(iov)); + bzero(&uio, sizeof(uio)); + + iov.iov_base = (void *)sf_buf_kva(*sf); + iov.iov_len = len; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_resid = len; + uio.uio_segflg = UIO_SYSSPACE; /* NOTE(review): every other uio field stays zero from the bzero() above; verify soreceive() accepts that */ + + NET_LOCK_GIANT(); + error = soreceive(so, 0, &uio, 0, 0, 0); /* NOTE(review): uio.uio_resid is not examined afterwards, so a short receive is not reported to the caller; on error the page and sf_buf allocated above are not released in this function */ + NET_UNLOCK_GIANT(); + + return (error); +} +/* soo_writesf: fo_writesf handler for sockets. Takes the send-buffer sblock(), gets one mbuf header, points it at the sf_buf mapping via MEXTADD (M_RDONLY, EXT_SFBUF) with m_len = m_pkthdr.len = len, waits until sbspace() is at least sb_lowat, then passes the mbuf to the protocol's pru_send. Returns ENOBUFS when no mbuf is available, EPIPE when SBS_CANTSENDMORE is set, a pending so_error (which is cleared), EAGAIN for an SS_NBIO socket without space, an sbwait() error, or the pru_send result. flags is not used. */ +int +soo_writesf(struct file *fp, struct sf_buf *sf, int len, int flags) +{ + struct socket *so; + struct thread *td; + struct mbuf *m; + int error; + + so = fp->f_data; + td = curthread; + + NET_LOCK_GIANT(); + + SOCKBUF_LOCK(&so->so_snd); + sblock(&so->so_snd, M_WAITOK); /* NOTE(review): the sblock() return value is ignored */ + SOCKBUF_UNLOCK(&so->so_snd); + + MGETHDR(m, M_TRYWAIT, MT_DATA); + if (m == NULL) { + error = ENOBUFS; + SOCKBUF_LOCK(&so->so_snd); + sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); + goto done; + } + + MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, + EXT_SFBUF); /* sf_buf_mext and sf are handed to MEXTADD, presumably as the external-storage free routine and its argument; TODO confirm */ + m->m_data = (char *)sf_buf_kva(sf); + m->m_pkthdr.len = m->m_len = len; + + SOCKBUF_LOCK(&so->so_snd); +retry_space: + SOCKBUF_LOCK_ASSERT(&so->so_snd); + if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { + if (so->so_snd.sb_state & SBS_CANTSENDMORE) { + error = EPIPE; + } else { + error = so->so_error; + so->so_error = 0; + } + m_freem(m); + sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); + goto done; + } + if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { /* not enough send-buffer space yet: EAGAIN for SS_NBIO sockets, otherwise sbwait() and re-check from retry_space */ + if (so->so_state & SS_NBIO) { + m_freem(m); + sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); + error = EAGAIN; + goto done; + } + error = sbwait(&so->so_snd); + if (error) { + m_freem(m); + sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); + goto done; + } + goto 
retry_space; + } + SOCKBUF_UNLOCK(&so->so_snd); + error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); /* m is passed to the protocol here and is not referenced again in this function */ + + SOCKBUF_LOCK(&so->so_snd); + sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); + +done: + NET_UNLOCK_GIANT(); + return (error); +} --- //depot/vendor/freebsd/src/sys/kern/syscalls.c 2006/07/28 19:11:10 +++ //depot/user/ssouhlal/splice/sys/kern/syscalls.c 2006/07/30 07:58:57 @@ -2,7 +2,7 @@ * System call names. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/kern/syscalls.c,v 1.199 2006/07/28 19:08:37 jhb Exp $ + * $FreeBSD$ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.221 2006/07/28 19:05:28 jhb Exp */ @@ -476,4 +476,5 @@ "thr_setscheduler", /* 466 = thr_setscheduler */ "thr_getscheduler", /* 467 = thr_getscheduler */ "thr_setschedparam", /* 468 = thr_setschedparam */ + "splice", /* 469 = splice */ }; --- //depot/vendor/freebsd/src/sys/kern/syscalls.master 2006/07/28 19:06:28 +++ //depot/user/ssouhlal/splice/sys/kern/syscalls.master 2006/07/30 07:57:32 @@ -827,5 +827,7 @@ 468 AUE_NULL STD { int thr_setschedparam(long id, \ const struct sched_param *param, \ int param_size); } +469 AUE_NULL STD { int splice(int infd, int outfd, int len, \ + int flags); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master --- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c 2006/07/27 19:55:49 +++ //depot/user/ssouhlal/splice/sys/kern/uipc_syscalls.c 2006/07/30 08:51:55 @@ -2284,3 +2284,62 @@ return (error); } +/* splice(2): copy uap->len bytes from descriptor infd to descriptor outfd in chunks of at most PAGE_SIZE, calling fo_readsf() on the input file and fo_writesf() on the output file for each chunk. Fails with EINVAL unless both files' fileops carry DFLAG_SPLICABLE; fget() errors are returned as is. When the loop is reached, td_retval[0] receives the byte count of the chunks for which both calls succeeded, and the first handler error (also printf()ed) is returned. A len <= 0 transfers nothing and returns 0. NOTE(review): each chunk is counted as a full xfsize; the handlers have no way to report a short transfer. */ +#ifndef _SYS_SYSPROTO_H_ +struct splice_args { + int infd; + int outfd; + int len; + int flags; +}; +#endif +int +splice(struct thread *td, struct splice_args *uap) +{ + struct sf_buf *sf; + struct file *in, *out; + int error, flags, len, written, xfsize; + + flags = uap->flags; + len = uap->len; + + if ((error = fget(td, uap->infd, &in)) != 0) + return (error); + if ((error = fget(td, uap->outfd, &out)) != 0) + goto done2; + + if 
(!(in->f_ops->fo_flags & DFLAG_SPLICABLE)) { + error = EINVAL; + goto done1; + } + if (!(out->f_ops->fo_flags & DFLAG_SPLICABLE)) { + error = EINVAL; + goto done1; + } + + for (written = 0; len > 0; len -= PAGE_SIZE) { + xfsize = len < PAGE_SIZE ? len : PAGE_SIZE; /* move at most one page per iteration */ + + if ((error = fo_readsf(in, &sf, xfsize, flags)) != 0) { + printf("%s: fo_readsf returned %d\n", __func__, error); + break; + } + /* XXX Where do we free the sf_buf? NOTE(review): when fo_writesf() fails below, nothing in this function releases sf. */ + /* XXX How to deal with errors? */ + if ((error = fo_writesf(out, sf, xfsize, flags)) != 0) { + printf("%s: fo_writesf returned %d\n", __func__, error); + break; + } + + written += xfsize; + } + + td->td_retval[0] = written; /* set even when error is non-zero after a partial transfer */ + +done1: + fdrop(out, td); +done2: + fdrop(in, td); + + return (error); +} --- //depot/vendor/freebsd/src/sys/kern/vfs_vnops.c 2006/06/24 22:56:22 +++ //depot/user/ssouhlal/splice/sys/kern/vfs_vnops.c 2006/07/30 08:51:55 @@ -61,6 +61,11 @@ #include #include #include +#include + +#include +#include +#include static fo_rdwr_t vn_read; static fo_rdwr_t vn_write; @@ -69,6 +74,8 @@ static fo_kqfilter_t vn_kqfilter; static fo_stat_t vn_statfile; static fo_close_t vn_closefile; +static fo_readsf_t vn_readsf; +static fo_writesf_t vn_writesf; struct fileops vnops = { .fo_read = vn_read, @@ -78,7 +85,9 @@ .fo_kqfilter = vn_kqfilter, .fo_stat = vn_statfile, .fo_close = vn_closefile, - .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE + .fo_readsf = vn_readsf, + .fo_writesf = vn_writesf, + .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE | DFLAG_SPLICABLE }; int @@ -1252,3 +1261,157 @@ return (error); } +/* vn_readsf: fo_readsf handler for vnodes. Takes a reference on the vnode's VM object (EINVAL if there is none or it is OBJ_DEAD), looks up or allocates the wired page at OFF_TO_IDX(fp->f_offset), reads it in with vn_rdwr(UIO_READ, ..., UIO_NOCOPY, ...) when vm_page_is_valid(pg, 0, len) does not hold, maps the page with sf_buf_alloc() into *sf and advances fp->f_offset by len. Returns a vn_rdwr() error, EINTR when sf_buf_alloc() returns NULL, or 0. flags is not used. NOTE(review), to confirm before commit: (1) vn_rdwr() is passed a literal 0 offset rather than one derived from fp->f_offset; (2) no call in this function drops the reference taken with vm_object_reference_locked(); (3) on the I/O path the object is unlocked before vn_rdwr() and no VM_OBJECT_LOCK() is visible before the later VM_OBJECT_UNLOCK() calls; (4) the validity check and the sf_buf mapping start at page offset 0, so an f_offset that is not page aligned looks unhandled. */ +int +vn_readsf(struct file *fp, struct sf_buf **sf, int len, int flags) +{ + struct vm_object *obj; + struct thread *td; + struct vnode *vp; + vm_page_t pg; + int error, pindex, vfslocked; /* NOTE(review): pindex holds OFF_TO_IDX(fp->f_offset) in a plain int; confirm this cannot truncate */ + + error = 0; + td = curthread; /* XXX */ + vp = fp->f_vnode; + vref(vp); + + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + obj = vp->v_object; + if (obj != NULL) { + /* + * Temporarily increase the backing 
VM object's reference + * count so that a forced reclamation of its vnode does not + * immediately destroy it. + */ + VM_OBJECT_LOCK(obj); + if ((obj->flags & OBJ_DEAD) == 0) { + vm_object_reference_locked(obj); + VM_OBJECT_UNLOCK(obj); + } else { + VM_OBJECT_UNLOCK(obj); + obj = NULL; + } + } + VOP_UNLOCK(vp, 0, td); + VFS_UNLOCK_GIANT(vfslocked); + if (obj == NULL) { + error = EINVAL; + goto done; + } + + pindex = OFF_TO_IDX(fp->f_offset); + VM_OBJECT_LOCK(obj); +retry_lookup: + pg = vm_page_lookup(obj, pindex); + if (pg == NULL) { + pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | + VM_ALLOC_NORMAL | VM_ALLOC_WIRED); + if (pg == NULL) { + VM_OBJECT_UNLOCK(obj); + VM_WAIT; + VM_OBJECT_LOCK(obj); + goto retry_lookup; + } + vm_page_lock_queues(); + } else { + vm_page_lock_queues(); + if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) + goto retry_lookup; + vm_page_wire(pg); + } + + if (pg->valid && vm_page_is_valid(pg, 0, len)) { + VM_OBJECT_UNLOCK(obj); + } else { /* XXX NONBLOCK */ + int bsize, resid; + + vm_page_io_start(pg); + vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(obj); + + bsize = vp->v_mount->mnt_stat.f_iosize; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + vn_lock(vp, LK_SHARED | LK_RETRY, td); + error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 0 /* offset */, + UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | + ((MAXBSIZE / bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, + &resid, td); + VOP_UNLOCK(vp, 0, td); + VFS_UNLOCK_GIANT(vfslocked); + vm_page_lock_queues(); + vm_page_io_finish(pg); + if (!error) + VM_OBJECT_UNLOCK(obj); + } + + if (error) { + vm_page_unwire(pg, 0); + if (pg->wire_count == 0 && pg->valid == 0 && + pg->busy == 0 && !(pg->flags & PG_BUSY) && + pg->hold_count == 0) { + vm_page_free(pg); + } + vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(obj); + goto done; + } + vm_page_unlock_queues(); + + if ((*sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { + vm_page_lock_queues(); + vm_page_unwire(pg, 0); + if (pg->wire_count == 0 && pg->object == NULL) + 
vm_page_free(pg); + vm_page_unlock_queues(); + error = EINTR; + } + + fp->f_offset += len; /* NOTE(review): also reached on the sf_buf_alloc() failure path, so the offset advances even when EINTR is returned */ + +done: + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + vrele(vp); + VFS_UNLOCK_GIANT(vfslocked); + + return (error); +} +/* vn_writesf: fo_writesf handler for vnodes. Writes len bytes from the sf_buf mapping to the vnode at fp->f_offset using vn_rdwr(UIO_WRITE, ..., UIO_SYSSPACE, IO_UNIT | IO_VMIO, ...), advancing fp->f_offset by len on success and printf()ing the error otherwise. In both cases the sf_buf is freed and its page unwired, and the page itself is freed when wire_count is 0 and it has no object. Returns the vn_rdwr() result. flags is not used. */ +int +vn_writesf(struct file *fp, struct sf_buf *sf, int len, int flags) +{ + struct thread *td; + struct vnode *vp; + vm_page_t pg; + int error, vfslocked; + + td = curthread; + + vp = fp->f_vnode; + vref(vp); + + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + error = vn_rdwr(UIO_WRITE, vp, (caddr_t)sf_buf_kva(sf), len, + fp->f_offset, UIO_SYSSPACE, IO_UNIT | IO_VMIO, td->td_ucred, + NOCRED, NULL, td); + + if (error == 0) + fp->f_offset += len; + else + printf("%s: vn_rdwr() returned %d\n", __func__, error); + + vrele(vp); + VFS_UNLOCK_GIANT(vfslocked); + + pg = sf_buf_page(sf); /* fetch the page before the sf_buf is freed on the next line */ + sf_buf_free(sf); + vm_page_lock_queues(); + vm_page_unwire(pg, 0); + /* XXX Is it safe to unwire and free the page here? */ + if (pg->wire_count == 0 && pg->object == NULL) + vm_page_free(pg); + vm_page_unlock_queues(); + + return (error); +} --- //depot/vendor/freebsd/src/sys/sys/file.h 2006/05/16 07:52:13 +++ //depot/user/ssouhlal/splice/sys/sys/file.h 2006/07/30 11:47:52 @@ -48,6 +48,7 @@ struct knote; struct vnode; struct socket; +struct sf_buf; #endif /* _KERNEL */ @@ -77,6 +78,10 @@ typedef int fo_stat_t(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td); typedef int fo_close_t(struct file *fp, struct thread *td); +typedef int fo_readsf_t(struct file *fp, struct sf_buf **sf, int len, + int flags); /* splice(2) read hook: hands back an sf_buf through *sf */ +typedef int fo_writesf_t(struct file *fp, struct sf_buf *sf, int len, + int flags); /* splice(2) write hook: writes out the sf_buf it is given */ typedef int fo_flags_t; struct fileops { @@ -87,11 +92,14 @@ fo_kqfilter_t *fo_kqfilter; fo_stat_t *fo_stat; fo_close_t *fo_close; + fo_readsf_t *fo_readsf; /* called by splice(2) on the input file */ + fo_writesf_t *fo_writesf; /* called by splice(2) on the output file */ fo_flags_t fo_flags; /* DFLAG_* below */ }; #define DFLAG_PASSABLE 0x01 /* may be passed via unix sockets. 
*/ #define DFLAG_SEEKABLE 0x02 /* seekable / nonsequential */ +#define DFLAG_SPLICABLE 0x04 /* can be used in splice(2) */ /* * Kernel descriptor table. @@ -194,6 +202,8 @@ fo_kqfilter_t soo_kqfilter; fo_stat_t soo_stat; fo_close_t soo_close; +fo_readsf_t soo_readsf; +fo_writesf_t soo_writesf; /* Lock a file. */ #define FILE_LOCK(f) mtx_lock((f)->f_mtxp) @@ -228,6 +238,8 @@ static __inline fo_kqfilter_t fo_kqfilter; static __inline fo_stat_t fo_stat; static __inline fo_close_t fo_close; +static __inline fo_readsf_t fo_readsf; +static __inline fo_writesf_t fo_writesf; static __inline int fo_read(fp, uio, active_cred, flags, td) @@ -305,6 +317,18 @@ return ((*fp->f_ops->fo_kqfilter)(fp, kn)); } +static __inline int +fo_readsf(struct file *fp, struct sf_buf **sf, int len, int flags) +{ /* forwards to the fo_readsf handler in fp->f_ops */ + return ((*fp->f_ops->fo_readsf)(fp, sf, len, flags)); +} + +static __inline int +fo_writesf(struct file *fp, struct sf_buf *sf, int len, int flags) +{ /* forwards to the fo_writesf handler in fp->f_ops */ + return ((*fp->f_ops->fo_writesf)(fp, sf, len, flags)); +} + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */ --- //depot/vendor/freebsd/src/sys/sys/syscall.h 2006/07/28 19:11:10 +++ //depot/user/ssouhlal/splice/sys/sys/syscall.h 2006/07/30 07:58:57 @@ -2,7 +2,7 @@ * System call numbers. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/sys/syscall.h,v 1.196 2006/07/28 19:08:37 jhb Exp $ + * $FreeBSD$ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.221 2006/07/28 19:05:28 jhb Exp */ @@ -392,4 +392,5 @@ #define SYS_thr_setscheduler 466 #define SYS_thr_getscheduler 467 #define SYS_thr_setschedparam 468 -#define SYS_MAXSYSCALL 469 +#define SYS_splice 469 +#define SYS_MAXSYSCALL 470 --- //depot/vendor/freebsd/src/sys/sys/syscall.mk 2006/07/28 19:11:10 +++ //depot/user/ssouhlal/splice/sys/sys/syscall.mk 2006/07/30 07:58:57 @@ -1,6 +1,6 @@ # FreeBSD system call names. # DO NOT EDIT-- this file is automatically generated. 
-# $FreeBSD: src/sys/sys/syscall.mk,v 1.151 2006/07/28 19:08:37 jhb Exp $ +# $FreeBSD$ # created from FreeBSD: src/sys/kern/syscalls.master,v 1.221 2006/07/28 19:05:28 jhb Exp MIASM = \ syscall.o \ @@ -333,4 +333,5 @@ aio_fsync.o \ thr_setscheduler.o \ thr_getscheduler.o \ - thr_setschedparam.o + thr_setschedparam.o \ + splice.o --- //depot/vendor/freebsd/src/sys/sys/sysproto.h 2006/07/28 19:11:10 +++ //depot/user/ssouhlal/splice/sys/sys/sysproto.h 2006/07/30 07:58:57 @@ -2,7 +2,7 @@ * System call prototypes. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/sys/sysproto.h,v 1.198 2006/07/28 19:08:37 jhb Exp $ + * $FreeBSD$ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.221 2006/07/28 19:05:28 jhb Exp */ @@ -1464,6 +1464,12 @@ char param_l_[PADL_(const struct sched_param *)]; const struct sched_param * param; char param_r_[PADR_(const struct sched_param *)]; char param_size_l_[PADL_(int)]; int param_size; char param_size_r_[PADR_(int)]; }; +struct splice_args { + char infd_l_[PADL_(int)]; int infd; char infd_r_[PADR_(int)]; + char outfd_l_[PADL_(int)]; int outfd; char outfd_r_[PADR_(int)]; + char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; int nosys(struct thread *, struct nosys_args *); void sys_exit(struct thread *, struct sys_exit_args *); int fork(struct thread *, struct fork_args *); @@ -1794,6 +1800,7 @@ int thr_setscheduler(struct thread *, struct thr_setscheduler_args *); int thr_getscheduler(struct thread *, struct thr_getscheduler_args *); int thr_setschedparam(struct thread *, struct thr_setschedparam_args *); +int splice(struct thread *, struct splice_args *); #ifdef COMPAT_43