diff --git a/share/man/man9/copy.9 b/share/man/man9/copy.9 index b6b975f..6008934 100644 --- a/share/man/man9/copy.9 +++ b/share/man/man9/copy.9 @@ -34,13 +34,15 @@ .\" .\" $FreeBSD$ .\" -.Dd January 7, 1996 +.Dd March 20, 2011 .Dt COPY 9 .Os .Sh NAME .Nm copy , .Nm copyin , +.Nm copyin_nofault , .Nm copyout , +.Nm copyout_nofault , .Nm copystr , .Nm copyinstr .Nd kernel copy functions @@ -50,8 +52,12 @@ .Ft int .Fn copyin "const void *uaddr" "void *kaddr" "size_t len" .Ft int +.Fn copyin_nofault "const void *uaddr" "void *kaddr" "size_t len" +.Ft int .Fn copyout "const void *kaddr" "void *uaddr" "size_t len" .Ft int +.Fn copyout_nofault "const void *kaddr" "void *uaddr" "size_t len" +.Ft int .Fn copystr "const void *kfaddr" "void *kdaddr" "size_t len" "size_t *done" .Ft int .Fn copyinstr "const void *uaddr" "void *kaddr" "size_t len" "size_t *done" @@ -70,20 +76,34 @@ The .Nm routines provide the following functionality: .Bl -tag -width "copyoutstr()" -.It Fn copyin -Copies +.It Fn copyin , +.It Fn copyin_nofault +Copy .Fa len bytes of data from the user-space address .Fa uaddr to the kernel-space address .Fa kaddr . -.It Fn copyout -Copies +The +.Fn copyin_nofault +function will not handle the page-in for a valid user address that does not have +the backing page resident, returning +.Er EFAULT +instead. +.It Fn copyout , +.It Fn copyout_nofault +Copy .Fa len bytes of data from the kernel-space address .Fa kaddr to the user-space address .Fa uaddr . +The +.Fn copyout_nofault +function will not handle the page-in for a valid user address that does not have +the backing page resident, returning +.Er EFAULT +instead. 
.It Fn copystr Copies a NUL-terminated string, at most .Fa len diff --git a/share/man/man9/uio.9 b/share/man/man9/uio.9 index fb27c9c..c14564f 100644 --- a/share/man/man9/uio.9 +++ b/share/man/man9/uio.9 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 21, 2010 +.Dd March 20, 2011 .Dt UIO 9 .Os .Sh NAME @@ -48,10 +48,14 @@ struct uio { .Ed .Ft int .Fn uiomove "void *buf" "int howmuch" "struct uio *uiop" +.Ft int +.Fn uiomove_nofault "void *buf" "int howmuch" "struct uio *uiop" .Sh DESCRIPTION -The function +The functions .Fn uiomove -is used to handle transfer of data between buffers and I/O vectors +and +.Fn uiomove_nofault +are used to handle transfer of data between buffers and I/O vectors that might possibly also cross the user/kernel space boundary. .Pp As a result of any @@ -110,6 +114,21 @@ for the associated thread; used if indicates that the transfer is to be made from/to a process's address space. .El +.Pp +The function +.Fn uiomove_nofault +will not handle the page-in for a valid user address that does not have +the backing page resident, returning +.Er EFAULT +instead. +It is the caller's responsibility to make sure that the pages are resident +before the call, for instance, by using the +.Xr vm_fault_quick_hold_pages 9 +service. +Then, +.Fn uiomove_nofault +can be called from a context that disallows sleeping or where recursing +into virtual memory system locks is prohibited. 
.Sh RETURN VALUES On success .Fn uiomove diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c index 9385dc4..f09c927 100644 --- a/sys/kern/subr_uio.c +++ b/sys/kern/subr_uio.c @@ -64,6 +64,8 @@ __FBSDID("$FreeBSD$"); SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); +static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault); + #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; @@ -129,23 +131,66 @@ retry: #endif /* ZERO_COPY_SOCKETS */ int +copyin_nofault(const void *udaddr, void *kaddr, size_t len) +{ + int error, save; + + save = vm_fault_disable_pagefaults(); + error = copyin(udaddr, kaddr, len); + vm_fault_enable_pagefaults(save); + return (error); +} + +int +copyout_nofault(const void *kaddr, void *udaddr, size_t len) +{ + int error, save; + + save = vm_fault_disable_pagefaults(); + error = copyout(kaddr, udaddr, len); + vm_fault_enable_pagefaults(save); + return (error); +} + +int uiomove(void *cp, int n, struct uio *uio) { - struct thread *td = curthread; + + return (uiomove_faultflag(cp, n, uio, 0)); +} + +int +uiomove_nofault(void *cp, int n, struct uio *uio) +{ + + return (uiomove_faultflag(cp, n, uio, 1)); +} + +static int +uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault) +{ + struct thread *td; struct iovec *iov; u_int cnt; - int error = 0; - int save = 0; + int error, newflags, save; + + td = curthread; + error = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); - KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, + KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td, ("uiomove proc")); - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, - "Calling uiomove()"); + if (!nofault) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "Calling uiomove()"); - save = td->td_pflags & TDP_DEADLKTREAT; - td->td_pflags |= TDP_DEADLKTREAT; + 
/* XXX does it make sense to set TDP_DEADLKTREAT for UIO_SYSSPACE? */ + newflags = TDP_DEADLKTREAT; + if (uio->uio_segflg == UIO_USERSPACE && nofault) + newflags |= TDP_NSLPPFAULT; + save = ~newflags | (td->td_pflags & newflags); + td->td_pflags |= newflags; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; @@ -187,8 +232,7 @@ uiomove(void *cp, int n, struct uio *uio) n -= cnt; } out: - if (save == 0) - td->td_pflags &= ~TDP_DEADLKTREAT; + td->td_pflags &= save; return (error); } diff --git a/sys/sys/proc.h b/sys/sys/proc.h index c54a956..157bad1 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -393,7 +393,7 @@ do { \ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock aquisition - deadlock treatment. */ -#define TDP_UNUSED80 0x00000080 /* available. */ +#define TDP_NSLPPFAULT 0x00000080 /* Do not sleep in the page fault handler. */ #define TDP_NOSLEEPING 0x00000100 /* Thread is not allowed to sleep on a sq. */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. 
*/ diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 35d34e4..8b0de57 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -217,8 +217,12 @@ int copyinstr(const void * __restrict udaddr, void * __restrict kaddr, __nonnull(1) __nonnull(2); int copyin(const void * __restrict udaddr, void * __restrict kaddr, size_t len) __nonnull(1) __nonnull(2); +int copyin_nofault(const void * __restrict udaddr, void * __restrict kaddr, + size_t len) __nonnull(1) __nonnull(2); int copyout(const void * __restrict kaddr, void * __restrict udaddr, size_t len) __nonnull(1) __nonnull(2); +int copyout_nofault(const void * __restrict kaddr, void * __restrict udaddr, + size_t len) __nonnull(1) __nonnull(2); int fubyte(const void *base); long fuword(const void *base); diff --git a/sys/sys/uio.h b/sys/sys/uio.h index d7fa124..45a5740 100644 --- a/sys/sys/uio.h +++ b/sys/sys/uio.h @@ -97,6 +97,7 @@ int copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop); int copyout_map(struct thread *td, vm_offset_t *addr, size_t sz); int copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz); int uiomove(void *cp, int n, struct uio *uio); +int uiomove_nofault(void *cp, int n, struct uio *uio); int uiomove_frombuf(void *buf, int buflen, struct uio *uio); int uiomove_fromphys(struct vm_page *ma[], vm_offset_t offset, int n, struct uio *uio); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 55abe86..2ab3342 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -209,6 +209,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags) { + if ((curthread->td_pflags & TDP_NSLPPFAULT) != 0) + return (KERN_PROTECTION_FAILURE); return (vm_fault_hold(map, vaddr, fault_type, fault_flags, NULL)); } @@ -1475,3 +1477,22 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) /* return number of pages */ return i; } + +int +vm_fault_disable_pagefaults(void) +{ + struct thread *td; + int save; + + td = curthread; + save = ~TDP_NSLPPFAULT | (td->td_pflags & 
TDP_NSLPPFAULT); + td->td_pflags |= TDP_NSLPPFAULT; + return (save); +} + +void +vm_fault_enable_pagefaults(int save) +{ + + curthread->td_pflags &= save; +}